mdify-cli 3.2.1__tar.gz → 3.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mdify_cli-3.2.1 → mdify_cli-3.3.0}/PKG-INFO +1 -1
- {mdify_cli-3.2.1 → mdify_cli-3.3.0}/mdify/__init__.py +1 -1
- {mdify_cli-3.2.1 → mdify_cli-3.3.0}/mdify/cli.py +115 -89
- {mdify_cli-3.2.1 → mdify_cli-3.3.0}/mdify/docling_client.py +24 -1
- {mdify_cli-3.2.1 → mdify_cli-3.3.0}/mdify/ssh/models.py +0 -12
- {mdify_cli-3.2.1 → mdify_cli-3.3.0}/mdify/ssh/remote_container.py +1 -1
- {mdify_cli-3.2.1 → mdify_cli-3.3.0}/mdify/ssh/transfer.py +3 -2
- {mdify_cli-3.2.1 → mdify_cli-3.3.0}/mdify_cli.egg-info/PKG-INFO +1 -1
- {mdify_cli-3.2.1 → mdify_cli-3.3.0}/pyproject.toml +1 -1
- {mdify_cli-3.2.1 → mdify_cli-3.3.0}/LICENSE +0 -0
- {mdify_cli-3.2.1 → mdify_cli-3.3.0}/README.md +0 -0
- {mdify_cli-3.2.1 → mdify_cli-3.3.0}/assets/mdify.png +0 -0
- {mdify_cli-3.2.1 → mdify_cli-3.3.0}/mdify/__main__.py +0 -0
- {mdify_cli-3.2.1 → mdify_cli-3.3.0}/mdify/container.py +0 -0
- {mdify_cli-3.2.1 → mdify_cli-3.3.0}/mdify/formatting.py +0 -0
- {mdify_cli-3.2.1 → mdify_cli-3.3.0}/mdify/ssh/__init__.py +0 -0
- {mdify_cli-3.2.1 → mdify_cli-3.3.0}/mdify/ssh/client.py +0 -0
- {mdify_cli-3.2.1 → mdify_cli-3.3.0}/mdify_cli.egg-info/SOURCES.txt +0 -0
- {mdify_cli-3.2.1 → mdify_cli-3.3.0}/mdify_cli.egg-info/dependency_links.txt +0 -0
- {mdify_cli-3.2.1 → mdify_cli-3.3.0}/mdify_cli.egg-info/entry_points.txt +0 -0
- {mdify_cli-3.2.1 → mdify_cli-3.3.0}/mdify_cli.egg-info/requires.txt +0 -0
- {mdify_cli-3.2.1 → mdify_cli-3.3.0}/mdify_cli.egg-info/top_level.txt +0 -0
- {mdify_cli-3.2.1 → mdify_cli-3.3.0}/setup.cfg +0 -0
- {mdify_cli-3.2.1 → mdify_cli-3.3.0}/tests/test_cli.py +0 -0
- {mdify_cli-3.2.1 → mdify_cli-3.3.0}/tests/test_container.py +0 -0
- {mdify_cli-3.2.1 → mdify_cli-3.3.0}/tests/test_docling_client.py +0 -0
- {mdify_cli-3.2.1 → mdify_cli-3.3.0}/tests/test_ssh_client.py +0 -0
|
@@ -8,10 +8,10 @@ is lightweight and has no ML dependencies.
|
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
10
|
import argparse
|
|
11
|
-
import asyncio
|
|
12
11
|
import json
|
|
13
12
|
import os
|
|
14
13
|
import platform
|
|
14
|
+
import shlex
|
|
15
15
|
import shutil
|
|
16
16
|
import subprocess
|
|
17
17
|
import sys
|
|
@@ -273,14 +273,14 @@ def check_for_update(force: bool = False) -> None:
|
|
|
273
273
|
|
|
274
274
|
_update_last_check_time()
|
|
275
275
|
|
|
276
|
+
from mdify.formatting import Colorizer
|
|
277
|
+
|
|
276
278
|
if not _compare_versions(__version__, remote_version):
|
|
277
279
|
if force:
|
|
278
|
-
from mdify.formatting import Colorizer
|
|
279
280
|
color = Colorizer(sys.stdout)
|
|
280
281
|
print(color.success(f"✓ mdify is up to date (v{__version__})"))
|
|
281
282
|
return
|
|
282
283
|
|
|
283
|
-
from mdify.formatting import Colorizer
|
|
284
284
|
color = Colorizer(sys.stdout)
|
|
285
285
|
print(f"\n{color.bright_yellow('=' * 60)}")
|
|
286
286
|
print(color.bold_yellow("🎉 A new version of mdify-cli is available!"))
|
|
@@ -1175,7 +1175,6 @@ def main_async_remote(args) -> int:
|
|
|
1175
1175
|
input_path = Path(args.input)
|
|
1176
1176
|
if not input_path.exists():
|
|
1177
1177
|
await ssh_client.disconnect()
|
|
1178
|
-
color = Colorizer(sys.stderr)
|
|
1179
1178
|
print(f"{color.error('✗ Error:')} Input file or directory not found: {args.input}", file=sys.stderr)
|
|
1180
1179
|
return 1
|
|
1181
1180
|
|
|
@@ -1185,7 +1184,6 @@ def main_async_remote(args) -> int:
|
|
|
1185
1184
|
|
|
1186
1185
|
if not files_to_convert:
|
|
1187
1186
|
await ssh_client.disconnect()
|
|
1188
|
-
color = Colorizer(sys.stderr)
|
|
1189
1187
|
print(f"{color.error('✗ Error:')} No supported files found in {args.input}", file=sys.stderr)
|
|
1190
1188
|
print(f" {color.dim_white(f'Supported formats: {', '.join(sorted(SUPPORTED_EXTENSIONS))}')} ", file=sys.stderr)
|
|
1191
1189
|
return 1
|
|
@@ -1268,7 +1266,7 @@ def main_async_remote(args) -> int:
|
|
|
1268
1266
|
)
|
|
1269
1267
|
|
|
1270
1268
|
attempt = 0
|
|
1271
|
-
while
|
|
1269
|
+
while attempt <= 1: # Max 2 attempts (0 and 1)
|
|
1272
1270
|
try:
|
|
1273
1271
|
# Upload file
|
|
1274
1272
|
remote_file_path = f"{work_dir}/{input_file.name}"
|
|
@@ -1322,7 +1320,7 @@ def main_async_remote(args) -> int:
|
|
|
1322
1320
|
f"curl -X POST "
|
|
1323
1321
|
f"--connect-timeout 60 "
|
|
1324
1322
|
f"--max-time {remote_conversion_timeout} "
|
|
1325
|
-
f"-F 'files=@{remote_file_path}' "
|
|
1323
|
+
f"-F 'files=@{shlex.quote(remote_file_path)}' "
|
|
1326
1324
|
f"-F 'to_formats=md' "
|
|
1327
1325
|
f"-F 'do_ocr=true' "
|
|
1328
1326
|
)
|
|
@@ -1335,11 +1333,12 @@ def main_async_remote(args) -> int:
|
|
|
1335
1333
|
conversion_success = False
|
|
1336
1334
|
conversion_output = None
|
|
1337
1335
|
while conversion_attempt < 3 and not conversion_success:
|
|
1336
|
+
conversion_attempt += 1
|
|
1338
1337
|
try:
|
|
1339
|
-
if conversion_attempt >
|
|
1340
|
-
# Exponential backoff: 2s, 4s
|
|
1341
|
-
backoff_delay = 2 ** conversion_attempt
|
|
1342
|
-
print(f" ↻ Conversion retry {conversion_attempt} (waiting {backoff_delay}s for server recovery)...", file=sys.stderr)
|
|
1338
|
+
if conversion_attempt > 1 and not args.quiet:
|
|
1339
|
+
# Exponential backoff: 2s, 4s
|
|
1340
|
+
backoff_delay = 2 ** (conversion_attempt - 1)
|
|
1341
|
+
print(f" ↻ Conversion retry {conversion_attempt - 1} (waiting {backoff_delay}s for server recovery)...", file=sys.stderr)
|
|
1343
1342
|
await asyncio.sleep(backoff_delay)
|
|
1344
1343
|
|
|
1345
1344
|
conversion_output, _, conv_code = await ssh_client.run_command(convert_cmd, timeout=remote_conversion_timeout)
|
|
@@ -1352,8 +1351,7 @@ def main_async_remote(args) -> int:
|
|
|
1352
1351
|
break
|
|
1353
1352
|
except Exception as conv_exc:
|
|
1354
1353
|
is_conn_err = is_connection_error(conv_exc)
|
|
1355
|
-
if is_conn_err and conversion_attempt <
|
|
1356
|
-
conversion_attempt += 1
|
|
1354
|
+
if is_conn_err and conversion_attempt < 3:
|
|
1357
1355
|
if not args.quiet:
|
|
1358
1356
|
# Exponential backoff: 5s, 10s
|
|
1359
1357
|
backoff_delay = 5 * conversion_attempt
|
|
@@ -1364,6 +1362,7 @@ def main_async_remote(args) -> int:
|
|
|
1364
1362
|
try:
|
|
1365
1363
|
await ssh_client.disconnect()
|
|
1366
1364
|
except Exception:
|
|
1365
|
+
# Best-effort disconnect; ignore errors (e.g., already closed) before reconnecting
|
|
1367
1366
|
pass
|
|
1368
1367
|
|
|
1369
1368
|
# Reconnect with retry
|
|
@@ -1375,51 +1374,65 @@ def main_async_remote(args) -> int:
|
|
|
1375
1374
|
continue
|
|
1376
1375
|
else:
|
|
1377
1376
|
# Either not a connection error, or we've exhausted retries
|
|
1378
|
-
if
|
|
1379
|
-
print(f" [DEBUG] Breaking loop: not conn_err or exhausted retries", file=sys.stderr)
|
|
1380
|
-
if conversion_attempt >= 2 and is_conn_err:
|
|
1377
|
+
if conversion_attempt >= 3 and is_conn_err:
|
|
1381
1378
|
if not args.quiet:
|
|
1382
1379
|
print(f" ↻ Connection error on final retry attempt", file=sys.stderr)
|
|
1383
1380
|
break
|
|
1384
1381
|
|
|
1385
1382
|
if not conversion_success:
|
|
1386
|
-
|
|
1387
|
-
print(f" {color_error.error('✗ Failed:')} Conversion failed after {conversion_attempt} attempt(s)", file=sys.stderr)
|
|
1383
|
+
print(f" {color.error('✗ Failed:')} Conversion failed after {conversion_attempt} attempt(s)", file=sys.stderr)
|
|
1388
1384
|
failed += 1
|
|
1389
1385
|
break
|
|
1390
1386
|
|
|
1391
1387
|
# Parse JSON response to extract markdown content
|
|
1392
1388
|
try:
|
|
1393
1389
|
response_data = json.loads(conversion_output)
|
|
1390
|
+
color_err = Colorizer(sys.stderr)
|
|
1391
|
+
|
|
1392
|
+
# Check if response is an error (has error keys)
|
|
1393
|
+
error_keys = {"detail", "error", "message", "code", "status"}
|
|
1394
|
+
response_keys = set(response_data.keys()) if isinstance(response_data, dict) else set()
|
|
1395
|
+
if error_keys & response_keys:
|
|
1396
|
+
# Error response - extract and display error
|
|
1397
|
+
error_detail = response_data.get("detail", response_data.get("error", str(response_data)))
|
|
1398
|
+
print(f" {color_err.error('✗ Failed:')} {error_detail}", file=sys.stderr)
|
|
1399
|
+
if "DOCLING_SERVE_MAX_SYNC_WAIT" in str(error_detail):
|
|
1400
|
+
timeout_val = args.remote_timeout or 3600
|
|
1401
|
+
print(f" {color_err.info('ℹ Tip:')} Increase timeout with --remote-timeout (current: {timeout_val}s)", file=sys.stderr)
|
|
1402
|
+
failed += 1
|
|
1403
|
+
break
|
|
1394
1404
|
|
|
1395
1405
|
# Extract content from response structure
|
|
1396
1406
|
# Actual format: {"document": {"md_content": "..."}, "status": "success"}
|
|
1407
|
+
markdown_content = None
|
|
1397
1408
|
if "document" in response_data:
|
|
1398
1409
|
document = response_data["document"]
|
|
1399
1410
|
if "md_content" in document and document["md_content"]:
|
|
1400
1411
|
markdown_content = document["md_content"]
|
|
1401
1412
|
elif "text_content" in document and document["text_content"]:
|
|
1402
1413
|
markdown_content = document["text_content"]
|
|
1403
|
-
|
|
1404
|
-
# Fallback - use whole document
|
|
1405
|
-
markdown_content = json.dumps(document, indent=2)
|
|
1406
|
-
else:
|
|
1414
|
+
elif "results" in response_data and response_data["results"]:
|
|
1407
1415
|
# Legacy format fallback
|
|
1408
|
-
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
|
|
1414
|
-
|
|
1415
|
-
markdown_content = content
|
|
1416
|
-
else:
|
|
1417
|
-
markdown_content = str(content)
|
|
1416
|
+
result = response_data["results"][0]
|
|
1417
|
+
if "content" in result:
|
|
1418
|
+
content = result["content"]
|
|
1419
|
+
if isinstance(content, dict) and "markdown" in content:
|
|
1420
|
+
markdown_content = content["markdown"]
|
|
1421
|
+
elif isinstance(content, str):
|
|
1422
|
+
markdown_content = content
|
|
1418
1423
|
else:
|
|
1419
|
-
markdown_content = str(
|
|
1420
|
-
|
|
1421
|
-
|
|
1422
|
-
|
|
1424
|
+
markdown_content = str(content)
|
|
1425
|
+
|
|
1426
|
+
# Validate content exists and is not empty/too short
|
|
1427
|
+
if not markdown_content or len(markdown_content.strip()) < 50:
|
|
1428
|
+
print(f" {color_err.error('✗ Failed:')} Empty or invalid conversion result", file=sys.stderr)
|
|
1429
|
+
if args.remote_timeout and args.remote_timeout < 300:
|
|
1430
|
+
print(
|
|
1431
|
+
f" {color_err.info('ℹ Tip:')} Timeout is only {args.remote_timeout}s. Consider increasing with --remote-timeout (default: 3600s)",
|
|
1432
|
+
file=sys.stderr,
|
|
1433
|
+
)
|
|
1434
|
+
failed += 1
|
|
1435
|
+
break
|
|
1423
1436
|
|
|
1424
1437
|
# Write markdown content to local temp file first, then upload via SFTP
|
|
1425
1438
|
# (Piping large content through SSH here-documents can crash the connection)
|
|
@@ -1427,6 +1440,7 @@ def main_async_remote(args) -> int:
|
|
|
1427
1440
|
if not args.quiet:
|
|
1428
1441
|
print(f" {color.cyan('Writing')} {content_size_kb:.1f}KB markdown via SFTP...", file=sys.stderr)
|
|
1429
1442
|
|
|
1443
|
+
temp_path = None
|
|
1430
1444
|
try:
|
|
1431
1445
|
# Write to temporary local file
|
|
1432
1446
|
with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False) as temp_file:
|
|
@@ -1441,12 +1455,6 @@ def main_async_remote(args) -> int:
|
|
|
1441
1455
|
compress=False,
|
|
1442
1456
|
)
|
|
1443
1457
|
|
|
1444
|
-
# Cleanup temp file
|
|
1445
|
-
try:
|
|
1446
|
-
os.unlink(temp_path)
|
|
1447
|
-
except Exception:
|
|
1448
|
-
pass
|
|
1449
|
-
|
|
1450
1458
|
if not args.quiet:
|
|
1451
1459
|
print(f" {color.green('✓')} Markdown written", file=sys.stderr)
|
|
1452
1460
|
except Exception as write_exc:
|
|
@@ -1454,6 +1462,14 @@ def main_async_remote(args) -> int:
|
|
|
1454
1462
|
print(f" ✗ Failed to write markdown: {write_exc}", file=sys.stderr)
|
|
1455
1463
|
failed += 1
|
|
1456
1464
|
break
|
|
1465
|
+
finally:
|
|
1466
|
+
# Cleanup temp file
|
|
1467
|
+
if temp_path:
|
|
1468
|
+
try:
|
|
1469
|
+
os.unlink(temp_path)
|
|
1470
|
+
except Exception as cleanup_exc:
|
|
1471
|
+
if DEBUG:
|
|
1472
|
+
print(f" ! Failed to remove temporary file {temp_path}: {cleanup_exc}", file=sys.stderr)
|
|
1457
1473
|
|
|
1458
1474
|
except (json.JSONDecodeError, KeyError, IndexError):
|
|
1459
1475
|
print(f" ✗ Failed to parse conversion response", file=sys.stderr)
|
|
@@ -1481,7 +1497,7 @@ def main_async_remote(args) -> int:
|
|
|
1481
1497
|
successful += 1
|
|
1482
1498
|
|
|
1483
1499
|
# Cleanup remote files
|
|
1484
|
-
await ssh_client.run_command(f"rm -f {remote_file_path} {remote_output_path}")
|
|
1500
|
+
await ssh_client.run_command(f"rm -f {shlex.quote(remote_file_path)} {shlex.quote(remote_output_path)}")
|
|
1485
1501
|
|
|
1486
1502
|
break
|
|
1487
1503
|
except Exception as e:
|
|
@@ -1492,12 +1508,12 @@ def main_async_remote(args) -> int:
|
|
|
1492
1508
|
try:
|
|
1493
1509
|
await ssh_client.disconnect()
|
|
1494
1510
|
except Exception:
|
|
1511
|
+
# Best-effort disconnect; ignore errors since we'll immediately reconnect.
|
|
1495
1512
|
pass
|
|
1496
1513
|
await ssh_client.connect()
|
|
1497
1514
|
continue
|
|
1498
1515
|
|
|
1499
|
-
|
|
1500
|
-
print(f" {color_err.error('✗ Failed:')} {str(e)}", file=sys.stderr)
|
|
1516
|
+
print(f" {color.error('✗ Failed:')} {str(e)}", file=sys.stderr)
|
|
1501
1517
|
if DEBUG:
|
|
1502
1518
|
import traceback
|
|
1503
1519
|
traceback.print_exc(file=sys.stderr)
|
|
@@ -1519,7 +1535,7 @@ def main_async_remote(args) -> int:
|
|
|
1519
1535
|
|
|
1520
1536
|
# Cleanup remote work directory
|
|
1521
1537
|
try:
|
|
1522
|
-
await ssh_client.run_command(f"rm -rf {work_dir}")
|
|
1538
|
+
await ssh_client.run_command(f"rm -rf {shlex.quote(work_dir)}")
|
|
1523
1539
|
if not args.quiet:
|
|
1524
1540
|
print(color.green(f"✓ Cleaned up remote directory"), file=sys.stderr)
|
|
1525
1541
|
except Exception as e:
|
|
@@ -1594,8 +1610,11 @@ def main_async_remote(args) -> int:
|
|
|
1594
1610
|
def main() -> int:
|
|
1595
1611
|
"""Main entry point for the CLI."""
|
|
1596
1612
|
from mdify.formatting import Colorizer
|
|
1597
|
-
|
|
1598
|
-
|
|
1613
|
+
|
|
1614
|
+
color_stderr = Colorizer(sys.stderr)
|
|
1615
|
+
color_stdout = Colorizer(sys.stdout)
|
|
1616
|
+
|
|
1617
|
+
print(color_stderr.bold_cyan(f"📄 mdify v{__version__}"), file=sys.stderr)
|
|
1599
1618
|
args = parse_args()
|
|
1600
1619
|
|
|
1601
1620
|
# Handle --check-update flag
|
|
@@ -1743,41 +1762,34 @@ def main() -> int:
|
|
|
1743
1762
|
|
|
1744
1763
|
# Validate input
|
|
1745
1764
|
if not input_path.exists():
|
|
1746
|
-
|
|
1747
|
-
print(f"{color.error('✗ Error:')} Input path does not exist: {input_path}", file=sys.stderr)
|
|
1765
|
+
print(f"{color_stderr.error('✗ Error:')} Input path does not exist: {input_path}", file=sys.stderr)
|
|
1748
1766
|
return 1
|
|
1749
1767
|
|
|
1750
1768
|
# Get files to convert
|
|
1751
1769
|
try:
|
|
1752
1770
|
files_to_convert = get_files_to_convert(input_path, args.glob, args.recursive)
|
|
1753
1771
|
except Exception as e:
|
|
1754
|
-
|
|
1755
|
-
print(f"{color.error('✗ Error:')} {e}", file=sys.stderr)
|
|
1772
|
+
print(f"{color_stderr.error('✗ Error:')} {e}", file=sys.stderr)
|
|
1756
1773
|
return 1
|
|
1757
1774
|
|
|
1758
1775
|
if not files_to_convert:
|
|
1759
|
-
|
|
1760
|
-
print(f"{color.warning('⚠ Warning:')} No files found to convert in: {input_path}", file=sys.stderr)
|
|
1776
|
+
print(f"{color_stderr.warning('⚠ Warning:')} No files found to convert in: {input_path}", file=sys.stderr)
|
|
1761
1777
|
return 1
|
|
1762
1778
|
|
|
1763
1779
|
total_files = len(files_to_convert)
|
|
1764
1780
|
total_size = sum(f.stat().st_size for f in files_to_convert)
|
|
1765
1781
|
|
|
1766
1782
|
if not args.quiet:
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
print(f"{
|
|
1770
|
-
print(f"{
|
|
1771
|
-
print(f"{
|
|
1772
|
-
print(f"{color_info.cyan('🐳 Runtime:')} {color_info.bright_white(runtime)}")
|
|
1773
|
-
print(f"{color_info.cyan('🖼️ Image:')} {color_info.dim_white(image)}")
|
|
1783
|
+
print(f"{color_stdout.bright_cyan('📦 Found')} {color_stdout.bold(str(total_files))} {color_stdout.bright_cyan('file(s)')} {color_stdout.dim_white(f'({format_size(total_size)})')}")
|
|
1784
|
+
print(f"{color_stdout.cyan('📁 Source:')} {color_stdout.bright_white(str(input_path.resolve()))}")
|
|
1785
|
+
print(f"{color_stdout.cyan('💾 Output:')} {color_stdout.bright_white(str(output_dir.resolve()))}")
|
|
1786
|
+
print(f"{color_stdout.cyan('🐳 Runtime:')} {color_stdout.bright_white(runtime)}")
|
|
1787
|
+
print(f"{color_stdout.cyan('🖼️ Image:')} {color_stdout.dim_white(image)}")
|
|
1774
1788
|
print()
|
|
1775
1789
|
|
|
1776
1790
|
if args.mask:
|
|
1777
|
-
from mdify.formatting import Colorizer
|
|
1778
|
-
color_warn = Colorizer(sys.stderr)
|
|
1779
1791
|
print(
|
|
1780
|
-
|
|
1792
|
+
color_stderr.warning("⚠ --mask is not supported with docling-serve and will be ignored"),
|
|
1781
1793
|
file=sys.stderr,
|
|
1782
1794
|
)
|
|
1783
1795
|
|
|
@@ -1794,9 +1806,7 @@ def main() -> int:
|
|
|
1794
1806
|
|
|
1795
1807
|
try:
|
|
1796
1808
|
if not args.quiet:
|
|
1797
|
-
|
|
1798
|
-
color_start = Colorizer(sys.stdout)
|
|
1799
|
-
print(f"{color_start.bright_cyan('▶️ Starting')} {color_start.bright_white('docling-serve')} {color_start.bright_cyan('container')}...\n")
|
|
1809
|
+
print(f"{color_stdout.bright_cyan('▶️ Starting')} {color_stdout.bright_white('docling-serve')} {color_stdout.bright_cyan('container')}...\n")
|
|
1800
1810
|
|
|
1801
1811
|
# Apply resource profile
|
|
1802
1812
|
profile = RESOURCE_PROFILES[args.profile]
|
|
@@ -1871,13 +1881,33 @@ def main() -> int:
|
|
|
1871
1881
|
spinner.stop()
|
|
1872
1882
|
|
|
1873
1883
|
if result.success:
|
|
1874
|
-
#
|
|
1875
|
-
|
|
1876
|
-
|
|
1877
|
-
|
|
1878
|
-
|
|
1879
|
-
|
|
1880
|
-
|
|
1884
|
+
# Validate content is not empty and not an error response
|
|
1885
|
+
content_length = len(result.content.strip()) if result.content else 0
|
|
1886
|
+
if content_length < 50:
|
|
1887
|
+
# Too short - likely an error or empty document
|
|
1888
|
+
failed_count += 1
|
|
1889
|
+
if not args.quiet:
|
|
1890
|
+
print(
|
|
1891
|
+
f"{progress} {input_file.name} ✗ ({format_duration(elapsed)})"
|
|
1892
|
+
)
|
|
1893
|
+
error_msg = "Empty or invalid conversion result"
|
|
1894
|
+
if result.content:
|
|
1895
|
+
error_msg += f" ({len(result.content)} bytes)"
|
|
1896
|
+
print(f" Error: {error_msg}", file=sys.stderr)
|
|
1897
|
+
if args.timeout and args.timeout < 300:
|
|
1898
|
+
color_out = Colorizer(sys.stderr)
|
|
1899
|
+
print(
|
|
1900
|
+
f" {color_out.info('ℹ Tip:')} Timeout is only {args.timeout}s. Consider increasing with --timeout (default: 1200s)",
|
|
1901
|
+
file=sys.stderr,
|
|
1902
|
+
)
|
|
1903
|
+
else:
|
|
1904
|
+
# Write result to output file
|
|
1905
|
+
output_file.write_text(result.content)
|
|
1906
|
+
success_count += 1
|
|
1907
|
+
if not args.quiet:
|
|
1908
|
+
print(
|
|
1909
|
+
f"{progress} {input_file.name} ✓ ({format_duration(elapsed)})"
|
|
1910
|
+
)
|
|
1881
1911
|
else:
|
|
1882
1912
|
failed_count += 1
|
|
1883
1913
|
error_msg = result.error or "Unknown error"
|
|
@@ -2035,30 +2065,26 @@ def main() -> int:
|
|
|
2035
2065
|
|
|
2036
2066
|
# Print summary
|
|
2037
2067
|
if not args.quiet:
|
|
2038
|
-
from mdify.formatting import Colorizer
|
|
2039
|
-
color_out = Colorizer(sys.stdout)
|
|
2040
2068
|
print()
|
|
2041
|
-
print(
|
|
2042
|
-
print(
|
|
2043
|
-
print(
|
|
2044
|
-
print(f" {
|
|
2069
|
+
print(color_stdout.cyan("=" * 60))
|
|
2070
|
+
print(color_stdout.bold_cyan("📊 Local Conversion Summary"))
|
|
2071
|
+
print(color_stdout.cyan("=" * 60))
|
|
2072
|
+
print(f" {color_stdout.cyan('Total files:')} {color_stdout.bold(str(total_files))}")
|
|
2045
2073
|
if success_count > 0:
|
|
2046
|
-
print(f" {
|
|
2074
|
+
print(f" {color_stdout.green('✓ Successful:')} {color_stdout.bold_green(str(success_count))}")
|
|
2047
2075
|
if skipped_count > 0:
|
|
2048
|
-
print(f" {
|
|
2076
|
+
print(f" {color_stdout.yellow('⊘ Skipped:')} {color_stdout.bold_yellow(str(skipped_count))}")
|
|
2049
2077
|
if failed_count > 0:
|
|
2050
|
-
print(f" {
|
|
2051
|
-
print(f" {
|
|
2052
|
-
print(
|
|
2078
|
+
print(f" {color_stdout.red('✗ Failed:')} {color_stdout.bold_red(str(failed_count))}")
|
|
2079
|
+
print(f" {color_stdout.cyan('Total time:')} {color_stdout.bright_cyan(format_duration(total_elapsed))}")
|
|
2080
|
+
print(color_stdout.cyan("=" * 60))
|
|
2053
2081
|
|
|
2054
2082
|
except KeyboardInterrupt:
|
|
2055
2083
|
if not args.quiet:
|
|
2056
|
-
|
|
2057
|
-
color_out = Colorizer(sys.stdout)
|
|
2058
|
-
print(f"\n\n{color_out.warning('⚠ Interrupted by user. Container stopped.')}")
|
|
2084
|
+
print(f"\n\n{color_stdout.warning('⚠ Interrupted by user. Container stopped.')}")
|
|
2059
2085
|
if success_count > 0 or skipped_count > 0 or failed_count > 0:
|
|
2060
2086
|
print(
|
|
2061
|
-
f"{
|
|
2087
|
+
f"{color_stdout.dim_white('Partial progress:')} {color_stdout.green(str(success_count))} successful, {color_stdout.red(str(failed_count))} failed, {color_stdout.yellow(str(skipped_count))} skipped"
|
|
2062
2088
|
)
|
|
2063
2089
|
return 130
|
|
2064
2090
|
|
|
@@ -48,6 +48,25 @@ def _get_mime_type(file_path: Path) -> str:
|
|
|
48
48
|
return mime_type or "application/octet-stream"
|
|
49
49
|
|
|
50
50
|
|
|
51
|
+
def _is_error_response(result_data) -> bool:
|
|
52
|
+
"""Check if response contains an error instead of content.
|
|
53
|
+
|
|
54
|
+
Detects common API error response patterns.
|
|
55
|
+
|
|
56
|
+
Args:
|
|
57
|
+
result_data: Response data to check
|
|
58
|
+
|
|
59
|
+
Returns:
|
|
60
|
+
True if response appears to be an error
|
|
61
|
+
"""
|
|
62
|
+
if not isinstance(result_data, dict):
|
|
63
|
+
return False
|
|
64
|
+
|
|
65
|
+
# Check for common error keys at top level
|
|
66
|
+
error_keys = {"detail", "error", "message", "code", "status"}
|
|
67
|
+
return bool(error_keys & set(result_data.keys()))
|
|
68
|
+
|
|
69
|
+
|
|
51
70
|
def _extract_content(result_data) -> str:
|
|
52
71
|
"""Extract content from API response, supporting both old and new formats.
|
|
53
72
|
|
|
@@ -61,8 +80,12 @@ def _extract_content(result_data) -> str:
|
|
|
61
80
|
result_data: Response data from docling-serve API
|
|
62
81
|
|
|
63
82
|
Returns:
|
|
64
|
-
Extracted content string, or empty string if not found
|
|
83
|
+
Extracted content string, or empty string if not found or if response is an error
|
|
65
84
|
"""
|
|
85
|
+
# Detect error responses and return empty string instead of parsing them
|
|
86
|
+
if _is_error_response(result_data):
|
|
87
|
+
return ""
|
|
88
|
+
|
|
66
89
|
if isinstance(result_data, dict):
|
|
67
90
|
# New format with document field
|
|
68
91
|
if "document" in result_data:
|
|
@@ -66,18 +66,6 @@ class SSHConfig:
|
|
|
66
66
|
|
|
67
67
|
def __post_init__(self):
|
|
68
68
|
"""Validate config after initialization."""
|
|
69
|
-
if self.port is None:
|
|
70
|
-
self.port = 22
|
|
71
|
-
if self.timeout is None:
|
|
72
|
-
self.timeout = 30
|
|
73
|
-
if self.keepalive is None:
|
|
74
|
-
self.keepalive = 60
|
|
75
|
-
if self.compression is None:
|
|
76
|
-
self.compression = False
|
|
77
|
-
if self.work_dir is None:
|
|
78
|
-
self.work_dir = "/tmp/mdify"
|
|
79
|
-
if self.username is None:
|
|
80
|
-
self.username = ""
|
|
81
69
|
if not self.host:
|
|
82
70
|
raise ConfigError("host is required")
|
|
83
71
|
if not 1 <= self.port <= 65535:
|
|
@@ -63,7 +63,7 @@ class RemoteContainer(DoclingContainer):
|
|
|
63
63
|
try:
|
|
64
64
|
# Find containers using this port
|
|
65
65
|
# Using docker inspect with port filter
|
|
66
|
-
cmd = f"{self.runtime} ps -a --filter 'publish={self.port}' --format '{{{{.ID}}}}'"
|
|
66
|
+
cmd = f"{self.runtime} ps -a --filter 'publish={int(self.port)}' --format '{{{{.ID}}}}'"
|
|
67
67
|
stdout, stderr, code = await self.ssh_client.run_command(cmd, timeout=10)
|
|
68
68
|
|
|
69
69
|
if code == 0 and stdout.strip():
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
import gzip
|
|
4
4
|
import hashlib
|
|
5
5
|
import logging
|
|
6
|
+
import shlex
|
|
6
7
|
from pathlib import Path
|
|
7
8
|
from typing import Callable
|
|
8
9
|
from mdify.ssh.models import TransferSession
|
|
@@ -233,8 +234,8 @@ class FileTransferManager:
|
|
|
233
234
|
|
|
234
235
|
# Calculate remote checksum (sha256sum or shasum fallback)
|
|
235
236
|
checksum_cmd = (
|
|
236
|
-
f"(command -v sha256sum >/dev/null 2>&1 && sha256sum {remote_path} | awk '{{print $1}}') "
|
|
237
|
-
f"|| (command -v shasum >/dev/null 2>&1 && shasum -a 256 {remote_path} | awk '{{print $1}}')"
|
|
237
|
+
f"(command -v sha256sum >/dev/null 2>&1 && sha256sum {shlex.quote(remote_path)} | awk '{{print $1}}') "
|
|
238
|
+
f"|| (command -v shasum >/dev/null 2>&1 && shasum -a 256 {shlex.quote(remote_path)} | awk '{{print $1}}')"
|
|
238
239
|
)
|
|
239
240
|
stdout, stderr, code = await self.ssh_client.run_command(checksum_cmd)
|
|
240
241
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|