mdify-cli 3.2.0__py3-none-any.whl → 3.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdify/__init__.py +1 -1
- mdify/cli.py +118 -85
- mdify/docling_client.py +24 -1
- mdify/ssh/models.py +0 -12
- mdify/ssh/remote_container.py +1 -1
- mdify/ssh/transfer.py +3 -2
- {mdify_cli-3.2.0.dist-info → mdify_cli-3.3.0.dist-info}/METADATA +1 -1
- mdify_cli-3.3.0.dist-info/RECORD +18 -0
- mdify_cli-3.2.0.dist-info/RECORD +0 -18
- {mdify_cli-3.2.0.dist-info → mdify_cli-3.3.0.dist-info}/WHEEL +0 -0
- {mdify_cli-3.2.0.dist-info → mdify_cli-3.3.0.dist-info}/entry_points.txt +0 -0
- {mdify_cli-3.2.0.dist-info → mdify_cli-3.3.0.dist-info}/licenses/LICENSE +0 -0
- {mdify_cli-3.2.0.dist-info → mdify_cli-3.3.0.dist-info}/top_level.txt +0 -0
mdify/__init__.py
CHANGED
mdify/cli.py
CHANGED
|
@@ -8,10 +8,10 @@ is lightweight and has no ML dependencies.
|
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
10
|
import argparse
|
|
11
|
-
import asyncio
|
|
12
11
|
import json
|
|
13
12
|
import os
|
|
14
13
|
import platform
|
|
14
|
+
import shlex
|
|
15
15
|
import shutil
|
|
16
16
|
import subprocess
|
|
17
17
|
import sys
|
|
@@ -273,14 +273,14 @@ def check_for_update(force: bool = False) -> None:
|
|
|
273
273
|
|
|
274
274
|
_update_last_check_time()
|
|
275
275
|
|
|
276
|
+
from mdify.formatting import Colorizer
|
|
277
|
+
|
|
276
278
|
if not _compare_versions(__version__, remote_version):
|
|
277
279
|
if force:
|
|
278
|
-
from mdify.formatting import Colorizer
|
|
279
280
|
color = Colorizer(sys.stdout)
|
|
280
281
|
print(color.success(f"✓ mdify is up to date (v{__version__})"))
|
|
281
282
|
return
|
|
282
283
|
|
|
283
|
-
from mdify.formatting import Colorizer
|
|
284
284
|
color = Colorizer(sys.stdout)
|
|
285
285
|
print(f"\n{color.bright_yellow('=' * 60)}")
|
|
286
286
|
print(color.bold_yellow("🎉 A new version of mdify-cli is available!"))
|
|
@@ -1175,7 +1175,7 @@ def main_async_remote(args) -> int:
|
|
|
1175
1175
|
input_path = Path(args.input)
|
|
1176
1176
|
if not input_path.exists():
|
|
1177
1177
|
await ssh_client.disconnect()
|
|
1178
|
-
print(f"Error: Input file or directory not found: {args.input}", file=sys.stderr)
|
|
1178
|
+
print(f"{color.error('✗ Error:')} Input file or directory not found: {args.input}", file=sys.stderr)
|
|
1179
1179
|
return 1
|
|
1180
1180
|
|
|
1181
1181
|
# Store resolved path as base for relative path calculations
|
|
@@ -1184,8 +1184,8 @@ def main_async_remote(args) -> int:
|
|
|
1184
1184
|
|
|
1185
1185
|
if not files_to_convert:
|
|
1186
1186
|
await ssh_client.disconnect()
|
|
1187
|
-
print(f"Error: No supported files found in {args.input}", file=sys.stderr)
|
|
1188
|
-
print(f" Supported formats: {', '.join(sorted(SUPPORTED_EXTENSIONS))}", file=sys.stderr)
|
|
1187
|
+
print(f"{color.error('✗ Error:')} No supported files found in {args.input}", file=sys.stderr)
|
|
1188
|
+
print(f" {color.dim_white(f'Supported formats: {', '.join(sorted(SUPPORTED_EXTENSIONS))}')} ", file=sys.stderr)
|
|
1189
1189
|
return 1
|
|
1190
1190
|
|
|
1191
1191
|
if not args.quiet:
|
|
@@ -1266,7 +1266,7 @@ def main_async_remote(args) -> int:
|
|
|
1266
1266
|
)
|
|
1267
1267
|
|
|
1268
1268
|
attempt = 0
|
|
1269
|
-
while
|
|
1269
|
+
while attempt <= 1: # Max 2 attempts (0 and 1)
|
|
1270
1270
|
try:
|
|
1271
1271
|
# Upload file
|
|
1272
1272
|
remote_file_path = f"{work_dir}/{input_file.name}"
|
|
@@ -1320,7 +1320,7 @@ def main_async_remote(args) -> int:
|
|
|
1320
1320
|
f"curl -X POST "
|
|
1321
1321
|
f"--connect-timeout 60 "
|
|
1322
1322
|
f"--max-time {remote_conversion_timeout} "
|
|
1323
|
-
f"-F 'files=@{remote_file_path}' "
|
|
1323
|
+
f"-F 'files=@{shlex.quote(remote_file_path)}' "
|
|
1324
1324
|
f"-F 'to_formats=md' "
|
|
1325
1325
|
f"-F 'do_ocr=true' "
|
|
1326
1326
|
)
|
|
@@ -1333,11 +1333,12 @@ def main_async_remote(args) -> int:
|
|
|
1333
1333
|
conversion_success = False
|
|
1334
1334
|
conversion_output = None
|
|
1335
1335
|
while conversion_attempt < 3 and not conversion_success:
|
|
1336
|
+
conversion_attempt += 1
|
|
1336
1337
|
try:
|
|
1337
|
-
if conversion_attempt >
|
|
1338
|
-
# Exponential backoff: 2s, 4s
|
|
1339
|
-
backoff_delay = 2 ** conversion_attempt
|
|
1340
|
-
print(f" ↻ Conversion retry {conversion_attempt} (waiting {backoff_delay}s for server recovery)...", file=sys.stderr)
|
|
1338
|
+
if conversion_attempt > 1 and not args.quiet:
|
|
1339
|
+
# Exponential backoff: 2s, 4s
|
|
1340
|
+
backoff_delay = 2 ** (conversion_attempt - 1)
|
|
1341
|
+
print(f" ↻ Conversion retry {conversion_attempt - 1} (waiting {backoff_delay}s for server recovery)...", file=sys.stderr)
|
|
1341
1342
|
await asyncio.sleep(backoff_delay)
|
|
1342
1343
|
|
|
1343
1344
|
conversion_output, _, conv_code = await ssh_client.run_command(convert_cmd, timeout=remote_conversion_timeout)
|
|
@@ -1350,8 +1351,7 @@ def main_async_remote(args) -> int:
|
|
|
1350
1351
|
break
|
|
1351
1352
|
except Exception as conv_exc:
|
|
1352
1353
|
is_conn_err = is_connection_error(conv_exc)
|
|
1353
|
-
if is_conn_err and conversion_attempt <
|
|
1354
|
-
conversion_attempt += 1
|
|
1354
|
+
if is_conn_err and conversion_attempt < 3:
|
|
1355
1355
|
if not args.quiet:
|
|
1356
1356
|
# Exponential backoff: 5s, 10s
|
|
1357
1357
|
backoff_delay = 5 * conversion_attempt
|
|
@@ -1362,6 +1362,7 @@ def main_async_remote(args) -> int:
|
|
|
1362
1362
|
try:
|
|
1363
1363
|
await ssh_client.disconnect()
|
|
1364
1364
|
except Exception:
|
|
1365
|
+
# Best-effort disconnect; ignore errors (e.g., already closed) before reconnecting
|
|
1365
1366
|
pass
|
|
1366
1367
|
|
|
1367
1368
|
# Reconnect with retry
|
|
@@ -1373,50 +1374,65 @@ def main_async_remote(args) -> int:
|
|
|
1373
1374
|
continue
|
|
1374
1375
|
else:
|
|
1375
1376
|
# Either not a connection error, or we've exhausted retries
|
|
1376
|
-
if
|
|
1377
|
-
print(f" [DEBUG] Breaking loop: not conn_err or exhausted retries", file=sys.stderr)
|
|
1378
|
-
if conversion_attempt >= 2 and is_conn_err:
|
|
1377
|
+
if conversion_attempt >= 3 and is_conn_err:
|
|
1379
1378
|
if not args.quiet:
|
|
1380
1379
|
print(f" ↻ Connection error on final retry attempt", file=sys.stderr)
|
|
1381
1380
|
break
|
|
1382
1381
|
|
|
1383
1382
|
if not conversion_success:
|
|
1384
|
-
print(f" ✗ Failed: Conversion failed after {conversion_attempt} attempt(s)", file=sys.stderr)
|
|
1383
|
+
print(f" {color.error('✗ Failed:')} Conversion failed after {conversion_attempt} attempt(s)", file=sys.stderr)
|
|
1385
1384
|
failed += 1
|
|
1386
1385
|
break
|
|
1387
1386
|
|
|
1388
1387
|
# Parse JSON response to extract markdown content
|
|
1389
1388
|
try:
|
|
1390
1389
|
response_data = json.loads(conversion_output)
|
|
1390
|
+
color_err = Colorizer(sys.stderr)
|
|
1391
|
+
|
|
1392
|
+
# Check if response is an error (has error keys)
|
|
1393
|
+
error_keys = {"detail", "error", "message", "code", "status"}
|
|
1394
|
+
response_keys = set(response_data.keys()) if isinstance(response_data, dict) else set()
|
|
1395
|
+
if error_keys & response_keys:
|
|
1396
|
+
# Error response - extract and display error
|
|
1397
|
+
error_detail = response_data.get("detail", response_data.get("error", str(response_data)))
|
|
1398
|
+
print(f" {color_err.error('✗ Failed:')} {error_detail}", file=sys.stderr)
|
|
1399
|
+
if "DOCLING_SERVE_MAX_SYNC_WAIT" in str(error_detail):
|
|
1400
|
+
timeout_val = args.remote_timeout or 3600
|
|
1401
|
+
print(f" {color_err.info('ℹ Tip:')} Increase timeout with --remote-timeout (current: {timeout_val}s)", file=sys.stderr)
|
|
1402
|
+
failed += 1
|
|
1403
|
+
break
|
|
1391
1404
|
|
|
1392
1405
|
# Extract content from response structure
|
|
1393
1406
|
# Actual format: {"document": {"md_content": "..."}, "status": "success"}
|
|
1407
|
+
markdown_content = None
|
|
1394
1408
|
if "document" in response_data:
|
|
1395
1409
|
document = response_data["document"]
|
|
1396
1410
|
if "md_content" in document and document["md_content"]:
|
|
1397
1411
|
markdown_content = document["md_content"]
|
|
1398
1412
|
elif "text_content" in document and document["text_content"]:
|
|
1399
1413
|
markdown_content = document["text_content"]
|
|
1400
|
-
|
|
1401
|
-
# Fallback - use whole document
|
|
1402
|
-
markdown_content = json.dumps(document, indent=2)
|
|
1403
|
-
else:
|
|
1414
|
+
elif "results" in response_data and response_data["results"]:
|
|
1404
1415
|
# Legacy format fallback
|
|
1405
|
-
|
|
1406
|
-
|
|
1407
|
-
|
|
1408
|
-
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
|
|
1412
|
-
markdown_content = content
|
|
1413
|
-
else:
|
|
1414
|
-
markdown_content = str(content)
|
|
1416
|
+
result = response_data["results"][0]
|
|
1417
|
+
if "content" in result:
|
|
1418
|
+
content = result["content"]
|
|
1419
|
+
if isinstance(content, dict) and "markdown" in content:
|
|
1420
|
+
markdown_content = content["markdown"]
|
|
1421
|
+
elif isinstance(content, str):
|
|
1422
|
+
markdown_content = content
|
|
1415
1423
|
else:
|
|
1416
|
-
markdown_content = str(
|
|
1417
|
-
|
|
1418
|
-
|
|
1419
|
-
|
|
1424
|
+
markdown_content = str(content)
|
|
1425
|
+
|
|
1426
|
+
# Validate content exists and is not empty/too short
|
|
1427
|
+
if not markdown_content or len(markdown_content.strip()) < 50:
|
|
1428
|
+
print(f" {color_err.error('✗ Failed:')} Empty or invalid conversion result", file=sys.stderr)
|
|
1429
|
+
if args.remote_timeout and args.remote_timeout < 300:
|
|
1430
|
+
print(
|
|
1431
|
+
f" {color_err.info('ℹ Tip:')} Timeout is only {args.remote_timeout}s. Consider increasing with --remote-timeout (default: 3600s)",
|
|
1432
|
+
file=sys.stderr,
|
|
1433
|
+
)
|
|
1434
|
+
failed += 1
|
|
1435
|
+
break
|
|
1420
1436
|
|
|
1421
1437
|
# Write markdown content to local temp file first, then upload via SFTP
|
|
1422
1438
|
# (Piping large content through SSH here-documents can crash the connection)
|
|
@@ -1424,6 +1440,7 @@ def main_async_remote(args) -> int:
|
|
|
1424
1440
|
if not args.quiet:
|
|
1425
1441
|
print(f" {color.cyan('Writing')} {content_size_kb:.1f}KB markdown via SFTP...", file=sys.stderr)
|
|
1426
1442
|
|
|
1443
|
+
temp_path = None
|
|
1427
1444
|
try:
|
|
1428
1445
|
# Write to temporary local file
|
|
1429
1446
|
with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False) as temp_file:
|
|
@@ -1438,12 +1455,6 @@ def main_async_remote(args) -> int:
|
|
|
1438
1455
|
compress=False,
|
|
1439
1456
|
)
|
|
1440
1457
|
|
|
1441
|
-
# Cleanup temp file
|
|
1442
|
-
try:
|
|
1443
|
-
os.unlink(temp_path)
|
|
1444
|
-
except Exception:
|
|
1445
|
-
pass
|
|
1446
|
-
|
|
1447
1458
|
if not args.quiet:
|
|
1448
1459
|
print(f" {color.green('✓')} Markdown written", file=sys.stderr)
|
|
1449
1460
|
except Exception as write_exc:
|
|
@@ -1451,6 +1462,14 @@ def main_async_remote(args) -> int:
|
|
|
1451
1462
|
print(f" ✗ Failed to write markdown: {write_exc}", file=sys.stderr)
|
|
1452
1463
|
failed += 1
|
|
1453
1464
|
break
|
|
1465
|
+
finally:
|
|
1466
|
+
# Cleanup temp file
|
|
1467
|
+
if temp_path:
|
|
1468
|
+
try:
|
|
1469
|
+
os.unlink(temp_path)
|
|
1470
|
+
except Exception as cleanup_exc:
|
|
1471
|
+
if DEBUG:
|
|
1472
|
+
print(f" ! Failed to remove temporary file {temp_path}: {cleanup_exc}", file=sys.stderr)
|
|
1454
1473
|
|
|
1455
1474
|
except (json.JSONDecodeError, KeyError, IndexError):
|
|
1456
1475
|
print(f" ✗ Failed to parse conversion response", file=sys.stderr)
|
|
@@ -1478,7 +1497,7 @@ def main_async_remote(args) -> int:
|
|
|
1478
1497
|
successful += 1
|
|
1479
1498
|
|
|
1480
1499
|
# Cleanup remote files
|
|
1481
|
-
await ssh_client.run_command(f"rm -f {remote_file_path} {remote_output_path}")
|
|
1500
|
+
await ssh_client.run_command(f"rm -f {shlex.quote(remote_file_path)} {shlex.quote(remote_output_path)}")
|
|
1482
1501
|
|
|
1483
1502
|
break
|
|
1484
1503
|
except Exception as e:
|
|
@@ -1489,11 +1508,12 @@ def main_async_remote(args) -> int:
|
|
|
1489
1508
|
try:
|
|
1490
1509
|
await ssh_client.disconnect()
|
|
1491
1510
|
except Exception:
|
|
1511
|
+
# Best-effort disconnect; ignore errors since we'll immediately reconnect.
|
|
1492
1512
|
pass
|
|
1493
1513
|
await ssh_client.connect()
|
|
1494
1514
|
continue
|
|
1495
1515
|
|
|
1496
|
-
print(f" ✗ Failed: {e}", file=sys.stderr)
|
|
1516
|
+
print(f" {color.error('✗ Failed:')} {str(e)}", file=sys.stderr)
|
|
1497
1517
|
if DEBUG:
|
|
1498
1518
|
import traceback
|
|
1499
1519
|
traceback.print_exc(file=sys.stderr)
|
|
@@ -1515,7 +1535,7 @@ def main_async_remote(args) -> int:
|
|
|
1515
1535
|
|
|
1516
1536
|
# Cleanup remote work directory
|
|
1517
1537
|
try:
|
|
1518
|
-
await ssh_client.run_command(f"rm -rf {work_dir}")
|
|
1538
|
+
await ssh_client.run_command(f"rm -rf {shlex.quote(work_dir)}")
|
|
1519
1539
|
if not args.quiet:
|
|
1520
1540
|
print(color.green(f"✓ Cleaned up remote directory"), file=sys.stderr)
|
|
1521
1541
|
except Exception as e:
|
|
@@ -1590,8 +1610,11 @@ def main_async_remote(args) -> int:
|
|
|
1590
1610
|
def main() -> int:
|
|
1591
1611
|
"""Main entry point for the CLI."""
|
|
1592
1612
|
from mdify.formatting import Colorizer
|
|
1593
|
-
|
|
1594
|
-
|
|
1613
|
+
|
|
1614
|
+
color_stderr = Colorizer(sys.stderr)
|
|
1615
|
+
color_stdout = Colorizer(sys.stdout)
|
|
1616
|
+
|
|
1617
|
+
print(color_stderr.bold_cyan(f"📄 mdify v{__version__}"), file=sys.stderr)
|
|
1595
1618
|
args = parse_args()
|
|
1596
1619
|
|
|
1597
1620
|
# Handle --check-update flag
|
|
@@ -1739,38 +1762,34 @@ def main() -> int:
|
|
|
1739
1762
|
|
|
1740
1763
|
# Validate input
|
|
1741
1764
|
if not input_path.exists():
|
|
1742
|
-
print(f"Error: Input path does not exist: {input_path}", file=sys.stderr)
|
|
1765
|
+
print(f"{color_stderr.error('✗ Error:')} Input path does not exist: {input_path}", file=sys.stderr)
|
|
1743
1766
|
return 1
|
|
1744
1767
|
|
|
1745
1768
|
# Get files to convert
|
|
1746
1769
|
try:
|
|
1747
1770
|
files_to_convert = get_files_to_convert(input_path, args.glob, args.recursive)
|
|
1748
1771
|
except Exception as e:
|
|
1749
|
-
print(f"Error: {e}", file=sys.stderr)
|
|
1772
|
+
print(f"{color_stderr.error('✗ Error:')} {e}", file=sys.stderr)
|
|
1750
1773
|
return 1
|
|
1751
1774
|
|
|
1752
1775
|
if not files_to_convert:
|
|
1753
|
-
print(f"No files found to convert in: {input_path}", file=sys.stderr)
|
|
1776
|
+
print(f"{color_stderr.warning('⚠ Warning:')} No files found to convert in: {input_path}", file=sys.stderr)
|
|
1754
1777
|
return 1
|
|
1755
1778
|
|
|
1756
1779
|
total_files = len(files_to_convert)
|
|
1757
1780
|
total_size = sum(f.stat().st_size for f in files_to_convert)
|
|
1758
1781
|
|
|
1759
1782
|
if not args.quiet:
|
|
1760
|
-
|
|
1761
|
-
|
|
1762
|
-
print(f"{
|
|
1763
|
-
print(f"{
|
|
1764
|
-
print(f"{
|
|
1765
|
-
print(f"{color_info.cyan('🐳 Runtime:')} {color_info.bright_white(runtime)}")
|
|
1766
|
-
print(f"{color_info.cyan('🖼️ Image:')} {color_info.dim_white(image)}")
|
|
1783
|
+
print(f"{color_stdout.bright_cyan('📦 Found')} {color_stdout.bold(str(total_files))} {color_stdout.bright_cyan('file(s)')} {color_stdout.dim_white(f'({format_size(total_size)})')}")
|
|
1784
|
+
print(f"{color_stdout.cyan('📁 Source:')} {color_stdout.bright_white(str(input_path.resolve()))}")
|
|
1785
|
+
print(f"{color_stdout.cyan('💾 Output:')} {color_stdout.bright_white(str(output_dir.resolve()))}")
|
|
1786
|
+
print(f"{color_stdout.cyan('🐳 Runtime:')} {color_stdout.bright_white(runtime)}")
|
|
1787
|
+
print(f"{color_stdout.cyan('🖼️ Image:')} {color_stdout.dim_white(image)}")
|
|
1767
1788
|
print()
|
|
1768
1789
|
|
|
1769
1790
|
if args.mask:
|
|
1770
|
-
from mdify.formatting import Colorizer
|
|
1771
|
-
color_warn = Colorizer(sys.stderr)
|
|
1772
1791
|
print(
|
|
1773
|
-
|
|
1792
|
+
color_stderr.warning("⚠ --mask is not supported with docling-serve and will be ignored"),
|
|
1774
1793
|
file=sys.stderr,
|
|
1775
1794
|
)
|
|
1776
1795
|
|
|
@@ -1787,9 +1806,7 @@ def main() -> int:
|
|
|
1787
1806
|
|
|
1788
1807
|
try:
|
|
1789
1808
|
if not args.quiet:
|
|
1790
|
-
|
|
1791
|
-
color_start = Colorizer(sys.stdout)
|
|
1792
|
-
print(f"{color_start.bright_cyan('▶️ Starting')} {color_start.bright_white('docling-serve')} {color_start.bright_cyan('container')}...\n")
|
|
1809
|
+
print(f"{color_stdout.bright_cyan('▶️ Starting')} {color_stdout.bright_white('docling-serve')} {color_stdout.bright_cyan('container')}...\n")
|
|
1793
1810
|
|
|
1794
1811
|
# Apply resource profile
|
|
1795
1812
|
profile = RESOURCE_PROFILES[args.profile]
|
|
@@ -1864,13 +1881,33 @@ def main() -> int:
|
|
|
1864
1881
|
spinner.stop()
|
|
1865
1882
|
|
|
1866
1883
|
if result.success:
|
|
1867
|
-
#
|
|
1868
|
-
|
|
1869
|
-
|
|
1870
|
-
|
|
1871
|
-
|
|
1872
|
-
|
|
1873
|
-
|
|
1884
|
+
# Validate content is not empty and not an error response
|
|
1885
|
+
content_length = len(result.content.strip()) if result.content else 0
|
|
1886
|
+
if content_length < 50:
|
|
1887
|
+
# Too short - likely an error or empty document
|
|
1888
|
+
failed_count += 1
|
|
1889
|
+
if not args.quiet:
|
|
1890
|
+
print(
|
|
1891
|
+
f"{progress} {input_file.name} ✗ ({format_duration(elapsed)})"
|
|
1892
|
+
)
|
|
1893
|
+
error_msg = "Empty or invalid conversion result"
|
|
1894
|
+
if result.content:
|
|
1895
|
+
error_msg += f" ({len(result.content)} bytes)"
|
|
1896
|
+
print(f" Error: {error_msg}", file=sys.stderr)
|
|
1897
|
+
if args.timeout and args.timeout < 300:
|
|
1898
|
+
color_out = Colorizer(sys.stderr)
|
|
1899
|
+
print(
|
|
1900
|
+
f" {color_out.info('ℹ Tip:')} Timeout is only {args.timeout}s. Consider increasing with --timeout (default: 1200s)",
|
|
1901
|
+
file=sys.stderr,
|
|
1902
|
+
)
|
|
1903
|
+
else:
|
|
1904
|
+
# Write result to output file
|
|
1905
|
+
output_file.write_text(result.content)
|
|
1906
|
+
success_count += 1
|
|
1907
|
+
if not args.quiet:
|
|
1908
|
+
print(
|
|
1909
|
+
f"{progress} {input_file.name} ✓ ({format_duration(elapsed)})"
|
|
1910
|
+
)
|
|
1874
1911
|
else:
|
|
1875
1912
|
failed_count += 1
|
|
1876
1913
|
error_msg = result.error or "Unknown error"
|
|
@@ -2028,30 +2065,26 @@ def main() -> int:
|
|
|
2028
2065
|
|
|
2029
2066
|
# Print summary
|
|
2030
2067
|
if not args.quiet:
|
|
2031
|
-
from mdify.formatting import Colorizer
|
|
2032
|
-
color_out = Colorizer(sys.stdout)
|
|
2033
2068
|
print()
|
|
2034
|
-
print(
|
|
2035
|
-
print(
|
|
2036
|
-
print(
|
|
2037
|
-
print(f" {
|
|
2069
|
+
print(color_stdout.cyan("=" * 60))
|
|
2070
|
+
print(color_stdout.bold_cyan("📊 Local Conversion Summary"))
|
|
2071
|
+
print(color_stdout.cyan("=" * 60))
|
|
2072
|
+
print(f" {color_stdout.cyan('Total files:')} {color_stdout.bold(str(total_files))}")
|
|
2038
2073
|
if success_count > 0:
|
|
2039
|
-
print(f" {
|
|
2074
|
+
print(f" {color_stdout.green('✓ Successful:')} {color_stdout.bold_green(str(success_count))}")
|
|
2040
2075
|
if skipped_count > 0:
|
|
2041
|
-
print(f" {
|
|
2076
|
+
print(f" {color_stdout.yellow('⊘ Skipped:')} {color_stdout.bold_yellow(str(skipped_count))}")
|
|
2042
2077
|
if failed_count > 0:
|
|
2043
|
-
print(f" {
|
|
2044
|
-
print(f" {
|
|
2045
|
-
print(
|
|
2078
|
+
print(f" {color_stdout.red('✗ Failed:')} {color_stdout.bold_red(str(failed_count))}")
|
|
2079
|
+
print(f" {color_stdout.cyan('Total time:')} {color_stdout.bright_cyan(format_duration(total_elapsed))}")
|
|
2080
|
+
print(color_stdout.cyan("=" * 60))
|
|
2046
2081
|
|
|
2047
2082
|
except KeyboardInterrupt:
|
|
2048
2083
|
if not args.quiet:
|
|
2049
|
-
|
|
2050
|
-
color_out = Colorizer(sys.stdout)
|
|
2051
|
-
print(f"\n\n{color_out.warning('⚠ Interrupted by user. Container stopped.')}")
|
|
2084
|
+
print(f"\n\n{color_stdout.warning('⚠ Interrupted by user. Container stopped.')}")
|
|
2052
2085
|
if success_count > 0 or skipped_count > 0 or failed_count > 0:
|
|
2053
2086
|
print(
|
|
2054
|
-
f"{
|
|
2087
|
+
f"{color_stdout.dim_white('Partial progress:')} {color_stdout.green(str(success_count))} successful, {color_stdout.red(str(failed_count))} failed, {color_stdout.yellow(str(skipped_count))} skipped"
|
|
2055
2088
|
)
|
|
2056
2089
|
return 130
|
|
2057
2090
|
|
mdify/docling_client.py
CHANGED
|
@@ -48,6 +48,25 @@ def _get_mime_type(file_path: Path) -> str:
|
|
|
48
48
|
return mime_type or "application/octet-stream"
|
|
49
49
|
|
|
50
50
|
|
|
51
|
+
def _is_error_response(result_data) -> bool:
|
|
52
|
+
"""Check if response contains an error instead of content.
|
|
53
|
+
|
|
54
|
+
Detects common API error response patterns.
|
|
55
|
+
|
|
56
|
+
Args:
|
|
57
|
+
result_data: Response data to check
|
|
58
|
+
|
|
59
|
+
Returns:
|
|
60
|
+
True if response appears to be an error
|
|
61
|
+
"""
|
|
62
|
+
if not isinstance(result_data, dict):
|
|
63
|
+
return False
|
|
64
|
+
|
|
65
|
+
# Check for common error keys at top level
|
|
66
|
+
error_keys = {"detail", "error", "message", "code", "status"}
|
|
67
|
+
return bool(error_keys & set(result_data.keys()))
|
|
68
|
+
|
|
69
|
+
|
|
51
70
|
def _extract_content(result_data) -> str:
|
|
52
71
|
"""Extract content from API response, supporting both old and new formats.
|
|
53
72
|
|
|
@@ -61,8 +80,12 @@ def _extract_content(result_data) -> str:
|
|
|
61
80
|
result_data: Response data from docling-serve API
|
|
62
81
|
|
|
63
82
|
Returns:
|
|
64
|
-
Extracted content string, or empty string if not found
|
|
83
|
+
Extracted content string, or empty string if not found or if response is an error
|
|
65
84
|
"""
|
|
85
|
+
# Detect error responses and return empty string instead of parsing them
|
|
86
|
+
if _is_error_response(result_data):
|
|
87
|
+
return ""
|
|
88
|
+
|
|
66
89
|
if isinstance(result_data, dict):
|
|
67
90
|
# New format with document field
|
|
68
91
|
if "document" in result_data:
|
mdify/ssh/models.py
CHANGED
|
@@ -66,18 +66,6 @@ class SSHConfig:
|
|
|
66
66
|
|
|
67
67
|
def __post_init__(self):
|
|
68
68
|
"""Validate config after initialization."""
|
|
69
|
-
if self.port is None:
|
|
70
|
-
self.port = 22
|
|
71
|
-
if self.timeout is None:
|
|
72
|
-
self.timeout = 30
|
|
73
|
-
if self.keepalive is None:
|
|
74
|
-
self.keepalive = 60
|
|
75
|
-
if self.compression is None:
|
|
76
|
-
self.compression = False
|
|
77
|
-
if self.work_dir is None:
|
|
78
|
-
self.work_dir = "/tmp/mdify"
|
|
79
|
-
if self.username is None:
|
|
80
|
-
self.username = ""
|
|
81
69
|
if not self.host:
|
|
82
70
|
raise ConfigError("host is required")
|
|
83
71
|
if not 1 <= self.port <= 65535:
|
mdify/ssh/remote_container.py
CHANGED
|
@@ -63,7 +63,7 @@ class RemoteContainer(DoclingContainer):
|
|
|
63
63
|
try:
|
|
64
64
|
# Find containers using this port
|
|
65
65
|
# Using docker inspect with port filter
|
|
66
|
-
cmd = f"{self.runtime} ps -a --filter 'publish={self.port}' --format '{{{{.ID}}}}'"
|
|
66
|
+
cmd = f"{self.runtime} ps -a --filter 'publish={int(self.port)}' --format '{{{{.ID}}}}'"
|
|
67
67
|
stdout, stderr, code = await self.ssh_client.run_command(cmd, timeout=10)
|
|
68
68
|
|
|
69
69
|
if code == 0 and stdout.strip():
|
mdify/ssh/transfer.py
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
import gzip
|
|
4
4
|
import hashlib
|
|
5
5
|
import logging
|
|
6
|
+
import shlex
|
|
6
7
|
from pathlib import Path
|
|
7
8
|
from typing import Callable
|
|
8
9
|
from mdify.ssh.models import TransferSession
|
|
@@ -233,8 +234,8 @@ class FileTransferManager:
|
|
|
233
234
|
|
|
234
235
|
# Calculate remote checksum (sha256sum or shasum fallback)
|
|
235
236
|
checksum_cmd = (
|
|
236
|
-
f"(command -v sha256sum >/dev/null 2>&1 && sha256sum {remote_path} | awk '{{print $1}}') "
|
|
237
|
-
f"|| (command -v shasum >/dev/null 2>&1 && shasum -a 256 {remote_path} | awk '{{print $1}}')"
|
|
237
|
+
f"(command -v sha256sum >/dev/null 2>&1 && sha256sum {shlex.quote(remote_path)} | awk '{{print $1}}') "
|
|
238
|
+
f"|| (command -v shasum >/dev/null 2>&1 && shasum -a 256 {shlex.quote(remote_path)} | awk '{{print $1}}')"
|
|
238
239
|
)
|
|
239
240
|
stdout, stderr, code = await self.ssh_client.run_command(checksum_cmd)
|
|
240
241
|
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
assets/mdify.png,sha256=qUj7WXWqNwpI2KNXOW79XJwqFqa-UI0JEkmt1mmy4Rg,1820418
|
|
2
|
+
mdify/__init__.py,sha256=0Ak0L1GdMt0kbtaTK41XknPOw59qnYoil9wfKn4ARB0,90
|
|
3
|
+
mdify/__main__.py,sha256=bhpJ00co6MfaVOdH4XLoW04NtLYDa_oJK7ODzfLrn9M,143
|
|
4
|
+
mdify/cli.py,sha256=Q5Po16nAs4ysDs7ESRB1trMBongyZFBwpiV7lYBpFXA,87538
|
|
5
|
+
mdify/container.py,sha256=BjL5ZR__n1i_WHifXKllTPoqO7IuOUdPDo5esuNg0Iw,8213
|
|
6
|
+
mdify/docling_client.py,sha256=zrA-KGW3sSup-qxLHPixZWyDVi3tXJck1-MV6NoyQXA,8677
|
|
7
|
+
mdify/formatting.py,sha256=It7yCVQbD5e2G1FqE4ebx783BkBiRWeFdjKrWPs4nEA,3964
|
|
8
|
+
mdify/ssh/__init__.py,sha256=SmRWgwEvAQZ_ARHlKTb9QDPwVAcz6dvPUks2pZFWLAU,271
|
|
9
|
+
mdify/ssh/client.py,sha256=nO7gAQ6eWxIXFIOplW2F2ya0-1ZEFlLmz3ovi1TEFTg,14997
|
|
10
|
+
mdify/ssh/models.py,sha256=IGAf5EfpZuBS2lIGzxmIsl8f44bXg4a8wk4BW9JWKEQ,17275
|
|
11
|
+
mdify/ssh/remote_container.py,sha256=o4npccm627EPNqlq3vCydRvDvwcsJmxkZ8o-vJXy2aU,10235
|
|
12
|
+
mdify/ssh/transfer.py,sha256=Zcr-V8Bjmm37tvd9RxlS-Rwk0SPD1-OGlEPlHQWmtSE,10979
|
|
13
|
+
mdify_cli-3.3.0.dist-info/licenses/LICENSE,sha256=NWM66Uv-XuSMKaU-gaPmvfyk4WgE6zcIPr78wyg6GAo,1065
|
|
14
|
+
mdify_cli-3.3.0.dist-info/METADATA,sha256=OJay-DbNDAmlj5Ko0KfNcTCd5He51h4no7xVxYKKUZk,14766
|
|
15
|
+
mdify_cli-3.3.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
16
|
+
mdify_cli-3.3.0.dist-info/entry_points.txt,sha256=0Xki8f5lADQUtwdt6Eq_FEaieI6Byhk8UE7BuDhChMg,41
|
|
17
|
+
mdify_cli-3.3.0.dist-info/top_level.txt,sha256=qltzf7h8owHq7dxCdfCkSHY8gT21hn1_E8P-VWS_OKM,6
|
|
18
|
+
mdify_cli-3.3.0.dist-info/RECORD,,
|
mdify_cli-3.2.0.dist-info/RECORD
DELETED
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
assets/mdify.png,sha256=qUj7WXWqNwpI2KNXOW79XJwqFqa-UI0JEkmt1mmy4Rg,1820418
|
|
2
|
-
mdify/__init__.py,sha256=MBFv6-i5flnB4e1xp3Z3ueBtvptnl6487ZBlB9IJvLc,90
|
|
3
|
-
mdify/__main__.py,sha256=bhpJ00co6MfaVOdH4XLoW04NtLYDa_oJK7ODzfLrn9M,143
|
|
4
|
-
mdify/cli.py,sha256=efFNwfJMplkGfnmI_uOfd9gd3nOQ5BFLvfKz7A-T3dw,84399
|
|
5
|
-
mdify/container.py,sha256=BjL5ZR__n1i_WHifXKllTPoqO7IuOUdPDo5esuNg0Iw,8213
|
|
6
|
-
mdify/docling_client.py,sha256=xuQR6sC1v3EPloOSwExoHCqT4uUxE8myYq-Yeby3C2I,7975
|
|
7
|
-
mdify/formatting.py,sha256=It7yCVQbD5e2G1FqE4ebx783BkBiRWeFdjKrWPs4nEA,3964
|
|
8
|
-
mdify/ssh/__init__.py,sha256=SmRWgwEvAQZ_ARHlKTb9QDPwVAcz6dvPUks2pZFWLAU,271
|
|
9
|
-
mdify/ssh/client.py,sha256=nO7gAQ6eWxIXFIOplW2F2ya0-1ZEFlLmz3ovi1TEFTg,14997
|
|
10
|
-
mdify/ssh/models.py,sha256=jpbDS1yGhd7Xwq2tW7bZv14mTBlR8DCfhT4x-Xf2Wq4,17676
|
|
11
|
-
mdify/ssh/remote_container.py,sha256=KB8rrsp3h5s_2BKPBXhzkOdQFafyPGQhSnFbK20b4yQ,10230
|
|
12
|
-
mdify/ssh/transfer.py,sha256=aQuWa_B81McrgZBBWo_CxnjwoiGHmoxAoqObm19JAk8,10940
|
|
13
|
-
mdify_cli-3.2.0.dist-info/licenses/LICENSE,sha256=NWM66Uv-XuSMKaU-gaPmvfyk4WgE6zcIPr78wyg6GAo,1065
|
|
14
|
-
mdify_cli-3.2.0.dist-info/METADATA,sha256=D_EfxHadSsJZ_j11o_5Y04_m5s6bgeH1zq-X9r0bqw4,14766
|
|
15
|
-
mdify_cli-3.2.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
16
|
-
mdify_cli-3.2.0.dist-info/entry_points.txt,sha256=0Xki8f5lADQUtwdt6Eq_FEaieI6Byhk8UE7BuDhChMg,41
|
|
17
|
-
mdify_cli-3.2.0.dist-info/top_level.txt,sha256=qltzf7h8owHq7dxCdfCkSHY8gT21hn1_E8P-VWS_OKM,6
|
|
18
|
-
mdify_cli-3.2.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|