mdify-cli 3.0.7__tar.gz → 3.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mdify_cli-3.0.7 → mdify_cli-3.1.0}/PKG-INFO +1 -1
- {mdify_cli-3.0.7 → mdify_cli-3.1.0}/mdify/__init__.py +1 -1
- {mdify_cli-3.0.7 → mdify_cli-3.1.0}/mdify/cli.py +76 -15
- {mdify_cli-3.0.7 → mdify_cli-3.1.0}/mdify_cli.egg-info/PKG-INFO +1 -1
- {mdify_cli-3.0.7 → mdify_cli-3.1.0}/pyproject.toml +1 -1
- {mdify_cli-3.0.7 → mdify_cli-3.1.0}/LICENSE +0 -0
- {mdify_cli-3.0.7 → mdify_cli-3.1.0}/README.md +0 -0
- {mdify_cli-3.0.7 → mdify_cli-3.1.0}/assets/mdify.png +0 -0
- {mdify_cli-3.0.7 → mdify_cli-3.1.0}/mdify/__main__.py +0 -0
- {mdify_cli-3.0.7 → mdify_cli-3.1.0}/mdify/container.py +0 -0
- {mdify_cli-3.0.7 → mdify_cli-3.1.0}/mdify/docling_client.py +0 -0
- {mdify_cli-3.0.7 → mdify_cli-3.1.0}/mdify/formatting.py +0 -0
- {mdify_cli-3.0.7 → mdify_cli-3.1.0}/mdify/ssh/__init__.py +0 -0
- {mdify_cli-3.0.7 → mdify_cli-3.1.0}/mdify/ssh/client.py +0 -0
- {mdify_cli-3.0.7 → mdify_cli-3.1.0}/mdify/ssh/models.py +0 -0
- {mdify_cli-3.0.7 → mdify_cli-3.1.0}/mdify/ssh/remote_container.py +0 -0
- {mdify_cli-3.0.7 → mdify_cli-3.1.0}/mdify/ssh/transfer.py +0 -0
- {mdify_cli-3.0.7 → mdify_cli-3.1.0}/mdify_cli.egg-info/SOURCES.txt +0 -0
- {mdify_cli-3.0.7 → mdify_cli-3.1.0}/mdify_cli.egg-info/dependency_links.txt +0 -0
- {mdify_cli-3.0.7 → mdify_cli-3.1.0}/mdify_cli.egg-info/entry_points.txt +0 -0
- {mdify_cli-3.0.7 → mdify_cli-3.1.0}/mdify_cli.egg-info/requires.txt +0 -0
- {mdify_cli-3.0.7 → mdify_cli-3.1.0}/mdify_cli.egg-info/top_level.txt +0 -0
- {mdify_cli-3.0.7 → mdify_cli-3.1.0}/setup.cfg +0 -0
- {mdify_cli-3.0.7 → mdify_cli-3.1.0}/tests/test_cli.py +0 -0
- {mdify_cli-3.0.7 → mdify_cli-3.1.0}/tests/test_container.py +0 -0
- {mdify_cli-3.0.7 → mdify_cli-3.1.0}/tests/test_docling_client.py +0 -0
- {mdify_cli-3.0.7 → mdify_cli-3.1.0}/tests/test_ssh_client.py +0 -0
|
@@ -8,12 +8,14 @@ is lightweight and has no ML dependencies.
|
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
10
|
import argparse
|
|
11
|
+
import asyncio
|
|
11
12
|
import json
|
|
12
13
|
import os
|
|
13
14
|
import platform
|
|
14
15
|
import shutil
|
|
15
16
|
import subprocess
|
|
16
17
|
import sys
|
|
18
|
+
import tempfile
|
|
17
19
|
import threading
|
|
18
20
|
import time
|
|
19
21
|
from pathlib import Path
|
|
@@ -883,7 +885,7 @@ Examples:
|
|
|
883
885
|
"--timeout",
|
|
884
886
|
type=int,
|
|
885
887
|
default=None,
|
|
886
|
-
help="Conversion timeout in seconds (default:
|
|
888
|
+
help="Conversion timeout in seconds (default: 1200s for local, 3600s for remote with large PDFs, can be set via MDIFY_TIMEOUT env var)",
|
|
887
889
|
)
|
|
888
890
|
|
|
889
891
|
parser.add_argument(
|
|
@@ -1057,6 +1059,10 @@ def main_async_remote(args) -> int:
|
|
|
1057
1059
|
# Resolve timeout value: CLI > env > default 1200
|
|
1058
1060
|
timeout = args.timeout or int(os.environ.get("MDIFY_TIMEOUT", 1200))
|
|
1059
1061
|
|
|
1062
|
+
# For remote operations, extend timeout significantly for large PDF processing
|
|
1063
|
+
# Remote conversions include network latency, file upload/download, and OCR processing
|
|
1064
|
+
remote_conversion_timeout = max(timeout, 3600) # At least 1 hour for remote conversion
|
|
1065
|
+
|
|
1060
1066
|
# Build SSH config from CLI arguments and SSH config files
|
|
1061
1067
|
try:
|
|
1062
1068
|
# Build config with proper precedence (lowest to highest):
|
|
@@ -1178,7 +1184,8 @@ def main_async_remote(args) -> int:
|
|
|
1178
1184
|
return 1
|
|
1179
1185
|
|
|
1180
1186
|
if not args.quiet:
|
|
1181
|
-
print(color.cyan(f"
|
|
1187
|
+
print(color.cyan(f"Found {len(files_to_convert)} file(s) to convert"), file=sys.stderr)
|
|
1188
|
+
print(color.cyan(f"Conversion timeout: {remote_conversion_timeout}s (for large PDFs with OCR)"), file=sys.stderr)
|
|
1182
1189
|
|
|
1183
1190
|
# Import remote container and transfer manager
|
|
1184
1191
|
from mdify.ssh.transfer import FileTransferManager
|
|
@@ -1242,7 +1249,8 @@ def main_async_remote(args) -> int:
|
|
|
1242
1249
|
if isinstance(exc, SSHConnectionError):
|
|
1243
1250
|
return True
|
|
1244
1251
|
msg = str(exc).lower()
|
|
1245
|
-
|
|
1252
|
+
# Errno 32 = Broken pipe, Errno 54 = Connection reset by peer
|
|
1253
|
+
return any(x in msg for x in ["broken pipe", "connection closed", "connection reset", "errno 32", "errno 54", "ssh connection"])
|
|
1246
1254
|
|
|
1247
1255
|
try:
|
|
1248
1256
|
for idx, input_file in enumerate(files_to_convert, 1):
|
|
@@ -1309,8 +1317,13 @@ def main_async_remote(args) -> int:
|
|
|
1309
1317
|
remote_output_path = f"{work_dir}/{input_file.stem}.md"
|
|
1310
1318
|
|
|
1311
1319
|
# Build conversion command on remote - use -F for multipart form data
|
|
1320
|
+
# Important: use generous timeouts since large PDFs with OCR take time
|
|
1321
|
+
# --connect-timeout: max time to establish connection (60s)
|
|
1322
|
+
# --max-time: max total operation time (extended timeout)
|
|
1312
1323
|
convert_cmd = (
|
|
1313
1324
|
f"curl -X POST "
|
|
1325
|
+
f"--connect-timeout 60 "
|
|
1326
|
+
f"--max-time {remote_conversion_timeout} "
|
|
1314
1327
|
f"-F 'files=@{remote_file_path}' "
|
|
1315
1328
|
f"-F 'to_formats=md' "
|
|
1316
1329
|
f"-F 'do_ocr=true' "
|
|
@@ -1326,27 +1339,50 @@ def main_async_remote(args) -> int:
|
|
|
1326
1339
|
while conversion_attempt < 3 and not conversion_success:
|
|
1327
1340
|
try:
|
|
1328
1341
|
if conversion_attempt > 0 and not args.quiet:
|
|
1329
|
-
|
|
1342
|
+
# Exponential backoff: 2s, 4s (conversion_attempt never exceeds 2 inside this loop)
|
|
1343
|
+
backoff_delay = 2 ** conversion_attempt
|
|
1344
|
+
print(f" ↻ Conversion retry {conversion_attempt} (waiting {backoff_delay}s for server recovery)...", file=sys.stderr)
|
|
1345
|
+
await asyncio.sleep(backoff_delay)
|
|
1330
1346
|
|
|
1331
|
-
conversion_output, _, conv_code = await ssh_client.run_command(convert_cmd, timeout=
|
|
1347
|
+
conversion_output, _, conv_code = await ssh_client.run_command(convert_cmd, timeout=remote_conversion_timeout)
|
|
1332
1348
|
|
|
1333
1349
|
if conv_code == 0:
|
|
1334
1350
|
conversion_success = True
|
|
1335
1351
|
break
|
|
1336
1352
|
else:
|
|
1337
|
-
|
|
1353
|
+
# Non-zero exit code - fail without retry for non-connection errors
|
|
1354
|
+
break
|
|
1338
1355
|
except Exception as conv_exc:
|
|
1339
|
-
|
|
1356
|
+
is_conn_err = is_connection_error(conv_exc)
|
|
1357
|
+
if is_conn_err and conversion_attempt < 2:
|
|
1340
1358
|
conversion_attempt += 1
|
|
1341
1359
|
if not args.quiet:
|
|
1342
|
-
|
|
1360
|
+
# Linear backoff: 5s, 10s
|
|
1361
|
+
backoff_delay = 5 * conversion_attempt
|
|
1362
|
+
print(f" ↻ Connection reset during conversion. Reconnecting in {backoff_delay}s...", file=sys.stderr)
|
|
1363
|
+
|
|
1364
|
+
await asyncio.sleep(backoff_delay)
|
|
1365
|
+
|
|
1343
1366
|
try:
|
|
1344
1367
|
await ssh_client.disconnect()
|
|
1345
1368
|
except Exception:
|
|
1346
1369
|
pass
|
|
1347
|
-
|
|
1370
|
+
|
|
1371
|
+
# Reconnect with retry
|
|
1372
|
+
try:
|
|
1373
|
+
await ssh_client.connect()
|
|
1374
|
+
except Exception:
|
|
1375
|
+
if not args.quiet:
|
|
1376
|
+
print(f" ⚠ Reconnection failed: retrying...", file=sys.stderr)
|
|
1377
|
+
continue
|
|
1348
1378
|
else:
|
|
1349
|
-
|
|
1379
|
+
# Either not a connection error, or we've exhausted retries
|
|
1380
|
+
if not args.quiet:
|
|
1381
|
+
print(f" [DEBUG] Breaking loop: not conn_err or exhausted retries", file=sys.stderr)
|
|
1382
|
+
if conversion_attempt >= 2 and is_conn_err:
|
|
1383
|
+
if not args.quiet:
|
|
1384
|
+
print(f" ↻ Connection error on final retry attempt", file=sys.stderr)
|
|
1385
|
+
break
|
|
1350
1386
|
|
|
1351
1387
|
if not conversion_success:
|
|
1352
1388
|
print(f" ✗ Failed: Conversion failed after {conversion_attempt} attempt(s)", file=sys.stderr)
|
|
@@ -1386,12 +1422,37 @@ def main_async_remote(args) -> int:
|
|
|
1386
1422
|
# Ultimate fallback
|
|
1387
1423
|
markdown_content = conversion_output
|
|
1388
1424
|
|
|
1389
|
-
# Write markdown content to
|
|
1390
|
-
|
|
1391
|
-
|
|
1425
|
+
# Write markdown content to local temp file first, then upload via SFTP
|
|
1426
|
+
# (Piping large content through SSH here-documents can crash the connection)
|
|
1427
|
+
content_size_kb = len(markdown_content) / 1024
|
|
1428
|
+
if not args.quiet:
|
|
1429
|
+
print(f" {color.cyan('Writing')} {content_size_kb:.1f}KB markdown via SFTP...", file=sys.stderr)
|
|
1392
1430
|
|
|
1393
|
-
|
|
1394
|
-
|
|
1431
|
+
try:
|
|
1432
|
+
# Write to temporary local file
|
|
1433
|
+
with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False) as temp_file:
|
|
1434
|
+
temp_file.write(markdown_content)
|
|
1435
|
+
temp_path = temp_file.name
|
|
1436
|
+
|
|
1437
|
+
# Upload via SFTP (more reliable for large files)
|
|
1438
|
+
await transfer_manager.upload_file(
|
|
1439
|
+
local_path=temp_path,
|
|
1440
|
+
remote_path=remote_output_path,
|
|
1441
|
+
overwrite=True,
|
|
1442
|
+
compress=False,
|
|
1443
|
+
)
|
|
1444
|
+
|
|
1445
|
+
# Cleanup temp file
|
|
1446
|
+
try:
|
|
1447
|
+
os.unlink(temp_path)
|
|
1448
|
+
except Exception:
|
|
1449
|
+
pass
|
|
1450
|
+
|
|
1451
|
+
if not args.quiet:
|
|
1452
|
+
print(f" {color.green('✓')} Markdown written", file=sys.stderr)
|
|
1453
|
+
except Exception as write_exc:
|
|
1454
|
+
if not args.quiet:
|
|
1455
|
+
print(f" ✗ Failed to write markdown: {write_exc}", file=sys.stderr)
|
|
1395
1456
|
failed += 1
|
|
1396
1457
|
break
|
|
1397
1458
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|