mdify-cli 3.0.6__tar.gz → 3.0.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. {mdify_cli-3.0.6 → mdify_cli-3.0.8}/PKG-INFO +1 -1
  2. {mdify_cli-3.0.6 → mdify_cli-3.0.8}/mdify/__init__.py +1 -1
  3. {mdify_cli-3.0.6 → mdify_cli-3.0.8}/mdify/cli.py +27 -28
  4. {mdify_cli-3.0.6 → mdify_cli-3.0.8}/mdify_cli.egg-info/PKG-INFO +1 -1
  5. {mdify_cli-3.0.6 → mdify_cli-3.0.8}/pyproject.toml +1 -1
  6. {mdify_cli-3.0.6 → mdify_cli-3.0.8}/LICENSE +0 -0
  7. {mdify_cli-3.0.6 → mdify_cli-3.0.8}/README.md +0 -0
  8. {mdify_cli-3.0.6 → mdify_cli-3.0.8}/assets/mdify.png +0 -0
  9. {mdify_cli-3.0.6 → mdify_cli-3.0.8}/mdify/__main__.py +0 -0
  10. {mdify_cli-3.0.6 → mdify_cli-3.0.8}/mdify/container.py +0 -0
  11. {mdify_cli-3.0.6 → mdify_cli-3.0.8}/mdify/docling_client.py +0 -0
  12. {mdify_cli-3.0.6 → mdify_cli-3.0.8}/mdify/formatting.py +0 -0
  13. {mdify_cli-3.0.6 → mdify_cli-3.0.8}/mdify/ssh/__init__.py +0 -0
  14. {mdify_cli-3.0.6 → mdify_cli-3.0.8}/mdify/ssh/client.py +0 -0
  15. {mdify_cli-3.0.6 → mdify_cli-3.0.8}/mdify/ssh/models.py +0 -0
  16. {mdify_cli-3.0.6 → mdify_cli-3.0.8}/mdify/ssh/remote_container.py +0 -0
  17. {mdify_cli-3.0.6 → mdify_cli-3.0.8}/mdify/ssh/transfer.py +0 -0
  18. {mdify_cli-3.0.6 → mdify_cli-3.0.8}/mdify_cli.egg-info/SOURCES.txt +0 -0
  19. {mdify_cli-3.0.6 → mdify_cli-3.0.8}/mdify_cli.egg-info/dependency_links.txt +0 -0
  20. {mdify_cli-3.0.6 → mdify_cli-3.0.8}/mdify_cli.egg-info/entry_points.txt +0 -0
  21. {mdify_cli-3.0.6 → mdify_cli-3.0.8}/mdify_cli.egg-info/requires.txt +0 -0
  22. {mdify_cli-3.0.6 → mdify_cli-3.0.8}/mdify_cli.egg-info/top_level.txt +0 -0
  23. {mdify_cli-3.0.6 → mdify_cli-3.0.8}/setup.cfg +0 -0
  24. {mdify_cli-3.0.6 → mdify_cli-3.0.8}/tests/test_cli.py +0 -0
  25. {mdify_cli-3.0.6 → mdify_cli-3.0.8}/tests/test_container.py +0 -0
  26. {mdify_cli-3.0.6 → mdify_cli-3.0.8}/tests/test_docling_client.py +0 -0
  27. {mdify_cli-3.0.6 → mdify_cli-3.0.8}/tests/test_ssh_client.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 3.0.6
3
+ Version: 3.0.8
4
4
  Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
@@ -1,3 +1,3 @@
1
1
  """mdify - Convert documents to Markdown via Docling container."""
2
2
 
3
- __version__ = "3.0.6"
3
+ __version__ = "3.0.8"
@@ -883,7 +883,7 @@ Examples:
883
883
  "--timeout",
884
884
  type=int,
885
885
  default=None,
886
- help="Conversion timeout in seconds (default: 1200, can be set via MDIFY_TIMEOUT env var)",
886
+ help="Conversion timeout in seconds (default: 1200s for local, 3600s for remote with large PDFs, can be set via MDIFY_TIMEOUT env var)",
887
887
  )
888
888
 
889
889
  parser.add_argument(
@@ -1057,6 +1057,10 @@ def main_async_remote(args) -> int:
1057
1057
  # Resolve timeout value: CLI > env > default 1200
1058
1058
  timeout = args.timeout or int(os.environ.get("MDIFY_TIMEOUT", 1200))
1059
1059
 
1060
+ # For remote operations, extend timeout significantly for large PDF processing
1061
+ # Remote conversions include network latency, file upload/download, and OCR processing
1062
+ remote_conversion_timeout = max(timeout, 3600) # At least 1 hour for remote conversion
1063
+
1060
1064
  # Build SSH config from CLI arguments and SSH config files
1061
1065
  try:
1062
1066
  # Build config with proper precedence (lowest to highest):
@@ -1178,7 +1182,8 @@ def main_async_remote(args) -> int:
1178
1182
  return 1
1179
1183
 
1180
1184
  if not args.quiet:
1181
- print(color.cyan(f"\nFound {len(files_to_convert)} file(s) to convert"), file=sys.stderr)
1185
+ print(color.cyan(f"Found {len(files_to_convert)} file(s) to convert"), file=sys.stderr)
1186
+ print(color.cyan(f"Conversion timeout: {remote_conversion_timeout}s (for large PDFs with OCR)"), file=sys.stderr)
1182
1187
 
1183
1188
  # Import remote container and transfer manager
1184
1189
  from mdify.ssh.transfer import FileTransferManager
@@ -1309,8 +1314,13 @@ def main_async_remote(args) -> int:
1309
1314
  remote_output_path = f"{work_dir}/{input_file.stem}.md"
1310
1315
 
1311
1316
  # Build conversion command on remote - use -F for multipart form data
1317
+ # Important: use generous timeouts since large PDFs with OCR take time
1318
+ # --connect-timeout: max time to establish connection (60s)
1319
+ # --max-time: max total operation time (extended timeout)
1312
1320
  convert_cmd = (
1313
1321
  f"curl -X POST "
1322
+ f"--connect-timeout 60 "
1323
+ f"--max-time {remote_conversion_timeout} "
1314
1324
  f"-F 'files=@{remote_file_path}' "
1315
1325
  f"-F 'to_formats=md' "
1316
1326
  f"-F 'do_ocr=true' "
@@ -1322,42 +1332,31 @@ def main_async_remote(args) -> int:
1322
1332
  # Retry conversion command with exponential backoff
1323
1333
  conversion_attempt = 0
1324
1334
  conversion_success = False
1325
- while conversion_attempt < 3:
1335
+ conversion_output = None
1336
+ while conversion_attempt < 3 and not conversion_success:
1326
1337
  try:
1327
- stdout, stderr, code = await ssh_client.run_command(convert_cmd, timeout=timeout)
1338
+ if conversion_attempt > 0 and not args.quiet:
1339
+ print(f" ↻ Conversion retry {conversion_attempt}...", file=sys.stderr)
1340
+
1341
+ conversion_output, _, conv_code = await ssh_client.run_command(convert_cmd, timeout=remote_conversion_timeout)
1328
1342
 
1329
- if code == 0:
1343
+ if conv_code == 0:
1330
1344
  conversion_success = True
1331
1345
  break
1332
- elif is_connection_error(Exception(stderr)) and conversion_attempt < 2:
1333
- conversion_attempt += 1
1334
- if not args.quiet:
1335
- print(f" ↻ Conversion attempt {conversion_attempt}: reconnecting...", file=sys.stderr)
1336
- try:
1337
- await ssh_client.disconnect()
1338
- except Exception:
1339
- pass
1340
- await ssh_client.connect()
1341
- continue
1342
1346
  else:
1343
- if not args.quiet:
1344
- print(f" ✗ Conversion failed (curl error code {code}): {stderr}", file=sys.stderr)
1345
- break
1347
+ conversion_attempt += 1
1346
1348
  except Exception as conv_exc:
1347
1349
  if is_connection_error(conv_exc) and conversion_attempt < 2:
1348
1350
  conversion_attempt += 1
1349
1351
  if not args.quiet:
1350
- print(f" ↻ Conversion attempt {conversion_attempt}: reconnecting...", file=sys.stderr)
1352
+ print(f" ↻ Connection lost during conversion. Reconnecting (attempt {conversion_attempt})...", file=sys.stderr)
1351
1353
  try:
1352
1354
  await ssh_client.disconnect()
1353
1355
  except Exception:
1354
1356
  pass
1355
1357
  await ssh_client.connect()
1356
- continue
1357
1358
  else:
1358
- raise conv_exc
1359
-
1360
- conversion_attempt += 1
1359
+ conversion_attempt += 1
1361
1360
 
1362
1361
  if not conversion_success:
1363
1362
  print(f" ✗ Failed: Conversion failed after {conversion_attempt} attempt(s)", file=sys.stderr)
@@ -1366,7 +1365,7 @@ def main_async_remote(args) -> int:
1366
1365
 
1367
1366
  # Parse JSON response to extract markdown content
1368
1367
  try:
1369
- response_data = json.loads(stdout)
1368
+ response_data = json.loads(conversion_output)
1370
1369
 
1371
1370
  # Extract content from response structure
1372
1371
  # Actual format: {"document": {"md_content": "..."}, "status": "success"}
@@ -1395,7 +1394,7 @@ def main_async_remote(args) -> int:
1395
1394
  markdown_content = str(result)
1396
1395
  else:
1397
1396
  # Ultimate fallback
1398
- markdown_content = stdout
1397
+ markdown_content = conversion_output
1399
1398
 
1400
1399
  # Write markdown content to remote file
1401
1400
  write_cmd = f"cat > {remote_output_path} << 'MDIFY_EOF'\n{markdown_content}\nMDIFY_EOF"
@@ -1406,10 +1405,10 @@ def main_async_remote(args) -> int:
1406
1405
  failed += 1
1407
1406
  break
1408
1407
 
1409
- except (json.JSONDecodeError, KeyError, IndexError) as e:
1410
- print(f" ✗ Failed to parse conversion response: {e}", file=sys.stderr)
1408
+ except (json.JSONDecodeError, KeyError, IndexError):
1409
+ print(f" ✗ Failed to parse conversion response", file=sys.stderr)
1411
1410
  if DEBUG:
1412
- print(f" Response: {stdout[:500]}", file=sys.stderr)
1411
+ print(f" Response: {conversion_output[:500]}", file=sys.stderr)
1413
1412
  failed += 1
1414
1413
  break
1415
1414
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 3.0.6
3
+ Version: 3.0.8
4
4
  Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "mdify-cli"
3
- version = "3.0.6"
3
+ version = "3.0.8"
4
4
  description = "Convert PDFs and document images into structured Markdown for LLM workflows"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes