mdify-cli 3.2.1__tar.gz → 3.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. {mdify_cli-3.2.1 → mdify_cli-3.3.1}/PKG-INFO +1 -1
  2. {mdify_cli-3.2.1 → mdify_cli-3.3.1}/mdify/__init__.py +1 -1
  3. {mdify_cli-3.2.1 → mdify_cli-3.3.1}/mdify/cli.py +115 -89
  4. {mdify_cli-3.2.1 → mdify_cli-3.3.1}/mdify/docling_client.py +24 -1
  5. {mdify_cli-3.2.1 → mdify_cli-3.3.1}/mdify/ssh/models.py +0 -12
  6. {mdify_cli-3.2.1 → mdify_cli-3.3.1}/mdify/ssh/remote_container.py +1 -1
  7. {mdify_cli-3.2.1 → mdify_cli-3.3.1}/mdify/ssh/transfer.py +3 -2
  8. {mdify_cli-3.2.1 → mdify_cli-3.3.1}/mdify_cli.egg-info/PKG-INFO +1 -1
  9. {mdify_cli-3.2.1 → mdify_cli-3.3.1}/pyproject.toml +1 -1
  10. {mdify_cli-3.2.1 → mdify_cli-3.3.1}/LICENSE +0 -0
  11. {mdify_cli-3.2.1 → mdify_cli-3.3.1}/README.md +0 -0
  12. {mdify_cli-3.2.1 → mdify_cli-3.3.1}/assets/mdify.png +0 -0
  13. {mdify_cli-3.2.1 → mdify_cli-3.3.1}/mdify/__main__.py +0 -0
  14. {mdify_cli-3.2.1 → mdify_cli-3.3.1}/mdify/container.py +0 -0
  15. {mdify_cli-3.2.1 → mdify_cli-3.3.1}/mdify/formatting.py +0 -0
  16. {mdify_cli-3.2.1 → mdify_cli-3.3.1}/mdify/ssh/__init__.py +0 -0
  17. {mdify_cli-3.2.1 → mdify_cli-3.3.1}/mdify/ssh/client.py +0 -0
  18. {mdify_cli-3.2.1 → mdify_cli-3.3.1}/mdify_cli.egg-info/SOURCES.txt +0 -0
  19. {mdify_cli-3.2.1 → mdify_cli-3.3.1}/mdify_cli.egg-info/dependency_links.txt +0 -0
  20. {mdify_cli-3.2.1 → mdify_cli-3.3.1}/mdify_cli.egg-info/entry_points.txt +0 -0
  21. {mdify_cli-3.2.1 → mdify_cli-3.3.1}/mdify_cli.egg-info/requires.txt +0 -0
  22. {mdify_cli-3.2.1 → mdify_cli-3.3.1}/mdify_cli.egg-info/top_level.txt +0 -0
  23. {mdify_cli-3.2.1 → mdify_cli-3.3.1}/setup.cfg +0 -0
  24. {mdify_cli-3.2.1 → mdify_cli-3.3.1}/tests/test_cli.py +0 -0
  25. {mdify_cli-3.2.1 → mdify_cli-3.3.1}/tests/test_container.py +0 -0
  26. {mdify_cli-3.2.1 → mdify_cli-3.3.1}/tests/test_docling_client.py +0 -0
  27. {mdify_cli-3.2.1 → mdify_cli-3.3.1}/tests/test_ssh_client.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 3.2.1
3
+ Version: 3.3.1
4
4
  Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
@@ -1,3 +1,3 @@
1
1
  """mdify - Convert documents to Markdown via Docling container."""
2
2
 
3
- __version__ = "3.2.1"
3
+ __version__ = "3.3.1"
@@ -8,10 +8,10 @@ is lightweight and has no ML dependencies.
8
8
  """
9
9
 
10
10
  import argparse
11
- import asyncio
12
11
  import json
13
12
  import os
14
13
  import platform
14
+ import shlex
15
15
  import shutil
16
16
  import subprocess
17
17
  import sys
@@ -273,14 +273,14 @@ def check_for_update(force: bool = False) -> None:
273
273
 
274
274
  _update_last_check_time()
275
275
 
276
+ from mdify.formatting import Colorizer
277
+
276
278
  if not _compare_versions(__version__, remote_version):
277
279
  if force:
278
- from mdify.formatting import Colorizer
279
280
  color = Colorizer(sys.stdout)
280
281
  print(color.success(f"✓ mdify is up to date (v{__version__})"))
281
282
  return
282
283
 
283
- from mdify.formatting import Colorizer
284
284
  color = Colorizer(sys.stdout)
285
285
  print(f"\n{color.bright_yellow('=' * 60)}")
286
286
  print(color.bold_yellow("🎉 A new version of mdify-cli is available!"))
@@ -1175,7 +1175,6 @@ def main_async_remote(args) -> int:
1175
1175
  input_path = Path(args.input)
1176
1176
  if not input_path.exists():
1177
1177
  await ssh_client.disconnect()
1178
- color = Colorizer(sys.stderr)
1179
1178
  print(f"{color.error('✗ Error:')} Input file or directory not found: {args.input}", file=sys.stderr)
1180
1179
  return 1
1181
1180
 
@@ -1185,7 +1184,6 @@ def main_async_remote(args) -> int:
1185
1184
 
1186
1185
  if not files_to_convert:
1187
1186
  await ssh_client.disconnect()
1188
- color = Colorizer(sys.stderr)
1189
1187
  print(f"{color.error('✗ Error:')} No supported files found in {args.input}", file=sys.stderr)
1190
1188
  print(f" {color.dim_white(f'Supported formats: {', '.join(sorted(SUPPORTED_EXTENSIONS))}')} ", file=sys.stderr)
1191
1189
  return 1
@@ -1268,7 +1266,7 @@ def main_async_remote(args) -> int:
1268
1266
  )
1269
1267
 
1270
1268
  attempt = 0
1271
- while True:
1269
+ while attempt <= 1: # Max 2 attempts (0 and 1)
1272
1270
  try:
1273
1271
  # Upload file
1274
1272
  remote_file_path = f"{work_dir}/{input_file.name}"
@@ -1322,7 +1320,7 @@ def main_async_remote(args) -> int:
1322
1320
  f"curl -X POST "
1323
1321
  f"--connect-timeout 60 "
1324
1322
  f"--max-time {remote_conversion_timeout} "
1325
- f"-F 'files=@{remote_file_path}' "
1323
+ f"-F 'files=@{shlex.quote(remote_file_path)}' "
1326
1324
  f"-F 'to_formats=md' "
1327
1325
  f"-F 'do_ocr=true' "
1328
1326
  )
@@ -1335,11 +1333,12 @@ def main_async_remote(args) -> int:
1335
1333
  conversion_success = False
1336
1334
  conversion_output = None
1337
1335
  while conversion_attempt < 3 and not conversion_success:
1336
+ conversion_attempt += 1
1338
1337
  try:
1339
- if conversion_attempt > 0 and not args.quiet:
1340
- # Exponential backoff: 2s, 4s, 8s
1341
- backoff_delay = 2 ** conversion_attempt
1342
- print(f" ↻ Conversion retry {conversion_attempt} (waiting {backoff_delay}s for server recovery)...", file=sys.stderr)
1338
+ if conversion_attempt > 1 and not args.quiet:
1339
+ # Exponential backoff: 2s, 4s
1340
+ backoff_delay = 2 ** (conversion_attempt - 1)
1341
+ print(f" ↻ Conversion retry {conversion_attempt - 1} (waiting {backoff_delay}s for server recovery)...", file=sys.stderr)
1343
1342
  await asyncio.sleep(backoff_delay)
1344
1343
 
1345
1344
  conversion_output, _, conv_code = await ssh_client.run_command(convert_cmd, timeout=remote_conversion_timeout)
@@ -1352,8 +1351,7 @@ def main_async_remote(args) -> int:
1352
1351
  break
1353
1352
  except Exception as conv_exc:
1354
1353
  is_conn_err = is_connection_error(conv_exc)
1355
- if is_conn_err and conversion_attempt < 2:
1356
- conversion_attempt += 1
1354
+ if is_conn_err and conversion_attempt < 3:
1357
1355
  if not args.quiet:
1358
1356
  # Exponential backoff: 5s, 10s
1359
1357
  backoff_delay = 5 * conversion_attempt
@@ -1364,6 +1362,7 @@ def main_async_remote(args) -> int:
1364
1362
  try:
1365
1363
  await ssh_client.disconnect()
1366
1364
  except Exception:
1365
+ # Best-effort disconnect; ignore errors (e.g., already closed) before reconnecting
1367
1366
  pass
1368
1367
 
1369
1368
  # Reconnect with retry
@@ -1375,51 +1374,65 @@ def main_async_remote(args) -> int:
1375
1374
  continue
1376
1375
  else:
1377
1376
  # Either not a connection error, or we've exhausted retries
1378
- if not args.quiet:
1379
- print(f" [DEBUG] Breaking loop: not conn_err or exhausted retries", file=sys.stderr)
1380
- if conversion_attempt >= 2 and is_conn_err:
1377
+ if conversion_attempt >= 3 and is_conn_err:
1381
1378
  if not args.quiet:
1382
1379
  print(f" ↻ Connection error on final retry attempt", file=sys.stderr)
1383
1380
  break
1384
1381
 
1385
1382
  if not conversion_success:
1386
- color_error = Colorizer(sys.stderr)
1387
- print(f" {color_error.error('✗ Failed:')} Conversion failed after {conversion_attempt} attempt(s)", file=sys.stderr)
1383
+ print(f" {color.error('✗ Failed:')} Conversion failed after {conversion_attempt} attempt(s)", file=sys.stderr)
1388
1384
  failed += 1
1389
1385
  break
1390
1386
 
1391
1387
  # Parse JSON response to extract markdown content
1392
1388
  try:
1393
1389
  response_data = json.loads(conversion_output)
1390
+ color_err = Colorizer(sys.stderr)
1391
+
1392
+ # Check if response is an error (has error keys)
1393
+ error_keys = {"detail", "error", "message", "code", "status"}
1394
+ response_keys = set(response_data.keys()) if isinstance(response_data, dict) else set()
1395
+ if error_keys & response_keys:
1396
+ # Error response - extract and display error
1397
+ error_detail = response_data.get("detail", response_data.get("error", str(response_data)))
1398
+ print(f" {color_err.error('✗ Failed:')} {error_detail}", file=sys.stderr)
1399
+ if "DOCLING_SERVE_MAX_SYNC_WAIT" in str(error_detail):
1400
+ timeout_val = args.remote_timeout or 3600
1401
+ print(f" {color_err.info('ℹ Tip:')} Increase timeout with --remote-timeout (current: {timeout_val}s)", file=sys.stderr)
1402
+ failed += 1
1403
+ break
1394
1404
 
1395
1405
  # Extract content from response structure
1396
1406
  # Actual format: {"document": {"md_content": "..."}, "status": "success"}
1407
+ markdown_content = None
1397
1408
  if "document" in response_data:
1398
1409
  document = response_data["document"]
1399
1410
  if "md_content" in document and document["md_content"]:
1400
1411
  markdown_content = document["md_content"]
1401
1412
  elif "text_content" in document and document["text_content"]:
1402
1413
  markdown_content = document["text_content"]
1403
- else:
1404
- # Fallback - use whole document
1405
- markdown_content = json.dumps(document, indent=2)
1406
- else:
1414
+ elif "results" in response_data and response_data["results"]:
1407
1415
  # Legacy format fallback
1408
- if "results" in response_data and response_data["results"]:
1409
- result = response_data["results"][0]
1410
- if "content" in result:
1411
- content = result["content"]
1412
- if isinstance(content, dict) and "markdown" in content:
1413
- markdown_content = content["markdown"]
1414
- elif isinstance(content, str):
1415
- markdown_content = content
1416
- else:
1417
- markdown_content = str(content)
1416
+ result = response_data["results"][0]
1417
+ if "content" in result:
1418
+ content = result["content"]
1419
+ if isinstance(content, dict) and "markdown" in content:
1420
+ markdown_content = content["markdown"]
1421
+ elif isinstance(content, str):
1422
+ markdown_content = content
1418
1423
  else:
1419
- markdown_content = str(result)
1420
- else:
1421
- # Ultimate fallback
1422
- markdown_content = conversion_output
1424
+ markdown_content = str(content)
1425
+
1426
+ # Validate content exists and is not empty/too short
1427
+ if not markdown_content or len(markdown_content.strip()) < 50:
1428
+ print(f" {color_err.error('✗ Failed:')} Empty or invalid conversion result", file=sys.stderr)
1429
+ if args.timeout and args.timeout < 300:
1430
+ print(
1431
+ f" {color_err.info('ℹ Tip:')} Timeout is only {args.timeout}s. Consider increasing with --timeout (default: 3600s for remote)",
1432
+ file=sys.stderr,
1433
+ )
1434
+ failed += 1
1435
+ break
1423
1436
 
1424
1437
  # Write markdown content to local temp file first, then upload via SFTP
1425
1438
  # (Piping large content through SSH here-documents can crash the connection)
@@ -1427,6 +1440,7 @@ def main_async_remote(args) -> int:
1427
1440
  if not args.quiet:
1428
1441
  print(f" {color.cyan('Writing')} {content_size_kb:.1f}KB markdown via SFTP...", file=sys.stderr)
1429
1442
 
1443
+ temp_path = None
1430
1444
  try:
1431
1445
  # Write to temporary local file
1432
1446
  with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False) as temp_file:
@@ -1441,12 +1455,6 @@ def main_async_remote(args) -> int:
1441
1455
  compress=False,
1442
1456
  )
1443
1457
 
1444
- # Cleanup temp file
1445
- try:
1446
- os.unlink(temp_path)
1447
- except Exception:
1448
- pass
1449
-
1450
1458
  if not args.quiet:
1451
1459
  print(f" {color.green('✓')} Markdown written", file=sys.stderr)
1452
1460
  except Exception as write_exc:
@@ -1454,6 +1462,14 @@ def main_async_remote(args) -> int:
1454
1462
  print(f" ✗ Failed to write markdown: {write_exc}", file=sys.stderr)
1455
1463
  failed += 1
1456
1464
  break
1465
+ finally:
1466
+ # Cleanup temp file
1467
+ if temp_path:
1468
+ try:
1469
+ os.unlink(temp_path)
1470
+ except Exception as cleanup_exc:
1471
+ if DEBUG:
1472
+ print(f" ! Failed to remove temporary file {temp_path}: {cleanup_exc}", file=sys.stderr)
1457
1473
 
1458
1474
  except (json.JSONDecodeError, KeyError, IndexError):
1459
1475
  print(f" ✗ Failed to parse conversion response", file=sys.stderr)
@@ -1481,7 +1497,7 @@ def main_async_remote(args) -> int:
1481
1497
  successful += 1
1482
1498
 
1483
1499
  # Cleanup remote files
1484
- await ssh_client.run_command(f"rm -f {remote_file_path} {remote_output_path}")
1500
+ await ssh_client.run_command(f"rm -f {shlex.quote(remote_file_path)} {shlex.quote(remote_output_path)}")
1485
1501
 
1486
1502
  break
1487
1503
  except Exception as e:
@@ -1492,12 +1508,12 @@ def main_async_remote(args) -> int:
1492
1508
  try:
1493
1509
  await ssh_client.disconnect()
1494
1510
  except Exception:
1511
+ # Best-effort disconnect; ignore errors since we'll immediately reconnect.
1495
1512
  pass
1496
1513
  await ssh_client.connect()
1497
1514
  continue
1498
1515
 
1499
- color_err = Colorizer(sys.stderr)
1500
- print(f" {color_err.error('✗ Failed:')} {str(e)}", file=sys.stderr)
1516
+ print(f" {color.error('✗ Failed:')} {str(e)}", file=sys.stderr)
1501
1517
  if DEBUG:
1502
1518
  import traceback
1503
1519
  traceback.print_exc(file=sys.stderr)
@@ -1519,7 +1535,7 @@ def main_async_remote(args) -> int:
1519
1535
 
1520
1536
  # Cleanup remote work directory
1521
1537
  try:
1522
- await ssh_client.run_command(f"rm -rf {work_dir}")
1538
+ await ssh_client.run_command(f"rm -rf {shlex.quote(work_dir)}")
1523
1539
  if not args.quiet:
1524
1540
  print(color.green(f"✓ Cleaned up remote directory"), file=sys.stderr)
1525
1541
  except Exception as e:
@@ -1594,8 +1610,11 @@ def main_async_remote(args) -> int:
1594
1610
  def main() -> int:
1595
1611
  """Main entry point for the CLI."""
1596
1612
  from mdify.formatting import Colorizer
1597
- color = Colorizer(sys.stderr)
1598
- print(color.bold_cyan(f"📄 mdify v{__version__}"), file=sys.stderr)
1613
+
1614
+ color_stderr = Colorizer(sys.stderr)
1615
+ color_stdout = Colorizer(sys.stdout)
1616
+
1617
+ print(color_stderr.bold_cyan(f"📄 mdify v{__version__}"), file=sys.stderr)
1599
1618
  args = parse_args()
1600
1619
 
1601
1620
  # Handle --check-update flag
@@ -1743,41 +1762,34 @@ def main() -> int:
1743
1762
 
1744
1763
  # Validate input
1745
1764
  if not input_path.exists():
1746
- color = Colorizer(sys.stderr)
1747
- print(f"{color.error('✗ Error:')} Input path does not exist: {input_path}", file=sys.stderr)
1765
+ print(f"{color_stderr.error('✗ Error:')} Input path does not exist: {input_path}", file=sys.stderr)
1748
1766
  return 1
1749
1767
 
1750
1768
  # Get files to convert
1751
1769
  try:
1752
1770
  files_to_convert = get_files_to_convert(input_path, args.glob, args.recursive)
1753
1771
  except Exception as e:
1754
- color = Colorizer(sys.stderr)
1755
- print(f"{color.error('✗ Error:')} {e}", file=sys.stderr)
1772
+ print(f"{color_stderr.error('✗ Error:')} {e}", file=sys.stderr)
1756
1773
  return 1
1757
1774
 
1758
1775
  if not files_to_convert:
1759
- color = Colorizer(sys.stderr)
1760
- print(f"{color.warning('⚠ Warning:')} No files found to convert in: {input_path}", file=sys.stderr)
1776
+ print(f"{color_stderr.warning('⚠ Warning:')} No files found to convert in: {input_path}", file=sys.stderr)
1761
1777
  return 1
1762
1778
 
1763
1779
  total_files = len(files_to_convert)
1764
1780
  total_size = sum(f.stat().st_size for f in files_to_convert)
1765
1781
 
1766
1782
  if not args.quiet:
1767
- from mdify.formatting import Colorizer
1768
- color_info = Colorizer(sys.stdout)
1769
- print(f"{color_info.bright_cyan('📦 Found')} {color_info.bold(str(total_files))} {color_info.bright_cyan('file(s)')} {color_info.dim_white(f'({format_size(total_size)})')}")
1770
- print(f"{color_info.cyan('📁 Source:')} {color_info.bright_white(str(input_path.resolve()))}")
1771
- print(f"{color_info.cyan('💾 Output:')} {color_info.bright_white(str(output_dir.resolve()))}")
1772
- print(f"{color_info.cyan('🐳 Runtime:')} {color_info.bright_white(runtime)}")
1773
- print(f"{color_info.cyan('🖼️ Image:')} {color_info.dim_white(image)}")
1783
+ print(f"{color_stdout.bright_cyan('📦 Found')} {color_stdout.bold(str(total_files))} {color_stdout.bright_cyan('file(s)')} {color_stdout.dim_white(f'({format_size(total_size)})')}")
1784
+ print(f"{color_stdout.cyan('📁 Source:')} {color_stdout.bright_white(str(input_path.resolve()))}")
1785
+ print(f"{color_stdout.cyan('💾 Output:')} {color_stdout.bright_white(str(output_dir.resolve()))}")
1786
+ print(f"{color_stdout.cyan('🐳 Runtime:')} {color_stdout.bright_white(runtime)}")
1787
+ print(f"{color_stdout.cyan('🖼️ Image:')} {color_stdout.dim_white(image)}")
1774
1788
  print()
1775
1789
 
1776
1790
  if args.mask:
1777
- from mdify.formatting import Colorizer
1778
- color_warn = Colorizer(sys.stderr)
1779
1791
  print(
1780
- color_warn.warning("⚠ --mask is not supported with docling-serve and will be ignored"),
1792
+ color_stderr.warning("⚠ --mask is not supported with docling-serve and will be ignored"),
1781
1793
  file=sys.stderr,
1782
1794
  )
1783
1795
 
@@ -1794,9 +1806,7 @@ def main() -> int:
1794
1806
 
1795
1807
  try:
1796
1808
  if not args.quiet:
1797
- from mdify.formatting import Colorizer
1798
- color_start = Colorizer(sys.stdout)
1799
- print(f"{color_start.bright_cyan('▶️ Starting')} {color_start.bright_white('docling-serve')} {color_start.bright_cyan('container')}...\n")
1809
+ print(f"{color_stdout.bright_cyan('▶️ Starting')} {color_stdout.bright_white('docling-serve')} {color_stdout.bright_cyan('container')}...\n")
1800
1810
 
1801
1811
  # Apply resource profile
1802
1812
  profile = RESOURCE_PROFILES[args.profile]
@@ -1871,13 +1881,33 @@ def main() -> int:
1871
1881
  spinner.stop()
1872
1882
 
1873
1883
  if result.success:
1874
- # Write result to output file
1875
- output_file.write_text(result.content)
1876
- success_count += 1
1877
- if not args.quiet:
1878
- print(
1879
- f"{progress} {input_file.name} ✓ ({format_duration(elapsed)})"
1880
- )
1884
+ # Validate content is not empty and not an error response
1885
+ content_length = len(result.content.strip()) if result.content else 0
1886
+ if content_length < 50:
1887
+ # Too short - likely an error or empty document
1888
+ failed_count += 1
1889
+ if not args.quiet:
1890
+ print(
1891
+ f"{progress} {input_file.name} ✗ ({format_duration(elapsed)})"
1892
+ )
1893
+ error_msg = "Empty or invalid conversion result"
1894
+ if result.content:
1895
+ error_msg += f" ({len(result.content)} bytes)"
1896
+ print(f" Error: {error_msg}", file=sys.stderr)
1897
+ if args.timeout and args.timeout < 300:
1898
+ color_out = Colorizer(sys.stderr)
1899
+ print(
1900
+ f" {color_out.info('ℹ Tip:')} Timeout is only {args.timeout}s. Consider increasing with --timeout (default: 1200s)",
1901
+ file=sys.stderr,
1902
+ )
1903
+ else:
1904
+ # Write result to output file
1905
+ output_file.write_text(result.content)
1906
+ success_count += 1
1907
+ if not args.quiet:
1908
+ print(
1909
+ f"{progress} {input_file.name} ✓ ({format_duration(elapsed)})"
1910
+ )
1881
1911
  else:
1882
1912
  failed_count += 1
1883
1913
  error_msg = result.error or "Unknown error"
@@ -2035,30 +2065,26 @@ def main() -> int:
2035
2065
 
2036
2066
  # Print summary
2037
2067
  if not args.quiet:
2038
- from mdify.formatting import Colorizer
2039
- color_out = Colorizer(sys.stdout)
2040
2068
  print()
2041
- print(color_out.cyan("=" * 60))
2042
- print(color_out.bold_cyan("📊 Local Conversion Summary"))
2043
- print(color_out.cyan("=" * 60))
2044
- print(f" {color_out.cyan('Total files:')} {color_out.bold(str(total_files))}")
2069
+ print(color_stdout.cyan("=" * 60))
2070
+ print(color_stdout.bold_cyan("📊 Local Conversion Summary"))
2071
+ print(color_stdout.cyan("=" * 60))
2072
+ print(f" {color_stdout.cyan('Total files:')} {color_stdout.bold(str(total_files))}")
2045
2073
  if success_count > 0:
2046
- print(f" {color_out.green('✓ Successful:')} {color_out.bold_green(str(success_count))}")
2074
+ print(f" {color_stdout.green('✓ Successful:')} {color_stdout.bold_green(str(success_count))}")
2047
2075
  if skipped_count > 0:
2048
- print(f" {color_out.yellow('⊘ Skipped:')} {color_out.bold_yellow(str(skipped_count))}")
2076
+ print(f" {color_stdout.yellow('⊘ Skipped:')} {color_stdout.bold_yellow(str(skipped_count))}")
2049
2077
  if failed_count > 0:
2050
- print(f" {color_out.red('✗ Failed:')} {color_out.bold_red(str(failed_count))}")
2051
- print(f" {color_out.cyan('Total time:')} {color_out.bright_cyan(format_duration(total_elapsed))}")
2052
- print(color_out.cyan("=" * 60))
2078
+ print(f" {color_stdout.red('✗ Failed:')} {color_stdout.bold_red(str(failed_count))}")
2079
+ print(f" {color_stdout.cyan('Total time:')} {color_stdout.bright_cyan(format_duration(total_elapsed))}")
2080
+ print(color_stdout.cyan("=" * 60))
2053
2081
 
2054
2082
  except KeyboardInterrupt:
2055
2083
  if not args.quiet:
2056
- from mdify.formatting import Colorizer
2057
- color_out = Colorizer(sys.stdout)
2058
- print(f"\n\n{color_out.warning('⚠ Interrupted by user. Container stopped.')}")
2084
+ print(f"\n\n{color_stdout.warning('⚠ Interrupted by user. Container stopped.')}")
2059
2085
  if success_count > 0 or skipped_count > 0 or failed_count > 0:
2060
2086
  print(
2061
- f"{color_out.dim_white('Partial progress:')} {color_out.green(str(success_count))} successful, {color_out.red(str(failed_count))} failed, {color_out.yellow(str(skipped_count))} skipped"
2087
+ f"{color_stdout.dim_white('Partial progress:')} {color_stdout.green(str(success_count))} successful, {color_stdout.red(str(failed_count))} failed, {color_stdout.yellow(str(skipped_count))} skipped"
2062
2088
  )
2063
2089
  return 130
2064
2090
 
@@ -48,6 +48,25 @@ def _get_mime_type(file_path: Path) -> str:
48
48
  return mime_type or "application/octet-stream"
49
49
 
50
50
 
51
+ def _is_error_response(result_data) -> bool:
52
+ """Check if response contains an error instead of content.
53
+
54
+ Detects common API error response patterns.
55
+
56
+ Args:
57
+ result_data: Response data to check
58
+
59
+ Returns:
60
+ True if response appears to be an error
61
+ """
62
+ if not isinstance(result_data, dict):
63
+ return False
64
+
65
+ # Check for common error keys at top level
66
+ error_keys = {"detail", "error", "message", "code", "status"}
67
+ return bool(error_keys & set(result_data.keys()))
68
+
69
+
51
70
  def _extract_content(result_data) -> str:
52
71
  """Extract content from API response, supporting both old and new formats.
53
72
 
@@ -61,8 +80,12 @@ def _extract_content(result_data) -> str:
61
80
  result_data: Response data from docling-serve API
62
81
 
63
82
  Returns:
64
- Extracted content string, or empty string if not found
83
+ Extracted content string, or empty string if not found or if response is an error
65
84
  """
85
+ # Detect error responses and return empty string instead of parsing them
86
+ if _is_error_response(result_data):
87
+ return ""
88
+
66
89
  if isinstance(result_data, dict):
67
90
  # New format with document field
68
91
  if "document" in result_data:
@@ -66,18 +66,6 @@ class SSHConfig:
66
66
 
67
67
  def __post_init__(self):
68
68
  """Validate config after initialization."""
69
- if self.port is None:
70
- self.port = 22
71
- if self.timeout is None:
72
- self.timeout = 30
73
- if self.keepalive is None:
74
- self.keepalive = 60
75
- if self.compression is None:
76
- self.compression = False
77
- if self.work_dir is None:
78
- self.work_dir = "/tmp/mdify"
79
- if self.username is None:
80
- self.username = ""
81
69
  if not self.host:
82
70
  raise ConfigError("host is required")
83
71
  if not 1 <= self.port <= 65535:
@@ -63,7 +63,7 @@ class RemoteContainer(DoclingContainer):
63
63
  try:
64
64
  # Find containers using this port
65
65
  # Using docker inspect with port filter
66
- cmd = f"{self.runtime} ps -a --filter 'publish={self.port}' --format '{{{{.ID}}}}'"
66
+ cmd = f"{self.runtime} ps -a --filter 'publish={int(self.port)}' --format '{{{{.ID}}}}'"
67
67
  stdout, stderr, code = await self.ssh_client.run_command(cmd, timeout=10)
68
68
 
69
69
  if code == 0 and stdout.strip():
@@ -3,6 +3,7 @@
3
3
  import gzip
4
4
  import hashlib
5
5
  import logging
6
+ import shlex
6
7
  from pathlib import Path
7
8
  from typing import Callable
8
9
  from mdify.ssh.models import TransferSession
@@ -233,8 +234,8 @@ class FileTransferManager:
233
234
 
234
235
  # Calculate remote checksum (sha256sum or shasum fallback)
235
236
  checksum_cmd = (
236
- f"(command -v sha256sum >/dev/null 2>&1 && sha256sum {remote_path} | awk '{{print $1}}') "
237
- f"|| (command -v shasum >/dev/null 2>&1 && shasum -a 256 {remote_path} | awk '{{print $1}}')"
237
+ f"(command -v sha256sum >/dev/null 2>&1 && sha256sum {shlex.quote(remote_path)} | awk '{{print $1}}') "
238
+ f"|| (command -v shasum >/dev/null 2>&1 && shasum -a 256 {shlex.quote(remote_path)} | awk '{{print $1}}')"
238
239
  )
239
240
  stdout, stderr, code = await self.ssh_client.run_command(checksum_cmd)
240
241
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 3.2.1
3
+ Version: 3.3.1
4
4
  Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "mdify-cli"
3
- version = "3.2.1"
3
+ version = "3.3.1"
4
4
  description = "Convert PDFs and document images into structured Markdown for LLM workflows"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes