mdify-cli 3.2.0__tar.gz → 3.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. {mdify_cli-3.2.0 → mdify_cli-3.3.0}/PKG-INFO +1 -1
  2. {mdify_cli-3.2.0 → mdify_cli-3.3.0}/mdify/__init__.py +1 -1
  3. {mdify_cli-3.2.0 → mdify_cli-3.3.0}/mdify/cli.py +118 -85
  4. {mdify_cli-3.2.0 → mdify_cli-3.3.0}/mdify/docling_client.py +24 -1
  5. {mdify_cli-3.2.0 → mdify_cli-3.3.0}/mdify/ssh/models.py +0 -12
  6. {mdify_cli-3.2.0 → mdify_cli-3.3.0}/mdify/ssh/remote_container.py +1 -1
  7. {mdify_cli-3.2.0 → mdify_cli-3.3.0}/mdify/ssh/transfer.py +3 -2
  8. {mdify_cli-3.2.0 → mdify_cli-3.3.0}/mdify_cli.egg-info/PKG-INFO +1 -1
  9. {mdify_cli-3.2.0 → mdify_cli-3.3.0}/pyproject.toml +1 -1
  10. {mdify_cli-3.2.0 → mdify_cli-3.3.0}/LICENSE +0 -0
  11. {mdify_cli-3.2.0 → mdify_cli-3.3.0}/README.md +0 -0
  12. {mdify_cli-3.2.0 → mdify_cli-3.3.0}/assets/mdify.png +0 -0
  13. {mdify_cli-3.2.0 → mdify_cli-3.3.0}/mdify/__main__.py +0 -0
  14. {mdify_cli-3.2.0 → mdify_cli-3.3.0}/mdify/container.py +0 -0
  15. {mdify_cli-3.2.0 → mdify_cli-3.3.0}/mdify/formatting.py +0 -0
  16. {mdify_cli-3.2.0 → mdify_cli-3.3.0}/mdify/ssh/__init__.py +0 -0
  17. {mdify_cli-3.2.0 → mdify_cli-3.3.0}/mdify/ssh/client.py +0 -0
  18. {mdify_cli-3.2.0 → mdify_cli-3.3.0}/mdify_cli.egg-info/SOURCES.txt +0 -0
  19. {mdify_cli-3.2.0 → mdify_cli-3.3.0}/mdify_cli.egg-info/dependency_links.txt +0 -0
  20. {mdify_cli-3.2.0 → mdify_cli-3.3.0}/mdify_cli.egg-info/entry_points.txt +0 -0
  21. {mdify_cli-3.2.0 → mdify_cli-3.3.0}/mdify_cli.egg-info/requires.txt +0 -0
  22. {mdify_cli-3.2.0 → mdify_cli-3.3.0}/mdify_cli.egg-info/top_level.txt +0 -0
  23. {mdify_cli-3.2.0 → mdify_cli-3.3.0}/setup.cfg +0 -0
  24. {mdify_cli-3.2.0 → mdify_cli-3.3.0}/tests/test_cli.py +0 -0
  25. {mdify_cli-3.2.0 → mdify_cli-3.3.0}/tests/test_container.py +0 -0
  26. {mdify_cli-3.2.0 → mdify_cli-3.3.0}/tests/test_docling_client.py +0 -0
  27. {mdify_cli-3.2.0 → mdify_cli-3.3.0}/tests/test_ssh_client.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 3.2.0
3
+ Version: 3.3.0
4
4
  Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
@@ -1,3 +1,3 @@
1
1
  """mdify - Convert documents to Markdown via Docling container."""
2
2
 
3
- __version__ = "3.2.0"
3
+ __version__ = "3.3.0"
@@ -8,10 +8,10 @@ is lightweight and has no ML dependencies.
8
8
  """
9
9
 
10
10
  import argparse
11
- import asyncio
12
11
  import json
13
12
  import os
14
13
  import platform
14
+ import shlex
15
15
  import shutil
16
16
  import subprocess
17
17
  import sys
@@ -273,14 +273,14 @@ def check_for_update(force: bool = False) -> None:
273
273
 
274
274
  _update_last_check_time()
275
275
 
276
+ from mdify.formatting import Colorizer
277
+
276
278
  if not _compare_versions(__version__, remote_version):
277
279
  if force:
278
- from mdify.formatting import Colorizer
279
280
  color = Colorizer(sys.stdout)
280
281
  print(color.success(f"✓ mdify is up to date (v{__version__})"))
281
282
  return
282
283
 
283
- from mdify.formatting import Colorizer
284
284
  color = Colorizer(sys.stdout)
285
285
  print(f"\n{color.bright_yellow('=' * 60)}")
286
286
  print(color.bold_yellow("🎉 A new version of mdify-cli is available!"))
@@ -1175,7 +1175,7 @@ def main_async_remote(args) -> int:
1175
1175
  input_path = Path(args.input)
1176
1176
  if not input_path.exists():
1177
1177
  await ssh_client.disconnect()
1178
- print(f"Error: Input file or directory not found: {args.input}", file=sys.stderr)
1178
+ print(f"{color.error('✗ Error:')} Input file or directory not found: {args.input}", file=sys.stderr)
1179
1179
  return 1
1180
1180
 
1181
1181
  # Store resolved path as base for relative path calculations
@@ -1184,8 +1184,8 @@ def main_async_remote(args) -> int:
1184
1184
 
1185
1185
  if not files_to_convert:
1186
1186
  await ssh_client.disconnect()
1187
- print(f"Error: No supported files found in {args.input}", file=sys.stderr)
1188
- print(f" Supported formats: {', '.join(sorted(SUPPORTED_EXTENSIONS))}", file=sys.stderr)
1187
+ print(f"{color.error('✗ Error:')} No supported files found in {args.input}", file=sys.stderr)
1188
+ print(f" {color.dim_white(f'Supported formats: {', '.join(sorted(SUPPORTED_EXTENSIONS))}')} ", file=sys.stderr)
1189
1189
  return 1
1190
1190
 
1191
1191
  if not args.quiet:
@@ -1266,7 +1266,7 @@ def main_async_remote(args) -> int:
1266
1266
  )
1267
1267
 
1268
1268
  attempt = 0
1269
- while True:
1269
+ while attempt <= 1: # Max 2 attempts (0 and 1)
1270
1270
  try:
1271
1271
  # Upload file
1272
1272
  remote_file_path = f"{work_dir}/{input_file.name}"
@@ -1320,7 +1320,7 @@ def main_async_remote(args) -> int:
1320
1320
  f"curl -X POST "
1321
1321
  f"--connect-timeout 60 "
1322
1322
  f"--max-time {remote_conversion_timeout} "
1323
- f"-F 'files=@{remote_file_path}' "
1323
+ f"-F 'files=@{shlex.quote(remote_file_path)}' "
1324
1324
  f"-F 'to_formats=md' "
1325
1325
  f"-F 'do_ocr=true' "
1326
1326
  )
@@ -1333,11 +1333,12 @@ def main_async_remote(args) -> int:
1333
1333
  conversion_success = False
1334
1334
  conversion_output = None
1335
1335
  while conversion_attempt < 3 and not conversion_success:
1336
+ conversion_attempt += 1
1336
1337
  try:
1337
- if conversion_attempt > 0 and not args.quiet:
1338
- # Exponential backoff: 2s, 4s, 8s
1339
- backoff_delay = 2 ** conversion_attempt
1340
- print(f" ↻ Conversion retry {conversion_attempt} (waiting {backoff_delay}s for server recovery)...", file=sys.stderr)
1338
+ if conversion_attempt > 1 and not args.quiet:
1339
+ # Exponential backoff: 2s, 4s
1340
+ backoff_delay = 2 ** (conversion_attempt - 1)
1341
+ print(f" ↻ Conversion retry {conversion_attempt - 1} (waiting {backoff_delay}s for server recovery)...", file=sys.stderr)
1341
1342
  await asyncio.sleep(backoff_delay)
1342
1343
 
1343
1344
  conversion_output, _, conv_code = await ssh_client.run_command(convert_cmd, timeout=remote_conversion_timeout)
@@ -1350,8 +1351,7 @@ def main_async_remote(args) -> int:
1350
1351
  break
1351
1352
  except Exception as conv_exc:
1352
1353
  is_conn_err = is_connection_error(conv_exc)
1353
- if is_conn_err and conversion_attempt < 2:
1354
- conversion_attempt += 1
1354
+ if is_conn_err and conversion_attempt < 3:
1355
1355
  if not args.quiet:
1356
1356
  # Exponential backoff: 5s, 10s
1357
1357
  backoff_delay = 5 * conversion_attempt
@@ -1362,6 +1362,7 @@ def main_async_remote(args) -> int:
1362
1362
  try:
1363
1363
  await ssh_client.disconnect()
1364
1364
  except Exception:
1365
+ # Best-effort disconnect; ignore errors (e.g., already closed) before reconnecting
1365
1366
  pass
1366
1367
 
1367
1368
  # Reconnect with retry
@@ -1373,50 +1374,65 @@ def main_async_remote(args) -> int:
1373
1374
  continue
1374
1375
  else:
1375
1376
  # Either not a connection error, or we've exhausted retries
1376
- if not args.quiet:
1377
- print(f" [DEBUG] Breaking loop: not conn_err or exhausted retries", file=sys.stderr)
1378
- if conversion_attempt >= 2 and is_conn_err:
1377
+ if conversion_attempt >= 3 and is_conn_err:
1379
1378
  if not args.quiet:
1380
1379
  print(f" ↻ Connection error on final retry attempt", file=sys.stderr)
1381
1380
  break
1382
1381
 
1383
1382
  if not conversion_success:
1384
- print(f" ✗ Failed: Conversion failed after {conversion_attempt} attempt(s)", file=sys.stderr)
1383
+ print(f" {color.error('✗ Failed:')} Conversion failed after {conversion_attempt} attempt(s)", file=sys.stderr)
1385
1384
  failed += 1
1386
1385
  break
1387
1386
 
1388
1387
  # Parse JSON response to extract markdown content
1389
1388
  try:
1390
1389
  response_data = json.loads(conversion_output)
1390
+ color_err = Colorizer(sys.stderr)
1391
+
1392
+ # Check if response is an error (has error keys)
1393
+ error_keys = {"detail", "error", "message", "code", "status"}
1394
+ response_keys = set(response_data.keys()) if isinstance(response_data, dict) else set()
1395
+ if error_keys & response_keys:
1396
+ # Error response - extract and display error
1397
+ error_detail = response_data.get("detail", response_data.get("error", str(response_data)))
1398
+ print(f" {color_err.error('✗ Failed:')} {error_detail}", file=sys.stderr)
1399
+ if "DOCLING_SERVE_MAX_SYNC_WAIT" in str(error_detail):
1400
+ timeout_val = args.remote_timeout or 3600
1401
+ print(f" {color_err.info('ℹ Tip:')} Increase timeout with --remote-timeout (current: {timeout_val}s)", file=sys.stderr)
1402
+ failed += 1
1403
+ break
1391
1404
 
1392
1405
  # Extract content from response structure
1393
1406
  # Actual format: {"document": {"md_content": "..."}, "status": "success"}
1407
+ markdown_content = None
1394
1408
  if "document" in response_data:
1395
1409
  document = response_data["document"]
1396
1410
  if "md_content" in document and document["md_content"]:
1397
1411
  markdown_content = document["md_content"]
1398
1412
  elif "text_content" in document and document["text_content"]:
1399
1413
  markdown_content = document["text_content"]
1400
- else:
1401
- # Fallback - use whole document
1402
- markdown_content = json.dumps(document, indent=2)
1403
- else:
1414
+ elif "results" in response_data and response_data["results"]:
1404
1415
  # Legacy format fallback
1405
- if "results" in response_data and response_data["results"]:
1406
- result = response_data["results"][0]
1407
- if "content" in result:
1408
- content = result["content"]
1409
- if isinstance(content, dict) and "markdown" in content:
1410
- markdown_content = content["markdown"]
1411
- elif isinstance(content, str):
1412
- markdown_content = content
1413
- else:
1414
- markdown_content = str(content)
1416
+ result = response_data["results"][0]
1417
+ if "content" in result:
1418
+ content = result["content"]
1419
+ if isinstance(content, dict) and "markdown" in content:
1420
+ markdown_content = content["markdown"]
1421
+ elif isinstance(content, str):
1422
+ markdown_content = content
1415
1423
  else:
1416
- markdown_content = str(result)
1417
- else:
1418
- # Ultimate fallback
1419
- markdown_content = conversion_output
1424
+ markdown_content = str(content)
1425
+
1426
+ # Validate content exists and is not empty/too short
1427
+ if not markdown_content or len(markdown_content.strip()) < 50:
1428
+ print(f" {color_err.error('✗ Failed:')} Empty or invalid conversion result", file=sys.stderr)
1429
+ if args.remote_timeout and args.remote_timeout < 300:
1430
+ print(
1431
+ f" {color_err.info('ℹ Tip:')} Timeout is only {args.remote_timeout}s. Consider increasing with --remote-timeout (default: 3600s)",
1432
+ file=sys.stderr,
1433
+ )
1434
+ failed += 1
1435
+ break
1420
1436
 
1421
1437
  # Write markdown content to local temp file first, then upload via SFTP
1422
1438
  # (Piping large content through SSH here-documents can crash the connection)
@@ -1424,6 +1440,7 @@ def main_async_remote(args) -> int:
1424
1440
  if not args.quiet:
1425
1441
  print(f" {color.cyan('Writing')} {content_size_kb:.1f}KB markdown via SFTP...", file=sys.stderr)
1426
1442
 
1443
+ temp_path = None
1427
1444
  try:
1428
1445
  # Write to temporary local file
1429
1446
  with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False) as temp_file:
@@ -1438,12 +1455,6 @@ def main_async_remote(args) -> int:
1438
1455
  compress=False,
1439
1456
  )
1440
1457
 
1441
- # Cleanup temp file
1442
- try:
1443
- os.unlink(temp_path)
1444
- except Exception:
1445
- pass
1446
-
1447
1458
  if not args.quiet:
1448
1459
  print(f" {color.green('✓')} Markdown written", file=sys.stderr)
1449
1460
  except Exception as write_exc:
@@ -1451,6 +1462,14 @@ def main_async_remote(args) -> int:
1451
1462
  print(f" ✗ Failed to write markdown: {write_exc}", file=sys.stderr)
1452
1463
  failed += 1
1453
1464
  break
1465
+ finally:
1466
+ # Cleanup temp file
1467
+ if temp_path:
1468
+ try:
1469
+ os.unlink(temp_path)
1470
+ except Exception as cleanup_exc:
1471
+ if DEBUG:
1472
+ print(f" ! Failed to remove temporary file {temp_path}: {cleanup_exc}", file=sys.stderr)
1454
1473
 
1455
1474
  except (json.JSONDecodeError, KeyError, IndexError):
1456
1475
  print(f" ✗ Failed to parse conversion response", file=sys.stderr)
@@ -1478,7 +1497,7 @@ def main_async_remote(args) -> int:
1478
1497
  successful += 1
1479
1498
 
1480
1499
  # Cleanup remote files
1481
- await ssh_client.run_command(f"rm -f {remote_file_path} {remote_output_path}")
1500
+ await ssh_client.run_command(f"rm -f {shlex.quote(remote_file_path)} {shlex.quote(remote_output_path)}")
1482
1501
 
1483
1502
  break
1484
1503
  except Exception as e:
@@ -1489,11 +1508,12 @@ def main_async_remote(args) -> int:
1489
1508
  try:
1490
1509
  await ssh_client.disconnect()
1491
1510
  except Exception:
1511
+ # Best-effort disconnect; ignore errors since we'll immediately reconnect.
1492
1512
  pass
1493
1513
  await ssh_client.connect()
1494
1514
  continue
1495
1515
 
1496
- print(f" ✗ Failed: {e}", file=sys.stderr)
1516
+ print(f" {color.error('✗ Failed:')} {str(e)}", file=sys.stderr)
1497
1517
  if DEBUG:
1498
1518
  import traceback
1499
1519
  traceback.print_exc(file=sys.stderr)
@@ -1515,7 +1535,7 @@ def main_async_remote(args) -> int:
1515
1535
 
1516
1536
  # Cleanup remote work directory
1517
1537
  try:
1518
- await ssh_client.run_command(f"rm -rf {work_dir}")
1538
+ await ssh_client.run_command(f"rm -rf {shlex.quote(work_dir)}")
1519
1539
  if not args.quiet:
1520
1540
  print(color.green(f"✓ Cleaned up remote directory"), file=sys.stderr)
1521
1541
  except Exception as e:
@@ -1590,8 +1610,11 @@ def main_async_remote(args) -> int:
1590
1610
  def main() -> int:
1591
1611
  """Main entry point for the CLI."""
1592
1612
  from mdify.formatting import Colorizer
1593
- color = Colorizer(sys.stderr)
1594
- print(color.bold_cyan(f"📄 mdify v{__version__}"), file=sys.stderr)
1613
+
1614
+ color_stderr = Colorizer(sys.stderr)
1615
+ color_stdout = Colorizer(sys.stdout)
1616
+
1617
+ print(color_stderr.bold_cyan(f"📄 mdify v{__version__}"), file=sys.stderr)
1595
1618
  args = parse_args()
1596
1619
 
1597
1620
  # Handle --check-update flag
@@ -1739,38 +1762,34 @@ def main() -> int:
1739
1762
 
1740
1763
  # Validate input
1741
1764
  if not input_path.exists():
1742
- print(f"Error: Input path does not exist: {input_path}", file=sys.stderr)
1765
+ print(f"{color_stderr.error('✗ Error:')} Input path does not exist: {input_path}", file=sys.stderr)
1743
1766
  return 1
1744
1767
 
1745
1768
  # Get files to convert
1746
1769
  try:
1747
1770
  files_to_convert = get_files_to_convert(input_path, args.glob, args.recursive)
1748
1771
  except Exception as e:
1749
- print(f"Error: {e}", file=sys.stderr)
1772
+ print(f"{color_stderr.error('✗ Error:')} {e}", file=sys.stderr)
1750
1773
  return 1
1751
1774
 
1752
1775
  if not files_to_convert:
1753
- print(f"No files found to convert in: {input_path}", file=sys.stderr)
1776
+ print(f"{color_stderr.warning('⚠ Warning:')} No files found to convert in: {input_path}", file=sys.stderr)
1754
1777
  return 1
1755
1778
 
1756
1779
  total_files = len(files_to_convert)
1757
1780
  total_size = sum(f.stat().st_size for f in files_to_convert)
1758
1781
 
1759
1782
  if not args.quiet:
1760
- from mdify.formatting import Colorizer
1761
- color_info = Colorizer(sys.stdout)
1762
- print(f"{color_info.bright_cyan('📦 Found')} {color_info.bold(str(total_files))} {color_info.bright_cyan('file(s)')} {color_info.dim_white(f'({format_size(total_size)})')}")
1763
- print(f"{color_info.cyan('📁 Source:')} {color_info.bright_white(str(input_path.resolve()))}")
1764
- print(f"{color_info.cyan('💾 Output:')} {color_info.bright_white(str(output_dir.resolve()))}")
1765
- print(f"{color_info.cyan('🐳 Runtime:')} {color_info.bright_white(runtime)}")
1766
- print(f"{color_info.cyan('🖼️ Image:')} {color_info.dim_white(image)}")
1783
+ print(f"{color_stdout.bright_cyan('📦 Found')} {color_stdout.bold(str(total_files))} {color_stdout.bright_cyan('file(s)')} {color_stdout.dim_white(f'({format_size(total_size)})')}")
1784
+ print(f"{color_stdout.cyan('📁 Source:')} {color_stdout.bright_white(str(input_path.resolve()))}")
1785
+ print(f"{color_stdout.cyan('💾 Output:')} {color_stdout.bright_white(str(output_dir.resolve()))}")
1786
+ print(f"{color_stdout.cyan('🐳 Runtime:')} {color_stdout.bright_white(runtime)}")
1787
+ print(f"{color_stdout.cyan('🖼️ Image:')} {color_stdout.dim_white(image)}")
1767
1788
  print()
1768
1789
 
1769
1790
  if args.mask:
1770
- from mdify.formatting import Colorizer
1771
- color_warn = Colorizer(sys.stderr)
1772
1791
  print(
1773
- color_warn.warning("⚠ --mask is not supported with docling-serve and will be ignored"),
1792
+ color_stderr.warning("⚠ --mask is not supported with docling-serve and will be ignored"),
1774
1793
  file=sys.stderr,
1775
1794
  )
1776
1795
 
@@ -1787,9 +1806,7 @@ def main() -> int:
1787
1806
 
1788
1807
  try:
1789
1808
  if not args.quiet:
1790
- from mdify.formatting import Colorizer
1791
- color_start = Colorizer(sys.stdout)
1792
- print(f"{color_start.bright_cyan('▶️ Starting')} {color_start.bright_white('docling-serve')} {color_start.bright_cyan('container')}...\n")
1809
+ print(f"{color_stdout.bright_cyan('▶️ Starting')} {color_stdout.bright_white('docling-serve')} {color_stdout.bright_cyan('container')}...\n")
1793
1810
 
1794
1811
  # Apply resource profile
1795
1812
  profile = RESOURCE_PROFILES[args.profile]
@@ -1864,13 +1881,33 @@ def main() -> int:
1864
1881
  spinner.stop()
1865
1882
 
1866
1883
  if result.success:
1867
- # Write result to output file
1868
- output_file.write_text(result.content)
1869
- success_count += 1
1870
- if not args.quiet:
1871
- print(
1872
- f"{progress} {input_file.name} ✓ ({format_duration(elapsed)})"
1873
- )
1884
+ # Validate content is not empty and not an error response
1885
+ content_length = len(result.content.strip()) if result.content else 0
1886
+ if content_length < 50:
1887
+ # Too short - likely an error or empty document
1888
+ failed_count += 1
1889
+ if not args.quiet:
1890
+ print(
1891
+ f"{progress} {input_file.name} ✗ ({format_duration(elapsed)})"
1892
+ )
1893
+ error_msg = "Empty or invalid conversion result"
1894
+ if result.content:
1895
+ error_msg += f" ({len(result.content)} bytes)"
1896
+ print(f" Error: {error_msg}", file=sys.stderr)
1897
+ if args.timeout and args.timeout < 300:
1898
+ color_out = Colorizer(sys.stderr)
1899
+ print(
1900
+ f" {color_out.info('ℹ Tip:')} Timeout is only {args.timeout}s. Consider increasing with --timeout (default: 1200s)",
1901
+ file=sys.stderr,
1902
+ )
1903
+ else:
1904
+ # Write result to output file
1905
+ output_file.write_text(result.content)
1906
+ success_count += 1
1907
+ if not args.quiet:
1908
+ print(
1909
+ f"{progress} {input_file.name} ✓ ({format_duration(elapsed)})"
1910
+ )
1874
1911
  else:
1875
1912
  failed_count += 1
1876
1913
  error_msg = result.error or "Unknown error"
@@ -2028,30 +2065,26 @@ def main() -> int:
2028
2065
 
2029
2066
  # Print summary
2030
2067
  if not args.quiet:
2031
- from mdify.formatting import Colorizer
2032
- color_out = Colorizer(sys.stdout)
2033
2068
  print()
2034
- print(color_out.cyan("=" * 60))
2035
- print(color_out.bold_cyan("📊 Local Conversion Summary"))
2036
- print(color_out.cyan("=" * 60))
2037
- print(f" {color_out.cyan('Total files:')} {color_out.bold(str(total_files))}")
2069
+ print(color_stdout.cyan("=" * 60))
2070
+ print(color_stdout.bold_cyan("📊 Local Conversion Summary"))
2071
+ print(color_stdout.cyan("=" * 60))
2072
+ print(f" {color_stdout.cyan('Total files:')} {color_stdout.bold(str(total_files))}")
2038
2073
  if success_count > 0:
2039
- print(f" {color_out.green('✓ Successful:')} {color_out.bold_green(str(success_count))}")
2074
+ print(f" {color_stdout.green('✓ Successful:')} {color_stdout.bold_green(str(success_count))}")
2040
2075
  if skipped_count > 0:
2041
- print(f" {color_out.yellow('⊘ Skipped:')} {color_out.bold_yellow(str(skipped_count))}")
2076
+ print(f" {color_stdout.yellow('⊘ Skipped:')} {color_stdout.bold_yellow(str(skipped_count))}")
2042
2077
  if failed_count > 0:
2043
- print(f" {color_out.red('✗ Failed:')} {color_out.bold_red(str(failed_count))}")
2044
- print(f" {color_out.cyan('Total time:')} {color_out.bright_cyan(format_duration(total_elapsed))}")
2045
- print(color_out.cyan("=" * 60))
2078
+ print(f" {color_stdout.red('✗ Failed:')} {color_stdout.bold_red(str(failed_count))}")
2079
+ print(f" {color_stdout.cyan('Total time:')} {color_stdout.bright_cyan(format_duration(total_elapsed))}")
2080
+ print(color_stdout.cyan("=" * 60))
2046
2081
 
2047
2082
  except KeyboardInterrupt:
2048
2083
  if not args.quiet:
2049
- from mdify.formatting import Colorizer
2050
- color_out = Colorizer(sys.stdout)
2051
- print(f"\n\n{color_out.warning('⚠ Interrupted by user. Container stopped.')}")
2084
+ print(f"\n\n{color_stdout.warning('⚠ Interrupted by user. Container stopped.')}")
2052
2085
  if success_count > 0 or skipped_count > 0 or failed_count > 0:
2053
2086
  print(
2054
- f"{color_out.dim_white('Partial progress:')} {color_out.green(str(success_count))} successful, {color_out.red(str(failed_count))} failed, {color_out.yellow(str(skipped_count))} skipped"
2087
+ f"{color_stdout.dim_white('Partial progress:')} {color_stdout.green(str(success_count))} successful, {color_stdout.red(str(failed_count))} failed, {color_stdout.yellow(str(skipped_count))} skipped"
2055
2088
  )
2056
2089
  return 130
2057
2090
 
@@ -48,6 +48,25 @@ def _get_mime_type(file_path: Path) -> str:
48
48
  return mime_type or "application/octet-stream"
49
49
 
50
50
 
51
+ def _is_error_response(result_data) -> bool:
52
+ """Check if response contains an error instead of content.
53
+
54
+ Detects common API error response patterns.
55
+
56
+ Args:
57
+ result_data: Response data to check
58
+
59
+ Returns:
60
+ True if response appears to be an error
61
+ """
62
+ if not isinstance(result_data, dict):
63
+ return False
64
+
65
+ # Check for common error keys at top level
66
+ error_keys = {"detail", "error", "message", "code", "status"}
67
+ return bool(error_keys & set(result_data.keys()))
68
+
69
+
51
70
  def _extract_content(result_data) -> str:
52
71
  """Extract content from API response, supporting both old and new formats.
53
72
 
@@ -61,8 +80,12 @@ def _extract_content(result_data) -> str:
61
80
  result_data: Response data from docling-serve API
62
81
 
63
82
  Returns:
64
- Extracted content string, or empty string if not found
83
+ Extracted content string, or empty string if not found or if response is an error
65
84
  """
85
+ # Detect error responses and return empty string instead of parsing them
86
+ if _is_error_response(result_data):
87
+ return ""
88
+
66
89
  if isinstance(result_data, dict):
67
90
  # New format with document field
68
91
  if "document" in result_data:
@@ -66,18 +66,6 @@ class SSHConfig:
66
66
 
67
67
  def __post_init__(self):
68
68
  """Validate config after initialization."""
69
- if self.port is None:
70
- self.port = 22
71
- if self.timeout is None:
72
- self.timeout = 30
73
- if self.keepalive is None:
74
- self.keepalive = 60
75
- if self.compression is None:
76
- self.compression = False
77
- if self.work_dir is None:
78
- self.work_dir = "/tmp/mdify"
79
- if self.username is None:
80
- self.username = ""
81
69
  if not self.host:
82
70
  raise ConfigError("host is required")
83
71
  if not 1 <= self.port <= 65535:
@@ -63,7 +63,7 @@ class RemoteContainer(DoclingContainer):
63
63
  try:
64
64
  # Find containers using this port
65
65
  # Using docker inspect with port filter
66
- cmd = f"{self.runtime} ps -a --filter 'publish={self.port}' --format '{{{{.ID}}}}'"
66
+ cmd = f"{self.runtime} ps -a --filter 'publish={int(self.port)}' --format '{{{{.ID}}}}'"
67
67
  stdout, stderr, code = await self.ssh_client.run_command(cmd, timeout=10)
68
68
 
69
69
  if code == 0 and stdout.strip():
@@ -3,6 +3,7 @@
3
3
  import gzip
4
4
  import hashlib
5
5
  import logging
6
+ import shlex
6
7
  from pathlib import Path
7
8
  from typing import Callable
8
9
  from mdify.ssh.models import TransferSession
@@ -233,8 +234,8 @@ class FileTransferManager:
233
234
 
234
235
  # Calculate remote checksum (sha256sum or shasum fallback)
235
236
  checksum_cmd = (
236
- f"(command -v sha256sum >/dev/null 2>&1 && sha256sum {remote_path} | awk '{{print $1}}') "
237
- f"|| (command -v shasum >/dev/null 2>&1 && shasum -a 256 {remote_path} | awk '{{print $1}}')"
237
+ f"(command -v sha256sum >/dev/null 2>&1 && sha256sum {shlex.quote(remote_path)} | awk '{{print $1}}') "
238
+ f"|| (command -v shasum >/dev/null 2>&1 && shasum -a 256 {shlex.quote(remote_path)} | awk '{{print $1}}')"
238
239
  )
239
240
  stdout, stderr, code = await self.ssh_client.run_command(checksum_cmd)
240
241
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 3.2.0
3
+ Version: 3.3.0
4
4
  Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "mdify-cli"
3
- version = "3.2.0"
3
+ version = "3.3.0"
4
4
  description = "Convert PDFs and document images into structured Markdown for LLM workflows"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes