mdify-cli 3.3.1__tar.gz → 3.3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27):
  1. {mdify_cli-3.3.1 → mdify_cli-3.3.3}/PKG-INFO +1 -1
  2. {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify/__init__.py +1 -1
  3. {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify/cli.py +7 -7
  4. {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify/ssh/client.py +2 -2
  5. {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify_cli.egg-info/PKG-INFO +1 -1
  6. {mdify_cli-3.3.1 → mdify_cli-3.3.3}/pyproject.toml +1 -1
  7. {mdify_cli-3.3.1 → mdify_cli-3.3.3}/tests/test_docling_client.py +207 -0
  8. {mdify_cli-3.3.1 → mdify_cli-3.3.3}/LICENSE +0 -0
  9. {mdify_cli-3.3.1 → mdify_cli-3.3.3}/README.md +0 -0
  10. {mdify_cli-3.3.1 → mdify_cli-3.3.3}/assets/mdify.png +0 -0
  11. {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify/__main__.py +0 -0
  12. {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify/container.py +0 -0
  13. {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify/docling_client.py +0 -0
  14. {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify/formatting.py +0 -0
  15. {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify/ssh/__init__.py +0 -0
  16. {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify/ssh/models.py +0 -0
  17. {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify/ssh/remote_container.py +0 -0
  18. {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify/ssh/transfer.py +0 -0
  19. {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify_cli.egg-info/SOURCES.txt +0 -0
  20. {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify_cli.egg-info/dependency_links.txt +0 -0
  21. {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify_cli.egg-info/entry_points.txt +0 -0
  22. {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify_cli.egg-info/requires.txt +0 -0
  23. {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify_cli.egg-info/top_level.txt +0 -0
  24. {mdify_cli-3.3.1 → mdify_cli-3.3.3}/setup.cfg +0 -0
  25. {mdify_cli-3.3.1 → mdify_cli-3.3.3}/tests/test_cli.py +0 -0
  26. {mdify_cli-3.3.1 → mdify_cli-3.3.3}/tests/test_container.py +0 -0
  27. {mdify_cli-3.3.1 → mdify_cli-3.3.3}/tests/test_ssh_client.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 3.3.1
3
+ Version: 3.3.3
4
4
  Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
@@ -1,3 +1,3 @@
1
1
  """mdify - Convert documents to Markdown via Docling container."""
2
2
 
3
- __version__ = "3.3.1"
3
+ __version__ = "3.3.3"
@@ -1060,11 +1060,11 @@ def main_async_remote(args) -> int:
1060
1060
  color = Colorizer(sys.stderr)
1061
1061
 
1062
1062
  # Resolve timeout value: CLI > env > default 1200
1063
- timeout = args.timeout or int(os.environ.get("MDIFY_TIMEOUT", 1200))
1063
+ timeout = args.timeout if args.timeout is not None else int(os.environ.get("MDIFY_TIMEOUT", 1200))
1064
1064
 
1065
1065
  # For remote operations, extend timeout significantly for large PDF processing
1066
1066
  # Remote conversions include network latency, file upload/download, and OCR processing
1067
- remote_conversion_timeout = max(timeout, 3600) # At least 1 hour for remote conversion
1067
+ remote_conversion_timeout = max(timeout or 1200, 3600) # At least 1 hour for remote conversion
1068
1068
 
1069
1069
  # Build SSH config from CLI arguments and SSH config files
1070
1070
  try:
@@ -1397,8 +1397,8 @@ def main_async_remote(args) -> int:
1397
1397
  error_detail = response_data.get("detail", response_data.get("error", str(response_data)))
1398
1398
  print(f" {color_err.error('✗ Failed:')} {error_detail}", file=sys.stderr)
1399
1399
  if "DOCLING_SERVE_MAX_SYNC_WAIT" in str(error_detail):
1400
- timeout_val = args.remote_timeout or 3600
1401
- print(f" {color_err.info('ℹ Tip:')} Increase timeout with --remote-timeout (current: {timeout_val}s)", file=sys.stderr)
1400
+ timeout_val = args.timeout or 3600
1401
+ print(f" {color_err.info('ℹ Tip:')} Increase timeout with --timeout (current: {timeout_val}s)", file=sys.stderr)
1402
1402
  failed += 1
1403
1403
  break
1404
1404
 
@@ -1583,9 +1583,9 @@ def main_async_remote(args) -> int:
1583
1583
  return 1
1584
1584
  except Exception as e:
1585
1585
  print(f"Error: Unexpected error during remote execution: {e}", file=sys.stderr)
1586
- if DEBUG:
1587
- import traceback
1588
- traceback.print_exc(file=sys.stderr)
1586
+ # Always print traceback for unexpected errors to help with debugging
1587
+ import traceback
1588
+ traceback.print_exc(file=sys.stderr)
1589
1589
  return 1
1590
1590
 
1591
1591
  # Run async main
@@ -44,7 +44,7 @@ class AsyncSSHClient:
44
44
  # Prepare connection parameters - only include non-None values
45
45
  connect_kwargs = {
46
46
  "port": self.config.port,
47
- "connect_timeout": self.config.timeout,
47
+ "connect_timeout": self.config.timeout or 30,
48
48
  "known_hosts": None, # Skip host key verification for now
49
49
  }
50
50
 
@@ -165,7 +165,7 @@ class AsyncSSHClient:
165
165
  )
166
166
 
167
167
  try:
168
- timeout_val = timeout or self.config.timeout
168
+ timeout_val = timeout or self.config.timeout or 30
169
169
 
170
170
  result = await asyncio.wait_for(
171
171
  self.connection.run(command, check=False),
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 3.3.1
3
+ Version: 3.3.3
4
4
  Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "mdify-cli"
3
- version = "3.3.1"
3
+ version = "3.3.3"
4
4
  description = "Convert PDFs and document images into structured Markdown for LLM workflows"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
@@ -489,3 +489,210 @@ class TestMimeTypeDetection:
489
489
  files_param = call_args[1]["files"]
490
490
  filename, file_obj, mime_type = files_param["files"]
491
491
  assert mime_type == "application/octet-stream"
492
+
493
+
494
+ class TestErrorDetection:
495
+ """Test error response detection functionality."""
496
+
497
+ def test_error_detection_with_detail_key(self):
498
+ """Test detection of error response with 'detail' key."""
499
+ from mdify.docling_client import _is_error_response
500
+
501
+ error_response = {"detail": "Conversion is taking too long"}
502
+ assert _is_error_response(error_response) is True
503
+
504
+ def test_error_detection_with_error_key(self):
505
+ """Test detection of error response with 'error' key."""
506
+ from mdify.docling_client import _is_error_response
507
+
508
+ error_response = {"error": "Internal server error"}
509
+ assert _is_error_response(error_response) is True
510
+
511
+ def test_error_detection_with_message_key(self):
512
+ """Test detection of error response with 'message' key."""
513
+ from mdify.docling_client import _is_error_response
514
+
515
+ error_response = {"message": "Request failed"}
516
+ assert _is_error_response(error_response) is True
517
+
518
+ def test_error_detection_with_code_key(self):
519
+ """Test detection of error response with 'code' key."""
520
+ from mdify.docling_client import _is_error_response
521
+
522
+ error_response = {"code": 500}
523
+ assert _is_error_response(error_response) is True
524
+
525
+ def test_error_detection_with_status_key(self):
526
+ """Test detection of error response with 'status' key."""
527
+ from mdify.docling_client import _is_error_response
528
+
529
+ error_response = {"status": "error"}
530
+ assert _is_error_response(error_response) is True
531
+
532
+ def test_error_detection_timeout_error(self):
533
+ """Test detection of specific timeout error from docling-serve."""
534
+ from mdify.docling_client import _is_error_response
535
+
536
+ error_response = {
537
+ "detail": "Conversion is taking too long. The maximum wait time is configure as DOCLING_SERVE_MAX_SYNC_WAIT=120."
538
+ }
539
+ assert _is_error_response(error_response) is True
540
+
541
+ def test_valid_response_not_detected_as_error(self):
542
+ """Test that valid responses are not marked as errors."""
543
+ from mdify.docling_client import _is_error_response
544
+
545
+ valid_response = {"document": {"md_content": "# Valid Markdown"}}
546
+ assert _is_error_response(valid_response) is False
547
+
548
+ def test_empty_dict_not_detected_as_error(self):
549
+ """Test that empty dict is not detected as error."""
550
+ from mdify.docling_client import _is_error_response
551
+
552
+ assert _is_error_response({}) is False
553
+
554
+ def test_non_dict_not_detected_as_error(self):
555
+ """Test that non-dict values are not detected as errors."""
556
+ from mdify.docling_client import _is_error_response
557
+
558
+ assert _is_error_response("not a dict") is False
559
+ assert _is_error_response(None) is False
560
+ assert _is_error_response([]) is False
561
+
562
+
563
+ class TestContentExtraction:
564
+ """Test content extraction from responses, including error handling."""
565
+
566
+ def test_extract_content_from_error_returns_empty(self):
567
+ """Test that error responses return empty content."""
568
+ from mdify.docling_client import _extract_content
569
+
570
+ error_response = {"detail": "Conversion failed"}
571
+ assert _extract_content(error_response) == ""
572
+
573
+ def test_extract_content_from_document_md_content(self):
574
+ """Test extracting content from document.md_content."""
575
+ from mdify.docling_client import _extract_content
576
+
577
+ valid_response = {
578
+ "document": {
579
+ "md_content": "# Valid Markdown\n\nThis is valid content."
580
+ }
581
+ }
582
+ content = _extract_content(valid_response)
583
+ assert content == "# Valid Markdown\n\nThis is valid content."
584
+
585
+ def test_extract_content_from_document_content_fallback(self):
586
+ """Test extracting content from document.content fallback."""
587
+ from mdify.docling_client import _extract_content
588
+
589
+ response = {
590
+ "document": {
591
+ "content": "Alternative content format"
592
+ }
593
+ }
594
+ content = _extract_content(response)
595
+ assert content == "Alternative content format"
596
+
597
+ def test_extract_content_from_empty_document(self):
598
+ """Test that empty document returns empty string."""
599
+ from mdify.docling_client import _extract_content
600
+
601
+ response = {"document": {"md_content": ""}}
602
+ assert _extract_content(response) == ""
603
+
604
+ def test_extract_content_from_results_old_format(self):
605
+ """Test extracting content from old list format."""
606
+ from mdify.docling_client import _extract_content
607
+
608
+ response = [
609
+ {
610
+ "document": {
611
+ "md_content": "# Header\n\nMarkdown content"
612
+ }
613
+ }
614
+ ]
615
+ content = _extract_content(response)
616
+ assert "Header" in content
617
+
618
+ def test_extract_content_with_nested_markdown(self):
619
+ """Test extracting nested markdown content."""
620
+ from mdify.docling_client import _extract_content
621
+
622
+ response = {
623
+ "document": {
624
+ "content": {
625
+ "markdown": "# Nested Markdown"
626
+ }
627
+ }
628
+ }
629
+ content = _extract_content(response)
630
+ assert len(content) > 0
631
+
632
+
633
+ class TestContentValidation:
634
+ """Test content length validation for preventing invalid files."""
635
+
636
+ def test_validation_threshold_minimum(self):
637
+ """Test that exactly 50 characters passes validation."""
638
+ content = "This is exactly 50 character string for testing."
639
+ # Verify it's exactly 50 characters (or use a longer string)
640
+ if len(content.strip()) < 50:
641
+ content = "This is a string that is at least 50 characters long to validate the threshold."
642
+ assert len(content.strip()) >= 50
643
+
644
+ def test_validation_threshold_below_minimum(self):
645
+ """Test that less than 50 characters fails validation."""
646
+ content = "Too short"
647
+ assert len(content.strip()) < 50
648
+
649
+ def test_validation_empty_content_fails(self):
650
+ """Test that empty content fails validation."""
651
+ content = ""
652
+ content_length = len(content.strip()) if content else 0
653
+ assert content_length < 50
654
+
655
+ def test_validation_whitespace_stripped(self):
656
+ """Test that whitespace is stripped during validation."""
657
+ content = " Too short "
658
+ # After strip, it's less than 50 chars
659
+ assert len(content.strip()) < 50
660
+
661
+ def test_validation_error_json_fails(self):
662
+ """Test that typical error JSON fails validation."""
663
+ import json
664
+ error_json = json.dumps({
665
+ "detail": "Conversion is taking too long. The maximum wait time is configure as DOCLING_SERVE_MAX_SYNC_WAIT=120."
666
+ })
667
+ # Even the error message is < 50 chars when just the string
668
+ error_detail = '{"detail": "Conversion failed"}'
669
+ assert len(error_detail.strip()) < 50
670
+
671
+ def test_validation_valid_content_passes(self):
672
+ """Test that typical valid content passes validation."""
673
+ content = "# Markdown Heading\n\nThis is substantial content that exceeds the 50 character minimum."
674
+ assert len(content.strip()) >= 50
675
+
676
+ def test_convert_result_with_short_content(self):
677
+ """Test ConvertResult with content below threshold."""
678
+ result = ConvertResult(content="Short", format="md", success=True)
679
+ content_length = len(result.content.strip()) if result.content else 0
680
+ assert content_length < 50
681
+
682
+ def test_convert_result_with_valid_content(self):
683
+ """Test ConvertResult with content above threshold."""
684
+ result = ConvertResult(
685
+ content="# Valid content that exceeds the 50 character minimum threshold",
686
+ format="md",
687
+ success=True
688
+ )
689
+ content_length = len(result.content.strip()) if result.content else 0
690
+ assert content_length >= 50
691
+
692
+ def test_convert_result_with_empty_content(self):
693
+ """Test ConvertResult with empty content."""
694
+ result = ConvertResult(content="", format="md", success=True)
695
+ assert result.success is True
696
+ assert result.content == ""
697
+ content_length = len(result.content.strip()) if result.content else 0
698
+ assert content_length < 50
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes