mdify-cli 3.3.1__tar.gz → 3.3.2__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. {mdify_cli-3.3.1 → mdify_cli-3.3.2}/PKG-INFO +1 -1
  2. {mdify_cli-3.3.1 → mdify_cli-3.3.2}/mdify/__init__.py +1 -1
  3. {mdify_cli-3.3.1 → mdify_cli-3.3.2}/mdify/cli.py +2 -2
  4. {mdify_cli-3.3.1 → mdify_cli-3.3.2}/mdify_cli.egg-info/PKG-INFO +1 -1
  5. {mdify_cli-3.3.1 → mdify_cli-3.3.2}/pyproject.toml +1 -1
  6. {mdify_cli-3.3.1 → mdify_cli-3.3.2}/tests/test_docling_client.py +207 -0
  7. {mdify_cli-3.3.1 → mdify_cli-3.3.2}/LICENSE +0 -0
  8. {mdify_cli-3.3.1 → mdify_cli-3.3.2}/README.md +0 -0
  9. {mdify_cli-3.3.1 → mdify_cli-3.3.2}/assets/mdify.png +0 -0
  10. {mdify_cli-3.3.1 → mdify_cli-3.3.2}/mdify/__main__.py +0 -0
  11. {mdify_cli-3.3.1 → mdify_cli-3.3.2}/mdify/container.py +0 -0
  12. {mdify_cli-3.3.1 → mdify_cli-3.3.2}/mdify/docling_client.py +0 -0
  13. {mdify_cli-3.3.1 → mdify_cli-3.3.2}/mdify/formatting.py +0 -0
  14. {mdify_cli-3.3.1 → mdify_cli-3.3.2}/mdify/ssh/__init__.py +0 -0
  15. {mdify_cli-3.3.1 → mdify_cli-3.3.2}/mdify/ssh/client.py +0 -0
  16. {mdify_cli-3.3.1 → mdify_cli-3.3.2}/mdify/ssh/models.py +0 -0
  17. {mdify_cli-3.3.1 → mdify_cli-3.3.2}/mdify/ssh/remote_container.py +0 -0
  18. {mdify_cli-3.3.1 → mdify_cli-3.3.2}/mdify/ssh/transfer.py +0 -0
  19. {mdify_cli-3.3.1 → mdify_cli-3.3.2}/mdify_cli.egg-info/SOURCES.txt +0 -0
  20. {mdify_cli-3.3.1 → mdify_cli-3.3.2}/mdify_cli.egg-info/dependency_links.txt +0 -0
  21. {mdify_cli-3.3.1 → mdify_cli-3.3.2}/mdify_cli.egg-info/entry_points.txt +0 -0
  22. {mdify_cli-3.3.1 → mdify_cli-3.3.2}/mdify_cli.egg-info/requires.txt +0 -0
  23. {mdify_cli-3.3.1 → mdify_cli-3.3.2}/mdify_cli.egg-info/top_level.txt +0 -0
  24. {mdify_cli-3.3.1 → mdify_cli-3.3.2}/setup.cfg +0 -0
  25. {mdify_cli-3.3.1 → mdify_cli-3.3.2}/tests/test_cli.py +0 -0
  26. {mdify_cli-3.3.1 → mdify_cli-3.3.2}/tests/test_container.py +0 -0
  27. {mdify_cli-3.3.1 → mdify_cli-3.3.2}/tests/test_ssh_client.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 3.3.1
3
+ Version: 3.3.2
4
4
  Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
@@ -1,3 +1,3 @@
1
1
  """mdify - Convert documents to Markdown via Docling container."""
2
2
 
3
- __version__ = "3.3.1"
3
+ __version__ = "3.3.2"
@@ -1397,8 +1397,8 @@ def main_async_remote(args) -> int:
1397
1397
  error_detail = response_data.get("detail", response_data.get("error", str(response_data)))
1398
1398
  print(f" {color_err.error('✗ Failed:')} {error_detail}", file=sys.stderr)
1399
1399
  if "DOCLING_SERVE_MAX_SYNC_WAIT" in str(error_detail):
1400
- timeout_val = args.remote_timeout or 3600
1401
- print(f" {color_err.info('ℹ Tip:')} Increase timeout with --remote-timeout (current: {timeout_val}s)", file=sys.stderr)
1400
+ timeout_val = args.timeout or 3600
1401
+ print(f" {color_err.info('ℹ Tip:')} Increase timeout with --timeout (current: {timeout_val}s)", file=sys.stderr)
1402
1402
  failed += 1
1403
1403
  break
1404
1404
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 3.3.1
3
+ Version: 3.3.2
4
4
  Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "mdify-cli"
3
- version = "3.3.1"
3
+ version = "3.3.2"
4
4
  description = "Convert PDFs and document images into structured Markdown for LLM workflows"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
@@ -489,3 +489,210 @@ class TestMimeTypeDetection:
489
489
  files_param = call_args[1]["files"]
490
490
  filename, file_obj, mime_type = files_param["files"]
491
491
  assert mime_type == "application/octet-stream"
492
+
493
+
494
+ class TestErrorDetection:
495
+ """Test error response detection functionality."""
496
+
497
+ def test_error_detection_with_detail_key(self):
498
+ """Test detection of error response with 'detail' key."""
499
+ from mdify.docling_client import _is_error_response
500
+
501
+ error_response = {"detail": "Conversion is taking too long"}
502
+ assert _is_error_response(error_response) is True
503
+
504
+ def test_error_detection_with_error_key(self):
505
+ """Test detection of error response with 'error' key."""
506
+ from mdify.docling_client import _is_error_response
507
+
508
+ error_response = {"error": "Internal server error"}
509
+ assert _is_error_response(error_response) is True
510
+
511
+ def test_error_detection_with_message_key(self):
512
+ """Test detection of error response with 'message' key."""
513
+ from mdify.docling_client import _is_error_response
514
+
515
+ error_response = {"message": "Request failed"}
516
+ assert _is_error_response(error_response) is True
517
+
518
+ def test_error_detection_with_code_key(self):
519
+ """Test detection of error response with 'code' key."""
520
+ from mdify.docling_client import _is_error_response
521
+
522
+ error_response = {"code": 500}
523
+ assert _is_error_response(error_response) is True
524
+
525
+ def test_error_detection_with_status_key(self):
526
+ """Test detection of error response with 'status' key."""
527
+ from mdify.docling_client import _is_error_response
528
+
529
+ error_response = {"status": "error"}
530
+ assert _is_error_response(error_response) is True
531
+
532
+ def test_error_detection_timeout_error(self):
533
+ """Test detection of specific timeout error from docling-serve."""
534
+ from mdify.docling_client import _is_error_response
535
+
536
+ error_response = {
537
+ "detail": "Conversion is taking too long. The maximum wait time is configure as DOCLING_SERVE_MAX_SYNC_WAIT=120."
538
+ }
539
+ assert _is_error_response(error_response) is True
540
+
541
+ def test_valid_response_not_detected_as_error(self):
542
+ """Test that valid responses are not marked as errors."""
543
+ from mdify.docling_client import _is_error_response
544
+
545
+ valid_response = {"document": {"md_content": "# Valid Markdown"}}
546
+ assert _is_error_response(valid_response) is False
547
+
548
+ def test_empty_dict_not_detected_as_error(self):
549
+ """Test that empty dict is not detected as error."""
550
+ from mdify.docling_client import _is_error_response
551
+
552
+ assert _is_error_response({}) is False
553
+
554
+ def test_non_dict_not_detected_as_error(self):
555
+ """Test that non-dict values are not detected as errors."""
556
+ from mdify.docling_client import _is_error_response
557
+
558
+ assert _is_error_response("not a dict") is False
559
+ assert _is_error_response(None) is False
560
+ assert _is_error_response([]) is False
561
+
562
+
563
+ class TestContentExtraction:
564
+ """Test content extraction from responses, including error handling."""
565
+
566
+ def test_extract_content_from_error_returns_empty(self):
567
+ """Test that error responses return empty content."""
568
+ from mdify.docling_client import _extract_content
569
+
570
+ error_response = {"detail": "Conversion failed"}
571
+ assert _extract_content(error_response) == ""
572
+
573
+ def test_extract_content_from_document_md_content(self):
574
+ """Test extracting content from document.md_content."""
575
+ from mdify.docling_client import _extract_content
576
+
577
+ valid_response = {
578
+ "document": {
579
+ "md_content": "# Valid Markdown\n\nThis is valid content."
580
+ }
581
+ }
582
+ content = _extract_content(valid_response)
583
+ assert content == "# Valid Markdown\n\nThis is valid content."
584
+
585
+ def test_extract_content_from_document_content_fallback(self):
586
+ """Test extracting content from document.content fallback."""
587
+ from mdify.docling_client import _extract_content
588
+
589
+ response = {
590
+ "document": {
591
+ "content": "Alternative content format"
592
+ }
593
+ }
594
+ content = _extract_content(response)
595
+ assert content == "Alternative content format"
596
+
597
+ def test_extract_content_from_empty_document(self):
598
+ """Test that empty document returns empty string."""
599
+ from mdify.docling_client import _extract_content
600
+
601
+ response = {"document": {"md_content": ""}}
602
+ assert _extract_content(response) == ""
603
+
604
+ def test_extract_content_from_results_old_format(self):
605
+ """Test extracting content from old list format."""
606
+ from mdify.docling_client import _extract_content
607
+
608
+ response = [
609
+ {
610
+ "document": {
611
+ "md_content": "# Header\n\nMarkdown content"
612
+ }
613
+ }
614
+ ]
615
+ content = _extract_content(response)
616
+ assert "Header" in content
617
+
618
+ def test_extract_content_with_nested_markdown(self):
619
+ """Test extracting nested markdown content."""
620
+ from mdify.docling_client import _extract_content
621
+
622
+ response = {
623
+ "document": {
624
+ "content": {
625
+ "markdown": "# Nested Markdown"
626
+ }
627
+ }
628
+ }
629
+ content = _extract_content(response)
630
+ assert len(content) > 0
631
+
632
+
633
+ class TestContentValidation:
634
+ """Test content length validation for preventing invalid files."""
635
+
636
+ def test_validation_threshold_minimum(self):
637
+ """Test that exactly 50 characters passes validation."""
638
+ content = "This is exactly 50 character string for testing."
639
+ # Verify it's exactly 50 characters (or use a longer string)
640
+ if len(content.strip()) < 50:
641
+ content = "This is a string that is at least 50 characters long to validate the threshold."
642
+ assert len(content.strip()) >= 50
643
+
644
+ def test_validation_threshold_below_minimum(self):
645
+ """Test that less than 50 characters fails validation."""
646
+ content = "Too short"
647
+ assert len(content.strip()) < 50
648
+
649
+ def test_validation_empty_content_fails(self):
650
+ """Test that empty content fails validation."""
651
+ content = ""
652
+ content_length = len(content.strip()) if content else 0
653
+ assert content_length < 50
654
+
655
+ def test_validation_whitespace_stripped(self):
656
+ """Test that whitespace is stripped during validation."""
657
+ content = " Too short "
658
+ # After strip, it's less than 50 chars
659
+ assert len(content.strip()) < 50
660
+
661
+ def test_validation_error_json_fails(self):
662
+ """Test that typical error JSON fails validation."""
663
+ import json
664
+ error_json = json.dumps({
665
+ "detail": "Conversion is taking too long. The maximum wait time is configure as DOCLING_SERVE_MAX_SYNC_WAIT=120."
666
+ })
667
+ # Even the error message is < 50 chars when just the string
668
+ error_detail = '{"detail": "Conversion failed"}'
669
+ assert len(error_detail.strip()) < 50
670
+
671
+ def test_validation_valid_content_passes(self):
672
+ """Test that typical valid content passes validation."""
673
+ content = "# Markdown Heading\n\nThis is substantial content that exceeds the 50 character minimum."
674
+ assert len(content.strip()) >= 50
675
+
676
+ def test_convert_result_with_short_content(self):
677
+ """Test ConvertResult with content below threshold."""
678
+ result = ConvertResult(content="Short", format="md", success=True)
679
+ content_length = len(result.content.strip()) if result.content else 0
680
+ assert content_length < 50
681
+
682
+ def test_convert_result_with_valid_content(self):
683
+ """Test ConvertResult with content above threshold."""
684
+ result = ConvertResult(
685
+ content="# Valid content that exceeds the 50 character minimum threshold",
686
+ format="md",
687
+ success=True
688
+ )
689
+ content_length = len(result.content.strip()) if result.content else 0
690
+ assert content_length >= 50
691
+
692
+ def test_convert_result_with_empty_content(self):
693
+ """Test ConvertResult with empty content."""
694
+ result = ConvertResult(content="", format="md", success=True)
695
+ assert result.success is True
696
+ assert result.content == ""
697
+ content_length = len(result.content.strip()) if result.content else 0
698
+ assert content_length < 50
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes