mdify-cli 3.3.1__tar.gz → 3.3.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mdify_cli-3.3.1 → mdify_cli-3.3.3}/PKG-INFO +1 -1
- {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify/__init__.py +1 -1
- {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify/cli.py +7 -7
- {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify/ssh/client.py +2 -2
- {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify_cli.egg-info/PKG-INFO +1 -1
- {mdify_cli-3.3.1 → mdify_cli-3.3.3}/pyproject.toml +1 -1
- {mdify_cli-3.3.1 → mdify_cli-3.3.3}/tests/test_docling_client.py +207 -0
- {mdify_cli-3.3.1 → mdify_cli-3.3.3}/LICENSE +0 -0
- {mdify_cli-3.3.1 → mdify_cli-3.3.3}/README.md +0 -0
- {mdify_cli-3.3.1 → mdify_cli-3.3.3}/assets/mdify.png +0 -0
- {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify/__main__.py +0 -0
- {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify/container.py +0 -0
- {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify/docling_client.py +0 -0
- {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify/formatting.py +0 -0
- {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify/ssh/__init__.py +0 -0
- {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify/ssh/models.py +0 -0
- {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify/ssh/remote_container.py +0 -0
- {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify/ssh/transfer.py +0 -0
- {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify_cli.egg-info/SOURCES.txt +0 -0
- {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify_cli.egg-info/dependency_links.txt +0 -0
- {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify_cli.egg-info/entry_points.txt +0 -0
- {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify_cli.egg-info/requires.txt +0 -0
- {mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify_cli.egg-info/top_level.txt +0 -0
- {mdify_cli-3.3.1 → mdify_cli-3.3.3}/setup.cfg +0 -0
- {mdify_cli-3.3.1 → mdify_cli-3.3.3}/tests/test_cli.py +0 -0
- {mdify_cli-3.3.1 → mdify_cli-3.3.3}/tests/test_container.py +0 -0
- {mdify_cli-3.3.1 → mdify_cli-3.3.3}/tests/test_ssh_client.py +0 -0
|
@@ -1060,11 +1060,11 @@ def main_async_remote(args) -> int:
|
|
|
1060
1060
|
color = Colorizer(sys.stderr)
|
|
1061
1061
|
|
|
1062
1062
|
# Resolve timeout value: CLI > env > default 1200
|
|
1063
|
-
timeout = args.timeout
|
|
1063
|
+
timeout = args.timeout if args.timeout is not None else int(os.environ.get("MDIFY_TIMEOUT", 1200))
|
|
1064
1064
|
|
|
1065
1065
|
# For remote operations, extend timeout significantly for large PDF processing
|
|
1066
1066
|
# Remote conversions include network latency, file upload/download, and OCR processing
|
|
1067
|
-
remote_conversion_timeout = max(timeout, 3600) # At least 1 hour for remote conversion
|
|
1067
|
+
remote_conversion_timeout = max(timeout or 1200, 3600) # At least 1 hour for remote conversion
|
|
1068
1068
|
|
|
1069
1069
|
# Build SSH config from CLI arguments and SSH config files
|
|
1070
1070
|
try:
|
|
@@ -1397,8 +1397,8 @@ def main_async_remote(args) -> int:
|
|
|
1397
1397
|
error_detail = response_data.get("detail", response_data.get("error", str(response_data)))
|
|
1398
1398
|
print(f" {color_err.error('✗ Failed:')} {error_detail}", file=sys.stderr)
|
|
1399
1399
|
if "DOCLING_SERVE_MAX_SYNC_WAIT" in str(error_detail):
|
|
1400
|
-
timeout_val = args.
|
|
1401
|
-
print(f" {color_err.info('ℹ Tip:')} Increase timeout with --
|
|
1400
|
+
timeout_val = args.timeout or 3600
|
|
1401
|
+
print(f" {color_err.info('ℹ Tip:')} Increase timeout with --timeout (current: {timeout_val}s)", file=sys.stderr)
|
|
1402
1402
|
failed += 1
|
|
1403
1403
|
break
|
|
1404
1404
|
|
|
@@ -1583,9 +1583,9 @@ def main_async_remote(args) -> int:
|
|
|
1583
1583
|
return 1
|
|
1584
1584
|
except Exception as e:
|
|
1585
1585
|
print(f"Error: Unexpected error during remote execution: {e}", file=sys.stderr)
|
|
1586
|
-
|
|
1587
|
-
|
|
1588
|
-
|
|
1586
|
+
# Always print traceback for unexpected errors to help with debugging
|
|
1587
|
+
import traceback
|
|
1588
|
+
traceback.print_exc(file=sys.stderr)
|
|
1589
1589
|
return 1
|
|
1590
1590
|
|
|
1591
1591
|
# Run async main
|
|
@@ -44,7 +44,7 @@ class AsyncSSHClient:
|
|
|
44
44
|
# Prepare connection parameters - only include non-None values
|
|
45
45
|
connect_kwargs = {
|
|
46
46
|
"port": self.config.port,
|
|
47
|
-
"connect_timeout": self.config.timeout,
|
|
47
|
+
"connect_timeout": self.config.timeout or 30,
|
|
48
48
|
"known_hosts": None, # Skip host key verification for now
|
|
49
49
|
}
|
|
50
50
|
|
|
@@ -165,7 +165,7 @@ class AsyncSSHClient:
|
|
|
165
165
|
)
|
|
166
166
|
|
|
167
167
|
try:
|
|
168
|
-
timeout_val = timeout or self.config.timeout
|
|
168
|
+
timeout_val = timeout or self.config.timeout or 30
|
|
169
169
|
|
|
170
170
|
result = await asyncio.wait_for(
|
|
171
171
|
self.connection.run(command, check=False),
|
|
@@ -489,3 +489,210 @@ class TestMimeTypeDetection:
|
|
|
489
489
|
files_param = call_args[1]["files"]
|
|
490
490
|
filename, file_obj, mime_type = files_param["files"]
|
|
491
491
|
assert mime_type == "application/octet-stream"
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
class TestErrorDetection:
|
|
495
|
+
"""Test error response detection functionality."""
|
|
496
|
+
|
|
497
|
+
def test_error_detection_with_detail_key(self):
|
|
498
|
+
"""Test detection of error response with 'detail' key."""
|
|
499
|
+
from mdify.docling_client import _is_error_response
|
|
500
|
+
|
|
501
|
+
error_response = {"detail": "Conversion is taking too long"}
|
|
502
|
+
assert _is_error_response(error_response) is True
|
|
503
|
+
|
|
504
|
+
def test_error_detection_with_error_key(self):
|
|
505
|
+
"""Test detection of error response with 'error' key."""
|
|
506
|
+
from mdify.docling_client import _is_error_response
|
|
507
|
+
|
|
508
|
+
error_response = {"error": "Internal server error"}
|
|
509
|
+
assert _is_error_response(error_response) is True
|
|
510
|
+
|
|
511
|
+
def test_error_detection_with_message_key(self):
|
|
512
|
+
"""Test detection of error response with 'message' key."""
|
|
513
|
+
from mdify.docling_client import _is_error_response
|
|
514
|
+
|
|
515
|
+
error_response = {"message": "Request failed"}
|
|
516
|
+
assert _is_error_response(error_response) is True
|
|
517
|
+
|
|
518
|
+
def test_error_detection_with_code_key(self):
|
|
519
|
+
"""Test detection of error response with 'code' key."""
|
|
520
|
+
from mdify.docling_client import _is_error_response
|
|
521
|
+
|
|
522
|
+
error_response = {"code": 500}
|
|
523
|
+
assert _is_error_response(error_response) is True
|
|
524
|
+
|
|
525
|
+
def test_error_detection_with_status_key(self):
|
|
526
|
+
"""Test detection of error response with 'status' key."""
|
|
527
|
+
from mdify.docling_client import _is_error_response
|
|
528
|
+
|
|
529
|
+
error_response = {"status": "error"}
|
|
530
|
+
assert _is_error_response(error_response) is True
|
|
531
|
+
|
|
532
|
+
def test_error_detection_timeout_error(self):
|
|
533
|
+
"""Test detection of specific timeout error from docling-serve."""
|
|
534
|
+
from mdify.docling_client import _is_error_response
|
|
535
|
+
|
|
536
|
+
error_response = {
|
|
537
|
+
"detail": "Conversion is taking too long. The maximum wait time is configure as DOCLING_SERVE_MAX_SYNC_WAIT=120."
|
|
538
|
+
}
|
|
539
|
+
assert _is_error_response(error_response) is True
|
|
540
|
+
|
|
541
|
+
def test_valid_response_not_detected_as_error(self):
|
|
542
|
+
"""Test that valid responses are not marked as errors."""
|
|
543
|
+
from mdify.docling_client import _is_error_response
|
|
544
|
+
|
|
545
|
+
valid_response = {"document": {"md_content": "# Valid Markdown"}}
|
|
546
|
+
assert _is_error_response(valid_response) is False
|
|
547
|
+
|
|
548
|
+
def test_empty_dict_not_detected_as_error(self):
|
|
549
|
+
"""Test that empty dict is not detected as error."""
|
|
550
|
+
from mdify.docling_client import _is_error_response
|
|
551
|
+
|
|
552
|
+
assert _is_error_response({}) is False
|
|
553
|
+
|
|
554
|
+
def test_non_dict_not_detected_as_error(self):
|
|
555
|
+
"""Test that non-dict values are not detected as errors."""
|
|
556
|
+
from mdify.docling_client import _is_error_response
|
|
557
|
+
|
|
558
|
+
assert _is_error_response("not a dict") is False
|
|
559
|
+
assert _is_error_response(None) is False
|
|
560
|
+
assert _is_error_response([]) is False
|
|
561
|
+
|
|
562
|
+
|
|
563
|
+
class TestContentExtraction:
|
|
564
|
+
"""Test content extraction from responses, including error handling."""
|
|
565
|
+
|
|
566
|
+
def test_extract_content_from_error_returns_empty(self):
|
|
567
|
+
"""Test that error responses return empty content."""
|
|
568
|
+
from mdify.docling_client import _extract_content
|
|
569
|
+
|
|
570
|
+
error_response = {"detail": "Conversion failed"}
|
|
571
|
+
assert _extract_content(error_response) == ""
|
|
572
|
+
|
|
573
|
+
def test_extract_content_from_document_md_content(self):
|
|
574
|
+
"""Test extracting content from document.md_content."""
|
|
575
|
+
from mdify.docling_client import _extract_content
|
|
576
|
+
|
|
577
|
+
valid_response = {
|
|
578
|
+
"document": {
|
|
579
|
+
"md_content": "# Valid Markdown\n\nThis is valid content."
|
|
580
|
+
}
|
|
581
|
+
}
|
|
582
|
+
content = _extract_content(valid_response)
|
|
583
|
+
assert content == "# Valid Markdown\n\nThis is valid content."
|
|
584
|
+
|
|
585
|
+
def test_extract_content_from_document_content_fallback(self):
|
|
586
|
+
"""Test extracting content from document.content fallback."""
|
|
587
|
+
from mdify.docling_client import _extract_content
|
|
588
|
+
|
|
589
|
+
response = {
|
|
590
|
+
"document": {
|
|
591
|
+
"content": "Alternative content format"
|
|
592
|
+
}
|
|
593
|
+
}
|
|
594
|
+
content = _extract_content(response)
|
|
595
|
+
assert content == "Alternative content format"
|
|
596
|
+
|
|
597
|
+
def test_extract_content_from_empty_document(self):
|
|
598
|
+
"""Test that empty document returns empty string."""
|
|
599
|
+
from mdify.docling_client import _extract_content
|
|
600
|
+
|
|
601
|
+
response = {"document": {"md_content": ""}}
|
|
602
|
+
assert _extract_content(response) == ""
|
|
603
|
+
|
|
604
|
+
def test_extract_content_from_results_old_format(self):
|
|
605
|
+
"""Test extracting content from old list format."""
|
|
606
|
+
from mdify.docling_client import _extract_content
|
|
607
|
+
|
|
608
|
+
response = [
|
|
609
|
+
{
|
|
610
|
+
"document": {
|
|
611
|
+
"md_content": "# Header\n\nMarkdown content"
|
|
612
|
+
}
|
|
613
|
+
}
|
|
614
|
+
]
|
|
615
|
+
content = _extract_content(response)
|
|
616
|
+
assert "Header" in content
|
|
617
|
+
|
|
618
|
+
def test_extract_content_with_nested_markdown(self):
|
|
619
|
+
"""Test extracting nested markdown content."""
|
|
620
|
+
from mdify.docling_client import _extract_content
|
|
621
|
+
|
|
622
|
+
response = {
|
|
623
|
+
"document": {
|
|
624
|
+
"content": {
|
|
625
|
+
"markdown": "# Nested Markdown"
|
|
626
|
+
}
|
|
627
|
+
}
|
|
628
|
+
}
|
|
629
|
+
content = _extract_content(response)
|
|
630
|
+
assert len(content) > 0
|
|
631
|
+
|
|
632
|
+
|
|
633
|
+
class TestContentValidation:
|
|
634
|
+
"""Test content length validation for preventing invalid files."""
|
|
635
|
+
|
|
636
|
+
def test_validation_threshold_minimum(self):
|
|
637
|
+
"""Test that exactly 50 characters passes validation."""
|
|
638
|
+
content = "This is exactly 50 character string for testing."
|
|
639
|
+
# Verify it's exactly 50 characters (or use a longer string)
|
|
640
|
+
if len(content.strip()) < 50:
|
|
641
|
+
content = "This is a string that is at least 50 characters long to validate the threshold."
|
|
642
|
+
assert len(content.strip()) >= 50
|
|
643
|
+
|
|
644
|
+
def test_validation_threshold_below_minimum(self):
|
|
645
|
+
"""Test that less than 50 characters fails validation."""
|
|
646
|
+
content = "Too short"
|
|
647
|
+
assert len(content.strip()) < 50
|
|
648
|
+
|
|
649
|
+
def test_validation_empty_content_fails(self):
|
|
650
|
+
"""Test that empty content fails validation."""
|
|
651
|
+
content = ""
|
|
652
|
+
content_length = len(content.strip()) if content else 0
|
|
653
|
+
assert content_length < 50
|
|
654
|
+
|
|
655
|
+
def test_validation_whitespace_stripped(self):
|
|
656
|
+
"""Test that whitespace is stripped during validation."""
|
|
657
|
+
content = " Too short "
|
|
658
|
+
# After strip, it's less than 50 chars
|
|
659
|
+
assert len(content.strip()) < 50
|
|
660
|
+
|
|
661
|
+
def test_validation_error_json_fails(self):
|
|
662
|
+
"""Test that typical error JSON fails validation."""
|
|
663
|
+
import json
|
|
664
|
+
error_json = json.dumps({
|
|
665
|
+
"detail": "Conversion is taking too long. The maximum wait time is configure as DOCLING_SERVE_MAX_SYNC_WAIT=120."
|
|
666
|
+
})
|
|
667
|
+
# Even the error message is < 50 chars when just the string
|
|
668
|
+
error_detail = '{"detail": "Conversion failed"}'
|
|
669
|
+
assert len(error_detail.strip()) < 50
|
|
670
|
+
|
|
671
|
+
def test_validation_valid_content_passes(self):
|
|
672
|
+
"""Test that typical valid content passes validation."""
|
|
673
|
+
content = "# Markdown Heading\n\nThis is substantial content that exceeds the 50 character minimum."
|
|
674
|
+
assert len(content.strip()) >= 50
|
|
675
|
+
|
|
676
|
+
def test_convert_result_with_short_content(self):
|
|
677
|
+
"""Test ConvertResult with content below threshold."""
|
|
678
|
+
result = ConvertResult(content="Short", format="md", success=True)
|
|
679
|
+
content_length = len(result.content.strip()) if result.content else 0
|
|
680
|
+
assert content_length < 50
|
|
681
|
+
|
|
682
|
+
def test_convert_result_with_valid_content(self):
|
|
683
|
+
"""Test ConvertResult with content above threshold."""
|
|
684
|
+
result = ConvertResult(
|
|
685
|
+
content="# Valid content that exceeds the 50 character minimum threshold",
|
|
686
|
+
format="md",
|
|
687
|
+
success=True
|
|
688
|
+
)
|
|
689
|
+
content_length = len(result.content.strip()) if result.content else 0
|
|
690
|
+
assert content_length >= 50
|
|
691
|
+
|
|
692
|
+
def test_convert_result_with_empty_content(self):
|
|
693
|
+
"""Test ConvertResult with empty content."""
|
|
694
|
+
result = ConvertResult(content="", format="md", success=True)
|
|
695
|
+
assert result.success is True
|
|
696
|
+
assert result.content == ""
|
|
697
|
+
content_length = len(result.content.strip()) if result.content else 0
|
|
698
|
+
assert content_length < 50
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|