PyPI - mdify-cli - Versions diffs - 3.3.1__tar.gz → 3.3.3__tar.gz - Mend

mdify-cli 3.3.1.tar.gz → 3.3.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

{mdify_cli-3.3.1 → mdify_cli-3.3.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mdify-cli
-Version: 3.3.1
+Version: 3.3.3
 Summary: Convert PDFs and document images into structured Markdown for LLM workflows
 Author: tiroq
 License-Expression: MIT

{mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify/__init__.py RENAMED Viewed

@@ -1,3 +1,3 @@
 """mdify - Convert documents to Markdown via Docling container."""
-__version__ = "3.3.1"
+__version__ = "3.3.3"

{mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify/cli.py RENAMED Viewed

@@ -1060,11 +1060,11 @@ def main_async_remote(args) -> int:
         color = Colorizer(sys.stderr)
         # Resolve timeout value: CLI > env > default 1200
-        timeout = args.timeout or int(os.environ.get("MDIFY_TIMEOUT", 1200))
+        timeout = args.timeout if args.timeout is not None else int(os.environ.get("MDIFY_TIMEOUT", 1200))
         # For remote operations, extend timeout significantly for large PDF processing
         # Remote conversions include network latency, file upload/download, and OCR processing
-        remote_conversion_timeout = max(timeout, 3600)  # At least 1 hour for remote conversion
+        remote_conversion_timeout = max(timeout or 1200, 3600)  # At least 1 hour for remote conversion
         # Build SSH config from CLI arguments and SSH config files
         try:
@@ -1397,8 +1397,8 @@ def main_async_remote(args) -> int:
                                     error_detail = response_data.get("detail", response_data.get("error", str(response_data)))
                                     print(f"  {color_err.error('✗ Failed:')} {error_detail}", file=sys.stderr)
                                     if "DOCLING_SERVE_MAX_SYNC_WAIT" in str(error_detail):
-                                        timeout_val = args.remote_timeout or 3600
-                                        print(f"  {color_err.info('ℹ Tip:')} Increase timeout with --remote-timeout (current: {timeout_val}s)", file=sys.stderr)
+                                        timeout_val = args.timeout or 3600
+                                        print(f"  {color_err.info('ℹ Tip:')} Increase timeout with --timeout (current: {timeout_val}s)", file=sys.stderr)
                                     failed += 1
                                     break
@@ -1583,9 +1583,9 @@ def main_async_remote(args) -> int:
             return 1
         except Exception as e:
             print(f"Error: Unexpected error during remote execution: {e}", file=sys.stderr)
-            if DEBUG:
-                import traceback
-                traceback.print_exc(file=sys.stderr)
+            # Always print traceback for unexpected errors to help with debugging
+            import traceback
+            traceback.print_exc(file=sys.stderr)
             return 1
     # Run async main

{mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify/ssh/client.py RENAMED Viewed

@@ -44,7 +44,7 @@ class AsyncSSHClient:
                 # Prepare connection parameters - only include non-None values
                 connect_kwargs = {
                     "port": self.config.port,
-                    "connect_timeout": self.config.timeout,
+                    "connect_timeout": self.config.timeout or 30,
                     "known_hosts": None,  # Skip host key verification for now
                 }
@@ -165,7 +165,7 @@ class AsyncSSHClient:
             )
         try:
-            timeout_val = timeout or self.config.timeout
+            timeout_val = timeout or self.config.timeout or 30
             result = await asyncio.wait_for(
                 self.connection.run(command, check=False),

{mdify_cli-3.3.1 → mdify_cli-3.3.3}/mdify_cli.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mdify-cli
-Version: 3.3.1
+Version: 3.3.3
 Summary: Convert PDFs and document images into structured Markdown for LLM workflows
 Author: tiroq
 License-Expression: MIT

{mdify_cli-3.3.1 → mdify_cli-3.3.3}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "mdify-cli"
-version = "3.3.1"
+version = "3.3.3"
 description = "Convert PDFs and document images into structured Markdown for LLM workflows"
 readme = "README.md"
 requires-python = ">=3.10"

{mdify_cli-3.3.1 → mdify_cli-3.3.3}/tests/test_docling_client.py RENAMED Viewed

@@ -489,3 +489,210 @@ class TestMimeTypeDetection:
             files_param = call_args[1]["files"]
             filename, file_obj, mime_type = files_param["files"]
             assert mime_type == "application/octet-stream"
+class TestErrorDetection:
+    """Test error response detection functionality."""
+    def test_error_detection_with_detail_key(self):
+        """Test detection of error response with 'detail' key."""
+        from mdify.docling_client import _is_error_response
+        error_response = {"detail": "Conversion is taking too long"}
+        assert _is_error_response(error_response) is True
+    def test_error_detection_with_error_key(self):
+        """Test detection of error response with 'error' key."""
+        from mdify.docling_client import _is_error_response
+        error_response = {"error": "Internal server error"}
+        assert _is_error_response(error_response) is True
+    def test_error_detection_with_message_key(self):
+        """Test detection of error response with 'message' key."""
+        from mdify.docling_client import _is_error_response
+        error_response = {"message": "Request failed"}
+        assert _is_error_response(error_response) is True
+    def test_error_detection_with_code_key(self):
+        """Test detection of error response with 'code' key."""
+        from mdify.docling_client import _is_error_response
+        error_response = {"code": 500}
+        assert _is_error_response(error_response) is True
+    def test_error_detection_with_status_key(self):
+        """Test detection of error response with 'status' key."""
+        from mdify.docling_client import _is_error_response
+        error_response = {"status": "error"}
+        assert _is_error_response(error_response) is True
+    def test_error_detection_timeout_error(self):
+        """Test detection of specific timeout error from docling-serve."""
+        from mdify.docling_client import _is_error_response
+        error_response = {
+            "detail": "Conversion is taking too long. The maximum wait time is configure as DOCLING_SERVE_MAX_SYNC_WAIT=120."
+        }
+        assert _is_error_response(error_response) is True
+    def test_valid_response_not_detected_as_error(self):
+        """Test that valid responses are not marked as errors."""
+        from mdify.docling_client import _is_error_response
+        valid_response = {"document": {"md_content": "# Valid Markdown"}}
+        assert _is_error_response(valid_response) is False
+    def test_empty_dict_not_detected_as_error(self):
+        """Test that empty dict is not detected as error."""
+        from mdify.docling_client import _is_error_response
+        assert _is_error_response({}) is False
+    def test_non_dict_not_detected_as_error(self):
+        """Test that non-dict values are not detected as errors."""
+        from mdify.docling_client import _is_error_response
+        assert _is_error_response("not a dict") is False
+        assert _is_error_response(None) is False
+        assert _is_error_response([]) is False
+class TestContentExtraction:
+    """Test content extraction from responses, including error handling."""
+    def test_extract_content_from_error_returns_empty(self):
+        """Test that error responses return empty content."""
+        from mdify.docling_client import _extract_content
+        error_response = {"detail": "Conversion failed"}
+        assert _extract_content(error_response) == ""
+    def test_extract_content_from_document_md_content(self):
+        """Test extracting content from document.md_content."""
+        from mdify.docling_client import _extract_content
+        valid_response = {
+            "document": {
+                "md_content": "# Valid Markdown\n\nThis is valid content."
+            }
+        }
+        content = _extract_content(valid_response)
+        assert content == "# Valid Markdown\n\nThis is valid content."
+    def test_extract_content_from_document_content_fallback(self):
+        """Test extracting content from document.content fallback."""
+        from mdify.docling_client import _extract_content
+        response = {
+            "document": {
+                "content": "Alternative content format"
+            }
+        }
+        content = _extract_content(response)
+        assert content == "Alternative content format"
+    def test_extract_content_from_empty_document(self):
+        """Test that empty document returns empty string."""
+        from mdify.docling_client import _extract_content
+        response = {"document": {"md_content": ""}}
+        assert _extract_content(response) == ""
+    def test_extract_content_from_results_old_format(self):
+        """Test extracting content from old list format."""
+        from mdify.docling_client import _extract_content
+        response = [
+            {
+                "document": {
+                    "md_content": "# Header\n\nMarkdown content"
+                }
+            }
+        ]
+        content = _extract_content(response)
+        assert "Header" in content
+    def test_extract_content_with_nested_markdown(self):
+        """Test extracting nested markdown content."""
+        from mdify.docling_client import _extract_content
+        response = {
+            "document": {
+                "content": {
+                    "markdown": "# Nested Markdown"
+                }
+            }
+        }
+        content = _extract_content(response)
+        assert len(content) > 0
+class TestContentValidation:
+    """Test content length validation for preventing invalid files."""
+    def test_validation_threshold_minimum(self):
+        """Test that exactly 50 characters passes validation."""
+        content = "This is exactly 50 character string for testing."
+        # Verify it's exactly 50 characters (or use a longer string)
+        if len(content.strip()) < 50:
+            content = "This is a string that is at least 50 characters long to validate the threshold."
+        assert len(content.strip()) >= 50
+    def test_validation_threshold_below_minimum(self):
+        """Test that less than 50 characters fails validation."""
+        content = "Too short"
+        assert len(content.strip()) < 50
+    def test_validation_empty_content_fails(self):
+        """Test that empty content fails validation."""
+        content = ""
+        content_length = len(content.strip()) if content else 0
+        assert content_length < 50
+    def test_validation_whitespace_stripped(self):
+        """Test that whitespace is stripped during validation."""
+        content = "   Too short   "
+        # After strip, it's less than 50 chars
+        assert len(content.strip()) < 50
+    def test_validation_error_json_fails(self):
+        """Test that typical error JSON fails validation."""
+        import json
+        error_json = json.dumps({
+            "detail": "Conversion is taking too long. The maximum wait time is configure as DOCLING_SERVE_MAX_SYNC_WAIT=120."
+        })
+        # Even the error message is < 50 chars when just the string
+        error_detail = '{"detail": "Conversion failed"}'
+        assert len(error_detail.strip()) < 50
+    def test_validation_valid_content_passes(self):
+        """Test that typical valid content passes validation."""
+        content = "# Markdown Heading\n\nThis is substantial content that exceeds the 50 character minimum."
+        assert len(content.strip()) >= 50
+    def test_convert_result_with_short_content(self):
+        """Test ConvertResult with content below threshold."""
+        result = ConvertResult(content="Short", format="md", success=True)
+        content_length = len(result.content.strip()) if result.content else 0
+        assert content_length < 50
+    def test_convert_result_with_valid_content(self):
+        """Test ConvertResult with content above threshold."""
+        result = ConvertResult(
+            content="# Valid content that exceeds the 50 character minimum threshold",
+            format="md",
+            success=True
+        )
+        content_length = len(result.content.strip()) if result.content else 0
+        assert content_length >= 50
+    def test_convert_result_with_empty_content(self):
+        """Test ConvertResult with empty content."""
+        result = ConvertResult(content="", format="md", success=True)
+        assert result.success is True
+        assert result.content == ""
+        content_length = len(result.content.strip()) if result.content else 0
+        assert content_length < 50