mdify-cli 2.6.0__tar.gz → 2.7.0__tar.gz

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 2.6.0
3
+ Version: 2.7.0
4
4
  Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
@@ -1,3 +1,3 @@
1
1
  """mdify - Convert documents to Markdown via Docling container."""
2
2
 
3
- __version__ = "2.6.0"
3
+ __version__ = "2.7.0"
@@ -4,6 +4,8 @@ from dataclasses import dataclass
4
4
  from pathlib import Path
5
5
  from typing import Optional
6
6
 
7
+ import mimetypes
8
+
7
9
  import requests
8
10
 
9
11
 
@@ -40,6 +42,12 @@ class DoclingHTTPError(DoclingClientError):
40
42
  super().__init__(f"HTTP {status_code}: {message}")
41
43
 
42
44
 
45
+ def _get_mime_type(file_path: Path) -> str:
46
+ """Get MIME type for file, with fallback for unknown types."""
47
+ mime_type, _ = mimetypes.guess_type(str(file_path))
48
+ return mime_type or "application/octet-stream"
49
+
50
+
43
51
  def check_health(base_url: str) -> bool:
44
52
  """Check if docling-serve is healthy.
45
53
 
@@ -77,7 +85,7 @@ def convert_file(
77
85
  with open(file_path, "rb") as f:
78
86
  response = requests.post(
79
87
  f"{base_url}/v1/convert/file",
80
- files={"files": (file_path.name, f, "application/pdf")},
88
+ files={"files": (file_path.name, f, _get_mime_type(file_path))},
81
89
  data={"to_formats": to_format, "do_ocr": str(do_ocr).lower()},
82
90
  )
83
91
 
@@ -126,7 +134,7 @@ def convert_file_async(
126
134
  with open(file_path, "rb") as f:
127
135
  response = requests.post(
128
136
  f"{base_url}/v1/convert/file/async",
129
- files={"files": (file_path.name, f, "application/pdf")},
137
+ files={"files": (file_path.name, f, _get_mime_type(file_path))},
130
138
  data={"to_formats": to_format, "do_ocr": str(do_ocr).lower()},
131
139
  )
132
140
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 2.6.0
3
+ Version: 2.7.0
4
4
  Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "mdify-cli"
3
- version = "2.6.0"
3
+ version = "2.7.0"
4
4
  description = "Convert PDFs and document images into structured Markdown for LLM workflows"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.8"
@@ -356,3 +356,97 @@ class TestDoclingHTTPError:
356
356
  error = DoclingHTTPError(400, "Bad Request")
357
357
 
358
358
  assert isinstance(error, Exception)
359
+
360
+
361
+ class TestMimeTypeDetection:
362
+ """Test MIME type detection in file conversion."""
363
+
364
+ def test_convert_file_sends_correct_mime_for_xlsx(self, tmp_path):
365
+ """Test that .xlsx files are sent with correct MIME type."""
366
+ test_file = tmp_path / "test.xlsx"
367
+ test_file.write_bytes(b"fake xlsx content")
368
+
369
+ with patch("mdify.docling_client.requests.post") as mock_post:
370
+ mock_response = Mock()
371
+ mock_response.status_code = 200
372
+ mock_response.json.return_value = [
373
+ {"content": "# Test Spreadsheet\n\nContent here."}
374
+ ]
375
+ mock_post.return_value = mock_response
376
+
377
+ convert_file("http://localhost:5001", test_file)
378
+
379
+ mock_post.assert_called_once()
380
+ call_args = mock_post.call_args
381
+ files_param = call_args[1]["files"]
382
+ filename, file_obj, mime_type = files_param["files"]
383
+ assert (
384
+ mime_type
385
+ == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
386
+ )
387
+
388
+ def test_convert_file_sends_correct_mime_for_pdf(self, tmp_path):
389
+ """Test that .pdf files are sent with correct MIME type (regression test)."""
390
+ test_file = tmp_path / "test.pdf"
391
+ test_file.write_bytes(b"fake pdf content")
392
+
393
+ with patch("mdify.docling_client.requests.post") as mock_post:
394
+ mock_response = Mock()
395
+ mock_response.status_code = 200
396
+ mock_response.json.return_value = [
397
+ {"content": "# Test Document\n\nContent here."}
398
+ ]
399
+ mock_post.return_value = mock_response
400
+
401
+ convert_file("http://localhost:5001", test_file)
402
+
403
+ mock_post.assert_called_once()
404
+ call_args = mock_post.call_args
405
+ files_param = call_args[1]["files"]
406
+ filename, file_obj, mime_type = files_param["files"]
407
+ assert mime_type == "application/pdf"
408
+
409
+ def test_convert_file_sends_correct_mime_for_docx(self, tmp_path):
410
+ """Test that .docx files are sent with correct MIME type."""
411
+ test_file = tmp_path / "test.docx"
412
+ test_file.write_bytes(b"fake docx content")
413
+
414
+ with patch("mdify.docling_client.requests.post") as mock_post:
415
+ mock_response = Mock()
416
+ mock_response.status_code = 200
417
+ mock_response.json.return_value = [
418
+ {"content": "# Test Document\n\nContent here."}
419
+ ]
420
+ mock_post.return_value = mock_response
421
+
422
+ convert_file("http://localhost:5001", test_file)
423
+
424
+ mock_post.assert_called_once()
425
+ call_args = mock_post.call_args
426
+ files_param = call_args[1]["files"]
427
+ filename, file_obj, mime_type = files_param["files"]
428
+ assert (
429
+ mime_type
430
+ == "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
431
+ )
432
+
433
+ def test_convert_file_fallback_for_unknown_extension(self, tmp_path):
434
+ """Test that unknown file extensions fall back to application/octet-stream."""
435
+ test_file = tmp_path / "test.unknownext123"
436
+ test_file.write_bytes(b"fake unknown content")
437
+
438
+ with patch("mdify.docling_client.requests.post") as mock_post:
439
+ mock_response = Mock()
440
+ mock_response.status_code = 200
441
+ mock_response.json.return_value = [
442
+ {"content": "# Test Content\n\nContent here."}
443
+ ]
444
+ mock_post.return_value = mock_response
445
+
446
+ convert_file("http://localhost:5001", test_file)
447
+
448
+ mock_post.assert_called_once()
449
+ call_args = mock_post.call_args
450
+ files_param = call_args[1]["files"]
451
+ filename, file_obj, mime_type = files_param["files"]
452
+ assert mime_type == "application/octet-stream"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes