mdify-cli 2.7.0__py3-none-any.whl → 2.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdify/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """mdify - Convert documents to Markdown via Docling container."""
2
2
 
3
- __version__ = "2.7.0"
3
+ __version__ = "2.8.0"
mdify/docling_client.py CHANGED
@@ -48,6 +48,42 @@ def _get_mime_type(file_path: Path) -> str:
48
48
  return mime_type or "application/octet-stream"
49
49
 
50
50
 
51
+ def _extract_content(result_data) -> str:
52
+ """Extract content from API response, supporting both old and new formats.
53
+
54
+ Supports:
55
+ - New format: {"document": {"md_content": "..."}}
56
+ - Fallback: {"document": {"content": "..."}}
57
+ - Old format: {"content": "..."}
58
+ - List format: [{"document": {...}} or {"content": "..."}]
59
+
60
+ Args:
61
+ result_data: Response data from docling-serve API
62
+
63
+ Returns:
64
+ Extracted content string, or empty string if not found
65
+ """
66
+ if isinstance(result_data, dict):
67
+ # New format with document field
68
+ if "document" in result_data:
69
+ doc = result_data["document"]
70
+ # Try md_content first, then content
71
+ return doc.get("md_content", "") or doc.get("content", "")
72
+ # Old format without document field
73
+ return result_data.get("content", "")
74
+ elif isinstance(result_data, list) and len(result_data) > 0:
75
+ # List format - process first item
76
+ first_result = result_data[0]
77
+ if isinstance(first_result, dict):
78
+ if "document" in first_result:
79
+ doc = first_result["document"]
80
+ # Try md_content first, then content
81
+ return doc.get("md_content", "") or doc.get("content", "")
82
+ # Old format without document field
83
+ return first_result.get("content", "")
84
+ return ""
85
+
86
+
51
87
  def check_health(base_url: str) -> bool:
52
88
  """Check if docling-serve is healthy.
53
89
 
@@ -95,17 +131,10 @@ def convert_file(
95
131
  )
96
132
 
97
133
  result_data = response.json()
134
+ content = _extract_content(result_data)
98
135
 
99
- # docling-serve returns results in a list format
100
- if isinstance(result_data, list) and len(result_data) > 0:
101
- first_result = result_data[0]
102
- return ConvertResult(
103
- content=first_result.get("content", ""), format=to_format, success=True
104
- )
105
- elif isinstance(result_data, dict):
106
- return ConvertResult(
107
- content=result_data.get("content", ""), format=to_format, success=True
108
- )
136
+ if content or isinstance(result_data, (dict, list)):
137
+ return ConvertResult(content=content, format=to_format, success=True)
109
138
  else:
110
139
  raise DoclingHTTPError(200, f"Unexpected response format: {result_data}")
111
140
 
@@ -210,19 +239,21 @@ def get_result(base_url: str, task_id: str) -> ConvertResult:
210
239
  )
211
240
 
212
241
  result_data = response.json()
242
+ content = _extract_content(result_data)
213
243
 
214
- # Similar to sync conversion, handle list or dict format
215
- if isinstance(result_data, list) and len(result_data) > 0:
244
+ # Determine format from response, defaulting to "md"
245
+ result_format = "md"
246
+ if isinstance(result_data, dict):
247
+ result_format = result_data.get("format", "md")
248
+ elif isinstance(result_data, list) and len(result_data) > 0:
216
249
  first_result = result_data[0]
250
+ if isinstance(first_result, dict):
251
+ result_format = first_result.get("format", "md")
252
+
253
+ if content or isinstance(result_data, (dict, list)):
217
254
  return ConvertResult(
218
- content=first_result.get("content", ""),
219
- format=first_result.get("format", "md"),
220
- success=True,
221
- )
222
- elif isinstance(result_data, dict):
223
- return ConvertResult(
224
- content=result_data.get("content", ""),
225
- format=result_data.get("format", "md"),
255
+ content=content,
256
+ format=result_format,
226
257
  success=True,
227
258
  )
228
259
  else:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 2.7.0
3
+ Version: 2.8.0
4
4
  Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
@@ -0,0 +1,12 @@
1
+ assets/mdify.png,sha256=qUj7WXWqNwpI2KNXOW79XJwqFqa-UI0JEkmt1mmy4Rg,1820418
2
+ mdify/__init__.py,sha256=YBAx8MdkINw38Jx6zeS5ikx8buI1avmzcHF2v23nZQU,90
3
+ mdify/__main__.py,sha256=bhpJ00co6MfaVOdH4XLoW04NtLYDa_oJK7ODzfLrn9M,143
4
+ mdify/cli.py,sha256=LqIibolYSKGCNYqxuIyFnvPkjJyNlXvfWeKaSaoOrqo,28542
5
+ mdify/container.py,sha256=tkk0nv7EquL-rKUY4nkS_yGITb7mqw8B7eEfuqaeVrg,5239
6
+ mdify/docling_client.py,sha256=xuQR6sC1v3EPloOSwExoHCqT4uUxE8myYq-Yeby3C2I,7975
7
+ mdify_cli-2.8.0.dist-info/licenses/LICENSE,sha256=NWM66Uv-XuSMKaU-gaPmvfyk4WgE6zcIPr78wyg6GAo,1065
8
+ mdify_cli-2.8.0.dist-info/METADATA,sha256=LKU3PAHABNp5dT9KJ3hGeCMSXxjDkIFXNveXzRv2fIA,7923
9
+ mdify_cli-2.8.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
10
+ mdify_cli-2.8.0.dist-info/entry_points.txt,sha256=0Xki8f5lADQUtwdt6Eq_FEaieI6Byhk8UE7BuDhChMg,41
11
+ mdify_cli-2.8.0.dist-info/top_level.txt,sha256=qltzf7h8owHq7dxCdfCkSHY8gT21hn1_E8P-VWS_OKM,6
12
+ mdify_cli-2.8.0.dist-info/RECORD,,
@@ -1,12 +0,0 @@
1
- assets/mdify.png,sha256=qUj7WXWqNwpI2KNXOW79XJwqFqa-UI0JEkmt1mmy4Rg,1820418
2
- mdify/__init__.py,sha256=ymBvtqVt-BtORLCI0ZO674etO8tlMJxzghl39z6gCUg,90
3
- mdify/__main__.py,sha256=bhpJ00co6MfaVOdH4XLoW04NtLYDa_oJK7ODzfLrn9M,143
4
- mdify/cli.py,sha256=LqIibolYSKGCNYqxuIyFnvPkjJyNlXvfWeKaSaoOrqo,28542
5
- mdify/container.py,sha256=tkk0nv7EquL-rKUY4nkS_yGITb7mqw8B7eEfuqaeVrg,5239
6
- mdify/docling_client.py,sha256=9QWPmd0W5APzf6LeUrdDBAru6E4d89w2q8WqGVlJoHg,6807
7
- mdify_cli-2.7.0.dist-info/licenses/LICENSE,sha256=NWM66Uv-XuSMKaU-gaPmvfyk4WgE6zcIPr78wyg6GAo,1065
8
- mdify_cli-2.7.0.dist-info/METADATA,sha256=4v5CMHOhZ2LKgRgH7xm7hOUUYwahYCRJSCMcGtNja5g,7923
9
- mdify_cli-2.7.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
10
- mdify_cli-2.7.0.dist-info/entry_points.txt,sha256=0Xki8f5lADQUtwdt6Eq_FEaieI6Byhk8UE7BuDhChMg,41
11
- mdify_cli-2.7.0.dist-info/top_level.txt,sha256=qltzf7h8owHq7dxCdfCkSHY8gT21hn1_E8P-VWS_OKM,6
12
- mdify_cli-2.7.0.dist-info/RECORD,,