mdify-cli 2.7.0__py3-none-any.whl → 2.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdify/__init__.py +1 -1
- mdify/docling_client.py +51 -20
- {mdify_cli-2.7.0.dist-info → mdify_cli-2.8.0.dist-info}/METADATA +1 -1
- mdify_cli-2.8.0.dist-info/RECORD +12 -0
- mdify_cli-2.7.0.dist-info/RECORD +0 -12
- {mdify_cli-2.7.0.dist-info → mdify_cli-2.8.0.dist-info}/WHEEL +0 -0
- {mdify_cli-2.7.0.dist-info → mdify_cli-2.8.0.dist-info}/entry_points.txt +0 -0
- {mdify_cli-2.7.0.dist-info → mdify_cli-2.8.0.dist-info}/licenses/LICENSE +0 -0
- {mdify_cli-2.7.0.dist-info → mdify_cli-2.8.0.dist-info}/top_level.txt +0 -0
mdify/__init__.py
CHANGED
mdify/docling_client.py
CHANGED
|
@@ -48,6 +48,42 @@ def _get_mime_type(file_path: Path) -> str:
|
|
|
48
48
|
return mime_type or "application/octet-stream"
|
|
49
49
|
|
|
50
50
|
|
|
51
|
+
def _extract_content(result_data) -> str:
|
|
52
|
+
"""Extract content from API response, supporting both old and new formats.
|
|
53
|
+
|
|
54
|
+
Supports:
|
|
55
|
+
- New format: {"document": {"md_content": "..."}}
|
|
56
|
+
- Fallback: {"document": {"content": "..."}}
|
|
57
|
+
- Old format: {"content": "..."}
|
|
58
|
+
- List format: [{"document": {...}} or {"content": "..."}]
|
|
59
|
+
|
|
60
|
+
Args:
|
|
61
|
+
result_data: Response data from docling-serve API
|
|
62
|
+
|
|
63
|
+
Returns:
|
|
64
|
+
Extracted content string, or empty string if not found
|
|
65
|
+
"""
|
|
66
|
+
if isinstance(result_data, dict):
|
|
67
|
+
# New format with document field
|
|
68
|
+
if "document" in result_data:
|
|
69
|
+
doc = result_data["document"]
|
|
70
|
+
# Try md_content first, then content
|
|
71
|
+
return doc.get("md_content", "") or doc.get("content", "")
|
|
72
|
+
# Old format without document field
|
|
73
|
+
return result_data.get("content", "")
|
|
74
|
+
elif isinstance(result_data, list) and len(result_data) > 0:
|
|
75
|
+
# List format - process first item
|
|
76
|
+
first_result = result_data[0]
|
|
77
|
+
if isinstance(first_result, dict):
|
|
78
|
+
if "document" in first_result:
|
|
79
|
+
doc = first_result["document"]
|
|
80
|
+
# Try md_content first, then content
|
|
81
|
+
return doc.get("md_content", "") or doc.get("content", "")
|
|
82
|
+
# Old format without document field
|
|
83
|
+
return first_result.get("content", "")
|
|
84
|
+
return ""
|
|
85
|
+
|
|
86
|
+
|
|
51
87
|
def check_health(base_url: str) -> bool:
|
|
52
88
|
"""Check if docling-serve is healthy.
|
|
53
89
|
|
|
@@ -95,17 +131,10 @@ def convert_file(
|
|
|
95
131
|
)
|
|
96
132
|
|
|
97
133
|
result_data = response.json()
|
|
134
|
+
content = _extract_content(result_data)
|
|
98
135
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
first_result = result_data[0]
|
|
102
|
-
return ConvertResult(
|
|
103
|
-
content=first_result.get("content", ""), format=to_format, success=True
|
|
104
|
-
)
|
|
105
|
-
elif isinstance(result_data, dict):
|
|
106
|
-
return ConvertResult(
|
|
107
|
-
content=result_data.get("content", ""), format=to_format, success=True
|
|
108
|
-
)
|
|
136
|
+
if content or isinstance(result_data, (dict, list)):
|
|
137
|
+
return ConvertResult(content=content, format=to_format, success=True)
|
|
109
138
|
else:
|
|
110
139
|
raise DoclingHTTPError(200, f"Unexpected response format: {result_data}")
|
|
111
140
|
|
|
@@ -210,19 +239,21 @@ def get_result(base_url: str, task_id: str) -> ConvertResult:
|
|
|
210
239
|
)
|
|
211
240
|
|
|
212
241
|
result_data = response.json()
|
|
242
|
+
content = _extract_content(result_data)
|
|
213
243
|
|
|
214
|
-
#
|
|
215
|
-
|
|
244
|
+
# Determine format from response, defaulting to "md"
|
|
245
|
+
result_format = "md"
|
|
246
|
+
if isinstance(result_data, dict):
|
|
247
|
+
result_format = result_data.get("format", "md")
|
|
248
|
+
elif isinstance(result_data, list) and len(result_data) > 0:
|
|
216
249
|
first_result = result_data[0]
|
|
250
|
+
if isinstance(first_result, dict):
|
|
251
|
+
result_format = first_result.get("format", "md")
|
|
252
|
+
|
|
253
|
+
if content or isinstance(result_data, (dict, list)):
|
|
217
254
|
return ConvertResult(
|
|
218
|
-
content=
|
|
219
|
-
format=
|
|
220
|
-
success=True,
|
|
221
|
-
)
|
|
222
|
-
elif isinstance(result_data, dict):
|
|
223
|
-
return ConvertResult(
|
|
224
|
-
content=result_data.get("content", ""),
|
|
225
|
-
format=result_data.get("format", "md"),
|
|
255
|
+
content=content,
|
|
256
|
+
format=result_format,
|
|
226
257
|
success=True,
|
|
227
258
|
)
|
|
228
259
|
else:
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
assets/mdify.png,sha256=qUj7WXWqNwpI2KNXOW79XJwqFqa-UI0JEkmt1mmy4Rg,1820418
|
|
2
|
+
mdify/__init__.py,sha256=YBAx8MdkINw38Jx6zeS5ikx8buI1avmzcHF2v23nZQU,90
|
|
3
|
+
mdify/__main__.py,sha256=bhpJ00co6MfaVOdH4XLoW04NtLYDa_oJK7ODzfLrn9M,143
|
|
4
|
+
mdify/cli.py,sha256=LqIibolYSKGCNYqxuIyFnvPkjJyNlXvfWeKaSaoOrqo,28542
|
|
5
|
+
mdify/container.py,sha256=tkk0nv7EquL-rKUY4nkS_yGITb7mqw8B7eEfuqaeVrg,5239
|
|
6
|
+
mdify/docling_client.py,sha256=xuQR6sC1v3EPloOSwExoHCqT4uUxE8myYq-Yeby3C2I,7975
|
|
7
|
+
mdify_cli-2.8.0.dist-info/licenses/LICENSE,sha256=NWM66Uv-XuSMKaU-gaPmvfyk4WgE6zcIPr78wyg6GAo,1065
|
|
8
|
+
mdify_cli-2.8.0.dist-info/METADATA,sha256=LKU3PAHABNp5dT9KJ3hGeCMSXxjDkIFXNveXzRv2fIA,7923
|
|
9
|
+
mdify_cli-2.8.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
10
|
+
mdify_cli-2.8.0.dist-info/entry_points.txt,sha256=0Xki8f5lADQUtwdt6Eq_FEaieI6Byhk8UE7BuDhChMg,41
|
|
11
|
+
mdify_cli-2.8.0.dist-info/top_level.txt,sha256=qltzf7h8owHq7dxCdfCkSHY8gT21hn1_E8P-VWS_OKM,6
|
|
12
|
+
mdify_cli-2.8.0.dist-info/RECORD,,
|
mdify_cli-2.7.0.dist-info/RECORD
DELETED
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
assets/mdify.png,sha256=qUj7WXWqNwpI2KNXOW79XJwqFqa-UI0JEkmt1mmy4Rg,1820418
|
|
2
|
-
mdify/__init__.py,sha256=ymBvtqVt-BtORLCI0ZO674etO8tlMJxzghl39z6gCUg,90
|
|
3
|
-
mdify/__main__.py,sha256=bhpJ00co6MfaVOdH4XLoW04NtLYDa_oJK7ODzfLrn9M,143
|
|
4
|
-
mdify/cli.py,sha256=LqIibolYSKGCNYqxuIyFnvPkjJyNlXvfWeKaSaoOrqo,28542
|
|
5
|
-
mdify/container.py,sha256=tkk0nv7EquL-rKUY4nkS_yGITb7mqw8B7eEfuqaeVrg,5239
|
|
6
|
-
mdify/docling_client.py,sha256=9QWPmd0W5APzf6LeUrdDBAru6E4d89w2q8WqGVlJoHg,6807
|
|
7
|
-
mdify_cli-2.7.0.dist-info/licenses/LICENSE,sha256=NWM66Uv-XuSMKaU-gaPmvfyk4WgE6zcIPr78wyg6GAo,1065
|
|
8
|
-
mdify_cli-2.7.0.dist-info/METADATA,sha256=4v5CMHOhZ2LKgRgH7xm7hOUUYwahYCRJSCMcGtNja5g,7923
|
|
9
|
-
mdify_cli-2.7.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
10
|
-
mdify_cli-2.7.0.dist-info/entry_points.txt,sha256=0Xki8f5lADQUtwdt6Eq_FEaieI6Byhk8UE7BuDhChMg,41
|
|
11
|
-
mdify_cli-2.7.0.dist-info/top_level.txt,sha256=qltzf7h8owHq7dxCdfCkSHY8gT21hn1_E8P-VWS_OKM,6
|
|
12
|
-
mdify_cli-2.7.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|