mdify-cli 2.6.0__py3-none-any.whl → 2.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdify/__init__.py +1 -1
- mdify/docling_client.py +61 -22
- {mdify_cli-2.6.0.dist-info → mdify_cli-2.8.0.dist-info}/METADATA +1 -1
- mdify_cli-2.8.0.dist-info/RECORD +12 -0
- mdify_cli-2.6.0.dist-info/RECORD +0 -12
- {mdify_cli-2.6.0.dist-info → mdify_cli-2.8.0.dist-info}/WHEEL +0 -0
- {mdify_cli-2.6.0.dist-info → mdify_cli-2.8.0.dist-info}/entry_points.txt +0 -0
- {mdify_cli-2.6.0.dist-info → mdify_cli-2.8.0.dist-info}/licenses/LICENSE +0 -0
- {mdify_cli-2.6.0.dist-info → mdify_cli-2.8.0.dist-info}/top_level.txt +0 -0
mdify/__init__.py
CHANGED
mdify/docling_client.py
CHANGED
|
@@ -4,6 +4,8 @@ from dataclasses import dataclass
|
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
from typing import Optional
|
|
6
6
|
|
|
7
|
+
import mimetypes
|
|
8
|
+
|
|
7
9
|
import requests
|
|
8
10
|
|
|
9
11
|
|
|
@@ -40,6 +42,48 @@ class DoclingHTTPError(DoclingClientError):
|
|
|
40
42
|
super().__init__(f"HTTP {status_code}: {message}")
|
|
41
43
|
|
|
42
44
|
|
|
45
|
+
def _get_mime_type(file_path: Path) -> str:
|
|
46
|
+
"""Get MIME type for file, with fallback for unknown types."""
|
|
47
|
+
mime_type, _ = mimetypes.guess_type(str(file_path))
|
|
48
|
+
return mime_type or "application/octet-stream"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _extract_content(result_data) -> str:
|
|
52
|
+
"""Extract content from API response, supporting both old and new formats.
|
|
53
|
+
|
|
54
|
+
Supports:
|
|
55
|
+
- New format: {"document": {"md_content": "..."}}
|
|
56
|
+
- Fallback: {"document": {"content": "..."}}
|
|
57
|
+
- Old format: {"content": "..."}
|
|
58
|
+
- List format: [{"document": {...}} or {"content": "..."}]
|
|
59
|
+
|
|
60
|
+
Args:
|
|
61
|
+
result_data: Response data from docling-serve API
|
|
62
|
+
|
|
63
|
+
Returns:
|
|
64
|
+
Extracted content string, or empty string if not found
|
|
65
|
+
"""
|
|
66
|
+
if isinstance(result_data, dict):
|
|
67
|
+
# New format with document field
|
|
68
|
+
if "document" in result_data:
|
|
69
|
+
doc = result_data["document"]
|
|
70
|
+
# Try md_content first, then content
|
|
71
|
+
return doc.get("md_content", "") or doc.get("content", "")
|
|
72
|
+
# Old format without document field
|
|
73
|
+
return result_data.get("content", "")
|
|
74
|
+
elif isinstance(result_data, list) and len(result_data) > 0:
|
|
75
|
+
# List format - process first item
|
|
76
|
+
first_result = result_data[0]
|
|
77
|
+
if isinstance(first_result, dict):
|
|
78
|
+
if "document" in first_result:
|
|
79
|
+
doc = first_result["document"]
|
|
80
|
+
# Try md_content first, then content
|
|
81
|
+
return doc.get("md_content", "") or doc.get("content", "")
|
|
82
|
+
# Old format without document field
|
|
83
|
+
return first_result.get("content", "")
|
|
84
|
+
return ""
|
|
85
|
+
|
|
86
|
+
|
|
43
87
|
def check_health(base_url: str) -> bool:
|
|
44
88
|
"""Check if docling-serve is healthy.
|
|
45
89
|
|
|
@@ -77,7 +121,7 @@ def convert_file(
|
|
|
77
121
|
with open(file_path, "rb") as f:
|
|
78
122
|
response = requests.post(
|
|
79
123
|
f"{base_url}/v1/convert/file",
|
|
80
|
-
files={"files": (file_path.name, f,
|
|
124
|
+
files={"files": (file_path.name, f, _get_mime_type(file_path))},
|
|
81
125
|
data={"to_formats": to_format, "do_ocr": str(do_ocr).lower()},
|
|
82
126
|
)
|
|
83
127
|
|
|
@@ -87,17 +131,10 @@ def convert_file(
|
|
|
87
131
|
)
|
|
88
132
|
|
|
89
133
|
result_data = response.json()
|
|
134
|
+
content = _extract_content(result_data)
|
|
90
135
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
first_result = result_data[0]
|
|
94
|
-
return ConvertResult(
|
|
95
|
-
content=first_result.get("content", ""), format=to_format, success=True
|
|
96
|
-
)
|
|
97
|
-
elif isinstance(result_data, dict):
|
|
98
|
-
return ConvertResult(
|
|
99
|
-
content=result_data.get("content", ""), format=to_format, success=True
|
|
100
|
-
)
|
|
136
|
+
if content or isinstance(result_data, (dict, list)):
|
|
137
|
+
return ConvertResult(content=content, format=to_format, success=True)
|
|
101
138
|
else:
|
|
102
139
|
raise DoclingHTTPError(200, f"Unexpected response format: {result_data}")
|
|
103
140
|
|
|
@@ -126,7 +163,7 @@ def convert_file_async(
|
|
|
126
163
|
with open(file_path, "rb") as f:
|
|
127
164
|
response = requests.post(
|
|
128
165
|
f"{base_url}/v1/convert/file/async",
|
|
129
|
-
files={"files": (file_path.name, f,
|
|
166
|
+
files={"files": (file_path.name, f, _get_mime_type(file_path))},
|
|
130
167
|
data={"to_formats": to_format, "do_ocr": str(do_ocr).lower()},
|
|
131
168
|
)
|
|
132
169
|
|
|
@@ -202,19 +239,21 @@ def get_result(base_url: str, task_id: str) -> ConvertResult:
|
|
|
202
239
|
)
|
|
203
240
|
|
|
204
241
|
result_data = response.json()
|
|
242
|
+
content = _extract_content(result_data)
|
|
205
243
|
|
|
206
|
-
#
|
|
207
|
-
|
|
244
|
+
# Determine format from response, defaulting to "md"
|
|
245
|
+
result_format = "md"
|
|
246
|
+
if isinstance(result_data, dict):
|
|
247
|
+
result_format = result_data.get("format", "md")
|
|
248
|
+
elif isinstance(result_data, list) and len(result_data) > 0:
|
|
208
249
|
first_result = result_data[0]
|
|
250
|
+
if isinstance(first_result, dict):
|
|
251
|
+
result_format = first_result.get("format", "md")
|
|
252
|
+
|
|
253
|
+
if content or isinstance(result_data, (dict, list)):
|
|
209
254
|
return ConvertResult(
|
|
210
|
-
content=first_result.get("content", ""),
|
|
211
|
-
format=first_result.get("format", "md"),
|
|
212
|
-
success=True,
|
|
213
|
-
)
|
|
214
|
-
elif isinstance(result_data, dict):
|
|
215
|
-
return ConvertResult(
|
|
216
|
-
content=result_data.get("content", ""),
|
|
217
|
-
format=result_data.get("format", "md"),
|
|
255
|
+
content=content,
|
|
256
|
+
format=result_format,
|
|
218
257
|
success=True,
|
|
219
258
|
)
|
|
220
259
|
else:
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
assets/mdify.png,sha256=qUj7WXWqNwpI2KNXOW79XJwqFqa-UI0JEkmt1mmy4Rg,1820418
|
|
2
|
+
mdify/__init__.py,sha256=YBAx8MdkINw38Jx6zeS5ikx8buI1avmzcHF2v23nZQU,90
|
|
3
|
+
mdify/__main__.py,sha256=bhpJ00co6MfaVOdH4XLoW04NtLYDa_oJK7ODzfLrn9M,143
|
|
4
|
+
mdify/cli.py,sha256=LqIibolYSKGCNYqxuIyFnvPkjJyNlXvfWeKaSaoOrqo,28542
|
|
5
|
+
mdify/container.py,sha256=tkk0nv7EquL-rKUY4nkS_yGITb7mqw8B7eEfuqaeVrg,5239
|
|
6
|
+
mdify/docling_client.py,sha256=xuQR6sC1v3EPloOSwExoHCqT4uUxE8myYq-Yeby3C2I,7975
|
|
7
|
+
mdify_cli-2.8.0.dist-info/licenses/LICENSE,sha256=NWM66Uv-XuSMKaU-gaPmvfyk4WgE6zcIPr78wyg6GAo,1065
|
|
8
|
+
mdify_cli-2.8.0.dist-info/METADATA,sha256=LKU3PAHABNp5dT9KJ3hGeCMSXxjDkIFXNveXzRv2fIA,7923
|
|
9
|
+
mdify_cli-2.8.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
10
|
+
mdify_cli-2.8.0.dist-info/entry_points.txt,sha256=0Xki8f5lADQUtwdt6Eq_FEaieI6Byhk8UE7BuDhChMg,41
|
|
11
|
+
mdify_cli-2.8.0.dist-info/top_level.txt,sha256=qltzf7h8owHq7dxCdfCkSHY8gT21hn1_E8P-VWS_OKM,6
|
|
12
|
+
mdify_cli-2.8.0.dist-info/RECORD,,
|
mdify_cli-2.6.0.dist-info/RECORD
DELETED
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
assets/mdify.png,sha256=qUj7WXWqNwpI2KNXOW79XJwqFqa-UI0JEkmt1mmy4Rg,1820418
|
|
2
|
-
mdify/__init__.py,sha256=4mWutp3KF_BH9sz_oEPFBoN7Ee6vamK3cHDBpUtRQVY,90
|
|
3
|
-
mdify/__main__.py,sha256=bhpJ00co6MfaVOdH4XLoW04NtLYDa_oJK7ODzfLrn9M,143
|
|
4
|
-
mdify/cli.py,sha256=LqIibolYSKGCNYqxuIyFnvPkjJyNlXvfWeKaSaoOrqo,28542
|
|
5
|
-
mdify/container.py,sha256=tkk0nv7EquL-rKUY4nkS_yGITb7mqw8B7eEfuqaeVrg,5239
|
|
6
|
-
mdify/docling_client.py,sha256=_9qjL5yOOeJahOg6an2P6Iii1xkeR6wmNJZG4Q6NRkk,6553
|
|
7
|
-
mdify_cli-2.6.0.dist-info/licenses/LICENSE,sha256=NWM66Uv-XuSMKaU-gaPmvfyk4WgE6zcIPr78wyg6GAo,1065
|
|
8
|
-
mdify_cli-2.6.0.dist-info/METADATA,sha256=NcyfsGSLiSkz0NkRdc6g5pOervCpXJbWEIDSPnYSvFk,7923
|
|
9
|
-
mdify_cli-2.6.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
10
|
-
mdify_cli-2.6.0.dist-info/entry_points.txt,sha256=0Xki8f5lADQUtwdt6Eq_FEaieI6Byhk8UE7BuDhChMg,41
|
|
11
|
-
mdify_cli-2.6.0.dist-info/top_level.txt,sha256=qltzf7h8owHq7dxCdfCkSHY8gT21hn1_E8P-VWS_OKM,6
|
|
12
|
-
mdify_cli-2.6.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|