mdify-cli 2.9.1__tar.gz → 2.11.4__tar.gz
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- {mdify_cli-2.9.1/mdify_cli.egg-info → mdify_cli-2.11.4}/PKG-INFO +1 -1
- {mdify_cli-2.9.1 → mdify_cli-2.11.4}/mdify/__init__.py +1 -1
- {mdify_cli-2.9.1 → mdify_cli-2.11.4}/mdify/cli.py +73 -11
- {mdify_cli-2.9.1 → mdify_cli-2.11.4}/mdify/container.py +46 -0
- {mdify_cli-2.9.1 → mdify_cli-2.11.4/mdify_cli.egg-info}/PKG-INFO +1 -1
- {mdify_cli-2.9.1 → mdify_cli-2.11.4}/pyproject.toml +1 -1
- {mdify_cli-2.9.1 → mdify_cli-2.11.4}/tests/test_cli.py +23 -9
- {mdify_cli-2.9.1 → mdify_cli-2.11.4}/LICENSE +0 -0
- {mdify_cli-2.9.1 → mdify_cli-2.11.4}/README.md +0 -0
- {mdify_cli-2.9.1 → mdify_cli-2.11.4}/assets/mdify.png +0 -0
- {mdify_cli-2.9.1 → mdify_cli-2.11.4}/mdify/__main__.py +0 -0
- {mdify_cli-2.9.1 → mdify_cli-2.11.4}/mdify/docling_client.py +0 -0
- {mdify_cli-2.9.1 → mdify_cli-2.11.4}/mdify_cli.egg-info/SOURCES.txt +0 -0
- {mdify_cli-2.9.1 → mdify_cli-2.11.4}/mdify_cli.egg-info/dependency_links.txt +0 -0
- {mdify_cli-2.9.1 → mdify_cli-2.11.4}/mdify_cli.egg-info/entry_points.txt +0 -0
- {mdify_cli-2.9.1 → mdify_cli-2.11.4}/mdify_cli.egg-info/requires.txt +0 -0
- {mdify_cli-2.9.1 → mdify_cli-2.11.4}/mdify_cli.egg-info/top_level.txt +0 -0
- {mdify_cli-2.9.1 → mdify_cli-2.11.4}/setup.cfg +0 -0
- {mdify_cli-2.9.1 → mdify_cli-2.11.4}/tests/test_container.py +0 -0
- {mdify_cli-2.9.1 → mdify_cli-2.11.4}/tests/test_docling_client.py +0 -0
|
@@ -38,6 +38,9 @@ SUPPORTED_RUNTIMES = ("docker", "podman", "orbstack", "colima", "container")
|
|
|
38
38
|
MACOS_RUNTIMES_PRIORITY = ("container", "orbstack", "colima", "podman", "docker")
|
|
39
39
|
OTHER_RUNTIMES_PRIORITY = ("docker", "podman")
|
|
40
40
|
|
|
41
|
+
# Debug mode
|
|
42
|
+
DEBUG = os.environ.get("MDIFY_DEBUG", "").lower() in ("1", "true", "yes")
|
|
43
|
+
|
|
41
44
|
|
|
42
45
|
# =============================================================================
|
|
43
46
|
# Update checking functions
|
|
@@ -283,10 +286,10 @@ def check_image_exists(runtime: str, image: str) -> bool:
|
|
|
283
286
|
try:
|
|
284
287
|
runtime_name = os.path.basename(runtime)
|
|
285
288
|
|
|
286
|
-
# Apple Container uses 'image
|
|
289
|
+
# Apple Container uses 'image list' command (two words)
|
|
287
290
|
if runtime_name == "container":
|
|
288
291
|
result = subprocess.run(
|
|
289
|
-
[runtime, "image
|
|
292
|
+
[runtime, "image", "list", "--format", "json"],
|
|
290
293
|
capture_output=True,
|
|
291
294
|
check=False,
|
|
292
295
|
)
|
|
@@ -294,8 +297,10 @@ def check_image_exists(runtime: str, image: str) -> bool:
|
|
|
294
297
|
try:
|
|
295
298
|
images = json.loads(result.stdout.decode())
|
|
296
299
|
# Check if image exists in the list
|
|
300
|
+
# Apple Container returns format: [{"reference": "image:tag", "descriptor": {...}}]
|
|
297
301
|
for img in images:
|
|
298
|
-
|
|
302
|
+
reference = img.get("reference", "")
|
|
303
|
+
if reference == image or reference.startswith(f"{image}:"):
|
|
299
304
|
return True
|
|
300
305
|
except json.JSONDecodeError:
|
|
301
306
|
pass
|
|
@@ -330,10 +335,10 @@ def pull_image(runtime: str, image: str, quiet: bool = False) -> bool:
|
|
|
330
335
|
try:
|
|
331
336
|
runtime_name = os.path.basename(runtime)
|
|
332
337
|
|
|
333
|
-
# Apple Container uses 'image
|
|
338
|
+
# Apple Container uses 'image pull' command (two words)
|
|
334
339
|
if runtime_name == "container":
|
|
335
340
|
result = subprocess.run(
|
|
336
|
-
[runtime, "image
|
|
341
|
+
[runtime, "image", "pull", image],
|
|
337
342
|
capture_output=quiet,
|
|
338
343
|
check=False,
|
|
339
344
|
)
|
|
@@ -521,8 +526,8 @@ class Spinner:
|
|
|
521
526
|
self.running = False
|
|
522
527
|
if self.thread:
|
|
523
528
|
self.thread.join(timeout=0.5)
|
|
524
|
-
# Clear the spinner line
|
|
525
|
-
print(f"\r{' ' *
|
|
529
|
+
# Clear the spinner line with enough spaces to cover the longest possible line
|
|
530
|
+
print(f"\r{' ' * 120}\r", end="", flush=True)
|
|
526
531
|
|
|
527
532
|
|
|
528
533
|
# =============================================================================
|
|
@@ -770,6 +775,7 @@ Examples:
|
|
|
770
775
|
|
|
771
776
|
def main() -> int:
|
|
772
777
|
"""Main entry point for the CLI."""
|
|
778
|
+
print(f"mdify v{__version__}", file=sys.stderr)
|
|
773
779
|
args = parse_args()
|
|
774
780
|
|
|
775
781
|
# Handle --check-update flag
|
|
@@ -812,6 +818,10 @@ def main() -> int:
|
|
|
812
818
|
|
|
813
819
|
image_exists = check_image_exists(runtime, image)
|
|
814
820
|
|
|
821
|
+
if not args.quiet and image_exists:
|
|
822
|
+
print(f"Using cached image: {image}")
|
|
823
|
+
print()
|
|
824
|
+
|
|
815
825
|
# NOTE: Docker Desktop on macOS/Windows uses a VM, so disk space checks may not
|
|
816
826
|
# accurately reflect available space in the container's filesystem. Remote Docker
|
|
817
827
|
# daemons (DOCKER_HOST) are also not supported. In these cases, the check will
|
|
@@ -917,6 +927,8 @@ def main() -> int:
|
|
|
917
927
|
|
|
918
928
|
if not args.quiet:
|
|
919
929
|
print(f"Found {total_files} file(s) to convert ({format_size(total_size)})")
|
|
930
|
+
print(f"Source: {input_path.resolve()}")
|
|
931
|
+
print(f"Output: {output_dir.resolve()}")
|
|
920
932
|
print(f"Using runtime: {runtime}")
|
|
921
933
|
print(f"Using image: {image}")
|
|
922
934
|
print()
|
|
@@ -974,11 +986,15 @@ def main() -> int:
|
|
|
974
986
|
start_time = time.time()
|
|
975
987
|
try:
|
|
976
988
|
# Convert via HTTP API
|
|
989
|
+
if DEBUG:
|
|
990
|
+
print(f" DEBUG: Converting {input_file.name} via {container.base_url}/v1/convert/file", file=sys.stderr)
|
|
991
|
+
|
|
977
992
|
result = convert_file(
|
|
978
993
|
container.base_url, input_file, to_format="md"
|
|
979
994
|
)
|
|
980
995
|
elapsed = time.time() - start_time
|
|
981
996
|
|
|
997
|
+
# Stop spinner before any output
|
|
982
998
|
if not args.quiet:
|
|
983
999
|
spinner.stop()
|
|
984
1000
|
|
|
@@ -1001,12 +1017,58 @@ def main() -> int:
|
|
|
1001
1017
|
except Exception as e:
|
|
1002
1018
|
elapsed = time.time() - start_time
|
|
1003
1019
|
failed_count += 1
|
|
1020
|
+
# Stop spinner before printing error
|
|
1004
1021
|
if not args.quiet:
|
|
1005
1022
|
spinner.stop()
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1023
|
+
|
|
1024
|
+
# Check if container is still healthy
|
|
1025
|
+
error_msg = str(e)
|
|
1026
|
+
is_connection_error = "Connection refused" in error_msg or "Connection aborted" in error_msg or "RemoteDisconnected" in error_msg
|
|
1027
|
+
|
|
1028
|
+
if is_connection_error:
|
|
1029
|
+
container_alive = container.is_ready()
|
|
1030
|
+
if not args.quiet:
|
|
1031
|
+
print(
|
|
1032
|
+
f"{progress} {input_file.name} ✗ ({format_duration(elapsed)})"
|
|
1033
|
+
)
|
|
1034
|
+
if container_alive:
|
|
1035
|
+
print(
|
|
1036
|
+
" Error: Connection lost (server may have crashed and restarted)",
|
|
1037
|
+
file=sys.stderr,
|
|
1038
|
+
)
|
|
1039
|
+
else:
|
|
1040
|
+
print(
|
|
1041
|
+
" Error: Container crashed while processing file",
|
|
1042
|
+
file=sys.stderr,
|
|
1043
|
+
)
|
|
1044
|
+
print(
|
|
1045
|
+
" File may be too complex, large, or malformed",
|
|
1046
|
+
file=sys.stderr,
|
|
1047
|
+
)
|
|
1048
|
+
|
|
1049
|
+
# Always show logs for connection errors to surface root cause
|
|
1050
|
+
print(" Retrieving container logs...", file=sys.stderr)
|
|
1051
|
+
logs = container.get_logs(tail=30)
|
|
1052
|
+
if logs:
|
|
1053
|
+
print(" Container logs (last 30 lines):", file=sys.stderr)
|
|
1054
|
+
for line in logs.strip().split("\n"):
|
|
1055
|
+
print(f" {line}", file=sys.stderr)
|
|
1056
|
+
else:
|
|
1057
|
+
print(" No logs available", file=sys.stderr)
|
|
1058
|
+
|
|
1059
|
+
if not container_alive:
|
|
1060
|
+
print(" Stopping remaining conversions", file=sys.stderr)
|
|
1061
|
+
|
|
1062
|
+
# Stop processing if container is dead
|
|
1063
|
+
if not container_alive:
|
|
1064
|
+
break
|
|
1065
|
+
else:
|
|
1066
|
+
# Non-connection error
|
|
1067
|
+
if not args.quiet:
|
|
1068
|
+
print(
|
|
1069
|
+
f"{progress} {input_file.name} ✗ ({format_duration(elapsed)})"
|
|
1070
|
+
)
|
|
1071
|
+
print(f" Error: {error_msg}", file=sys.stderr)
|
|
1010
1072
|
|
|
1011
1073
|
total_elapsed = time.time() - conversion_start
|
|
1012
1074
|
|
|
@@ -125,6 +125,48 @@ class DoclingContainer:
|
|
|
125
125
|
check=False,
|
|
126
126
|
)
|
|
127
127
|
|
|
128
|
+
def get_logs(self, tail: int = 50) -> str:
|
|
129
|
+
"""Get container logs for debugging.
|
|
130
|
+
|
|
131
|
+
Args:
|
|
132
|
+
tail: Number of lines to retrieve from end of logs
|
|
133
|
+
|
|
134
|
+
Returns:
|
|
135
|
+
Container logs as string
|
|
136
|
+
"""
|
|
137
|
+
if not self.container_name:
|
|
138
|
+
return ""
|
|
139
|
+
|
|
140
|
+
try:
|
|
141
|
+
result = subprocess.run(
|
|
142
|
+
[self.runtime, "logs", "--tail", str(tail), self.container_name],
|
|
143
|
+
capture_output=True,
|
|
144
|
+
text=True,
|
|
145
|
+
check=False,
|
|
146
|
+
)
|
|
147
|
+
return result.stdout if result.returncode == 0 else ""
|
|
148
|
+
except Exception:
|
|
149
|
+
return ""
|
|
150
|
+
|
|
151
|
+
def is_running(self) -> bool:
|
|
152
|
+
"""Check if container process is still running.
|
|
153
|
+
|
|
154
|
+
Returns:
|
|
155
|
+
True if container is running, False otherwise
|
|
156
|
+
"""
|
|
157
|
+
if not self.container_name:
|
|
158
|
+
return False
|
|
159
|
+
|
|
160
|
+
try:
|
|
161
|
+
result = subprocess.run(
|
|
162
|
+
[self.runtime, "ps", "-q", "-f", f"name={self.container_name}"],
|
|
163
|
+
capture_output=True,
|
|
164
|
+
check=False,
|
|
165
|
+
)
|
|
166
|
+
return result.returncode == 0 and bool(result.stdout.strip())
|
|
167
|
+
except Exception:
|
|
168
|
+
return False
|
|
169
|
+
|
|
128
170
|
def is_ready(self) -> bool:
|
|
129
171
|
"""Check if container is healthy.
|
|
130
172
|
|
|
@@ -132,6 +174,10 @@ class DoclingContainer:
|
|
|
132
174
|
True if container is healthy, False otherwise
|
|
133
175
|
"""
|
|
134
176
|
try:
|
|
177
|
+
# First check if container is still running
|
|
178
|
+
if not self.is_running():
|
|
179
|
+
return False
|
|
180
|
+
# Then check health endpoint
|
|
135
181
|
return check_health(self.base_url)
|
|
136
182
|
except Exception:
|
|
137
183
|
return False
|
|
@@ -979,31 +979,38 @@ class TestContainerRuntime:
|
|
|
979
979
|
assert "Error pulling image" in captured.err
|
|
980
980
|
|
|
981
981
|
def test_apple_container_pull_success(self):
|
|
982
|
-
"""Test pull_image uses 'image
|
|
982
|
+
"""Test pull_image uses 'image pull' for Apple Container."""
|
|
983
983
|
mock_result = Mock()
|
|
984
984
|
mock_result.returncode = 0
|
|
985
985
|
with patch("mdify.cli.subprocess.run", return_value=mock_result) as mock_run:
|
|
986
986
|
result = pull_image("/usr/local/bin/container", "test-image", quiet=True)
|
|
987
987
|
assert result is True
|
|
988
988
|
mock_run.assert_called_once_with(
|
|
989
|
-
["/usr/local/bin/container", "image
|
|
989
|
+
["/usr/local/bin/container", "image", "pull", "test-image"],
|
|
990
990
|
capture_output=True,
|
|
991
991
|
check=False,
|
|
992
992
|
)
|
|
993
993
|
|
|
994
994
|
def test_apple_container_image_exists(self):
|
|
995
|
-
"""Test check_image_exists uses 'image
|
|
995
|
+
"""Test check_image_exists uses 'image list' for Apple Container."""
|
|
996
996
|
mock_result = Mock()
|
|
997
997
|
mock_result.returncode = 0
|
|
998
|
+
# Use actual Apple Container response format with 'reference' field
|
|
998
999
|
mock_result.stdout = json.dumps([
|
|
999
|
-
{
|
|
1000
|
-
|
|
1000
|
+
{
|
|
1001
|
+
"reference": "ghcr.io/docling-project/docling-serve-cpu:main",
|
|
1002
|
+
"descriptor": {
|
|
1003
|
+
"size": 1609,
|
|
1004
|
+
"mediaType": "application/vnd.oci.image.index.v1+json",
|
|
1005
|
+
"digest": "sha256:25e82dfa30371d17a0af17edc42261a4b9bedb37f0f337887c366184bc3ee291"
|
|
1006
|
+
}
|
|
1007
|
+
}
|
|
1001
1008
|
]).encode()
|
|
1002
1009
|
with patch("mdify.cli.subprocess.run", return_value=mock_result) as mock_run:
|
|
1003
|
-
result = check_image_exists("/usr/local/bin/container", "
|
|
1010
|
+
result = check_image_exists("/usr/local/bin/container", "ghcr.io/docling-project/docling-serve-cpu:main")
|
|
1004
1011
|
assert result is True
|
|
1005
1012
|
mock_run.assert_called_once_with(
|
|
1006
|
-
["/usr/local/bin/container", "image
|
|
1013
|
+
["/usr/local/bin/container", "image", "list", "--format", "json"],
|
|
1007
1014
|
capture_output=True,
|
|
1008
1015
|
check=False,
|
|
1009
1016
|
)
|
|
@@ -1013,10 +1020,17 @@ class TestContainerRuntime:
|
|
|
1013
1020
|
mock_result = Mock()
|
|
1014
1021
|
mock_result.returncode = 0
|
|
1015
1022
|
mock_result.stdout = json.dumps([
|
|
1016
|
-
{
|
|
1023
|
+
{
|
|
1024
|
+
"reference": "ghcr.io/other-project/other-image:latest",
|
|
1025
|
+
"descriptor": {
|
|
1026
|
+
"size": 1234,
|
|
1027
|
+
"mediaType": "application/vnd.oci.image.index.v1+json",
|
|
1028
|
+
"digest": "sha256:abcd1234"
|
|
1029
|
+
}
|
|
1030
|
+
}
|
|
1017
1031
|
]).encode()
|
|
1018
1032
|
with patch("mdify.cli.subprocess.run", return_value=mock_result):
|
|
1019
|
-
result = check_image_exists("/usr/local/bin/container", "
|
|
1033
|
+
result = check_image_exists("/usr/local/bin/container", "ghcr.io/docling-project/docling-serve-cpu:main")
|
|
1020
1034
|
assert result is False
|
|
1021
1035
|
|
|
1022
1036
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|