mdify-cli 2.9.1__tar.gz → 2.11.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 2.9.1
3
+ Version: 2.11.4
4
4
  Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
@@ -1,3 +1,3 @@
1
1
  """mdify - Convert documents to Markdown via Docling container."""
2
2
 
3
- __version__ = "2.9.1"
3
+ __version__ = "2.11.4"
@@ -38,6 +38,9 @@ SUPPORTED_RUNTIMES = ("docker", "podman", "orbstack", "colima", "container")
38
38
  MACOS_RUNTIMES_PRIORITY = ("container", "orbstack", "colima", "podman", "docker")
39
39
  OTHER_RUNTIMES_PRIORITY = ("docker", "podman")
40
40
 
41
+ # Debug mode
42
+ DEBUG = os.environ.get("MDIFY_DEBUG", "").lower() in ("1", "true", "yes")
43
+
41
44
 
42
45
  # =============================================================================
43
46
  # Update checking functions
@@ -283,10 +286,10 @@ def check_image_exists(runtime: str, image: str) -> bool:
283
286
  try:
284
287
  runtime_name = os.path.basename(runtime)
285
288
 
286
- # Apple Container uses 'image-list' command
289
+ # Apple Container uses 'image list' command (two words)
287
290
  if runtime_name == "container":
288
291
  result = subprocess.run(
289
- [runtime, "image-list", "--format", "json"],
292
+ [runtime, "image", "list", "--format", "json"],
290
293
  capture_output=True,
291
294
  check=False,
292
295
  )
@@ -294,8 +297,10 @@ def check_image_exists(runtime: str, image: str) -> bool:
294
297
  try:
295
298
  images = json.loads(result.stdout.decode())
296
299
  # Check if image exists in the list
300
+ # Apple Container returns format: [{"reference": "image:tag", "descriptor": {...}}]
297
301
  for img in images:
298
- if img.get("name") == image or image in img.get("repoTags", []):
302
+ reference = img.get("reference", "")
303
+ if reference == image or reference.startswith(f"{image}:"):
299
304
  return True
300
305
  except json.JSONDecodeError:
301
306
  pass
@@ -330,10 +335,10 @@ def pull_image(runtime: str, image: str, quiet: bool = False) -> bool:
330
335
  try:
331
336
  runtime_name = os.path.basename(runtime)
332
337
 
333
- # Apple Container uses 'image-pull' command
338
+ # Apple Container uses 'image pull' command (two words)
334
339
  if runtime_name == "container":
335
340
  result = subprocess.run(
336
- [runtime, "image-pull", image],
341
+ [runtime, "image", "pull", image],
337
342
  capture_output=quiet,
338
343
  check=False,
339
344
  )
@@ -521,8 +526,8 @@ class Spinner:
521
526
  self.running = False
522
527
  if self.thread:
523
528
  self.thread.join(timeout=0.5)
524
- # Clear the spinner line
525
- print(f"\r{' ' * 80}\r", end="", flush=True)
529
+ # Clear the spinner line with enough spaces to cover the longest possible line
530
+ print(f"\r{' ' * 120}\r", end="", flush=True)
526
531
 
527
532
 
528
533
  # =============================================================================
@@ -770,6 +775,7 @@ Examples:
770
775
 
771
776
  def main() -> int:
772
777
  """Main entry point for the CLI."""
778
+ print(f"mdify v{__version__}", file=sys.stderr)
773
779
  args = parse_args()
774
780
 
775
781
  # Handle --check-update flag
@@ -812,6 +818,10 @@ def main() -> int:
812
818
 
813
819
  image_exists = check_image_exists(runtime, image)
814
820
 
821
+ if not args.quiet and image_exists:
822
+ print(f"Using cached image: {image}")
823
+ print()
824
+
815
825
  # NOTE: Docker Desktop on macOS/Windows uses a VM, so disk space checks may not
816
826
  # accurately reflect available space in the container's filesystem. Remote Docker
817
827
  # daemons (DOCKER_HOST) are also not supported. In these cases, the check will
@@ -917,6 +927,8 @@ def main() -> int:
917
927
 
918
928
  if not args.quiet:
919
929
  print(f"Found {total_files} file(s) to convert ({format_size(total_size)})")
930
+ print(f"Source: {input_path.resolve()}")
931
+ print(f"Output: {output_dir.resolve()}")
920
932
  print(f"Using runtime: {runtime}")
921
933
  print(f"Using image: {image}")
922
934
  print()
@@ -974,11 +986,15 @@ def main() -> int:
974
986
  start_time = time.time()
975
987
  try:
976
988
  # Convert via HTTP API
989
+ if DEBUG:
990
+ print(f" DEBUG: Converting {input_file.name} via {container.base_url}/v1/convert/file", file=sys.stderr)
991
+
977
992
  result = convert_file(
978
993
  container.base_url, input_file, to_format="md"
979
994
  )
980
995
  elapsed = time.time() - start_time
981
996
 
997
+ # Stop spinner before any output
982
998
  if not args.quiet:
983
999
  spinner.stop()
984
1000
 
@@ -1001,12 +1017,58 @@ def main() -> int:
1001
1017
  except Exception as e:
1002
1018
  elapsed = time.time() - start_time
1003
1019
  failed_count += 1
1020
+ # Stop spinner before printing error
1004
1021
  if not args.quiet:
1005
1022
  spinner.stop()
1006
- print(
1007
- f"{progress} {input_file.name} ({format_duration(elapsed)})"
1008
- )
1009
- print(f" Error: {str(e)}", file=sys.stderr)
1023
+
1024
+ # Check if container is still healthy
1025
+ error_msg = str(e)
1026
+ is_connection_error = "Connection refused" in error_msg or "Connection aborted" in error_msg or "RemoteDisconnected" in error_msg
1027
+
1028
+ if is_connection_error:
1029
+ container_alive = container.is_ready()
1030
+ if not args.quiet:
1031
+ print(
1032
+ f"{progress} {input_file.name} ✗ ({format_duration(elapsed)})"
1033
+ )
1034
+ if container_alive:
1035
+ print(
1036
+ " Error: Connection lost (server may have crashed and restarted)",
1037
+ file=sys.stderr,
1038
+ )
1039
+ else:
1040
+ print(
1041
+ " Error: Container crashed while processing file",
1042
+ file=sys.stderr,
1043
+ )
1044
+ print(
1045
+ " File may be too complex, large, or malformed",
1046
+ file=sys.stderr,
1047
+ )
1048
+
1049
+ # Always show logs for connection errors to surface root cause
1050
+ print(" Retrieving container logs...", file=sys.stderr)
1051
+ logs = container.get_logs(tail=30)
1052
+ if logs:
1053
+ print(" Container logs (last 30 lines):", file=sys.stderr)
1054
+ for line in logs.strip().split("\n"):
1055
+ print(f" {line}", file=sys.stderr)
1056
+ else:
1057
+ print(" No logs available", file=sys.stderr)
1058
+
1059
+ if not container_alive:
1060
+ print(" Stopping remaining conversions", file=sys.stderr)
1061
+
1062
+ # Stop processing if container is dead
1063
+ if not container_alive:
1064
+ break
1065
+ else:
1066
+ # Non-connection error
1067
+ if not args.quiet:
1068
+ print(
1069
+ f"{progress} {input_file.name} ✗ ({format_duration(elapsed)})"
1070
+ )
1071
+ print(f" Error: {error_msg}", file=sys.stderr)
1010
1072
 
1011
1073
  total_elapsed = time.time() - conversion_start
1012
1074
 
@@ -125,6 +125,48 @@ class DoclingContainer:
125
125
  check=False,
126
126
  )
127
127
 
128
def get_logs(self, tail: int = 50) -> str:
    """Get container logs for debugging.

    Runs ``<runtime> logs --tail <tail> <container_name>`` and returns
    what the command printed. The command's stderr is merged into its
    stdout so that anything the containerized process wrote to stderr
    is included in the returned text.

    Args:
        tail: Number of lines to retrieve from end of logs

    Returns:
        Container logs as string; an empty string when no container
        name is set, the command exits non-zero, or it cannot be run.
    """
    if not self.container_name:
        return ""

    try:
        result = subprocess.run(
            [self.runtime, "logs", "--tail", str(tail), self.container_name],
            # The logs command replays the container's stderr on its own
            # stderr; capturing stdout alone drops those lines.
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            check=False,
        )
    except Exception:
        # Best effort: log retrieval must never mask the original failure.
        return ""

    return result.stdout if result.returncode == 0 else ""
150
+
151
def is_running(self) -> bool:
    """Check if container process is still running.

    Asks the runtime to list running containers filtered by name and
    then requires an exact name match. The ``name=`` filter alone is a
    partial match, so a container named ``foo-10`` would otherwise make
    ``foo-1`` look alive.

    Returns:
        True if container is running, False otherwise (including when
        no container name is set or the runtime command fails).
    """
    if not self.container_name:
        return False

    try:
        # NOTE(review): assumes the runtime accepts Docker-style
        # `ps -f name=... --format {{.Names}}`; confirm for Apple `container`.
        result = subprocess.run(
            [
                self.runtime,
                "ps",
                "-f",
                f"name={self.container_name}",
                "--format",
                "{{.Names}}",
            ],
            capture_output=True,
            text=True,
            check=False,
        )
    except Exception:
        return False

    if result.returncode != 0:
        return False

    # A container may be listed with several comma-separated names.
    listed = {
        name.strip()
        for line in result.stdout.splitlines()
        for name in line.split(",")
    }
    return self.container_name in listed
169
+
128
170
  def is_ready(self) -> bool:
129
171
  """Check if container is healthy.
130
172
 
@@ -132,6 +174,10 @@ class DoclingContainer:
132
174
  True if container is healthy, False otherwise
133
175
  """
134
176
  try:
177
+ # First check if container is still running
178
+ if not self.is_running():
179
+ return False
180
+ # Then check health endpoint
135
181
  return check_health(self.base_url)
136
182
  except Exception:
137
183
  return False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 2.9.1
3
+ Version: 2.11.4
4
4
  Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "mdify-cli"
3
- version = "2.9.1"
3
+ version = "2.11.4"
4
4
  description = "Convert PDFs and document images into structured Markdown for LLM workflows"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.8"
@@ -979,31 +979,38 @@ class TestContainerRuntime:
979
979
  assert "Error pulling image" in captured.err
980
980
 
981
981
  def test_apple_container_pull_success(self):
982
- """Test pull_image uses 'image-pull' for Apple Container."""
982
+ """Test pull_image uses 'image pull' for Apple Container."""
983
983
  mock_result = Mock()
984
984
  mock_result.returncode = 0
985
985
  with patch("mdify.cli.subprocess.run", return_value=mock_result) as mock_run:
986
986
  result = pull_image("/usr/local/bin/container", "test-image", quiet=True)
987
987
  assert result is True
988
988
  mock_run.assert_called_once_with(
989
- ["/usr/local/bin/container", "image-pull", "test-image"],
989
+ ["/usr/local/bin/container", "image", "pull", "test-image"],
990
990
  capture_output=True,
991
991
  check=False,
992
992
  )
993
993
 
994
994
  def test_apple_container_image_exists(self):
995
- """Test check_image_exists uses 'image-list' for Apple Container."""
995
+ """Test check_image_exists uses 'image list' for Apple Container."""
996
996
  mock_result = Mock()
997
997
  mock_result.returncode = 0
998
+ # Use actual Apple Container response format with 'reference' field
998
999
  mock_result.stdout = json.dumps([
999
- {"name": "test-image", "repoTags": ["test-image:latest"]},
1000
- {"name": "other-image", "repoTags": ["other-image:latest"]}
1000
+ {
1001
+ "reference": "ghcr.io/docling-project/docling-serve-cpu:main",
1002
+ "descriptor": {
1003
+ "size": 1609,
1004
+ "mediaType": "application/vnd.oci.image.index.v1+json",
1005
+ "digest": "sha256:25e82dfa30371d17a0af17edc42261a4b9bedb37f0f337887c366184bc3ee291"
1006
+ }
1007
+ }
1001
1008
  ]).encode()
1002
1009
  with patch("mdify.cli.subprocess.run", return_value=mock_result) as mock_run:
1003
- result = check_image_exists("/usr/local/bin/container", "test-image")
1010
+ result = check_image_exists("/usr/local/bin/container", "ghcr.io/docling-project/docling-serve-cpu:main")
1004
1011
  assert result is True
1005
1012
  mock_run.assert_called_once_with(
1006
- ["/usr/local/bin/container", "image-list", "--format", "json"],
1013
+ ["/usr/local/bin/container", "image", "list", "--format", "json"],
1007
1014
  capture_output=True,
1008
1015
  check=False,
1009
1016
  )
@@ -1013,10 +1020,17 @@ class TestContainerRuntime:
1013
1020
  mock_result = Mock()
1014
1021
  mock_result.returncode = 0
1015
1022
  mock_result.stdout = json.dumps([
1016
- {"name": "other-image", "repoTags": ["other-image:latest"]}
1023
+ {
1024
+ "reference": "ghcr.io/other-project/other-image:latest",
1025
+ "descriptor": {
1026
+ "size": 1234,
1027
+ "mediaType": "application/vnd.oci.image.index.v1+json",
1028
+ "digest": "sha256:abcd1234"
1029
+ }
1030
+ }
1017
1031
  ]).encode()
1018
1032
  with patch("mdify.cli.subprocess.run", return_value=mock_result):
1019
- result = check_image_exists("/usr/local/bin/container", "test-image")
1033
+ result = check_image_exists("/usr/local/bin/container", "ghcr.io/docling-project/docling-serve-cpu:main")
1020
1034
  assert result is False
1021
1035
 
1022
1036
 
File without changes
File without changes
File without changes
File without changes
File without changes