mdify-cli 2.9.5__tar.gz → 2.11.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 2.9.5
3
+ Version: 2.11.4
4
4
  Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
@@ -1,3 +1,3 @@
1
1
  """mdify - Convert documents to Markdown via Docling container."""
2
2
 
3
- __version__ = "2.9.5"
3
+ __version__ = "2.11.4"
@@ -38,6 +38,9 @@ SUPPORTED_RUNTIMES = ("docker", "podman", "orbstack", "colima", "container")
38
38
  MACOS_RUNTIMES_PRIORITY = ("container", "orbstack", "colima", "podman", "docker")
39
39
  OTHER_RUNTIMES_PRIORITY = ("docker", "podman")
40
40
 
41
+ # Debug mode
42
+ DEBUG = os.environ.get("MDIFY_DEBUG", "").lower() in ("1", "true", "yes")
43
+
41
44
 
42
45
  # =============================================================================
43
46
  # Update checking functions
@@ -294,8 +297,10 @@ def check_image_exists(runtime: str, image: str) -> bool:
294
297
  try:
295
298
  images = json.loads(result.stdout.decode())
296
299
  # Check if image exists in the list
300
+ # Apple Container returns format: [{"reference": "image:tag", "descriptor": {...}}]
297
301
  for img in images:
298
- if img.get("name") == image or image in img.get("repoTags", []):
302
+ reference = img.get("reference", "")
303
+ if reference == image or reference.startswith(f"{image}:"):
299
304
  return True
300
305
  except json.JSONDecodeError:
301
306
  pass
@@ -521,8 +526,8 @@ class Spinner:
521
526
  self.running = False
522
527
  if self.thread:
523
528
  self.thread.join(timeout=0.5)
524
- # Clear the spinner line
525
- print(f"\r{' ' * 80}\r", end="", flush=True)
529
+ # Clear the spinner line with enough spaces to cover the longest possible line
530
+ print(f"\r{' ' * 120}\r", end="", flush=True)
526
531
 
527
532
 
528
533
  # =============================================================================
@@ -813,6 +818,10 @@ def main() -> int:
813
818
 
814
819
  image_exists = check_image_exists(runtime, image)
815
820
 
821
+ if not args.quiet and image_exists:
822
+ print(f"Using cached image: {image}")
823
+ print()
824
+
816
825
  # NOTE: Docker Desktop on macOS/Windows uses a VM, so disk space checks may not
817
826
  # accurately reflect available space in the container's filesystem. Remote Docker
818
827
  # daemons (DOCKER_HOST) are also not supported. In these cases, the check will
@@ -977,11 +986,15 @@ def main() -> int:
977
986
  start_time = time.time()
978
987
  try:
979
988
  # Convert via HTTP API
989
+ if DEBUG:
990
+ print(f" DEBUG: Converting {input_file.name} via {container.base_url}/v1/convert/file", file=sys.stderr)
991
+
980
992
  result = convert_file(
981
993
  container.base_url, input_file, to_format="md"
982
994
  )
983
995
  elapsed = time.time() - start_time
984
996
 
997
+ # Stop spinner before any output
985
998
  if not args.quiet:
986
999
  spinner.stop()
987
1000
 
@@ -1004,12 +1017,58 @@ def main() -> int:
1004
1017
  except Exception as e:
1005
1018
  elapsed = time.time() - start_time
1006
1019
  failed_count += 1
1020
+ # Stop spinner before printing error
1007
1021
  if not args.quiet:
1008
1022
  spinner.stop()
1009
- print(
1010
- f"{progress} {input_file.name} ({format_duration(elapsed)})"
1011
- )
1012
- print(f" Error: {str(e)}", file=sys.stderr)
1023
+
1024
+ # Check if container is still healthy
1025
+ error_msg = str(e)
1026
+ is_connection_error = "Connection refused" in error_msg or "Connection aborted" in error_msg or "RemoteDisconnected" in error_msg
1027
+
1028
+ if is_connection_error:
1029
+ container_alive = container.is_ready()
1030
+ if not args.quiet:
1031
+ print(
1032
+ f"{progress} {input_file.name} ✗ ({format_duration(elapsed)})"
1033
+ )
1034
+ if container_alive:
1035
+ print(
1036
+ " Error: Connection lost (server may have crashed and restarted)",
1037
+ file=sys.stderr,
1038
+ )
1039
+ else:
1040
+ print(
1041
+ " Error: Container crashed while processing file",
1042
+ file=sys.stderr,
1043
+ )
1044
+ print(
1045
+ " File may be too complex, large, or malformed",
1046
+ file=sys.stderr,
1047
+ )
1048
+
1049
+ # Always show logs for connection errors to surface root cause
1050
+ print(" Retrieving container logs...", file=sys.stderr)
1051
+ logs = container.get_logs(tail=30)
1052
+ if logs:
1053
+ print(" Container logs (last 30 lines):", file=sys.stderr)
1054
+ for line in logs.strip().split("\n"):
1055
+ print(f" {line}", file=sys.stderr)
1056
+ else:
1057
+ print(" No logs available", file=sys.stderr)
1058
+
1059
+ if not container_alive:
1060
+ print(" Stopping remaining conversions", file=sys.stderr)
1061
+
1062
+ # Stop processing if container is dead
1063
+ if not container_alive:
1064
+ break
1065
+ else:
1066
+ # Non-connection error
1067
+ if not args.quiet:
1068
+ print(
1069
+ f"{progress} {input_file.name} ✗ ({format_duration(elapsed)})"
1070
+ )
1071
+ print(f" Error: {error_msg}", file=sys.stderr)
1013
1072
 
1014
1073
  total_elapsed = time.time() - conversion_start
1015
1074
 
@@ -125,6 +125,48 @@ class DoclingContainer:
125
125
  check=False,
126
126
  )
127
127
 
128
+ def get_logs(self, tail: int = 50) -> str:
129
+ """Get container logs for debugging.
130
+
131
+ Args:
132
+ tail: Number of lines to retrieve from end of logs
133
+
134
+ Returns:
135
+ Container logs as string
136
+ """
137
+ if not self.container_name:
138
+ return ""
139
+
140
+ try:
141
+ result = subprocess.run(
142
+ [self.runtime, "logs", "--tail", str(tail), self.container_name],
143
+ capture_output=True,
144
+ text=True,
145
+ check=False,
146
+ )
147
+ return result.stdout if result.returncode == 0 else ""
148
+ except Exception:
149
+ return ""
150
+
151
+ def is_running(self) -> bool:
152
+ """Check if container process is still running.
153
+
154
+ Returns:
155
+ True if container is running, False otherwise
156
+ """
157
+ if not self.container_name:
158
+ return False
159
+
160
+ try:
161
+ result = subprocess.run(
162
+ [self.runtime, "ps", "-q", "-f", f"name={self.container_name}"],
163
+ capture_output=True,
164
+ check=False,
165
+ )
166
+ return result.returncode == 0 and bool(result.stdout.strip())
167
+ except Exception:
168
+ return False
169
+
128
170
  def is_ready(self) -> bool:
129
171
  """Check if container is healthy.
130
172
 
@@ -132,6 +174,10 @@ class DoclingContainer:
132
174
  True if container is healthy, False otherwise
133
175
  """
134
176
  try:
177
+ # First check if container is still running
178
+ if not self.is_running():
179
+ return False
180
+ # Then check health endpoint
135
181
  return check_health(self.base_url)
136
182
  except Exception:
137
183
  return False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 2.9.5
3
+ Version: 2.11.4
4
4
  Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "mdify-cli"
3
- version = "2.9.5"
3
+ version = "2.11.4"
4
4
  description = "Convert PDFs and document images into structured Markdown for LLM workflows"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.8"
@@ -995,12 +995,19 @@ class TestContainerRuntime:
995
995
  """Test check_image_exists uses 'image list' for Apple Container."""
996
996
  mock_result = Mock()
997
997
  mock_result.returncode = 0
998
+ # Use actual Apple Container response format with 'reference' field
998
999
  mock_result.stdout = json.dumps([
999
- {"name": "test-image", "repoTags": ["test-image:latest"]},
1000
- {"name": "other-image", "repoTags": ["other-image:latest"]}
1000
+ {
1001
+ "reference": "ghcr.io/docling-project/docling-serve-cpu:main",
1002
+ "descriptor": {
1003
+ "size": 1609,
1004
+ "mediaType": "application/vnd.oci.image.index.v1+json",
1005
+ "digest": "sha256:25e82dfa30371d17a0af17edc42261a4b9bedb37f0f337887c366184bc3ee291"
1006
+ }
1007
+ }
1001
1008
  ]).encode()
1002
1009
  with patch("mdify.cli.subprocess.run", return_value=mock_result) as mock_run:
1003
- result = check_image_exists("/usr/local/bin/container", "test-image")
1010
+ result = check_image_exists("/usr/local/bin/container", "ghcr.io/docling-project/docling-serve-cpu:main")
1004
1011
  assert result is True
1005
1012
  mock_run.assert_called_once_with(
1006
1013
  ["/usr/local/bin/container", "image", "list", "--format", "json"],
@@ -1013,10 +1020,17 @@ class TestContainerRuntime:
1013
1020
  mock_result = Mock()
1014
1021
  mock_result.returncode = 0
1015
1022
  mock_result.stdout = json.dumps([
1016
- {"name": "other-image", "repoTags": ["other-image:latest"]}
1023
+ {
1024
+ "reference": "ghcr.io/other-project/other-image:latest",
1025
+ "descriptor": {
1026
+ "size": 1234,
1027
+ "mediaType": "application/vnd.oci.image.index.v1+json",
1028
+ "digest": "sha256:abcd1234"
1029
+ }
1030
+ }
1017
1031
  ]).encode()
1018
1032
  with patch("mdify.cli.subprocess.run", return_value=mock_result):
1019
- result = check_image_exists("/usr/local/bin/container", "test-image")
1033
+ result = check_image_exists("/usr/local/bin/container", "ghcr.io/docling-project/docling-serve-cpu:main")
1020
1034
  assert result is False
1021
1035
 
1022
1036
 
File without changes
File without changes
File without changes
File without changes
File without changes