mdify-cli 2.11.6__tar.gz → 2.11.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 2.11.6
3
+ Version: 2.11.8
4
4
  Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
@@ -1,3 +1,3 @@
1
1
  """mdify - Convert documents to Markdown via Docling container."""
2
2
 
3
- __version__ = "2.11.6"
3
+ __version__ = "2.11.8"
@@ -41,6 +41,108 @@ OTHER_RUNTIMES_PRIORITY = ("docker", "podman")
41
41
  # Debug mode
42
42
  DEBUG = os.environ.get("MDIFY_DEBUG", "").lower() in ("1", "true", "yes")
43
43
 
44
# Named CPU/memory presets for the conversion container.
# Each entry supplies a CPU count ("cpus"), a memory limit string ("memory")
# and a human-readable "description" of the workload it is meant for.
RESOURCE_PROFILES = dict(
    minimal=dict(
        cpus=4,
        memory="8g",
        description="Small PDFs, text-only documents",
    ),
    default=dict(
        cpus=6,
        memory="12g",
        description="Large PDFs, OCR, tables (recommended)",
    ),
    heavy=dict(
        cpus=8,
        memory="16g",
        description="Batch processing, very large files",
    ),
)
50
+
51
+
52
def _darwin_available_memory_gb() -> float:
    """Compute available memory on macOS from ``pagesize`` and ``vm_stat``."""
    page_size = int(
        subprocess.run(
            ["pagesize"], capture_output=True, text=True, check=True
        ).stdout.strip()
    )
    vm_stat = subprocess.run(
        ["vm_stat"], capture_output=True, text=True, check=True
    ).stdout

    # Pages counted as available: free + inactive + speculative.
    wanted = ("Pages free", "Pages inactive", "Pages speculative")
    counts = dict.fromkeys(wanted, 0)
    for line in vm_stat.split("\n"):
        for label in wanted:
            if label in line:
                # The count follows the colon and ends with a trailing ".".
                counts[label] = int(line.split(":")[1].strip().rstrip("."))
                break

    return sum(counts.values()) * page_size / (1024**3)


def _linux_available_memory_gb() -> float:
    """Read ``MemAvailable`` from /proc/meminfo; -1.0 if the field is absent."""
    with open("/proc/meminfo") as meminfo:
        for line in meminfo:
            if line.startswith("MemAvailable:"):
                # The second field is treated as a size in kB.
                return int(line.split()[1]) / (1024**2)
    return -1.0


def get_available_memory_gb() -> float:
    """Get available system memory in GB.

    On macOS ("Darwin") the value is derived from the ``pagesize`` and
    ``vm_stat`` commands; on Linux it is read from ``/proc/meminfo``.
    Any other platform is reported as undeterminable.

    The probe is best-effort: a missing tool, a failing command, an
    unreadable file or unparsable output yields -1.0 rather than an
    exception. Only those expected failure types are absorbed, so unrelated
    programming errors still surface.

    Returns:
        Available memory in GB (1024**3 bytes), or -1.0 if unable to determine
    """
    probes = {
        "Darwin": _darwin_available_memory_gb,
        "Linux": _linux_available_memory_gb,
    }
    probe = probes.get(platform.system())
    if probe is None:
        return -1.0

    try:
        return probe()
    except (OSError, subprocess.SubprocessError, ValueError, IndexError):
        # OSError: tool or file missing/unreadable; SubprocessError: command
        # exited non-zero; ValueError/IndexError: unexpected output format.
        return -1.0
94
+
95
+
96
def parse_memory_string(mem_str: str) -> float:
    """Parse memory string (e.g., '12g', '8192m') to GB.

    Matching is case-insensitive and ignores surrounding whitespace.
    Units are binary: 1 GB = 1024 MB = 1024**2 KB = 1024**3 bytes.

    Args:
        mem_str: Memory string with unit. Accepted suffixes are
            b, k/kb, m/mb, g/gb and t/tb. A bare number without a unit
            is rejected.

    Returns:
        Memory in GB

    Raises:
        ValueError: If the unit is missing or unknown, the numeric part is
            not a number, or the value is negative or not finite.
    """
    import math

    # Two-letter suffixes come first so that "gb" is not mistaken for "b".
    # Every factor is a power of two, so the scaling is exact.
    gb_per_unit = (
        ("tb", 1024.0),
        ("gb", 1.0),
        ("mb", 1.0 / 1024),
        ("kb", 1.0 / 1024**2),
        ("t", 1024.0),
        ("g", 1.0),
        ("m", 1.0 / 1024),
        ("k", 1.0 / 1024**2),
        ("b", 1.0 / 1024**3),
    )

    normalized = mem_str.lower().strip()
    for suffix, factor in gb_per_unit:
        if normalized.endswith(suffix):
            number_text = normalized[: -len(suffix)]
            break
    else:
        raise ValueError(f"Invalid memory format: {mem_str}")

    try:
        amount = float(number_text)
    except ValueError:
        # Re-raise with the user's input instead of float()'s generic text.
        raise ValueError(f"Invalid memory format: {mem_str}") from None

    # float() accepts "nan"/"inf"; neither those nor negative sizes are
    # meaningful memory limits.
    if not math.isfinite(amount) or amount < 0:
        raise ValueError(f"Invalid memory format: {mem_str}")

    return amount * factor
117
+
118
+
119
def validate_memory_availability(required_gb: float) -> tuple[bool, str]:
    """Decide whether the host currently has enough free memory.

    Args:
        required_gb: Amount of memory needed, in GB

    Returns:
        ``(True, "")`` when enough memory is available or when the available
        amount cannot be determined; otherwise ``(False, message)`` where
        ``message`` describes the shortfall.
    """
    available_gb = get_available_memory_gb()

    # A negative reading means "unknown": do not block startup in that case.
    unknown = available_gb < 0
    if unknown or not available_gb < required_gb:
        return True, ""

    shortfall_gb = required_gb - available_gb
    message = (
        f"Insufficient memory available for container startup.\n"
        f" Required: {required_gb:.1f} GB\n"
        f" Available: {available_gb:.1f} GB\n"
        f" Short by: {shortfall_gb:.1f} GB\n\n"
        f"Please close other applications or use a smaller profile (--profile minimal)"
    )
    return False, message
145
+
44
146
 
45
147
  # =============================================================================
46
148
  # Update checking functions
@@ -752,6 +854,34 @@ Examples:
752
854
  help="Conversion timeout in seconds (default: 1200, can be set via MDIFY_TIMEOUT env var)",
753
855
  )
754
856
 
857
+ parser.add_argument(
858
+ "--memory",
859
+ type=str,
860
+ default=None,
861
+ help="Container memory limit (e.g., 2g, 512m, 4096m). Overrides --profile setting",
862
+ )
863
+
864
+ parser.add_argument(
865
+ "--cpus",
866
+ type=int,
867
+ default=None,
868
+ help="Number of CPUs to allocate to container. Overrides --profile setting",
869
+ )
870
+
871
+ parser.add_argument(
872
+ "--profile",
873
+ type=str,
874
+ choices=["minimal", "default", "heavy"],
875
+ default="default",
876
+ help="Resource profile for container: minimal (4 CPU, 8GB), default (6 CPU, 12GB), heavy (8 CPU, 16GB)",
877
+ )
878
+
879
+ parser.add_argument(
880
+ "--skip-memory-check",
881
+ action="store_true",
882
+ help="Skip memory availability validation (not recommended)",
883
+ )
884
+
755
885
  # Utility options
756
886
  parser.add_argument(
757
887
  "--check-update",
@@ -952,7 +1082,25 @@ def main() -> int:
952
1082
 
953
1083
  try:
954
1084
  if not args.quiet:
955
- print(f"Starting docling-serve container...")
1085
+ print(f"Starting docling-serve container...\\n")
1086
+
1087
+ # Apply resource profile
1088
+ profile = RESOURCE_PROFILES[args.profile]
1089
+ cpus = args.cpus if args.cpus is not None else profile["cpus"]
1090
+ memory = args.memory if args.memory is not None else profile["memory"]
1091
+
1092
+ # Validate memory availability unless skipped
1093
+ if not args.skip_memory_check:
1094
+ required_gb = parse_memory_string(memory)
1095
+ is_sufficient, error_msg = validate_memory_availability(required_gb)
1096
+ if not is_sufficient:
1097
+ print(f"Error: {error_msg}", file=sys.stderr)
1098
+ return 1
1099
+
1100
+ if not args.quiet:
1101
+ print(f"Resource profile: {args.profile} ({cpus} CPUs, {memory} memory)")
1102
+ if args.cpus or args.memory:
1103
+ print(" (customized via command-line arguments)")
956
1104
  print()
957
1105
 
958
1106
  with DoclingContainer(
@@ -961,6 +1109,8 @@ def main() -> int:
961
1109
  args.port,
962
1110
  timeout=timeout,
963
1111
  keep_container=DEBUG,
1112
+ memory=memory,
1113
+ cpus=cpus,
964
1114
  ) as container:
965
1115
  # Convert files
966
1116
  conversion_start = time.time()
@@ -1020,6 +1170,60 @@ def main() -> int:
1020
1170
  f"{progress} {input_file.name} ✗ ({format_duration(elapsed)})"
1021
1171
  )
1022
1172
  print(f" Error: {error_msg}", file=sys.stderr)
1173
+
1174
+ # Check if it's a connection error and retrieve logs
1175
+ is_connection_error = "Connection refused" in error_msg or "Connection aborted" in error_msg or "RemoteDisconnected" in error_msg
1176
+ if is_connection_error:
1177
+ container_alive = container.is_ready()
1178
+ if container_alive:
1179
+ print(
1180
+ " Connection lost (server may have crashed and restarted)",
1181
+ file=sys.stderr,
1182
+ )
1183
+ else:
1184
+ print(
1185
+ " Container crashed while processing file",
1186
+ file=sys.stderr,
1187
+ )
1188
+ print(
1189
+ " File may be too complex, large, or malformed",
1190
+ file=sys.stderr,
1191
+ )
1192
+
1193
+ # Always show logs for connection errors
1194
+ print(" Retrieving container logs...", file=sys.stderr)
1195
+ logs, log_error = container.get_logs(tail=50)
1196
+ if logs:
1197
+ print(" Container logs (last 50 lines):", file=sys.stderr)
1198
+ for line in logs.strip().split("\n"):
1199
+ if line.strip():
1200
+ print(f" {line}", file=sys.stderr)
1201
+ elif log_error:
1202
+ print(f" Error retrieving logs: {log_error}", file=sys.stderr)
1203
+ else:
1204
+ print(" No logs available (container may have been removed)", file=sys.stderr)
1205
+
1206
+ # Restart container if it crashed
1207
+ if not container_alive:
1208
+ print(" Container crashed - attempting to restart...", file=sys.stderr)
1209
+ try:
1210
+ # Stop and remove the dead container
1211
+ container.stop()
1212
+ container.remove()
1213
+ # Generate new container name to avoid conflicts
1214
+ import uuid
1215
+ container.container_name = f"mdify-serve-{uuid.uuid4().hex[:8]}"
1216
+ # Start a new one
1217
+ container.start(timeout=120)
1218
+ print(" Container restarted successfully", file=sys.stderr)
1219
+ print(" Continuing with next file...", file=sys.stderr)
1220
+ except Exception as restart_error:
1221
+ print(f" Failed to restart container: {restart_error}", file=sys.stderr)
1222
+ if DEBUG:
1223
+ import traceback
1224
+ traceback.print_exc()
1225
+ print(" Stopping remaining conversions", file=sys.stderr)
1226
+ break
1023
1227
  except Exception as e:
1024
1228
  elapsed = time.time() - start_time
1025
1229
  failed_count += 1
@@ -1031,6 +1235,10 @@ def main() -> int:
1031
1235
  error_msg = str(e)
1032
1236
  is_connection_error = "Connection refused" in error_msg or "Connection aborted" in error_msg or "RemoteDisconnected" in error_msg
1033
1237
 
1238
+ if DEBUG:
1239
+ print(f" DEBUG: Exception caught: {type(e).__name__}", file=sys.stderr)
1240
+ print(f" DEBUG: is_connection_error={is_connection_error}", file=sys.stderr)
1241
+
1034
1242
  if is_connection_error:
1035
1243
  container_alive = container.is_ready()
1036
1244
  if not args.quiet:
@@ -1078,9 +1286,27 @@ def main() -> int:
1078
1286
  if not container_alive:
1079
1287
  print(" Stopping remaining conversions", file=sys.stderr)
1080
1288
 
1081
- # Stop processing if container is dead
1289
+ # Restart container if it crashed
1082
1290
  if not container_alive:
1083
- break
1291
+ print(" Container crashed - attempting to restart...", file=sys.stderr)
1292
+ try:
1293
+ # Stop and remove the dead container
1294
+ container.stop()
1295
+ container.remove()
1296
+ # Generate new container name to avoid conflicts
1297
+ import uuid
1298
+ container.container_name = f"mdify-serve-{uuid.uuid4().hex[:8]}"
1299
+ # Start a new one
1300
+ container.start(timeout=120)
1301
+ print(" Container restarted successfully", file=sys.stderr)
1302
+ print(" Continuing with next file...", file=sys.stderr)
1303
+ except Exception as restart_error:
1304
+ print(f" Failed to restart container: {restart_error}", file=sys.stderr)
1305
+ if DEBUG:
1306
+ import traceback
1307
+ traceback.print_exc()
1308
+ print(" Stopping remaining conversions", file=sys.stderr)
1309
+ break
1084
1310
  else:
1085
1311
  # Non-connection error
1086
1312
  if not args.quiet:
@@ -27,6 +27,8 @@ class DoclingContainer:
27
27
  port: int = 5001,
28
28
  timeout: int = 1200,
29
29
  keep_container: bool = False,
30
+ memory: Optional[str] = None,
31
+ cpus: Optional[int] = None,
30
32
  ):
31
33
  """Initialize container manager.
32
34
 
@@ -36,12 +38,16 @@ class DoclingContainer:
36
38
  port: Host port to bind (default: 5001)
37
39
  timeout: Conversion timeout in seconds (default: 1200)
38
40
  keep_container: If True, do not auto-remove container (preserve logs)
41
+ memory: Memory limit (e.g., "2g", "512m"). None for no limit.
42
+ cpus: Number of CPUs to allocate. None for no limit.
39
43
  """
40
44
  self.runtime = runtime
41
45
  self.image = image
42
46
  self.port = port
43
47
  self.timeout = timeout
44
48
  self.keep_container = keep_container
49
+ self.memory = memory
50
+ self.cpus = cpus
45
51
  self.container_name = f"mdify-serve-{uuid.uuid4().hex[:8]}"
46
52
  self.container_id: Optional[str] = None
47
53
 
@@ -110,6 +116,15 @@ class DoclingContainer:
110
116
  ]
111
117
  if not self.keep_container:
112
118
  cmd.insert(3, "--rm") # Auto-remove on stop
119
+
120
+ # Add resource limits if specified
121
+ if self.cpus:
122
+ cmd.insert(3, str(self.cpus))
123
+ cmd.insert(3, "--cpus")
124
+
125
+ if self.memory:
126
+ cmd.insert(3, self.memory)
127
+ cmd.insert(3, "-m")
113
128
 
114
129
  try:
115
130
  result = subprocess.run(cmd, capture_output=True, text=True, check=True)
@@ -135,6 +150,15 @@ class DoclingContainer:
135
150
  check=False,
136
151
  )
137
152
 
153
def remove(self) -> None:
    """Force-remove the container by name via ``<runtime> rm -f``.

    Does nothing when no container name is set. The command's exit status
    and output are ignored, so calling this repeatedly is harmless.
    """
    if not self.container_name:
        return

    rm_command = [self.runtime, "rm", "-f", self.container_name]
    # check=False: a non-zero exit from the runtime is deliberately ignored.
    subprocess.run(rm_command, capture_output=True, check=False)
161
+
138
162
  def get_logs(self, tail: int = 50) -> tuple[str, str]:
139
163
  """Get container logs for debugging.
140
164
 
@@ -148,8 +172,17 @@ class DoclingContainer:
148
172
  return ("", "No container name set")
149
173
 
150
174
  try:
175
+ import os
176
+ runtime_name = os.path.basename(self.runtime)
177
+
178
+ # Apple Container uses -n instead of --tail
179
+ if runtime_name == "container":
180
+ cmd = [self.runtime, "logs", "-n", str(tail), self.container_name]
181
+ else:
182
+ cmd = [self.runtime, "logs", "--tail", str(tail), self.container_name]
183
+
151
184
  result = subprocess.run(
152
- [self.runtime, "logs", "--tail", str(tail), self.container_name],
185
+ cmd,
153
186
  capture_output=True,
154
187
  text=True,
155
188
  check=False,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 2.11.6
3
+ Version: 2.11.8
4
4
  Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "mdify-cli"
3
- version = "2.11.6"
3
+ version = "2.11.8"
4
4
  description = "Convert PDFs and document images into structured Markdown for LLM workflows"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.8"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes