mdify-cli 1.4.1__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdify/__init__.py +1 -1
- mdify/cli.py +163 -114
- {mdify_cli-1.4.1.dist-info → mdify_cli-1.6.0.dist-info}/METADATA +5 -3
- mdify_cli-1.6.0.dist-info/RECORD +10 -0
- {mdify_cli-1.4.1.dist-info → mdify_cli-1.6.0.dist-info}/WHEEL +1 -1
- mdify_cli-1.4.1.dist-info/RECORD +0 -10
- {mdify_cli-1.4.1.dist-info → mdify_cli-1.6.0.dist-info}/entry_points.txt +0 -0
- {mdify_cli-1.4.1.dist-info → mdify_cli-1.6.0.dist-info}/licenses/LICENSE +0 -0
- {mdify_cli-1.4.1.dist-info → mdify_cli-1.6.0.dist-info}/top_level.txt +0 -0
mdify/__init__.py
CHANGED
mdify/cli.py
CHANGED
|
@@ -37,10 +37,11 @@ SUPPORTED_RUNTIMES = ("docker", "podman")
|
|
|
37
37
|
# Update checking functions
|
|
38
38
|
# =============================================================================
|
|
39
39
|
|
|
40
|
+
|
|
40
41
|
def _get_remote_version(timeout: int = 5) -> Optional[str]:
|
|
41
42
|
"""
|
|
42
43
|
Fetch the latest version from PyPI.
|
|
43
|
-
|
|
44
|
+
|
|
44
45
|
Returns:
|
|
45
46
|
Version string (e.g., "1.1.0") or None if fetch failed.
|
|
46
47
|
"""
|
|
@@ -56,16 +57,16 @@ def _get_remote_version(timeout: int = 5) -> Optional[str]:
|
|
|
56
57
|
def _should_check_for_update() -> bool:
|
|
57
58
|
"""
|
|
58
59
|
Determine if we should check for updates based on last check time.
|
|
59
|
-
|
|
60
|
+
|
|
60
61
|
Returns:
|
|
61
62
|
True if check should be performed, False otherwise.
|
|
62
63
|
"""
|
|
63
64
|
if os.environ.get("MDIFY_NO_UPDATE_CHECK", "").lower() in ("1", "true", "yes"):
|
|
64
65
|
return False
|
|
65
|
-
|
|
66
|
+
|
|
66
67
|
if not LAST_CHECK_FILE.exists():
|
|
67
68
|
return True
|
|
68
|
-
|
|
69
|
+
|
|
69
70
|
try:
|
|
70
71
|
last_check = float(LAST_CHECK_FILE.read_text().strip())
|
|
71
72
|
elapsed = time.time() - last_check
|
|
@@ -86,18 +87,18 @@ def _update_last_check_time() -> None:
|
|
|
86
87
|
def _compare_versions(current: str, remote: str) -> bool:
|
|
87
88
|
"""
|
|
88
89
|
Compare version strings.
|
|
89
|
-
|
|
90
|
+
|
|
90
91
|
Returns:
|
|
91
92
|
True if remote version is newer than current.
|
|
92
93
|
"""
|
|
93
94
|
try:
|
|
94
95
|
current_parts = [int(x) for x in current.split(".")]
|
|
95
96
|
remote_parts = [int(x) for x in remote.split(".")]
|
|
96
|
-
|
|
97
|
+
|
|
97
98
|
max_len = max(len(current_parts), len(remote_parts))
|
|
98
99
|
current_parts.extend([0] * (max_len - len(current_parts)))
|
|
99
100
|
remote_parts.extend([0] * (max_len - len(remote_parts)))
|
|
100
|
-
|
|
101
|
+
|
|
101
102
|
return remote_parts > current_parts
|
|
102
103
|
except (ValueError, AttributeError):
|
|
103
104
|
return False
|
|
@@ -106,15 +107,15 @@ def _compare_versions(current: str, remote: str) -> bool:
|
|
|
106
107
|
def check_for_update(force: bool = False) -> None:
|
|
107
108
|
"""
|
|
108
109
|
Check for updates and prompt user to upgrade if available.
|
|
109
|
-
|
|
110
|
+
|
|
110
111
|
Args:
|
|
111
112
|
force: If True, check regardless of last check time and show errors.
|
|
112
113
|
"""
|
|
113
114
|
if not force and not _should_check_for_update():
|
|
114
115
|
return
|
|
115
|
-
|
|
116
|
+
|
|
116
117
|
remote_version = _get_remote_version()
|
|
117
|
-
|
|
118
|
+
|
|
118
119
|
if remote_version is None:
|
|
119
120
|
if force:
|
|
120
121
|
print(
|
|
@@ -124,19 +125,19 @@ def check_for_update(force: bool = False) -> None:
|
|
|
124
125
|
)
|
|
125
126
|
sys.exit(1)
|
|
126
127
|
return
|
|
127
|
-
|
|
128
|
+
|
|
128
129
|
_update_last_check_time()
|
|
129
|
-
|
|
130
|
+
|
|
130
131
|
if not _compare_versions(__version__, remote_version):
|
|
131
132
|
if force:
|
|
132
133
|
print(f"mdify is up to date (version {__version__})")
|
|
133
134
|
return
|
|
134
|
-
|
|
135
|
-
print(f"\n{'='*50}")
|
|
135
|
+
|
|
136
|
+
print(f"\n{'=' * 50}")
|
|
136
137
|
print(f"A new version of mdify-cli is available!")
|
|
137
138
|
print(f" Current version: {__version__}")
|
|
138
139
|
print(f" Latest version: {remote_version}")
|
|
139
|
-
print(f"{'='*50}")
|
|
140
|
+
print(f"{'=' * 50}")
|
|
140
141
|
print(f"\nTo upgrade, run:")
|
|
141
142
|
print(f" pipx upgrade mdify-cli")
|
|
142
143
|
print(f" # or: pip install --upgrade mdify-cli\n")
|
|
@@ -146,13 +147,18 @@ def check_for_update(force: bool = False) -> None:
|
|
|
146
147
|
# Container runtime functions
|
|
147
148
|
# =============================================================================
|
|
148
149
|
|
|
149
|
-
|
|
150
|
+
|
|
151
|
+
def detect_runtime(preferred: str, explicit: bool = True) -> Optional[str]:
|
|
150
152
|
"""
|
|
151
153
|
Detect available container runtime.
|
|
152
|
-
|
|
154
|
+
|
|
153
155
|
Args:
|
|
154
156
|
preferred: Preferred runtime ('docker' or 'podman')
|
|
155
|
-
|
|
157
|
+
explicit: If True, warn when falling back to alternative.
|
|
158
|
+
If False, silently use alternative without warning.
|
|
159
|
+
Note: This only controls warning emission; selection order
|
|
160
|
+
is always preferred → alternative regardless of this flag.
|
|
161
|
+
|
|
156
162
|
Returns:
|
|
157
163
|
Path to runtime executable, or None if not found.
|
|
158
164
|
"""
|
|
@@ -160,25 +166,28 @@ def detect_runtime(preferred: str) -> Optional[str]:
|
|
|
160
166
|
runtime_path = shutil.which(preferred)
|
|
161
167
|
if runtime_path:
|
|
162
168
|
return runtime_path
|
|
163
|
-
|
|
169
|
+
|
|
164
170
|
# Try alternative
|
|
165
171
|
alternative = "podman" if preferred == "docker" else "docker"
|
|
166
172
|
runtime_path = shutil.which(alternative)
|
|
167
173
|
if runtime_path:
|
|
168
|
-
|
|
174
|
+
if explicit:
|
|
175
|
+
print(
|
|
176
|
+
f"Warning: {preferred} not found, using {alternative}", file=sys.stderr
|
|
177
|
+
)
|
|
169
178
|
return runtime_path
|
|
170
|
-
|
|
179
|
+
|
|
171
180
|
return None
|
|
172
181
|
|
|
173
182
|
|
|
174
183
|
def check_image_exists(runtime: str, image: str) -> bool:
|
|
175
184
|
"""
|
|
176
185
|
Check if container image exists locally.
|
|
177
|
-
|
|
186
|
+
|
|
178
187
|
Args:
|
|
179
188
|
runtime: Path to container runtime
|
|
180
189
|
image: Image name/tag
|
|
181
|
-
|
|
190
|
+
|
|
182
191
|
Returns:
|
|
183
192
|
True if image exists locally.
|
|
184
193
|
"""
|
|
@@ -196,18 +205,18 @@ def check_image_exists(runtime: str, image: str) -> bool:
|
|
|
196
205
|
def pull_image(runtime: str, image: str, quiet: bool = False) -> bool:
|
|
197
206
|
"""
|
|
198
207
|
Pull container image.
|
|
199
|
-
|
|
208
|
+
|
|
200
209
|
Args:
|
|
201
210
|
runtime: Path to container runtime
|
|
202
211
|
image: Image name/tag
|
|
203
212
|
quiet: Suppress progress output
|
|
204
|
-
|
|
213
|
+
|
|
205
214
|
Returns:
|
|
206
215
|
True if pull succeeded.
|
|
207
216
|
"""
|
|
208
217
|
if not quiet:
|
|
209
218
|
print(f"Pulling image: {image}")
|
|
210
|
-
|
|
219
|
+
|
|
211
220
|
try:
|
|
212
221
|
result = subprocess.run(
|
|
213
222
|
[runtime, "pull", image],
|
|
@@ -222,9 +231,9 @@ def pull_image(runtime: str, image: str, quiet: bool = False) -> bool:
|
|
|
222
231
|
|
|
223
232
|
def format_size(size_bytes: int) -> str:
|
|
224
233
|
"""Format file size in human-readable format."""
|
|
225
|
-
for unit in [
|
|
234
|
+
for unit in ["B", "KB", "MB", "GB"]:
|
|
226
235
|
if size_bytes < 1024:
|
|
227
|
-
return f"{size_bytes:.1f} {unit}" if unit !=
|
|
236
|
+
return f"{size_bytes:.1f} {unit}" if unit != "B" else f"{size_bytes} {unit}"
|
|
228
237
|
size_bytes /= 1024
|
|
229
238
|
return f"{size_bytes:.1f} TB"
|
|
230
239
|
|
|
@@ -244,29 +253,33 @@ def format_duration(seconds: float) -> str:
|
|
|
244
253
|
|
|
245
254
|
class Spinner:
|
|
246
255
|
"""A simple spinner to show progress during long operations."""
|
|
247
|
-
|
|
256
|
+
|
|
248
257
|
def __init__(self):
|
|
249
|
-
self.frames = [
|
|
258
|
+
self.frames = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]
|
|
250
259
|
self.running = False
|
|
251
260
|
self.thread = None
|
|
252
261
|
self.start_time = None
|
|
253
|
-
|
|
262
|
+
|
|
254
263
|
def _spin(self):
|
|
255
264
|
idx = 0
|
|
256
265
|
while self.running:
|
|
257
266
|
elapsed = time.time() - self.start_time
|
|
258
267
|
frame = self.frames[idx % len(self.frames)]
|
|
259
|
-
print(
|
|
268
|
+
print(
|
|
269
|
+
f"\r{self.prefix} {frame} ({format_duration(elapsed)})",
|
|
270
|
+
end="",
|
|
271
|
+
flush=True,
|
|
272
|
+
)
|
|
260
273
|
idx += 1
|
|
261
274
|
time.sleep(0.1)
|
|
262
|
-
|
|
275
|
+
|
|
263
276
|
def start(self, prefix: str = ""):
|
|
264
277
|
self.prefix = prefix
|
|
265
278
|
self.running = True
|
|
266
279
|
self.start_time = time.time()
|
|
267
280
|
self.thread = threading.Thread(target=self._spin, daemon=True)
|
|
268
281
|
self.thread.start()
|
|
269
|
-
|
|
282
|
+
|
|
270
283
|
def stop(self):
|
|
271
284
|
self.running = False
|
|
272
285
|
if self.thread:
|
|
@@ -284,42 +297,48 @@ def run_container(
|
|
|
284
297
|
) -> Tuple[bool, str, float]:
|
|
285
298
|
"""
|
|
286
299
|
Run container to convert a single file.
|
|
287
|
-
|
|
300
|
+
|
|
288
301
|
Args:
|
|
289
302
|
runtime: Path to container runtime
|
|
290
303
|
image: Image name/tag
|
|
291
304
|
input_file: Absolute path to input file
|
|
292
305
|
output_file: Absolute path to output file
|
|
293
306
|
mask_pii: Whether to mask PII in images
|
|
294
|
-
|
|
307
|
+
|
|
295
308
|
Returns:
|
|
296
309
|
Tuple of (success: bool, message: str, elapsed_seconds: float)
|
|
297
310
|
"""
|
|
298
311
|
start_time = time.time()
|
|
299
|
-
|
|
312
|
+
|
|
300
313
|
# Ensure output directory exists
|
|
301
314
|
output_file.parent.mkdir(parents=True, exist_ok=True)
|
|
302
|
-
|
|
315
|
+
|
|
303
316
|
# Mount directories
|
|
304
317
|
input_dir = input_file.parent
|
|
305
318
|
output_dir = output_file.parent
|
|
306
|
-
|
|
319
|
+
|
|
307
320
|
# Container paths
|
|
308
321
|
container_in = f"/work/in/{input_file.name}"
|
|
309
322
|
container_out = f"/work/out/{output_file.name}"
|
|
310
|
-
|
|
323
|
+
|
|
311
324
|
cmd = [
|
|
312
|
-
runtime,
|
|
313
|
-
"
|
|
314
|
-
"
|
|
325
|
+
runtime,
|
|
326
|
+
"run",
|
|
327
|
+
"--rm",
|
|
328
|
+
"-v",
|
|
329
|
+
f"{input_dir}:/work/in:ro",
|
|
330
|
+
"-v",
|
|
331
|
+
f"{output_dir}:/work/out",
|
|
315
332
|
image,
|
|
316
|
-
"--in",
|
|
317
|
-
|
|
333
|
+
"--in",
|
|
334
|
+
container_in,
|
|
335
|
+
"--out",
|
|
336
|
+
container_out,
|
|
318
337
|
]
|
|
319
|
-
|
|
338
|
+
|
|
320
339
|
if mask_pii:
|
|
321
340
|
cmd.append("--mask")
|
|
322
|
-
|
|
341
|
+
|
|
323
342
|
try:
|
|
324
343
|
result = subprocess.run(
|
|
325
344
|
cmd,
|
|
@@ -328,13 +347,15 @@ def run_container(
|
|
|
328
347
|
check=False,
|
|
329
348
|
)
|
|
330
349
|
elapsed = time.time() - start_time
|
|
331
|
-
|
|
350
|
+
|
|
332
351
|
if result.returncode == 0:
|
|
333
352
|
return True, "success", elapsed
|
|
334
353
|
else:
|
|
335
|
-
error_msg =
|
|
354
|
+
error_msg = (
|
|
355
|
+
result.stderr.strip() or result.stdout.strip() or "Unknown error"
|
|
356
|
+
)
|
|
336
357
|
return False, error_msg, elapsed
|
|
337
|
-
|
|
358
|
+
|
|
338
359
|
except OSError as e:
|
|
339
360
|
elapsed = time.time() - start_time
|
|
340
361
|
return False, str(e), elapsed
|
|
@@ -346,22 +367,39 @@ def run_container(
|
|
|
346
367
|
|
|
347
368
|
# Supported file extensions (based on Docling InputFormat)
|
|
348
369
|
SUPPORTED_EXTENSIONS = {
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
370
|
+
".pdf",
|
|
371
|
+
".docx",
|
|
372
|
+
".pptx",
|
|
373
|
+
".html",
|
|
374
|
+
".htm",
|
|
375
|
+
".png",
|
|
376
|
+
".jpg",
|
|
377
|
+
".jpeg",
|
|
378
|
+
".gif",
|
|
379
|
+
".bmp",
|
|
380
|
+
".tiff",
|
|
381
|
+
".tif", # images
|
|
382
|
+
".asciidoc",
|
|
383
|
+
".adoc",
|
|
384
|
+
".asc", # asciidoc
|
|
385
|
+
".md",
|
|
386
|
+
".markdown", # markdown
|
|
387
|
+
".csv",
|
|
388
|
+
".xlsx", # spreadsheets
|
|
389
|
+
".xml", # XML formats
|
|
390
|
+
".json", # JSON docling
|
|
391
|
+
".mp3",
|
|
392
|
+
".wav",
|
|
393
|
+
".m4a",
|
|
394
|
+
".flac", # audio
|
|
395
|
+
".vtt", # subtitles
|
|
358
396
|
}
|
|
359
397
|
|
|
360
398
|
|
|
361
399
|
def get_files_to_convert(input_path: Path, mask: str, recursive: bool) -> List[Path]:
|
|
362
400
|
"""Get list of files to convert based on input path and options."""
|
|
363
401
|
files = []
|
|
364
|
-
|
|
402
|
+
|
|
365
403
|
if input_path.is_file():
|
|
366
404
|
files.append(input_path)
|
|
367
405
|
elif input_path.is_dir():
|
|
@@ -369,19 +407,19 @@ def get_files_to_convert(input_path: Path, mask: str, recursive: bool) -> List[P
|
|
|
369
407
|
files = list(input_path.rglob(mask))
|
|
370
408
|
else:
|
|
371
409
|
files = list(input_path.glob(mask))
|
|
372
|
-
|
|
410
|
+
|
|
373
411
|
# Filter to only files
|
|
374
412
|
files = [f for f in files if f.is_file()]
|
|
375
413
|
else:
|
|
376
414
|
raise FileNotFoundError(f"Input path does not exist: {input_path}")
|
|
377
|
-
|
|
415
|
+
|
|
378
416
|
# Filter out hidden files and unsupported formats
|
|
379
417
|
files = [
|
|
380
|
-
f
|
|
381
|
-
|
|
382
|
-
and f.suffix.lower() in SUPPORTED_EXTENSIONS
|
|
418
|
+
f
|
|
419
|
+
for f in files
|
|
420
|
+
if not f.name.startswith(".") and f.suffix.lower() in SUPPORTED_EXTENSIONS
|
|
383
421
|
]
|
|
384
|
-
|
|
422
|
+
|
|
385
423
|
return files
|
|
386
424
|
|
|
387
425
|
|
|
@@ -414,7 +452,7 @@ def get_output_path(
|
|
|
414
452
|
output_path = output_dir / relative_path.parent / output_name
|
|
415
453
|
except ValueError:
|
|
416
454
|
output_path = output_dir / output_name
|
|
417
|
-
|
|
455
|
+
|
|
418
456
|
return output_path
|
|
419
457
|
|
|
420
458
|
|
|
@@ -422,6 +460,7 @@ def get_output_path(
|
|
|
422
460
|
# CLI argument parsing
|
|
423
461
|
# =============================================================================
|
|
424
462
|
|
|
463
|
+
|
|
425
464
|
def parse_args() -> argparse.Namespace:
|
|
426
465
|
"""Parse command line arguments."""
|
|
427
466
|
parser = argparse.ArgumentParser(
|
|
@@ -436,74 +475,79 @@ Examples:
|
|
|
436
475
|
mdify ./docs --runtime podman Use Podman instead of Docker
|
|
437
476
|
""",
|
|
438
477
|
)
|
|
439
|
-
|
|
478
|
+
|
|
440
479
|
parser.add_argument(
|
|
441
480
|
"input",
|
|
442
481
|
type=str,
|
|
443
482
|
nargs="?",
|
|
444
483
|
help="Input file or directory to convert",
|
|
445
484
|
)
|
|
446
|
-
|
|
485
|
+
|
|
447
486
|
parser.add_argument(
|
|
448
|
-
"-o",
|
|
487
|
+
"-o",
|
|
488
|
+
"--out-dir",
|
|
449
489
|
type=str,
|
|
450
490
|
default="output",
|
|
451
491
|
help="Output directory for converted files (default: output)",
|
|
452
492
|
)
|
|
453
|
-
|
|
493
|
+
|
|
454
494
|
parser.add_argument(
|
|
455
|
-
"-g",
|
|
495
|
+
"-g",
|
|
496
|
+
"--glob",
|
|
456
497
|
type=str,
|
|
457
498
|
default="*",
|
|
458
499
|
help="Glob pattern for filtering files in directory (default: *)",
|
|
459
500
|
)
|
|
460
|
-
|
|
501
|
+
|
|
461
502
|
parser.add_argument(
|
|
462
|
-
"-r",
|
|
503
|
+
"-r",
|
|
504
|
+
"--recursive",
|
|
463
505
|
action="store_true",
|
|
464
506
|
help="Recursively scan directories",
|
|
465
507
|
)
|
|
466
|
-
|
|
508
|
+
|
|
467
509
|
parser.add_argument(
|
|
468
510
|
"--flat",
|
|
469
511
|
action="store_true",
|
|
470
512
|
help="Disable directory structure preservation in output",
|
|
471
513
|
)
|
|
472
|
-
|
|
514
|
+
|
|
473
515
|
parser.add_argument(
|
|
474
516
|
"--overwrite",
|
|
475
517
|
action="store_true",
|
|
476
518
|
help="Overwrite existing output files",
|
|
477
519
|
)
|
|
478
|
-
|
|
520
|
+
|
|
479
521
|
parser.add_argument(
|
|
480
|
-
"-q",
|
|
522
|
+
"-q",
|
|
523
|
+
"--quiet",
|
|
481
524
|
action="store_true",
|
|
482
525
|
help="Suppress progress messages",
|
|
483
526
|
)
|
|
484
|
-
|
|
527
|
+
|
|
485
528
|
parser.add_argument(
|
|
486
|
-
"-m",
|
|
529
|
+
"-m",
|
|
530
|
+
"--mask",
|
|
487
531
|
action="store_true",
|
|
488
532
|
help="Mask PII and sensitive content in document images",
|
|
489
533
|
)
|
|
490
|
-
|
|
534
|
+
|
|
491
535
|
# Container options
|
|
492
536
|
parser.add_argument(
|
|
493
537
|
"--runtime",
|
|
494
538
|
type=str,
|
|
495
539
|
choices=SUPPORTED_RUNTIMES,
|
|
496
|
-
default=
|
|
497
|
-
help="Container runtime to use (
|
|
540
|
+
default=None,
|
|
541
|
+
help="Container runtime to use (auto-detects docker or podman if not specified)",
|
|
498
542
|
)
|
|
499
|
-
|
|
543
|
+
|
|
500
544
|
parser.add_argument(
|
|
501
545
|
"--image",
|
|
502
546
|
type=str,
|
|
503
547
|
default=DEFAULT_IMAGE,
|
|
504
548
|
help=f"Container image to use (default: {DEFAULT_IMAGE})",
|
|
505
549
|
)
|
|
506
|
-
|
|
550
|
+
|
|
507
551
|
parser.add_argument(
|
|
508
552
|
"--pull",
|
|
509
553
|
type=str,
|
|
@@ -511,20 +555,20 @@ Examples:
|
|
|
511
555
|
default="missing",
|
|
512
556
|
help="Image pull policy: always, missing, never (default: missing)",
|
|
513
557
|
)
|
|
514
|
-
|
|
558
|
+
|
|
515
559
|
# Utility options
|
|
516
560
|
parser.add_argument(
|
|
517
561
|
"--check-update",
|
|
518
562
|
action="store_true",
|
|
519
563
|
help="Check for available updates and exit",
|
|
520
564
|
)
|
|
521
|
-
|
|
565
|
+
|
|
522
566
|
parser.add_argument(
|
|
523
567
|
"--version",
|
|
524
568
|
action="version",
|
|
525
569
|
version=f"mdify {__version__}",
|
|
526
570
|
)
|
|
527
|
-
|
|
571
|
+
|
|
528
572
|
return parser.parse_args()
|
|
529
573
|
|
|
530
574
|
|
|
@@ -532,27 +576,30 @@ Examples:
|
|
|
532
576
|
# Main entry point
|
|
533
577
|
# =============================================================================
|
|
534
578
|
|
|
579
|
+
|
|
535
580
|
def main() -> int:
|
|
536
581
|
"""Main entry point for the CLI."""
|
|
537
582
|
args = parse_args()
|
|
538
|
-
|
|
583
|
+
|
|
539
584
|
# Handle --check-update flag
|
|
540
585
|
if args.check_update:
|
|
541
586
|
check_for_update(force=True)
|
|
542
587
|
return 0
|
|
543
|
-
|
|
588
|
+
|
|
544
589
|
# Check for updates (daily, silent on errors)
|
|
545
590
|
check_for_update(force=False)
|
|
546
|
-
|
|
591
|
+
|
|
547
592
|
# Validate input is provided
|
|
548
593
|
if args.input is None:
|
|
549
594
|
print("Error: Input file or directory is required", file=sys.stderr)
|
|
550
595
|
print("Usage: mdify <input> [options]", file=sys.stderr)
|
|
551
596
|
print(" mdify --help for more information", file=sys.stderr)
|
|
552
597
|
return 1
|
|
553
|
-
|
|
598
|
+
|
|
554
599
|
# Detect container runtime
|
|
555
|
-
|
|
600
|
+
preferred = args.runtime if args.runtime else "docker"
|
|
601
|
+
explicit = args.runtime is not None
|
|
602
|
+
runtime = detect_runtime(preferred, explicit=explicit)
|
|
556
603
|
if runtime is None:
|
|
557
604
|
print(
|
|
558
605
|
f"Error: Container runtime not found ({', '.join(SUPPORTED_RUNTIMES)})",
|
|
@@ -560,85 +607,87 @@ def main() -> int:
|
|
|
560
607
|
)
|
|
561
608
|
print("Please install Docker or Podman to use mdify.", file=sys.stderr)
|
|
562
609
|
return 2
|
|
563
|
-
|
|
610
|
+
|
|
564
611
|
# Handle image pull policy
|
|
565
612
|
image = args.image
|
|
566
613
|
image_exists = check_image_exists(runtime, image)
|
|
567
|
-
|
|
614
|
+
|
|
568
615
|
if args.pull == "always" or (args.pull == "missing" and not image_exists):
|
|
569
616
|
if not pull_image(runtime, image, args.quiet):
|
|
570
617
|
print(f"Error: Failed to pull image: {image}", file=sys.stderr)
|
|
571
618
|
return 1
|
|
572
619
|
elif args.pull == "never" and not image_exists:
|
|
573
620
|
print(f"Error: Image not found locally: {image}", file=sys.stderr)
|
|
574
|
-
print(f"Run with --pull=missing or pull manually: {
|
|
621
|
+
print(f"Run with --pull=missing or pull manually: {preferred} pull {image}")
|
|
575
622
|
return 1
|
|
576
|
-
|
|
623
|
+
|
|
577
624
|
# Resolve paths
|
|
578
625
|
input_path = Path(args.input).resolve()
|
|
579
626
|
output_dir = Path(args.out_dir).resolve()
|
|
580
|
-
|
|
627
|
+
|
|
581
628
|
# Validate input
|
|
582
629
|
if not input_path.exists():
|
|
583
630
|
print(f"Error: Input path does not exist: {input_path}", file=sys.stderr)
|
|
584
631
|
return 1
|
|
585
|
-
|
|
632
|
+
|
|
586
633
|
# Get files to convert
|
|
587
634
|
try:
|
|
588
635
|
files_to_convert = get_files_to_convert(input_path, args.glob, args.recursive)
|
|
589
636
|
except Exception as e:
|
|
590
637
|
print(f"Error: {e}", file=sys.stderr)
|
|
591
638
|
return 1
|
|
592
|
-
|
|
639
|
+
|
|
593
640
|
if not files_to_convert:
|
|
594
641
|
print(f"No files found to convert in: {input_path}", file=sys.stderr)
|
|
595
642
|
return 1
|
|
596
|
-
|
|
643
|
+
|
|
597
644
|
total_files = len(files_to_convert)
|
|
598
645
|
total_size = sum(f.stat().st_size for f in files_to_convert)
|
|
599
|
-
|
|
646
|
+
|
|
600
647
|
if not args.quiet:
|
|
601
648
|
print(f"Found {total_files} file(s) to convert ({format_size(total_size)})")
|
|
602
649
|
print(f"Using runtime: {runtime}")
|
|
603
650
|
print(f"Using image: {image}")
|
|
604
651
|
print()
|
|
605
|
-
|
|
652
|
+
|
|
606
653
|
# Determine input base for directory structure preservation
|
|
607
654
|
if input_path.is_file():
|
|
608
655
|
input_base = input_path.parent
|
|
609
656
|
else:
|
|
610
657
|
input_base = input_path
|
|
611
|
-
|
|
658
|
+
|
|
612
659
|
# Convert files
|
|
613
660
|
success_count = 0
|
|
614
661
|
skipped_count = 0
|
|
615
662
|
failed_count = 0
|
|
616
663
|
conversion_start = time.time()
|
|
617
664
|
spinner = Spinner()
|
|
618
|
-
|
|
665
|
+
|
|
619
666
|
for idx, input_file in enumerate(files_to_convert, 1):
|
|
620
667
|
output_file = get_output_path(input_file, input_base, output_dir, args.flat)
|
|
621
668
|
file_size = input_file.stat().st_size
|
|
622
669
|
progress = f"[{idx}/{total_files}]"
|
|
623
|
-
|
|
670
|
+
|
|
624
671
|
# Check if output exists and skip if not overwriting
|
|
625
672
|
if output_file.exists() and not args.overwrite:
|
|
626
673
|
if not args.quiet:
|
|
627
674
|
print(f"{progress} Skipped (exists): {input_file.name}")
|
|
628
675
|
skipped_count += 1
|
|
629
676
|
continue
|
|
630
|
-
|
|
677
|
+
|
|
631
678
|
# Show spinner while processing
|
|
632
679
|
if not args.quiet:
|
|
633
|
-
spinner.start(
|
|
634
|
-
|
|
680
|
+
spinner.start(
|
|
681
|
+
f"{progress} Processing: {input_file.name} ({format_size(file_size)})"
|
|
682
|
+
)
|
|
683
|
+
|
|
635
684
|
success, result, elapsed = run_container(
|
|
636
685
|
runtime, image, input_file, output_file, args.mask
|
|
637
686
|
)
|
|
638
|
-
|
|
687
|
+
|
|
639
688
|
if not args.quiet:
|
|
640
689
|
spinner.stop()
|
|
641
|
-
|
|
690
|
+
|
|
642
691
|
if success:
|
|
643
692
|
success_count += 1
|
|
644
693
|
if not args.quiet:
|
|
@@ -648,9 +697,9 @@ def main() -> int:
|
|
|
648
697
|
if not args.quiet:
|
|
649
698
|
print(f"{progress} {input_file.name} ✗ ({format_duration(elapsed)})")
|
|
650
699
|
print(f" Error: {result}", file=sys.stderr)
|
|
651
|
-
|
|
700
|
+
|
|
652
701
|
total_elapsed = time.time() - conversion_start
|
|
653
|
-
|
|
702
|
+
|
|
654
703
|
# Print summary
|
|
655
704
|
if not args.quiet:
|
|
656
705
|
print()
|
|
@@ -662,7 +711,7 @@ def main() -> int:
|
|
|
662
711
|
print(f" Failed: {failed_count}")
|
|
663
712
|
print(f" Total time: {format_duration(total_elapsed)}")
|
|
664
713
|
print("=" * 50)
|
|
665
|
-
|
|
714
|
+
|
|
666
715
|
# Return appropriate exit code
|
|
667
716
|
if failed_count > 0:
|
|
668
717
|
return 1
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mdify-cli
|
|
3
|
-
Version: 1.
|
|
4
|
-
Summary:
|
|
3
|
+
Version: 1.6.0
|
|
4
|
+
Summary: Convert PDFs and document images into structured Markdown for LLM workflows
|
|
5
5
|
Author: tiroq
|
|
6
6
|
License-Expression: MIT
|
|
7
7
|
Project-URL: Homepage, https://github.com/tiroq/mdify
|
|
@@ -24,6 +24,8 @@ Classifier: Topic :: Utilities
|
|
|
24
24
|
Requires-Python: >=3.8
|
|
25
25
|
Description-Content-Type: text/markdown
|
|
26
26
|
License-File: LICENSE
|
|
27
|
+
Provides-Extra: dev
|
|
28
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
27
29
|
Dynamic: license-file
|
|
28
30
|
|
|
29
31
|
# mdify
|
|
@@ -34,7 +36,7 @@ Dynamic: license-file
|
|
|
34
36
|
[](https://github.com/tiroq/mdify/pkgs/container/mdify-runtime)
|
|
35
37
|
[](https://opensource.org/licenses/MIT)
|
|
36
38
|
|
|
37
|
-
A lightweight CLI for converting documents to Markdown. The CLI is fast to install via pipx, while the heavy ML conversion
|
|
39
|
+
A lightweight CLI for converting documents to Markdown. The CLI is fast to install via pipx, while the heavy ML conversion runs inside a container.
|
|
38
40
|
|
|
39
41
|
## Requirements
|
|
40
42
|
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
assets/mdify.png,sha256=qUj7WXWqNwpI2KNXOW79XJwqFqa-UI0JEkmt1mmy4Rg,1820418
|
|
2
|
+
mdify/__init__.py,sha256=tvxIF7MWdoaHBgdk4tT81csn-ZhTTOlfooBYqM4YsMg,90
|
|
3
|
+
mdify/__main__.py,sha256=bhpJ00co6MfaVOdH4XLoW04NtLYDa_oJK7ODzfLrn9M,143
|
|
4
|
+
mdify/cli.py,sha256=sDwkOf4H33l7WmfAR3tw2MjO-7kuIOHcrQXTZto6bF0,20460
|
|
5
|
+
mdify_cli-1.6.0.dist-info/licenses/LICENSE,sha256=NWM66Uv-XuSMKaU-gaPmvfyk4WgE6zcIPr78wyg6GAo,1065
|
|
6
|
+
mdify_cli-1.6.0.dist-info/METADATA,sha256=W-tubNyeCkt6_GAYmS59JHwka8FxQ4D5ZxbHDKFhaLQ,6721
|
|
7
|
+
mdify_cli-1.6.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
8
|
+
mdify_cli-1.6.0.dist-info/entry_points.txt,sha256=0Xki8f5lADQUtwdt6Eq_FEaieI6Byhk8UE7BuDhChMg,41
|
|
9
|
+
mdify_cli-1.6.0.dist-info/top_level.txt,sha256=qltzf7h8owHq7dxCdfCkSHY8gT21hn1_E8P-VWS_OKM,6
|
|
10
|
+
mdify_cli-1.6.0.dist-info/RECORD,,
|
mdify_cli-1.4.1.dist-info/RECORD
DELETED
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
assets/mdify.png,sha256=qUj7WXWqNwpI2KNXOW79XJwqFqa-UI0JEkmt1mmy4Rg,1820418
|
|
2
|
-
mdify/__init__.py,sha256=NWY-5XYsO7gQZs9c4utyzGda6anA_FDBB2LNNUIqsdo,90
|
|
3
|
-
mdify/__main__.py,sha256=bhpJ00co6MfaVOdH4XLoW04NtLYDa_oJK7ODzfLrn9M,143
|
|
4
|
-
mdify/cli.py,sha256=D8_1_6NgWXkexGWqkgB0JO7c1r2T2_Va7J7iGwvewQA,20038
|
|
5
|
-
mdify_cli-1.4.1.dist-info/licenses/LICENSE,sha256=NWM66Uv-XuSMKaU-gaPmvfyk4WgE6zcIPr78wyg6GAo,1065
|
|
6
|
-
mdify_cli-1.4.1.dist-info/METADATA,sha256=sZgTSq6CrpBgpJn0NCnLcBYNTp2e0byKeFkAOO6em3E,6667
|
|
7
|
-
mdify_cli-1.4.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
8
|
-
mdify_cli-1.4.1.dist-info/entry_points.txt,sha256=0Xki8f5lADQUtwdt6Eq_FEaieI6Byhk8UE7BuDhChMg,41
|
|
9
|
-
mdify_cli-1.4.1.dist-info/top_level.txt,sha256=qltzf7h8owHq7dxCdfCkSHY8gT21hn1_E8P-VWS_OKM,6
|
|
10
|
-
mdify_cli-1.4.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|