mdify-cli 1.2.0__tar.gz → 1.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 1.2.0
4
- Summary: Lightweight CLI for converting documents to Markdown via Docling container
3
+ Version: 1.6.0
4
+ Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
7
7
  Project-URL: Homepage, https://github.com/tiroq/mdify
@@ -24,11 +24,19 @@ Classifier: Topic :: Utilities
24
24
  Requires-Python: >=3.8
25
25
  Description-Content-Type: text/markdown
26
26
  License-File: LICENSE
27
+ Provides-Extra: dev
28
+ Requires-Dist: pytest>=7.0; extra == "dev"
27
29
  Dynamic: license-file
28
30
 
29
31
  # mdify
30
32
 
31
- A lightweight CLI for converting documents to Markdown. The CLI is fast to install via pipx, while the heavy ML conversion (Docling) runs inside a container.
33
+ ![mdify banner](https://raw.githubusercontent.com/tiroq/mdify/main/assets/mdify.png)
34
+
35
+ [![PyPI](https://img.shields.io/pypi/v/mdify-cli?logo=python&style=flat-square)](https://pypi.org/project/mdify-cli/)
36
+ [![Container](https://img.shields.io/badge/container-ghcr.io-blue?logo=docker&style=flat-square)](https://github.com/tiroq/mdify/pkgs/container/mdify-runtime)
37
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg?style=flat-square)](https://opensource.org/licenses/MIT)
38
+
39
+ A lightweight CLI for converting documents to Markdown. The CLI is fast to install via pipx, while the heavy ML conversion runs inside a container.
32
40
 
33
41
  ## Requirements
34
42
 
@@ -1,6 +1,12 @@
1
1
  # mdify
2
2
 
3
- A lightweight CLI for converting documents to Markdown. The CLI is fast to install via pipx, while the heavy ML conversion (Docling) runs inside a container.
3
+ ![mdify banner](https://raw.githubusercontent.com/tiroq/mdify/main/assets/mdify.png)
4
+
5
+ [![PyPI](https://img.shields.io/pypi/v/mdify-cli?logo=python&style=flat-square)](https://pypi.org/project/mdify-cli/)
6
+ [![Container](https://img.shields.io/badge/container-ghcr.io-blue?logo=docker&style=flat-square)](https://github.com/tiroq/mdify/pkgs/container/mdify-runtime)
7
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg?style=flat-square)](https://opensource.org/licenses/MIT)
8
+
9
+ A lightweight CLI for converting documents to Markdown. The CLI is fast to install via pipx, while the heavy ML conversion runs inside a container.
4
10
 
5
11
  ## Requirements
6
12
 
Binary file
@@ -1,3 +1,3 @@
1
1
  """mdify - Convert documents to Markdown via Docling container."""
2
2
 
3
- __version__ = "1.2.0"
3
+ __version__ = "1.6.0"
@@ -13,6 +13,7 @@ import os
13
13
  import shutil
14
14
  import subprocess
15
15
  import sys
16
+ import threading
16
17
  import time
17
18
  from pathlib import Path
18
19
  from typing import List, Optional, Tuple
@@ -24,8 +25,7 @@ from . import __version__
24
25
  # Configuration
25
26
  MDIFY_HOME = Path.home() / ".mdify"
26
27
  LAST_CHECK_FILE = MDIFY_HOME / ".last_check"
27
- INSTALLER_PATH = MDIFY_HOME / "install.sh"
28
- GITHUB_API_URL = "https://api.github.com/repos/tiroq/mdify/releases/latest"
28
+ PYPI_API_URL = "https://pypi.org/pypi/mdify-cli/json"
29
29
  CHECK_INTERVAL_SECONDS = 86400 # 24 hours
30
30
 
31
31
  # Container configuration
@@ -37,18 +37,19 @@ SUPPORTED_RUNTIMES = ("docker", "podman")
37
37
  # Update checking functions
38
38
  # =============================================================================
39
39
 
40
+
40
41
def _get_remote_version(timeout: int = 5) -> Optional[str]:
    """
    Fetch the latest version from PyPI.

    This is a best-effort lookup: any network, decoding or payload-shape
    problem results in None rather than an exception.

    Args:
        timeout: Timeout in seconds passed to urlopen().

    Returns:
        Version string (e.g., "1.1.0") or None if fetch failed.
    """
    # Imported locally so the block does not depend on a new file-level import.
    from http.client import HTTPException

    try:
        with urlopen(PYPI_API_URL, timeout=timeout) as response:
            data = json.loads(response.read().decode("utf-8"))
    # OSError covers URLError, TimeoutError and, on Python < 3.10, the
    # socket.timeout raised while reading the body. ValueError covers
    # json.JSONDecodeError and UnicodeDecodeError. HTTPException covers
    # protocol-level failures such as an incomplete read.
    except (OSError, ValueError, HTTPException):
        return None

    # Guard against a payload that is valid JSON but not the expected shape.
    info = data.get("info") if isinstance(data, dict) else None
    version = info.get("version") if isinstance(info, dict) else None
    return version if isinstance(version, str) and version else None
54
55
 
@@ -56,16 +57,16 @@ def _get_remote_version(timeout: int = 5) -> Optional[str]:
56
57
  def _should_check_for_update() -> bool:
57
58
  """
58
59
  Determine if we should check for updates based on last check time.
59
-
60
+
60
61
  Returns:
61
62
  True if check should be performed, False otherwise.
62
63
  """
63
64
  if os.environ.get("MDIFY_NO_UPDATE_CHECK", "").lower() in ("1", "true", "yes"):
64
65
  return False
65
-
66
+
66
67
  if not LAST_CHECK_FILE.exists():
67
68
  return True
68
-
69
+
69
70
  try:
70
71
  last_check = float(LAST_CHECK_FILE.read_text().strip())
71
72
  elapsed = time.time() - last_check
@@ -86,63 +87,35 @@ def _update_last_check_time() -> None:
86
87
  def _compare_versions(current: str, remote: str) -> bool:
87
88
  """
88
89
  Compare version strings.
89
-
90
+
90
91
  Returns:
91
92
  True if remote version is newer than current.
92
93
  """
93
94
  try:
94
95
  current_parts = [int(x) for x in current.split(".")]
95
96
  remote_parts = [int(x) for x in remote.split(".")]
96
-
97
+
97
98
  max_len = max(len(current_parts), len(remote_parts))
98
99
  current_parts.extend([0] * (max_len - len(current_parts)))
99
100
  remote_parts.extend([0] * (max_len - len(remote_parts)))
100
-
101
+
101
102
  return remote_parts > current_parts
102
103
  except (ValueError, AttributeError):
103
104
  return False
104
105
 
105
106
 
106
- def _run_upgrade() -> bool:
107
- """
108
- Run the upgrade installer.
109
-
110
- Returns:
111
- True if upgrade was successful, False otherwise.
112
- """
113
- if not INSTALLER_PATH.exists():
114
- print(
115
- f"Installer not found at {INSTALLER_PATH}. "
116
- "Please reinstall mdify manually.",
117
- file=sys.stderr,
118
- )
119
- return False
120
-
121
- try:
122
- result = subprocess.run(
123
- [str(INSTALLER_PATH), "--upgrade", "-y"],
124
- check=True,
125
- )
126
- return result.returncode == 0
127
- except subprocess.CalledProcessError:
128
- return False
129
- except OSError as e:
130
- print(f"Failed to run installer: {e}", file=sys.stderr)
131
- return False
132
-
133
-
134
107
  def check_for_update(force: bool = False) -> None:
135
108
  """
136
109
  Check for updates and prompt user to upgrade if available.
137
-
110
+
138
111
  Args:
139
112
  force: If True, check regardless of last check time and show errors.
140
113
  """
141
114
  if not force and not _should_check_for_update():
142
115
  return
143
-
116
+
144
117
  remote_version = _get_remote_version()
145
-
118
+
146
119
  if remote_version is None:
147
120
  if force:
148
121
  print(
@@ -152,49 +125,40 @@ def check_for_update(force: bool = False) -> None:
152
125
  )
153
126
  sys.exit(1)
154
127
  return
155
-
128
+
156
129
  _update_last_check_time()
157
-
130
+
158
131
  if not _compare_versions(__version__, remote_version):
159
132
  if force:
160
133
  print(f"mdify is up to date (version {__version__})")
161
134
  return
162
-
163
- print(f"\n{'='*50}")
164
- print(f"A new version of mdify is available!")
135
+
136
+ print(f"\n{'=' * 50}")
137
+ print(f"A new version of mdify-cli is available!")
165
138
  print(f" Current version: {__version__}")
166
139
  print(f" Latest version: {remote_version}")
167
- print(f"{'='*50}\n")
168
-
169
- try:
170
- response = input("Run upgrade now? [y/N] ").strip().lower()
171
- except (EOFError, KeyboardInterrupt):
172
- print()
173
- return
174
-
175
- if response in ("y", "yes"):
176
- print("\nStarting upgrade...\n")
177
- if _run_upgrade():
178
- print("\nUpgrade completed! Please restart mdify.")
179
- sys.exit(0)
180
- else:
181
- print("\nUpgrade failed. You can try manually with:")
182
- print(f" {INSTALLER_PATH} --upgrade")
183
- else:
184
- print(f"\nTo upgrade later, run: {INSTALLER_PATH} --upgrade\n")
140
+ print(f"{'=' * 50}")
141
+ print(f"\nTo upgrade, run:")
142
+ print(f" pipx upgrade mdify-cli")
143
+ print(f" # or: pip install --upgrade mdify-cli\n")
185
144
 
186
145
 
187
146
  # =============================================================================
188
147
  # Container runtime functions
189
148
  # =============================================================================
190
149
 
191
- def detect_runtime(preferred: str) -> Optional[str]:
150
+
151
+ def detect_runtime(preferred: str, explicit: bool = True) -> Optional[str]:
192
152
  """
193
153
  Detect available container runtime.
194
-
154
+
195
155
  Args:
196
156
  preferred: Preferred runtime ('docker' or 'podman')
197
-
157
+ explicit: If True, warn when falling back to alternative.
158
+ If False, silently use alternative without warning.
159
+ Note: This only controls warning emission; selection order
160
+ is always preferred → alternative regardless of this flag.
161
+
198
162
  Returns:
199
163
  Path to runtime executable, or None if not found.
200
164
  """
@@ -202,25 +166,28 @@ def detect_runtime(preferred: str) -> Optional[str]:
202
166
  runtime_path = shutil.which(preferred)
203
167
  if runtime_path:
204
168
  return runtime_path
205
-
169
+
206
170
  # Try alternative
207
171
  alternative = "podman" if preferred == "docker" else "docker"
208
172
  runtime_path = shutil.which(alternative)
209
173
  if runtime_path:
210
- print(f"Warning: {preferred} not found, using {alternative}", file=sys.stderr)
174
+ if explicit:
175
+ print(
176
+ f"Warning: {preferred} not found, using {alternative}", file=sys.stderr
177
+ )
211
178
  return runtime_path
212
-
179
+
213
180
  return None
214
181
 
215
182
 
216
183
  def check_image_exists(runtime: str, image: str) -> bool:
217
184
  """
218
185
  Check if container image exists locally.
219
-
186
+
220
187
  Args:
221
188
  runtime: Path to container runtime
222
189
  image: Image name/tag
223
-
190
+
224
191
  Returns:
225
192
  True if image exists locally.
226
193
  """
@@ -238,18 +205,18 @@ def check_image_exists(runtime: str, image: str) -> bool:
238
205
  def pull_image(runtime: str, image: str, quiet: bool = False) -> bool:
239
206
  """
240
207
  Pull container image.
241
-
208
+
242
209
  Args:
243
210
  runtime: Path to container runtime
244
211
  image: Image name/tag
245
212
  quiet: Suppress progress output
246
-
213
+
247
214
  Returns:
248
215
  True if pull succeeded.
249
216
  """
250
217
  if not quiet:
251
218
  print(f"Pulling image: {image}")
252
-
219
+
253
220
  try:
254
221
  result = subprocess.run(
255
222
  [runtime, "pull", image],
@@ -262,51 +229,116 @@ def pull_image(runtime: str, image: str, quiet: bool = False) -> bool:
262
229
  return False
263
230
 
264
231
 
232
def format_size(size_bytes: int) -> str:
    """Format a byte count as a human-readable string.

    Args:
        size_bytes: Size in bytes.

    Returns:
        The size scaled by powers of 1024 with a unit suffix. Values below
        1024 are printed as given with "B"; larger values use one decimal
        place with "KB", "MB", "GB" or "TB" (e.g. "1.5 KB").
    """
    if size_bytes < 1024:
        return f"{size_bytes} B"

    size = float(size_bytes)
    for unit in ("KB", "MB", "GB"):
        size /= 1024
        # Compare the value as it will be displayed, so that e.g. 1048575
        # bytes becomes "1.0 MB" instead of "1024.0 KB".
        if round(size, 1) < 1024:
            return f"{size:.1f} {unit}"
    return f"{size / 1024:.1f} TB"
239
+
240
+
241
def format_duration(seconds: float) -> str:
    """Format a duration in seconds as a human-readable string.

    Args:
        seconds: Duration in seconds.

    Returns:
        "<s.s>s" for durations that display as less than a minute,
        "<m>m <s>s" below one hour, and "<h>h <m>m <s>s" otherwise.
    """
    # Decide on the displayed value so 59.96 is not rendered as "60.0s".
    if round(seconds, 1) < 60:
        return f"{seconds:.1f}s"

    # Round to whole seconds before splitting; rounding the remainder
    # afterwards could yield "1m 60s".
    total_seconds = int(round(seconds))
    minutes, secs = divmod(total_seconds, 60)
    if minutes < 60:
        return f"{minutes}m {secs}s"
    hours, mins = divmod(minutes, 60)
    return f"{hours}h {mins}m {secs}s"
252
+
253
+
254
class Spinner:
    """A simple spinner to show progress during long operations.

    start() launches a daemon thread that redraws a single stdout line
    (prefix, animation frame, elapsed time) about every 0.1 seconds;
    stop() ends the thread and blanks the line. One instance can be
    started and stopped repeatedly.
    """

    def __init__(self):
        self.frames = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]
        self.running = False
        self.thread = None
        self.start_time = None
        # Initialised here so the attribute exists before the first start().
        self.prefix = ""
        self._stop_event = threading.Event()
        # Widest line drawn during the current run, used when clearing.
        self._last_width = 0

    def _spin(self, stop_event=None):
        """Redraw the spinner line until the stop event is set."""
        # Each run receives its own event, so a worker from an earlier run
        # cannot be revived by a later start().
        if stop_event is None:
            stop_event = self._stop_event
        idx = 0
        while not stop_event.is_set():
            elapsed = time.time() - self.start_time
            frame = self.frames[idx % len(self.frames)]
            line = f"{self.prefix} {frame} ({format_duration(elapsed)})"
            self._last_width = max(self._last_width, len(line))
            print(f"\r{line}", end="", flush=True)
            idx += 1
            # Returns early once stop() sets the event.
            stop_event.wait(0.1)

    def start(self, prefix: str = ""):
        """Start the spinner thread.

        Args:
            prefix: Text shown before the animation frame.
        """
        if self.running:
            # Avoid two workers drawing on the same line.
            self.stop()
        self.prefix = prefix
        self.running = True
        self.start_time = time.time()
        self._last_width = 0
        self._stop_event = threading.Event()
        self.thread = threading.Thread(
            target=self._spin, args=(self._stop_event,), daemon=True
        )
        self.thread.start()

    def stop(self):
        """Stop the spinner thread, if any, and clear the spinner line."""
        self.running = False
        self._stop_event.set()
        if self.thread:
            self.thread.join(timeout=0.5)
            self.thread = None
        # Clear the spinner line; cover at least 80 columns, and more when
        # a longer line was drawn.
        width = max(self._last_width, 80)
        print(f"\r{' ' * width}\r", end="", flush=True)
289
+
290
+
265
291
  def run_container(
266
292
  runtime: str,
267
293
  image: str,
268
294
  input_file: Path,
269
295
  output_file: Path,
270
296
  mask_pii: bool = False,
271
- quiet: bool = False,
272
- ) -> Tuple[bool, str]:
297
+ ) -> Tuple[bool, str, float]:
273
298
  """
274
299
  Run container to convert a single file.
275
-
300
+
276
301
  Args:
277
302
  runtime: Path to container runtime
278
303
  image: Image name/tag
279
304
  input_file: Absolute path to input file
280
305
  output_file: Absolute path to output file
281
306
  mask_pii: Whether to mask PII in images
282
- quiet: Suppress progress output
283
-
307
+
284
308
  Returns:
285
- Tuple of (success: bool, message: str)
309
+ Tuple of (success: bool, message: str, elapsed_seconds: float)
286
310
  """
311
+ start_time = time.time()
312
+
287
313
  # Ensure output directory exists
288
314
  output_file.parent.mkdir(parents=True, exist_ok=True)
289
-
315
+
290
316
  # Mount directories
291
317
  input_dir = input_file.parent
292
318
  output_dir = output_file.parent
293
-
319
+
294
320
  # Container paths
295
321
  container_in = f"/work/in/{input_file.name}"
296
322
  container_out = f"/work/out/{output_file.name}"
297
-
323
+
298
324
  cmd = [
299
- runtime, "run", "--rm",
300
- "-v", f"{input_dir}:/work/in:ro",
301
- "-v", f"{output_dir}:/work/out",
325
+ runtime,
326
+ "run",
327
+ "--rm",
328
+ "-v",
329
+ f"{input_dir}:/work/in:ro",
330
+ "-v",
331
+ f"{output_dir}:/work/out",
302
332
  image,
303
- "--in", container_in,
304
- "--out", container_out,
333
+ "--in",
334
+ container_in,
335
+ "--out",
336
+ container_out,
305
337
  ]
306
-
338
+
307
339
  if mask_pii:
308
340
  cmd.append("--mask")
309
-
341
+
310
342
  try:
311
343
  result = subprocess.run(
312
344
  cmd,
@@ -314,21 +346,19 @@ def run_container(
314
346
  text=True,
315
347
  check=False,
316
348
  )
317
-
349
+ elapsed = time.time() - start_time
350
+
318
351
  if result.returncode == 0:
319
- if not quiet:
320
- print(f"Converted: {input_file} -> {output_file}")
321
- return True, "success"
352
+ return True, "success", elapsed
322
353
  else:
323
- error_msg = result.stderr.strip() or result.stdout.strip() or "Unknown error"
324
- if not quiet:
325
- print(f"Failed: {input_file} - {error_msg}", file=sys.stderr)
326
- return False, f"error: {error_msg}"
327
-
354
+ error_msg = (
355
+ result.stderr.strip() or result.stdout.strip() or "Unknown error"
356
+ )
357
+ return False, error_msg, elapsed
358
+
328
359
  except OSError as e:
329
- if not quiet:
330
- print(f"Failed: {input_file} - {e}", file=sys.stderr)
331
- return False, f"error: {e}"
360
+ elapsed = time.time() - start_time
361
+ return False, str(e), elapsed
332
362
 
333
363
 
334
364
  # =============================================================================
@@ -337,22 +367,39 @@ def run_container(
337
367
 
338
368
  # Supported file extensions (based on Docling InputFormat)
339
369
  SUPPORTED_EXTENSIONS = {
340
- '.pdf', '.docx', '.pptx', '.html', '.htm',
341
- '.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.tif', # images
342
- '.asciidoc', '.adoc', '.asc', # asciidoc
343
- '.md', '.markdown', # markdown
344
- '.csv', '.xlsx', # spreadsheets
345
- '.xml', # XML formats
346
- '.json', # JSON docling
347
- '.mp3', '.wav', '.m4a', '.flac', # audio
348
- '.vtt', # subtitles
370
+ ".pdf",
371
+ ".docx",
372
+ ".pptx",
373
+ ".html",
374
+ ".htm",
375
+ ".png",
376
+ ".jpg",
377
+ ".jpeg",
378
+ ".gif",
379
+ ".bmp",
380
+ ".tiff",
381
+ ".tif", # images
382
+ ".asciidoc",
383
+ ".adoc",
384
+ ".asc", # asciidoc
385
+ ".md",
386
+ ".markdown", # markdown
387
+ ".csv",
388
+ ".xlsx", # spreadsheets
389
+ ".xml", # XML formats
390
+ ".json", # JSON docling
391
+ ".mp3",
392
+ ".wav",
393
+ ".m4a",
394
+ ".flac", # audio
395
+ ".vtt", # subtitles
349
396
  }
350
397
 
351
398
 
352
399
  def get_files_to_convert(input_path: Path, mask: str, recursive: bool) -> List[Path]:
353
400
  """Get list of files to convert based on input path and options."""
354
401
  files = []
355
-
402
+
356
403
  if input_path.is_file():
357
404
  files.append(input_path)
358
405
  elif input_path.is_dir():
@@ -360,19 +407,19 @@ def get_files_to_convert(input_path: Path, mask: str, recursive: bool) -> List[P
360
407
  files = list(input_path.rglob(mask))
361
408
  else:
362
409
  files = list(input_path.glob(mask))
363
-
410
+
364
411
  # Filter to only files
365
412
  files = [f for f in files if f.is_file()]
366
413
  else:
367
414
  raise FileNotFoundError(f"Input path does not exist: {input_path}")
368
-
415
+
369
416
  # Filter out hidden files and unsupported formats
370
417
  files = [
371
- f for f in files
372
- if not f.name.startswith('.')
373
- and f.suffix.lower() in SUPPORTED_EXTENSIONS
418
+ f
419
+ for f in files
420
+ if not f.name.startswith(".") and f.suffix.lower() in SUPPORTED_EXTENSIONS
374
421
  ]
375
-
422
+
376
423
  return files
377
424
 
378
425
 
@@ -405,7 +452,7 @@ def get_output_path(
405
452
  output_path = output_dir / relative_path.parent / output_name
406
453
  except ValueError:
407
454
  output_path = output_dir / output_name
408
-
455
+
409
456
  return output_path
410
457
 
411
458
 
@@ -413,6 +460,7 @@ def get_output_path(
413
460
  # CLI argument parsing
414
461
  # =============================================================================
415
462
 
463
+
416
464
  def parse_args() -> argparse.Namespace:
417
465
  """Parse command line arguments."""
418
466
  parser = argparse.ArgumentParser(
@@ -427,74 +475,79 @@ Examples:
427
475
  mdify ./docs --runtime podman Use Podman instead of Docker
428
476
  """,
429
477
  )
430
-
478
+
431
479
  parser.add_argument(
432
480
  "input",
433
481
  type=str,
434
482
  nargs="?",
435
483
  help="Input file or directory to convert",
436
484
  )
437
-
485
+
438
486
  parser.add_argument(
439
- "-o", "--out-dir",
487
+ "-o",
488
+ "--out-dir",
440
489
  type=str,
441
490
  default="output",
442
491
  help="Output directory for converted files (default: output)",
443
492
  )
444
-
493
+
445
494
  parser.add_argument(
446
- "-g", "--glob",
495
+ "-g",
496
+ "--glob",
447
497
  type=str,
448
498
  default="*",
449
499
  help="Glob pattern for filtering files in directory (default: *)",
450
500
  )
451
-
501
+
452
502
  parser.add_argument(
453
- "-r", "--recursive",
503
+ "-r",
504
+ "--recursive",
454
505
  action="store_true",
455
506
  help="Recursively scan directories",
456
507
  )
457
-
508
+
458
509
  parser.add_argument(
459
510
  "--flat",
460
511
  action="store_true",
461
512
  help="Disable directory structure preservation in output",
462
513
  )
463
-
514
+
464
515
  parser.add_argument(
465
516
  "--overwrite",
466
517
  action="store_true",
467
518
  help="Overwrite existing output files",
468
519
  )
469
-
520
+
470
521
  parser.add_argument(
471
- "-q", "--quiet",
522
+ "-q",
523
+ "--quiet",
472
524
  action="store_true",
473
525
  help="Suppress progress messages",
474
526
  )
475
-
527
+
476
528
  parser.add_argument(
477
- "-m", "--mask",
529
+ "-m",
530
+ "--mask",
478
531
  action="store_true",
479
532
  help="Mask PII and sensitive content in document images",
480
533
  )
481
-
534
+
482
535
  # Container options
483
536
  parser.add_argument(
484
537
  "--runtime",
485
538
  type=str,
486
539
  choices=SUPPORTED_RUNTIMES,
487
- default="docker",
488
- help="Container runtime to use (default: docker)",
540
+ default=None,
541
+ help="Container runtime to use (auto-detects docker or podman if not specified)",
489
542
  )
490
-
543
+
491
544
  parser.add_argument(
492
545
  "--image",
493
546
  type=str,
494
547
  default=DEFAULT_IMAGE,
495
548
  help=f"Container image to use (default: {DEFAULT_IMAGE})",
496
549
  )
497
-
550
+
498
551
  parser.add_argument(
499
552
  "--pull",
500
553
  type=str,
@@ -502,20 +555,20 @@ Examples:
502
555
  default="missing",
503
556
  help="Image pull policy: always, missing, never (default: missing)",
504
557
  )
505
-
558
+
506
559
  # Utility options
507
560
  parser.add_argument(
508
561
  "--check-update",
509
562
  action="store_true",
510
563
  help="Check for available updates and exit",
511
564
  )
512
-
565
+
513
566
  parser.add_argument(
514
567
  "--version",
515
568
  action="version",
516
569
  version=f"mdify {__version__}",
517
570
  )
518
-
571
+
519
572
  return parser.parse_args()
520
573
 
521
574
 
@@ -523,27 +576,30 @@ Examples:
523
576
  # Main entry point
524
577
  # =============================================================================
525
578
 
579
+
526
580
  def main() -> int:
527
581
  """Main entry point for the CLI."""
528
582
  args = parse_args()
529
-
583
+
530
584
  # Handle --check-update flag
531
585
  if args.check_update:
532
586
  check_for_update(force=True)
533
587
  return 0
534
-
588
+
535
589
  # Check for updates (daily, silent on errors)
536
590
  check_for_update(force=False)
537
-
591
+
538
592
  # Validate input is provided
539
593
  if args.input is None:
540
594
  print("Error: Input file or directory is required", file=sys.stderr)
541
595
  print("Usage: mdify <input> [options]", file=sys.stderr)
542
596
  print(" mdify --help for more information", file=sys.stderr)
543
597
  return 1
544
-
598
+
545
599
  # Detect container runtime
546
- runtime = detect_runtime(args.runtime)
600
+ preferred = args.runtime if args.runtime else "docker"
601
+ explicit = args.runtime is not None
602
+ runtime = detect_runtime(preferred, explicit=explicit)
547
603
  if runtime is None:
548
604
  print(
549
605
  f"Error: Container runtime not found ({', '.join(SUPPORTED_RUNTIMES)})",
@@ -551,87 +607,111 @@ def main() -> int:
551
607
  )
552
608
  print("Please install Docker or Podman to use mdify.", file=sys.stderr)
553
609
  return 2
554
-
610
+
555
611
  # Handle image pull policy
556
612
  image = args.image
557
613
  image_exists = check_image_exists(runtime, image)
558
-
614
+
559
615
  if args.pull == "always" or (args.pull == "missing" and not image_exists):
560
616
  if not pull_image(runtime, image, args.quiet):
561
617
  print(f"Error: Failed to pull image: {image}", file=sys.stderr)
562
618
  return 1
563
619
  elif args.pull == "never" and not image_exists:
564
620
  print(f"Error: Image not found locally: {image}", file=sys.stderr)
565
- print(f"Run with --pull=missing or pull manually: {args.runtime} pull {image}")
621
+ print(f"Run with --pull=missing or pull manually: {preferred} pull {image}")
566
622
  return 1
567
-
623
+
568
624
  # Resolve paths
569
625
  input_path = Path(args.input).resolve()
570
626
  output_dir = Path(args.out_dir).resolve()
571
-
627
+
572
628
  # Validate input
573
629
  if not input_path.exists():
574
630
  print(f"Error: Input path does not exist: {input_path}", file=sys.stderr)
575
631
  return 1
576
-
632
+
577
633
  # Get files to convert
578
634
  try:
579
635
  files_to_convert = get_files_to_convert(input_path, args.glob, args.recursive)
580
636
  except Exception as e:
581
637
  print(f"Error: {e}", file=sys.stderr)
582
638
  return 1
583
-
639
+
584
640
  if not files_to_convert:
585
641
  print(f"No files found to convert in: {input_path}", file=sys.stderr)
586
642
  return 1
587
-
643
+
644
+ total_files = len(files_to_convert)
645
+ total_size = sum(f.stat().st_size for f in files_to_convert)
646
+
588
647
  if not args.quiet:
589
- print(f"Found {len(files_to_convert)} file(s) to convert")
648
+ print(f"Found {total_files} file(s) to convert ({format_size(total_size)})")
590
649
  print(f"Using runtime: {runtime}")
591
650
  print(f"Using image: {image}")
592
651
  print()
593
-
652
+
594
653
  # Determine input base for directory structure preservation
595
654
  if input_path.is_file():
596
655
  input_base = input_path.parent
597
656
  else:
598
657
  input_base = input_path
599
-
658
+
600
659
  # Convert files
601
660
  success_count = 0
602
661
  skipped_count = 0
603
662
  failed_count = 0
604
-
605
- for input_file in files_to_convert:
663
+ conversion_start = time.time()
664
+ spinner = Spinner()
665
+
666
+ for idx, input_file in enumerate(files_to_convert, 1):
606
667
  output_file = get_output_path(input_file, input_base, output_dir, args.flat)
607
-
668
+ file_size = input_file.stat().st_size
669
+ progress = f"[{idx}/{total_files}]"
670
+
608
671
  # Check if output exists and skip if not overwriting
609
672
  if output_file.exists() and not args.overwrite:
610
673
  if not args.quiet:
611
- print(f"Skipped (exists): {input_file} -> {output_file}")
674
+ print(f"{progress} Skipped (exists): {input_file.name}")
612
675
  skipped_count += 1
613
676
  continue
614
-
615
- success, result = run_container(
616
- runtime, image, input_file, output_file, args.mask, args.quiet
677
+
678
+ # Show spinner while processing
679
+ if not args.quiet:
680
+ spinner.start(
681
+ f"{progress} Processing: {input_file.name} ({format_size(file_size)})"
682
+ )
683
+
684
+ success, result, elapsed = run_container(
685
+ runtime, image, input_file, output_file, args.mask
617
686
  )
618
-
687
+
688
+ if not args.quiet:
689
+ spinner.stop()
690
+
619
691
  if success:
620
692
  success_count += 1
693
+ if not args.quiet:
694
+ print(f"{progress} {input_file.name} ✓ ({format_duration(elapsed)})")
621
695
  else:
622
696
  failed_count += 1
623
-
697
+ if not args.quiet:
698
+ print(f"{progress} {input_file.name} ✗ ({format_duration(elapsed)})")
699
+ print(f" Error: {result}", file=sys.stderr)
700
+
701
+ total_elapsed = time.time() - conversion_start
702
+
624
703
  # Print summary
625
704
  if not args.quiet:
626
705
  print()
627
706
  print("=" * 50)
628
707
  print("Conversion Summary:")
629
- print(f" Total files: {len(files_to_convert)}")
708
+ print(f" Total files: {total_files}")
630
709
  print(f" Successful: {success_count}")
631
710
  print(f" Skipped: {skipped_count}")
632
711
  print(f" Failed: {failed_count}")
712
+ print(f" Total time: {format_duration(total_elapsed)}")
633
713
  print("=" * 50)
634
-
714
+
635
715
  # Return appropriate exit code
636
716
  if failed_count > 0:
637
717
  return 1
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 1.2.0
4
- Summary: Lightweight CLI for converting documents to Markdown via Docling container
3
+ Version: 1.6.0
4
+ Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
7
7
  Project-URL: Homepage, https://github.com/tiroq/mdify
@@ -24,11 +24,19 @@ Classifier: Topic :: Utilities
24
24
  Requires-Python: >=3.8
25
25
  Description-Content-Type: text/markdown
26
26
  License-File: LICENSE
27
+ Provides-Extra: dev
28
+ Requires-Dist: pytest>=7.0; extra == "dev"
27
29
  Dynamic: license-file
28
30
 
29
31
  # mdify
30
32
 
31
- A lightweight CLI for converting documents to Markdown. The CLI is fast to install via pipx, while the heavy ML conversion (Docling) runs inside a container.
33
+ ![mdify banner](https://raw.githubusercontent.com/tiroq/mdify/main/assets/mdify.png)
34
+
35
+ [![PyPI](https://img.shields.io/pypi/v/mdify-cli?logo=python&style=flat-square)](https://pypi.org/project/mdify-cli/)
36
+ [![Container](https://img.shields.io/badge/container-ghcr.io-blue?logo=docker&style=flat-square)](https://github.com/tiroq/mdify/pkgs/container/mdify-runtime)
37
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg?style=flat-square)](https://opensource.org/licenses/MIT)
38
+
39
+ A lightweight CLI for converting documents to Markdown. The CLI is fast to install via pipx, while the heavy ML conversion runs inside a container.
32
40
 
33
41
  ## Requirements
34
42
 
@@ -4,8 +4,11 @@ pyproject.toml
4
4
  mdify/__init__.py
5
5
  mdify/__main__.py
6
6
  mdify/cli.py
7
+ mdify/../assets/mdify.png
7
8
  mdify_cli.egg-info/PKG-INFO
8
9
  mdify_cli.egg-info/SOURCES.txt
9
10
  mdify_cli.egg-info/dependency_links.txt
10
11
  mdify_cli.egg-info/entry_points.txt
11
- mdify_cli.egg-info/top_level.txt
12
+ mdify_cli.egg-info/requires.txt
13
+ mdify_cli.egg-info/top_level.txt
14
+ tests/test_cli.py
@@ -0,0 +1,3 @@
1
+
2
+ [dev]
3
+ pytest>=7.0
@@ -1,7 +1,7 @@
1
1
  [project]
2
2
  name = "mdify-cli"
3
- version = "1.2.0"
4
- description = "Lightweight CLI for converting documents to Markdown via Docling container"
3
+ version = "1.6.0"
4
+ description = "Convert PDFs and document images into structured Markdown for LLM workflows"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.8"
7
7
  license = "MIT"
@@ -26,6 +26,9 @@ classifiers = [
26
26
  ]
27
27
  dependencies = []
28
28
 
29
+ [project.optional-dependencies]
30
+ dev = ["pytest>=7.0"]
31
+
29
32
  [project.urls]
30
33
  Homepage = "https://github.com/tiroq/mdify"
31
34
  Repository = "https://github.com/tiroq/mdify"
@@ -41,3 +44,6 @@ build-backend = "setuptools.build_meta"
41
44
  [tool.setuptools.packages.find]
42
45
  include = ["mdify", "mdify.*"]
43
46
  exclude = ["runtime", "runtime.*"]
47
+
48
+ [tool.setuptools.package-data]
49
+ mdify = ["../assets/*.png"]
@@ -0,0 +1,77 @@
1
+ """Tests for mdify CLI runtime detection."""
2
+
3
+ from unittest.mock import patch
4
+ from mdify.cli import detect_runtime
5
+
6
+
7
class TestDetectRuntime:
    """Exercise detect_runtime() for auto-detected and explicit runtimes.

    shutil.which is replaced with a stub in every test. A dict's ``get``
    serves as the stub: it returns the mapped path for the one runtime
    treated as installed and None for any other name.
    """

    def test_auto_docker_exists(self):
        stub = {"docker": "/usr/bin/docker"}.get
        with patch("mdify.cli.shutil.which", side_effect=stub):
            found = detect_runtime("docker", explicit=False)
        assert found == "/usr/bin/docker"

    def test_auto_only_podman_exists(self, capsys):
        stub = {"podman": "/usr/bin/podman"}.get
        with patch("mdify.cli.shutil.which", side_effect=stub):
            found = detect_runtime("docker", explicit=False)
        assert found == "/usr/bin/podman"
        # Auto-detection falls back without emitting a warning.
        assert capsys.readouterr().err == ""

    def test_auto_neither_exists(self):
        with patch("mdify.cli.shutil.which", return_value=None):
            found = detect_runtime("docker", explicit=False)
        assert found is None

    def test_explicit_docker_exists(self):
        stub = {"docker": "/usr/bin/docker"}.get
        with patch("mdify.cli.shutil.which", side_effect=stub):
            found = detect_runtime("docker", explicit=True)
        assert found == "/usr/bin/docker"

    def test_explicit_docker_fallback_to_podman(self, capsys):
        stub = {"podman": "/usr/bin/podman"}.get
        with patch("mdify.cli.shutil.which", side_effect=stub):
            found = detect_runtime("docker", explicit=True)
        assert found == "/usr/bin/podman"
        stderr_text = capsys.readouterr().err
        assert "Warning: docker not found, using podman" in stderr_text

    def test_explicit_docker_neither_exists(self):
        with patch("mdify.cli.shutil.which", return_value=None):
            found = detect_runtime("docker", explicit=True)
        assert found is None

    def test_explicit_podman_exists(self):
        stub = {"podman": "/usr/bin/podman"}.get
        with patch("mdify.cli.shutil.which", side_effect=stub):
            found = detect_runtime("podman", explicit=True)
        assert found == "/usr/bin/podman"

    def test_explicit_podman_fallback_to_docker(self, capsys):
        stub = {"docker": "/usr/bin/docker"}.get
        with patch("mdify.cli.shutil.which", side_effect=stub):
            found = detect_runtime("podman", explicit=True)
        assert found == "/usr/bin/docker"
        stderr_text = capsys.readouterr().err
        assert "Warning: podman not found, using docker" in stderr_text

    def test_explicit_podman_neither_exists(self):
        with patch("mdify.cli.shutil.which", return_value=None):
            found = detect_runtime("podman", explicit=True)
        assert found is None
File without changes
File without changes
File without changes