mdify-cli 1.2.0__tar.gz → 1.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 1.2.0
4
- Summary: Lightweight CLI for converting documents to Markdown via Docling container
3
+ Version: 1.5.0
4
+ Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
7
7
  Project-URL: Homepage, https://github.com/tiroq/mdify
@@ -28,7 +28,13 @@ Dynamic: license-file
28
28
 
29
29
  # mdify
30
30
 
31
- A lightweight CLI for converting documents to Markdown. The CLI is fast to install via pipx, while the heavy ML conversion (Docling) runs inside a container.
31
+ ![mdify banner](https://raw.githubusercontent.com/tiroq/mdify/main/assets/mdify.png)
32
+
33
+ [![PyPI](https://img.shields.io/pypi/v/mdify-cli?logo=python&style=flat-square)](https://pypi.org/project/mdify-cli/)
34
+ [![Container](https://img.shields.io/badge/container-ghcr.io-blue?logo=docker&style=flat-square)](https://github.com/tiroq/mdify/pkgs/container/mdify-runtime)
35
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg?style=flat-square)](https://opensource.org/licenses/MIT)
36
+
37
+ A lightweight CLI for converting documents to Markdown. The CLI is fast to install via pipx, while the heavy ML conversion runs inside a container.
32
38
 
33
39
  ## Requirements
34
40
 
@@ -1,6 +1,12 @@
1
1
  # mdify
2
2
 
3
- A lightweight CLI for converting documents to Markdown. The CLI is fast to install via pipx, while the heavy ML conversion (Docling) runs inside a container.
3
+ ![mdify banner](https://raw.githubusercontent.com/tiroq/mdify/main/assets/mdify.png)
4
+
5
+ [![PyPI](https://img.shields.io/pypi/v/mdify-cli?logo=python&style=flat-square)](https://pypi.org/project/mdify-cli/)
6
+ [![Container](https://img.shields.io/badge/container-ghcr.io-blue?logo=docker&style=flat-square)](https://github.com/tiroq/mdify/pkgs/container/mdify-runtime)
7
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg?style=flat-square)](https://opensource.org/licenses/MIT)
8
+
9
+ A lightweight CLI for converting documents to Markdown. The CLI is fast to install via pipx, while the heavy ML conversion runs inside a container.
4
10
 
5
11
  ## Requirements
6
12
 
Binary file
@@ -1,3 +1,3 @@
1
1
  """mdify - Convert documents to Markdown via Docling container."""
2
2
 
3
- __version__ = "1.2.0"
3
+ __version__ = "1.5.0"
@@ -13,6 +13,7 @@ import os
13
13
  import shutil
14
14
  import subprocess
15
15
  import sys
16
+ import threading
16
17
  import time
17
18
  from pathlib import Path
18
19
  from typing import List, Optional, Tuple
@@ -24,8 +25,7 @@ from . import __version__
24
25
  # Configuration
25
26
  MDIFY_HOME = Path.home() / ".mdify"
26
27
  LAST_CHECK_FILE = MDIFY_HOME / ".last_check"
27
- INSTALLER_PATH = MDIFY_HOME / "install.sh"
28
- GITHUB_API_URL = "https://api.github.com/repos/tiroq/mdify/releases/latest"
28
+ PYPI_API_URL = "https://pypi.org/pypi/mdify-cli/json"
29
29
  CHECK_INTERVAL_SECONDS = 86400 # 24 hours
30
30
 
31
31
  # Container configuration
@@ -39,16 +39,16 @@ SUPPORTED_RUNTIMES = ("docker", "podman")
39
39
 
40
40
  def _get_remote_version(timeout: int = 5) -> Optional[str]:
41
41
  """
42
- Fetch the latest version from GitHub API.
42
+ Fetch the latest version from PyPI.
43
43
 
44
44
  Returns:
45
- Version string (e.g., "0.2.0") or None if fetch failed.
45
+ Version string (e.g., "1.1.0") or None if fetch failed.
46
46
  """
47
47
  try:
48
- with urlopen(GITHUB_API_URL, timeout=timeout) as response:
48
+ with urlopen(PYPI_API_URL, timeout=timeout) as response:
49
49
  data = json.loads(response.read().decode("utf-8"))
50
- tag = data.get("tag_name", "")
51
- return tag.lstrip("v") if tag else None
50
+ version = data.get("info", {}).get("version", "")
51
+ return version if version else None
52
52
  except (URLError, json.JSONDecodeError, KeyError, TimeoutError):
53
53
  return None
54
54
 
@@ -103,34 +103,6 @@ def _compare_versions(current: str, remote: str) -> bool:
103
103
  return False
104
104
 
105
105
 
106
- def _run_upgrade() -> bool:
107
- """
108
- Run the upgrade installer.
109
-
110
- Returns:
111
- True if upgrade was successful, False otherwise.
112
- """
113
- if not INSTALLER_PATH.exists():
114
- print(
115
- f"Installer not found at {INSTALLER_PATH}. "
116
- "Please reinstall mdify manually.",
117
- file=sys.stderr,
118
- )
119
- return False
120
-
121
- try:
122
- result = subprocess.run(
123
- [str(INSTALLER_PATH), "--upgrade", "-y"],
124
- check=True,
125
- )
126
- return result.returncode == 0
127
- except subprocess.CalledProcessError:
128
- return False
129
- except OSError as e:
130
- print(f"Failed to run installer: {e}", file=sys.stderr)
131
- return False
132
-
133
-
134
106
  def check_for_update(force: bool = False) -> None:
135
107
  """
136
108
  Check for updates and prompt user to upgrade if available.
@@ -161,27 +133,13 @@ def check_for_update(force: bool = False) -> None:
161
133
  return
162
134
 
163
135
  print(f"\n{'='*50}")
164
- print(f"A new version of mdify is available!")
136
+ print(f"A new version of mdify-cli is available!")
165
137
  print(f" Current version: {__version__}")
166
138
  print(f" Latest version: {remote_version}")
167
- print(f"{'='*50}\n")
168
-
169
- try:
170
- response = input("Run upgrade now? [y/N] ").strip().lower()
171
- except (EOFError, KeyboardInterrupt):
172
- print()
173
- return
174
-
175
- if response in ("y", "yes"):
176
- print("\nStarting upgrade...\n")
177
- if _run_upgrade():
178
- print("\nUpgrade completed! Please restart mdify.")
179
- sys.exit(0)
180
- else:
181
- print("\nUpgrade failed. You can try manually with:")
182
- print(f" {INSTALLER_PATH} --upgrade")
183
- else:
184
- print(f"\nTo upgrade later, run: {INSTALLER_PATH} --upgrade\n")
139
+ print(f"{'='*50}")
140
+ print(f"\nTo upgrade, run:")
141
+ print(f" pipx upgrade mdify-cli")
142
+ print(f" # or: pip install --upgrade mdify-cli\n")
185
143
 
186
144
 
187
145
  # =============================================================================
@@ -262,14 +220,68 @@ def pull_image(runtime: str, image: str, quiet: bool = False) -> bool:
262
220
  return False
263
221
 
264
222
 
223
def format_size(size_bytes: int) -> str:
    """Format a byte count as a human-readable size string.

    Values below 1024 are reported verbatim in bytes; larger values are
    scaled by powers of 1024 and shown with one decimal place.

    Args:
        size_bytes: Size in bytes.

    Returns:
        A string such as "512 B", "1.5 KB", "3.2 MB", "1.0 GB" or "2.0 TB".
    """
    if size_bytes < 1024:
        return f"{size_bytes} B"

    size = float(size_bytes)
    for unit in ("KB", "MB", "GB"):
        size /= 1024
        # Decide on the value as it will be displayed: 1023.96 KB would
        # otherwise print as "1024.0 KB" instead of rolling over to "1.0 MB".
        if round(size, 1) < 1024:
            return f"{size:.1f} {unit}"

    # Anything that does not fit in GB is reported in TB, however large.
    return f"{size / 1024:.1f} TB"
230
+
231
+
232
def format_duration(seconds: float) -> str:
    """Format a duration in seconds as a human-readable string.

    Durations that display as less than a minute keep one decimal place
    ("12.3s"). Longer durations are rounded to whole seconds and split into
    minutes, and hours when needed ("1m 5s", "1h 1m 1s").

    Args:
        seconds: Elapsed time in seconds.

    Returns:
        The formatted duration.
    """
    # Compare the rounded value so 59.96 does not render as "60.0s".
    if round(seconds, 1) < 60:
        return f"{seconds:.1f}s"

    # Round once, up front, so the seconds field can never display as "60"
    # (e.g. 119.6s must be "2m 0s", not "1m 60s").
    total_seconds = int(round(seconds))
    minutes, secs = divmod(total_seconds, 60)
    if minutes < 60:
        return f"{minutes}m {secs}s"

    hours, mins = divmod(minutes, 60)
    return f"{hours}h {mins}m {secs}s"
243
+
244
+
245
class Spinner:
    """A simple terminal spinner showing progress during long operations.

    A background daemon thread redraws a single status line (prefix,
    animation frame and elapsed time) on stdout roughly every 0.1 seconds
    until ``stop()`` is called.
    """

    def __init__(self):
        self.frames = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']
        self.running = False
        self.thread = None
        self.start_time = None
        # Defined here so the attribute exists even before start() is called.
        self.prefix = ""
        # Signals the worker thread to exit; replaced on every start().
        self._stop_event = threading.Event()
        # Widest line drawn so far, so stop() can erase all of it.
        self._last_width = 0

    def _spin(self, stop_event):
        """Redraw the status line until *stop_event* is set."""
        idx = 0
        while not stop_event.is_set():
            elapsed = time.time() - self.start_time
            frame = self.frames[idx % len(self.frames)]
            line = f"{self.prefix} {frame} ({format_duration(elapsed)})"
            self._last_width = max(self._last_width, len(line))
            print(f"\r{line}", end="", flush=True)
            idx += 1
            # wait() returns as soon as stop() sets the event, so shutdown
            # does not have to sit out the remainder of a sleep.
            stop_event.wait(0.1)

    def start(self, prefix: str = ""):
        """Start animating, showing *prefix* before the spinner frame."""
        # Never leave two worker threads drawing over each other.
        if self.running:
            self.stop()
        self.prefix = prefix
        self.running = True
        self.start_time = time.time()
        self._last_width = 0
        # A fresh event per run: a worker that outlived join() keeps its own
        # (already set) event and cannot be revived by this restart.
        self._stop_event = threading.Event()
        self.thread = threading.Thread(
            target=self._spin, args=(self._stop_event,), daemon=True
        )
        self.thread.start()

    def stop(self):
        """Stop animating and erase the spinner line."""
        self.running = False
        self._stop_event.set()
        if self.thread:
            self.thread.join(timeout=0.5)
        # Clear the spinner line; cover at least 80 columns, or more when a
        # longer status line was drawn.
        width = max(80, self._last_width)
        print(f"\r{' ' * width}\r", end="", flush=True)
276
+
277
+
265
278
  def run_container(
266
279
  runtime: str,
267
280
  image: str,
268
281
  input_file: Path,
269
282
  output_file: Path,
270
283
  mask_pii: bool = False,
271
- quiet: bool = False,
272
- ) -> Tuple[bool, str]:
284
+ ) -> Tuple[bool, str, float]:
273
285
  """
274
286
  Run container to convert a single file.
275
287
 
@@ -279,11 +291,12 @@ def run_container(
279
291
  input_file: Absolute path to input file
280
292
  output_file: Absolute path to output file
281
293
  mask_pii: Whether to mask PII in images
282
- quiet: Suppress progress output
283
294
 
284
295
  Returns:
285
- Tuple of (success: bool, message: str)
296
+ Tuple of (success: bool, message: str, elapsed_seconds: float)
286
297
  """
298
+ start_time = time.time()
299
+
287
300
  # Ensure output directory exists
288
301
  output_file.parent.mkdir(parents=True, exist_ok=True)
289
302
 
@@ -314,21 +327,17 @@ def run_container(
314
327
  text=True,
315
328
  check=False,
316
329
  )
330
+ elapsed = time.time() - start_time
317
331
 
318
332
  if result.returncode == 0:
319
- if not quiet:
320
- print(f"Converted: {input_file} -> {output_file}")
321
- return True, "success"
333
+ return True, "success", elapsed
322
334
  else:
323
335
  error_msg = result.stderr.strip() or result.stdout.strip() or "Unknown error"
324
- if not quiet:
325
- print(f"Failed: {input_file} - {error_msg}", file=sys.stderr)
326
- return False, f"error: {error_msg}"
336
+ return False, error_msg, elapsed
327
337
 
328
338
  except OSError as e:
329
- if not quiet:
330
- print(f"Failed: {input_file} - {e}", file=sys.stderr)
331
- return False, f"error: {e}"
339
+ elapsed = time.time() - start_time
340
+ return False, str(e), elapsed
332
341
 
333
342
 
334
343
  # =============================================================================
@@ -585,8 +594,11 @@ def main() -> int:
585
594
  print(f"No files found to convert in: {input_path}", file=sys.stderr)
586
595
  return 1
587
596
 
597
+ total_files = len(files_to_convert)
598
+ total_size = sum(f.stat().st_size for f in files_to_convert)
599
+
588
600
  if not args.quiet:
589
- print(f"Found {len(files_to_convert)} file(s) to convert")
601
+ print(f"Found {total_files} file(s) to convert ({format_size(total_size)})")
590
602
  print(f"Using runtime: {runtime}")
591
603
  print(f"Using image: {image}")
592
604
  print()
@@ -601,35 +613,54 @@ def main() -> int:
601
613
  success_count = 0
602
614
  skipped_count = 0
603
615
  failed_count = 0
616
+ conversion_start = time.time()
617
+ spinner = Spinner()
604
618
 
605
- for input_file in files_to_convert:
619
+ for idx, input_file in enumerate(files_to_convert, 1):
606
620
  output_file = get_output_path(input_file, input_base, output_dir, args.flat)
621
+ file_size = input_file.stat().st_size
622
+ progress = f"[{idx}/{total_files}]"
607
623
 
608
624
  # Check if output exists and skip if not overwriting
609
625
  if output_file.exists() and not args.overwrite:
610
626
  if not args.quiet:
611
- print(f"Skipped (exists): {input_file} -> {output_file}")
627
+ print(f"{progress} Skipped (exists): {input_file.name}")
612
628
  skipped_count += 1
613
629
  continue
614
630
 
615
- success, result = run_container(
616
- runtime, image, input_file, output_file, args.mask, args.quiet
631
+ # Show spinner while processing
632
+ if not args.quiet:
633
+ spinner.start(f"{progress} Processing: {input_file.name} ({format_size(file_size)})")
634
+
635
+ success, result, elapsed = run_container(
636
+ runtime, image, input_file, output_file, args.mask
617
637
  )
618
638
 
639
+ if not args.quiet:
640
+ spinner.stop()
641
+
619
642
  if success:
620
643
  success_count += 1
644
+ if not args.quiet:
645
+ print(f"{progress} {input_file.name} ✓ ({format_duration(elapsed)})")
621
646
  else:
622
647
  failed_count += 1
648
+ if not args.quiet:
649
+ print(f"{progress} {input_file.name} ✗ ({format_duration(elapsed)})")
650
+ print(f" Error: {result}", file=sys.stderr)
651
+
652
+ total_elapsed = time.time() - conversion_start
623
653
 
624
654
  # Print summary
625
655
  if not args.quiet:
626
656
  print()
627
657
  print("=" * 50)
628
658
  print("Conversion Summary:")
629
- print(f" Total files: {len(files_to_convert)}")
659
+ print(f" Total files: {total_files}")
630
660
  print(f" Successful: {success_count}")
631
661
  print(f" Skipped: {skipped_count}")
632
662
  print(f" Failed: {failed_count}")
663
+ print(f" Total time: {format_duration(total_elapsed)}")
633
664
  print("=" * 50)
634
665
 
635
666
  # Return appropriate exit code
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 1.2.0
4
- Summary: Lightweight CLI for converting documents to Markdown via Docling container
3
+ Version: 1.5.0
4
+ Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
7
7
  Project-URL: Homepage, https://github.com/tiroq/mdify
@@ -28,7 +28,13 @@ Dynamic: license-file
28
28
 
29
29
  # mdify
30
30
 
31
- A lightweight CLI for converting documents to Markdown. The CLI is fast to install via pipx, while the heavy ML conversion (Docling) runs inside a container.
31
+ ![mdify banner](https://raw.githubusercontent.com/tiroq/mdify/main/assets/mdify.png)
32
+
33
+ [![PyPI](https://img.shields.io/pypi/v/mdify-cli?logo=python&style=flat-square)](https://pypi.org/project/mdify-cli/)
34
+ [![Container](https://img.shields.io/badge/container-ghcr.io-blue?logo=docker&style=flat-square)](https://github.com/tiroq/mdify/pkgs/container/mdify-runtime)
35
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg?style=flat-square)](https://opensource.org/licenses/MIT)
36
+
37
+ A lightweight CLI for converting documents to Markdown. The CLI is fast to install via pipx, while the heavy ML conversion runs inside a container.
32
38
 
33
39
  ## Requirements
34
40
 
@@ -4,6 +4,7 @@ pyproject.toml
4
4
  mdify/__init__.py
5
5
  mdify/__main__.py
6
6
  mdify/cli.py
7
+ mdify/../assets/mdify.png
7
8
  mdify_cli.egg-info/PKG-INFO
8
9
  mdify_cli.egg-info/SOURCES.txt
9
10
  mdify_cli.egg-info/dependency_links.txt
@@ -1,7 +1,7 @@
1
1
  [project]
2
2
  name = "mdify-cli"
3
- version = "1.2.0"
4
- description = "Lightweight CLI for converting documents to Markdown via Docling container"
3
+ version = "1.5.0"
4
+ description = "Convert PDFs and document images into structured Markdown for LLM workflows"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.8"
7
7
  license = "MIT"
@@ -41,3 +41,6 @@ build-backend = "setuptools.build_meta"
41
41
  [tool.setuptools.packages.find]
42
42
  include = ["mdify", "mdify.*"]
43
43
  exclude = ["runtime", "runtime.*"]
44
+
45
+ [tool.setuptools.package-data]
46
+ mdify = ["../assets/*.png"]
File without changes
File without changes
File without changes