mdify-cli 1.2.0__tar.gz → 1.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mdify_cli-1.2.0/mdify_cli.egg-info → mdify_cli-1.5.0}/PKG-INFO +9 -3
- {mdify_cli-1.2.0 → mdify_cli-1.5.0}/README.md +7 -1
- mdify_cli-1.5.0/assets/mdify.png +0 -0
- {mdify_cli-1.2.0 → mdify_cli-1.5.0}/mdify/__init__.py +1 -1
- {mdify_cli-1.2.0 → mdify_cli-1.5.0}/mdify/cli.py +104 -73
- {mdify_cli-1.2.0 → mdify_cli-1.5.0/mdify_cli.egg-info}/PKG-INFO +9 -3
- {mdify_cli-1.2.0 → mdify_cli-1.5.0}/mdify_cli.egg-info/SOURCES.txt +1 -0
- {mdify_cli-1.2.0 → mdify_cli-1.5.0}/pyproject.toml +5 -2
- {mdify_cli-1.2.0 → mdify_cli-1.5.0}/LICENSE +0 -0
- {mdify_cli-1.2.0 → mdify_cli-1.5.0}/mdify/__main__.py +0 -0
- {mdify_cli-1.2.0 → mdify_cli-1.5.0}/mdify_cli.egg-info/dependency_links.txt +0 -0
- {mdify_cli-1.2.0 → mdify_cli-1.5.0}/mdify_cli.egg-info/entry_points.txt +0 -0
- {mdify_cli-1.2.0 → mdify_cli-1.5.0}/mdify_cli.egg-info/top_level.txt +0 -0
- {mdify_cli-1.2.0 → mdify_cli-1.5.0}/setup.cfg +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mdify-cli
|
|
3
|
-
Version: 1.
|
|
4
|
-
Summary:
|
|
3
|
+
Version: 1.5.0
|
|
4
|
+
Summary: Convert PDFs and document images into structured Markdown for LLM workflows
|
|
5
5
|
Author: tiroq
|
|
6
6
|
License-Expression: MIT
|
|
7
7
|
Project-URL: Homepage, https://github.com/tiroq/mdify
|
|
@@ -28,7 +28,13 @@ Dynamic: license-file
|
|
|
28
28
|
|
|
29
29
|
# mdify
|
|
30
30
|
|
|
31
|
-
|
|
31
|
+

|
|
32
|
+
|
|
33
|
+
[PyPI](https://pypi.org/project/mdify-cli/)
|
|
34
|
+
[Container](https://github.com/tiroq/mdify/pkgs/container/mdify-runtime)
|
|
35
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
36
|
+
|
|
37
|
+
A lightweight CLI for converting documents to Markdown. The CLI is fast to install via pipx, while the heavy ML conversion runs inside a container.
|
|
32
38
|
|
|
33
39
|
## Requirements
|
|
34
40
|
|
|
@@ -1,6 +1,12 @@
|
|
|
1
1
|
# mdify
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+

|
|
4
|
+
|
|
5
|
+
[PyPI](https://pypi.org/project/mdify-cli/)
|
|
6
|
+
[Container](https://github.com/tiroq/mdify/pkgs/container/mdify-runtime)
|
|
7
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
8
|
+
|
|
9
|
+
A lightweight CLI for converting documents to Markdown. The CLI is fast to install via pipx, while the heavy ML conversion runs inside a container.
|
|
4
10
|
|
|
5
11
|
## Requirements
|
|
6
12
|
|
|
Binary file
|
|
@@ -13,6 +13,7 @@ import os
|
|
|
13
13
|
import shutil
|
|
14
14
|
import subprocess
|
|
15
15
|
import sys
|
|
16
|
+
import threading
|
|
16
17
|
import time
|
|
17
18
|
from pathlib import Path
|
|
18
19
|
from typing import List, Optional, Tuple
|
|
@@ -24,8 +25,7 @@ from . import __version__
|
|
|
24
25
|
# Configuration
|
|
25
26
|
MDIFY_HOME = Path.home() / ".mdify"
|
|
26
27
|
LAST_CHECK_FILE = MDIFY_HOME / ".last_check"
|
|
27
|
-
|
|
28
|
-
GITHUB_API_URL = "https://api.github.com/repos/tiroq/mdify/releases/latest"
|
|
28
|
+
PYPI_API_URL = "https://pypi.org/pypi/mdify-cli/json"
|
|
29
29
|
CHECK_INTERVAL_SECONDS = 86400 # 24 hours
|
|
30
30
|
|
|
31
31
|
# Container configuration
|
|
@@ -39,16 +39,16 @@ SUPPORTED_RUNTIMES = ("docker", "podman")
|
|
|
39
39
|
|
|
40
40
|
def _get_remote_version(timeout: int = 5) -> Optional[str]:
|
|
41
41
|
"""
|
|
42
|
-
Fetch the latest version from
|
|
42
|
+
Fetch the latest version from PyPI.
|
|
43
43
|
|
|
44
44
|
Returns:
|
|
45
|
-
Version string (e.g., "
|
|
45
|
+
Version string (e.g., "1.1.0") or None if fetch failed.
|
|
46
46
|
"""
|
|
47
47
|
try:
|
|
48
|
-
with urlopen(
|
|
48
|
+
with urlopen(PYPI_API_URL, timeout=timeout) as response:
|
|
49
49
|
data = json.loads(response.read().decode("utf-8"))
|
|
50
|
-
|
|
51
|
-
return
|
|
50
|
+
version = data.get("info", {}).get("version", "")
|
|
51
|
+
return version if version else None
|
|
52
52
|
except (URLError, json.JSONDecodeError, KeyError, TimeoutError):
|
|
53
53
|
return None
|
|
54
54
|
|
|
@@ -103,34 +103,6 @@ def _compare_versions(current: str, remote: str) -> bool:
|
|
|
103
103
|
return False
|
|
104
104
|
|
|
105
105
|
|
|
106
|
-
def _run_upgrade() -> bool:
|
|
107
|
-
"""
|
|
108
|
-
Run the upgrade installer.
|
|
109
|
-
|
|
110
|
-
Returns:
|
|
111
|
-
True if upgrade was successful, False otherwise.
|
|
112
|
-
"""
|
|
113
|
-
if not INSTALLER_PATH.exists():
|
|
114
|
-
print(
|
|
115
|
-
f"Installer not found at {INSTALLER_PATH}. "
|
|
116
|
-
"Please reinstall mdify manually.",
|
|
117
|
-
file=sys.stderr,
|
|
118
|
-
)
|
|
119
|
-
return False
|
|
120
|
-
|
|
121
|
-
try:
|
|
122
|
-
result = subprocess.run(
|
|
123
|
-
[str(INSTALLER_PATH), "--upgrade", "-y"],
|
|
124
|
-
check=True,
|
|
125
|
-
)
|
|
126
|
-
return result.returncode == 0
|
|
127
|
-
except subprocess.CalledProcessError:
|
|
128
|
-
return False
|
|
129
|
-
except OSError as e:
|
|
130
|
-
print(f"Failed to run installer: {e}", file=sys.stderr)
|
|
131
|
-
return False
|
|
132
|
-
|
|
133
|
-
|
|
134
106
|
def check_for_update(force: bool = False) -> None:
|
|
135
107
|
"""
|
|
136
108
|
Check for updates and prompt user to upgrade if available.
|
|
@@ -161,27 +133,13 @@ def check_for_update(force: bool = False) -> None:
|
|
|
161
133
|
return
|
|
162
134
|
|
|
163
135
|
print(f"\n{'='*50}")
|
|
164
|
-
print(f"A new version of mdify is available!")
|
|
136
|
+
print(f"A new version of mdify-cli is available!")
|
|
165
137
|
print(f" Current version: {__version__}")
|
|
166
138
|
print(f" Latest version: {remote_version}")
|
|
167
|
-
print(f"{'='*50}
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
except (EOFError, KeyboardInterrupt):
|
|
172
|
-
print()
|
|
173
|
-
return
|
|
174
|
-
|
|
175
|
-
if response in ("y", "yes"):
|
|
176
|
-
print("\nStarting upgrade...\n")
|
|
177
|
-
if _run_upgrade():
|
|
178
|
-
print("\nUpgrade completed! Please restart mdify.")
|
|
179
|
-
sys.exit(0)
|
|
180
|
-
else:
|
|
181
|
-
print("\nUpgrade failed. You can try manually with:")
|
|
182
|
-
print(f" {INSTALLER_PATH} --upgrade")
|
|
183
|
-
else:
|
|
184
|
-
print(f"\nTo upgrade later, run: {INSTALLER_PATH} --upgrade\n")
|
|
139
|
+
print(f"{'='*50}")
|
|
140
|
+
print(f"\nTo upgrade, run:")
|
|
141
|
+
print(f" pipx upgrade mdify-cli")
|
|
142
|
+
print(f" # or: pip install --upgrade mdify-cli\n")
|
|
185
143
|
|
|
186
144
|
|
|
187
145
|
# =============================================================================
|
|
@@ -262,14 +220,68 @@ def pull_image(runtime: str, image: str, quiet: bool = False) -> bool:
|
|
|
262
220
|
return False
|
|
263
221
|
|
|
264
222
|
|
|
223
|
+
def format_size(size_bytes: int) -> str:
    """Format a byte count as a short human-readable string.

    Binary multiples are used (1 KB = 1024 B). Sizes below 1024 bytes are
    printed as a plain number of bytes; larger sizes are printed with one
    decimal place in KB, MB, GB or TB.

    Args:
        size_bytes: Size in bytes.

    Returns:
        A string such as "512 B", "1.5 KB" or "2.0 GB". Anything that does
        not fit in GB is reported in TB.
    """
    if size_bytes < 1024:
        return f"{size_bytes} B"

    value = size_bytes / 1024
    for unit in ("KB", "MB", "GB"):
        # Compare the value as it will be displayed: 1023.96 KB would print
        # as "1024.0 KB", so promote it to the next unit ("1.0 MB") instead.
        if round(value, 1) < 1024:
            return f"{value:.1f} {unit}"
        value /= 1024
    return f"{value:.1f} TB"
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def format_duration(seconds: float) -> str:
    """Format a duration in seconds as a short human-readable string.

    Durations that display as less than a minute are printed with one
    decimal place ("12.3s"). Longer durations are rounded to whole seconds
    and printed as "Xm Ys" or "Xh Ym Zs".

    Args:
        seconds: Duration in seconds.

    Returns:
        The formatted duration, e.g. "4.2s", "1m 5s" or "1h 2m 5s".
    """
    # Decide on the displayed value so that 59.96 does not print as "60.0s".
    if round(seconds, 1) < 60:
        return f"{seconds:.1f}s"

    # Round once, before splitting into components. Rounding only the
    # seconds part after the split can produce outputs such as "1m 60s".
    total = int(round(seconds))
    minutes, secs = divmod(total, 60)
    if minutes < 60:
        return f"{minutes}m {secs}s"

    hours, mins = divmod(minutes, 60)
    return f"{hours}h {mins}m {secs}s"
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
class Spinner:
    """Animated terminal spinner with an elapsed-time readout.

    ``start()`` launches a daemon thread that redraws a single line on
    stdout roughly every 0.1 seconds; ``stop()`` ends the animation and
    blanks that line so the caller can print a final status in its place.
    """

    def __init__(self):
        self.frames = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']
        self.running = False
        self.thread = None
        self.start_time = None
        # Defined here so the attribute exists before the first start().
        self.prefix = ""
        # Widest line drawn so far; stop() must blank at least this much.
        self._line_width = 0
        # Set by stop() to wake the worker immediately instead of letting
        # it finish its 0.1 s pause.
        self._wake = threading.Event()

    def _spin(self):
        """Worker loop: redraw the spinner line until ``running`` is cleared."""
        idx = 0
        while self.running:
            elapsed = time.time() - self.start_time
            frame = self.frames[idx % len(self.frames)]
            line = f"{self.prefix} {frame} ({format_duration(elapsed)})"
            self._line_width = max(self._line_width, len(line))
            print(f"\r{line}", end="", flush=True)
            idx += 1
            self._wake.wait(0.1)

    def start(self, prefix: str = ""):
        """Begin animating, showing ``prefix`` before the spinner frame."""
        # Never leave a previous worker running: two threads would fight
        # over the same terminal line.
        if self.thread is not None:
            self.stop()

        self.prefix = prefix
        self._line_width = 0
        self._wake.clear()
        self.running = True
        self.start_time = time.time()
        self.thread = threading.Thread(target=self._spin, daemon=True)
        self.thread.start()

    def stop(self):
        """Stop animating and clear the spinner line.

        Calling this when the spinner was never started does nothing.
        """
        self.running = False
        self._wake.set()

        worker, self.thread = self.thread, None
        if worker is None:
            return

        worker.join(timeout=0.5)
        # Clear the spinner line; cover the widest line actually drawn, with
        # the historical 80 columns as a floor.
        width = max(self._line_width, 80)
        print(f"\r{' ' * width}\r", end="", flush=True)
|
|
276
|
+
|
|
277
|
+
|
|
265
278
|
def run_container(
|
|
266
279
|
runtime: str,
|
|
267
280
|
image: str,
|
|
268
281
|
input_file: Path,
|
|
269
282
|
output_file: Path,
|
|
270
283
|
mask_pii: bool = False,
|
|
271
|
-
|
|
272
|
-
) -> Tuple[bool, str]:
|
|
284
|
+
) -> Tuple[bool, str, float]:
|
|
273
285
|
"""
|
|
274
286
|
Run container to convert a single file.
|
|
275
287
|
|
|
@@ -279,11 +291,12 @@ def run_container(
|
|
|
279
291
|
input_file: Absolute path to input file
|
|
280
292
|
output_file: Absolute path to output file
|
|
281
293
|
mask_pii: Whether to mask PII in images
|
|
282
|
-
quiet: Suppress progress output
|
|
283
294
|
|
|
284
295
|
Returns:
|
|
285
|
-
Tuple of (success: bool, message: str)
|
|
296
|
+
Tuple of (success: bool, message: str, elapsed_seconds: float)
|
|
286
297
|
"""
|
|
298
|
+
start_time = time.time()
|
|
299
|
+
|
|
287
300
|
# Ensure output directory exists
|
|
288
301
|
output_file.parent.mkdir(parents=True, exist_ok=True)
|
|
289
302
|
|
|
@@ -314,21 +327,17 @@ def run_container(
|
|
|
314
327
|
text=True,
|
|
315
328
|
check=False,
|
|
316
329
|
)
|
|
330
|
+
elapsed = time.time() - start_time
|
|
317
331
|
|
|
318
332
|
if result.returncode == 0:
|
|
319
|
-
|
|
320
|
-
print(f"Converted: {input_file} -> {output_file}")
|
|
321
|
-
return True, "success"
|
|
333
|
+
return True, "success", elapsed
|
|
322
334
|
else:
|
|
323
335
|
error_msg = result.stderr.strip() or result.stdout.strip() or "Unknown error"
|
|
324
|
-
|
|
325
|
-
print(f"Failed: {input_file} - {error_msg}", file=sys.stderr)
|
|
326
|
-
return False, f"error: {error_msg}"
|
|
336
|
+
return False, error_msg, elapsed
|
|
327
337
|
|
|
328
338
|
except OSError as e:
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
return False, f"error: {e}"
|
|
339
|
+
elapsed = time.time() - start_time
|
|
340
|
+
return False, str(e), elapsed
|
|
332
341
|
|
|
333
342
|
|
|
334
343
|
# =============================================================================
|
|
@@ -585,8 +594,11 @@ def main() -> int:
|
|
|
585
594
|
print(f"No files found to convert in: {input_path}", file=sys.stderr)
|
|
586
595
|
return 1
|
|
587
596
|
|
|
597
|
+
total_files = len(files_to_convert)
|
|
598
|
+
total_size = sum(f.stat().st_size for f in files_to_convert)
|
|
599
|
+
|
|
588
600
|
if not args.quiet:
|
|
589
|
-
print(f"Found {
|
|
601
|
+
print(f"Found {total_files} file(s) to convert ({format_size(total_size)})")
|
|
590
602
|
print(f"Using runtime: {runtime}")
|
|
591
603
|
print(f"Using image: {image}")
|
|
592
604
|
print()
|
|
@@ -601,35 +613,54 @@ def main() -> int:
|
|
|
601
613
|
success_count = 0
|
|
602
614
|
skipped_count = 0
|
|
603
615
|
failed_count = 0
|
|
616
|
+
conversion_start = time.time()
|
|
617
|
+
spinner = Spinner()
|
|
604
618
|
|
|
605
|
-
for input_file in files_to_convert:
|
|
619
|
+
for idx, input_file in enumerate(files_to_convert, 1):
|
|
606
620
|
output_file = get_output_path(input_file, input_base, output_dir, args.flat)
|
|
621
|
+
file_size = input_file.stat().st_size
|
|
622
|
+
progress = f"[{idx}/{total_files}]"
|
|
607
623
|
|
|
608
624
|
# Check if output exists and skip if not overwriting
|
|
609
625
|
if output_file.exists() and not args.overwrite:
|
|
610
626
|
if not args.quiet:
|
|
611
|
-
print(f"Skipped (exists): {input_file}
|
|
627
|
+
print(f"{progress} Skipped (exists): {input_file.name}")
|
|
612
628
|
skipped_count += 1
|
|
613
629
|
continue
|
|
614
630
|
|
|
615
|
-
|
|
616
|
-
|
|
631
|
+
# Show spinner while processing
|
|
632
|
+
if not args.quiet:
|
|
633
|
+
spinner.start(f"{progress} Processing: {input_file.name} ({format_size(file_size)})")
|
|
634
|
+
|
|
635
|
+
success, result, elapsed = run_container(
|
|
636
|
+
runtime, image, input_file, output_file, args.mask
|
|
617
637
|
)
|
|
618
638
|
|
|
639
|
+
if not args.quiet:
|
|
640
|
+
spinner.stop()
|
|
641
|
+
|
|
619
642
|
if success:
|
|
620
643
|
success_count += 1
|
|
644
|
+
if not args.quiet:
|
|
645
|
+
print(f"{progress} {input_file.name} ✓ ({format_duration(elapsed)})")
|
|
621
646
|
else:
|
|
622
647
|
failed_count += 1
|
|
648
|
+
if not args.quiet:
|
|
649
|
+
print(f"{progress} {input_file.name} ✗ ({format_duration(elapsed)})")
|
|
650
|
+
print(f" Error: {result}", file=sys.stderr)
|
|
651
|
+
|
|
652
|
+
total_elapsed = time.time() - conversion_start
|
|
623
653
|
|
|
624
654
|
# Print summary
|
|
625
655
|
if not args.quiet:
|
|
626
656
|
print()
|
|
627
657
|
print("=" * 50)
|
|
628
658
|
print("Conversion Summary:")
|
|
629
|
-
print(f" Total files: {
|
|
659
|
+
print(f" Total files: {total_files}")
|
|
630
660
|
print(f" Successful: {success_count}")
|
|
631
661
|
print(f" Skipped: {skipped_count}")
|
|
632
662
|
print(f" Failed: {failed_count}")
|
|
663
|
+
print(f" Total time: {format_duration(total_elapsed)}")
|
|
633
664
|
print("=" * 50)
|
|
634
665
|
|
|
635
666
|
# Return appropriate exit code
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mdify-cli
|
|
3
|
-
Version: 1.
|
|
4
|
-
Summary:
|
|
3
|
+
Version: 1.5.0
|
|
4
|
+
Summary: Convert PDFs and document images into structured Markdown for LLM workflows
|
|
5
5
|
Author: tiroq
|
|
6
6
|
License-Expression: MIT
|
|
7
7
|
Project-URL: Homepage, https://github.com/tiroq/mdify
|
|
@@ -28,7 +28,13 @@ Dynamic: license-file
|
|
|
28
28
|
|
|
29
29
|
# mdify
|
|
30
30
|
|
|
31
|
-
|
|
31
|
+

|
|
32
|
+
|
|
33
|
+
[PyPI](https://pypi.org/project/mdify-cli/)
|
|
34
|
+
[Container](https://github.com/tiroq/mdify/pkgs/container/mdify-runtime)
|
|
35
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
36
|
+
|
|
37
|
+
A lightweight CLI for converting documents to Markdown. The CLI is fast to install via pipx, while the heavy ML conversion runs inside a container.
|
|
32
38
|
|
|
33
39
|
## Requirements
|
|
34
40
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "mdify-cli"
|
|
3
|
-
version = "1.
|
|
4
|
-
description = "
|
|
3
|
+
version = "1.5.0"
|
|
4
|
+
description = "Convert PDFs and document images into structured Markdown for LLM workflows"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.8"
|
|
7
7
|
license = "MIT"
|
|
@@ -41,3 +41,6 @@ build-backend = "setuptools.build_meta"
|
|
|
41
41
|
[tool.setuptools.packages.find]
|
|
42
42
|
include = ["mdify", "mdify.*"]
|
|
43
43
|
exclude = ["runtime", "runtime.*"]
|
|
44
|
+
|
|
45
|
+
[tool.setuptools.package-data]
|
|
46
|
+
mdify = ["../assets/*.png"]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|