mdify-cli 2.1.0__tar.gz → 2.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 2.1.0
3
+ Version: 2.3.0
4
4
  Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
@@ -1,3 +1,3 @@
1
1
  """mdify - Convert documents to Markdown via Docling container."""
2
2
 
3
- __version__ = "2.1.0"
3
+ __version__ = "2.3.0"
@@ -499,6 +499,13 @@ Examples:
499
499
  help="Image pull policy: always, missing, never (default: missing)",
500
500
  )
501
501
 
502
+ parser.add_argument(
503
+ "--timeout",
504
+ type=int,
505
+ default=None,
506
+ help="Conversion timeout in seconds (default: 1200, can be set via MDIFY_TIMEOUT env var)",
507
+ )
508
+
502
509
  # Utility options
503
510
  parser.add_argument(
504
511
  "--check-update",
@@ -532,6 +539,9 @@ def main() -> int:
532
539
  # Check for updates (daily, silent on errors)
533
540
  check_for_update(force=False)
534
541
 
542
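+ # Resolve timeout value: CLI > env > default 1200. NOTE(review): "or" makes --timeout 0 fall through to env/default, and a non-integer MDIFY_TIMEOUT makes int() raise ValueError.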
543
+ timeout = args.timeout or int(os.environ.get("MDIFY_TIMEOUT", 1200))
544
+
535
545
  # Validate input is provided
536
546
  if args.input is None:
537
547
  print("Error: Input file or directory is required", file=sys.stderr)
@@ -628,7 +638,7 @@ def main() -> int:
628
638
  print(f"Starting docling-serve container...")
629
639
  print()
630
640
 
631
- with DoclingContainer(runtime, image, args.port) as container:
641
+ with DoclingContainer(runtime, image, args.port, timeout=timeout) as container:
632
642
  # Convert files
633
643
  conversion_start = time.time()
634
644
  spinner = Spinner()
@@ -20,17 +20,19 @@ class DoclingContainer:
20
20
  # Container automatically stopped and removed
21
21
  """
22
22
 
23
- def __init__(self, runtime: str, image: str, port: int = 5001):
23
+ def __init__(self, runtime: str, image: str, port: int = 5001, timeout: int = 1200):
24
24
  """Initialize container manager.
25
25
 
26
26
  Args:
27
27
  runtime: Container runtime ("docker" or "podman")
28
28
  image: Container image to use
29
29
  port: Host port to bind (default: 5001)
30
+ timeout: Conversion timeout in seconds (default: 1200)
30
31
  """
31
32
  self.runtime = runtime
32
33
  self.image = image
33
34
  self.port = port
35
+ self.timeout = timeout
34
36
  self.container_name = f"mdify-serve-{uuid.uuid4().hex[:8]}"
35
37
  self.container_id: Optional[str] = None
36
38
 
@@ -59,6 +61,8 @@ class DoclingContainer:
59
61
  self.container_name,
60
62
  "-p",
61
63
  f"{self.port}:5001",
64
+ "-e",
65
+ f"DOCLING_SERVE_MAX_SYNC_WAIT={self.timeout}",
62
66
  self.image,
63
67
  ]
64
68
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 2.1.0
3
+ Version: 2.3.0
4
4
  Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "mdify-cli"
3
- version = "2.1.0"
3
+ version = "2.3.0"
4
4
  description = "Convert PDFs and document images into structured Markdown for LLM workflows"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.8"
@@ -0,0 +1,716 @@
1
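+ """Tests for the mdify CLI: runtime detection, argument parsing, formatting helpers, update checks, file discovery, output paths, and image handling."""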
2
+
3
+ import sys
4
+ from pathlib import Path
5
+ from unittest.mock import patch, Mock
6
+ import pytest
7
+ from urllib.error import URLError
8
+
9
+ from mdify.cli import (
10
+ detect_runtime,
11
+ parse_args,
12
+ format_size,
13
+ format_duration,
14
+ _compare_versions,
15
+ _get_remote_version,
16
+ _should_check_for_update,
17
+ _update_last_check_time,
18
+ check_for_update,
19
+ get_files_to_convert,
20
+ get_output_path,
21
+ check_image_exists,
22
+ pull_image,
23
+ )
24
+
25
+
26
+ @pytest.fixture
27
+ def isolated_mdify_home(tmp_path, monkeypatch):
28
+ """Redirect MDIFY_HOME and LAST_CHECK_FILE to tmp_path.
29
+
30
+ This MUST be used for any test that could trigger _update_last_check_time(),
31
+ which includes ALL check_for_update() tests EXCEPT when the function
32
+ returns early due to MDIFY_NO_UPDATE_CHECK=1.
33
+ """
34
+ fake_home = tmp_path / ".mdify"
35
+ fake_last_check = fake_home / ".last_check"
36
+ monkeypatch.setattr("mdify.cli.MDIFY_HOME", fake_home)
37
+ monkeypatch.setattr("mdify.cli.LAST_CHECK_FILE", fake_last_check)
38
+ return fake_home, fake_last_check
39
+
40
+
41
+ class TestDetectRuntime:
42
+ """Tests for detect_runtime() function."""
43
+
44
+ def test_auto_docker_exists(self):
45
+ with patch("mdify.cli.shutil.which") as mock_which:
46
+ mock_which.side_effect = (
47
+ lambda x: "/usr/bin/docker" if x == "docker" else None
48
+ )
49
+ result = detect_runtime("docker", explicit=False)
50
+ assert result == "/usr/bin/docker"
51
+
52
+ def test_auto_only_podman_exists(self, capsys):
53
+ with patch("mdify.cli.shutil.which") as mock_which:
54
+ mock_which.side_effect = (
55
+ lambda x: "/usr/bin/podman" if x == "podman" else None
56
+ )
57
+ result = detect_runtime("docker", explicit=False)
58
+ assert result == "/usr/bin/podman"
59
+ captured = capsys.readouterr()
60
+ assert captured.err == ""
61
+
62
+ def test_auto_neither_exists(self):
63
+ with patch("mdify.cli.shutil.which", return_value=None):
64
+ result = detect_runtime("docker", explicit=False)
65
+ assert result is None
66
+
67
+ def test_explicit_docker_exists(self):
68
+ with patch("mdify.cli.shutil.which") as mock_which:
69
+ mock_which.side_effect = (
70
+ lambda x: "/usr/bin/docker" if x == "docker" else None
71
+ )
72
+ result = detect_runtime("docker", explicit=True)
73
+ assert result == "/usr/bin/docker"
74
+
75
+ def test_explicit_docker_fallback_to_podman(self, capsys):
76
+ with patch("mdify.cli.shutil.which") as mock_which:
77
+ mock_which.side_effect = (
78
+ lambda x: "/usr/bin/podman" if x == "podman" else None
79
+ )
80
+ result = detect_runtime("docker", explicit=True)
81
+ assert result == "/usr/bin/podman"
82
+ captured = capsys.readouterr()
83
+ assert "Warning: docker not found, using podman" in captured.err
84
+
85
+ def test_explicit_docker_neither_exists(self):
86
+ with patch("mdify.cli.shutil.which", return_value=None):
87
+ result = detect_runtime("docker", explicit=True)
88
+ assert result is None
89
+
90
+ def test_explicit_podman_exists(self):
91
+ with patch("mdify.cli.shutil.which") as mock_which:
92
+ mock_which.side_effect = (
93
+ lambda x: "/usr/bin/podman" if x == "podman" else None
94
+ )
95
+ result = detect_runtime("podman", explicit=True)
96
+ assert result == "/usr/bin/podman"
97
+
98
+ def test_explicit_podman_fallback_to_docker(self, capsys):
99
+ with patch("mdify.cli.shutil.which") as mock_which:
100
+ mock_which.side_effect = (
101
+ lambda x: "/usr/bin/docker" if x == "docker" else None
102
+ )
103
+ result = detect_runtime("podman", explicit=True)
104
+ assert result == "/usr/bin/docker"
105
+ captured = capsys.readouterr()
106
+ assert "Warning: podman not found, using docker" in captured.err
107
+
108
+ def test_explicit_podman_neither_exists(self):
109
+ with patch("mdify.cli.shutil.which", return_value=None):
110
+ result = detect_runtime("podman", explicit=True)
111
+ assert result is None
112
+
113
+
114
+ class TestNewCLIArgs:
115
+ """Test new CLI arguments for docling-serve."""
116
+
117
+ def test_gpu_flag_is_parsed(self):
118
+ """Test --gpu flag is parsed correctly."""
119
+ with patch.object(sys, "argv", ["mdify", "--gpu", "test.pdf"]):
120
+ args = parse_args()
121
+ assert args.gpu is True
122
+ assert args.input == "test.pdf"
123
+
124
+ def test_gpu_flag_default_false(self):
125
+ """Test --gpu flag defaults to False."""
126
+ with patch.object(sys, "argv", ["mdify", "test.pdf"]):
127
+ args = parse_args()
128
+ assert args.gpu is False
129
+
130
+ def test_port_argument_default(self):
131
+ """Test --port argument has correct default."""
132
+ with patch.object(sys, "argv", ["mdify", "test.pdf"]):
133
+ args = parse_args()
134
+ assert args.port == 5001
135
+
136
+ def test_port_argument_custom(self):
137
+ """Test --port argument accepts custom value."""
138
+ with patch.object(sys, "argv", ["mdify", "--port", "8080", "test.pdf"]):
139
+ args = parse_args()
140
+ assert args.port == 8080
141
+
142
+ def test_port_argument_invalid_type(self):
143
+ """Test --port argument rejects non-integer values."""
144
+ with patch.object(sys, "argv", ["mdify", "--port", "invalid", "test.pdf"]):
145
+ with pytest.raises(SystemExit):
146
+ parse_args()
147
+
148
+ def test_mask_flag_still_exists(self):
149
+ """Test --mask flag still exists (for deprecation warning)."""
150
+ with patch.object(sys, "argv", ["mdify", "--mask", "test.pdf"]):
151
+ args = parse_args()
152
+ assert args.mask is True
153
+
154
+ def test_gpu_and_port_together(self):
155
+ """Test --gpu and --port work together."""
156
+ with patch.object(
157
+ sys, "argv", ["mdify", "--gpu", "--port", "9000", "test.pdf"]
158
+ ):
159
+ args = parse_args()
160
+ assert args.gpu is True
161
+ assert args.port == 9000
162
+ assert args.input == "test.pdf"
163
+
164
+ def test_port_argument_high_number(self):
165
+ """Test --port accepts high port numbers."""
166
+ with patch.object(sys, "argv", ["mdify", "--port", "65535", "test.pdf"]):
167
+ args = parse_args()
168
+ assert args.port == 65535
169
+
170
+
171
+ class TestPathResolution:
172
+ """Tests for path resolution error handling."""
173
+
174
+ def test_input_path_permission_error_fallback(self, tmp_path, monkeypatch):
175
+ """Test that main() exits with code 2 when detect_runtime returns None.
176
+
177
+ Note: With detect_runtime mocked to None, main() returns 2 at line 562
178
+ BEFORE reaching path resolution code (lines 584-592). This test verifies
179
+ the runtime-missing exit path, not the PermissionError fallback.
180
+ The PermissionError fallback in path resolution is defensive coding that
181
+ would only be exercised if runtime detection succeeds.
182
+
183
+ MDIFY_NO_UPDATE_CHECK=1 prevents check_for_update() from hitting network.
184
+ """
185
+ monkeypatch.setenv("MDIFY_NO_UPDATE_CHECK", "1")
186
+ test_file = tmp_path / "test.pdf"
187
+ test_file.write_bytes(b"%PDF-1.4 test")
188
+
189
+ original_resolve = Path.resolve
190
+
191
+ def mock_resolve(self, strict=False):
192
+ if "test.pdf" in str(self):
193
+ raise PermissionError("Operation not permitted")
194
+ return original_resolve(self, strict=strict)
195
+
196
+ with patch.object(Path, "resolve", mock_resolve):
197
+ with patch.object(sys, "argv", ["mdify", str(test_file)]):
198
+ with patch("mdify.cli.detect_runtime", return_value=None):
199
+ from mdify.cli import main
200
+
201
+ result = main()
202
+ assert result == 2
203
+
204
+
205
+ class TestUtilityFunctions:
206
+ """Tests for utility formatting functions."""
207
+
208
+ def test_format_size_bytes(self):
209
+ """Test format_size with value < 1024 returns bytes."""
210
+ result = format_size(512)
211
+ assert result == "512 B"
212
+
213
+ def test_format_size_kilobytes(self):
214
+ """Test format_size with value >= 1024 returns KB."""
215
+ result = format_size(2048)
216
+ assert result == "2.0 KB"
217
+
218
+ def test_format_size_megabytes(self):
219
+ """Test format_size with value >= 1MB returns MB."""
220
+ result = format_size(2097152)
221
+ assert result == "2.0 MB"
222
+
223
+ def test_format_size_gigabytes(self):
224
+ """Test format_size with value >= 1GB returns GB."""
225
+ result = format_size(1073741824)
226
+ assert result == "1.0 GB"
227
+
228
+ def test_format_size_zero(self):
229
+ """Test format_size with zero bytes."""
230
+ result = format_size(0)
231
+ assert result == "0 B"
232
+
233
+ def test_format_size_exact_boundary(self):
234
+ """Test format_size at exact 1KB boundary."""
235
+ result = format_size(1024)
236
+ assert result == "1.0 KB"
237
+
238
+ def test_format_duration_seconds(self):
239
+ """Test format_duration with value < 60 returns seconds."""
240
+ result = format_duration(45.5)
241
+ assert result == "45.5s"
242
+
243
+ def test_format_duration_minutes(self):
244
+ """Test format_duration with value >= 60 returns minutes and seconds."""
245
+ result = format_duration(125)
246
+ assert result == "2m 5s"
247
+
248
+ def test_format_duration_hours(self):
249
+ """Test format_duration with value >= 3600 returns hours, minutes, and seconds."""
250
+ result = format_duration(3725)
251
+ assert result == "1h 2m 5s"
252
+
253
+ def test_format_duration_zero(self):
254
+ """Test format_duration with zero seconds."""
255
+ result = format_duration(0)
256
+ assert result == "0.0s"
257
+
258
+ def test_format_duration_exact_minute(self):
259
+ """Test format_duration at exact 60-second boundary."""
260
+ result = format_duration(60)
261
+ assert result == "1m 0s"
262
+
263
+
264
+ class TestVersionComparison:
265
+ """Tests for version comparison logic."""
266
+
267
+ def test_remote_newer_major(self):
268
+ """Test that major version increase returns True."""
269
+ result = _compare_versions("1.0.0", "2.0.0")
270
+ assert result is True
271
+
272
+ def test_remote_newer_minor(self):
273
+ """Test that minor version increase returns True."""
274
+ result = _compare_versions("1.0.0", "1.1.0")
275
+ assert result is True
276
+
277
+ def test_remote_newer_patch(self):
278
+ """Test that patch version increase returns True."""
279
+ result = _compare_versions("1.0.0", "1.0.1")
280
+ assert result is True
281
+
282
+ def test_same_version(self):
283
+ """Test that same versions return False."""
284
+ result = _compare_versions("1.0.0", "1.0.0")
285
+ assert result is False
286
+
287
+ def test_current_newer(self):
288
+ """Test that current version newer than remote returns False."""
289
+ result = _compare_versions("2.0.0", "1.0.0")
290
+ assert result is False
291
+
292
+ def test_different_length_versions(self):
293
+ """Test that different length versions are padded and compared correctly."""
294
+ result = _compare_versions("1.0", "1.0.0")
295
+ assert result is False
296
+
297
+ def test_invalid_current_version(self):
298
+ """Test that invalid current version returns False (graceful failure)."""
299
+ result = _compare_versions("invalid", "1.0.0")
300
+ assert result is False
301
+
302
+ def test_invalid_remote_version(self):
303
+ """Test that invalid remote version returns False (graceful failure)."""
304
+ result = _compare_versions("1.0.0", "invalid")
305
+ assert result is False
306
+
307
+
308
+ class TestVersionChecking:
309
+ """Tests for version checking functions."""
310
+
311
+ # =========================================================================
312
+ # _get_remote_version tests (4 tests)
313
+ # =========================================================================
314
+
315
+ def test_get_remote_version_success(self):
316
+ """Test successful version fetch from PyPI."""
317
+ mock_response = Mock()
318
+ mock_response.read.return_value = b'{"info": {"version": "1.2.3"}}'
319
+ mock_response.__enter__ = Mock(return_value=mock_response)
320
+ mock_response.__exit__ = Mock(return_value=False)
321
+ with patch("mdify.cli.urlopen", return_value=mock_response):
322
+ result = _get_remote_version()
323
+ assert result == "1.2.3"
324
+
325
+ def test_get_remote_version_timeout(self):
326
+ """Test timeout handling returns None."""
327
+ with patch("mdify.cli.urlopen", side_effect=URLError("timeout")):
328
+ result = _get_remote_version()
329
+ assert result is None
330
+
331
+ def test_get_remote_version_invalid_json(self):
332
+ """Test invalid JSON response returns None."""
333
+ mock_response = Mock()
334
+ mock_response.read.return_value = b"not json"
335
+ mock_response.__enter__ = Mock(return_value=mock_response)
336
+ mock_response.__exit__ = Mock(return_value=False)
337
+ with patch("mdify.cli.urlopen", return_value=mock_response):
338
+ result = _get_remote_version()
339
+ assert result is None
340
+
341
+ def test_get_remote_version_missing_version(self):
342
+ """Test missing version key returns None."""
343
+ mock_response = Mock()
344
+ mock_response.read.return_value = b'{"info": {}}'
345
+ mock_response.__enter__ = Mock(return_value=mock_response)
346
+ mock_response.__exit__ = Mock(return_value=False)
347
+ with patch("mdify.cli.urlopen", return_value=mock_response):
348
+ result = _get_remote_version()
349
+ assert result is None
350
+
351
+ # =========================================================================
352
+ # _should_check_for_update tests (5 tests)
353
+ # =========================================================================
354
+
355
+ def test_should_check_env_disabled(self, monkeypatch):
356
+ """Test returns False when MDIFY_NO_UPDATE_CHECK=1."""
357
+ monkeypatch.setenv("MDIFY_NO_UPDATE_CHECK", "1")
358
+ result = _should_check_for_update()
359
+ assert result is False
360
+
361
+ def test_should_check_no_file(self, isolated_mdify_home):
362
+ """Test returns True when .last_check doesn't exist."""
363
+ result = _should_check_for_update()
364
+ assert result is True
365
+
366
+ def test_should_check_recent(self, isolated_mdify_home):
367
+ """Test returns False when last check was recent (< 24h)."""
368
+ fake_home, fake_last_check = isolated_mdify_home
369
+ fake_home.mkdir(parents=True)
370
+ fake_last_check.write_text("1000000") # timestamp in past
371
+ with patch("mdify.cli.time.time", return_value=1000000 + 3600): # 1 hour later
372
+ result = _should_check_for_update()
373
+ assert result is False # Less than CHECK_INTERVAL_SECONDS (86400)
374
+
375
+ def test_should_check_old(self, isolated_mdify_home):
376
+ """Test returns True when last check was > 24h ago."""
377
+ fake_home, fake_last_check = isolated_mdify_home
378
+ fake_home.mkdir(parents=True)
379
+ fake_last_check.write_text("1000000") # timestamp in past
380
+ with patch("mdify.cli.time.time", return_value=1000000 + 90000): # 25h later
381
+ result = _should_check_for_update()
382
+ assert result is True
383
+
384
+ def test_should_check_corrupted_file(self, isolated_mdify_home):
385
+ """Test returns True when .last_check contains invalid data."""
386
+ fake_home, fake_last_check = isolated_mdify_home
387
+ fake_home.mkdir(parents=True)
388
+ fake_last_check.write_text("garbage") # invalid timestamp
389
+ result = _should_check_for_update()
390
+ assert result is True
391
+
392
+ # =========================================================================
393
+ # _update_last_check_time tests (2 tests)
394
+ # =========================================================================
395
+
396
+ def test_update_last_check_creates_file(self, isolated_mdify_home):
397
+ """Test creates .last_check file with correct timestamp."""
398
+ fake_home, fake_last_check = isolated_mdify_home
399
+ known_time = 1234567890.123
400
+ with patch("mdify.cli.time.time", return_value=known_time):
401
+ _update_last_check_time()
402
+ assert fake_last_check.exists()
403
+ content = fake_last_check.read_text()
404
+ assert float(content) == known_time
405
+
406
+ def test_update_last_check_oserror_no_crash(self, isolated_mdify_home):
407
+ """Test that OSError on mkdir doesn't crash the function."""
408
+ with patch.object(Path, "mkdir", side_effect=OSError("Permission denied")):
409
+ # Should not raise - function catches OSError
410
+ _update_last_check_time()
411
+ # Function returns None on error, test passes if no exception
412
+
413
+ # =========================================================================
414
+ # check_for_update tests (5 tests)
415
+ # =========================================================================
416
+
417
+ def test_check_for_update_skip_check(self, monkeypatch):
418
+ """Test check is skipped when MDIFY_NO_UPDATE_CHECK=1."""
419
+ monkeypatch.setenv("MDIFY_NO_UPDATE_CHECK", "1")
420
+ with patch("mdify.cli.urlopen") as mock_urlopen:
421
+ check_for_update(force=False)
422
+ mock_urlopen.assert_not_called() # Should skip network call
423
+
424
+ def test_check_for_update_newer_available(
425
+ self, isolated_mdify_home, capsys, monkeypatch
426
+ ):
427
+ """Test prints update message when newer version available."""
428
+ monkeypatch.setattr("mdify.cli.__version__", "1.0.0")
429
+ mock_response = Mock()
430
+ mock_response.read.return_value = b'{"info": {"version": "2.0.0"}}'
431
+ mock_response.__enter__ = Mock(return_value=mock_response)
432
+ mock_response.__exit__ = Mock(return_value=False)
433
+ with patch("mdify.cli.urlopen", return_value=mock_response):
434
+ check_for_update(force=True)
435
+ captured = capsys.readouterr()
436
+ assert "A new version" in captured.out
437
+ assert "2.0.0" in captured.out
438
+
439
+ def test_check_for_update_up_to_date_silent(
440
+ self, isolated_mdify_home, capsys, monkeypatch
441
+ ):
442
+ """Test no output when force=False and versions match."""
443
+ monkeypatch.setattr("mdify.cli.__version__", "1.0.0")
444
+ mock_response = Mock()
445
+ mock_response.read.return_value = b'{"info": {"version": "1.0.0"}}'
446
+ mock_response.__enter__ = Mock(return_value=mock_response)
447
+ mock_response.__exit__ = Mock(return_value=False)
448
+ with patch("mdify.cli.urlopen", return_value=mock_response):
449
+ check_for_update(force=False)
450
+ captured = capsys.readouterr()
451
+ assert captured.out == "" # No output when force=False and up to date
452
+
453
+ def test_check_for_update_force_shows_current(
454
+ self, isolated_mdify_home, capsys, monkeypatch
455
+ ):
456
+ """Test prints 'up to date' message when force=True and versions match."""
457
+ monkeypatch.setattr("mdify.cli.__version__", "1.0.0")
458
+ mock_response = Mock()
459
+ mock_response.read.return_value = b'{"info": {"version": "1.0.0"}}'
460
+ mock_response.__enter__ = Mock(return_value=mock_response)
461
+ mock_response.__exit__ = Mock(return_value=False)
462
+ with patch("mdify.cli.urlopen", return_value=mock_response):
463
+ check_for_update(force=True)
464
+ captured = capsys.readouterr()
465
+ assert "up to date" in captured.out
466
+
467
+ def test_check_for_update_force_network_error(self, capsys):
468
+ """Test sys.exit(1) when force=True and network error."""
469
+ with patch("mdify.cli.urlopen", side_effect=URLError("Network error")):
470
+ with pytest.raises(SystemExit) as exc_info:
471
+ check_for_update(force=True)
472
+ assert exc_info.value.code == 1
473
+ captured = capsys.readouterr()
474
+ assert "Failed to check for updates" in captured.err
475
+
476
+
477
+ class TestFileHandling:
478
+ """Tests for file handling functions."""
479
+
480
+ # =========================================================================
481
+ # Tests for get_files_to_convert (8 tests)
482
+ # =========================================================================
483
+
484
+ def test_single_file(self, tmp_path):
485
+ """Test get_files_to_convert with single file."""
486
+ pdf_file = tmp_path / "doc.pdf"
487
+ pdf_file.touch()
488
+ result = get_files_to_convert(pdf_file, mask="*", recursive=False)
489
+ assert result == [pdf_file]
490
+
491
+ def test_directory_non_recursive(self, tmp_path):
492
+ """Test directory scan is non-recursive by default."""
493
+ (tmp_path / "doc1.pdf").touch()
494
+ (tmp_path / "sub").mkdir()
495
+ (tmp_path / "sub" / "doc2.pdf").touch()
496
+ result = get_files_to_convert(tmp_path, mask="*", recursive=False)
497
+ assert len(result) == 1 # Only top-level doc1.pdf
498
+ assert result[0].name == "doc1.pdf"
499
+
500
+ def test_directory_recursive(self, tmp_path):
501
+ """Test directory scan with recursive flag."""
502
+ (tmp_path / "doc1.pdf").touch()
503
+ (tmp_path / "sub").mkdir()
504
+ (tmp_path / "sub" / "doc2.pdf").touch()
505
+ result = get_files_to_convert(tmp_path, mask="*", recursive=True)
506
+ assert len(result) == 2 # Both files
507
+
508
+ def test_glob_pattern(self, tmp_path):
509
+ """Test glob pattern filtering."""
510
+ (tmp_path / "doc.pdf").touch()
511
+ (tmp_path / "doc.docx").touch()
512
+ result = get_files_to_convert(tmp_path, mask="*.pdf", recursive=False)
513
+ assert len(result) == 1
514
+ assert result[0].name == "doc.pdf"
515
+
516
+ def test_hidden_files_excluded(self, tmp_path):
517
+ """Hidden files are excluded even if they have supported extensions."""
518
+ (tmp_path / "visible.pdf").touch()
519
+ (tmp_path / ".hidden.pdf").touch() # Hidden file with supported extension
520
+ # Note: whether a "*" glob matches dotfiles depends on the API (glob.glob skips them, pathlib.Path.glob does not),
521
+ # so the function's explicit filter `not f.name.startswith(".")` is what this test relies on to exclude .hidden.pdf
522
+ result = get_files_to_convert(tmp_path, mask="*", recursive=False)
523
+ assert len(result) == 1
524
+ assert result[0].name == "visible.pdf"
525
+
526
+ def test_unsupported_extensions_excluded(self, tmp_path):
527
+ """Files with unsupported extensions are filtered out."""
528
+ (tmp_path / "doc.pdf").touch() # Supported
529
+ (tmp_path / "readme.txt").touch() # NOT in SUPPORTED_EXTENSIONS
530
+ result = get_files_to_convert(tmp_path, mask="*", recursive=False)
531
+ assert len(result) == 1
532
+ assert result[0].name == "doc.pdf"
533
+
534
+ def test_empty_directory(self, tmp_path):
535
+ """Test empty directory returns empty list."""
536
+ result = get_files_to_convert(tmp_path, mask="*", recursive=False)
537
+ assert result == []
538
+
539
+ def test_nonexistent_path(self, tmp_path):
540
+ """Test nonexistent path raises FileNotFoundError."""
541
+ nonexistent = tmp_path / "does_not_exist"
542
+ with pytest.raises(FileNotFoundError):
543
+ get_files_to_convert(nonexistent, mask="*", recursive=False)
544
+
545
+ # =========================================================================
546
+ # Tests for get_output_path (5 tests)
547
+ # =========================================================================
548
+
549
+ def test_output_path_preserves_structure(self, tmp_path):
550
+ """Test output path preserves directory structure when flat=False."""
551
+ input_file = tmp_path / "input" / "sub" / "doc.pdf"
552
+ input_file.parent.mkdir(parents=True)
553
+ input_file.touch()
554
+ input_base = tmp_path / "input"
555
+ output_dir = tmp_path / "output"
556
+ output_dir.mkdir()
557
+
558
+ result = get_output_path(input_file, input_base, output_dir, flat=False)
559
+
560
+ assert result == output_dir / "sub" / "doc.md"
561
+
562
+ def test_output_path_flat_mode(self, tmp_path):
563
+ """Test that flat mode joins the relative path components with underscores."""
564
+ input_file = tmp_path / "input" / "sub" / "doc.pdf"
565
+ input_file.parent.mkdir(parents=True)
566
+ input_file.touch()
567
+ input_base = tmp_path / "input"
568
+ output_dir = tmp_path / "output"
569
+ output_dir.mkdir()
570
+
571
+ result = get_output_path(input_file, input_base, output_dir, flat=True)
572
+
573
+ assert result == output_dir / "sub_doc.md"
574
+
575
+ def test_output_path_flat_mode_root_file(self, tmp_path):
576
+ """Test output path with flat mode for file at root."""
577
+ input_file = tmp_path / "input" / "doc.pdf"
578
+ input_file.parent.mkdir(parents=True)
579
+ input_file.touch()
580
+ input_base = tmp_path / "input"
581
+ output_dir = tmp_path / "output"
582
+ output_dir.mkdir()
583
+
584
+ result = get_output_path(input_file, input_base, output_dir, flat=True)
585
+
586
+ assert result == output_dir / "doc.md"
587
+
588
+ def test_output_path_deeply_nested(self, tmp_path):
589
+ """Test output path with deeply nested directory structure in flat mode."""
590
+ input_file = tmp_path / "input" / "a" / "b" / "c" / "doc.pdf"
591
+ input_file.parent.mkdir(parents=True)
592
+ input_file.touch()
593
+ input_base = tmp_path / "input"
594
+ output_dir = tmp_path / "output"
595
+ output_dir.mkdir()
596
+
597
+ result = get_output_path(input_file, input_base, output_dir, flat=True)
598
+
599
+ assert result == output_dir / "a_b_c_doc.md"
600
+
601
+ def test_output_path_file_not_relative(self, tmp_path):
602
+ """Test output path when input file is outside input_base."""
603
+ input_file = tmp_path / "other" / "doc.pdf"
604
+ input_file.parent.mkdir(parents=True)
605
+ input_file.touch()
606
+ input_base = tmp_path / "base"
607
+ input_base.mkdir()
608
+ output_dir = tmp_path / "out"
609
+ output_dir.mkdir()
610
+
611
+ result = get_output_path(input_file, input_base, output_dir, flat=False)
612
+
613
+ # Per mdify/cli.py:384, when relative_to fails, returns output_dir / f"{stem}.md"
614
+ assert result == output_dir / "doc.md"
615
+
616
+
617
+ class TestContainerRuntime:
618
+ """Tests for container runtime functions."""
619
+
620
+ def test_image_exists_returns_true(self):
621
+ """Test check_image_exists returns True when image exists."""
622
+ mock_result = Mock()
623
+ mock_result.returncode = 0
624
+ with patch("mdify.cli.subprocess.run", return_value=mock_result) as mock_run:
625
+ result = check_image_exists("/usr/bin/docker", "test-image:latest")
626
+ assert result is True
627
+ mock_run.assert_called_once_with(
628
+ ["/usr/bin/docker", "image", "inspect", "test-image:latest"],
629
+ capture_output=True,
630
+ check=False,
631
+ )
632
+
633
+ def test_image_not_exists_returns_false(self):
634
+ """Test check_image_exists returns False when image doesn't exist."""
635
+ mock_result = Mock()
636
+ mock_result.returncode = 1
637
+ with patch("mdify.cli.subprocess.run", return_value=mock_result) as mock_run:
638
+ result = check_image_exists("/usr/bin/docker", "test-image:latest")
639
+ assert result is False
640
+ mock_run.assert_called_once_with(
641
+ ["/usr/bin/docker", "image", "inspect", "test-image:latest"],
642
+ capture_output=True,
643
+ check=False,
644
+ )
645
+
646
+ def test_image_check_oserror_returns_false(self):
647
+ """Test check_image_exists returns False on OSError."""
648
+ with patch(
649
+ "mdify.cli.subprocess.run", side_effect=OSError("Command not found")
650
+ ):
651
+ result = check_image_exists("/usr/bin/docker", "test-image:latest")
652
+ assert result is False
653
+
654
+ def test_pull_success(self):
655
+ """Test pull_image returns True on successful pull."""
656
+ mock_result = Mock()
657
+ mock_result.returncode = 0
658
+ with patch("mdify.cli.subprocess.run", return_value=mock_result) as mock_run:
659
+ result = pull_image("/usr/bin/docker", "test-image", quiet=True)
660
+ assert result is True
661
+ mock_run.assert_called_once_with(
662
+ ["/usr/bin/docker", "pull", "test-image"],
663
+ capture_output=True,
664
+ check=False,
665
+ )
666
+
667
+ def test_pull_failure(self):
668
+ """Test pull_image returns False on failed pull."""
669
+ mock_result = Mock()
670
+ mock_result.returncode = 1
671
+ with patch("mdify.cli.subprocess.run", return_value=mock_result) as mock_run:
672
+ result = pull_image("/usr/bin/docker", "test-image", quiet=True)
673
+ assert result is False
674
+ mock_run.assert_called_once_with(
675
+ ["/usr/bin/docker", "pull", "test-image"],
676
+ capture_output=True,
677
+ check=False,
678
+ )
679
+
680
+ def test_pull_quiet_mode(self):
681
+ """Test pull_image with quiet=True uses capture_output=True."""
682
+ mock_result = Mock()
683
+ mock_result.returncode = 0
684
+ with patch("mdify.cli.subprocess.run", return_value=mock_result) as mock_run:
685
+ result = pull_image("/usr/bin/docker", "test-image", quiet=True)
686
+ assert result is True
687
+ mock_run.assert_called_once_with(
688
+ ["/usr/bin/docker", "pull", "test-image"],
689
+ capture_output=True,
690
+ check=False,
691
+ )
692
+
693
+ def test_pull_verbose_mode(self, capsys):
694
+ """Test pull_image with quiet=False prints and uses capture_output=False."""
695
+ mock_result = Mock()
696
+ mock_result.returncode = 0
697
+ with patch("mdify.cli.subprocess.run", return_value=mock_result) as mock_run:
698
+ result = pull_image("/usr/bin/docker", "test-image", quiet=False)
699
+ assert result is True
700
+ captured = capsys.readouterr()
701
+ assert "Pulling image: test-image" in captured.out
702
+ mock_run.assert_called_once_with(
703
+ ["/usr/bin/docker", "pull", "test-image"],
704
+ capture_output=False,
705
+ check=False,
706
+ )
707
+
708
+ def test_pull_oserror(self, capsys):
709
+ """Test pull_image returns False and prints error on OSError."""
710
+ with patch(
711
+ "mdify.cli.subprocess.run", side_effect=OSError("Command not found")
712
+ ):
713
+ result = pull_image("/usr/bin/docker", "test-image", quiet=False)
714
+ assert result is False
715
+ captured = capsys.readouterr()
716
+ assert "Error pulling image" in captured.err
@@ -1,185 +0,0 @@
1
- """Tests for mdify CLI runtime detection."""
2
-
3
- import sys
4
- from pathlib import Path
5
- from unittest.mock import patch, Mock
6
- import pytest
7
-
8
- from mdify.cli import detect_runtime, parse_args
9
-
10
-
11
- class TestDetectRuntime:
12
- """Tests for detect_runtime() function."""
13
-
14
- def test_auto_docker_exists(self):
15
- with patch("mdify.cli.shutil.which") as mock_which:
16
- mock_which.side_effect = (
17
- lambda x: "/usr/bin/docker" if x == "docker" else None
18
- )
19
- result = detect_runtime("docker", explicit=False)
20
- assert result == "/usr/bin/docker"
21
-
22
- def test_auto_only_podman_exists(self, capsys):
23
- with patch("mdify.cli.shutil.which") as mock_which:
24
- mock_which.side_effect = (
25
- lambda x: "/usr/bin/podman" if x == "podman" else None
26
- )
27
- result = detect_runtime("docker", explicit=False)
28
- assert result == "/usr/bin/podman"
29
- captured = capsys.readouterr()
30
- assert captured.err == ""
31
-
32
- def test_auto_neither_exists(self):
33
- with patch("mdify.cli.shutil.which", return_value=None):
34
- result = detect_runtime("docker", explicit=False)
35
- assert result is None
36
-
37
- def test_explicit_docker_exists(self):
38
- with patch("mdify.cli.shutil.which") as mock_which:
39
- mock_which.side_effect = (
40
- lambda x: "/usr/bin/docker" if x == "docker" else None
41
- )
42
- result = detect_runtime("docker", explicit=True)
43
- assert result == "/usr/bin/docker"
44
-
45
- def test_explicit_docker_fallback_to_podman(self, capsys):
46
- with patch("mdify.cli.shutil.which") as mock_which:
47
- mock_which.side_effect = (
48
- lambda x: "/usr/bin/podman" if x == "podman" else None
49
- )
50
- result = detect_runtime("docker", explicit=True)
51
- assert result == "/usr/bin/podman"
52
- captured = capsys.readouterr()
53
- assert "Warning: docker not found, using podman" in captured.err
54
-
55
- def test_explicit_docker_neither_exists(self):
56
- with patch("mdify.cli.shutil.which", return_value=None):
57
- result = detect_runtime("docker", explicit=True)
58
- assert result is None
59
-
60
- def test_explicit_podman_exists(self):
61
- with patch("mdify.cli.shutil.which") as mock_which:
62
- mock_which.side_effect = (
63
- lambda x: "/usr/bin/podman" if x == "podman" else None
64
- )
65
- result = detect_runtime("podman", explicit=True)
66
- assert result == "/usr/bin/podman"
67
-
68
- def test_explicit_podman_fallback_to_docker(self, capsys):
69
- with patch("mdify.cli.shutil.which") as mock_which:
70
- mock_which.side_effect = (
71
- lambda x: "/usr/bin/docker" if x == "docker" else None
72
- )
73
- result = detect_runtime("podman", explicit=True)
74
- assert result == "/usr/bin/docker"
75
- captured = capsys.readouterr()
76
- assert "Warning: podman not found, using docker" in captured.err
77
-
78
- def test_explicit_podman_neither_exists(self):
79
- with patch("mdify.cli.shutil.which", return_value=None):
80
- result = detect_runtime("podman", explicit=True)
81
- assert result is None
82
-
83
-
84
- class TestNewCLIArgs:
85
- """Test new CLI arguments for docling-serve."""
86
-
87
- def test_gpu_flag_is_parsed(self):
88
- """Test --gpu flag is parsed correctly."""
89
- with patch.object(sys, "argv", ["mdify", "--gpu", "test.pdf"]):
90
- args = parse_args()
91
- assert args.gpu is True
92
- assert args.input == "test.pdf"
93
-
94
- def test_gpu_flag_default_false(self):
95
- """Test --gpu flag defaults to False."""
96
- with patch.object(sys, "argv", ["mdify", "test.pdf"]):
97
- args = parse_args()
98
- assert args.gpu is False
99
-
100
- def test_port_argument_default(self):
101
- """Test --port argument has correct default."""
102
- with patch.object(sys, "argv", ["mdify", "test.pdf"]):
103
- args = parse_args()
104
- assert args.port == 5001
105
-
106
- def test_port_argument_custom(self):
107
- """Test --port argument accepts custom value."""
108
- with patch.object(sys, "argv", ["mdify", "--port", "8080", "test.pdf"]):
109
- args = parse_args()
110
- assert args.port == 8080
111
-
112
- def test_port_argument_invalid_type(self):
113
- """Test --port argument rejects non-integer values."""
114
- with patch.object(sys, "argv", ["mdify", "--port", "invalid", "test.pdf"]):
115
- with pytest.raises(SystemExit):
116
- parse_args()
117
-
118
- def test_mask_flag_still_exists(self):
119
- """Test --mask flag still exists (for deprecation warning)."""
120
- with patch.object(sys, "argv", ["mdify", "--mask", "test.pdf"]):
121
- args = parse_args()
122
- assert args.mask is True
123
-
124
- def test_gpu_and_port_together(self):
125
- """Test --gpu and --port work together."""
126
- with patch.object(
127
- sys, "argv", ["mdify", "--gpu", "--port", "9000", "test.pdf"]
128
- ):
129
- args = parse_args()
130
- assert args.gpu is True
131
- assert args.port == 9000
132
- assert args.input == "test.pdf"
133
-
134
- def test_port_argument_high_number(self):
135
- """Test --port accepts high port numbers."""
136
- with patch.object(sys, "argv", ["mdify", "--port", "65535", "test.pdf"]):
137
- args = parse_args()
138
- assert args.port == 65535
139
-
140
-
141
- class TestPathResolution:
142
- """Tests for path resolution error handling."""
143
-
144
- def test_input_path_permission_error_fallback(self, tmp_path):
145
- """Test that PermissionError on resolve() falls back to absolute()."""
146
- test_file = tmp_path / "test.pdf"
147
- test_file.write_bytes(b"%PDF-1.4 test")
148
-
149
- original_resolve = Path.resolve
150
-
151
- def mock_resolve(self, strict=False):
152
- if "test.pdf" in str(self):
153
- raise PermissionError("Operation not permitted")
154
- return original_resolve(self, strict=strict)
155
-
156
- with patch.object(Path, "resolve", mock_resolve):
157
- with patch.object(sys, "argv", ["mdify", str(test_file)]):
158
- with patch("mdify.cli.detect_runtime", return_value=None):
159
- from mdify.cli import main
160
-
161
- result = main()
162
- assert result == 1
163
-
164
- def test_output_path_permission_error_fallback(self, tmp_path):
165
- """Test that PermissionError on output path resolve() falls back to absolute()."""
166
- test_file = tmp_path / "test.pdf"
167
- test_file.write_bytes(b"%PDF-1.4 test")
168
- output_dir = tmp_path / "output"
169
-
170
- original_resolve = Path.resolve
171
-
172
- def mock_resolve(self, strict=False):
173
- if "output" in str(self):
174
- raise PermissionError("Operation not permitted")
175
- return original_resolve(self, strict=strict)
176
-
177
- with patch.object(Path, "resolve", mock_resolve):
178
- with patch.object(
179
- sys, "argv", ["mdify", str(test_file), "-o", str(output_dir)]
180
- ):
181
- with patch("mdify.cli.detect_runtime", return_value=None):
182
- from mdify.cli import main
183
-
184
- result = main()
185
- assert result == 1
File without changes
File without changes
File without changes
File without changes
File without changes