mdify-cli 3.0.8__py3-none-any.whl → 3.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdify/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """mdify - Convert documents to Markdown via Docling container."""
2
2
 
3
- __version__ = "3.0.8"
3
+ __version__ = "3.1.1"
mdify/cli.py CHANGED
@@ -8,12 +8,14 @@ is lightweight and has no ML dependencies.
8
8
  """
9
9
 
10
10
  import argparse
11
+ import asyncio
11
12
  import json
12
13
  import os
13
14
  import platform
14
15
  import shutil
15
16
  import subprocess
16
17
  import sys
18
+ import tempfile
17
19
  import threading
18
20
  import time
19
21
  from pathlib import Path
@@ -1173,7 +1175,9 @@ def main_async_remote(args) -> int:
1173
1175
  print(f"Error: Input file or directory not found: {args.input}", file=sys.stderr)
1174
1176
  return 1
1175
1177
 
1176
- files_to_convert = get_files_to_convert(input_path.resolve(), args.glob, args.recursive)
1178
+ # Store resolved path as base for relative path calculations
1179
+ input_base = input_path.resolve()
1180
+ files_to_convert = get_files_to_convert(input_base, args.glob, args.recursive)
1177
1181
 
1178
1182
  if not files_to_convert:
1179
1183
  await ssh_client.disconnect()
@@ -1247,7 +1251,8 @@ def main_async_remote(args) -> int:
1247
1251
  if isinstance(exc, SSHConnectionError):
1248
1252
  return True
1249
1253
  msg = str(exc).lower()
1250
- return "broken pipe" in msg or "connection closed" in msg
1254
+ # Errno 32 = Broken pipe, Errno 54 = Connection reset by peer
1255
+ return any(x in msg for x in ["broken pipe", "connection closed", "connection reset", "errno 32", "errno 54", "ssh connection"])
1251
1256
 
1252
1257
  try:
1253
1258
  for idx, input_file in enumerate(files_to_convert, 1):
@@ -1282,19 +1287,10 @@ def main_async_remote(args) -> int:
1282
1287
 
1283
1288
  # Determine output path
1284
1289
  output_dir = Path(args.out_dir)
1290
+ output_file = get_output_path(input_file, input_base, output_dir, args.flat)
1285
1291
 
1286
- # Preserve directory structure if not flat
1287
- if not args.flat and input_path.is_dir():
1288
- try:
1289
- rel_path = input_file.relative_to(input_path)
1290
- output_subdir = output_dir / rel_path.parent
1291
- except ValueError:
1292
- output_subdir = output_dir
1293
- else:
1294
- output_subdir = output_dir
1295
-
1296
- output_subdir.mkdir(parents=True, exist_ok=True)
1297
- output_file = output_subdir / f"{input_file.stem}.md"
1292
+ # Ensure output directory exists
1293
+ output_file.parent.mkdir(parents=True, exist_ok=True)
1298
1294
 
1299
1295
  # Check if output exists and skip if not overwrite
1300
1296
  if output_file.exists() and not args.overwrite:
@@ -1336,7 +1332,10 @@ def main_async_remote(args) -> int:
1336
1332
  while conversion_attempt < 3 and not conversion_success:
1337
1333
  try:
1338
1334
  if conversion_attempt > 0 and not args.quiet:
1339
- print(f" ↻ Conversion retry {conversion_attempt}...", file=sys.stderr)
1335
+ # Exponential backoff: 2s, then 4s (conversion_attempt is 1 or 2 here, so 8s is never reached)
1336
+ backoff_delay = 2 ** conversion_attempt
1337
+ print(f" ↻ Conversion retry {conversion_attempt} (waiting {backoff_delay}s for server recovery)...", file=sys.stderr)
1338
+ await asyncio.sleep(backoff_delay)
1340
1339
 
1341
1340
  conversion_output, _, conv_code = await ssh_client.run_command(convert_cmd, timeout=remote_conversion_timeout)
1342
1341
 
@@ -1344,19 +1343,39 @@ def main_async_remote(args) -> int:
1344
1343
  conversion_success = True
1345
1344
  break
1346
1345
  else:
1347
- conversion_attempt += 1
1346
+ # Non-zero exit code - fail without retry for non-connection errors
1347
+ break
1348
1348
  except Exception as conv_exc:
1349
- if is_connection_error(conv_exc) and conversion_attempt < 2:
1349
+ is_conn_err = is_connection_error(conv_exc)
1350
+ if is_conn_err and conversion_attempt < 2:
1350
1351
  conversion_attempt += 1
1351
1352
  if not args.quiet:
1352
- print(f" ↻ Connection lost during conversion. Reconnecting (attempt {conversion_attempt})...", file=sys.stderr)
1353
+ # Linear backoff: 5s, then 10s (5s per reconnection attempt)
1354
+ backoff_delay = 5 * conversion_attempt
1355
+ print(f" ↻ Connection reset during conversion. Reconnecting in {backoff_delay}s...", file=sys.stderr)
1356
+
1357
+ await asyncio.sleep(backoff_delay)
1358
+
1353
1359
  try:
1354
1360
  await ssh_client.disconnect()
1355
1361
  except Exception:
1356
1362
  pass
1357
- await ssh_client.connect()
1363
+
1364
+ # Reconnect with retry
1365
+ try:
1366
+ await ssh_client.connect()
1367
+ except Exception:
1368
+ if not args.quiet:
1369
+ print(f" ⚠ Reconnection failed: retrying...", file=sys.stderr)
1370
+ continue
1358
1371
  else:
1359
- conversion_attempt += 1
1372
+ # Either not a connection error, or we've exhausted retries
1373
+ if not args.quiet:
1374
+ print(f" [DEBUG] Breaking loop: not conn_err or exhausted retries", file=sys.stderr)
1375
+ if conversion_attempt >= 2 and is_conn_err:
1376
+ if not args.quiet:
1377
+ print(f" ↻ Connection error on final retry attempt", file=sys.stderr)
1378
+ break
1360
1379
 
1361
1380
  if not conversion_success:
1362
1381
  print(f" ✗ Failed: Conversion failed after {conversion_attempt} attempt(s)", file=sys.stderr)
@@ -1396,12 +1415,37 @@ def main_async_remote(args) -> int:
1396
1415
  # Ultimate fallback
1397
1416
  markdown_content = conversion_output
1398
1417
 
1399
- # Write markdown content to remote file
1400
- write_cmd = f"cat > {remote_output_path} << 'MDIFY_EOF'\n{markdown_content}\nMDIFY_EOF"
1401
- _, _, write_code = await ssh_client.run_command(write_cmd, timeout=30)
1418
+ # Write markdown content to local temp file first, then upload via SFTP
1419
+ # (Piping large content through SSH here-documents can crash the connection)
1420
+ content_size_kb = len(markdown_content) / 1024
1421
+ if not args.quiet:
1422
+ print(f" {color.cyan('Writing')} {content_size_kb:.1f}KB markdown via SFTP...", file=sys.stderr)
1402
1423
 
1403
- if write_code != 0:
1404
- print(f" ✗ Failed to write markdown output", file=sys.stderr)
1424
+ try:
1425
+ # Write to temporary local file
1426
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False) as temp_file:
1427
+ temp_file.write(markdown_content)
1428
+ temp_path = temp_file.name
1429
+
1430
+ # Upload via SFTP (more reliable for large files)
1431
+ await transfer_manager.upload_file(
1432
+ local_path=temp_path,
1433
+ remote_path=remote_output_path,
1434
+ overwrite=True,
1435
+ compress=False,
1436
+ )
1437
+
1438
+ # Cleanup temp file
1439
+ try:
1440
+ os.unlink(temp_path)
1441
+ except Exception:
1442
+ pass
1443
+
1444
+ if not args.quiet:
1445
+ print(f" {color.green('✓')} Markdown written", file=sys.stderr)
1446
+ except Exception as write_exc:
1447
+ if not args.quiet:
1448
+ print(f" ✗ Failed to write markdown: {write_exc}", file=sys.stderr)
1405
1449
  failed += 1
1406
1450
  break
1407
1451
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 3.0.8
3
+ Version: 3.1.1
4
4
  Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
@@ -1,7 +1,7 @@
1
1
  assets/mdify.png,sha256=qUj7WXWqNwpI2KNXOW79XJwqFqa-UI0JEkmt1mmy4Rg,1820418
2
- mdify/__init__.py,sha256=iUfoOWZ_3PErQD0XOXLNy8UyG2lK1eTPJFlW9l5NDdc,90
2
+ mdify/__init__.py,sha256=_C45QsMJxMtjDM6sIpFOC7912SJbhfrBhrZ06ZjrAO8,90
3
3
  mdify/__main__.py,sha256=bhpJ00co6MfaVOdH4XLoW04NtLYDa_oJK7ODzfLrn9M,143
4
- mdify/cli.py,sha256=-HYbPCEjRU4iRcurfRpmMCdbHtQS6oNah-zajkzjojA,78732
4
+ mdify/cli.py,sha256=Ro_v7i1o6rszfAUTu3W52t5OO5kR_dda1CWbnjfXgE8,81843
5
5
  mdify/container.py,sha256=BjL5ZR__n1i_WHifXKllTPoqO7IuOUdPDo5esuNg0Iw,8213
6
6
  mdify/docling_client.py,sha256=xuQR6sC1v3EPloOSwExoHCqT4uUxE8myYq-Yeby3C2I,7975
7
7
  mdify/formatting.py,sha256=lJKhMbDPcaWCdyEa7aKwAm_desaWvkfDc8C3EP7LWp4,790
@@ -10,9 +10,9 @@ mdify/ssh/client.py,sha256=nO7gAQ6eWxIXFIOplW2F2ya0-1ZEFlLmz3ovi1TEFTg,14997
10
10
  mdify/ssh/models.py,sha256=jpbDS1yGhd7Xwq2tW7bZv14mTBlR8DCfhT4x-Xf2Wq4,17676
11
11
  mdify/ssh/remote_container.py,sha256=KB8rrsp3h5s_2BKPBXhzkOdQFafyPGQhSnFbK20b4yQ,10230
12
12
  mdify/ssh/transfer.py,sha256=aQuWa_B81McrgZBBWo_CxnjwoiGHmoxAoqObm19JAk8,10940
13
- mdify_cli-3.0.8.dist-info/licenses/LICENSE,sha256=NWM66Uv-XuSMKaU-gaPmvfyk4WgE6zcIPr78wyg6GAo,1065
14
- mdify_cli-3.0.8.dist-info/METADATA,sha256=ap0J1JAN1eHngW32_OFd_5jQJrZrm4gcvCEcOMHNGNI,14766
15
- mdify_cli-3.0.8.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
16
- mdify_cli-3.0.8.dist-info/entry_points.txt,sha256=0Xki8f5lADQUtwdt6Eq_FEaieI6Byhk8UE7BuDhChMg,41
17
- mdify_cli-3.0.8.dist-info/top_level.txt,sha256=qltzf7h8owHq7dxCdfCkSHY8gT21hn1_E8P-VWS_OKM,6
18
- mdify_cli-3.0.8.dist-info/RECORD,,
13
+ mdify_cli-3.1.1.dist-info/licenses/LICENSE,sha256=NWM66Uv-XuSMKaU-gaPmvfyk4WgE6zcIPr78wyg6GAo,1065
14
+ mdify_cli-3.1.1.dist-info/METADATA,sha256=KFklqUB5LjJqiDqynre9fX00KhTwIDdaFAOL8_uwPtE,14766
15
+ mdify_cli-3.1.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
16
+ mdify_cli-3.1.1.dist-info/entry_points.txt,sha256=0Xki8f5lADQUtwdt6Eq_FEaieI6Byhk8UE7BuDhChMg,41
17
+ mdify_cli-3.1.1.dist-info/top_level.txt,sha256=qltzf7h8owHq7dxCdfCkSHY8gT21hn1_E8P-VWS_OKM,6
18
+ mdify_cli-3.1.1.dist-info/RECORD,,