mdify-cli 3.0.2__py3-none-any.whl → 3.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdify/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """mdify - Convert documents to Markdown via Docling container."""
2
2
 
3
- __version__ = "3.0.2"
3
+ __version__ = "3.0.4"
mdify/cli.py CHANGED
@@ -1238,6 +1238,12 @@ def main_async_remote(args) -> int:
1238
1238
  successful = 0
1239
1239
  failed = 0
1240
1240
 
1241
+ def is_connection_error(exc: Exception) -> bool:
1242
+ if isinstance(exc, SSHConnectionError):
1243
+ return True
1244
+ msg = str(exc).lower()
1245
+ return "broken pipe" in msg or "connection closed" in msg
1246
+
1241
1247
  try:
1242
1248
  for idx, input_file in enumerate(files_to_convert, 1):
1243
1249
  if not args.quiet:
@@ -1246,154 +1252,169 @@ def main_async_remote(args) -> int:
1246
1252
  file=sys.stderr,
1247
1253
  )
1248
1254
 
1249
- try:
1250
- # Upload file
1251
- remote_file_path = f"{work_dir}/{input_file.name}"
1252
-
1253
- if not args.quiet:
1254
- print(f" {color.cyan('Uploading to')} {remote_file_path}...", file=sys.stderr)
1255
-
1256
- await transfer_manager.upload_file(
1257
- local_path=str(input_file),
1258
- remote_path=remote_file_path,
1259
- overwrite=True,
1260
- )
1261
-
1262
- if not args.quiet:
1263
- print(f" {color.green('✓ Upload complete')}", file=sys.stderr)
1264
-
1265
- # Convert via remote container
1266
- if not args.quiet:
1267
- print(f" {color.cyan('Converting via remote container')}...", file=sys.stderr)
1268
-
1269
- # Determine output path
1270
- output_dir = Path(args.out_dir)
1271
-
1272
- # Preserve directory structure if not flat
1273
- if not args.flat and input_path.is_dir():
1274
- try:
1275
- rel_path = input_file.relative_to(input_path)
1276
- output_subdir = output_dir / rel_path.parent
1277
- except ValueError:
1278
- output_subdir = output_dir
1279
- else:
1280
- output_subdir = output_dir
1281
-
1282
- output_subdir.mkdir(parents=True, exist_ok=True)
1283
- output_file = output_subdir / f"{input_file.stem}.md"
1284
-
1285
- # Check if output exists and skip if not overwrite
1286
- if output_file.exists() and not args.overwrite:
1287
- if not args.quiet:
1288
- print(
1289
- f" {color.yellow('⊘ Skipped:')} {output_file} already exists (use --overwrite to replace)",
1290
- file=sys.stderr,
1291
- )
1292
- continue
1293
-
1294
- # Convert using remote container's HTTP API
1295
- # The docling-serve API expects:
1296
- # - Endpoint: /v1/convert/file
1297
- # - Method: POST with multipart/form-data
1298
- # - File field: "files" (note the plural)
1299
- # - Additional fields: to_formats=md, do_ocr=true
1300
- remote_output_path = f"{work_dir}/{input_file.stem}.md"
1301
-
1302
- # Build conversion command on remote - use -F for multipart form data
1303
- convert_cmd = (
1304
- f"curl -X POST "
1305
- f"-F 'files=@{remote_file_path}' "
1306
- f"-F 'to_formats=md' "
1307
- f"-F 'do_ocr=true' "
1308
- )
1309
- if args.mask:
1310
- convert_cmd += f"-F 'mask=true' "
1311
- convert_cmd += f"http://localhost:{args.port}/v1/convert/file"
1312
-
1313
- stdout, stderr, code = await ssh_client.run_command(convert_cmd, timeout=timeout)
1314
-
1315
- if code != 0:
1316
- print(f" ✗ Conversion failed (curl error code {code}): {stderr}", file=sys.stderr)
1317
- failed += 1
1318
- continue
1319
-
1320
- # Parse JSON response to extract markdown content
1255
+ attempt = 0
1256
+ while True:
1321
1257
  try:
1322
- response_data = json.loads(stdout)
1258
+ # Upload file
1259
+ remote_file_path = f"{work_dir}/{input_file.name}"
1323
1260
 
1324
- # Extract content from response structure
1325
- # Actual format: {"document": {"md_content": "..."}, "status": "success"}
1326
- if "document" in response_data:
1327
- document = response_data["document"]
1328
- if "md_content" in document and document["md_content"]:
1329
- markdown_content = document["md_content"]
1330
- elif "text_content" in document and document["text_content"]:
1331
- markdown_content = document["text_content"]
1332
- else:
1333
- # Fallback - use whole document
1334
- markdown_content = json.dumps(document, indent=2)
1261
+ if not args.quiet:
1262
+ print(f" {color.cyan('Uploading to')} {remote_file_path}...", file=sys.stderr)
1263
+
1264
+ await transfer_manager.upload_file(
1265
+ local_path=str(input_file),
1266
+ remote_path=remote_file_path,
1267
+ overwrite=True,
1268
+ compress=False,
1269
+ )
1270
+
1271
+ if not args.quiet:
1272
+ print(f" {color.green('✓ Upload complete')}", file=sys.stderr)
1273
+
1274
+ # Convert via remote container
1275
+ if not args.quiet:
1276
+ print(f" {color.cyan('Converting via remote container')}...", file=sys.stderr)
1277
+
1278
+ # Determine output path
1279
+ output_dir = Path(args.out_dir)
1280
+
1281
+ # Preserve directory structure if not flat
1282
+ if not args.flat and input_path.is_dir():
1283
+ try:
1284
+ rel_path = input_file.relative_to(input_path)
1285
+ output_subdir = output_dir / rel_path.parent
1286
+ except ValueError:
1287
+ output_subdir = output_dir
1335
1288
  else:
1336
- # Legacy format fallback
1337
- if "results" in response_data and response_data["results"]:
1338
- result = response_data["results"][0]
1339
- if "content" in result:
1340
- content = result["content"]
1341
- if isinstance(content, dict) and "markdown" in content:
1342
- markdown_content = content["markdown"]
1343
- elif isinstance(content, str):
1344
- markdown_content = content
1345
- else:
1346
- markdown_content = str(content)
1289
+ output_subdir = output_dir
1290
+
1291
+ output_subdir.mkdir(parents=True, exist_ok=True)
1292
+ output_file = output_subdir / f"{input_file.stem}.md"
1293
+
1294
+ # Check if output exists and skip if not overwrite
1295
+ if output_file.exists() and not args.overwrite:
1296
+ if not args.quiet:
1297
+ print(
1298
+ f" {color.yellow('⊘ Skipped:')} {output_file} already exists (use --overwrite to replace)",
1299
+ file=sys.stderr,
1300
+ )
1301
+ break
1302
+
1303
+ # Convert using remote container's HTTP API
1304
+ # The docling-serve API expects:
1305
+ # - Endpoint: /v1/convert/file
1306
+ # - Method: POST with multipart/form-data
1307
+ # - File field: "files" (note the plural)
1308
+ # - Additional fields: to_formats=md, do_ocr=true
1309
+ remote_output_path = f"{work_dir}/{input_file.stem}.md"
1310
+
1311
+ # Build conversion command on remote - use -F for multipart form data
1312
+ convert_cmd = (
1313
+ f"curl -X POST "
1314
+ f"-F 'files=@{remote_file_path}' "
1315
+ f"-F 'to_formats=md' "
1316
+ f"-F 'do_ocr=true' "
1317
+ )
1318
+ if args.mask:
1319
+ convert_cmd += f"-F 'mask=true' "
1320
+ convert_cmd += f"http://localhost:{args.port}/v1/convert/file"
1321
+
1322
+ stdout, stderr, code = await ssh_client.run_command(convert_cmd, timeout=timeout)
1323
+
1324
+ if code != 0:
1325
+ print(f" ✗ Conversion failed (curl error code {code}): {stderr}", file=sys.stderr)
1326
+ failed += 1
1327
+ break
1328
+
1329
+ # Parse JSON response to extract markdown content
1330
+ try:
1331
+ response_data = json.loads(stdout)
1332
+
1333
+ # Extract content from response structure
1334
+ # Actual format: {"document": {"md_content": "..."}, "status": "success"}
1335
+ if "document" in response_data:
1336
+ document = response_data["document"]
1337
+ if "md_content" in document and document["md_content"]:
1338
+ markdown_content = document["md_content"]
1339
+ elif "text_content" in document and document["text_content"]:
1340
+ markdown_content = document["text_content"]
1347
1341
  else:
1348
- markdown_content = str(result)
1342
+ # Fallback - use whole document
1343
+ markdown_content = json.dumps(document, indent=2)
1349
1344
  else:
1350
- # Ultimate fallback
1351
- markdown_content = stdout
1345
+ # Legacy format fallback
1346
+ if "results" in response_data and response_data["results"]:
1347
+ result = response_data["results"][0]
1348
+ if "content" in result:
1349
+ content = result["content"]
1350
+ if isinstance(content, dict) and "markdown" in content:
1351
+ markdown_content = content["markdown"]
1352
+ elif isinstance(content, str):
1353
+ markdown_content = content
1354
+ else:
1355
+ markdown_content = str(content)
1356
+ else:
1357
+ markdown_content = str(result)
1358
+ else:
1359
+ # Ultimate fallback
1360
+ markdown_content = stdout
1361
+
1362
+ # Write markdown content to remote file
1363
+ write_cmd = f"cat > {remote_output_path} << 'MDIFY_EOF'\n{markdown_content}\nMDIFY_EOF"
1364
+ _, _, write_code = await ssh_client.run_command(write_cmd, timeout=30)
1365
+
1366
+ if write_code != 0:
1367
+ print(f" ✗ Failed to write markdown output", file=sys.stderr)
1368
+ failed += 1
1369
+ break
1370
+
1371
+ except (json.JSONDecodeError, KeyError, IndexError) as e:
1372
+ print(f" ✗ Failed to parse conversion response: {e}", file=sys.stderr)
1373
+ if DEBUG:
1374
+ print(f" Response: {stdout[:500]}", file=sys.stderr)
1375
+ failed += 1
1376
+ break
1352
1377
 
1353
- # Write markdown content to remote file
1354
- write_cmd = f"cat > {remote_output_path} << 'MDIFY_EOF'\n{markdown_content}\nMDIFY_EOF"
1355
- _, _, write_code = await ssh_client.run_command(write_cmd, timeout=30)
1378
+ if not args.quiet:
1379
+ print(f" Conversion complete", file=sys.stderr)
1356
1380
 
1357
- if write_code != 0:
1358
- print(f" ✗ Failed to write markdown output", file=sys.stderr)
1359
- failed += 1
1381
+ # Download result
1382
+ if not args.quiet:
1383
+ print(f" Downloading result to {output_file}...", file=sys.stderr)
1384
+
1385
+ await transfer_manager.download_file(
1386
+ remote_path=remote_output_path,
1387
+ local_path=str(output_file),
1388
+ overwrite=True,
1389
+ )
1390
+
1391
+ if not args.quiet:
1392
+ print(f" ✓ Download complete: {output_file}", file=sys.stderr)
1393
+
1394
+ successful += 1
1395
+
1396
+ # Cleanup remote files
1397
+ await ssh_client.run_command(f"rm -f {remote_file_path} {remote_output_path}")
1398
+
1399
+ break
1400
+ except Exception as e:
1401
+ if is_connection_error(e) and attempt == 0:
1402
+ attempt += 1
1403
+ if not args.quiet:
1404
+ print(" ↻ Connection lost. Reconnecting...", file=sys.stderr)
1405
+ try:
1406
+ await ssh_client.disconnect()
1407
+ except Exception:
1408
+ pass
1409
+ await ssh_client.connect()
1360
1410
  continue
1361
1411
 
1362
- except (json.JSONDecodeError, KeyError, IndexError) as e:
1363
- print(f" ✗ Failed to parse conversion response: {e}", file=sys.stderr)
1412
+ print(f" ✗ Failed: {e}", file=sys.stderr)
1364
1413
  if DEBUG:
1365
- print(f" Response: {stdout[:500]}", file=sys.stderr)
1414
+ import traceback
1415
+ traceback.print_exc(file=sys.stderr)
1366
1416
  failed += 1
1367
- continue
1368
-
1369
- if not args.quiet:
1370
- print(f" ✓ Conversion complete", file=sys.stderr)
1371
-
1372
- # Download result
1373
- if not args.quiet:
1374
- print(f" Downloading result to {output_file}...", file=sys.stderr)
1375
-
1376
- await transfer_manager.download_file(
1377
- remote_path=remote_output_path,
1378
- local_path=str(output_file),
1379
- overwrite=True,
1380
- )
1381
-
1382
- if not args.quiet:
1383
- print(f" ✓ Download complete: {output_file}", file=sys.stderr)
1384
-
1385
- successful += 1
1386
-
1387
- # Cleanup remote files
1388
- await ssh_client.run_command(f"rm -f {remote_file_path} {remote_output_path}")
1389
-
1390
- except Exception as e:
1391
- print(f" ✗ Failed: {e}", file=sys.stderr)
1392
- if DEBUG:
1393
- import traceback
1394
- traceback.print_exc(file=sys.stderr)
1395
- failed += 1
1396
- continue
1417
+ break
1397
1418
 
1398
1419
  finally:
1399
1420
  # Stop and remove container
mdify/ssh/client.py CHANGED
@@ -47,6 +47,9 @@ class AsyncSSHClient:
47
47
  "connect_timeout": self.config.timeout,
48
48
  "known_hosts": None, # Skip host key verification for now
49
49
  }
50
+
51
+ if self.config.keepalive:
52
+ connect_kwargs["keepalive_interval"] = self.config.keepalive
50
53
 
51
54
  # Add username if provided
52
55
  if self.config.username:
mdify/ssh/transfer.py CHANGED
@@ -231,13 +231,18 @@ class FileTransferManager:
231
231
  local_sha256.update(chunk)
232
232
  local_checksum = local_sha256.hexdigest()
233
233
 
234
- # Calculate remote checksum
235
- stdout, stderr, code = await self.ssh_client.run_command(
236
- f"sha256sum {remote_path} | awk '{{print $1}}'"
234
+ # Calculate remote checksum (sha256sum or shasum fallback)
235
+ checksum_cmd = (
236
+ f"(command -v sha256sum >/dev/null 2>&1 && sha256sum {remote_path} | awk '{{print $1}}') "
237
+ f"|| (command -v shasum >/dev/null 2>&1 && shasum -a 256 {remote_path} | awk '{{print $1}}')"
237
238
  )
239
+ stdout, stderr, code = await self.ssh_client.run_command(checksum_cmd)
238
240
 
239
241
  if code == 0:
240
242
  remote_checksum = stdout.strip()
243
+ if not remote_checksum:
244
+ logger.warning("Could not verify checksum: remote checksum unavailable")
245
+ return
241
246
 
242
247
  if local_checksum != remote_checksum:
243
248
  raise ValueError(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 3.0.2
3
+ Version: 3.0.4
4
4
  Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
@@ -1,18 +1,18 @@
1
1
  assets/mdify.png,sha256=qUj7WXWqNwpI2KNXOW79XJwqFqa-UI0JEkmt1mmy4Rg,1820418
2
- mdify/__init__.py,sha256=T6SyoOZjbytRoip9WM4THeDUbin7s6Xq7Cb_HZAAi5Q,90
2
+ mdify/__init__.py,sha256=Le2ZtuARDsPg7kDbPBrwevy3J8cPp-j7fQLAlqx-Na8,90
3
3
  mdify/__main__.py,sha256=bhpJ00co6MfaVOdH4XLoW04NtLYDa_oJK7ODzfLrn9M,143
4
- mdify/cli.py,sha256=f7_3bka83jfvYXp2L6Zk4trPuMrN3hJ51iueK7J1Szk,74065
4
+ mdify/cli.py,sha256=CSj3J-mLdGV58pm-eNpq-vhtkQOVKduMq9hiP_4lhWs,75655
5
5
  mdify/container.py,sha256=BjL5ZR__n1i_WHifXKllTPoqO7IuOUdPDo5esuNg0Iw,8213
6
6
  mdify/docling_client.py,sha256=xuQR6sC1v3EPloOSwExoHCqT4uUxE8myYq-Yeby3C2I,7975
7
7
  mdify/formatting.py,sha256=lJKhMbDPcaWCdyEa7aKwAm_desaWvkfDc8C3EP7LWp4,790
8
8
  mdify/ssh/__init__.py,sha256=SmRWgwEvAQZ_ARHlKTb9QDPwVAcz6dvPUks2pZFWLAU,271
9
- mdify/ssh/client.py,sha256=MNMBrL5Xk2rFo28Ytw80hWX2vQ3_CXlIL4VathNtK-I,14873
9
+ mdify/ssh/client.py,sha256=nO7gAQ6eWxIXFIOplW2F2ya0-1ZEFlLmz3ovi1TEFTg,14997
10
10
  mdify/ssh/models.py,sha256=jpbDS1yGhd7Xwq2tW7bZv14mTBlR8DCfhT4x-Xf2Wq4,17676
11
11
  mdify/ssh/remote_container.py,sha256=kmScAlmHI9rJLKliYcYQXDZHF7PYYiD-_rRV-S0fffM,8462
12
- mdify/ssh/transfer.py,sha256=aZZgylDjoqx6PEpaMu2zxkDF04w7btiOnMExmtt922A,10574
13
- mdify_cli-3.0.2.dist-info/licenses/LICENSE,sha256=NWM66Uv-XuSMKaU-gaPmvfyk4WgE6zcIPr78wyg6GAo,1065
14
- mdify_cli-3.0.2.dist-info/METADATA,sha256=To3g3ZuGkKaTvMw6wPv8zMEJYDajPIOQ4_bJvJhZYX8,14766
15
- mdify_cli-3.0.2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
16
- mdify_cli-3.0.2.dist-info/entry_points.txt,sha256=0Xki8f5lADQUtwdt6Eq_FEaieI6Byhk8UE7BuDhChMg,41
17
- mdify_cli-3.0.2.dist-info/top_level.txt,sha256=qltzf7h8owHq7dxCdfCkSHY8gT21hn1_E8P-VWS_OKM,6
18
- mdify_cli-3.0.2.dist-info/RECORD,,
12
+ mdify/ssh/transfer.py,sha256=aQuWa_B81McrgZBBWo_CxnjwoiGHmoxAoqObm19JAk8,10940
13
+ mdify_cli-3.0.4.dist-info/licenses/LICENSE,sha256=NWM66Uv-XuSMKaU-gaPmvfyk4WgE6zcIPr78wyg6GAo,1065
14
+ mdify_cli-3.0.4.dist-info/METADATA,sha256=TUgq2AnSvYZtrMFLdDlgBoZTwhbwAcrd2nzoIVkLewU,14766
15
+ mdify_cli-3.0.4.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
16
+ mdify_cli-3.0.4.dist-info/entry_points.txt,sha256=0Xki8f5lADQUtwdt6Eq_FEaieI6Byhk8UE7BuDhChMg,41
17
+ mdify_cli-3.0.4.dist-info/top_level.txt,sha256=qltzf7h8owHq7dxCdfCkSHY8gT21hn1_E8P-VWS_OKM,6
18
+ mdify_cli-3.0.4.dist-info/RECORD,,