mdify-cli 3.0.3__tar.gz → 3.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. {mdify_cli-3.0.3 → mdify_cli-3.0.4}/PKG-INFO +1 -1
  2. {mdify_cli-3.0.3 → mdify_cli-3.0.4}/mdify/__init__.py +1 -1
  3. {mdify_cli-3.0.3 → mdify_cli-3.0.4}/mdify/cli.py +158 -138
  4. {mdify_cli-3.0.3 → mdify_cli-3.0.4}/mdify/ssh/client.py +3 -0
  5. {mdify_cli-3.0.3 → mdify_cli-3.0.4}/mdify_cli.egg-info/PKG-INFO +1 -1
  6. {mdify_cli-3.0.3 → mdify_cli-3.0.4}/pyproject.toml +1 -1
  7. {mdify_cli-3.0.3 → mdify_cli-3.0.4}/LICENSE +0 -0
  8. {mdify_cli-3.0.3 → mdify_cli-3.0.4}/README.md +0 -0
  9. {mdify_cli-3.0.3 → mdify_cli-3.0.4}/assets/mdify.png +0 -0
  10. {mdify_cli-3.0.3 → mdify_cli-3.0.4}/mdify/__main__.py +0 -0
  11. {mdify_cli-3.0.3 → mdify_cli-3.0.4}/mdify/container.py +0 -0
  12. {mdify_cli-3.0.3 → mdify_cli-3.0.4}/mdify/docling_client.py +0 -0
  13. {mdify_cli-3.0.3 → mdify_cli-3.0.4}/mdify/formatting.py +0 -0
  14. {mdify_cli-3.0.3 → mdify_cli-3.0.4}/mdify/ssh/__init__.py +0 -0
  15. {mdify_cli-3.0.3 → mdify_cli-3.0.4}/mdify/ssh/models.py +0 -0
  16. {mdify_cli-3.0.3 → mdify_cli-3.0.4}/mdify/ssh/remote_container.py +0 -0
  17. {mdify_cli-3.0.3 → mdify_cli-3.0.4}/mdify/ssh/transfer.py +0 -0
  18. {mdify_cli-3.0.3 → mdify_cli-3.0.4}/mdify_cli.egg-info/SOURCES.txt +0 -0
  19. {mdify_cli-3.0.3 → mdify_cli-3.0.4}/mdify_cli.egg-info/dependency_links.txt +0 -0
  20. {mdify_cli-3.0.3 → mdify_cli-3.0.4}/mdify_cli.egg-info/entry_points.txt +0 -0
  21. {mdify_cli-3.0.3 → mdify_cli-3.0.4}/mdify_cli.egg-info/requires.txt +0 -0
  22. {mdify_cli-3.0.3 → mdify_cli-3.0.4}/mdify_cli.egg-info/top_level.txt +0 -0
  23. {mdify_cli-3.0.3 → mdify_cli-3.0.4}/setup.cfg +0 -0
  24. {mdify_cli-3.0.3 → mdify_cli-3.0.4}/tests/test_cli.py +0 -0
  25. {mdify_cli-3.0.3 → mdify_cli-3.0.4}/tests/test_container.py +0 -0
  26. {mdify_cli-3.0.3 → mdify_cli-3.0.4}/tests/test_docling_client.py +0 -0
  27. {mdify_cli-3.0.3 → mdify_cli-3.0.4}/tests/test_ssh_client.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 3.0.3
3
+ Version: 3.0.4
4
4
  Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
@@ -1,3 +1,3 @@
1
1
  """mdify - Convert documents to Markdown via Docling container."""
2
2
 
3
- __version__ = "3.0.3"
3
+ __version__ = "3.0.4"
@@ -1238,6 +1238,12 @@ def main_async_remote(args) -> int:
1238
1238
  successful = 0
1239
1239
  failed = 0
1240
1240
 
1241
+ def is_connection_error(exc: Exception) -> bool:
1242
+ if isinstance(exc, SSHConnectionError):
1243
+ return True
1244
+ msg = str(exc).lower()
1245
+ return "broken pipe" in msg or "connection closed" in msg
1246
+
1241
1247
  try:
1242
1248
  for idx, input_file in enumerate(files_to_convert, 1):
1243
1249
  if not args.quiet:
@@ -1246,155 +1252,169 @@ def main_async_remote(args) -> int:
1246
1252
  file=sys.stderr,
1247
1253
  )
1248
1254
 
1249
- try:
1250
- # Upload file
1251
- remote_file_path = f"{work_dir}/{input_file.name}"
1252
-
1253
- if not args.quiet:
1254
- print(f" {color.cyan('Uploading to')} {remote_file_path}...", file=sys.stderr)
1255
-
1256
- await transfer_manager.upload_file(
1257
- local_path=str(input_file),
1258
- remote_path=remote_file_path,
1259
- overwrite=True,
1260
- compress=False,
1261
- )
1262
-
1263
- if not args.quiet:
1264
- print(f" {color.green('✓ Upload complete')}", file=sys.stderr)
1265
-
1266
- # Convert via remote container
1267
- if not args.quiet:
1268
- print(f" {color.cyan('Converting via remote container')}...", file=sys.stderr)
1269
-
1270
- # Determine output path
1271
- output_dir = Path(args.out_dir)
1272
-
1273
- # Preserve directory structure if not flat
1274
- if not args.flat and input_path.is_dir():
1275
- try:
1276
- rel_path = input_file.relative_to(input_path)
1277
- output_subdir = output_dir / rel_path.parent
1278
- except ValueError:
1279
- output_subdir = output_dir
1280
- else:
1281
- output_subdir = output_dir
1282
-
1283
- output_subdir.mkdir(parents=True, exist_ok=True)
1284
- output_file = output_subdir / f"{input_file.stem}.md"
1285
-
1286
- # Check if output exists and skip if not overwrite
1287
- if output_file.exists() and not args.overwrite:
1288
- if not args.quiet:
1289
- print(
1290
- f" {color.yellow('⊘ Skipped:')} {output_file} already exists (use --overwrite to replace)",
1291
- file=sys.stderr,
1292
- )
1293
- continue
1294
-
1295
- # Convert using remote container's HTTP API
1296
- # The docling-serve API expects:
1297
- # - Endpoint: /v1/convert/file
1298
- # - Method: POST with multipart/form-data
1299
- # - File field: "files" (note the plural)
1300
- # - Additional fields: to_formats=md, do_ocr=true
1301
- remote_output_path = f"{work_dir}/{input_file.stem}.md"
1302
-
1303
- # Build conversion command on remote - use -F for multipart form data
1304
- convert_cmd = (
1305
- f"curl -X POST "
1306
- f"-F 'files=@{remote_file_path}' "
1307
- f"-F 'to_formats=md' "
1308
- f"-F 'do_ocr=true' "
1309
- )
1310
- if args.mask:
1311
- convert_cmd += f"-F 'mask=true' "
1312
- convert_cmd += f"http://localhost:{args.port}/v1/convert/file"
1313
-
1314
- stdout, stderr, code = await ssh_client.run_command(convert_cmd, timeout=timeout)
1315
-
1316
- if code != 0:
1317
- print(f" ✗ Conversion failed (curl error code {code}): {stderr}", file=sys.stderr)
1318
- failed += 1
1319
- continue
1320
-
1321
- # Parse JSON response to extract markdown content
1255
+ attempt = 0
1256
+ while True:
1322
1257
  try:
1323
- response_data = json.loads(stdout)
1258
+ # Upload file
1259
+ remote_file_path = f"{work_dir}/{input_file.name}"
1324
1260
 
1325
- # Extract content from response structure
1326
- # Actual format: {"document": {"md_content": "..."}, "status": "success"}
1327
- if "document" in response_data:
1328
- document = response_data["document"]
1329
- if "md_content" in document and document["md_content"]:
1330
- markdown_content = document["md_content"]
1331
- elif "text_content" in document and document["text_content"]:
1332
- markdown_content = document["text_content"]
1333
- else:
1334
- # Fallback - use whole document
1335
- markdown_content = json.dumps(document, indent=2)
1261
+ if not args.quiet:
1262
+ print(f" {color.cyan('Uploading to')} {remote_file_path}...", file=sys.stderr)
1263
+
1264
+ await transfer_manager.upload_file(
1265
+ local_path=str(input_file),
1266
+ remote_path=remote_file_path,
1267
+ overwrite=True,
1268
+ compress=False,
1269
+ )
1270
+
1271
+ if not args.quiet:
1272
+ print(f" {color.green('✓ Upload complete')}", file=sys.stderr)
1273
+
1274
+ # Convert via remote container
1275
+ if not args.quiet:
1276
+ print(f" {color.cyan('Converting via remote container')}...", file=sys.stderr)
1277
+
1278
+ # Determine output path
1279
+ output_dir = Path(args.out_dir)
1280
+
1281
+ # Preserve directory structure if not flat
1282
+ if not args.flat and input_path.is_dir():
1283
+ try:
1284
+ rel_path = input_file.relative_to(input_path)
1285
+ output_subdir = output_dir / rel_path.parent
1286
+ except ValueError:
1287
+ output_subdir = output_dir
1336
1288
  else:
1337
- # Legacy format fallback
1338
- if "results" in response_data and response_data["results"]:
1339
- result = response_data["results"][0]
1340
- if "content" in result:
1341
- content = result["content"]
1342
- if isinstance(content, dict) and "markdown" in content:
1343
- markdown_content = content["markdown"]
1344
- elif isinstance(content, str):
1345
- markdown_content = content
1346
- else:
1347
- markdown_content = str(content)
1289
+ output_subdir = output_dir
1290
+
1291
+ output_subdir.mkdir(parents=True, exist_ok=True)
1292
+ output_file = output_subdir / f"{input_file.stem}.md"
1293
+
1294
+ # Check if output exists and skip if not overwrite
1295
+ if output_file.exists() and not args.overwrite:
1296
+ if not args.quiet:
1297
+ print(
1298
+ f" {color.yellow('⊘ Skipped:')} {output_file} already exists (use --overwrite to replace)",
1299
+ file=sys.stderr,
1300
+ )
1301
+ break
1302
+
1303
+ # Convert using remote container's HTTP API
1304
+ # The docling-serve API expects:
1305
+ # - Endpoint: /v1/convert/file
1306
+ # - Method: POST with multipart/form-data
1307
+ # - File field: "files" (note the plural)
1308
+ # - Additional fields: to_formats=md, do_ocr=true
1309
+ remote_output_path = f"{work_dir}/{input_file.stem}.md"
1310
+
1311
+ # Build conversion command on remote - use -F for multipart form data
1312
+ convert_cmd = (
1313
+ f"curl -X POST "
1314
+ f"-F 'files=@{remote_file_path}' "
1315
+ f"-F 'to_formats=md' "
1316
+ f"-F 'do_ocr=true' "
1317
+ )
1318
+ if args.mask:
1319
+ convert_cmd += f"-F 'mask=true' "
1320
+ convert_cmd += f"http://localhost:{args.port}/v1/convert/file"
1321
+
1322
+ stdout, stderr, code = await ssh_client.run_command(convert_cmd, timeout=timeout)
1323
+
1324
+ if code != 0:
1325
+ print(f" ✗ Conversion failed (curl error code {code}): {stderr}", file=sys.stderr)
1326
+ failed += 1
1327
+ break
1328
+
1329
+ # Parse JSON response to extract markdown content
1330
+ try:
1331
+ response_data = json.loads(stdout)
1332
+
1333
+ # Extract content from response structure
1334
+ # Actual format: {"document": {"md_content": "..."}, "status": "success"}
1335
+ if "document" in response_data:
1336
+ document = response_data["document"]
1337
+ if "md_content" in document and document["md_content"]:
1338
+ markdown_content = document["md_content"]
1339
+ elif "text_content" in document and document["text_content"]:
1340
+ markdown_content = document["text_content"]
1348
1341
  else:
1349
- markdown_content = str(result)
1342
+ # Fallback - use whole document
1343
+ markdown_content = json.dumps(document, indent=2)
1350
1344
  else:
1351
- # Ultimate fallback
1352
- markdown_content = stdout
1345
+ # Legacy format fallback
1346
+ if "results" in response_data and response_data["results"]:
1347
+ result = response_data["results"][0]
1348
+ if "content" in result:
1349
+ content = result["content"]
1350
+ if isinstance(content, dict) and "markdown" in content:
1351
+ markdown_content = content["markdown"]
1352
+ elif isinstance(content, str):
1353
+ markdown_content = content
1354
+ else:
1355
+ markdown_content = str(content)
1356
+ else:
1357
+ markdown_content = str(result)
1358
+ else:
1359
+ # Ultimate fallback
1360
+ markdown_content = stdout
1361
+
1362
+ # Write markdown content to remote file
1363
+ write_cmd = f"cat > {remote_output_path} << 'MDIFY_EOF'\n{markdown_content}\nMDIFY_EOF"
1364
+ _, _, write_code = await ssh_client.run_command(write_cmd, timeout=30)
1365
+
1366
+ if write_code != 0:
1367
+ print(f" ✗ Failed to write markdown output", file=sys.stderr)
1368
+ failed += 1
1369
+ break
1370
+
1371
+ except (json.JSONDecodeError, KeyError, IndexError) as e:
1372
+ print(f" ✗ Failed to parse conversion response: {e}", file=sys.stderr)
1373
+ if DEBUG:
1374
+ print(f" Response: {stdout[:500]}", file=sys.stderr)
1375
+ failed += 1
1376
+ break
1353
1377
 
1354
- # Write markdown content to remote file
1355
- write_cmd = f"cat > {remote_output_path} << 'MDIFY_EOF'\n{markdown_content}\nMDIFY_EOF"
1356
- _, _, write_code = await ssh_client.run_command(write_cmd, timeout=30)
1378
+ if not args.quiet:
1379
+ print(f" Conversion complete", file=sys.stderr)
1357
1380
 
1358
- if write_code != 0:
1359
- print(f" ✗ Failed to write markdown output", file=sys.stderr)
1360
- failed += 1
1381
+ # Download result
1382
+ if not args.quiet:
1383
+ print(f" Downloading result to {output_file}...", file=sys.stderr)
1384
+
1385
+ await transfer_manager.download_file(
1386
+ remote_path=remote_output_path,
1387
+ local_path=str(output_file),
1388
+ overwrite=True,
1389
+ )
1390
+
1391
+ if not args.quiet:
1392
+ print(f" ✓ Download complete: {output_file}", file=sys.stderr)
1393
+
1394
+ successful += 1
1395
+
1396
+ # Cleanup remote files
1397
+ await ssh_client.run_command(f"rm -f {remote_file_path} {remote_output_path}")
1398
+
1399
+ break
1400
+ except Exception as e:
1401
+ if is_connection_error(e) and attempt == 0:
1402
+ attempt += 1
1403
+ if not args.quiet:
1404
+ print(" ↻ Connection lost. Reconnecting...", file=sys.stderr)
1405
+ try:
1406
+ await ssh_client.disconnect()
1407
+ except Exception:
1408
+ pass
1409
+ await ssh_client.connect()
1361
1410
  continue
1362
1411
 
1363
- except (json.JSONDecodeError, KeyError, IndexError) as e:
1364
- print(f" ✗ Failed to parse conversion response: {e}", file=sys.stderr)
1412
+ print(f" ✗ Failed: {e}", file=sys.stderr)
1365
1413
  if DEBUG:
1366
- print(f" Response: {stdout[:500]}", file=sys.stderr)
1414
+ import traceback
1415
+ traceback.print_exc(file=sys.stderr)
1367
1416
  failed += 1
1368
- continue
1369
-
1370
- if not args.quiet:
1371
- print(f" ✓ Conversion complete", file=sys.stderr)
1372
-
1373
- # Download result
1374
- if not args.quiet:
1375
- print(f" Downloading result to {output_file}...", file=sys.stderr)
1376
-
1377
- await transfer_manager.download_file(
1378
- remote_path=remote_output_path,
1379
- local_path=str(output_file),
1380
- overwrite=True,
1381
- )
1382
-
1383
- if not args.quiet:
1384
- print(f" ✓ Download complete: {output_file}", file=sys.stderr)
1385
-
1386
- successful += 1
1387
-
1388
- # Cleanup remote files
1389
- await ssh_client.run_command(f"rm -f {remote_file_path} {remote_output_path}")
1390
-
1391
- except Exception as e:
1392
- print(f" ✗ Failed: {e}", file=sys.stderr)
1393
- if DEBUG:
1394
- import traceback
1395
- traceback.print_exc(file=sys.stderr)
1396
- failed += 1
1397
- continue
1417
+ break
1398
1418
 
1399
1419
  finally:
1400
1420
  # Stop and remove container
@@ -47,6 +47,9 @@ class AsyncSSHClient:
47
47
  "connect_timeout": self.config.timeout,
48
48
  "known_hosts": None, # Skip host key verification for now
49
49
  }
50
+
51
+ if self.config.keepalive:
52
+ connect_kwargs["keepalive_interval"] = self.config.keepalive
50
53
 
51
54
  # Add username if provided
52
55
  if self.config.username:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdify-cli
3
- Version: 3.0.3
3
+ Version: 3.0.4
4
4
  Summary: Convert PDFs and document images into structured Markdown for LLM workflows
5
5
  Author: tiroq
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "mdify-cli"
3
- version = "3.0.3"
3
+ version = "3.0.4"
4
4
  description = "Convert PDFs and document images into structured Markdown for LLM workflows"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes