mdify-cli 3.0.3__tar.gz → 3.0.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mdify_cli-3.0.3 → mdify_cli-3.0.5}/PKG-INFO +1 -1
- {mdify_cli-3.0.3 → mdify_cli-3.0.5}/mdify/__init__.py +1 -1
- {mdify_cli-3.0.3 → mdify_cli-3.0.5}/mdify/cli.py +158 -138
- {mdify_cli-3.0.3 → mdify_cli-3.0.5}/mdify/ssh/client.py +3 -0
- {mdify_cli-3.0.3 → mdify_cli-3.0.5}/mdify/ssh/remote_container.py +41 -4
- {mdify_cli-3.0.3 → mdify_cli-3.0.5}/mdify_cli.egg-info/PKG-INFO +1 -1
- {mdify_cli-3.0.3 → mdify_cli-3.0.5}/pyproject.toml +1 -1
- {mdify_cli-3.0.3 → mdify_cli-3.0.5}/LICENSE +0 -0
- {mdify_cli-3.0.3 → mdify_cli-3.0.5}/README.md +0 -0
- {mdify_cli-3.0.3 → mdify_cli-3.0.5}/assets/mdify.png +0 -0
- {mdify_cli-3.0.3 → mdify_cli-3.0.5}/mdify/__main__.py +0 -0
- {mdify_cli-3.0.3 → mdify_cli-3.0.5}/mdify/container.py +0 -0
- {mdify_cli-3.0.3 → mdify_cli-3.0.5}/mdify/docling_client.py +0 -0
- {mdify_cli-3.0.3 → mdify_cli-3.0.5}/mdify/formatting.py +0 -0
- {mdify_cli-3.0.3 → mdify_cli-3.0.5}/mdify/ssh/__init__.py +0 -0
- {mdify_cli-3.0.3 → mdify_cli-3.0.5}/mdify/ssh/models.py +0 -0
- {mdify_cli-3.0.3 → mdify_cli-3.0.5}/mdify/ssh/transfer.py +0 -0
- {mdify_cli-3.0.3 → mdify_cli-3.0.5}/mdify_cli.egg-info/SOURCES.txt +0 -0
- {mdify_cli-3.0.3 → mdify_cli-3.0.5}/mdify_cli.egg-info/dependency_links.txt +0 -0
- {mdify_cli-3.0.3 → mdify_cli-3.0.5}/mdify_cli.egg-info/entry_points.txt +0 -0
- {mdify_cli-3.0.3 → mdify_cli-3.0.5}/mdify_cli.egg-info/requires.txt +0 -0
- {mdify_cli-3.0.3 → mdify_cli-3.0.5}/mdify_cli.egg-info/top_level.txt +0 -0
- {mdify_cli-3.0.3 → mdify_cli-3.0.5}/setup.cfg +0 -0
- {mdify_cli-3.0.3 → mdify_cli-3.0.5}/tests/test_cli.py +0 -0
- {mdify_cli-3.0.3 → mdify_cli-3.0.5}/tests/test_container.py +0 -0
- {mdify_cli-3.0.3 → mdify_cli-3.0.5}/tests/test_docling_client.py +0 -0
- {mdify_cli-3.0.3 → mdify_cli-3.0.5}/tests/test_ssh_client.py +0 -0
mdify/cli.py

@@ -1238,6 +1238,12 @@ def main_async_remote(args) -> int:
     successful = 0
     failed = 0

+    def is_connection_error(exc: Exception) -> bool:
+        if isinstance(exc, SSHConnectionError):
+            return True
+        msg = str(exc).lower()
+        return "broken pipe" in msg or "connection closed" in msg
+
     try:
         for idx, input_file in enumerate(files_to_convert, 1):
             if not args.quiet:
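The `is_connection_error` helper added here feeds the per-file retry loop introduced in the next hunk: each file is now processed inside a `while True`/`try` block, and a single reconnect is attempted when the failure looks like a dropped SSH connection. Below is a condensed, self-contained sketch of that pattern; the helper mirrors the diff, while the wrapper and demo functions are hypothetical stand-ins for the real upload/convert/download steps.

```python
import asyncio

class SSHConnectionError(Exception):
    """Stand-in for mdify's SSH connection error type."""

def is_connection_error(exc: Exception) -> bool:
    # Mirrors the helper added in mdify/cli.py 3.0.5.
    if isinstance(exc, SSHConnectionError):
        return True
    msg = str(exc).lower()
    return "broken pipe" in msg or "connection closed" in msg

async def convert_with_retry(process_file, reconnect) -> bool:
    """Run process_file(), retrying once after a reconnect on connection errors."""
    attempt = 0
    while True:
        try:
            await process_file()
            return True
        except Exception as exc:
            if is_connection_error(exc) and attempt == 0:
                attempt += 1
                await reconnect()
                continue  # retry the same file once on a fresh connection
            return False

async def main() -> None:
    # Tiny demo: the first call fails with a "broken pipe", the retry succeeds.
    calls = {"n": 0}

    async def flaky_process_file() -> None:
        calls["n"] += 1
        if calls["n"] == 1:
            raise OSError("Broken pipe")

    async def reconnect() -> None:
        pass  # placeholder for ssh_client.disconnect() / connect()

    print(await convert_with_retry(flaky_process_file, reconnect))  # True

asyncio.run(main())
```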
@@ -1246,155 +1252,169 @@ def main_async_remote(args) -> int:
                     file=sys.stderr,
                 )

-
-
-                remote_file_path = f"{work_dir}/{input_file.name}"
-
-                if not args.quiet:
-                    print(f" {color.cyan('Uploading to')} {remote_file_path}...", file=sys.stderr)
-
-                await transfer_manager.upload_file(
-                    local_path=str(input_file),
-                    remote_path=remote_file_path,
-                    overwrite=True,
-                    compress=False,
-                )
-
-                if not args.quiet:
-                    print(f" {color.green('✓ Upload complete')}", file=sys.stderr)
-
-                # Convert via remote container
-                if not args.quiet:
-                    print(f" {color.cyan('Converting via remote container')}...", file=sys.stderr)
-
-                # Determine output path
-                output_dir = Path(args.out_dir)
-
-                # Preserve directory structure if not flat
-                if not args.flat and input_path.is_dir():
-                    try:
-                        rel_path = input_file.relative_to(input_path)
-                        output_subdir = output_dir / rel_path.parent
-                    except ValueError:
-                        output_subdir = output_dir
-                else:
-                    output_subdir = output_dir
-
-                output_subdir.mkdir(parents=True, exist_ok=True)
-                output_file = output_subdir / f"{input_file.stem}.md"
-
-                # Check if output exists and skip if not overwrite
-                if output_file.exists() and not args.overwrite:
-                    if not args.quiet:
-                        print(
-                            f" {color.yellow('⊘ Skipped:')} {output_file} already exists (use --overwrite to replace)",
-                            file=sys.stderr,
-                        )
-                    continue
-
-                # Convert using remote container's HTTP API
-                # The docling-serve API expects:
-                # - Endpoint: /v1/convert/file
-                # - Method: POST with multipart/form-data
-                # - File field: "files" (note the plural)
-                # - Additional fields: to_formats=md, do_ocr=true
-                remote_output_path = f"{work_dir}/{input_file.stem}.md"
-
-                # Build conversion command on remote - use -F for multipart form data
-                convert_cmd = (
-                    f"curl -X POST "
-                    f"-F 'files=@{remote_file_path}' "
-                    f"-F 'to_formats=md' "
-                    f"-F 'do_ocr=true' "
-                )
-                if args.mask:
-                    convert_cmd += f"-F 'mask=true' "
-                convert_cmd += f"http://localhost:{args.port}/v1/convert/file"
-
-                stdout, stderr, code = await ssh_client.run_command(convert_cmd, timeout=timeout)
-
-                if code != 0:
-                    print(f" ✗ Conversion failed (curl error code {code}): {stderr}", file=sys.stderr)
-                    failed += 1
-                    continue
-
-                # Parse JSON response to extract markdown content
+            attempt = 0
+            while True:
                 try:
-
+                    # Upload file
+                    remote_file_path = f"{work_dir}/{input_file.name}"

-
-
-
-
-
-
-
-
-
-
-
+                    if not args.quiet:
+                        print(f" {color.cyan('Uploading to')} {remote_file_path}...", file=sys.stderr)
+
+                    await transfer_manager.upload_file(
+                        local_path=str(input_file),
+                        remote_path=remote_file_path,
+                        overwrite=True,
+                        compress=False,
+                    )
+
+                    if not args.quiet:
+                        print(f" {color.green('✓ Upload complete')}", file=sys.stderr)
+
+                    # Convert via remote container
+                    if not args.quiet:
+                        print(f" {color.cyan('Converting via remote container')}...", file=sys.stderr)
+
+                    # Determine output path
+                    output_dir = Path(args.out_dir)
+
+                    # Preserve directory structure if not flat
+                    if not args.flat and input_path.is_dir():
+                        try:
+                            rel_path = input_file.relative_to(input_path)
+                            output_subdir = output_dir / rel_path.parent
+                        except ValueError:
+                            output_subdir = output_dir
                     else:
-
-
-
-
-
-
-
-
-
-
-
+                        output_subdir = output_dir
+
+                    output_subdir.mkdir(parents=True, exist_ok=True)
+                    output_file = output_subdir / f"{input_file.stem}.md"
+
+                    # Check if output exists and skip if not overwrite
+                    if output_file.exists() and not args.overwrite:
+                        if not args.quiet:
+                            print(
+                                f" {color.yellow('⊘ Skipped:')} {output_file} already exists (use --overwrite to replace)",
+                                file=sys.stderr,
+                            )
+                        break
+
+                    # Convert using remote container's HTTP API
+                    # The docling-serve API expects:
+                    # - Endpoint: /v1/convert/file
+                    # - Method: POST with multipart/form-data
+                    # - File field: "files" (note the plural)
+                    # - Additional fields: to_formats=md, do_ocr=true
+                    remote_output_path = f"{work_dir}/{input_file.stem}.md"
+
+                    # Build conversion command on remote - use -F for multipart form data
+                    convert_cmd = (
+                        f"curl -X POST "
+                        f"-F 'files=@{remote_file_path}' "
+                        f"-F 'to_formats=md' "
+                        f"-F 'do_ocr=true' "
+                    )
+                    if args.mask:
+                        convert_cmd += f"-F 'mask=true' "
+                    convert_cmd += f"http://localhost:{args.port}/v1/convert/file"
+
+                    stdout, stderr, code = await ssh_client.run_command(convert_cmd, timeout=timeout)
+
+                    if code != 0:
+                        print(f" ✗ Conversion failed (curl error code {code}): {stderr}", file=sys.stderr)
+                        failed += 1
+                        break
+
+                    # Parse JSON response to extract markdown content
+                    try:
+                        response_data = json.loads(stdout)
+
+                        # Extract content from response structure
+                        # Actual format: {"document": {"md_content": "..."}, "status": "success"}
+                        if "document" in response_data:
+                            document = response_data["document"]
+                            if "md_content" in document and document["md_content"]:
+                                markdown_content = document["md_content"]
+                            elif "text_content" in document and document["text_content"]:
+                                markdown_content = document["text_content"]
                             else:
-
+                                # Fallback - use whole document
+                                markdown_content = json.dumps(document, indent=2)
                         else:
-                            #
-
+                            # Legacy format fallback
+                            if "results" in response_data and response_data["results"]:
+                                result = response_data["results"][0]
+                                if "content" in result:
+                                    content = result["content"]
+                                    if isinstance(content, dict) and "markdown" in content:
+                                        markdown_content = content["markdown"]
+                                    elif isinstance(content, str):
+                                        markdown_content = content
+                                    else:
+                                        markdown_content = str(content)
+                                else:
+                                    markdown_content = str(result)
+                            else:
+                                # Ultimate fallback
+                                markdown_content = stdout
+
+                        # Write markdown content to remote file
+                        write_cmd = f"cat > {remote_output_path} << 'MDIFY_EOF'\n{markdown_content}\nMDIFY_EOF"
+                        _, _, write_code = await ssh_client.run_command(write_cmd, timeout=30)
+
+                        if write_code != 0:
+                            print(f" ✗ Failed to write markdown output", file=sys.stderr)
+                            failed += 1
+                            break
+
+                    except (json.JSONDecodeError, KeyError, IndexError) as e:
+                        print(f" ✗ Failed to parse conversion response: {e}", file=sys.stderr)
+                        if DEBUG:
+                            print(f" Response: {stdout[:500]}", file=sys.stderr)
+                        failed += 1
+                        break

-
-
-                    _, _, write_code = await ssh_client.run_command(write_cmd, timeout=30)
+                    if not args.quiet:
+                        print(f" ✓ Conversion complete", file=sys.stderr)

-
-
-
+                    # Download result
+                    if not args.quiet:
+                        print(f" Downloading result to {output_file}...", file=sys.stderr)
+
+                    await transfer_manager.download_file(
+                        remote_path=remote_output_path,
+                        local_path=str(output_file),
+                        overwrite=True,
+                    )
+
+                    if not args.quiet:
+                        print(f" ✓ Download complete: {output_file}", file=sys.stderr)
+
+                    successful += 1
+
+                    # Cleanup remote files
+                    await ssh_client.run_command(f"rm -f {remote_file_path} {remote_output_path}")
+
+                    break
+                except Exception as e:
+                    if is_connection_error(e) and attempt == 0:
+                        attempt += 1
+                        if not args.quiet:
+                            print(" ↻ Connection lost. Reconnecting...", file=sys.stderr)
+                        try:
+                            await ssh_client.disconnect()
+                        except Exception:
+                            pass
+                        await ssh_client.connect()
                        continue

-
-                    print(f" ✗ Failed to parse conversion response: {e}", file=sys.stderr)
+                    print(f" ✗ Failed: {e}", file=sys.stderr)
                    if DEBUG:
-
+                        import traceback
+                        traceback.print_exc(file=sys.stderr)
                    failed += 1
-
-
-                if not args.quiet:
-                    print(f" ✓ Conversion complete", file=sys.stderr)
-
-                # Download result
-                if not args.quiet:
-                    print(f" Downloading result to {output_file}...", file=sys.stderr)
-
-                await transfer_manager.download_file(
-                    remote_path=remote_output_path,
-                    local_path=str(output_file),
-                    overwrite=True,
-                )
-
-                if not args.quiet:
-                    print(f" ✓ Download complete: {output_file}", file=sys.stderr)
-
-                successful += 1
-
-                # Cleanup remote files
-                await ssh_client.run_command(f"rm -f {remote_file_path} {remote_output_path}")
-
-            except Exception as e:
-                print(f" ✗ Failed: {e}", file=sys.stderr)
-                if DEBUG:
-                    import traceback
-                    traceback.print_exc(file=sys.stderr)
-                failed += 1
-                continue
+                    break

     finally:
         # Stop and remove container
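For reference, the curl command assembled in this hunk is just a multipart POST to the docling-serve container. A rough local equivalent using the `requests` library is sketched below; this is an illustration only (mdify itself shells out to curl on the remote host), and the host, port, and filename are placeholder assumptions.

```python
# Hypothetical local equivalent of the remote curl call built above.
# Assumes docling-serve is reachable on localhost:5001 and `requests` is installed.
import json
import requests

url = "http://localhost:5001/v1/convert/file"

with open("report.pdf", "rb") as fh:
    resp = requests.post(
        url,
        files={"files": fh},                          # note the plural field name
        data={"to_formats": "md", "do_ocr": "true"},  # same form fields as the curl command
        timeout=600,
    )

resp.raise_for_status()
payload = resp.json()

# Expected shape per the comments in cli.py:
# {"document": {"md_content": "..."}, "status": "success"}
document = payload.get("document", {})
markdown = document.get("md_content") or document.get("text_content") or json.dumps(document, indent=2)
print(markdown[:200])
```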
mdify/ssh/client.py

@@ -47,6 +47,9 @@ class AsyncSSHClient:
             "connect_timeout": self.config.timeout,
             "known_hosts": None,  # Skip host key verification for now
         }
+
+        if self.config.keepalive:
+            connect_kwargs["keepalive_interval"] = self.config.keepalive

         # Add username if provided
         if self.config.username:
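The new `keepalive` setting is passed through as one more connection keyword. Assuming the client wraps asyncssh (the existing `connect_timeout` and `known_hosts` keys match its `connect()` options), the effect is roughly the following sketch; host, username, and the interval value are placeholders.

```python
# Hedged sketch: how a keepalive interval would reach asyncssh.connect().
import asyncssh

async def open_connection() -> asyncssh.SSHClientConnection:
    connect_kwargs = {
        "connect_timeout": 30,
        "known_hosts": None,       # skip host key verification, as in the diff
        "keepalive_interval": 15,  # periodic keepalives so long conversions don't drop the session
    }
    return await asyncssh.connect("example.com", username="mdify", **connect_kwargs)

# Usage (requires a reachable SSH server):
#     conn = await open_connection()
```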
mdify/ssh/remote_container.py

@@ -54,14 +54,48 @@ class RemoteContainer(DoclingContainer):
         )
         self.is_healthy = False

+    async def _cleanup_port(self) -> None:
+        """Clean up any existing containers using this port.
+
+        Attempts to find and stop containers that are bound to self.port.
+        This handles the case where a previous container wasn't properly cleaned up.
+        """
+        try:
+            # Find containers using this port
+            # Using docker inspect with port filter
+            cmd = f"{self.runtime} ps -a --filter 'publish={self.port}' --format '{{{{.ID}}}}'"
+            stdout, stderr, code = await self.ssh_client.run_command(cmd, timeout=10)
+
+            if code == 0 and stdout.strip():
+                container_ids = stdout.strip().split('\n')
+                for container_id in container_ids:
+                    container_id = container_id.strip()
+                    if not container_id:
+                        continue
+
+                    logger.info(f"Cleaning up existing container on port {self.port}: {container_id}")
+
+                    # Stop the container
+                    stop_cmd = f"{self.runtime} stop {container_id}"
+                    await self.ssh_client.run_command(stop_cmd, timeout=10)
+
+                    # Remove the container
+                    rm_cmd = f"{self.runtime} rm {container_id}"
+                    await self.ssh_client.run_command(rm_cmd, timeout=10)
+
+                    logger.debug(f"Container removed: {container_id}")
+        except Exception as e:
+            logger.debug(f"Port cleanup check failed (non-blocking): {e}")
+
     async def start(self) -> None:
         """Start container on remote server.

         Operations:
-        1.
-        2.
-        3.
-        4.
+        1. Clean up any existing containers using this port
+        2. Detect container runtime on remote
+        3. Run docker/podman run command
+        4. Extract container ID
+        5. Wait for health check

         Raises:
             RuntimeError: Container already running or start failed
@@ -72,6 +106,9 @@ class RemoteContainer(DoclingContainer):

         logger.info(f"Starting remote container: {self.name}")

+        # Clean up any existing containers on this port
+        await self._cleanup_port()
+
         try:
             # Detect runtime if needed
             if not self.runtime: