mdify-cli 3.0.2__py3-none-any.whl → 3.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdify/__init__.py +1 -1
- mdify/cli.py +158 -137
- mdify/ssh/client.py +3 -0
- mdify/ssh/transfer.py +8 -3
- {mdify_cli-3.0.2.dist-info → mdify_cli-3.0.4.dist-info}/METADATA +1 -1
- {mdify_cli-3.0.2.dist-info → mdify_cli-3.0.4.dist-info}/RECORD +10 -10
- {mdify_cli-3.0.2.dist-info → mdify_cli-3.0.4.dist-info}/WHEEL +0 -0
- {mdify_cli-3.0.2.dist-info → mdify_cli-3.0.4.dist-info}/entry_points.txt +0 -0
- {mdify_cli-3.0.2.dist-info → mdify_cli-3.0.4.dist-info}/licenses/LICENSE +0 -0
- {mdify_cli-3.0.2.dist-info → mdify_cli-3.0.4.dist-info}/top_level.txt +0 -0
mdify/__init__.py
CHANGED
mdify/cli.py
CHANGED
|
@@ -1238,6 +1238,12 @@ def main_async_remote(args) -> int:
|
|
|
1238
1238
|
successful = 0
|
|
1239
1239
|
failed = 0
|
|
1240
1240
|
|
|
1241
|
+
def is_connection_error(exc: Exception) -> bool:
|
|
1242
|
+
if isinstance(exc, SSHConnectionError):
|
|
1243
|
+
return True
|
|
1244
|
+
msg = str(exc).lower()
|
|
1245
|
+
return "broken pipe" in msg or "connection closed" in msg
|
|
1246
|
+
|
|
1241
1247
|
try:
|
|
1242
1248
|
for idx, input_file in enumerate(files_to_convert, 1):
|
|
1243
1249
|
if not args.quiet:
|
|
@@ -1246,154 +1252,169 @@ def main_async_remote(args) -> int:
|
|
|
1246
1252
|
file=sys.stderr,
|
|
1247
1253
|
)
|
|
1248
1254
|
|
|
1249
|
-
|
|
1250
|
-
|
|
1251
|
-
remote_file_path = f"{work_dir}/{input_file.name}"
|
|
1252
|
-
|
|
1253
|
-
if not args.quiet:
|
|
1254
|
-
print(f" {color.cyan('Uploading to')} {remote_file_path}...", file=sys.stderr)
|
|
1255
|
-
|
|
1256
|
-
await transfer_manager.upload_file(
|
|
1257
|
-
local_path=str(input_file),
|
|
1258
|
-
remote_path=remote_file_path,
|
|
1259
|
-
overwrite=True,
|
|
1260
|
-
)
|
|
1261
|
-
|
|
1262
|
-
if not args.quiet:
|
|
1263
|
-
print(f" {color.green('✓ Upload complete')}", file=sys.stderr)
|
|
1264
|
-
|
|
1265
|
-
# Convert via remote container
|
|
1266
|
-
if not args.quiet:
|
|
1267
|
-
print(f" {color.cyan('Converting via remote container')}...", file=sys.stderr)
|
|
1268
|
-
|
|
1269
|
-
# Determine output path
|
|
1270
|
-
output_dir = Path(args.out_dir)
|
|
1271
|
-
|
|
1272
|
-
# Preserve directory structure if not flat
|
|
1273
|
-
if not args.flat and input_path.is_dir():
|
|
1274
|
-
try:
|
|
1275
|
-
rel_path = input_file.relative_to(input_path)
|
|
1276
|
-
output_subdir = output_dir / rel_path.parent
|
|
1277
|
-
except ValueError:
|
|
1278
|
-
output_subdir = output_dir
|
|
1279
|
-
else:
|
|
1280
|
-
output_subdir = output_dir
|
|
1281
|
-
|
|
1282
|
-
output_subdir.mkdir(parents=True, exist_ok=True)
|
|
1283
|
-
output_file = output_subdir / f"{input_file.stem}.md"
|
|
1284
|
-
|
|
1285
|
-
# Check if output exists and skip if not overwrite
|
|
1286
|
-
if output_file.exists() and not args.overwrite:
|
|
1287
|
-
if not args.quiet:
|
|
1288
|
-
print(
|
|
1289
|
-
f" {color.yellow('⊘ Skipped:')} {output_file} already exists (use --overwrite to replace)",
|
|
1290
|
-
file=sys.stderr,
|
|
1291
|
-
)
|
|
1292
|
-
continue
|
|
1293
|
-
|
|
1294
|
-
# Convert using remote container's HTTP API
|
|
1295
|
-
# The docling-serve API expects:
|
|
1296
|
-
# - Endpoint: /v1/convert/file
|
|
1297
|
-
# - Method: POST with multipart/form-data
|
|
1298
|
-
# - File field: "files" (note the plural)
|
|
1299
|
-
# - Additional fields: to_formats=md, do_ocr=true
|
|
1300
|
-
remote_output_path = f"{work_dir}/{input_file.stem}.md"
|
|
1301
|
-
|
|
1302
|
-
# Build conversion command on remote - use -F for multipart form data
|
|
1303
|
-
convert_cmd = (
|
|
1304
|
-
f"curl -X POST "
|
|
1305
|
-
f"-F 'files=@{remote_file_path}' "
|
|
1306
|
-
f"-F 'to_formats=md' "
|
|
1307
|
-
f"-F 'do_ocr=true' "
|
|
1308
|
-
)
|
|
1309
|
-
if args.mask:
|
|
1310
|
-
convert_cmd += f"-F 'mask=true' "
|
|
1311
|
-
convert_cmd += f"http://localhost:{args.port}/v1/convert/file"
|
|
1312
|
-
|
|
1313
|
-
stdout, stderr, code = await ssh_client.run_command(convert_cmd, timeout=timeout)
|
|
1314
|
-
|
|
1315
|
-
if code != 0:
|
|
1316
|
-
print(f" ✗ Conversion failed (curl error code {code}): {stderr}", file=sys.stderr)
|
|
1317
|
-
failed += 1
|
|
1318
|
-
continue
|
|
1319
|
-
|
|
1320
|
-
# Parse JSON response to extract markdown content
|
|
1255
|
+
attempt = 0
|
|
1256
|
+
while True:
|
|
1321
1257
|
try:
|
|
1322
|
-
|
|
1258
|
+
# Upload file
|
|
1259
|
+
remote_file_path = f"{work_dir}/{input_file.name}"
|
|
1323
1260
|
|
|
1324
|
-
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1261
|
+
if not args.quiet:
|
|
1262
|
+
print(f" {color.cyan('Uploading to')} {remote_file_path}...", file=sys.stderr)
|
|
1263
|
+
|
|
1264
|
+
await transfer_manager.upload_file(
|
|
1265
|
+
local_path=str(input_file),
|
|
1266
|
+
remote_path=remote_file_path,
|
|
1267
|
+
overwrite=True,
|
|
1268
|
+
compress=False,
|
|
1269
|
+
)
|
|
1270
|
+
|
|
1271
|
+
if not args.quiet:
|
|
1272
|
+
print(f" {color.green('✓ Upload complete')}", file=sys.stderr)
|
|
1273
|
+
|
|
1274
|
+
# Convert via remote container
|
|
1275
|
+
if not args.quiet:
|
|
1276
|
+
print(f" {color.cyan('Converting via remote container')}...", file=sys.stderr)
|
|
1277
|
+
|
|
1278
|
+
# Determine output path
|
|
1279
|
+
output_dir = Path(args.out_dir)
|
|
1280
|
+
|
|
1281
|
+
# Preserve directory structure if not flat
|
|
1282
|
+
if not args.flat and input_path.is_dir():
|
|
1283
|
+
try:
|
|
1284
|
+
rel_path = input_file.relative_to(input_path)
|
|
1285
|
+
output_subdir = output_dir / rel_path.parent
|
|
1286
|
+
except ValueError:
|
|
1287
|
+
output_subdir = output_dir
|
|
1335
1288
|
else:
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
|
|
1289
|
+
output_subdir = output_dir
|
|
1290
|
+
|
|
1291
|
+
output_subdir.mkdir(parents=True, exist_ok=True)
|
|
1292
|
+
output_file = output_subdir / f"{input_file.stem}.md"
|
|
1293
|
+
|
|
1294
|
+
# Check if output exists and skip if not overwrite
|
|
1295
|
+
if output_file.exists() and not args.overwrite:
|
|
1296
|
+
if not args.quiet:
|
|
1297
|
+
print(
|
|
1298
|
+
f" {color.yellow('⊘ Skipped:')} {output_file} already exists (use --overwrite to replace)",
|
|
1299
|
+
file=sys.stderr,
|
|
1300
|
+
)
|
|
1301
|
+
break
|
|
1302
|
+
|
|
1303
|
+
# Convert using remote container's HTTP API
|
|
1304
|
+
# The docling-serve API expects:
|
|
1305
|
+
# - Endpoint: /v1/convert/file
|
|
1306
|
+
# - Method: POST with multipart/form-data
|
|
1307
|
+
# - File field: "files" (note the plural)
|
|
1308
|
+
# - Additional fields: to_formats=md, do_ocr=true
|
|
1309
|
+
remote_output_path = f"{work_dir}/{input_file.stem}.md"
|
|
1310
|
+
|
|
1311
|
+
# Build conversion command on remote - use -F for multipart form data
|
|
1312
|
+
convert_cmd = (
|
|
1313
|
+
f"curl -X POST "
|
|
1314
|
+
f"-F 'files=@{remote_file_path}' "
|
|
1315
|
+
f"-F 'to_formats=md' "
|
|
1316
|
+
f"-F 'do_ocr=true' "
|
|
1317
|
+
)
|
|
1318
|
+
if args.mask:
|
|
1319
|
+
convert_cmd += f"-F 'mask=true' "
|
|
1320
|
+
convert_cmd += f"http://localhost:{args.port}/v1/convert/file"
|
|
1321
|
+
|
|
1322
|
+
stdout, stderr, code = await ssh_client.run_command(convert_cmd, timeout=timeout)
|
|
1323
|
+
|
|
1324
|
+
if code != 0:
|
|
1325
|
+
print(f" ✗ Conversion failed (curl error code {code}): {stderr}", file=sys.stderr)
|
|
1326
|
+
failed += 1
|
|
1327
|
+
break
|
|
1328
|
+
|
|
1329
|
+
# Parse JSON response to extract markdown content
|
|
1330
|
+
try:
|
|
1331
|
+
response_data = json.loads(stdout)
|
|
1332
|
+
|
|
1333
|
+
# Extract content from response structure
|
|
1334
|
+
# Actual format: {"document": {"md_content": "..."}, "status": "success"}
|
|
1335
|
+
if "document" in response_data:
|
|
1336
|
+
document = response_data["document"]
|
|
1337
|
+
if "md_content" in document and document["md_content"]:
|
|
1338
|
+
markdown_content = document["md_content"]
|
|
1339
|
+
elif "text_content" in document and document["text_content"]:
|
|
1340
|
+
markdown_content = document["text_content"]
|
|
1347
1341
|
else:
|
|
1348
|
-
|
|
1342
|
+
# Fallback - use whole document
|
|
1343
|
+
markdown_content = json.dumps(document, indent=2)
|
|
1349
1344
|
else:
|
|
1350
|
-
#
|
|
1351
|
-
|
|
1345
|
+
# Legacy format fallback
|
|
1346
|
+
if "results" in response_data and response_data["results"]:
|
|
1347
|
+
result = response_data["results"][0]
|
|
1348
|
+
if "content" in result:
|
|
1349
|
+
content = result["content"]
|
|
1350
|
+
if isinstance(content, dict) and "markdown" in content:
|
|
1351
|
+
markdown_content = content["markdown"]
|
|
1352
|
+
elif isinstance(content, str):
|
|
1353
|
+
markdown_content = content
|
|
1354
|
+
else:
|
|
1355
|
+
markdown_content = str(content)
|
|
1356
|
+
else:
|
|
1357
|
+
markdown_content = str(result)
|
|
1358
|
+
else:
|
|
1359
|
+
# Ultimate fallback
|
|
1360
|
+
markdown_content = stdout
|
|
1361
|
+
|
|
1362
|
+
# Write markdown content to remote file
|
|
1363
|
+
write_cmd = f"cat > {remote_output_path} << 'MDIFY_EOF'\n{markdown_content}\nMDIFY_EOF"
|
|
1364
|
+
_, _, write_code = await ssh_client.run_command(write_cmd, timeout=30)
|
|
1365
|
+
|
|
1366
|
+
if write_code != 0:
|
|
1367
|
+
print(f" ✗ Failed to write markdown output", file=sys.stderr)
|
|
1368
|
+
failed += 1
|
|
1369
|
+
break
|
|
1370
|
+
|
|
1371
|
+
except (json.JSONDecodeError, KeyError, IndexError) as e:
|
|
1372
|
+
print(f" ✗ Failed to parse conversion response: {e}", file=sys.stderr)
|
|
1373
|
+
if DEBUG:
|
|
1374
|
+
print(f" Response: {stdout[:500]}", file=sys.stderr)
|
|
1375
|
+
failed += 1
|
|
1376
|
+
break
|
|
1352
1377
|
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
_, _, write_code = await ssh_client.run_command(write_cmd, timeout=30)
|
|
1378
|
+
if not args.quiet:
|
|
1379
|
+
print(f" ✓ Conversion complete", file=sys.stderr)
|
|
1356
1380
|
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
|
|
1381
|
+
# Download result
|
|
1382
|
+
if not args.quiet:
|
|
1383
|
+
print(f" Downloading result to {output_file}...", file=sys.stderr)
|
|
1384
|
+
|
|
1385
|
+
await transfer_manager.download_file(
|
|
1386
|
+
remote_path=remote_output_path,
|
|
1387
|
+
local_path=str(output_file),
|
|
1388
|
+
overwrite=True,
|
|
1389
|
+
)
|
|
1390
|
+
|
|
1391
|
+
if not args.quiet:
|
|
1392
|
+
print(f" ✓ Download complete: {output_file}", file=sys.stderr)
|
|
1393
|
+
|
|
1394
|
+
successful += 1
|
|
1395
|
+
|
|
1396
|
+
# Cleanup remote files
|
|
1397
|
+
await ssh_client.run_command(f"rm -f {remote_file_path} {remote_output_path}")
|
|
1398
|
+
|
|
1399
|
+
break
|
|
1400
|
+
except Exception as e:
|
|
1401
|
+
if is_connection_error(e) and attempt == 0:
|
|
1402
|
+
attempt += 1
|
|
1403
|
+
if not args.quiet:
|
|
1404
|
+
print(" ↻ Connection lost. Reconnecting...", file=sys.stderr)
|
|
1405
|
+
try:
|
|
1406
|
+
await ssh_client.disconnect()
|
|
1407
|
+
except Exception:
|
|
1408
|
+
pass
|
|
1409
|
+
await ssh_client.connect()
|
|
1360
1410
|
continue
|
|
1361
1411
|
|
|
1362
|
-
|
|
1363
|
-
print(f" ✗ Failed to parse conversion response: {e}", file=sys.stderr)
|
|
1412
|
+
print(f" ✗ Failed: {e}", file=sys.stderr)
|
|
1364
1413
|
if DEBUG:
|
|
1365
|
-
|
|
1414
|
+
import traceback
|
|
1415
|
+
traceback.print_exc(file=sys.stderr)
|
|
1366
1416
|
failed += 1
|
|
1367
|
-
|
|
1368
|
-
|
|
1369
|
-
if not args.quiet:
|
|
1370
|
-
print(f" ✓ Conversion complete", file=sys.stderr)
|
|
1371
|
-
|
|
1372
|
-
# Download result
|
|
1373
|
-
if not args.quiet:
|
|
1374
|
-
print(f" Downloading result to {output_file}...", file=sys.stderr)
|
|
1375
|
-
|
|
1376
|
-
await transfer_manager.download_file(
|
|
1377
|
-
remote_path=remote_output_path,
|
|
1378
|
-
local_path=str(output_file),
|
|
1379
|
-
overwrite=True,
|
|
1380
|
-
)
|
|
1381
|
-
|
|
1382
|
-
if not args.quiet:
|
|
1383
|
-
print(f" ✓ Download complete: {output_file}", file=sys.stderr)
|
|
1384
|
-
|
|
1385
|
-
successful += 1
|
|
1386
|
-
|
|
1387
|
-
# Cleanup remote files
|
|
1388
|
-
await ssh_client.run_command(f"rm -f {remote_file_path} {remote_output_path}")
|
|
1389
|
-
|
|
1390
|
-
except Exception as e:
|
|
1391
|
-
print(f" ✗ Failed: {e}", file=sys.stderr)
|
|
1392
|
-
if DEBUG:
|
|
1393
|
-
import traceback
|
|
1394
|
-
traceback.print_exc(file=sys.stderr)
|
|
1395
|
-
failed += 1
|
|
1396
|
-
continue
|
|
1417
|
+
break
|
|
1397
1418
|
|
|
1398
1419
|
finally:
|
|
1399
1420
|
# Stop and remove container
|
mdify/ssh/client.py
CHANGED
|
@@ -47,6 +47,9 @@ class AsyncSSHClient:
|
|
|
47
47
|
"connect_timeout": self.config.timeout,
|
|
48
48
|
"known_hosts": None, # Skip host key verification for now
|
|
49
49
|
}
|
|
50
|
+
|
|
51
|
+
if self.config.keepalive:
|
|
52
|
+
connect_kwargs["keepalive_interval"] = self.config.keepalive
|
|
50
53
|
|
|
51
54
|
# Add username if provided
|
|
52
55
|
if self.config.username:
|
mdify/ssh/transfer.py
CHANGED
|
@@ -231,13 +231,18 @@ class FileTransferManager:
|
|
|
231
231
|
local_sha256.update(chunk)
|
|
232
232
|
local_checksum = local_sha256.hexdigest()
|
|
233
233
|
|
|
234
|
-
# Calculate remote checksum
|
|
235
|
-
|
|
236
|
-
f"sha256sum {remote_path} | awk '{{print $1}}'"
|
|
234
|
+
# Calculate remote checksum (sha256sum or shasum fallback)
|
|
235
|
+
checksum_cmd = (
|
|
236
|
+
f"(command -v sha256sum >/dev/null 2>&1 && sha256sum {remote_path} | awk '{{print $1}}') "
|
|
237
|
+
f"|| (command -v shasum >/dev/null 2>&1 && shasum -a 256 {remote_path} | awk '{{print $1}}')"
|
|
237
238
|
)
|
|
239
|
+
stdout, stderr, code = await self.ssh_client.run_command(checksum_cmd)
|
|
238
240
|
|
|
239
241
|
if code == 0:
|
|
240
242
|
remote_checksum = stdout.strip()
|
|
243
|
+
if not remote_checksum:
|
|
244
|
+
logger.warning("Could not verify checksum: remote checksum unavailable")
|
|
245
|
+
return
|
|
241
246
|
|
|
242
247
|
if local_checksum != remote_checksum:
|
|
243
248
|
raise ValueError(
|
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
assets/mdify.png,sha256=qUj7WXWqNwpI2KNXOW79XJwqFqa-UI0JEkmt1mmy4Rg,1820418
|
|
2
|
-
mdify/__init__.py,sha256=
|
|
2
|
+
mdify/__init__.py,sha256=Le2ZtuARDsPg7kDbPBrwevy3J8cPp-j7fQLAlqx-Na8,90
|
|
3
3
|
mdify/__main__.py,sha256=bhpJ00co6MfaVOdH4XLoW04NtLYDa_oJK7ODzfLrn9M,143
|
|
4
|
-
mdify/cli.py,sha256=
|
|
4
|
+
mdify/cli.py,sha256=CSj3J-mLdGV58pm-eNpq-vhtkQOVKduMq9hiP_4lhWs,75655
|
|
5
5
|
mdify/container.py,sha256=BjL5ZR__n1i_WHifXKllTPoqO7IuOUdPDo5esuNg0Iw,8213
|
|
6
6
|
mdify/docling_client.py,sha256=xuQR6sC1v3EPloOSwExoHCqT4uUxE8myYq-Yeby3C2I,7975
|
|
7
7
|
mdify/formatting.py,sha256=lJKhMbDPcaWCdyEa7aKwAm_desaWvkfDc8C3EP7LWp4,790
|
|
8
8
|
mdify/ssh/__init__.py,sha256=SmRWgwEvAQZ_ARHlKTb9QDPwVAcz6dvPUks2pZFWLAU,271
|
|
9
|
-
mdify/ssh/client.py,sha256=
|
|
9
|
+
mdify/ssh/client.py,sha256=nO7gAQ6eWxIXFIOplW2F2ya0-1ZEFlLmz3ovi1TEFTg,14997
|
|
10
10
|
mdify/ssh/models.py,sha256=jpbDS1yGhd7Xwq2tW7bZv14mTBlR8DCfhT4x-Xf2Wq4,17676
|
|
11
11
|
mdify/ssh/remote_container.py,sha256=kmScAlmHI9rJLKliYcYQXDZHF7PYYiD-_rRV-S0fffM,8462
|
|
12
|
-
mdify/ssh/transfer.py,sha256=
|
|
13
|
-
mdify_cli-3.0.
|
|
14
|
-
mdify_cli-3.0.
|
|
15
|
-
mdify_cli-3.0.
|
|
16
|
-
mdify_cli-3.0.
|
|
17
|
-
mdify_cli-3.0.
|
|
18
|
-
mdify_cli-3.0.
|
|
12
|
+
mdify/ssh/transfer.py,sha256=aQuWa_B81McrgZBBWo_CxnjwoiGHmoxAoqObm19JAk8,10940
|
|
13
|
+
mdify_cli-3.0.4.dist-info/licenses/LICENSE,sha256=NWM66Uv-XuSMKaU-gaPmvfyk4WgE6zcIPr78wyg6GAo,1065
|
|
14
|
+
mdify_cli-3.0.4.dist-info/METADATA,sha256=TUgq2AnSvYZtrMFLdDlgBoZTwhbwAcrd2nzoIVkLewU,14766
|
|
15
|
+
mdify_cli-3.0.4.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
16
|
+
mdify_cli-3.0.4.dist-info/entry_points.txt,sha256=0Xki8f5lADQUtwdt6Eq_FEaieI6Byhk8UE7BuDhChMg,41
|
|
17
|
+
mdify_cli-3.0.4.dist-info/top_level.txt,sha256=qltzf7h8owHq7dxCdfCkSHY8gT21hn1_E8P-VWS_OKM,6
|
|
18
|
+
mdify_cli-3.0.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|