mdify-cli 2.11.9__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
mdify/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """mdify - Convert documents to Markdown via Docling container."""
2
2
 
3
- __version__ = "2.11.9"
3
+ __version__ = "3.0.0"
mdify/cli.py CHANGED
@@ -914,6 +914,97 @@ Examples:
914
914
  help="Skip memory availability validation (not recommended)",
915
915
  )
916
916
 
917
+ # SSH/Remote server options
918
+ ssh_group = parser.add_argument_group("Remote SSH Server", "Execute conversion on remote server via SSH")
919
+
920
+ ssh_group.add_argument(
921
+ "--remote-host",
922
+ type=str,
923
+ default=None,
924
+ help="SSH host or alias (e.g., tsrv, 192.168.1.200, or SSH config alias)",
925
+ )
926
+
927
+ ssh_group.add_argument(
928
+ "--remote-port",
929
+ type=int,
930
+ default=None,
931
+ help="SSH port (default: 22 or from SSH config)",
932
+ )
933
+
934
+ ssh_group.add_argument(
935
+ "--remote-user",
936
+ type=str,
937
+ default=None,
938
+ help="SSH username (default: from SSH config or system user)",
939
+ )
940
+
941
+ ssh_group.add_argument(
942
+ "--remote-key",
943
+ type=str,
944
+ default=None,
945
+ help="SSH private key path (default: ~/.ssh/id_rsa or from SSH config)",
946
+ )
947
+
948
+ ssh_group.add_argument(
949
+ "--remote-key-passphrase",
950
+ type=str,
951
+ default=None,
952
+ help="SSH key passphrase (not recommended; use SSH agent)",
953
+ )
954
+
955
+ ssh_group.add_argument(
956
+ "--remote-timeout",
957
+ type=int,
958
+ default=30,
959
+ help="SSH connection timeout in seconds (default: 30)",
960
+ )
961
+
962
+ ssh_group.add_argument(
963
+ "--remote-work-dir",
964
+ type=str,
965
+ default="/tmp/mdify-remote",
966
+ help="Work directory on remote server (default: /tmp/mdify-remote)",
967
+ )
968
+
969
+ ssh_group.add_argument(
970
+ "--remote-runtime",
971
+ type=str,
972
+ choices=("docker", "podman"),
973
+ default=None,
974
+ help="Container runtime on remote (docker or podman; auto-detect if not specified)",
975
+ )
976
+
977
+ ssh_group.add_argument(
978
+ "--remote-config",
979
+ type=str,
980
+ default=None,
981
+ help="Path to mdify remote config file (YAML format, default: ~/.mdify/remote.conf)",
982
+ )
983
+
984
+ ssh_group.add_argument(
985
+ "--remote-skip-ssh-config",
986
+ action="store_true",
987
+ help="Skip loading SSH config (use CLI arguments only)",
988
+ )
989
+
990
+ ssh_group.add_argument(
991
+ "--remote-skip-validation",
992
+ action="store_true",
993
+ help="Skip remote resource validation (not recommended)",
994
+ )
995
+
996
+ ssh_group.add_argument(
997
+ "--remote-validate-only",
998
+ action="store_true",
999
+ help="Validate remote connection and resources, then exit",
1000
+ )
1001
+
1002
+ ssh_group.add_argument(
1003
+ "--remote-debug",
1004
+ action="store_true",
1005
+ help="Enable debug logging for remote SSH operations",
1006
+ )
1007
+
917
1008
  # Utility options
918
1009
  parser.add_argument(
919
1010
  "--check-update",
@@ -930,6 +1021,442 @@ Examples:
930
1021
  return parser.parse_args()
931
1022
 
932
1023
 
1024
+ # =============================================================================
1025
+ # Remote SSH execution support
1026
+ # =============================================================================
1027
+
1028
+
1029
+ def main_async_remote(args) -> int:
1030
+ """Execute conversion on remote server via SSH.
1031
+
1032
+ This function handles:
1033
+ 1. Loading and merging SSH configuration
1034
+ 2. Establishing remote connection
1035
+ 3. Uploading input files
1036
+ 4. Executing remote conversion
1037
+ 5. Downloading output files
1038
+ 6. Cleanup on success or failure
1039
+
1040
+ Args:
1041
+ args: Parsed command-line arguments with remote_* options
1042
+
1043
+ Returns:
1044
+ Exit code (0 for success, non-zero for errors)
1045
+ """
1046
+ import asyncio
1047
+ from pathlib import Path
1048
+ from mdify.ssh import SSHConfig, AsyncSSHClient
1049
+ from mdify.ssh.models import SSHConnectionError, SSHAuthError, ConfigError, ValidationError
1050
+
1051
+ async def async_main() -> int:
1052
+ """Async implementation of remote conversion."""
1053
+
1054
+ # Resolve timeout value: CLI > env > default 1200
1055
+ timeout = args.timeout or int(os.environ.get("MDIFY_TIMEOUT", 1200))
1056
+
1057
+ # Build SSH config from CLI arguments and SSH config files
1058
+ try:
1059
+ # Build config with proper precedence (lowest to highest):
1060
+ # SSH config -> mdify remote.conf -> CLI args
1061
+ ssh_config = None
1062
+
1063
+ if not args.remote_skip_ssh_config:
1064
+ # Load from SSH config if host looks like an alias
1065
+ if not args.remote_host.replace('.', '').replace('-', '').isdigit():
1066
+ try:
1067
+ ssh_config = SSHConfig.from_ssh_config(args.remote_host)
1068
+ except Exception as e:
1069
+ if not args.quiet:
1070
+ print(f"Warning: Could not load SSH config for {args.remote_host}: {e}", file=sys.stderr)
1071
+
1072
+ # Load from mdify remote.conf if it exists
1073
+ mdify_remote_conf = args.remote_config or (Path.home() / ".mdify" / "remote.conf")
1074
+ if mdify_remote_conf and Path(mdify_remote_conf).exists():
1075
+ try:
1076
+ ssh_from_mdify = SSHConfig.from_remote_conf(str(mdify_remote_conf))
1077
+ if ssh_config:
1078
+ ssh_config = ssh_config.merge(ssh_from_mdify)
1079
+ else:
1080
+ ssh_config = ssh_from_mdify
1081
+ except Exception as e:
1082
+ if not args.quiet:
1083
+ print(f"Warning: Could not load mdify remote config: {e}", file=sys.stderr)
1084
+
1085
+ # Start with minimal defaults if no config loaded
1086
+ if ssh_config is None:
1087
+ ssh_config = SSHConfig(host=args.remote_host, port=22, username=None)
1088
+
1089
+ # Apply CLI arguments with highest precedence
1090
+ cli_config = SSHConfig(
1091
+ host=args.remote_host,
1092
+ port=args.remote_port,
1093
+ username=args.remote_user,
1094
+ key_file=args.remote_key,
1095
+ key_passphrase=args.remote_key_passphrase,
1096
+ timeout=args.remote_timeout,
1097
+ work_dir=args.remote_work_dir,
1098
+ container_runtime=args.remote_runtime,
1099
+ )
1100
+ ssh_config = ssh_config.merge(cli_config)
1101
+
1102
+ # Create SSH client
1103
+ ssh_client = AsyncSSHClient(ssh_config)
1104
+
1105
+ # Connect to remote server
1106
+ if not args.quiet:
1107
+ print(f"Connecting to {ssh_config.host}:{ssh_config.port}...", file=sys.stderr)
1108
+
1109
+ await ssh_client.connect()
1110
+
1111
+ if not args.quiet:
1112
+ print(f"✓ Connected to {ssh_config.host}", file=sys.stderr)
1113
+
1114
+ # Validate remote resources if not skipped
1115
+ if not args.remote_skip_validation:
1116
+ if not args.quiet:
1117
+ print("Validating remote resources...", file=sys.stderr)
1118
+
1119
+ validation_result = await ssh_client.validate_remote_resources()
1120
+
1121
+ if not validation_result.get("can_connect"):
1122
+ await ssh_client.disconnect()
1123
+ print("Error: Cannot connect to remote server", file=sys.stderr)
1124
+ return 1
1125
+
1126
+ if not validation_result.get("work_dir_writable"):
1127
+ await ssh_client.disconnect()
1128
+ print(f"Error: Work directory not writable: {ssh_config.work_dir}", file=sys.stderr)
1129
+ return 1
1130
+
1131
+ if not validation_result.get("container_runtime_available"):
1132
+ await ssh_client.disconnect()
1133
+ runtime_str = ssh_config.container_runtime or "docker/podman"
1134
+ print(f"Error: Container runtime not available: {runtime_str}", file=sys.stderr)
1135
+ return 1
1136
+
1137
+ if not validation_result.get("disk_space_min_5gb"):
1138
+ print(f"Warning: Less than 5GB available on remote", file=sys.stderr)
1139
+ if not args.yes and sys.stdin.isatty():
1140
+ if not confirm_proceed("Continue anyway?"):
1141
+ await ssh_client.disconnect()
1142
+ return 130
1143
+
1144
+ if not validation_result.get("memory_min_2gb"):
1145
+ print(f"Warning: Less than 2GB available memory on remote", file=sys.stderr)
1146
+ if not args.yes and sys.stdin.isatty():
1147
+ if not confirm_proceed("Continue anyway?"):
1148
+ await ssh_client.disconnect()
1149
+ return 130
1150
+
1151
+ if not args.quiet:
1152
+ print("✓ All remote resources validated", file=sys.stderr)
1153
+
1154
+ # If --remote-validate-only, exit here
1155
+ if args.remote_validate_only:
1156
+ await ssh_client.disconnect()
1157
+ print("Remote validation successful", file=sys.stderr)
1158
+ return 0
1159
+
1160
+ # Phase 2.4.2: File upload, remote conversion, and download
1161
+
1162
+ # Build file list
1163
+ input_path = Path(args.input)
1164
+ if not input_path.exists():
1165
+ await ssh_client.disconnect()
1166
+ print(f"Error: Input file or directory not found: {args.input}", file=sys.stderr)
1167
+ return 1
1168
+
1169
+ files_to_convert = get_files_to_convert(input_path.resolve(), args.glob, args.recursive)
1170
+
1171
+ if not files_to_convert:
1172
+ await ssh_client.disconnect()
1173
+ print(f"Error: No supported files found in {args.input}", file=sys.stderr)
1174
+ print(f" Supported formats: {', '.join(sorted(SUPPORTED_EXTENSIONS))}", file=sys.stderr)
1175
+ return 1
1176
+
1177
+ if not args.quiet:
1178
+ print(f"\nFound {len(files_to_convert)} file(s) to convert", file=sys.stderr)
1179
+
1180
+ # Import remote container and transfer manager
1181
+ from mdify.ssh.transfer import FileTransferManager
1182
+ from mdify.ssh.remote_container import RemoteContainer
1183
+
1184
+ # Determine container runtime and image
1185
+ runtime = ssh_config.container_runtime
1186
+ if not runtime:
1187
+ runtime = await ssh_client.check_container_runtime()
1188
+ if not runtime:
1189
+ await ssh_client.disconnect()
1190
+ print("Error: No container runtime found on remote (docker/podman)", file=sys.stderr)
1191
+ return 1
1192
+
1193
+ if args.gpu:
1194
+ image = GPU_IMAGE
1195
+ elif args.image:
1196
+ image = args.image
1197
+ else:
1198
+ image = DEFAULT_IMAGE
1199
+
1200
+ # Create remote container
1201
+ remote_container = RemoteContainer(
1202
+ ssh_client=ssh_client,
1203
+ image=image,
1204
+ port=args.port,
1205
+ runtime=runtime,
1206
+ name=f"mdify-remote-{int(time.time())}",
1207
+ timeout=timeout,
1208
+ )
1209
+
1210
+ # Create file transfer manager
1211
+ transfer_manager = FileTransferManager(ssh_client)
1212
+
1213
+ # Create remote work directory
1214
+ work_dir = ssh_config.work_dir or "/tmp/mdify-remote"
1215
+ stdout, stderr, code = await ssh_client.run_command(f"mkdir -p {work_dir}")
1216
+ if code != 0:
1217
+ await ssh_client.disconnect()
1218
+ print(f"Error: Failed to create remote work directory: {work_dir}", file=sys.stderr)
1219
+ return 1
1220
+
1221
+ # Start remote container
1222
+ if not args.quiet:
1223
+ print(f"\nStarting remote container ({image})...", file=sys.stderr)
1224
+
1225
+ try:
1226
+ await remote_container.start()
1227
+ if not args.quiet:
1228
+ print(f"✓ Container started: {remote_container.state.container_name}", file=sys.stderr)
1229
+ except Exception as e:
1230
+ await ssh_client.disconnect()
1231
+ print(f"Error: Failed to start remote container: {e}", file=sys.stderr)
1232
+ return 1
1233
+
1234
+ # Process files
1235
+ successful = 0
1236
+ failed = 0
1237
+
1238
+ try:
1239
+ for idx, input_file in enumerate(files_to_convert, 1):
1240
+ if not args.quiet:
1241
+ print(f"\n[{idx}/{len(files_to_convert)}] Processing: {input_file.name}", file=sys.stderr)
1242
+
1243
+ try:
1244
+ # Upload file
1245
+ remote_file_path = f"{work_dir}/{input_file.name}"
1246
+
1247
+ if not args.quiet:
1248
+ print(f" Uploading to {remote_file_path}...", file=sys.stderr)
1249
+
1250
+ await transfer_manager.upload_file(
1251
+ local_path=str(input_file),
1252
+ remote_path=remote_file_path,
1253
+ overwrite=True,
1254
+ )
1255
+
1256
+ if not args.quiet:
1257
+ print(f" ✓ Upload complete", file=sys.stderr)
1258
+
1259
+ # Convert via remote container
1260
+ if not args.quiet:
1261
+ print(f" Converting via remote container...", file=sys.stderr)
1262
+
1263
+ # Determine output path
1264
+ output_dir = Path(args.out_dir)
1265
+
1266
+ # Preserve directory structure if not flat
1267
+ if not args.flat and input_path.is_dir():
1268
+ try:
1269
+ rel_path = input_file.relative_to(input_path)
1270
+ output_subdir = output_dir / rel_path.parent
1271
+ except ValueError:
1272
+ output_subdir = output_dir
1273
+ else:
1274
+ output_subdir = output_dir
1275
+
1276
+ output_subdir.mkdir(parents=True, exist_ok=True)
1277
+ output_file = output_subdir / f"{input_file.stem}.md"
1278
+
1279
+ # Check if output exists and skip if not overwrite
1280
+ if output_file.exists() and not args.overwrite:
1281
+ if not args.quiet:
1282
+ print(f" ⊘ Skipped: {output_file} already exists (use --overwrite to replace)", file=sys.stderr)
1283
+ continue
1284
+
1285
+ # Convert using remote container's HTTP API
1286
+ # The docling-serve API expects:
1287
+ # - Endpoint: /v1/convert/file
1288
+ # - Method: POST with multipart/form-data
1289
+ # - File field: "files" (note the plural)
1290
+ # - Additional fields: to_formats=md, do_ocr=true
1291
+ remote_output_path = f"{work_dir}/{input_file.stem}.md"
1292
+
1293
+ # Build conversion command on remote - use -F for multipart form data
1294
+ convert_cmd = (
1295
+ f"curl -X POST "
1296
+ f"-F 'files=@{remote_file_path}' "
1297
+ f"-F 'to_formats=md' "
1298
+ f"-F 'do_ocr=true' "
1299
+ )
1300
+ if args.mask:
1301
+ convert_cmd += f"-F 'mask=true' "
1302
+ convert_cmd += f"http://localhost:{args.port}/v1/convert/file"
1303
+
1304
+ stdout, stderr, code = await ssh_client.run_command(convert_cmd, timeout=timeout)
1305
+
1306
+ if code != 0:
1307
+ print(f" ✗ Conversion failed (curl error code {code}): {stderr}", file=sys.stderr)
1308
+ failed += 1
1309
+ continue
1310
+
1311
+ # Parse JSON response to extract markdown content
1312
+ try:
1313
+ response_data = json.loads(stdout)
1314
+
1315
+ # Extract content from response structure
1316
+ # Actual format: {"document": {"md_content": "..."}, "status": "success"}
1317
+ if "document" in response_data:
1318
+ document = response_data["document"]
1319
+ if "md_content" in document and document["md_content"]:
1320
+ markdown_content = document["md_content"]
1321
+ elif "text_content" in document and document["text_content"]:
1322
+ markdown_content = document["text_content"]
1323
+ else:
1324
+ # Fallback - use whole document
1325
+ markdown_content = json.dumps(document, indent=2)
1326
+ else:
1327
+ # Legacy format fallback
1328
+ if "results" in response_data and response_data["results"]:
1329
+ result = response_data["results"][0]
1330
+ if "content" in result:
1331
+ content = result["content"]
1332
+ if isinstance(content, dict) and "markdown" in content:
1333
+ markdown_content = content["markdown"]
1334
+ elif isinstance(content, str):
1335
+ markdown_content = content
1336
+ else:
1337
+ markdown_content = str(content)
1338
+ else:
1339
+ markdown_content = str(result)
1340
+ else:
1341
+ # Ultimate fallback
1342
+ markdown_content = stdout
1343
+
1344
+ # Write markdown content to remote file
1345
+ write_cmd = f"cat > {remote_output_path} << 'MDIFY_EOF'\n{markdown_content}\nMDIFY_EOF"
1346
+ _, _, write_code = await ssh_client.run_command(write_cmd, timeout=30)
1347
+
1348
+ if write_code != 0:
1349
+ print(f" ✗ Failed to write markdown output", file=sys.stderr)
1350
+ failed += 1
1351
+ continue
1352
+
1353
+ except (json.JSONDecodeError, KeyError, IndexError) as e:
1354
+ print(f" ✗ Failed to parse conversion response: {e}", file=sys.stderr)
1355
+ if DEBUG:
1356
+ print(f" Response: {stdout[:500]}", file=sys.stderr)
1357
+ failed += 1
1358
+ continue
1359
+
1360
+ if not args.quiet:
1361
+ print(f" ✓ Conversion complete", file=sys.stderr)
1362
+
1363
+ # Download result
1364
+ if not args.quiet:
1365
+ print(f" Downloading result to {output_file}...", file=sys.stderr)
1366
+
1367
+ await transfer_manager.download_file(
1368
+ remote_path=remote_output_path,
1369
+ local_path=str(output_file),
1370
+ overwrite=True,
1371
+ )
1372
+
1373
+ if not args.quiet:
1374
+ print(f" ✓ Download complete: {output_file}", file=sys.stderr)
1375
+
1376
+ successful += 1
1377
+
1378
+ # Cleanup remote files
1379
+ await ssh_client.run_command(f"rm -f {remote_file_path} {remote_output_path}")
1380
+
1381
+ except Exception as e:
1382
+ print(f" ✗ Failed: {e}", file=sys.stderr)
1383
+ if DEBUG:
1384
+ import traceback
1385
+ traceback.print_exc(file=sys.stderr)
1386
+ failed += 1
1387
+ continue
1388
+
1389
+ finally:
1390
+ # Stop and remove container
1391
+ if not args.quiet:
1392
+ print(f"\nStopping remote container...", file=sys.stderr)
1393
+
1394
+ try:
1395
+ await remote_container.stop(force=False)
1396
+ if not args.quiet:
1397
+ print(f"✓ Container stopped", file=sys.stderr)
1398
+ except Exception as e:
1399
+ if not args.quiet:
1400
+ print(f"Warning: Failed to stop container: {e}", file=sys.stderr)
1401
+
1402
+ # Cleanup remote work directory
1403
+ try:
1404
+ await ssh_client.run_command(f"rm -rf {work_dir}")
1405
+ if not args.quiet:
1406
+ print(f"✓ Cleaned up remote directory", file=sys.stderr)
1407
+ except Exception as e:
1408
+ if not args.quiet:
1409
+ print(f"Warning: Failed to cleanup remote directory: {e}", file=sys.stderr)
1410
+
1411
+ # Disconnect
1412
+ await ssh_client.disconnect()
1413
+
1414
+ # Print summary
1415
+ print(f"\n{'='*60}", file=sys.stderr)
1416
+ print(f"Remote conversion complete:", file=sys.stderr)
1417
+ print(f" Successful: {successful}", file=sys.stderr)
1418
+ print(f" Failed: {failed}", file=sys.stderr)
1419
+ print(f" Total: {len(files_to_convert)}", file=sys.stderr)
1420
+ print(f"{'='*60}", file=sys.stderr)
1421
+
1422
+ return 0 if failed == 0 else 1
1423
+
1424
+ except SSHAuthError as e:
1425
+ print(f"Error: SSH authentication failed: {e}", file=sys.stderr)
1426
+ print(" Check your SSH key, passphrase, or username", file=sys.stderr)
1427
+ return 1
1428
+ except SSHConnectionError as e:
1429
+ print(f"Error: SSH connection failed: {e}", file=sys.stderr)
1430
+ if hasattr(e, 'host') and hasattr(e, 'port'):
1431
+ print(f" Host: {e.host}:{e.port}", file=sys.stderr)
1432
+ return 1
1433
+ except ConfigError as e:
1434
+ print(f"Error: Configuration error: {e}", file=sys.stderr)
1435
+ return 1
1436
+ except ValidationError as e:
1437
+ print(f"Error: Validation error: {e}", file=sys.stderr)
1438
+ return 1
1439
+ except Exception as e:
1440
+ print(f"Error: Unexpected error during remote execution: {e}", file=sys.stderr)
1441
+ if DEBUG:
1442
+ import traceback
1443
+ traceback.print_exc(file=sys.stderr)
1444
+ return 1
1445
+
1446
+ # Run async main
1447
+ try:
1448
+ return asyncio.run(async_main())
1449
+ except KeyboardInterrupt:
1450
+ print("\n⚠ Interrupted by user", file=sys.stderr)
1451
+ return 130
1452
+ except Exception as e:
1453
+ print(f"Error: Failed to run remote execution: {e}", file=sys.stderr)
1454
+ if DEBUG:
1455
+ import traceback
1456
+ traceback.print_exc(file=sys.stderr)
1457
+ return 1
1458
+
1459
+
933
1460
  # =============================================================================
934
1461
  # Main entry point
935
1462
  # =============================================================================
@@ -948,6 +1475,21 @@ def main() -> int:
948
1475
  # Check for updates (daily, silent on errors)
949
1476
  check_for_update(force=False)
950
1477
 
1478
+ # Detect remote mode (SSH-based execution)
1479
+ is_remote_mode = hasattr(args, 'remote_host') and args.remote_host is not None
1480
+
1481
+ if is_remote_mode:
1482
+ # Remote mode: will use SSH to execute on remote server
1483
+ # Import here to avoid import errors if asyncssh not installed in local environment
1484
+ try:
1485
+ import asyncio
1486
+ from mdify.ssh import AsyncSSHClient, SSHConfig
1487
+ return main_async_remote(args)
1488
+ except ImportError:
1489
+ print("Error: Remote mode requires asyncssh and additional dependencies", file=sys.stderr)
1490
+ print("Install with: pip install mdify-cli[remote]", file=sys.stderr)
1491
+ return 1
1492
+
951
1493
  # Resolve timeout value: CLI > env > default 1200
952
1494
  timeout = args.timeout or int(os.environ.get("MDIFY_TIMEOUT", 1200))
953
1495
 
mdify/container.py CHANGED
@@ -221,10 +221,6 @@ class DoclingContainer:
221
221
  True if container is healthy, False otherwise
222
222
  """
223
223
  try:
224
- # First check if container is still running
225
- if not self.is_running():
226
- return False
227
- # Then check health endpoint
228
224
  return check_health(self.base_url)
229
225
  except Exception:
230
226
  return False
mdify/ssh/__init__.py ADDED
@@ -0,0 +1,11 @@
1
+ """SSH remote server support for mdify."""
2
+
3
+ from mdify.ssh.models import SSHConfig, TransferSession, RemoteContainerState
4
+ from mdify.ssh.client import AsyncSSHClient
5
+
6
+ __all__ = [
7
+ "SSHConfig",
8
+ "TransferSession",
9
+ "RemoteContainerState",
10
+ "AsyncSSHClient",
11
+ ]