mdify-cli 2.11.8__py3-none-any.whl → 2.11.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,237 @@
+ """Remote container management over SSH."""
+
+ import logging
+ import uuid
+ from typing import Literal
+ from mdify.container import DoclingContainer
+ from mdify.ssh.models import RemoteContainerState
+
+ logger = logging.getLogger(__name__)
+
+
+ class RemoteContainer(DoclingContainer):
+     """Container running on remote server via SSH."""
+
+     def __init__(
+         self,
+         ssh_client,
+         image: str = "docling-serve:latest",
+         port: int = 8000,
+         runtime: Literal["docker", "podman"] = "docker",
+         name: str | None = None,
+         timeout: int = 30,
+         health_check_interval: int = 2
+     ):
+         """Initialize remote container manager.
+
+         Parameters:
+             ssh_client: Connected AsyncSSHClient instance
+             image: Container image name
+             port: Host port to publish for the container service
+             runtime: Container runtime ("docker" or "podman")
+             name: Container name (auto-generated if None)
+             timeout: Timeout for operations in seconds
+             health_check_interval: Health check poll interval in seconds
+         """
+         # Initialize base class
+         super().__init__(
+             runtime=runtime,
+             image=image,
+             port=port,
+             timeout=timeout
+         )
+
+         self.ssh_client = ssh_client
+         self.name = name or f"mdify-{uuid.uuid4().hex[:8]}"
+         self.health_check_interval = health_check_interval
+
+         self.state = RemoteContainerState(
+             container_name=self.name,
+             port=port,
+             runtime=runtime,
+             host=ssh_client.config.host,
+             base_url=f"http://{ssh_client.config.host}:{port}"
+         )
+         self.is_healthy = False
+
+     async def start(self) -> None:
+         """Start container on remote server.
+
+         Operations:
+             1. Detect container runtime on remote
+             2. Run docker/podman run command
+             3. Extract container ID
+             4. Wait for health check
+
+         Raises:
+             RuntimeError: Container already running or start failed
+             SSHConnectionError: SSH connection lost
+         """
+         if self.state.is_running:
+             raise RuntimeError(f"Container {self.name} is already running")
+
+         logger.info(f"Starting remote container: {self.name}")
+
+         try:
+             # Detect runtime if needed
+             if not self.runtime:
+                 runtime = await self.ssh_client.check_container_runtime()
+                 if not runtime:
+                     raise RuntimeError("No container runtime available on remote")
+                 self.runtime = runtime
+
+             # Build docker/podman command
+             cmd = (
+                 f"{self.runtime} run "
+                 f"--name {self.name} "
+                 f"--publish {self.port}:5001 "
+                 f"--detach "
+                 f"{self.image}"
+             )
+
+             logger.debug(f"Running: {cmd}")
+             stdout, stderr, code = await self.ssh_client.run_command(cmd, timeout=self.timeout)
+
+             if code != 0:
+                 raise RuntimeError(f"Container start failed: {stderr}")
+
+             # Extract container ID
+             container_id = stdout.strip()
+             self.state.container_id = container_id
+             self.state.is_running = True
+
+             logger.info(f"Container started: {container_id}")
+
+             # Wait for health check
+             await self._wait_for_health()
+
+         except Exception as e:
+             self.state.is_running = False
+             logger.error(f"Container start failed: {e}")
+             raise
+
+     async def stop(self, force: bool = False) -> None:
+         """Stop container on remote server.
+
+         Parameters:
+             force: If True, kill container; if False, graceful stop
+
+         Raises:
+             RuntimeError: Container not running
+             SSHConnectionError: SSH connection lost
+         """
+         if not self.state.is_running:
+             raise RuntimeError(f"Container {self.name} is not running")
+
+         logger.info(f"Stopping remote container: {self.name}")
+
+         try:
+             action = "kill" if force else "stop"
+             cmd = f"{self.runtime} {action} {self.state.container_id}"
+
+             _stdout, stderr, code = await self.ssh_client.run_command(cmd, timeout=self.timeout)
+
+             if code != 0:
+                 logger.warning(f"Container stop returned code {code}: {stderr}")
+
+             # Remove container
+             cmd = f"{self.runtime} rm {self.state.container_id}"
+             _stdout, stderr, code = await self.ssh_client.run_command(cmd, timeout=self.timeout)
+
+             if code != 0:
+                 logger.warning(f"Container remove returned code {code}: {stderr}")
+             else:
+                 logger.debug(f"Container removed: {self.state.container_id}")
+
+             self.state.is_running = False
+             logger.info(f"Container stopped: {self.state.container_id}")
+
+         except Exception as e:
+             logger.error(f"Container stop failed: {e}")
+             raise
+
+     async def is_running(self) -> bool:
+         """Check if container is running.
+
+         Returns:
+             True if container is running
+         """
+         try:
+             # Filter by container ID (a name filter would not match an ID)
+             cmd = f"{self.runtime} ps --filter id={self.state.container_id} --format '{{{{.ID}}}}'"
+             stdout, stderr, code = await self.ssh_client.run_command(cmd)
+
+             is_running = code == 0 and stdout.strip() != ""
+             self.state.is_running = is_running
+             return is_running
+
+         except Exception as e:
+             logger.error(f"Could not check if running: {e}")
+             return False
+
+     async def check_health(self) -> bool:
+         """Check container health.
+
+         Returns:
+             True if health check passes
+         """
+         if not self.state.is_running:
+             return False
+
+         try:
+             # Use curl inside the SSH session to check whether the service responds
+             # docling-serve doesn't have a /health endpoint, so we check if it responds at all
+             cmd = f"curl -s -o /dev/null -w '%{{http_code}}' http://localhost:{self.port}/"
+             stdout, stderr, code = await self.ssh_client.run_command(cmd, timeout=5)
+
+             # A 200, 404, or 422 response still means the HTTP service is up
+             http_code = stdout.strip()
+             is_healthy = http_code in ["200", "404", "422"]  # 404 = Not Found, 422 = Unprocessable Entity
+             self.state.health_status = "healthy" if is_healthy else "unhealthy"
+             return is_healthy
+
+         except Exception as e:
+             logger.debug(f"Health check failed: {e}")
+             self.state.health_status = "unknown"
+             return False
+
+     async def get_logs(self, lines: int = 50) -> str:
+         """Get container logs.
+
+         Parameters:
+             lines: Number of recent log lines
+
+         Returns:
+             Container logs as string
+         """
+         try:
+             cmd = f"{self.runtime} logs --tail {lines} {self.state.container_id}"
+             stdout, stderr, code = await self.ssh_client.run_command(cmd)
+
+             return stdout if code == 0 else f"Error getting logs: {stderr}"
+
+         except Exception as e:
+             logger.error(f"Could not get logs: {e}")
+             return f"Error: {e}"
+
+     async def _wait_for_health(self, max_attempts: int = 30) -> None:
+         """Wait for container to become healthy.
+
+         Parameters:
+             max_attempts: Maximum health check attempts
+
+         Raises:
+             TimeoutError: Container didn't become healthy
+         """
+         import asyncio
+
+         for attempt in range(max_attempts):
+             if await self.check_health():
+                 self.is_healthy = True
+                 logger.info(f"Container became healthy after {attempt * self.health_check_interval}s")
+                 return
+
+             await asyncio.sleep(self.health_check_interval)
+
+         raise TimeoutError(
+             f"Container did not become healthy after {max_attempts * self.health_check_interval}s"
+         )
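
For orientation, a minimal usage sketch of the RemoteContainer lifecycle added above (ssh_client is assumed to be an already-connected AsyncSSHClient from mdify.ssh; the run_remote helper is illustrative, not part of the package):

    import asyncio

    async def run_remote(ssh_client):
        container = RemoteContainer(ssh_client, image="docling-serve:latest", port=8000)
        await container.start()                      # docker/podman run --detach, then poll health
        try:
            print(await container.check_health())    # True once the service answers HTTP
            print(await container.get_logs(lines=20))
        finally:
            await container.stop()                   # graceful stop, then container removal

    # asyncio.run(run_remote(ssh_client)) with an existing connection
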
mdify/ssh/transfer.py ADDED
@@ -0,0 +1,297 @@
+ """File transfer and progress tracking for SSH."""
+
+ import gzip
+ import hashlib
+ import io
+ import logging
+ from pathlib import Path
+ from typing import Callable
+ from mdify.ssh.models import TransferSession
+
+ logger = logging.getLogger(__name__)
+
+
+ class FileTransferManager:
+     """Manages file transfers with compression and progress tracking."""
+
+     def __init__(
+         self,
+         ssh_client,
+         compression_threshold: int = 1024 * 1024,  # 1MB
+         chunk_size: int = 64 * 1024,  # 64KB
+         verify_checksum: bool = True
+     ):
+         """Initialize file transfer manager.
+
+         Parameters:
+             ssh_client: Connected AsyncSSHClient instance
+             compression_threshold: Compress files larger than this (bytes)
+             chunk_size: Read/write chunk size for progress updates
+             verify_checksum: Verify uploads with SHA-256 checksums
+         """
+         self.ssh_client = ssh_client
+         self.compression_threshold = compression_threshold
+         self.chunk_size = chunk_size
+         self.verify_checksum = verify_checksum
+
+     async def upload_file(
+         self,
+         local_path: str,
+         remote_path: str,
+         progress_callback: Callable[[TransferSession], None] | None = None,
+         overwrite: bool = False,
+         compress: bool | None = None
+     ) -> TransferSession:
+         """Upload file to remote server.
+
+         Parameters:
+             local_path: Local file path
+             remote_path: Remote destination path
+             progress_callback: Called with TransferSession after each chunk
+             overwrite: Allow overwriting existing files
+             compress: Force compression (None = auto-detect by size)
+
+         Returns:
+             TransferSession with transfer results
+
+         Raises:
+             FileNotFoundError: Local file doesn't exist
+             FileExistsError: Remote file exists and overwrite=False
+         """
+         local_file = Path(local_path)
+         if not local_file.exists():
+             raise FileNotFoundError(f"Local file not found: {local_path}")
+
+         file_size = local_file.stat().st_size
+
+         # Auto-detect compression
+         if compress is None:
+             compress = file_size > self.compression_threshold
+
+         session = TransferSession(
+             local_path=local_path,
+             remote_path=remote_path,
+             direction="upload",
+             total_bytes=file_size,
+             status="in_progress"
+         )
+
+         try:
+             # Check if remote file exists
+             if not overwrite:
+                 stdout, stderr, code = await self.ssh_client.run_command(
+                     f"test -f {remote_path}"
+                 )
+                 if code == 0:
+                     raise FileExistsError(f"Remote file exists: {remote_path}")
+
+             # Prepare the upload source: compress into memory if requested,
+             # otherwise stream the file straight from disk
+             if compress:
+                 logger.debug(f"Compressing {local_file.name} for upload...")
+                 compressed_data = await self._compress_file(local_file)
+                 actual_remote_path = f"{remote_path}.gz"
+                 session.total_bytes = len(compressed_data)
+                 source = io.BytesIO(compressed_data)
+             else:
+                 actual_remote_path = remote_path
+                 source = open(local_file, 'rb')
+
+             # Upload via SFTP
+             try:
+                 async with self.ssh_client.connection.start_sftp_client() as sftp:
+                     # Write file with progress tracking
+                     bytes_written = 0
+                     async with await sftp.open(actual_remote_path, 'wb') as remote_fp:
+                         while True:
+                             chunk = source.read(self.chunk_size)
+                             if not chunk:
+                                 break
+
+                             await remote_fp.write(chunk)
+                             bytes_written += len(chunk)
+                             session.update_progress(bytes_written)
+
+                             if progress_callback:
+                                 progress_callback(session)
+             finally:
+                 source.close()
+
+             # Verify checksum if enabled (skipped for compressed uploads, where
+             # the remote .gz payload differs from the local original)
+             if self.verify_checksum and not compress:
+                 await self._verify_upload_checksum(
+                     local_file, actual_remote_path, session
+                 )
+
+             session.complete()
+             logger.info(f"Upload complete: {local_path} → {actual_remote_path}")
+             return session
+
+         except Exception as e:
+             session.fail(e)
+             logger.error(f"Upload failed: {e}")
+             raise
+
+     async def download_file(
+         self,
+         remote_path: str,
+         local_path: str,
+         progress_callback: Callable[[TransferSession], None] | None = None,
+         overwrite: bool = False
+     ) -> TransferSession:
+         """Download file from remote server.
+
+         Parameters:
+             remote_path: Remote file path
+             local_path: Local destination path
+             progress_callback: Called with TransferSession after each chunk
+             overwrite: Allow overwriting existing files
+
+         Returns:
+             TransferSession with transfer results
+         """
+         local_file = Path(local_path)
+
+         # Check if local file exists
+         if local_file.exists() and not overwrite:
+             raise FileExistsError(f"Local file exists: {local_path}")
+
+         session = TransferSession(
+             remote_path=remote_path,
+             local_path=local_path,
+             direction="download",
+             status="in_progress"
+         )
+
+         try:
+             # Get remote file size
+             stdout, stderr, code = await self.ssh_client.run_command(
+                 f"wc -c < {remote_path}"
+             )
+             if code == 0:
+                 try:
+                     session.total_bytes = int(stdout.strip())
+                 except ValueError:
+                     session.total_bytes = 0
+
+             # Download via SFTP
+             async with self.ssh_client.connection.start_sftp_client() as sftp:
+                 bytes_read = 0
+                 with open(local_file, 'wb') as local_fp:
+                     async with await sftp.open(remote_path, 'rb') as remote_fp:
+                         while True:
+                             chunk = await remote_fp.read(self.chunk_size)
+                             if not chunk:
+                                 break
+
+                             local_fp.write(chunk)
+                             bytes_read += len(chunk)
+                             session.update_progress(bytes_read)
+
+                             if progress_callback:
+                                 progress_callback(session)
+
+             session.complete()
+             logger.info(f"Download complete: {remote_path} → {local_path}")
+             return session
+
+         except Exception as e:
+             session.fail(e)
+             logger.error(f"Download failed: {e}")
+             raise
+
+     async def _compress_file(self, file_path: Path) -> bytes:
+         """Compress file for transfer.
+
+         Parameters:
+             file_path: Path to file to compress
+
+         Returns:
+             Compressed file data
+         """
+         with open(file_path, 'rb') as f_in:
+             compressed = gzip.compress(f_in.read())
+         return compressed
+
+     async def _verify_upload_checksum(
+         self,
+         local_file: Path,
+         remote_path: str,
+         session: TransferSession
+     ) -> None:
+         """Verify uploaded file checksum.
+
+         Parameters:
+             local_file: Local file path
+             remote_path: Remote file path
+             session: Transfer session for error reporting
+
+         Raises:
+             ValueError: Checksum mismatch
+         """
+         # Calculate local checksum
+         local_sha256 = hashlib.sha256()
+         with open(local_file, 'rb') as f:
+             for chunk in iter(lambda: f.read(self.chunk_size), b''):
+                 local_sha256.update(chunk)
+         local_checksum = local_sha256.hexdigest()
+
+         # Calculate remote checksum
+         stdout, stderr, code = await self.ssh_client.run_command(
+             f"sha256sum {remote_path} | awk '{{print $1}}'"
+         )
+
+         if code == 0:
+             remote_checksum = stdout.strip()
+
+             if local_checksum != remote_checksum:
+                 raise ValueError(
+                     f"Checksum mismatch: {local_checksum} != {remote_checksum}"
+                 )
+
+             logger.debug(f"Checksum verified: {local_checksum}")
+         else:
+             logger.warning(f"Could not verify checksum: {stderr}")
+
+
+ class ProgressBar:
+     """Simple progress bar display."""
+
+     def __init__(self, total_bytes: int, debug_mode: bool = False):
+         """Initialize progress bar.
+
+         Parameters:
+             total_bytes: Total bytes to transfer
+             debug_mode: Enable detailed logging
+         """
+         self.total_bytes = total_bytes
+         self.debug_mode = debug_mode
+         self.last_update = 0
+
+     def update(self, session: TransferSession) -> str:
+         """Format progress display.
+
+         Parameters:
+             session: TransferSession with current progress
+
+         Returns:
+             Formatted progress string
+         """
+         if session.total_bytes == 0:
+             return ""
+
+         percent = 100 * session.transferred_bytes / session.total_bytes
+         bar_width = 40
+         filled = int(bar_width * session.transferred_bytes / session.total_bytes)
+         bar = "█" * filled + "░" * (bar_width - filled)
+
+         speed_str = f"{session.avg_speed_mbps:.1f}MB/s"
+         eta_str = f"ETA {session.eta_seconds}s" if session.eta_seconds else "Computing..."
+
+         return f"[{bar}] {percent:3.0f}% {speed_str} {eta_str}"
+
+     def log_chunk(self, chunk_num: int, bytes_transferred: int, speed_mbps: float) -> None:
+         """Log chunk transfer (debug mode).
+
+         Parameters:
+             chunk_num: Chunk number
+             bytes_transferred: Total bytes transferred so far
+             speed_mbps: Current speed in MB/s
+         """
+         if self.debug_mode:
+             logger.debug(f"Chunk {chunk_num}: {bytes_transferred} bytes ({speed_mbps:.1f}MB/s)")
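
To show how the transfer manager and progress bar are meant to compose, a hedged usage sketch follows (ssh_client is again assumed to be a connected AsyncSSHClient; the file paths and the push_report helper are placeholders, not part of the package):

    import asyncio

    async def push_report(ssh_client):
        manager = FileTransferManager(ssh_client, verify_checksum=True)
        bar = ProgressBar(total_bytes=0)

        def on_progress(session):
            # Render the bar in place; the session carries byte counts, speed, and ETA
            print("\r" + bar.update(session), end="")

        session = await manager.upload_file(
            "./report.pdf",        # placeholder local path
            "/tmp/report.pdf",     # placeholder remote path
            progress_callback=on_progress,
            overwrite=True,
        )
        print(f"\nTransferred {session.transferred_bytes} bytes")

    # asyncio.run(push_report(ssh_client)) with an existing connection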