experimaestro 2.0.0a8__py3-none-any.whl → 2.0.0b8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of experimaestro might be problematic. Click here for more details.

Files changed (122) hide show
  1. experimaestro/__init__.py +10 -11
  2. experimaestro/annotations.py +167 -206
  3. experimaestro/cli/__init__.py +278 -7
  4. experimaestro/cli/filter.py +42 -74
  5. experimaestro/cli/jobs.py +157 -106
  6. experimaestro/cli/refactor.py +249 -0
  7. experimaestro/click.py +0 -1
  8. experimaestro/commandline.py +19 -3
  9. experimaestro/connectors/__init__.py +20 -1
  10. experimaestro/connectors/local.py +12 -0
  11. experimaestro/core/arguments.py +182 -46
  12. experimaestro/core/identifier.py +107 -6
  13. experimaestro/core/objects/__init__.py +6 -0
  14. experimaestro/core/objects/config.py +542 -25
  15. experimaestro/core/objects/config_walk.py +20 -0
  16. experimaestro/core/serialization.py +91 -34
  17. experimaestro/core/subparameters.py +164 -0
  18. experimaestro/core/types.py +175 -38
  19. experimaestro/exceptions.py +26 -0
  20. experimaestro/experiments/cli.py +111 -25
  21. experimaestro/generators.py +50 -9
  22. experimaestro/huggingface.py +3 -1
  23. experimaestro/launcherfinder/parser.py +29 -0
  24. experimaestro/launchers/__init__.py +26 -1
  25. experimaestro/launchers/direct.py +12 -0
  26. experimaestro/launchers/slurm/base.py +154 -2
  27. experimaestro/mkdocs/metaloader.py +0 -1
  28. experimaestro/mypy.py +452 -7
  29. experimaestro/notifications.py +63 -13
  30. experimaestro/progress.py +0 -2
  31. experimaestro/rpyc.py +0 -1
  32. experimaestro/run.py +19 -6
  33. experimaestro/scheduler/base.py +510 -125
  34. experimaestro/scheduler/dependencies.py +43 -28
  35. experimaestro/scheduler/dynamic_outputs.py +259 -130
  36. experimaestro/scheduler/experiment.py +256 -31
  37. experimaestro/scheduler/interfaces.py +501 -0
  38. experimaestro/scheduler/jobs.py +216 -206
  39. experimaestro/scheduler/remote/__init__.py +31 -0
  40. experimaestro/scheduler/remote/client.py +874 -0
  41. experimaestro/scheduler/remote/protocol.py +467 -0
  42. experimaestro/scheduler/remote/server.py +423 -0
  43. experimaestro/scheduler/remote/sync.py +144 -0
  44. experimaestro/scheduler/services.py +323 -23
  45. experimaestro/scheduler/state_db.py +437 -0
  46. experimaestro/scheduler/state_provider.py +2766 -0
  47. experimaestro/scheduler/state_sync.py +891 -0
  48. experimaestro/scheduler/workspace.py +52 -10
  49. experimaestro/scriptbuilder.py +7 -0
  50. experimaestro/server/__init__.py +147 -57
  51. experimaestro/server/data/index.css +0 -125
  52. experimaestro/server/data/index.css.map +1 -1
  53. experimaestro/server/data/index.js +194 -58
  54. experimaestro/server/data/index.js.map +1 -1
  55. experimaestro/settings.py +44 -5
  56. experimaestro/sphinx/__init__.py +3 -3
  57. experimaestro/taskglobals.py +20 -0
  58. experimaestro/tests/conftest.py +80 -0
  59. experimaestro/tests/core/test_generics.py +2 -2
  60. experimaestro/tests/identifier_stability.json +45 -0
  61. experimaestro/tests/launchers/bin/sacct +6 -2
  62. experimaestro/tests/launchers/bin/sbatch +4 -2
  63. experimaestro/tests/launchers/test_slurm.py +80 -0
  64. experimaestro/tests/tasks/test_dynamic.py +231 -0
  65. experimaestro/tests/test_cli_jobs.py +615 -0
  66. experimaestro/tests/test_deprecated.py +630 -0
  67. experimaestro/tests/test_environment.py +200 -0
  68. experimaestro/tests/test_file_progress_integration.py +1 -1
  69. experimaestro/tests/test_forward.py +3 -3
  70. experimaestro/tests/test_identifier.py +372 -41
  71. experimaestro/tests/test_identifier_stability.py +458 -0
  72. experimaestro/tests/test_instance.py +3 -3
  73. experimaestro/tests/test_multitoken.py +442 -0
  74. experimaestro/tests/test_mypy.py +433 -0
  75. experimaestro/tests/test_objects.py +312 -5
  76. experimaestro/tests/test_outputs.py +2 -2
  77. experimaestro/tests/test_param.py +8 -12
  78. experimaestro/tests/test_partial_paths.py +231 -0
  79. experimaestro/tests/test_progress.py +0 -48
  80. experimaestro/tests/test_remote_state.py +671 -0
  81. experimaestro/tests/test_resumable_task.py +480 -0
  82. experimaestro/tests/test_serializers.py +141 -1
  83. experimaestro/tests/test_state_db.py +434 -0
  84. experimaestro/tests/test_subparameters.py +160 -0
  85. experimaestro/tests/test_tags.py +136 -0
  86. experimaestro/tests/test_tasks.py +107 -121
  87. experimaestro/tests/test_token_locking.py +252 -0
  88. experimaestro/tests/test_tokens.py +17 -13
  89. experimaestro/tests/test_types.py +123 -1
  90. experimaestro/tests/test_workspace_triggers.py +158 -0
  91. experimaestro/tests/token_reschedule.py +4 -2
  92. experimaestro/tests/utils.py +2 -2
  93. experimaestro/tokens.py +154 -57
  94. experimaestro/tools/diff.py +1 -1
  95. experimaestro/tui/__init__.py +8 -0
  96. experimaestro/tui/app.py +2395 -0
  97. experimaestro/tui/app.tcss +353 -0
  98. experimaestro/tui/log_viewer.py +228 -0
  99. experimaestro/utils/__init__.py +23 -0
  100. experimaestro/utils/environment.py +148 -0
  101. experimaestro/utils/git.py +129 -0
  102. experimaestro/utils/resources.py +1 -1
  103. experimaestro/version.py +34 -0
  104. {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b8.dist-info}/METADATA +68 -38
  105. experimaestro-2.0.0b8.dist-info/RECORD +187 -0
  106. {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b8.dist-info}/WHEEL +1 -1
  107. experimaestro-2.0.0b8.dist-info/entry_points.txt +16 -0
  108. experimaestro/compat.py +0 -6
  109. experimaestro/core/objects.pyi +0 -221
  110. experimaestro/server/data/0c35d18bf06992036b69.woff2 +0 -0
  111. experimaestro/server/data/219aa9140e099e6c72ed.woff2 +0 -0
  112. experimaestro/server/data/3a4004a46a653d4b2166.woff +0 -0
  113. experimaestro/server/data/3baa5b8f3469222b822d.woff +0 -0
  114. experimaestro/server/data/4d73cb90e394b34b7670.woff +0 -0
  115. experimaestro/server/data/4ef4218c522f1eb6b5b1.woff2 +0 -0
  116. experimaestro/server/data/5d681e2edae8c60630db.woff +0 -0
  117. experimaestro/server/data/6f420cf17cc0d7676fad.woff2 +0 -0
  118. experimaestro/server/data/c380809fd3677d7d6903.woff2 +0 -0
  119. experimaestro/server/data/f882956fd323fd322f31.woff +0 -0
  120. experimaestro-2.0.0a8.dist-info/RECORD +0 -166
  121. experimaestro-2.0.0a8.dist-info/entry_points.txt +0 -17
  122. {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b8.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,423 @@
1
+ """SSH State Provider Server
2
+
3
+ JSON-RPC server that wraps WorkspaceStateProvider and communicates via stdio.
4
+ Designed to be run over SSH for remote experiment monitoring.
5
+
6
+ Usage:
7
+ experimaestro experiments monitor-server --workdir /path/to/workspace
8
+ """
9
+
10
+ import logging
11
+ import sys
12
+ import threading
13
+ from pathlib import Path
14
+ from typing import IO, Callable, Dict, Optional
15
+
16
+ from experimaestro.scheduler.state_provider import (
17
+ WorkspaceStateProvider,
18
+ StateEvent,
19
+ StateEventType,
20
+ )
21
+ from experimaestro.scheduler.remote.protocol import (
22
+ RPCMethod,
23
+ NotificationMethod,
24
+ parse_message,
25
+ create_success_response,
26
+ create_error_response,
27
+ create_notification,
28
+ serialize_job,
29
+ serialize_experiment,
30
+ serialize_run,
31
+ serialize_datetime,
32
+ deserialize_datetime,
33
+ PARSE_ERROR,
34
+ METHOD_NOT_FOUND,
35
+ INVALID_PARAMS,
36
+ INTERNAL_ERROR,
37
+ WORKSPACE_NOT_FOUND,
38
+ )
39
+
40
+ logger = logging.getLogger("xpm.remote.server")
41
+
42
+
43
class SSHStateProviderServer:
    """JSON-RPC server that wraps WorkspaceStateProvider for SSH-based monitoring

    This server reads newline-delimited JSON-RPC requests from stdin and writes
    responses to stdout. It registers as a listener with the
    WorkspaceStateProvider to push notifications when state changes occur.

    Thread safety:
    - Writes to stdout are serialized with a lock
    - The main read loop runs in the calling thread
    - Event notifications may come from the state provider's change detector thread
    """

    def __init__(
        self,
        workspace_path: Path,
        stdin: Optional[IO[bytes]] = None,
        stdout: Optional[IO[bytes]] = None,
    ):
        """Initialize the server

        Args:
            workspace_path: Path to the workspace directory
            stdin: Input stream for reading requests (default: sys.stdin.buffer)
            stdout: Output stream for writing responses (default: sys.stdout.buffer)
        """
        self.workspace_path = workspace_path
        self.stdin = stdin if stdin is not None else sys.stdin.buffer
        self.stdout = stdout if stdout is not None else sys.stdout.buffer
        # Created lazily in start() so that construction never touches the disk
        self._state_provider: Optional[WorkspaceStateProvider] = None
        self._running = False
        # Guards stdout: responses and notifications come from different threads
        self._write_lock = threading.Lock()

        # Map of method names to handler functions
        self._handlers: Dict[str, Callable] = {
            RPCMethod.GET_EXPERIMENTS.value: self._handle_get_experiments,
            RPCMethod.GET_EXPERIMENT.value: self._handle_get_experiment,
            RPCMethod.GET_EXPERIMENT_RUNS.value: self._handle_get_experiment_runs,
            RPCMethod.GET_JOBS.value: self._handle_get_jobs,
            RPCMethod.GET_JOB.value: self._handle_get_job,
            RPCMethod.GET_ALL_JOBS.value: self._handle_get_all_jobs,
            RPCMethod.GET_SERVICES.value: self._handle_get_services,
            RPCMethod.KILL_JOB.value: self._handle_kill_job,
            RPCMethod.CLEAN_JOB.value: self._handle_clean_job,
            RPCMethod.GET_SYNC_INFO.value: self._handle_get_sync_info,
        }

    def start(self):
        """Start the server and begin processing requests

        This method blocks until the server is stopped or stdin is closed.
        If the workspace is missing or the state provider cannot be created,
        a shutdown notification describing the error is sent and the method
        returns without entering the read loop.
        """
        # Verify workspace exists
        if not self.workspace_path.exists():
            logger.error("Workspace path does not exist: %s", self.workspace_path)
            self._send_error_and_exit(
                WORKSPACE_NOT_FOUND,
                f"Workspace path does not exist: {self.workspace_path}",
            )
            return

        # Initialize state provider in read-only mode
        try:
            self._state_provider = WorkspaceStateProvider.get_instance(
                self.workspace_path,
                read_only=True,
                sync_on_start=True,
            )
        except Exception as e:
            logger.exception("Failed to initialize state provider")
            self._send_error_and_exit(INTERNAL_ERROR, f"Failed to initialize: {e}")
            return

        # Register as listener for state changes
        self._state_provider.add_listener(self._on_state_event)

        self._running = True
        logger.info("SSH State Provider Server started for %s", self.workspace_path)

        try:
            self._read_loop()
        finally:
            self.stop()

    def stop(self):
        """Stop the server and clean up resources

        Cleanup is best-effort: failures while unregistering the listener or
        sending the shutdown notification (e.g. the peer already closed the
        pipe) are logged at debug level and otherwise ignored.
        """
        self._running = False

        # Unregister listener
        if self._state_provider is not None:
            try:
                self._state_provider.remove_listener(self._on_state_event)
            except Exception:
                logger.debug("Could not remove state listener", exc_info=True)

        # Send shutdown notification
        try:
            self._send_notification(
                NotificationMethod.SHUTDOWN, {"reason": "server_shutdown"}
            )
        except Exception:
            logger.debug("Could not send shutdown notification", exc_info=True)

        logger.info("SSH State Provider Server stopped")

    def _read_loop(self):
        """Main loop: read JSON-RPC requests from stdin and process them

        The loop ends on EOF, when ``_running`` is cleared, or when reading
        from stdin fails. Errors raised while decoding or processing a single
        request are logged and do not stop the loop.
        """
        while self._running:
            # Read failures are handled separately from request failures: a
            # broken or closed stream would fail again on every iteration, so
            # continuing would spin forever.
            try:
                line = self.stdin.readline()
            except Exception as e:
                logger.exception("Error reading from stdin, stopping server: %s", e)
                break

            if not line:
                # EOF - stdin closed
                logger.debug("stdin closed, stopping server")
                break

            try:
                line_str = line.decode("utf-8").strip()
                if not line_str:
                    continue

                self._process_request(line_str)

            except Exception as e:
                logger.exception("Error in read loop: %s", e)
                # Continue processing - don't crash on individual request errors

    def _process_request(self, line: str):
        """Process a single JSON-RPC request

        Args:
            line: One non-empty, already decoded line received from the client

        Exactly one response is sent for a request that carries an id; messages
        that are not requests, and requests without an id, are ignored.
        """
        try:
            msg = parse_message(line)
        except ValueError as e:
            # The request id cannot be recovered from an unparsable message
            self._send_response(create_error_response(0, PARSE_ERROR, str(e)))
            return

        # We only handle requests (with id), not responses or notifications
        from experimaestro.scheduler.remote.protocol import RPCRequest

        if not isinstance(msg, RPCRequest):
            logger.warning("Received non-request message: %s", type(msg).__name__)
            return

        request = msg
        method = request.method
        request_id = request.id

        if request_id is None:
            # Notification from client - we don't handle these currently
            logger.debug("Received notification: %s", method)
            return

        # Dispatch to handler
        handler = self._handlers.get(method)
        if handler is None:
            self._send_response(
                create_error_response(
                    request_id, METHOD_NOT_FOUND, f"Unknown method: {method}"
                )
            )
            return

        # Handlers use ``params.get(...)``: treat missing params as empty and
        # reject anything that is not a JSON object as invalid parameters
        # (rather than letting it surface as an internal error).
        params = request.params
        if params is None:
            params = {}
        elif not isinstance(params, dict):
            self._send_response(
                create_error_response(
                    request_id, INVALID_PARAMS, "params must be an object"
                )
            )
            return

        try:
            result = handler(params)
            self._send_response(create_success_response(request_id, result))
        except TypeError as e:
            # Handlers raise TypeError for missing required parameters
            self._send_response(
                create_error_response(request_id, INVALID_PARAMS, str(e))
            )
        except Exception as e:
            logger.exception("Error handling %s", method)
            self._send_response(
                create_error_response(request_id, INTERNAL_ERROR, str(e))
            )

    def _send_response(self, response: str):
        """Send a JSON-RPC response (thread-safe)

        Args:
            response: Serialized message; a newline terminator is appended
        """
        with self._write_lock:
            self.stdout.write((response + "\n").encode("utf-8"))
            self.stdout.flush()

    def _send_notification(self, method: NotificationMethod, params: Dict):
        """Send a JSON-RPC notification (thread-safe)

        Args:
            method: Notification type
            params: Notification payload
        """
        notification = create_notification(method, params)
        self._send_response(notification)

    def _send_error_and_exit(self, code: int, message: str):
        """Send a shutdown notification describing a fatal error

        Despite its name, this method does not terminate the process: the
        caller is expected to return right after calling it.

        Args:
            code: Error code reported to the client
            message: Human-readable error description
        """
        self._send_notification(
            NotificationMethod.SHUTDOWN,
            {"reason": "error", "code": code, "message": message},
        )

    def _on_state_event(self, event: StateEvent):
        """Handle state change events from the state provider

        Converts events to JSON-RPC notifications and sends them to the client.
        Event types without a matching notification are ignored. Failures are
        logged and never propagated to the state provider.
        """
        try:
            if event.event_type == StateEventType.EXPERIMENT_UPDATED:
                self._send_notification(
                    NotificationMethod.EXPERIMENT_UPDATED,
                    {
                        "experiment_id": event.data.get("experiment_id"),
                        "data": event.data,
                    },
                )
            elif event.event_type == StateEventType.RUN_UPDATED:
                self._send_notification(
                    NotificationMethod.RUN_UPDATED,
                    {
                        "experiment_id": event.data.get("experiment_id"),
                        "run_id": event.data.get("run_id"),
                        "data": event.data,
                    },
                )
            elif event.event_type == StateEventType.JOB_UPDATED:
                self._send_notification(
                    NotificationMethod.JOB_UPDATED,
                    {
                        "job_id": event.data.get("job_id"),
                        "experiment_id": event.data.get("experiment_id"),
                        "run_id": event.data.get("run_id"),
                        "state": event.data.get("state"),
                        "data": event.data,
                    },
                )
                # Also send file_changed notification for job metadata
                if "path" in event.data and event.data["path"]:
                    self._send_notification(
                        NotificationMethod.FILE_CHANGED,
                        {
                            "path": f"{event.data['path']}/.experimaestro/",
                            "change_type": "modified",
                        },
                    )
            elif event.event_type == StateEventType.SERVICE_UPDATED:
                self._send_notification(
                    NotificationMethod.SERVICE_UPDATED,
                    {
                        "service_id": event.data.get("service_id"),
                        "experiment_id": event.data.get("experiment_id"),
                        "run_id": event.data.get("run_id"),
                        "state": event.data.get("state"),
                        "data": event.data,
                    },
                )
        except Exception as e:
            logger.exception("Error sending notification: %s", e)

    # -------------------------------------------------------------------------
    # Request Handlers
    # -------------------------------------------------------------------------

    def _handle_get_experiments(self, params: Dict) -> list:
        """Handle get_experiments request

        Args:
            params: Optional ``since`` (serialized datetime)
        """
        since = deserialize_datetime(params.get("since"))
        experiments = self._state_provider.get_experiments(since=since)
        return [serialize_experiment(exp) for exp in experiments]

    def _handle_get_experiment(self, params: Dict) -> Optional[Dict]:
        """Handle get_experiment request

        Args:
            params: Requires ``experiment_id``

        Returns:
            The serialized experiment, or None when it does not exist

        Raises:
            TypeError: If ``experiment_id`` is missing or empty
        """
        experiment_id = params.get("experiment_id")
        if not experiment_id:
            raise TypeError("experiment_id is required")

        experiment = self._state_provider.get_experiment(experiment_id)
        if experiment is None:
            return None
        return serialize_experiment(experiment)

    def _handle_get_experiment_runs(self, params: Dict) -> list:
        """Handle get_experiment_runs request

        Args:
            params: Requires ``experiment_id``

        Raises:
            TypeError: If ``experiment_id`` is missing or empty
        """
        experiment_id = params.get("experiment_id")
        if not experiment_id:
            raise TypeError("experiment_id is required")

        runs = self._state_provider.get_experiment_runs(experiment_id)
        return [serialize_run(run) for run in runs]

    def _handle_get_jobs(self, params: Dict) -> list:
        """Handle get_jobs request

        Args:
            params: Optional filters ``experiment_id``, ``run_id``, ``task_id``,
                ``state``, ``tags`` and ``since`` (serialized datetime)
        """
        since = deserialize_datetime(params.get("since"))
        jobs = self._state_provider.get_jobs(
            experiment_id=params.get("experiment_id"),
            run_id=params.get("run_id"),
            task_id=params.get("task_id"),
            state=params.get("state"),
            tags=params.get("tags"),
            since=since,
        )
        return [serialize_job(job) for job in jobs]

    def _handle_get_job(self, params: Dict) -> Optional[Dict]:
        """Handle get_job request

        Args:
            params: Requires ``job_id`` and ``experiment_id``; ``run_id`` is
                optional

        Returns:
            The serialized job, or None when it does not exist

        Raises:
            TypeError: If ``job_id`` or ``experiment_id`` is missing or empty
        """
        job_id = params.get("job_id")
        experiment_id = params.get("experiment_id")
        if not job_id or not experiment_id:
            raise TypeError("job_id and experiment_id are required")

        job = self._state_provider.get_job(
            job_id=job_id,
            experiment_id=experiment_id,
            run_id=params.get("run_id"),
        )
        if job is None:
            return None
        return serialize_job(job)

    def _handle_get_all_jobs(self, params: Dict) -> list:
        """Handle get_all_jobs request

        Args:
            params: Optional filters ``state``, ``tags`` and ``since``
                (serialized datetime)
        """
        since = deserialize_datetime(params.get("since"))
        jobs = self._state_provider.get_all_jobs(
            state=params.get("state"),
            tags=params.get("tags"),
            since=since,
        )
        return [serialize_job(job) for job in jobs]

    def _handle_get_services(self, params: Dict) -> list:
        """Handle get_services request

        Uses get_services_raw to return raw service data without trying to
        recreate Service objects. This allows the client to handle module
        loading and show appropriate error messages.
        """
        return self._state_provider.get_services_raw(
            experiment_id=params.get("experiment_id"),
            run_id=params.get("run_id"),
        )

    def _run_job_action(self, params: Dict, action: Callable) -> Dict:
        """Look up the job designated by ``params`` and apply ``action`` to it

        Args:
            params: Requires ``job_id``, ``experiment_id`` and ``run_id``
            action: Callable taking the job and returning the action result

        Returns:
            ``{"success": result}`` on completion, or
            ``{"success": False, "error": ...}`` when the job does not exist or
            the action raises

        Raises:
            TypeError: If one of the required identifiers is missing or empty
        """
        job_id = params.get("job_id")
        experiment_id = params.get("experiment_id")
        run_id = params.get("run_id")

        if not job_id or not experiment_id or not run_id:
            raise TypeError("job_id, experiment_id, and run_id are required")

        # Get the job first
        job = self._state_provider.get_job(job_id, experiment_id, run_id)
        if job is None:
            return {"success": False, "error": "Job not found"}

        # Action failures are reported in the result, not as RPC errors
        try:
            return {"success": action(job)}
        except Exception as e:
            return {"success": False, "error": str(e)}

    def _handle_kill_job(self, params: Dict) -> Dict:
        """Handle kill_job request

        Args:
            params: Requires ``job_id``, ``experiment_id`` and ``run_id``

        Raises:
            TypeError: If one of the required identifiers is missing or empty
        """
        return self._run_job_action(
            params, lambda job: self._state_provider.kill_job(job, perform=True)
        )

    def _handle_clean_job(self, params: Dict) -> Dict:
        """Handle clean_job request

        Args:
            params: Requires ``job_id``, ``experiment_id`` and ``run_id``

        Raises:
            TypeError: If one of the required identifiers is missing or empty
        """
        return self._run_job_action(
            params, lambda job: self._state_provider.clean_job(job, perform=True)
        )

    def _handle_get_sync_info(self, params: Dict) -> Dict:
        """Handle get_sync_info request

        Returns:
            The workspace path and, when the state provider exposes
            ``get_last_sync_time``, the serialized last synchronization time
            (None otherwise)
        """
        return {
            "workspace_path": str(self.workspace_path),
            "last_sync_time": (
                serialize_datetime(self._state_provider.get_last_sync_time())
                if hasattr(self._state_provider, "get_last_sync_time")
                else None
            ),
        }
@@ -0,0 +1,144 @@
1
+ """Remote File Synchronizer
2
+
3
+ Handles rsync-based file synchronization between remote and local workspaces
4
+ for SSH-based experiment monitoring. Only syncs specific paths on-demand
5
+ when services need them (e.g., TensorboardService).
6
+ """
7
+
8
+ import logging
9
+ import subprocess
10
+ from pathlib import Path
11
+ from typing import List, Optional
12
+
13
+ logger = logging.getLogger("xpm.remote.sync")
14
+
15
+
16
class RemoteFileSynchronizer:
    """Handles rsync-based file synchronization for remote monitoring

    Syncs specific paths on-demand from a remote host to a local cache
    directory. Used when services need access to remote files.
    """

    def __init__(
        self,
        host: str,
        remote_workspace: Path,
        local_cache: Path,
        ssh_options: Optional[List[str]] = None,
    ):
        """Initialize the synchronizer

        Args:
            host: SSH host (user@host or just host)
            remote_workspace: Path to workspace on the remote host
            local_cache: Local directory to sync files to
            ssh_options: Additional SSH options (e.g., ["-p", "2222"])
        """
        self.host = host
        self.remote_workspace = remote_workspace
        self.local_cache = local_cache
        self.ssh_options = ssh_options or []

    def _workspace_relative(self, remote_path: str) -> Optional[str]:
        """Return ``remote_path`` relative to the remote workspace

        The workspace prefix is matched on a path-component boundary, so that
        ``/data/ws2/x`` is not mistaken for a path inside ``/data/ws``.

        Returns:
            The relative path (empty for the workspace itself), or None when
            ``remote_path`` is not located inside the workspace
        """
        workspace = str(self.remote_workspace).rstrip("/")
        if remote_path.startswith(workspace + "/"):
            return remote_path[len(workspace) + 1 :].lstrip("/")
        if workspace and remote_path == workspace:
            return ""
        return None

    def sync_path(self, remote_path: str) -> Path:
        """Sync a specific path from remote

        Args:
            remote_path: Absolute path on remote or path relative to workspace

        Returns:
            Local path where the files were synced to

        Raises:
            ValueError: If the path is empty or contains ``..`` components
            RuntimeError: If rsync fails or is not installed
            subprocess.TimeoutExpired: If rsync does not finish in time
        """
        # Normalize the path - get relative path within workspace
        relative_path = self._workspace_relative(remote_path)
        if relative_path is None:
            # Anything else is interpreted relative to the workspace
            relative_path = remote_path.lstrip("/")

        if not relative_path:
            raise ValueError("Cannot sync empty path")

        # rsync runs with --delete: never let ".." move the destination
        # outside of the local cache directory
        if ".." in relative_path.split("/"):
            raise ValueError(f"Cannot sync path containing '..': {remote_path}")

        logger.info("Syncing path: %s", relative_path)

        # Build source and destination (trailing slashes: sync directory contents)
        source = f"{self.host}:{self.remote_workspace}/{relative_path}/"
        local_path = self.local_cache / relative_path
        local_path.mkdir(parents=True, exist_ok=True)
        dest = f"{local_path}/"

        self._rsync(source, dest)

        return local_path

    def _rsync(self, source: str, dest: str):
        """Execute rsync command

        Args:
            source: Remote source path (host:path/)
            dest: Local destination path

        Raises:
            RuntimeError: If rsync is not installed or exits with an error
                other than a partial transfer (codes 23 and 24)
            subprocess.TimeoutExpired: If rsync runs for more than 5 minutes
        """
        import shlex

        cmd = [
            "rsync",
            "--inplace",  # Update destination files in-place
            "--delete",  # Delete extraneous files from destination
            "-L",  # Transform symlinks into referent file/dir
            "-a",  # Archive mode (preserves permissions, times, etc.)
            "-z",  # Compress during transfer
            "-v",  # Verbose
        ]

        # SSH options: rsync splits the -e value on spaces, so each option is
        # quoted to keep values containing whitespace in one piece
        if self.ssh_options:
            ssh_cmd = shlex.join(["ssh", *self.ssh_options])
            cmd.extend(["-e", ssh_cmd])

        cmd.extend([source, dest])

        logger.debug("Running rsync: %s", " ".join(cmd))

        try:
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=300,  # 5 minute timeout
            )
        except subprocess.TimeoutExpired:
            logger.error("Rsync timed out")
            raise
        except FileNotFoundError as err:
            logger.error("rsync command not found - please install rsync")
            raise RuntimeError("rsync command not found") from err

        if result.returncode == 0:
            logger.debug("Rsync completed successfully")
        elif result.returncode == 23:
            # Partial transfer due to error - some files may be missing
            logger.warning("Rsync partial transfer: %s", result.stderr)
        elif result.returncode == 24:
            # Partial transfer due to vanished source files
            logger.debug("Rsync: some source files vanished")
        else:
            logger.error(
                "Rsync failed (code %d): %s",
                result.returncode,
                result.stderr,
            )
            raise RuntimeError(f"Rsync failed: {result.stderr}")

    def get_local_path(self, remote_path: str) -> Path:
        """Get the local cache path for a remote path

        Args:
            remote_path: Absolute path on the remote system

        Returns:
            Corresponding path in the local cache when ``remote_path`` is
            inside the remote workspace, otherwise ``remote_path`` unchanged
            (as a Path)
        """
        relative = self._workspace_relative(remote_path)
        if relative is None:
            return Path(remote_path)
        return self.local_cache / relative