experimaestro 2.0.0b4__py3-none-any.whl → 2.0.0b17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of experimaestro might be problematic. Click here for more details.

Files changed (154)
  1. experimaestro/__init__.py +12 -5
  2. experimaestro/cli/__init__.py +393 -134
  3. experimaestro/cli/filter.py +48 -23
  4. experimaestro/cli/jobs.py +253 -71
  5. experimaestro/cli/refactor.py +1 -2
  6. experimaestro/commandline.py +7 -4
  7. experimaestro/connectors/__init__.py +9 -1
  8. experimaestro/connectors/local.py +43 -3
  9. experimaestro/core/arguments.py +18 -18
  10. experimaestro/core/identifier.py +11 -11
  11. experimaestro/core/objects/config.py +96 -39
  12. experimaestro/core/objects/config_walk.py +3 -3
  13. experimaestro/core/{subparameters.py → partial.py} +16 -16
  14. experimaestro/core/partial_lock.py +394 -0
  15. experimaestro/core/types.py +12 -15
  16. experimaestro/dynamic.py +290 -0
  17. experimaestro/experiments/__init__.py +6 -2
  18. experimaestro/experiments/cli.py +223 -52
  19. experimaestro/experiments/configuration.py +24 -0
  20. experimaestro/generators.py +5 -5
  21. experimaestro/ipc.py +118 -1
  22. experimaestro/launcherfinder/__init__.py +2 -2
  23. experimaestro/launcherfinder/registry.py +6 -7
  24. experimaestro/launcherfinder/specs.py +2 -9
  25. experimaestro/launchers/slurm/__init__.py +2 -2
  26. experimaestro/launchers/slurm/base.py +62 -0
  27. experimaestro/locking.py +957 -1
  28. experimaestro/notifications.py +89 -201
  29. experimaestro/progress.py +63 -366
  30. experimaestro/rpyc.py +0 -2
  31. experimaestro/run.py +29 -2
  32. experimaestro/scheduler/__init__.py +8 -1
  33. experimaestro/scheduler/base.py +650 -53
  34. experimaestro/scheduler/dependencies.py +20 -16
  35. experimaestro/scheduler/experiment.py +764 -169
  36. experimaestro/scheduler/interfaces.py +338 -96
  37. experimaestro/scheduler/jobs.py +58 -20
  38. experimaestro/scheduler/remote/__init__.py +31 -0
  39. experimaestro/scheduler/remote/adaptive_sync.py +265 -0
  40. experimaestro/scheduler/remote/client.py +928 -0
  41. experimaestro/scheduler/remote/protocol.py +282 -0
  42. experimaestro/scheduler/remote/server.py +447 -0
  43. experimaestro/scheduler/remote/sync.py +144 -0
  44. experimaestro/scheduler/services.py +186 -35
  45. experimaestro/scheduler/state_provider.py +811 -2157
  46. experimaestro/scheduler/state_status.py +1247 -0
  47. experimaestro/scheduler/transient.py +31 -0
  48. experimaestro/scheduler/workspace.py +1 -1
  49. experimaestro/scheduler/workspace_state_provider.py +1273 -0
  50. experimaestro/scriptbuilder.py +4 -4
  51. experimaestro/settings.py +36 -0
  52. experimaestro/tests/conftest.py +33 -5
  53. experimaestro/tests/connectors/bin/executable.py +1 -1
  54. experimaestro/tests/fixtures/pre_experiment/experiment_check_env.py +16 -0
  55. experimaestro/tests/fixtures/pre_experiment/experiment_check_mock.py +14 -0
  56. experimaestro/tests/fixtures/pre_experiment/experiment_simple.py +12 -0
  57. experimaestro/tests/fixtures/pre_experiment/pre_setup_env.py +5 -0
  58. experimaestro/tests/fixtures/pre_experiment/pre_setup_error.py +3 -0
  59. experimaestro/tests/fixtures/pre_experiment/pre_setup_mock.py +8 -0
  60. experimaestro/tests/launchers/bin/test.py +1 -0
  61. experimaestro/tests/launchers/test_slurm.py +9 -9
  62. experimaestro/tests/partial_reschedule.py +46 -0
  63. experimaestro/tests/restart.py +3 -3
  64. experimaestro/tests/restart_main.py +1 -0
  65. experimaestro/tests/scripts/notifyandwait.py +1 -0
  66. experimaestro/tests/task_partial.py +38 -0
  67. experimaestro/tests/task_tokens.py +2 -2
  68. experimaestro/tests/tasks/test_dynamic.py +6 -6
  69. experimaestro/tests/test_dependencies.py +3 -3
  70. experimaestro/tests/test_deprecated.py +15 -15
  71. experimaestro/tests/test_dynamic_locking.py +317 -0
  72. experimaestro/tests/test_environment.py +24 -14
  73. experimaestro/tests/test_experiment.py +171 -36
  74. experimaestro/tests/test_identifier.py +25 -25
  75. experimaestro/tests/test_identifier_stability.py +3 -5
  76. experimaestro/tests/test_multitoken.py +2 -4
  77. experimaestro/tests/{test_subparameters.py → test_partial.py} +25 -25
  78. experimaestro/tests/test_partial_paths.py +81 -138
  79. experimaestro/tests/test_pre_experiment.py +219 -0
  80. experimaestro/tests/test_progress.py +2 -8
  81. experimaestro/tests/test_remote_state.py +1132 -0
  82. experimaestro/tests/test_stray_jobs.py +261 -0
  83. experimaestro/tests/test_tasks.py +1 -2
  84. experimaestro/tests/test_token_locking.py +52 -67
  85. experimaestro/tests/test_tokens.py +5 -6
  86. experimaestro/tests/test_transient.py +225 -0
  87. experimaestro/tests/test_workspace_state_provider.py +768 -0
  88. experimaestro/tests/token_reschedule.py +1 -3
  89. experimaestro/tests/utils.py +2 -7
  90. experimaestro/tokens.py +227 -372
  91. experimaestro/tools/diff.py +1 -0
  92. experimaestro/tools/documentation.py +4 -5
  93. experimaestro/tools/jobs.py +1 -2
  94. experimaestro/tui/app.py +459 -1895
  95. experimaestro/tui/app.tcss +162 -0
  96. experimaestro/tui/dialogs.py +172 -0
  97. experimaestro/tui/log_viewer.py +253 -3
  98. experimaestro/tui/messages.py +137 -0
  99. experimaestro/tui/utils.py +54 -0
  100. experimaestro/tui/widgets/__init__.py +23 -0
  101. experimaestro/tui/widgets/experiments.py +468 -0
  102. experimaestro/tui/widgets/global_services.py +238 -0
  103. experimaestro/tui/widgets/jobs.py +972 -0
  104. experimaestro/tui/widgets/log.py +156 -0
  105. experimaestro/tui/widgets/orphans.py +363 -0
  106. experimaestro/tui/widgets/runs.py +185 -0
  107. experimaestro/tui/widgets/services.py +314 -0
  108. experimaestro/tui/widgets/stray_jobs.py +528 -0
  109. experimaestro/utils/__init__.py +1 -1
  110. experimaestro/utils/environment.py +105 -22
  111. experimaestro/utils/fswatcher.py +124 -0
  112. experimaestro/utils/jobs.py +1 -2
  113. experimaestro/utils/jupyter.py +1 -2
  114. experimaestro/utils/logging.py +72 -0
  115. experimaestro/version.py +2 -2
  116. experimaestro/webui/__init__.py +9 -0
  117. experimaestro/webui/app.py +117 -0
  118. experimaestro/{server → webui}/data/index.css +66 -11
  119. experimaestro/webui/data/index.css.map +1 -0
  120. experimaestro/{server → webui}/data/index.js +82763 -87217
  121. experimaestro/webui/data/index.js.map +1 -0
  122. experimaestro/webui/routes/__init__.py +5 -0
  123. experimaestro/webui/routes/auth.py +53 -0
  124. experimaestro/webui/routes/proxy.py +117 -0
  125. experimaestro/webui/server.py +200 -0
  126. experimaestro/webui/state_bridge.py +152 -0
  127. experimaestro/webui/websocket.py +413 -0
  128. {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/METADATA +8 -9
  129. experimaestro-2.0.0b17.dist-info/RECORD +219 -0
  130. experimaestro/cli/progress.py +0 -269
  131. experimaestro/scheduler/state.py +0 -75
  132. experimaestro/scheduler/state_db.py +0 -388
  133. experimaestro/scheduler/state_sync.py +0 -834
  134. experimaestro/server/__init__.py +0 -467
  135. experimaestro/server/data/index.css.map +0 -1
  136. experimaestro/server/data/index.js.map +0 -1
  137. experimaestro/tests/test_cli_jobs.py +0 -615
  138. experimaestro/tests/test_file_progress.py +0 -425
  139. experimaestro/tests/test_file_progress_integration.py +0 -477
  140. experimaestro/tests/test_state_db.py +0 -434
  141. experimaestro-2.0.0b4.dist-info/RECORD +0 -181
  142. /experimaestro/{server → webui}/data/1815e00441357e01619e.ttf +0 -0
  143. /experimaestro/{server → webui}/data/2463b90d9a316e4e5294.woff2 +0 -0
  144. /experimaestro/{server → webui}/data/2582b0e4bcf85eceead0.ttf +0 -0
  145. /experimaestro/{server → webui}/data/89999bdf5d835c012025.woff2 +0 -0
  146. /experimaestro/{server → webui}/data/914997e1bdfc990d0897.ttf +0 -0
  147. /experimaestro/{server → webui}/data/c210719e60948b211a12.woff2 +0 -0
  148. /experimaestro/{server → webui}/data/favicon.ico +0 -0
  149. /experimaestro/{server → webui}/data/index.html +0 -0
  150. /experimaestro/{server → webui}/data/login.html +0 -0
  151. /experimaestro/{server → webui}/data/manifest.json +0 -0
  152. {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/WHEEL +0 -0
  153. {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/entry_points.txt +0 -0
  154. {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/licenses/LICENSE +0 -0
experimaestro/scheduler/state_db.py (file removed)
@@ -1,388 +0,0 @@
1
- """Database models for experiment state persistence
2
-
3
- This module provides peewee ORM models for storing job and service state
4
- in a workspace-level SQLite database. The workspace has a single database
5
- file (.experimaestro/workspace.db) with WAL mode enabled for concurrent
6
- read/write access.
7
-
8
- Key design:
9
- - One database per workspace at: workdir/.experimaestro/workspace.db
10
- - Experiments can be run multiple times, each run tracked separately
11
- - Jobs and services are scoped to (experiment_id, run_id)
12
- - Tags are scoped to (job_id, experiment_id, run_id) - fixes GH #128
13
- - Current state and progress stored in JobModel - no history tracking
14
- - Database instance is passed explicitly to avoid global state
15
- """
16
-
17
- from pathlib import Path
18
- from peewee import (
19
- Model,
20
- SqliteDatabase,
21
- CharField,
22
- FloatField,
23
- IntegerField,
24
- TextField,
25
- DateTimeField,
26
- CompositeKey,
27
- IntegrityError,
28
- OperationalError,
29
- )
30
- from datetime import datetime
31
- import fasteners
32
-
33
-
34
- class BaseModel(Model):
35
- """Base model for workspace database tables
36
-
37
- Models are unbound by default. Use database.bind_ctx() when querying:
38
-
39
- with workspace.workspace_db.bind_ctx([ExperimentModel, JobModel, ...]):
40
- experiments = ExperimentModel.select()
41
-
42
- Or use the convenience method bind_models() defined below.
43
- """
44
-
45
- class Meta:
46
- database = None # Unbound - will be bound when used
47
-
48
-
49
- class ExperimentModel(BaseModel):
50
- """Experiment metadata - tracks experiment definitions
51
-
52
- An experiment can be run multiple times. This table tracks the experiment
53
- itself and points to the current/latest run.
54
-
55
- Fields:
56
- experiment_id: Unique identifier for the experiment
57
- current_run_id: Points to the current/latest run (null if no runs yet)
58
- created_at: When experiment was first created
59
- updated_at: When experiment was last modified (for incremental queries)
60
-
61
- Note: Experiment path is derivable: {workspace}/xp/{experiment_id}
62
- """
63
-
64
- experiment_id = CharField(primary_key=True)
65
- current_run_id = CharField(null=True)
66
- created_at = DateTimeField(default=datetime.now)
67
- updated_at = DateTimeField(default=datetime.now, index=True)
68
-
69
- class Meta:
70
- table_name = "experiments"
71
-
72
-
73
- class ExperimentRunModel(BaseModel):
74
- """Individual experiment runs
75
-
76
- Each time an experiment is executed, a new run is created.
77
- Runs are identified by (experiment_id, run_id) composite key.
78
-
79
- run_id format: timestamp-based like "20250120_143022" or sequential counter
80
-
81
- Fields:
82
- experiment_id: ID of the experiment this run belongs to
83
- run_id: Unique ID for this run (timestamp or sequential)
84
- started_at: When this run started
85
- ended_at: When this run completed (null if still active)
86
- status: Run status (active, completed, failed, abandoned)
87
- """
88
-
89
- experiment_id = CharField(index=True)
90
- run_id = CharField(index=True)
91
- started_at = DateTimeField(default=datetime.now)
92
- ended_at = DateTimeField(null=True)
93
- status = CharField(default="active", index=True)
94
-
95
- class Meta:
96
- table_name = "experiment_runs"
97
- primary_key = CompositeKey("experiment_id", "run_id")
98
- indexes = ((("experiment_id", "started_at"), False),) # For finding latest run
99
-
100
-
101
- class WorkspaceSyncMetadata(BaseModel):
102
- """Workspace-level metadata for disk sync tracking
103
-
104
- Single-row table to track when the last disk sync occurred.
105
- Used to throttle sync operations and prevent excessive disk scanning.
106
-
107
- Fields:
108
- id: Always "workspace" (single row table)
109
- last_sync_time: When last sync completed
110
- sync_interval_minutes: Minimum interval between syncs
111
- """
112
-
113
- id = CharField(primary_key=True, default="workspace")
114
- last_sync_time = DateTimeField(null=True)
115
- sync_interval_minutes = IntegerField(default=5)
116
-
117
- class Meta:
118
- table_name = "workspace_sync_metadata"
119
-
120
-
121
- class JobModel(BaseModel):
122
- """Job information linked to specific experiment run
123
-
124
- Jobs are tied to a specific run of an experiment via (experiment_id, run_id).
125
- The same job can appear in multiple runs with different states/tags.
126
-
127
- Fields:
128
- job_id: Unique identifier for the job (from task identifier)
129
- experiment_id: ID of the experiment this job belongs to
130
- run_id: ID of the run this job belongs to
131
- task_id: Task class identifier
132
- locator: Full task locator (identifier)
133
- state: Current job state (e.g., "unscheduled", "waiting", "running", "done", "error")
134
- failure_reason: Optional failure reason for error states (e.g., "TIMEOUT", "DEPENDENCY")
135
- submitted_time: When job was submitted (Unix timestamp)
136
- started_time: When job started running (Unix timestamp)
137
- ended_time: When job finished (Unix timestamp)
138
- progress: JSON-encoded list of progress updates
139
- updated_at: When job was last modified (for incremental queries)
140
-
141
- Note: Job path is derivable: {workspace}/jobs/{task_id}/{job_id}
142
- Note: Tags are stored in separate JobTagModel table (run-scoped)
143
- Note: Dependencies are NOT stored in DB (available in state.json only)
144
- """
145
-
146
- job_id = CharField(index=True)
147
- experiment_id = CharField(index=True)
148
- run_id = CharField(index=True)
149
- task_id = CharField(index=True)
150
- locator = CharField()
151
- state = CharField(default="unscheduled", index=True)
152
- failure_reason = CharField(null=True)
153
- submitted_time = FloatField(null=True)
154
- started_time = FloatField(null=True)
155
- ended_time = FloatField(null=True)
156
- progress = TextField(default="[]")
157
- updated_at = DateTimeField(default=datetime.now, index=True)
158
-
159
- class Meta:
160
- table_name = "jobs"
161
- primary_key = CompositeKey("job_id", "experiment_id", "run_id")
162
- indexes = (
163
- (
164
- ("experiment_id", "run_id", "state"),
165
- False,
166
- ), # Query jobs by run and state
167
- (
168
- ("experiment_id", "run_id", "task_id"),
169
- False,
170
- ), # Query jobs by run and task
171
- (
172
- ("experiment_id", "run_id", "updated_at"),
173
- False,
174
- ), # Query jobs by run and update time
175
- )
176
-
177
-
178
- class JobTagModel(BaseModel):
179
- """Job tags for efficient searching (fixes GH #128)
180
-
181
- **FIX FOR GH ISSUE #128**: Tags are now experiment-run-dependent, not job-dependent.
182
- The same job in different experiment runs can have different tags, because tags
183
- are scoped to the (job_id, experiment_id, run_id) combination.
184
-
185
- Tags are stored as key-value pairs in a separate table for efficient indexing.
186
- Each job can have multiple tags within an experiment run context.
187
-
188
- Key change from old behavior:
189
- - OLD: Tags were global per job_id (broken - same job in different experiments/runs shared tags)
190
- - NEW: Tags are scoped per (job_id, experiment_id, run_id) - same job can have different tags in different runs
191
-
192
- Fields:
193
- job_id: ID of the job
194
- experiment_id: ID of the experiment
195
- run_id: ID of the run
196
- tag_key: Tag name
197
- tag_value: Tag value
198
- """
199
-
200
- job_id = CharField(index=True)
201
- experiment_id = CharField(index=True)
202
- run_id = CharField(index=True)
203
- tag_key = CharField(index=True)
204
- tag_value = CharField(index=True)
205
-
206
- class Meta:
207
- table_name = "job_tags"
208
- primary_key = CompositeKey("job_id", "experiment_id", "run_id", "tag_key")
209
- indexes = (
210
- (("tag_key", "tag_value"), False), # For tag-based queries
211
- (
212
- ("experiment_id", "run_id", "tag_key"),
213
- False,
214
- ), # For experiment run tag queries
215
- )
216
-
217
-
218
- class ServiceModel(BaseModel):
219
- """Service information linked to specific experiment run
220
-
221
- Services are tied to a specific run of an experiment via (experiment_id, run_id).
222
-
223
- Fields:
224
- service_id: Unique identifier for the service
225
- experiment_id: ID of the experiment this service belongs to
226
- run_id: ID of the run this service belongs to
227
- description: Human-readable description
228
- state: Service state (e.g., "running", "stopped")
229
- state_dict: JSON serialized state_dict for service recreation
230
- created_at: When service was created
231
- updated_at: Timestamp of last update
232
- """
233
-
234
- service_id = CharField()
235
- experiment_id = CharField(index=True)
236
- run_id = CharField(index=True)
237
- description = TextField(default="")
238
- state = CharField()
239
- state_dict = TextField(default="{}") # JSON for service recreation
240
- created_at = DateTimeField(default=datetime.now)
241
- updated_at = DateTimeField(default=datetime.now)
242
-
243
- class Meta:
244
- table_name = "services"
245
- primary_key = CompositeKey("service_id", "experiment_id", "run_id")
246
-
247
-
248
- class PartialModel(BaseModel):
249
- """Partial directory tracking for subparameters
250
-
251
- Tracks partial directories that are shared across jobs with different
252
- parameter values (but same partial identifier). These directories are
253
- at WORKSPACE/partials/TASK_ID/SUBPARAM_NAME/PARTIAL_ID/ (reconstructible).
254
-
255
- Fields:
256
- partial_id: Hex hash of the partial identifier
257
- task_id: Task class identifier
258
- subparameters_name: Name of the subparameters definition
259
- created_at: When this partial directory was first created
260
- """
261
-
262
- partial_id = CharField(primary_key=True)
263
- task_id = CharField(index=True)
264
- subparameters_name = CharField(index=True)
265
- created_at = DateTimeField(default=datetime.now)
266
-
267
- class Meta:
268
- table_name = "partials"
269
- indexes = ((("task_id", "subparameters_name"), False),)
270
-
271
-
272
- class JobPartialModel(BaseModel):
273
- """Links jobs to partial directories they use
274
-
275
- Tracks which jobs reference which partial directories. This enables
276
- cleanup of orphan partials when all referencing jobs are deleted.
277
-
278
- A job can use multiple partials (different subparameters definitions),
279
- and a partial can be used by multiple jobs.
280
-
281
- Fields:
282
- job_id: ID of the job using this partial
283
- experiment_id: ID of the experiment
284
- run_id: ID of the run
285
- partial_id: ID of the partial directory being used
286
- """
287
-
288
- job_id = CharField(index=True)
289
- experiment_id = CharField(index=True)
290
- run_id = CharField(index=True)
291
- partial_id = CharField(index=True)
292
-
293
- class Meta:
294
- table_name = "job_partials"
295
- primary_key = CompositeKey("job_id", "experiment_id", "run_id", "partial_id")
296
- indexes = ((("partial_id",), False),) # For finding jobs using a partial
297
-
298
-
299
- # List of all models for binding
300
- ALL_MODELS = [
301
- ExperimentModel,
302
- ExperimentRunModel,
303
- WorkspaceSyncMetadata,
304
- JobModel,
305
- JobTagModel,
306
- ServiceModel,
307
- PartialModel,
308
- JobPartialModel,
309
- ]
310
-
311
-
312
- def initialize_workspace_database(
313
- db_path: Path, read_only: bool = False
314
- ) -> SqliteDatabase:
315
- """Initialize a workspace database connection with proper configuration
316
-
317
- Creates and configures a SQLite database connection for the workspace.
318
- Models must be bound to this database before querying.
319
-
320
- Uses file-based locking to prevent multiple processes from initializing
321
- the database simultaneously, which could cause SQLite locking issues.
322
-
323
- Args:
324
- db_path: Path to the workspace SQLite database file
325
- read_only: If True, open database in read-only mode
326
-
327
- Returns:
328
- Configured SqliteDatabase instance
329
- """
330
- # Ensure parent directory exists (unless read-only)
331
- if not read_only:
332
- db_path.parent.mkdir(parents=True, exist_ok=True)
333
-
334
- # Use file-based lock to prevent concurrent initialization from multiple processes
335
- # This prevents SQLite locking issues during table creation
336
- lock_path = db_path.parent / f".{db_path.name}.init.lock"
337
- lock = fasteners.InterProcessLock(str(lock_path))
338
-
339
- # Acquire lock (blocking) - only one process can initialize at a time
340
- with lock:
341
- # Create database connection
342
- # check_same_thread=False allows the connection to be used from multiple threads
343
- # This is safe with WAL mode and proper locking
344
- db = SqliteDatabase(
345
- str(db_path),
346
- pragmas={
347
- "journal_mode": "wal", # Write-Ahead Logging for concurrent reads
348
- "foreign_keys": 1, # Enable foreign key constraints
349
- "ignore_check_constraints": 0,
350
- "synchronous": 1, # NORMAL mode (balance safety/speed)
351
- "busy_timeout": 5000, # Wait up to 5 seconds for locks
352
- },
353
- check_same_thread=False,
354
- )
355
-
356
- if read_only:
357
- # Set query-only mode for read-only access
358
- db.execute_sql("PRAGMA query_only = ON")
359
-
360
- # Bind all models to this database
361
- db.bind(ALL_MODELS)
362
-
363
- # Create tables if they don't exist (only in write mode)
364
- if not read_only:
365
- db.create_tables(ALL_MODELS, safe=True)
366
-
367
- # Initialize WorkspaceSyncMetadata with default row if not exists
368
- # Use try/except to handle race condition (shouldn't happen with lock, but be safe)
369
- try:
370
- WorkspaceSyncMetadata.get_or_create(
371
- id="workspace",
372
- defaults={"last_sync_time": None, "sync_interval_minutes": 5},
373
- )
374
- except (IntegrityError, OperationalError):
375
- # If get_or_create fails, the row likely already exists
376
- pass
377
-
378
- return db
379
-
380
-
381
- def close_workspace_database(db: SqliteDatabase):
382
- """Close a workspace database connection
383
-
384
- Args:
385
- db: The database connection to close
386
- """
387
- if db and not db.is_closed():
388
- db.close()