experimaestro 2.0.0a8__py3-none-any.whl → 2.0.0b8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of experimaestro might be problematic. Click here for more details.

Files changed (122) hide show
  1. experimaestro/__init__.py +10 -11
  2. experimaestro/annotations.py +167 -206
  3. experimaestro/cli/__init__.py +278 -7
  4. experimaestro/cli/filter.py +42 -74
  5. experimaestro/cli/jobs.py +157 -106
  6. experimaestro/cli/refactor.py +249 -0
  7. experimaestro/click.py +0 -1
  8. experimaestro/commandline.py +19 -3
  9. experimaestro/connectors/__init__.py +20 -1
  10. experimaestro/connectors/local.py +12 -0
  11. experimaestro/core/arguments.py +182 -46
  12. experimaestro/core/identifier.py +107 -6
  13. experimaestro/core/objects/__init__.py +6 -0
  14. experimaestro/core/objects/config.py +542 -25
  15. experimaestro/core/objects/config_walk.py +20 -0
  16. experimaestro/core/serialization.py +91 -34
  17. experimaestro/core/subparameters.py +164 -0
  18. experimaestro/core/types.py +175 -38
  19. experimaestro/exceptions.py +26 -0
  20. experimaestro/experiments/cli.py +111 -25
  21. experimaestro/generators.py +50 -9
  22. experimaestro/huggingface.py +3 -1
  23. experimaestro/launcherfinder/parser.py +29 -0
  24. experimaestro/launchers/__init__.py +26 -1
  25. experimaestro/launchers/direct.py +12 -0
  26. experimaestro/launchers/slurm/base.py +154 -2
  27. experimaestro/mkdocs/metaloader.py +0 -1
  28. experimaestro/mypy.py +452 -7
  29. experimaestro/notifications.py +63 -13
  30. experimaestro/progress.py +0 -2
  31. experimaestro/rpyc.py +0 -1
  32. experimaestro/run.py +19 -6
  33. experimaestro/scheduler/base.py +510 -125
  34. experimaestro/scheduler/dependencies.py +43 -28
  35. experimaestro/scheduler/dynamic_outputs.py +259 -130
  36. experimaestro/scheduler/experiment.py +256 -31
  37. experimaestro/scheduler/interfaces.py +501 -0
  38. experimaestro/scheduler/jobs.py +216 -206
  39. experimaestro/scheduler/remote/__init__.py +31 -0
  40. experimaestro/scheduler/remote/client.py +874 -0
  41. experimaestro/scheduler/remote/protocol.py +467 -0
  42. experimaestro/scheduler/remote/server.py +423 -0
  43. experimaestro/scheduler/remote/sync.py +144 -0
  44. experimaestro/scheduler/services.py +323 -23
  45. experimaestro/scheduler/state_db.py +437 -0
  46. experimaestro/scheduler/state_provider.py +2766 -0
  47. experimaestro/scheduler/state_sync.py +891 -0
  48. experimaestro/scheduler/workspace.py +52 -10
  49. experimaestro/scriptbuilder.py +7 -0
  50. experimaestro/server/__init__.py +147 -57
  51. experimaestro/server/data/index.css +0 -125
  52. experimaestro/server/data/index.css.map +1 -1
  53. experimaestro/server/data/index.js +194 -58
  54. experimaestro/server/data/index.js.map +1 -1
  55. experimaestro/settings.py +44 -5
  56. experimaestro/sphinx/__init__.py +3 -3
  57. experimaestro/taskglobals.py +20 -0
  58. experimaestro/tests/conftest.py +80 -0
  59. experimaestro/tests/core/test_generics.py +2 -2
  60. experimaestro/tests/identifier_stability.json +45 -0
  61. experimaestro/tests/launchers/bin/sacct +6 -2
  62. experimaestro/tests/launchers/bin/sbatch +4 -2
  63. experimaestro/tests/launchers/test_slurm.py +80 -0
  64. experimaestro/tests/tasks/test_dynamic.py +231 -0
  65. experimaestro/tests/test_cli_jobs.py +615 -0
  66. experimaestro/tests/test_deprecated.py +630 -0
  67. experimaestro/tests/test_environment.py +200 -0
  68. experimaestro/tests/test_file_progress_integration.py +1 -1
  69. experimaestro/tests/test_forward.py +3 -3
  70. experimaestro/tests/test_identifier.py +372 -41
  71. experimaestro/tests/test_identifier_stability.py +458 -0
  72. experimaestro/tests/test_instance.py +3 -3
  73. experimaestro/tests/test_multitoken.py +442 -0
  74. experimaestro/tests/test_mypy.py +433 -0
  75. experimaestro/tests/test_objects.py +312 -5
  76. experimaestro/tests/test_outputs.py +2 -2
  77. experimaestro/tests/test_param.py +8 -12
  78. experimaestro/tests/test_partial_paths.py +231 -0
  79. experimaestro/tests/test_progress.py +0 -48
  80. experimaestro/tests/test_remote_state.py +671 -0
  81. experimaestro/tests/test_resumable_task.py +480 -0
  82. experimaestro/tests/test_serializers.py +141 -1
  83. experimaestro/tests/test_state_db.py +434 -0
  84. experimaestro/tests/test_subparameters.py +160 -0
  85. experimaestro/tests/test_tags.py +136 -0
  86. experimaestro/tests/test_tasks.py +107 -121
  87. experimaestro/tests/test_token_locking.py +252 -0
  88. experimaestro/tests/test_tokens.py +17 -13
  89. experimaestro/tests/test_types.py +123 -1
  90. experimaestro/tests/test_workspace_triggers.py +158 -0
  91. experimaestro/tests/token_reschedule.py +4 -2
  92. experimaestro/tests/utils.py +2 -2
  93. experimaestro/tokens.py +154 -57
  94. experimaestro/tools/diff.py +1 -1
  95. experimaestro/tui/__init__.py +8 -0
  96. experimaestro/tui/app.py +2395 -0
  97. experimaestro/tui/app.tcss +353 -0
  98. experimaestro/tui/log_viewer.py +228 -0
  99. experimaestro/utils/__init__.py +23 -0
  100. experimaestro/utils/environment.py +148 -0
  101. experimaestro/utils/git.py +129 -0
  102. experimaestro/utils/resources.py +1 -1
  103. experimaestro/version.py +34 -0
  104. {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b8.dist-info}/METADATA +68 -38
  105. experimaestro-2.0.0b8.dist-info/RECORD +187 -0
  106. {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b8.dist-info}/WHEEL +1 -1
  107. experimaestro-2.0.0b8.dist-info/entry_points.txt +16 -0
  108. experimaestro/compat.py +0 -6
  109. experimaestro/core/objects.pyi +0 -221
  110. experimaestro/server/data/0c35d18bf06992036b69.woff2 +0 -0
  111. experimaestro/server/data/219aa9140e099e6c72ed.woff2 +0 -0
  112. experimaestro/server/data/3a4004a46a653d4b2166.woff +0 -0
  113. experimaestro/server/data/3baa5b8f3469222b822d.woff +0 -0
  114. experimaestro/server/data/4d73cb90e394b34b7670.woff +0 -0
  115. experimaestro/server/data/4ef4218c522f1eb6b5b1.woff2 +0 -0
  116. experimaestro/server/data/5d681e2edae8c60630db.woff +0 -0
  117. experimaestro/server/data/6f420cf17cc0d7676fad.woff2 +0 -0
  118. experimaestro/server/data/c380809fd3677d7d6903.woff2 +0 -0
  119. experimaestro/server/data/f882956fd323fd322f31.woff +0 -0
  120. experimaestro-2.0.0a8.dist-info/RECORD +0 -166
  121. experimaestro-2.0.0a8.dist-info/entry_points.txt +0 -17
  122. {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b8.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,437 @@
1
+ """Database models for experiment state persistence
2
+
3
+ This module provides peewee ORM models for storing job and service state
4
+ in a workspace-level SQLite database. The workspace has a single database
5
+ file (.experimaestro/workspace.db) with WAL mode enabled for concurrent
6
+ read/write access.
7
+
8
+ Key design:
9
+ - One database per workspace at: workdir/.experimaestro/workspace.db
10
+ - Experiments can be run multiple times, each run tracked separately
11
+ - Jobs and services are scoped to (experiment_id, run_id)
12
+ - Tags are scoped to (job_id, experiment_id, run_id) - fixes GH #128
13
+ - Current state and progress stored in JobModel - no history tracking
14
+ - Database instance is passed explicitly to avoid global state
15
+ """
16
+
17
+ import logging
18
+ from pathlib import Path
19
+ from typing import Tuple
20
+ from peewee import (
21
+ Model,
22
+ SqliteDatabase,
23
+ CharField,
24
+ FloatField,
25
+ IntegerField,
26
+ TextField,
27
+ DateTimeField,
28
+ CompositeKey,
29
+ IntegrityError,
30
+ OperationalError,
31
+ )
32
+ from datetime import datetime
33
+ import fasteners
34
+
35
# Logger shared by the workspace state database helpers in this module
logger = logging.getLogger("xpm.state_db")

# Version of the database schema. Bump this number whenever a schema change
# means that existing databases have to be resynchronised from disk.
CURRENT_DB_VERSION = 3
39
+
40
+
41
class BaseModel(Model):
    """Common parent of every workspace database table

    The class declares no database of its own (``Meta.database`` is None), so
    a model has to be bound to a concrete database before it is queried:

        with workspace.workspace_db.bind_ctx([ExperimentModel, JobModel, ...]):
            experiments = ExperimentModel.select()

    initialize_workspace_database() binds every model listed in ALL_MODELS
    to the database it opens.
    """

    class Meta:
        # Intentionally unbound - the database is attached at runtime
        database = None
54
+
55
+
56
class ExperimentModel(BaseModel):
    """Experiment definition (one row per experiment)

    The same experiment may be executed several times; this table describes
    the experiment itself and references its current/latest run.

    Fields:
        experiment_id: Unique identifier of the experiment (primary key)
        current_run_id: ID of the current/latest run (null while no run exists)
        created_at: Creation time of the experiment (defaults to "now")
        updated_at: Last modification time, indexed for incremental queries
            (defaults to "now" when the row is created)

    Note: the experiment path is not stored since it can be derived as
    {workspace}/xp/{experiment_id}
    """

    experiment_id = CharField(primary_key=True)
    current_run_id = CharField(null=True)
    created_at = DateTimeField(default=datetime.now)
    updated_at = DateTimeField(default=datetime.now, index=True)

    class Meta:
        table_name = "experiments"
78
+
79
+
80
class ExperimentRunModel(BaseModel):
    """A single execution (run) of an experiment

    A new row is created every time an experiment is executed. Rows are
    identified by the composite key (experiment_id, run_id).

    run_id format: timestamp-based like "20250120_143022" or sequential counter

    Fields:
        experiment_id: ID of the experiment the run belongs to
        run_id: ID of the run (timestamp or sequential)
        started_at: Start time of the run (defaults to "now")
        ended_at: Completion time of the run (null while still active)
        status: Run status (active, completed, failed, abandoned)
        hostname: Host the experiment was launched from (null for old runs)
    """

    experiment_id = CharField(index=True)
    run_id = CharField(index=True)
    started_at = DateTimeField(default=datetime.now)
    ended_at = DateTimeField(null=True)
    status = CharField(default="active", index=True)
    hostname = CharField(null=True)

    class Meta:
        table_name = "experiment_runs"
        primary_key = CompositeKey("experiment_id", "run_id")
        # Non-unique index used to find the latest run of an experiment
        indexes = ((("experiment_id", "started_at"), False),)
108
+
109
+
110
class WorkspaceSyncMetadata(BaseModel):
    """Bookkeeping for workspace-level disk synchronisation

    The table holds a single row recording when the last disk sync took
    place, so that syncs can be throttled and excessive disk scanning avoided.

    Fields:
        id: Always "workspace" (the table has a single row)
        last_sync_time: Completion time of the last sync (null if never synced)
        sync_interval_minutes: Minimum number of minutes between two syncs
        db_version: Schema version, used to detect databases needing migration
    """

    id = CharField(primary_key=True, default="workspace")
    last_sync_time = DateTimeField(null=True)
    sync_interval_minutes = IntegerField(default=5)
    db_version = IntegerField(default=1)

    class Meta:
        table_name = "workspace_sync_metadata"
130
+
131
+
132
class JobModel(BaseModel):
    """State of a job within one experiment run

    A job row belongs to a specific run of an experiment, identified by
    (experiment_id, run_id). The same job can therefore appear in several
    runs, each time with its own state and tags.

    Fields:
        job_id: Unique identifier of the job (from the task identifier)
        experiment_id: ID of the experiment the job belongs to
        run_id: ID of the run the job belongs to
        task_id: Task class identifier
        locator: Full task locator (identifier)
        state: Current job state (e.g., "unscheduled", "waiting", "running",
            "done", "error")
        failure_reason: Optional reason for error states (e.g., "TIMEOUT",
            "DEPENDENCY")
        submitted_time: Submission time (Unix timestamp)
        started_time: Time the job started running (Unix timestamp)
        ended_time: Time the job finished (Unix timestamp)
        progress: JSON-encoded list of progress updates
        updated_at: Last modification time (for incremental queries)

    Note: the job path is derivable: {workspace}/jobs/{task_id}/{job_id}
    Note: tags live in the separate, run-scoped JobTagModel table
    Note: dependencies are NOT stored in the DB (available in state.json only)
    """

    job_id = CharField(index=True)
    experiment_id = CharField(index=True)
    run_id = CharField(index=True)
    task_id = CharField(index=True)
    locator = CharField()
    state = CharField(default="unscheduled", index=True)
    failure_reason = CharField(null=True)
    submitted_time = FloatField(null=True)
    started_time = FloatField(null=True)
    ended_time = FloatField(null=True)
    progress = TextField(default="[]")
    updated_at = DateTimeField(default=datetime.now, index=True)

    class Meta:
        table_name = "jobs"
        primary_key = CompositeKey("job_id", "experiment_id", "run_id")
        # Non-unique composite indexes, all prefixed by the run they belong to
        indexes = (
            # Jobs of a run filtered by state
            (("experiment_id", "run_id", "state"), False),
            # Jobs of a run filtered by task
            (("experiment_id", "run_id", "task_id"), False),
            # Jobs of a run filtered by update time
            (("experiment_id", "run_id", "updated_at"), False),
        )
187
+
188
+
189
class JobTagModel(BaseModel):
    """Key/value tags attached to a job within an experiment run (GH #128)

    **FIX FOR GH ISSUE #128**: tags depend on the experiment run, not on the
    job alone. Because rows are scoped to (job_id, experiment_id, run_id),
    the same job may carry different tags in different experiment runs.

    Each tag is stored as its own key-value row so that tags can be indexed
    and searched efficiently; a job can have several tags within one run.

    Change with respect to the old behavior:
    - OLD: tags were global per job_id (broken - the same job shared its tags
      across experiments/runs)
    - NEW: tags are scoped per (job_id, experiment_id, run_id)

    Fields:
        job_id: ID of the job
        experiment_id: ID of the experiment
        run_id: ID of the run
        tag_key: Tag name
        tag_value: Tag value
    """

    job_id = CharField(index=True)
    experiment_id = CharField(index=True)
    run_id = CharField(index=True)
    tag_key = CharField(index=True)
    tag_value = CharField(index=True)

    class Meta:
        table_name = "job_tags"
        primary_key = CompositeKey("job_id", "experiment_id", "run_id", "tag_key")
        indexes = (
            # Tag-based queries (key + value)
            (("tag_key", "tag_value"), False),
            # Tag queries restricted to one experiment run
            (("experiment_id", "run_id", "tag_key"), False),
        )
227
+
228
+
229
class ServiceModel(BaseModel):
    """Service registered for one experiment run

    A service row belongs to a specific run of an experiment, identified by
    (experiment_id, run_id). Rows are only added or removed, never updated:
    the live state of a service is managed at runtime.

    Fields:
        service_id: Unique identifier of the service
        experiment_id: ID of the experiment the service belongs to
        run_id: ID of the run the service belongs to
        description: Human-readable description
        state_dict: JSON-serialized state_dict used to recreate the service
        created_at: Registration time of the service (defaults to "now")
    """

    service_id = CharField()
    experiment_id = CharField(index=True)
    run_id = CharField(index=True)
    description = TextField(default="")
    # JSON document, defaults to an empty object
    state_dict = TextField(default="{}")
    created_at = DateTimeField(default=datetime.now)

    class Meta:
        table_name = "services"
        primary_key = CompositeKey("service_id", "experiment_id", "run_id")
253
+ primary_key = CompositeKey("service_id", "experiment_id", "run_id")
254
+
255
+
256
class PartialModel(BaseModel):
    """Partial directory shared through subparameters

    A partial directory is shared by jobs whose parameter values differ but
    whose partial identifier is the same. Its location can be reconstructed
    as WORKSPACE/partials/TASK_ID/SUBPARAM_NAME/PARTIAL_ID/.

    Fields:
        partial_id: Hex hash of the partial identifier (primary key)
        task_id: Task class identifier
        subparameters_name: Name of the subparameters definition
        created_at: Time the partial directory was first created
    """

    partial_id = CharField(primary_key=True)
    task_id = CharField(index=True)
    subparameters_name = CharField(index=True)
    created_at = DateTimeField(default=datetime.now)

    class Meta:
        table_name = "partials"
        # Non-unique index over (task, subparameters definition)
        indexes = ((("task_id", "subparameters_name"), False),)
278
+
279
+
280
class JobPartialModel(BaseModel):
    """Association between jobs and the partial directories they use

    Records which jobs reference which partial directories, which makes it
    possible to clean up orphan partials once every referencing job has been
    deleted.

    The relation is many-to-many: a job can use several partials (one per
    subparameters definition) and a partial can be used by several jobs.

    Fields:
        job_id: ID of the job using the partial
        experiment_id: ID of the experiment
        run_id: ID of the run
        partial_id: ID of the partial directory being used
    """

    job_id = CharField(index=True)
    experiment_id = CharField(index=True)
    run_id = CharField(index=True)
    # index=True already provides the single-column index used to find the
    # jobs referencing a partial, so it is not repeated in Meta.indexes
    partial_id = CharField(index=True)

    class Meta:
        table_name = "job_partials"
        primary_key = CompositeKey("job_id", "experiment_id", "run_id", "partial_id")
305
+
306
+
307
# Every model of the workspace database, in the order in which they are
# bound to a database and their tables are created
ALL_MODELS = [
    # Experiments and their runs
    ExperimentModel,
    ExperimentRunModel,
    # Workspace-level sync bookkeeping
    WorkspaceSyncMetadata,
    # Jobs, their tags and the services of a run
    JobModel,
    JobTagModel,
    ServiceModel,
    # Partial directories and their usage by jobs
    PartialModel,
    JobPartialModel,
]
318
+
319
+
320
def initialize_workspace_database(
    db_path: Path, read_only: bool = False
) -> Tuple[SqliteDatabase, bool]:
    """Initialize a workspace database connection with proper configuration

    Creates and configures a SQLite database connection for the workspace,
    binds every model of ALL_MODELS to it and, unless read_only is set,
    creates the missing tables and applies the schema migrations.

    The whole initialization runs under a file-based inter-process lock so
    that several processes cannot initialize the database simultaneously,
    which could cause SQLite locking issues. The lock file is created next to
    the database file (also in read-only mode).

    Args:
        db_path: Path to the workspace SQLite database file
        read_only: If True, open database in read-only mode. The query-only
            pragma is registered permanently so that it applies to every
            connection opened by the returned database object, not only to
            the one used during initialization.

    Returns:
        Tuple of (SqliteDatabase instance, needs_resync flag)
        The needs_resync flag is True when the database schema version is
        outdated and a full resync from disk is required.

    Raises:
        Exception: any error that is not handled during setup is re-raised
            after the database connection has been closed.
    """
    # Ensure parent directory exists (unless read-only)
    if not read_only:
        db_path.parent.mkdir(parents=True, exist_ok=True)

    # File-based lock preventing concurrent initialization from several
    # processes (avoids SQLite locking issues during table creation)
    lock_path = db_path.parent / f".{db_path.name}.init.lock"
    lock = fasteners.InterProcessLock(str(lock_path))

    # Blocking acquisition - only one process initializes at a time
    with lock:
        # check_same_thread=False lets the underlying sqlite3 connections be
        # used from threads other than the one that created them
        db = SqliteDatabase(
            str(db_path),
            pragmas={
                "journal_mode": "wal",  # Write-Ahead Logging for concurrent reads
                "foreign_keys": 1,  # Enable foreign key constraints
                "ignore_check_constraints": 0,
                "synchronous": 1,  # NORMAL mode (balance safety/speed)
                "busy_timeout": 5000,  # Wait up to 5 seconds for locks
            },
            check_same_thread=False,
        )

        try:
            needs_resync = _prepare_workspace_schema(db, read_only)
        except Exception:
            # Do not leak the connection when initialization fails
            db.close()
            raise

    return db, needs_resync


def _prepare_workspace_schema(db: SqliteDatabase, read_only: bool) -> bool:
    """Bind the models, create/migrate the tables and return needs_resync"""
    if read_only:
        # permanent=True re-applies the pragma on every new connection;
        # a one-off "PRAGMA query_only = ON" would only protect the
        # connection that is open right now
        db.pragma("query_only", 1, permanent=True)

    # Bind all models to this database
    db.bind(ALL_MODELS)

    # Create tables if they don't exist (only in write mode)
    if not read_only:
        db.create_tables(ALL_MODELS, safe=True)

    # Check database version - raw SQL since the column may not exist
    needs_resync = False
    current_version = 0
    try:
        cursor = db.execute_sql(
            "SELECT db_version FROM workspace_sync_metadata WHERE id='workspace'"
        )
        row = cursor.fetchone()
        # NOTE(review): a missing metadata row (e.g. fresh database) does not
        # request a resync by itself - confirm this is the intended nesting
        if row is not None:
            current_version = row[0]
            if current_version < CURRENT_DB_VERSION:
                needs_resync = True
    except OperationalError:
        # Column doesn't exist - add it and trigger resync
        needs_resync = True
        logger.debug("Could not read db_version, requesting resync", exc_info=True)
        try:
            db.execute_sql(
                "ALTER TABLE workspace_sync_metadata "
                "ADD COLUMN db_version INTEGER DEFAULT 1"
            )
        except OperationalError:
            # Column may already exist (or the database is not writable)
            logger.debug("Could not add db_version column", exc_info=True)

    # Run schema migrations for older databases
    if current_version < 2:
        # Migration v1 -> v2: Add hostname column to experiment_runs table
        try:
            db.execute_sql(
                "ALTER TABLE experiment_runs ADD COLUMN hostname VARCHAR(255) NULL"
            )
            logger.info("Added hostname column to experiment_runs table")
        except OperationalError:
            # Usually means that the column already exists
            logger.debug(
                "Could not add hostname column to experiment_runs", exc_info=True
            )

    # Initialize WorkspaceSyncMetadata with default row if not exists
    # (best effort: the caller holds the init lock, but stay safe)
    try:
        WorkspaceSyncMetadata.get_or_create(
            id="workspace",
            defaults={
                "last_sync_time": None,
                "sync_interval_minutes": 5,
                "db_version": 1,
            },
        )
    except (IntegrityError, OperationalError):
        # If get_or_create fails, the row likely already exists
        logger.debug("Could not create workspace sync metadata row", exc_info=True)

    return needs_resync
428
+
429
+
430
def close_workspace_database(db: SqliteDatabase):
    """Close a workspace database connection

    Nothing happens when no database is given or when the database is
    already closed.

    Args:
        db: The database connection to close
    """
    if not db:
        return
    if db.is_closed():
        return
    db.close()