PyPI - experimaestro - Versions diffs - 2.0.0a8__py3-none-any.whl → 2.0.0b8__py3-none-any.whl - Mend

experimaestro 2.0.0a8__py3-none-any.whl → 2.0.0b8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of experimaestro might be problematic. Click here for more details.

Files changed (122) hide show

experimaestro/__init__.py +10 -11
experimaestro/annotations.py +167 -206
experimaestro/cli/__init__.py +278 -7
experimaestro/cli/filter.py +42 -74
experimaestro/cli/jobs.py +157 -106
experimaestro/cli/refactor.py +249 -0
experimaestro/click.py +0 -1
experimaestro/commandline.py +19 -3
experimaestro/connectors/__init__.py +20 -1
experimaestro/connectors/local.py +12 -0
experimaestro/core/arguments.py +182 -46
experimaestro/core/identifier.py +107 -6
experimaestro/core/objects/__init__.py +6 -0
experimaestro/core/objects/config.py +542 -25
experimaestro/core/objects/config_walk.py +20 -0
experimaestro/core/serialization.py +91 -34
experimaestro/core/subparameters.py +164 -0
experimaestro/core/types.py +175 -38
experimaestro/exceptions.py +26 -0
experimaestro/experiments/cli.py +111 -25
experimaestro/generators.py +50 -9
experimaestro/huggingface.py +3 -1
experimaestro/launcherfinder/parser.py +29 -0
experimaestro/launchers/__init__.py +26 -1
experimaestro/launchers/direct.py +12 -0
experimaestro/launchers/slurm/base.py +154 -2
experimaestro/mkdocs/metaloader.py +0 -1
experimaestro/mypy.py +452 -7
experimaestro/notifications.py +63 -13
experimaestro/progress.py +0 -2
experimaestro/rpyc.py +0 -1
experimaestro/run.py +19 -6
experimaestro/scheduler/base.py +510 -125
experimaestro/scheduler/dependencies.py +43 -28
experimaestro/scheduler/dynamic_outputs.py +259 -130
experimaestro/scheduler/experiment.py +256 -31
experimaestro/scheduler/interfaces.py +501 -0
experimaestro/scheduler/jobs.py +216 -206
experimaestro/scheduler/remote/__init__.py +31 -0
experimaestro/scheduler/remote/client.py +874 -0
experimaestro/scheduler/remote/protocol.py +467 -0
experimaestro/scheduler/remote/server.py +423 -0
experimaestro/scheduler/remote/sync.py +144 -0
experimaestro/scheduler/services.py +323 -23
experimaestro/scheduler/state_db.py +437 -0
experimaestro/scheduler/state_provider.py +2766 -0
experimaestro/scheduler/state_sync.py +891 -0
experimaestro/scheduler/workspace.py +52 -10
experimaestro/scriptbuilder.py +7 -0
experimaestro/server/__init__.py +147 -57
experimaestro/server/data/index.css +0 -125
experimaestro/server/data/index.css.map +1 -1
experimaestro/server/data/index.js +194 -58
experimaestro/server/data/index.js.map +1 -1
experimaestro/settings.py +44 -5
experimaestro/sphinx/__init__.py +3 -3
experimaestro/taskglobals.py +20 -0
experimaestro/tests/conftest.py +80 -0
experimaestro/tests/core/test_generics.py +2 -2
experimaestro/tests/identifier_stability.json +45 -0
experimaestro/tests/launchers/bin/sacct +6 -2
experimaestro/tests/launchers/bin/sbatch +4 -2
experimaestro/tests/launchers/test_slurm.py +80 -0
experimaestro/tests/tasks/test_dynamic.py +231 -0
experimaestro/tests/test_cli_jobs.py +615 -0
experimaestro/tests/test_deprecated.py +630 -0
experimaestro/tests/test_environment.py +200 -0
experimaestro/tests/test_file_progress_integration.py +1 -1
experimaestro/tests/test_forward.py +3 -3
experimaestro/tests/test_identifier.py +372 -41
experimaestro/tests/test_identifier_stability.py +458 -0
experimaestro/tests/test_instance.py +3 -3
experimaestro/tests/test_multitoken.py +442 -0
experimaestro/tests/test_mypy.py +433 -0
experimaestro/tests/test_objects.py +312 -5
experimaestro/tests/test_outputs.py +2 -2
experimaestro/tests/test_param.py +8 -12
experimaestro/tests/test_partial_paths.py +231 -0
experimaestro/tests/test_progress.py +0 -48
experimaestro/tests/test_remote_state.py +671 -0
experimaestro/tests/test_resumable_task.py +480 -0
experimaestro/tests/test_serializers.py +141 -1
experimaestro/tests/test_state_db.py +434 -0
experimaestro/tests/test_subparameters.py +160 -0
experimaestro/tests/test_tags.py +136 -0
experimaestro/tests/test_tasks.py +107 -121
experimaestro/tests/test_token_locking.py +252 -0
experimaestro/tests/test_tokens.py +17 -13
experimaestro/tests/test_types.py +123 -1
experimaestro/tests/test_workspace_triggers.py +158 -0
experimaestro/tests/token_reschedule.py +4 -2
experimaestro/tests/utils.py +2 -2
experimaestro/tokens.py +154 -57
experimaestro/tools/diff.py +1 -1
experimaestro/tui/__init__.py +8 -0
experimaestro/tui/app.py +2395 -0
experimaestro/tui/app.tcss +353 -0
experimaestro/tui/log_viewer.py +228 -0
experimaestro/utils/__init__.py +23 -0
experimaestro/utils/environment.py +148 -0
experimaestro/utils/git.py +129 -0
experimaestro/utils/resources.py +1 -1
experimaestro/version.py +34 -0
{experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b8.dist-info}/METADATA +68 -38
experimaestro-2.0.0b8.dist-info/RECORD +187 -0
{experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b8.dist-info}/WHEEL +1 -1
experimaestro-2.0.0b8.dist-info/entry_points.txt +16 -0
experimaestro/compat.py +0 -6
experimaestro/core/objects.pyi +0 -221
experimaestro/server/data/0c35d18bf06992036b69.woff2 +0 -0
experimaestro/server/data/219aa9140e099e6c72ed.woff2 +0 -0
experimaestro/server/data/3a4004a46a653d4b2166.woff +0 -0
experimaestro/server/data/3baa5b8f3469222b822d.woff +0 -0
experimaestro/server/data/4d73cb90e394b34b7670.woff +0 -0
experimaestro/server/data/4ef4218c522f1eb6b5b1.woff2 +0 -0
experimaestro/server/data/5d681e2edae8c60630db.woff +0 -0
experimaestro/server/data/6f420cf17cc0d7676fad.woff2 +0 -0
experimaestro/server/data/c380809fd3677d7d6903.woff2 +0 -0
experimaestro/server/data/f882956fd323fd322f31.woff +0 -0
experimaestro-2.0.0a8.dist-info/RECORD +0 -166
experimaestro-2.0.0a8.dist-info/entry_points.txt +0 -17
{experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b8.dist-info}/licenses/LICENSE +0 -0

experimaestro/scheduler/state_db.py ADDED Viewed

@@ -0,0 +1,437 @@
+"""Database models for experiment state persistence
+This module provides peewee ORM models for storing job and service state
+in a workspace-level SQLite database. The workspace has a single database
+file (.experimaestro/workspace.db) with WAL mode enabled for concurrent
+read/write access.
+Key design:
+- One database per workspace at: workdir/.experimaestro/workspace.db
+- Experiments can be run multiple times, each run tracked separately
+- Jobs and services are scoped to (experiment_id, run_id)
+- Tags are scoped to (job_id, experiment_id, run_id) - fixes GH #128
+- Current state and progress stored in JobModel - no history tracking
+- Database instance is passed explicitly to avoid global state
+"""
+import logging
+from pathlib import Path
+from typing import Tuple
+from peewee import (
+    Model,
+    SqliteDatabase,
+    CharField,
+    FloatField,
+    IntegerField,
+    TextField,
+    DateTimeField,
+    CompositeKey,
+    IntegrityError,
+    OperationalError,
+)
+from datetime import datetime
+import fasteners
+# Module-level logger; used below to report applied schema migrations
+logger = logging.getLogger("xpm.state_db")
+# Database schema version - increment when schema changes require resync.
+# initialize_workspace_database() compares the stored db_version against this
+# value and reports needs_resync=True when the stored value is lower.
+CURRENT_DB_VERSION = 3
+class BaseModel(Model):
+    """Base model for workspace database tables
+    Models are unbound by default. Use database.bind_ctx() when querying:
+        with workspace.workspace_db.bind_ctx([ExperimentModel, JobModel, ...]):
+            experiments = ExperimentModel.select()
+    Alternatively, initialize_workspace_database() binds every model listed
+    in ALL_MODELS to the database object it returns (via db.bind()).
+    """
+    class Meta:
+        database = None  # Unbound - will be bound when used
+class ExperimentModel(BaseModel):
+    """Experiment metadata - tracks experiment definitions
+    An experiment can be run multiple times. This table tracks the experiment
+    itself and points to the current/latest run.
+    Fields:
+        experiment_id: Unique identifier for the experiment (primary key)
+        current_run_id: Points to the current/latest run (null if no runs yet)
+        created_at: When experiment was first created
+        updated_at: When experiment was last modified (for incremental queries)
+    Note: Experiment path is derivable: {workspace}/xp/{experiment_id}
+    """
+    experiment_id = CharField(primary_key=True)
+    # Plain string column, not a ForeignKeyField: nothing at the schema level
+    # ties this value to an ExperimentRunModel row
+    current_run_id = CharField(null=True)
+    created_at = DateTimeField(default=datetime.now)
+    # NOTE(review): the default is presumably applied only when the row is
+    # created - confirm that writers refresh updated_at on later changes
+    updated_at = DateTimeField(default=datetime.now, index=True)
+    class Meta:
+        table_name = "experiments"
+class ExperimentRunModel(BaseModel):
+    """One execution of an experiment
+    A new row is created every time an experiment is executed. Rows are
+    identified by the (experiment_id, run_id) composite primary key.
+    run_id format: timestamp-based like "20250120_143022" or sequential counter
+    Fields:
+        experiment_id: ID of the experiment this run belongs to
+        run_id: Unique ID for this run (timestamp or sequential)
+        started_at: When this run started
+        ended_at: When this run completed (null if still active)
+        status: Run status (active, completed, failed, abandoned)
+        hostname: Host where the experiment was launched (null for old runs)
+    """
+    experiment_id = CharField(index=True)
+    run_id = CharField(index=True)
+    started_at = DateTimeField(default=datetime.now)
+    ended_at = DateTimeField(null=True)
+    status = CharField(default="active", index=True)
+    # For databases created before this column existed, the v1 -> v2 migration
+    # in initialize_workspace_database() adds it with ALTER TABLE
+    hostname = CharField(null=True)
+    class Meta:
+        table_name = "experiment_runs"
+        primary_key = CompositeKey("experiment_id", "run_id")
+        # Each entry is (columns, unique); False declares a non-unique index
+        indexes = (
+            # For finding latest run
+            (("experiment_id", "started_at"), False),
+        )
+class WorkspaceSyncMetadata(BaseModel):
+    """Workspace-level metadata for disk sync tracking
+    Single-row table to track when the last disk sync occurred.
+    Used to throttle sync operations and prevent excessive disk scanning.
+    Fields:
+        id: Always "workspace" (single row table)
+        last_sync_time: When last sync completed
+        sync_interval_minutes: Minimum interval between syncs
+        db_version: Schema version for migration detection
+    """
+    id = CharField(primary_key=True, default="workspace")
+    last_sync_time = DateTimeField(null=True)
+    sync_interval_minutes = IntegerField(default=5)
+    # Defaults to 1 while CURRENT_DB_VERSION is 3, so a row left at its default
+    # is reported as needing a resync by initialize_workspace_database().
+    # NOTE(review): the code that raises db_version after a resync is not in
+    # this module - confirm where it happens
+    db_version = IntegerField(default=1)
+    class Meta:
+        table_name = "workspace_sync_metadata"
+class JobModel(BaseModel):
+    """State of a job within one specific experiment run
+    A row is keyed by (job_id, experiment_id, run_id), so the same job can
+    appear in several runs, each with its own state and tags.
+    Fields:
+        job_id: Unique identifier for the job (from task identifier)
+        experiment_id: ID of the experiment this job belongs to
+        run_id: ID of the run this job belongs to
+        task_id: Task class identifier
+        locator: Full task locator (identifier)
+        state: Current job state (e.g., "unscheduled", "waiting", "running", "done", "error")
+        failure_reason: Optional failure reason for error states (e.g., "TIMEOUT", "DEPENDENCY")
+        submitted_time: When job was submitted (Unix timestamp)
+        started_time: When job started running (Unix timestamp)
+        ended_time: When job finished (Unix timestamp)
+        progress: JSON-encoded list of progress updates
+        updated_at: When job was last modified (for incremental queries)
+    Note: Job path is derivable: {workspace}/jobs/{task_id}/{job_id}
+    Note: Tags are stored in separate JobTagModel table (run-scoped)
+    Note: Dependencies are NOT stored in DB (available in state.json only)
+    """
+    job_id = CharField(index=True)
+    experiment_id = CharField(index=True)
+    run_id = CharField(index=True)
+    task_id = CharField(index=True)
+    locator = CharField()
+    state = CharField(default="unscheduled", index=True)
+    failure_reason = CharField(null=True)
+    # The three timestamps are floats and stay NULL until the event happens
+    submitted_time = FloatField(null=True)
+    started_time = FloatField(null=True)
+    ended_time = FloatField(null=True)
+    # Stored as text; the default is the JSON encoding of an empty list
+    progress = TextField(default="[]")
+    updated_at = DateTimeField(default=datetime.now, index=True)
+    class Meta:
+        table_name = "jobs"
+        primary_key = CompositeKey("job_id", "experiment_id", "run_id")
+        # Each entry is (columns, unique); all three are non-unique
+        indexes = (
+            # Query jobs by run and state
+            (("experiment_id", "run_id", "state"), False),
+            # Query jobs by run and task
+            (("experiment_id", "run_id", "task_id"), False),
+            # Query jobs by run and update time
+            (("experiment_id", "run_id", "updated_at"), False),
+        )
+class JobTagModel(BaseModel):
+    """Run-scoped job tags, stored as key-value rows (fixes GH #128)
+    Tags belong to the (job_id, experiment_id, run_id) combination rather than
+    to the job alone, so the same job can carry different tags in different
+    experiment runs. They live in their own table so they can be indexed.
+    Change from the old behavior:
+    - OLD: Tags were global per job_id (broken - same job in different experiments/runs shared tags)
+    - NEW: Tags are scoped per (job_id, experiment_id, run_id)
+    Fields:
+        job_id: ID of the job
+        experiment_id: ID of the experiment
+        run_id: ID of the run
+        tag_key: Tag name
+        tag_value: Tag value
+    """
+    job_id = CharField(index=True)
+    experiment_id = CharField(index=True)
+    run_id = CharField(index=True)
+    tag_key = CharField(index=True)
+    tag_value = CharField(index=True)
+    class Meta:
+        table_name = "job_tags"
+        # tag_key is part of the key: one value per tag name, per job, per run
+        primary_key = CompositeKey("job_id", "experiment_id", "run_id", "tag_key")
+        # Each entry is (columns, unique); both are non-unique
+        indexes = (
+            # For tag-based queries
+            (("tag_key", "tag_value"), False),
+            # For experiment run tag queries
+            (("experiment_id", "run_id", "tag_key"), False),
+        )
+class ServiceModel(BaseModel):
+    """Service information linked to specific experiment run
+    Services are tied to a specific run of an experiment via (experiment_id, run_id).
+    Services are only added or removed, not updated - state is managed at runtime.
+    Fields:
+        service_id: Unique identifier for the service
+        experiment_id: ID of the experiment this service belongs to
+        run_id: ID of the run this service belongs to
+        description: Human-readable description
+        state_dict: JSON serialized state_dict for service recreation
+        created_at: When service was registered
+    """
+    # No index=True here, unlike the two columns below; service_id is the
+    # first column of the composite primary key declared in Meta
+    service_id = CharField()
+    experiment_id = CharField(index=True)
+    run_id = CharField(index=True)
+    description = TextField(default="")
+    state_dict = TextField(default="{}")  # JSON for service recreation
+    created_at = DateTimeField(default=datetime.now)
+    class Meta:
+        table_name = "services"
+        primary_key = CompositeKey("service_id", "experiment_id", "run_id")
+class PartialModel(BaseModel):
+    """Registry of partial directories used by subparameters
+    A partial directory is shared by jobs whose parameter values differ but
+    whose partial identifier is the same. Its location can be rebuilt as
+    WORKSPACE/partials/TASK_ID/SUBPARAM_NAME/PARTIAL_ID/.
+    Fields:
+        partial_id: Hex hash of the partial identifier
+        task_id: Task class identifier
+        subparameters_name: Name of the subparameters definition
+        created_at: When this partial directory was first created
+    """
+    # Sole primary key: a partial_id can be stored only once in this table
+    partial_id = CharField(primary_key=True)
+    task_id = CharField(index=True)
+    subparameters_name = CharField(index=True)
+    created_at = DateTimeField(default=datetime.now)
+    class Meta:
+        table_name = "partials"
+        # (columns, unique) entry: non-unique index over the two columns
+        indexes = (
+            (("task_id", "subparameters_name"), False),
+        )
+class JobPartialModel(BaseModel):
+    """Links jobs to partial directories they use
+    Tracks which jobs reference which partial directories. This enables
+    cleanup of orphan partials when all referencing jobs are deleted.
+    A job can use multiple partials (different subparameters definitions),
+    and a partial can be used by multiple jobs.
+    Fields:
+        job_id: ID of the job using this partial
+        experiment_id: ID of the experiment
+        run_id: ID of the run
+        partial_id: ID of the partial directory being used
+    """
+    job_id = CharField(index=True)
+    experiment_id = CharField(index=True)
+    run_id = CharField(index=True)
+    # index=True already provides the single-column index used to find the
+    # jobs referencing a partial, so Meta declares no additional index on it
+    # (a second declaration would target the same index and be redundant)
+    partial_id = CharField(index=True)
+    class Meta:
+        table_name = "job_partials"
+        primary_key = CompositeKey("job_id", "experiment_id", "run_id", "partial_id")
+# List of all models for binding: initialize_workspace_database() passes it to
+# db.bind() and, in write mode, to db.create_tables(..., safe=True)
+ALL_MODELS = [
+    ExperimentModel,
+    ExperimentRunModel,
+    WorkspaceSyncMetadata,
+    JobModel,
+    JobTagModel,
+    ServiceModel,
+    PartialModel,
+    JobPartialModel,
+]
+def initialize_workspace_database(
+    db_path: Path, read_only: bool = False
+) -> Tuple[SqliteDatabase, bool]:
+    """Initialize a workspace database connection with proper configuration
+    Creates the SqliteDatabase object and binds ALL_MODELS to it. Unless
+    read_only is set, it also creates missing tables, applies the schema
+    migrations and makes sure the single WorkspaceSyncMetadata row exists.
+    The whole sequence runs under an inter-process file lock stored next to
+    the database file, so only one process initializes at a time.
+    Args:
+        db_path: Path to the workspace SQLite database file
+        read_only: If True, set PRAGMA query_only and skip directory creation,
+            table creation, the version check and the migrations
+    Returns:
+        Tuple of (SqliteDatabase instance, needs_resync flag)
+        The needs_resync flag is True when the stored db_version is lower than
+        CURRENT_DB_VERSION (or could not be read) and a full resync from disk
+        is required. It is always False in read-only mode.
+    """
+    # Ensure parent directory exists (unless read-only)
+    if not read_only:
+        db_path.parent.mkdir(parents=True, exist_ok=True)
+    # Use file-based lock to prevent concurrent initialization from multiple processes
+    # This prevents SQLite locking issues during table creation
+    lock_path = db_path.parent / f".{db_path.name}.init.lock"
+    lock = fasteners.InterProcessLock(str(lock_path))
+    needs_resync = False
+    # Acquire lock (blocking) - only one process can initialize at a time
+    with lock:
+        # Create database connection
+        # check_same_thread=False allows the connection to be used from multiple threads
+        db = SqliteDatabase(
+            str(db_path),
+            pragmas={
+                "journal_mode": "wal",  # Write-Ahead Logging for concurrent reads
+                "foreign_keys": 1,  # Enable foreign key constraints
+                "ignore_check_constraints": 0,
+                "synchronous": 1,  # NORMAL mode (balance safety/speed)
+                "busy_timeout": 5000,  # Wait up to 5 seconds for locks
+            },
+            check_same_thread=False,
+        )
+        if read_only:
+            # Set query-only mode for read-only access
+            db.execute_sql("PRAGMA query_only = ON")
+        # Bind all models to this database
+        db.bind(ALL_MODELS)
+        # Create tables if they don't exist (only in write mode)
+        if not read_only:
+            db.create_tables(ALL_MODELS, safe=True)
+            # Check database version for migration - use raw SQL since column may not exist
+            current_version = 0
+            try:
+                cursor = db.execute_sql(
+                    "SELECT db_version FROM workspace_sync_metadata WHERE id='workspace'"
+                )
+                row = cursor.fetchone()
+                # A missing row and a NULL db_version both count as version 0;
+                # comparing None with an int below would raise TypeError
+                if row is not None and row[0] is not None:
+                    current_version = row[0]
+                if current_version < CURRENT_DB_VERSION:
+                    needs_resync = True
+            except OperationalError as read_error:
+                # Column doesn't exist - add it and trigger resync
+                needs_resync = True
+                logger.debug(
+                    "Could not read db_version (%s), adding the column", read_error
+                )
+                try:
+                    db.execute_sql(
+                        "ALTER TABLE workspace_sync_metadata "
+                        "ADD COLUMN db_version INTEGER DEFAULT 1"
+                    )
+                except OperationalError as alter_error:
+                    # Column may already exist; logged so that any other
+                    # cause (e.g. a locked database) leaves a trace
+                    logger.debug("db_version column not added: %s", alter_error)
+            # Run schema migrations for older databases
+            if current_version < 2:
+                # Migration v1 -> v2: Add hostname column to experiment_runs table
+                try:
+                    db.execute_sql(
+                        "ALTER TABLE experiment_runs ADD COLUMN hostname VARCHAR(255) NULL"
+                    )
+                    logger.info("Added hostname column to experiment_runs table")
+                except OperationalError as alter_error:
+                    # Expected when the column already exists (e.g. the table
+                    # was just created); still best-effort, but no longer silent
+                    logger.debug(
+                        "hostname column not added to experiment_runs: %s",
+                        alter_error,
+                    )
+            # Initialize WorkspaceSyncMetadata with default row if not exists
+            # Use try/except to handle race condition (shouldn't happen with lock, but be safe)
+            try:
+                WorkspaceSyncMetadata.get_or_create(
+                    id="workspace",
+                    defaults={
+                        "last_sync_time": None,
+                        "sync_interval_minutes": 5,
+                        "db_version": 1,
+                    },
+                )
+            except (IntegrityError, OperationalError) as create_error:
+                # If get_or_create fails, the row likely already exists
+                logger.debug(
+                    "workspace_sync_metadata row not created: %s", create_error
+                )
+    return db, needs_resync
+def close_workspace_database(db: SqliteDatabase):
+    """Close a workspace database connection if it is still open
+    Args:
+        db: The database connection to close; a falsy value is ignored
+    """
+    # Nothing to do for a missing or already-closed connection
+    if not db or db.is_closed():
+        return
+    db.close()

experimaestro 2.0.0a8__py3-none-any.whl → 2.0.0b8__py3-none-any.whl

Potentially problematic release.

experimaestro 2.0.0a8__py3-none-any.whl → 2.0.0b8__py3-none-any.whl