experimaestro 2.0.0b4__py3-none-any.whl → 2.0.0b8__py3-none-any.whl
This diff shows the changes between publicly available package versions as published to their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of experimaestro may be problematic.
- experimaestro/cli/__init__.py +177 -31
- experimaestro/experiments/cli.py +6 -2
- experimaestro/scheduler/base.py +21 -0
- experimaestro/scheduler/experiment.py +64 -34
- experimaestro/scheduler/interfaces.py +27 -0
- experimaestro/scheduler/remote/__init__.py +31 -0
- experimaestro/scheduler/remote/client.py +874 -0
- experimaestro/scheduler/remote/protocol.py +467 -0
- experimaestro/scheduler/remote/server.py +423 -0
- experimaestro/scheduler/remote/sync.py +144 -0
- experimaestro/scheduler/services.py +158 -32
- experimaestro/scheduler/state_db.py +58 -9
- experimaestro/scheduler/state_provider.py +512 -91
- experimaestro/scheduler/state_sync.py +65 -8
- experimaestro/tests/test_cli_jobs.py +3 -3
- experimaestro/tests/test_remote_state.py +671 -0
- experimaestro/tests/test_state_db.py +8 -8
- experimaestro/tui/app.py +100 -8
- experimaestro/version.py +2 -2
- {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b8.dist-info}/METADATA +4 -4
- {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b8.dist-info}/RECORD +24 -18
- {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b8.dist-info}/WHEEL +0 -0
- {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b8.dist-info}/entry_points.txt +0 -0
- {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b8.dist-info}/licenses/LICENSE +0 -0
The expanded hunks below appear to come from experimaestro/scheduler/state_provider.py (the +512 -91 entry above), judging by the classes they modify.

@@ -15,13 +15,15 @@ Key features:
 
 import json
 import logging
+import socket
 import threading
 import time
 from dataclasses import dataclass
 from datetime import datetime
 from enum import Enum, auto
 from pathlib import Path
-from
+from abc import ABC, abstractmethod
+from typing import Callable, Dict, List, Optional, Set, Tuple, TYPE_CHECKING
 
 from watchdog.events import FileSystemEventHandler
 from watchdog.observers.api import ObservedWatch
@@ -34,11 +36,14 @@ from experimaestro.scheduler.state_db import (
     ServiceModel,
     PartialModel,
     JobPartialModel,
+    WorkspaceSyncMetadata,
     ALL_MODELS,
+    CURRENT_DB_VERSION,
 )
 from experimaestro.scheduler.interfaces import (
     BaseJob,
     BaseExperiment,
+    BaseService,
     JobState,
     JobFailureStatus,
     STATE_NAME_TO_JOBSTATE,
@@ -78,6 +83,213 @@ class StateEvent:
 StateListener = Callable[[StateEvent], None]
 
 
+class StateProvider(ABC):
+    """Abstract base class for state providers
+
+    Defines the interface that all state providers must implement.
+    This enables both local (WorkspaceStateProvider) and remote
+    (SSHStateProviderClient) providers to be used interchangeably.
+
+    Provides common service caching logic to preserve service instances
+    (and their URLs) across calls to get_services(). Subclasses should call
+    _init_service_cache() in their __init__ and implement _fetch_services_from_storage().
+    """
+
+    def _init_service_cache(self) -> None:
+        """Initialize service cache - call from subclass __init__"""
+        self._service_cache: Dict[Tuple[str, str], Dict[str, "BaseService"]] = {}
+        self._service_cache_lock = threading.Lock()
+
+    def _clear_service_cache(self) -> None:
+        """Clear the service cache"""
+        with self._service_cache_lock:
+            self._service_cache.clear()
+
+    def get_services(
+        self, experiment_id: Optional[str] = None, run_id: Optional[str] = None
+    ) -> List[BaseService]:
+        """Get services for an experiment
+
+        Uses caching to preserve service instances (and their URLs) across calls.
+        Subclasses can override _get_live_services() for live service support
+        and must implement _fetch_services_from_storage() for persistent storage.
+        """
+        # Resolve run_id if needed
+        if experiment_id is not None and run_id is None:
+            run_id = self.get_current_run(experiment_id)
+            if run_id is None:
+                return []
+
+        cache_key = (experiment_id or "", run_id or "")
+
+        with self._service_cache_lock:
+            # Try to get live services (scheduler, etc.) - may return None
+            live_services = self._get_live_services(experiment_id, run_id)
+            if live_services is not None:
+                # Cache and return live services
+                self._service_cache[cache_key] = {s.id: s for s in live_services}
+                return live_services
+
+            # Check cache
+            cached = self._service_cache.get(cache_key)
+            if cached is not None:
+                return list(cached.values())
+
+            # Fetch from persistent storage (DB or remote)
+            services = self._fetch_services_from_storage(experiment_id, run_id)
+            self._service_cache[cache_key] = {s.id: s for s in services}
+            return services
+
+    def _get_live_services(
+        self, experiment_id: Optional[str], run_id: Optional[str]
+    ) -> Optional[List[BaseService]]:
+        """Get live services if available (e.g., from scheduler).
+
+        Returns None if no live services are available (default).
+        Subclasses may override to check for live services.
+        """
+        return None
+
+    @abstractmethod
+    def _fetch_services_from_storage(
+        self, experiment_id: Optional[str], run_id: Optional[str]
+    ) -> List[BaseService]:
+        """Fetch services from persistent storage (DB or remote).
+
+        Called when no live services and cache is empty.
+        """
+        ...
+
+    @abstractmethod
+    def get_experiments(self, since: Optional[datetime] = None) -> List[BaseExperiment]:
+        """Get list of all experiments"""
+        ...
+
+    @abstractmethod
+    def get_experiment(self, experiment_id: str) -> Optional[BaseExperiment]:
+        """Get a specific experiment by ID"""
+        ...
+
+    @abstractmethod
+    def get_experiment_runs(self, experiment_id: str) -> List[Dict]:
+        """Get all runs for an experiment"""
+        ...
+
+    @abstractmethod
+    def get_current_run(self, experiment_id: str) -> Optional[str]:
+        """Get the current run ID for an experiment"""
+        ...
+
+    @abstractmethod
+    def get_jobs(
+        self,
+        experiment_id: Optional[str] = None,
+        run_id: Optional[str] = None,
+        task_id: Optional[str] = None,
+        state: Optional[str] = None,
+        tags: Optional[Dict[str, str]] = None,
+        since: Optional[datetime] = None,
+    ) -> List[BaseJob]:
+        """Query jobs with optional filters"""
+        ...
+
+    @abstractmethod
+    def get_job(
+        self, job_id: str, experiment_id: str, run_id: Optional[str] = None
+    ) -> Optional[BaseJob]:
+        """Get a specific job"""
+        ...
+
+    @abstractmethod
+    def get_all_jobs(
+        self,
+        state: Optional[str] = None,
+        tags: Optional[Dict[str, str]] = None,
+        since: Optional[datetime] = None,
+    ) -> List[BaseJob]:
+        """Get all jobs across all experiments"""
+        ...
+
+    # Note: get_services is implemented in base class using _fetch_services_from_storage
+
+    @abstractmethod
+    def get_services_raw(
+        self, experiment_id: Optional[str] = None, run_id: Optional[str] = None
+    ) -> List[Dict]:
+        """Get raw service data as dictionaries (for serialization)"""
+        ...
+
+    @abstractmethod
+    def add_listener(self, listener: StateListener) -> None:
+        """Register a listener for state change events"""
+        ...
+
+    @abstractmethod
+    def remove_listener(self, listener: StateListener) -> None:
+        """Unregister a listener"""
+        ...
+
+    @abstractmethod
+    def kill_job(self, job: BaseJob, perform: bool = False) -> bool:
+        """Kill a running job"""
+        ...
+
+    @abstractmethod
+    def clean_job(self, job: BaseJob, perform: bool = False) -> bool:
+        """Clean a finished job"""
+        ...
+
+    @abstractmethod
+    def close(self) -> None:
+        """Close the state provider and release resources"""
+        ...
+
+    # Optional methods with default implementations
+
+    def sync_path(self, path: str) -> Optional[Path]:
+        """Sync a specific path from remote (remote providers only)
+
+        Returns None for local providers or if sync fails.
+        """
+        return None
+
+    def get_orphan_jobs(self) -> List[BaseJob]:
+        """Get orphan jobs (jobs not associated with any experiment run)"""
+        return []
+
+    def delete_job_safely(self, job: BaseJob, perform: bool = True) -> Tuple[bool, str]:
+        """Safely delete a job and its data"""
+        return False, "Not implemented"
+
+    def delete_experiment(
+        self, experiment_id: str, perform: bool = True
+    ) -> Tuple[bool, str]:
+        """Delete an experiment and all its data"""
+        return False, "Not implemented"
+
+    def cleanup_orphan_partials(self, perform: bool = False) -> List[str]:
+        """Clean up orphan partial directories"""
+        return []
+
+    def get_last_sync_time(self) -> Optional[datetime]:
+        """Get the last sync time (for incremental updates)"""
+        return None
+
+    @property
+    def read_only(self) -> bool:
+        """Whether this provider is read-only"""
+        return True
+
+    @property
+    def is_remote(self) -> bool:
+        """Whether this is a remote provider (e.g., SSH)
+
+        Remote providers use periodic refresh instead of push notifications
+        and support sync_path for on-demand file synchronization.
+        """
+        return False
+
+
 class _DatabaseChangeDetector:
     """Background thread that detects database changes and notifies listeners
 
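
To see how the new StateProvider base class is meant to be used, here is a standalone sketch of the caching pattern it implements. Names are simplified and nothing from experimaestro is imported; only the template-method structure (live services, then cache, then persistent storage) mirrors the class above.

import threading
from abc import ABC, abstractmethod
from typing import Dict, List, Optional, Tuple


class ToyService:
    def __init__(self, service_id: str):
        self.id = service_id


class ToyStateProvider(ABC):
    """Simplified stand-in for the StateProvider caching logic above."""

    def _init_service_cache(self) -> None:
        self._cache: Dict[Tuple[str, str], Dict[str, ToyService]] = {}
        self._lock = threading.Lock()

    def get_services(
        self, experiment_id: Optional[str] = None, run_id: Optional[str] = None
    ) -> List[ToyService]:
        key = (experiment_id or "", run_id or "")
        with self._lock:
            live = self._get_live_services(experiment_id, run_id)
            if live is not None:  # live services win and refresh the cache
                self._cache[key] = {s.id: s for s in live}
                return live
            if key in self._cache:  # otherwise reuse cached instances
                return list(self._cache[key].values())
            fetched = self._fetch_services_from_storage(experiment_id, run_id)
            self._cache[key] = {s.id: s for s in fetched}
            return fetched

    def _get_live_services(self, experiment_id, run_id) -> Optional[List[ToyService]]:
        return None  # default: no live source available

    @abstractmethod
    def _fetch_services_from_storage(self, experiment_id, run_id) -> List[ToyService]:
        ...


class DictBackedProvider(ToyStateProvider):
    def __init__(self, storage: Dict[str, List[str]]):
        self._storage = storage
        self._init_service_cache()

    def _fetch_services_from_storage(self, experiment_id, run_id) -> List[ToyService]:
        return [ToyService(sid) for sid in self._storage.get(experiment_id or "", [])]


provider = DictBackedProvider({"exp1": ["tensorboard"]})
first = provider.get_services("exp1", "run1")
second = provider.get_services("exp1", "run1")
assert first[0] is second[0]  # cached instance (and any URL it holds) is preserved
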
@@ -371,6 +583,7 @@ class MockExperiment(BaseExperiment):
         updated_at: str,
         started_at: Optional[float] = None,
         ended_at: Optional[float] = None,
+        hostname: Optional[str] = None,
     ):
         self.workdir = workdir
         self.current_run_id = current_run_id
@@ -380,6 +593,7 @@ class MockExperiment(BaseExperiment):
         self.updated_at = updated_at
         self.started_at = started_at
         self.ended_at = ended_at
+        self.hostname = hostname
 
     @property
     def experiment_id(self) -> str:
@@ -387,6 +601,57 @@ class MockExperiment(BaseExperiment):
         return self.workdir.name
 
 
+class MockService(BaseService):
+    """Mock service object for remote monitoring
+
+    This class provides a service-like interface for services loaded from
+    the remote server. It mimics the Service class interface sufficiently
+    for display in the TUI ServicesList widget.
+    """
+
+    def __init__(
+        self,
+        service_id: str,
+        description_text: str,
+        state_dict_data: dict,
+        experiment_id: Optional[str] = None,
+        run_id: Optional[str] = None,
+        url: Optional[str] = None,
+        state: str = "STOPPED",
+    ):
+        self.id = service_id
+        self._description = description_text
+        self._state_name = state
+        self._state_dict_data = state_dict_data
+        self.experiment_id = experiment_id
+        self.run_id = run_id
+        self.url = url
+
+    @property
+    def state(self):
+        """Return state as a ServiceState-like object with a name attribute"""
+        from experimaestro.scheduler.services import ServiceState
+
+        # Convert state name to ServiceState enum
+        try:
+            return ServiceState[self._state_name]
+        except KeyError:
+            # Return a mock object with name attribute for unknown states
+            class MockState:
+                def __init__(self, name):
+                    self.name = name
+
+            return MockState(self._state_name)
+
+    def description(self) -> str:
+        """Return service description"""
+        return self._description
+
+    def state_dict(self) -> dict:
+        """Return state dictionary for service recreation"""
+        return self._state_dict_data
+
+
 def _with_db_context(func):
     """Decorator to wrap method in database bind context
 
@@ -406,7 +671,7 @@ def _with_db_context(func):
     return wrapper
 
 
-class WorkspaceStateProvider:
+class WorkspaceStateProvider(StateProvider):
     """Unified state provider for workspace-level database (singleton per workspace path)
 
     Provides access to experiment and job state from a single workspace database.
@@ -507,13 +772,16 @@ class WorkspaceStateProvider:
         workspace_path = Path(workspace_path).absolute()
 
         self.workspace_path = workspace_path
-        self.
+        self._read_only = read_only
         self.sync_interval_minutes = sync_interval_minutes
 
         # Listeners for push notifications
         self._listeners: Set[StateListener] = set()
         self._listeners_lock = threading.Lock()
 
+        # Service cache (from base class)
+        self._init_service_cache()
+
         # File watcher for database changes (started when listeners are added)
         self._change_detector: Optional[_DatabaseChangeDetector] = None
         self._db_file_handler: Optional[_DatabaseFileHandler] = None
@@ -570,9 +838,18 @@ class WorkspaceStateProvider:
         experimaestro_dir.mkdir(parents=True, exist_ok=True)
 
         db_path = experimaestro_dir / "workspace.db"
-        self.workspace_db = initialize_workspace_database(
+        self.workspace_db, needs_resync = initialize_workspace_database(
+            db_path, read_only=read_only
+        )
         self._db_dir = experimaestro_dir  # Store for file watcher
 
+        # Sync from disk if needed due to schema version change
+        if needs_resync and not read_only:
+            logger.info(
+                "Database schema version changed, triggering full resync from disk"
+            )
+            sync_on_start = True  # Force sync
+
         # Optionally sync from disk on start (only in write mode)
         # Syncing requires write access to update the database and sync timestamp
         if sync_on_start and not read_only:
@@ -581,16 +858,29 @@ class WorkspaceStateProvider:
             sync_workspace_from_disk(
                 self.workspace_path,
                 write_mode=True,
-                force=
+                force=needs_resync,  # Force full sync if schema changed
                 sync_interval_minutes=sync_interval_minutes,
             )
 
+            # Update db_version after successful sync
+            if needs_resync:
+                with self.workspace_db.bind_ctx([WorkspaceSyncMetadata]):
+                    WorkspaceSyncMetadata.update(db_version=CURRENT_DB_VERSION).where(
+                        WorkspaceSyncMetadata.id == "workspace"
+                    ).execute()
+                logger.info("Database schema updated to version %d", CURRENT_DB_VERSION)
+
         logger.info(
             "WorkspaceStateProvider initialized (read_only=%s, workspace=%s)",
             read_only,
             workspace_path,
         )
 
+    @property
+    def read_only(self) -> bool:
+        """Whether this provider is read-only"""
+        return self._read_only
+
     # Experiment management methods
 
     @_with_db_context
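
The two hunks above have initialize_workspace_database report a schema-version mismatch, and make the provider force a full resync from disk (then bump db_version in WorkspaceSyncMetadata) when one is detected. Below is a standalone sketch of that version-gating pattern, using plain sqlite3 and an invented meta table rather than experimaestro's actual schema.

import sqlite3
from typing import Tuple

CURRENT_DB_VERSION = 2  # bump whenever the schema layout changes


def open_database(path: str) -> Tuple[sqlite3.Connection, bool]:
    """Return (connection, needs_resync); resync when the stored version differs."""
    db = sqlite3.connect(path)
    db.execute(
        "CREATE TABLE IF NOT EXISTS meta (id TEXT PRIMARY KEY, db_version INTEGER)"
    )
    row = db.execute("SELECT db_version FROM meta WHERE id = 'workspace'").fetchone()
    return db, (row is None or row[0] != CURRENT_DB_VERSION)


db, needs_resync = open_database(":memory:")
if needs_resync:
    # ... rebuild the database contents from the on-disk source of truth here ...
    db.execute(
        "INSERT INTO meta (id, db_version) VALUES ('workspace', ?) "
        "ON CONFLICT(id) DO UPDATE SET db_version = excluded.db_version",
        (CURRENT_DB_VERSION,),
    )
    db.commit()
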
@@ -652,14 +942,40 @@ class WorkspaceStateProvider:
         now = datetime.now()
         run_id = now.strftime("%Y%m%d_%H%M%S") + f"_{now.microsecond:06d}"
 
-        #
+        # Capture hostname
+        hostname = socket.gethostname()
+        started_at = datetime.now()
+
+        # Create run record with hostname
         ExperimentRunModel.insert(
             experiment_id=experiment_id,
             run_id=run_id,
-            started_at=
+            started_at=started_at,
             status="active",
+            hostname=hostname,
         ).execute()
 
+        # Persist to disk in experiment folder (informations.json)
+        exp_dir = self.workspace_path / "xp" / experiment_id
+        exp_dir.mkdir(parents=True, exist_ok=True)
+        info_file = exp_dir / "informations.json"
+
+        # Merge with existing data (may have multiple runs)
+        info_data: Dict = {}
+        if info_file.exists():
+            try:
+                info_data = json.loads(info_file.read_text())
+            except json.JSONDecodeError:
+                logger.warning("Could not parse existing informations.json")
+
+        if "runs" not in info_data:
+            info_data["runs"] = {}
+        info_data["runs"][run_id] = {
+            "hostname": hostname,
+            "started_at": started_at.isoformat(),
+        }
+        info_file.write_text(json.dumps(info_data, indent=2))
+
         # Update experiment's current_run_id and updated_at
         now = datetime.now()
         ExperimentModel.update(
@@ -667,7 +983,12 @@ class WorkspaceStateProvider:
             updated_at=now,
         ).where(ExperimentModel.experiment_id == experiment_id).execute()
 
-        logger.info(
+        logger.info(
+            "Created run %s for experiment %s on host %s",
+            run_id,
+            experiment_id,
+            hostname,
+        )
 
         # Notify listeners
         self._notify_listeners(
@@ -678,6 +999,7 @@ class WorkspaceStateProvider:
                     "run_id": run_id,
                     "status": "active",
                     "started_at": now.isoformat(),
+                    "hostname": hostname,
                 },
             )
         )
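
Besides the ExperimentRunModel row, each new run is now also recorded on disk in xp/<experiment_id>/informations.json, keyed by run_id with its hostname and start time. A small illustrative reader for that file follows; the layout comes from the hunk above, but the helper itself is not an experimaestro API.

import json
from pathlib import Path
from typing import Optional


def run_hostname(workspace: Path, experiment_id: str, run_id: str) -> Optional[str]:
    """Return the hostname recorded for a run, or None if unknown."""
    info_file = workspace / "xp" / experiment_id / "informations.json"
    if not info_file.exists():
        return None
    try:
        info = json.loads(info_file.read_text())
    except json.JSONDecodeError:
        return None
    return info.get("runs", {}).get(run_id, {}).get("hostname")
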
@@ -718,6 +1040,7 @@ class WorkspaceStateProvider:
             - finished_jobs: Number of completed jobs (for current run)
             - failed_jobs: Number of failed jobs (for current run)
             - updated_at: When experiment was last modified
+            - hostname: Host where the current run was launched
         """
         experiments = []
 
@@ -733,6 +1056,7 @@ class WorkspaceStateProvider:
 
             started_at = None
             ended_at = None
+            hostname = None
 
             if exp_model.current_run_id:
                 total_jobs = (
@@ -762,7 +1086,7 @@ class WorkspaceStateProvider:
                     .count()
                 )
 
-                # Get run timestamps
+                # Get run timestamps and hostname
                 try:
                     run_model = ExperimentRunModel.get(
                         (ExperimentRunModel.experiment_id == exp_model.experiment_id)
@@ -772,6 +1096,7 @@ class WorkspaceStateProvider:
                         started_at = run_model.started_at.timestamp()
                     if run_model.ended_at:
                         ended_at = run_model.ended_at.timestamp()
+                    hostname = run_model.hostname
                 except ExperimentRunModel.DoesNotExist:
                     pass
 
@@ -788,6 +1113,7 @@ class WorkspaceStateProvider:
                     updated_at=exp_model.updated_at.isoformat(),
                     started_at=started_at,
                     ended_at=ended_at,
+                    hostname=hostname,
                 )
             )
 
@@ -814,6 +1140,7 @@ class WorkspaceStateProvider:
         total_jobs = 0
         finished_jobs = 0
         failed_jobs = 0
+        hostname = None
 
         if exp_model.current_run_id:
             total_jobs = (
@@ -843,6 +1170,16 @@ class WorkspaceStateProvider:
                 .count()
             )
 
+            # Get hostname from run model
+            try:
+                run_model = ExperimentRunModel.get(
+                    (ExperimentRunModel.experiment_id == exp_model.experiment_id)
+                    & (ExperimentRunModel.run_id == exp_model.current_run_id)
+                )
+                hostname = run_model.hostname
+            except ExperimentRunModel.DoesNotExist:
+                pass
+
         # Compute experiment path from workspace_path and experiment_id
         exp_path = self.workspace_path / "xp" / exp_model.experiment_id
 
@@ -853,6 +1190,7 @@ class WorkspaceStateProvider:
             finished_jobs=finished_jobs,
             failed_jobs=failed_jobs,
             updated_at=exp_model.updated_at.isoformat(),
+            hostname=hostname,
         )
 
     @_with_db_context
@@ -1633,61 +1971,48 @@ class WorkspaceStateProvider:
     # Service operations
 
     @_with_db_context
-    def
+    def register_service(
         self,
         service_id: str,
         experiment_id: str,
         run_id: str,
         description: str,
-        state: str,
         state_dict: Optional[str] = None,
     ):
-        """
+        """Register a service in the database
+
+        Services are only added or removed, not updated. Runtime state
+        is managed by the Service object itself.
 
         Args:
             service_id: Service identifier
             experiment_id: Experiment identifier
             run_id: Run identifier
             description: Human-readable description
-            state: Service state
             state_dict: JSON serialized state_dict for service recreation
 
         Raises:
             RuntimeError: If in read-only mode
         """
         if self.read_only:
-            raise RuntimeError("Cannot
+            raise RuntimeError("Cannot register services in read-only mode")
 
         insert_data = {
             "service_id": service_id,
             "experiment_id": experiment_id,
             "run_id": run_id,
             "description": description,
-            "state": state,
             "created_at": datetime.now(),
-            "updated_at": datetime.now(),
-        }
-        update_data = {
-            ServiceModel.description: description,
-            ServiceModel.state: state,
-            ServiceModel.updated_at: datetime.now(),
         }
 
         if state_dict is not None:
             insert_data["state_dict"] = state_dict
-
-
-        ServiceModel.insert(**insert_data).
-            conflict_target=[
-                ServiceModel.service_id,
-                ServiceModel.experiment_id,
-                ServiceModel.run_id,
-            ],
-            update=update_data,
-        ).execute()
+
+        # Use INSERT OR IGNORE - services are only added, not updated
+        ServiceModel.insert(**insert_data).on_conflict_ignore().execute()
 
         logger.debug(
-            "
+            "Registered service %s (experiment=%s, run=%s)",
             service_id,
             experiment_id,
             run_id,
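
register_service now relies on peewee's on_conflict_ignore() (INSERT OR IGNORE on SQLite), so registering the same (service_id, experiment_id, run_id) twice is a no-op rather than an update; runtime state stays on the Service object. A standalone peewee sketch of that behaviour, with an invented model rather than experimaestro's ServiceModel:

from peewee import CharField, CompositeKey, Model, SqliteDatabase

db = SqliteDatabase(":memory:")


class ToyServiceRow(Model):
    service_id = CharField()
    experiment_id = CharField()
    run_id = CharField()
    description = CharField()

    class Meta:
        database = db
        primary_key = CompositeKey("service_id", "experiment_id", "run_id")


db.create_tables([ToyServiceRow])
row = {
    "service_id": "tb",
    "experiment_id": "exp1",
    "run_id": "r1",
    "description": "first",
}
ToyServiceRow.insert(**row).on_conflict_ignore().execute()
# A second insert with the same key is silently ignored: no update happens.
ToyServiceRow.insert(**{**row, "description": "second"}).on_conflict_ignore().execute()
assert ToyServiceRow.get(ToyServiceRow.service_id == "tb").description == "first"
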
@@ -1701,70 +2026,68 @@ class WorkspaceStateProvider:
                     "serviceId": service_id,
                     "experimentId": experiment_id,
                     "runId": run_id,
-                    "state": state,
                     "description": description,
                 },
             )
         )
 
-
-
-
-
-        """Get services, optionally filtered by experiment/run
+    def _get_live_services(
+        self, experiment_id: Optional[str], run_id: Optional[str]
+    ) -> Optional[List["Service"]]:
+        """Get live services from scheduler if available.
 
-
-
-
-
+        Returns None if no live services (experiment not in scheduler).
+        """
+        if experiment_id is None:
+            return None
 
-
-
-            run_id: Filter by run (None = current run if experiment_id provided)
+        try:
+            from experimaestro.scheduler.base import Scheduler
 
-
-
+            if not Scheduler.has_instance():
+                return None
+
+            scheduler = Scheduler.instance()
+            if experiment_id not in scheduler.experiments:
+                logger.debug("Experiment %s not in scheduler", experiment_id)
+                return None
+
+            exp = scheduler.experiments[experiment_id]
+            services = list(exp.services.values())
+            logger.debug(
+                "Returning %d live services for experiment %s",
+                len(services),
+                experiment_id,
+            )
+            return services
+
+        except Exception as e:
+            logger.warning("Could not get live services: %s", e)
+            return None
+
+    @_with_db_context
+    def _fetch_services_from_storage(
+        self, experiment_id: Optional[str], run_id: Optional[str]
+    ) -> List["Service"]:
+        """Fetch services from database.
+
+        Called when no live services and cache is empty.
         """
         from experimaestro.scheduler.services import Service
 
-        # First, check for live services from the scheduler
-        if experiment_id is not None:
-            try:
-                from experimaestro.scheduler.base import Scheduler
-
-                if Scheduler.has_instance():
-                    scheduler = Scheduler.instance()
-                    # Check if experiment is registered with scheduler
-                    if experiment_id in scheduler.experiments:
-                        exp = scheduler.experiments[experiment_id]
-                        services = list(exp.services.values())
-                        logger.debug(
-                            "Returning %d live services for experiment %s",
-                            len(services),
-                            experiment_id,
-                        )
-                        return services
-            except Exception as e:
-                # Scheduler not available or error - fall back to database
-                logger.debug("Could not get live services: %s", e)
-
-        # Fall back to database
         query = ServiceModel.select()
 
         if experiment_id is not None:
-            # Use current run if not specified
-            if run_id is None:
-                run_id = self.get_current_run(experiment_id)
-                if run_id is None:
-                    return []
-
             query = query.where(
                 (ServiceModel.experiment_id == experiment_id)
                 & (ServiceModel.run_id == run_id)
             )
 
         services = []
+
         for service_model in query:
+            service_id = service_model.service_id
+
             # Try to recreate service from state_dict
             state_dict_json = service_model.state_dict
             if state_dict_json and state_dict_json != "{}":
@@ -1772,20 +2095,89 @@ class WorkspaceStateProvider:
                     state_dict = json.loads(state_dict_json)
                     if "__class__" in state_dict:
                         service = Service.from_state_dict(state_dict)
-                        # Set the id from the database record
-                        service.id = service_model.service_id
-                        services.append(service)
-                        continue
                 except Exception as e:
+                    service = MockService(
+                        service_id,
+                        f"error: {e}",
+                        {},
+                        experiment_id=experiment_id,
+                        run_id=run_id,
+                    )
+
                     logger.warning(
                         "Failed to recreate service %s from state_dict: %s",
-
+                        service_id,
                         e,
                     )
-
-
-
-
+            else:
+                # If we can't recreate, skip this service (it's not usable)
+                logger.debug(
+                    "Service %s has no state_dict for recreation, skipping",
+                    service_id,
+                )
+                service = MockService(
+                    service_id,
+                    "error: no state_dict",
+                    {},
+                    experiment_id=experiment_id,
+                    run_id=run_id,
+                )
+
+            # Add to services
+            service.id = service_id
+            services.append(service)
+            continue
+
+        return services
+
+    @_with_db_context
+    def get_services_raw(
+        self, experiment_id: Optional[str] = None, run_id: Optional[str] = None
+    ) -> List[Dict]:
+        """Get raw service data from database without recreating Service objects
+
+        This is useful for remote monitoring where the client may have different
+        modules installed than the server. Returns dictionaries with service
+        metadata that can be serialized over JSON-RPC.
+
+        Args:
+            experiment_id: Filter by experiment (None = all)
+            run_id: Filter by run (None = current run if experiment_id provided)
+
+        Returns:
+            List of dictionaries with service data
+        """
+        query = ServiceModel.select()
+
+        if experiment_id is not None:
+            # Use current run if not specified
+            if run_id is None:
+                run_id = self.get_current_run(experiment_id)
+                if run_id is None:
+                    return []
+
+            query = query.where(
+                (ServiceModel.experiment_id == experiment_id)
+                & (ServiceModel.run_id == run_id)
+            )
+
+        services = []
+        for service_model in query:
+            state_dict = {}
+            if service_model.state_dict and service_model.state_dict != "{}":
+                try:
+                    state_dict = json.loads(service_model.state_dict)
+                except json.JSONDecodeError:
+                    pass
+
+            services.append(
+                {
+                    "service_id": service_model.service_id,
+                    "description": service_model.description,
+                    "state_dict": state_dict,
+                    "experiment_id": service_model.experiment_id,
+                    "run_id": service_model.run_id,
+                }
             )
 
         return services
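
get_services_raw() returns plain dictionaries precisely so that a remote client can display services without importing the server-side service classes. Below is a sketch of the client-side half, using an invented stand-in with the same constructor shape as the MockService class added earlier in this diff.

from dataclasses import dataclass
from typing import Dict, List, Optional


@dataclass
class DisplayService:
    """Stand-in with the same constructor shape as MockService (illustrative)."""

    id: str
    description: str
    state_dict: Dict
    experiment_id: Optional[str] = None
    run_id: Optional[str] = None


def build_display_services(raw_services: List[Dict]) -> List[DisplayService]:
    # get_services_raw() returns plain dicts; rebuild lightweight display objects
    return [
        DisplayService(
            raw["service_id"],
            raw.get("description", ""),
            raw.get("state_dict", {}),
            experiment_id=raw.get("experiment_id"),
            run_id=raw.get("run_id"),
        )
        for raw in raw_services
    ]


raw = [
    {
        "service_id": "tensorboard",
        "description": "TensorBoard",
        "state_dict": {},
        "experiment_id": "exp1",
        "run_id": "r1",
    }
]
print(build_display_services(raw)[0].id)  # -> tensorboard
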
@@ -1855,13 +2247,19 @@ class WorkspaceStateProvider:
         Returns:
             datetime of last sync, or None if never synced
         """
+        from peewee import OperationalError
+
         from .state_db import WorkspaceSyncMetadata
 
-
-        WorkspaceSyncMetadata.
-
-
-
+        try:
+            metadata = WorkspaceSyncMetadata.get_or_none(
+                WorkspaceSyncMetadata.id == "workspace"
+            )
+            if metadata and metadata.last_sync_time:
+                return metadata.last_sync_time
+        except OperationalError:
+            # Table might not exist in older workspaces opened in read-only mode
+            pass
         return None
 
     @_with_db_context
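
get_last_sync_time now distinguishes a missing row (get_or_none returns None) from a missing table (peewee raises OperationalError, e.g. an older workspace opened read-only). A minimal peewee sketch of those two cases, with an invented model:

from peewee import CharField, DateTimeField, Model, OperationalError, SqliteDatabase

db = SqliteDatabase(":memory:")


class SyncMeta(Model):
    id = CharField(primary_key=True)
    last_sync_time = DateTimeField(null=True)

    class Meta:
        database = db


try:
    # Table never created (e.g. an old workspace opened read-only): OperationalError
    SyncMeta.get_or_none(SyncMeta.id == "workspace")
except OperationalError:
    print("table missing")

db.create_tables([SyncMeta])
print(SyncMeta.get_or_none(SyncMeta.id == "workspace"))  # -> None (row missing)
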
@@ -2333,13 +2731,36 @@ class SchedulerListener:
             experiment_id: Experiment identifier
             run_id: Run identifier
         """
+        from experimaestro.scheduler.services import Service
+
         try:
-
+            # Get state_dict for service recreation
+            state_dict_json = None
+            try:
+                # _full_state_dict includes __class__ automatically
+                state_dict = service._full_state_dict()
+                # Serialize paths automatically
+                serialized = Service.serialize_state_dict(state_dict)
+                state_dict_json = json.dumps(serialized)
+            except Exception as e:
+                # Service cannot be serialized - store unserializable marker
+                logger.warning(
+                    "Could not get state_dict for service %s: %s", service.id, e
+                )
+                state_dict_json = json.dumps(
+                    {
+                        "__class__": f"{service.__class__.__module__}.{service.__class__.__name__}",
+                        "__unserializable__": True,
+                        "__reason__": f"Cannot serialize: {e}",
+                    }
+                )
+
+            self.state_provider.register_service(
                 service.id,
                 experiment_id,
                 run_id,
                 service.description(),
-
+                state_dict=state_dict_json,
             )
         except Exception as e:
             logger.exception("Error updating service %s: %s", service.id, e)
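
The listener now persists a JSON state_dict for each service and falls back to an "__unserializable__" marker when serialization fails. A standalone sketch of that fallback pattern follows; dump_state and the toy services are illustrative, while the package itself uses service._full_state_dict() and Service.serialize_state_dict().

import json


def dump_state(obj) -> str:
    """Serialize obj.state_dict() to JSON, or store an explanatory marker."""
    try:
        return json.dumps(obj.state_dict())
    except Exception as exc:  # not JSON-serializable: keep enough info to explain why
        return json.dumps(
            {
                "__class__": f"{obj.__class__.__module__}.{obj.__class__.__name__}",
                "__unserializable__": True,
                "__reason__": f"Cannot serialize: {exc}",
            }
        )


class GoodService:
    def state_dict(self):
        return {"__class__": "demo.GoodService", "port": 6006}


class BadService:
    def state_dict(self):
        return {"socket": object()}  # json.dumps will raise TypeError on this


print(dump_state(GoodService()))  # regular JSON payload
print(json.loads(dump_state(BadService()))["__unserializable__"])  # -> True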
|