datamint 2.3.1__py3-none-any.whl → 2.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,302 @@
+ """Cache manager for storing and retrieving entity-related data locally.
+
+ This module provides caching functionality for resource data (images, segmentations, etc.)
+ with automatic validation against server versions to ensure data freshness.
+ """
+
+ import hashlib
+ import json
+ import logging
+ import pickle
+ from datetime import datetime
+ from pathlib import Path
+ from typing import Any, TypeVar, Generic
+ from pydantic import BaseModel
+ # import appdirs
+ import datamint.configs
+
+ _LOGGER = logging.getLogger(__name__)
+
+ T = TypeVar('T')
+
+
+ class CacheManager(Generic[T]):
+     """Manages local caching of entity data with versioning support.
+
+     This class handles storing and retrieving cached data with automatic
+     validation against server versions to ensure data consistency.
+
+     The cache uses a directory structure:
+     - cache_root/
+         - resources/
+             - {resource_id}/
+                 - image_data.pkl
+                 - metadata.json
+         - annotations/
+             - {annotation_id}/
+                 - segmentation_data.pkl
+                 - metadata.json
+
+     Attributes:
+         cache_root: Root directory for cache storage
+         entity_type: Type of entity being cached (e.g., 'resources', 'annotations')
+     """
+
+     class ItemMetadata(BaseModel):
+         cached_at: datetime
+         data_path: str
+         data_type: str
+         mimetype: str
+         version_hash: str | None = None
+         version_info: dict | None = None
+         entity_id: str | None = None
+
+     def __init__(self, entity_type: str, cache_root: Path | str | None = None):
+         """Initialize the cache manager.
+
+         Args:
+             entity_type: Type of entity (e.g., 'resources', 'annotations')
+             cache_root: Root directory for cache. If None, uses system cache directory.
+         """
+         self.entity_type = entity_type
+
+         if cache_root is None:
+             # Use platform-specific cache directory
+             # app_cache_dir = appdirs.user_cache_dir('datamint', 'sonance')
+             # cache_root = Path(app_cache_dir) / 'entity_cache'
+             cache_root = Path(datamint.configs.DATAMINT_DATA_DIR)
+         else:
+             cache_root = Path(cache_root)
+
+         self.cache_root = cache_root / entity_type
+
+     def _get_entity_cache_dir(self, entity_id: str) -> Path:
+         """Get the cache directory for a specific entity.
+
+         Args:
+             entity_id: Unique identifier for the entity
+
+         Returns:
+             Path to the entity's cache directory
+         """
+         entity_dir = self.cache_root / entity_id
+         entity_dir = entity_dir.resolve().absolute()
+         entity_dir.mkdir(parents=True, exist_ok=True)
+         return entity_dir
+
+     def _get_metadata_path(self, entity_id: str) -> Path:
+         """Get the path to the metadata file for an entity.
+
+         Args:
+             entity_id: Unique identifier for the entity
+
+         Returns:
+             Path to the metadata file
+         """
+         return self._get_entity_cache_dir(entity_id) / 'metadata.json'
+
+     def _get_data_path(self, entity_id: str, data_key: str) -> Path:
+         """Get the path to a data file for an entity.
+
+         Args:
+             entity_id: Unique identifier for the entity
+             data_key: Key identifying the type of data (e.g., 'image_data', 'segmentation')
+
+         Returns:
+             Path to the data file
+         """
+         return self._get_entity_cache_dir(entity_id) / f"{data_key}.pkl"
+
+     def _compute_version_hash(self, version_info: dict[str, Any]) -> str:
+         """Compute a hash from version information.
+
+         Args:
+             version_info: Dictionary containing version information (e.g., updated_at, size)
+
+         Returns:
+             Hash string representing the version
+         """
+         # Sort keys for consistent hashing
+         sorted_info = json.dumps(version_info, sort_keys=True)
+         return hashlib.sha256(sorted_info.encode()).hexdigest()
+
+     def get(
+         self,
+         entity_id: str,
+         data_key: str,
+         version_info: dict[str, Any] | None = None
+     ) -> T | None:
+         """Retrieve cached data for an entity.
+
+         Args:
+             entity_id: Unique identifier for the entity
+             data_key: Key identifying the type of data
+             version_info: Optional version information from server to validate cache
+
+         Returns:
+             Cached data if valid, None if cache miss or invalid
+         """
+         metadata_path = self._get_metadata_path(entity_id)
+         data_path = self._get_data_path(entity_id, data_key)
+
+         # Check if cache exists
+         if not metadata_path.exists() or not data_path.exists():
+             _LOGGER.debug(f"Cache miss for {entity_id}/{data_key}")
+             return None
+
+         try:
+             # Load cached metadata
+             with open(metadata_path, 'r') as f:
+                 jsondata = f.read()
+             cached_metadata = CacheManager.ItemMetadata.model_validate_json(jsondata)
+
+             # Validate version if provided
+             if version_info is not None:
+                 server_version = self._compute_version_hash(version_info)
+
+                 if server_version != cached_metadata.version_hash:
+                     _LOGGER.debug(
+                         f"Cache version mismatch for {entity_id}/{data_key}. "
+                         f"Server: {server_version}, Cached: {cached_metadata.version_hash}"
+                     )
+                     return None
+
+             data = self._load_data(cached_metadata)
+
+             _LOGGER.debug(f"Cache hit for {entity_id}/{data_key}")
+             return data
+
+         except Exception as e:
+             _LOGGER.warning(f"Error reading cache for {entity_id}/{data_key}: {e}")
+             return None
+
+     def set(
+         self,
+         entity_id: str,
+         data_key: str,
+         data: T,
+         version_info: dict[str, Any] | None = None
+     ) -> None:
+         """Store data in cache for an entity.
+
+         Args:
+             entity_id: Unique identifier for the entity
+             data_key: Key identifying the type of data
+             data: Data to cache
+             version_info: Optional version information from server
+         """
+         metadata_path = self._get_metadata_path(entity_id)
+         data_path = self._get_data_path(entity_id, data_key)
+
+         try:
+             mimetype = self._save_data(data_path, data)
+
+             # Build the metadata record for this data key
+             metadata = CacheManager.ItemMetadata(
+                 cached_at=datetime.now(),
+                 data_path=str(data_path.absolute()),
+                 data_type=type(data).__name__,
+                 mimetype=mimetype,
+                 entity_id=entity_id
+             )
+
+             if version_info is not None:
+                 metadata.version_hash = self._compute_version_hash(version_info)
+                 # Keep the raw version information alongside its hash
+                 metadata.version_info = version_info
+
+             # Save metadata
+             with open(metadata_path, 'w') as f:
+                 f.write(metadata.model_dump_json(indent=2))
+
+             _LOGGER.debug(f"Cached data for {entity_id}/{data_key}")
+
+         except Exception as e:
+             _LOGGER.warning(f"Error writing cache for {entity_id}/{data_key}: {e}")
+
+     def _load_data(self,
+                    metadata: 'CacheManager.ItemMetadata') -> T:
+         path = metadata.data_path
+         if metadata.mimetype == 'application/octet-stream':
+             with open(path, 'rb') as f:
+                 return f.read()
+         else:
+             with open(path, 'rb') as f:
+                 return pickle.load(f)
+
+     def _save_data(self, path: Path, data: T) -> str:
+         """Save data and return its mimetype."""
+         if isinstance(data, bytes):
+             with open(path, 'wb') as f:
+                 f.write(data)
+             return 'application/octet-stream'
+         else:
+             with open(path, 'wb') as f:
+                 pickle.dump(data, f)
+             return 'application/x-python-serialize'
+
+     def invalidate(self, entity_id: str, data_key: str | None = None) -> None:
+         """Invalidate cached data for an entity.
+
+         Args:
+             entity_id: Unique identifier for the entity
+             data_key: Optional key for specific data. If None, invalidates all data for entity.
+         """
+         if data_key is None:
+             # Invalidate entire entity cache
+             entity_dir = self._get_entity_cache_dir(entity_id)
+             if entity_dir.exists():
+                 import shutil
+                 shutil.rmtree(entity_dir)
+                 _LOGGER.debug(f"Invalidated all cache for {entity_id}")
+         else:
+             # Invalidate specific data
+             data_path = self._get_data_path(entity_id, data_key)
+             if data_path.exists():
+                 data_path.unlink()
+                 _LOGGER.debug(f"Invalidated cache for {entity_id}/{data_key}")
+
+             # Update metadata
+             metadata_path = self._get_metadata_path(entity_id)
+             if metadata_path.exists():
+                 with open(metadata_path, 'r') as f:
+                     metadata = json.load(f)
+
+                 if data_key in metadata:
+                     del metadata[data_key]
+
+                 with open(metadata_path, 'w') as f:
+                     json.dump(metadata, f, indent=2)
+
+     def clear_all(self) -> None:
+         """Clear all cached data for this entity type."""
+         if self.cache_root.exists():
+             import shutil
+             shutil.rmtree(self.cache_root)
+         self.cache_root.mkdir(parents=True, exist_ok=True)
+         _LOGGER.info(f"Cleared all cache for {self.entity_type}")
+
+     def get_cache_info(self, entity_id: str) -> dict[str, Any]:
+         """Get information about cached data for an entity.
+
+         Args:
+             entity_id: Unique identifier for the entity
+
+         Returns:
+             Dictionary containing cache information
+         """
+         metadata_path = self._get_metadata_path(entity_id)
+
+         if not metadata_path.exists():
+             return {}
+
+         try:
+             with open(metadata_path, 'r') as f:
+                 return json.load(f)
+         except Exception as e:
+             _LOGGER.warning(f"Error reading cache info for {entity_id}: {e}")
+             return {}
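
The new CacheManager is generic over the cached payload type: entries are keyed by entity id and data key, bytes payloads are written raw while everything else is pickled, and freshness is checked by hashing server-supplied version metadata. A minimal usage sketch, assuming the module is importable from the package (the import path, entity id, and version dict below are illustrative placeholders, not taken from the diff):

    from datamint.api.cache_manager import CacheManager  # hypothetical import path

    cache: CacheManager[bytes] = CacheManager('resources')
    version = {'updated_at': '2024-01-01T00:00:00Z'}  # any JSON-serializable dict works

    cache.set('abc123', 'image_data', b'...raw bytes...', version_info=version)
    hit = cache.get('abc123', 'image_data', version_info=version)  # same hash -> cached bytes
    miss = cache.get('abc123', 'image_data',
                     version_info={'updated_at': '2024-06-01T00:00:00Z'})  # hash differs -> None
    cache.invalidate('abc123')  # drops everything cached for that entity

Passing no version_info skips the freshness check entirely, so callers that cannot cheaply obtain server metadata still get plain key-based caching.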
@@ -1,14 +1,24 @@
- """Project entity module for DataMint API."""
+ """Dataset entity module for DataMint API."""
  
  from datetime import datetime
  import logging
+ from typing import TYPE_CHECKING, Sequence
+
  from .base_entity import BaseEntity, MISSING_FIELD
  
+ if TYPE_CHECKING:
+     from datamint.api.client import Api
+     from .resource import Resource
+     from .project import Project
+
  logger = logging.getLogger(__name__)
  
  
  class DatasetInfo(BaseEntity):
      """Pydantic Model representing a DataMint dataset.
+
+     This class provides access to dataset information and related entities
+     like resources and projects.
      """
  
      id: str
@@ -20,3 +30,100 @@ class DatasetInfo(BaseEntity):
      updated_at: str | None
      total_resource: int
      resource_ids: list[str]
+
+     def __init__(self, **data):
+         """Initialize the dataset info entity."""
+         super().__init__(**data)
+         self._manager: EntityManager['DatasetInfo'] = EntityManager(self)
+
+         # Cache for lazy-loaded data
+         self._resources_cache: Sequence['Resource'] | None = None
+         self._projects_cache: Sequence['Project'] | None = None
+
+     def _inject_api(self, api: 'Api') -> None:
+         """Inject API client into this dataset (called automatically by Api class)."""
+         self._manager.set_api(api)
+
+     def get_resources(
+         self,
+         api: 'Api | None' = None,
+         refresh: bool = False,
+         limit: int | None = None
+     ) -> Sequence['Resource']:
+         """Get all resources in this dataset.
+
+         Results are cached after the first call unless refresh=True.
+
+         Args:
+             api: Optional API client. Uses the injected one if not provided.
+             refresh: If True, bypass cache and fetch fresh data
+             limit: Optional maximum number of resources to fetch
+
+         Returns:
+             List of Resource instances in this dataset
+
+         Raises:
+             RuntimeError: If no API client is available
+
+         Example:
+             >>> dataset = api.datasetsinfo.get_by_id("dataset-id")
+             >>> resources = dataset.get_resources()
+         """
+         if refresh or self._resources_cache is None:
+             api_client = self._manager._ensure_api(api)
+
+             # Fetch resources by their IDs, honoring the optional limit
+             resources = []
+             for resource_id in self.resource_ids[:limit]:
+                 try:
+                     resource = api_client.resources.get_by_id(resource_id)
+                     resource.set_api(api_client)
+                     resources.append(resource)
+                 except Exception as e:
+                     logger.warning(f"Failed to fetch resource {resource_id}: {e}")
+
+             self._resources_cache = resources
+
+         return self._resources_cache
+
+     def get_projects(
+         self,
+         api: 'Api | None' = None,
+         refresh: bool = False
+     ) -> Sequence['Project']:
+         """Get all projects associated with this dataset.
+
+         Results are cached after the first call unless refresh=True.
+
+         Args:
+             api: Optional API client. Uses the injected one if not provided.
+             refresh: If True, bypass cache and fetch fresh data
+
+         Returns:
+             List of Project instances
+
+         Raises:
+             RuntimeError: If no API client is available
+
+         Example:
+             >>> dataset = api.datasetsinfo.get_by_id("dataset-id")
+             >>> projects = dataset.get_projects()
+         """
+         if refresh or self._projects_cache is None:
+             api_client = self._manager._ensure_api(api)
+
+             # Get all projects and filter by dataset_id
+             all_projects = api_client.projects.get_all()
+             projects = [p for p in all_projects if p.dataset_id == self.id]
+
+             self._projects_cache = projects
+
+         return self._projects_cache
+
+     def invalidate_cache(self) -> None:
+         """Invalidate all cached relationship data.
+
+         This forces fresh data fetches on the next access.
+         """
+         self._resources_cache = None
+         self._projects_cache = None
+         logger.debug(f"Invalidated cache for dataset {self.id}")
+
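
DatasetInfo now lazy-loads its related entities and memoizes them per instance: the first call fetches from the server, later calls return the cached list until refresh=True or invalidate_cache() is used. A short sketch of the intended flow (the dataset id is a placeholder; the datasetsinfo accessor name is taken from the docstring examples above):

    dataset = api.datasetsinfo.get_by_id("dataset-id")

    resources = dataset.get_resources()            # first call hits the server, result memoized
    resources_again = dataset.get_resources()      # served from the in-memory cache
    projects = dataset.get_projects(refresh=True)  # bypasses the memoized list

    dataset.invalidate_cache()  # next get_resources()/get_projects() call refetches

Note that this per-instance memoization is independent of the on-disk CacheManager above: it only avoids repeat API calls within the lifetime of one DatasetInfo object.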
@@ -1,9 +1,15 @@
  """Project entity module for DataMint API."""
-
  from datetime import datetime
  import logging
+ from typing import Sequence, Literal, TYPE_CHECKING
  from .base_entity import BaseEntity, MISSING_FIELD
  from typing import Any
+ import webbrowser
+ from pydantic import PrivateAttr
+
+ if TYPE_CHECKING:
+     from datamint.api.endpoints.projects_api import ProjectsApi
+     from .resource import Resource
  
  logger = logging.getLogger(__name__)
  
@@ -36,7 +42,7 @@ class Project(BaseEntity):
      """
      id: str
      name: str
-     created_at: str  # ISO timestamp string
+     created_at: str
      created_by: str
      dataset_id: str
      worklist_id: str
@@ -50,17 +56,52 @@ class Project(BaseEntity):
      ai_model_id: str | None = MISSING_FIELD
      closed_resources_count: int = MISSING_FIELD
      resources_to_annotate_count: int = MISSING_FIELD
-     most_recent_experiment: str | None = MISSING_FIELD  # ISO timestamp string
+     most_recent_experiment: str | None = MISSING_FIELD
      annotators: list[dict] = MISSING_FIELD
-     customer_id: str | None = MISSING_FIELD
      archived_on: str | None = MISSING_FIELD
      archived_by: str | None = MISSING_FIELD
      is_active_learning: bool = MISSING_FIELD
      two_up_display: bool = MISSING_FIELD
      require_review: bool = MISSING_FIELD
  
+     _api: 'ProjectsApi' = PrivateAttr()
+
+     def fetch_resources(self) -> Sequence['Resource']:
+         """Fetch the resources associated with this project from the API.
+
+         Note that this always fetches fresh data from the server.
+
+         Returns:
+             List of Resource instances associated with the project.
+         """
+         return self._api.get_project_resources(self.id)
+
+     def set_work_status(self, resource: 'Resource', status: Literal['opened', 'annotated', 'closed']) -> None:
+         """Set the work status of a resource in this project.
+
+         Args:
+             resource: The resource whose status to update.
+             status: The new status to set.
+         """
+         return self._api.set_work_status(self, resource, status)
+
      @property
      def url(self) -> str:
          """Get the URL to access this project in the DataMint web application."""
-         base_url = "https://app.datamint.io/projects/edit"
-         return f"{base_url}/{self.id}"
+         base_url = self._api.config.web_app_url
+         return f'{base_url}/projects/edit/{self.id}'
+
+     def show(self) -> None:
+         """Open the project in the default web browser."""
+         webbrowser.open(self.url)
+
+     def as_torch_dataset(self,
+                          root_dir: str | None = None,
+                          auto_update: bool = True,
+                          return_as_semantic_segmentation: bool = False):
+         """Load this project as a torch-compatible datamint.dataset.Dataset."""
+         from datamint.dataset import Dataset
+         return Dataset(project_name=self.name,
+                        root=root_dir,
+                        auto_update=auto_update,
+                        return_as_semantic_segmentation=return_as_semantic_segmentation,
+                        all_annotations=True)
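
Taken together, the Project additions wire the entity to its endpoint through the _api private attribute, so a fetched project can act on the server directly. A sketch of the new surface, assuming projects are obtained via api.projects.get_all() as in the DatasetInfo code above (the project name below is a placeholder):

    project = next(p for p in api.projects.get_all() if p.name == "my-project")

    resources = project.fetch_resources()               # always fresh from the server
    project.set_work_status(resources[0], 'annotated')  # one of 'opened', 'annotated', 'closed'
    print(project.url)                                  # {web_app_url}/projects/edit/{id}
    project.show()                                      # opens that URL in the browser

    # Train-ready view: wraps the project as a torch-style dataset
    torch_ds = project.as_torch_dataset(return_as_semantic_segmentation=True)

The url property now derives its base from self._api.config.web_app_url instead of a hardcoded app.datamint.io address, so links follow whatever deployment the client is configured against.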