datamint 2.3.2__py3-none-any.whl → 2.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamint/api/base_api.py +66 -8
- datamint/api/client.py +6 -3
- datamint/api/dto/__init__.py +10 -2
- datamint/api/endpoints/annotations_api.py +47 -7
- datamint/api/endpoints/projects_api.py +36 -34
- datamint/api/endpoints/resources_api.py +75 -28
- datamint/api/entity_base_api.py +11 -43
- datamint/apihandler/dto/annotation_dto.py +6 -2
- datamint/configs.py +6 -0
- datamint/dataset/base_dataset.py +3 -3
- datamint/entities/__init__.py +4 -2
- datamint/entities/annotation.py +74 -4
- datamint/entities/base_entity.py +47 -6
- datamint/entities/cache_manager.py +302 -0
- datamint/entities/datasetinfo.py +108 -1
- datamint/entities/project.py +47 -6
- datamint/entities/resource.py +146 -19
- datamint/types.py +17 -0
- {datamint-2.3.2.dist-info → datamint-2.3.3.dist-info}/METADATA +2 -1
- {datamint-2.3.2.dist-info → datamint-2.3.3.dist-info}/RECORD +22 -20
- {datamint-2.3.2.dist-info → datamint-2.3.3.dist-info}/WHEEL +0 -0
- {datamint-2.3.2.dist-info → datamint-2.3.3.dist-info}/entry_points.txt +0 -0
datamint/apihandler/dto/annotation_dto.py
CHANGED

@@ -17,7 +17,11 @@ Classes:
 import json
 from typing import Any, TypeAlias, Literal
 import logging
-
+import sys
+if sys.version_info >= (3, 11):
+    from enum import StrEnum
+else:
+    from backports.strenum import StrEnum
 from medimgkit.dicom_utils import pixel_to_patient
 import pydicom
 import numpy as np
@@ -31,7 +35,7 @@ CoordinateSystem: TypeAlias = Literal['pixel', 'patient']
 """


-class AnnotationType(
+class AnnotationType(StrEnum):
     SEGMENTATION = 'segmentation'
     AREA = 'area'
     DISTANCE = 'distance'
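For context on the change above: `enum.StrEnum` exists only in the Python 3.11+ standard library, and the `backports.strenum` package provides the same class for older interpreters. A minimal sketch of why this matters for callers, since `StrEnum` members are real strings and keep existing string comparisons working:

```python
import sys

if sys.version_info >= (3, 11):
    from enum import StrEnum
else:
    from backports.strenum import StrEnum  # third-party backport for Python < 3.11

class AnnotationType(StrEnum):
    SEGMENTATION = 'segmentation'

# StrEnum members are str instances, so code that compared plain strings
# against annotation_type values keeps working unchanged:
assert AnnotationType.SEGMENTATION == 'segmentation'
assert isinstance(AnnotationType.SEGMENTATION, str)
```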
datamint/configs.py
CHANGED

@@ -18,6 +18,12 @@ _LOGGER = logging.getLogger(__name__)

 DIRS = PlatformDirs(appname='datamintapi')
 CONFIG_FILE = os.path.join(DIRS.user_config_dir, 'datamintapi.yaml')
+try:
+    DATAMINT_DATA_DIR = os.path.join(os.path.expanduser("~"), '.datamint')
+except Exception as e:
+    _LOGGER.error(f"Could not determine home directory: {e}")
+    DATAMINT_DATA_DIR = None
+


 def get_env_var_name(key: str) -> str:
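A quick illustrative sketch of what the new constant resolves to (the exact path depends on the user's home directory):

```python
import os

# Mirrors the logic added above: e.g. /home/alice/.datamint on Linux,
# C:\Users\alice\.datamint on Windows.
print(os.path.join(os.path.expanduser("~"), '.datamint'))
```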
datamint/dataset/base_dataset.py
CHANGED

@@ -19,6 +19,7 @@ from pathlib import Path
 from datamint.entities import Annotation, DatasetInfo
 import cv2
 from datamint.entities import Resource
+import datamint.configs

 _LOGGER = logging.getLogger(__name__)

@@ -54,7 +55,7 @@ class DatamintBaseDataset:
         exclude_frame_label_names: List of frame label names to exclude. If None, no frame labels will be excluded.
     """

-
+
    DATAMINT_DATASETS_DIR = "datasets"

    def __init__(
@@ -183,8 +184,7 @@ class DatamintBaseDataset:
        """Setup root and dataset directories."""
        if root is None:
            root = os.path.join(
-
-                self.DATAMINT_DEFAULT_DIR,
+                datamint.configs.DATAMINT_DATA_DIR,
                self.DATAMINT_DATASETS_DIR
            )
            os.makedirs(root, exist_ok=True)
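Combined with the configs.py change, the default dataset root becomes `~/.datamint/datasets`. A minimal sketch of the resolution logic, assuming `DATAMINT_DATA_DIR` resolved successfully (it can be None when the home directory cannot be determined):

```python
import os
import datamint.configs

# Default root used by DatamintBaseDataset when `root` is not given:
root = os.path.join(datamint.configs.DATAMINT_DATA_DIR, "datasets")
os.makedirs(root, exist_ok=True)  # e.g. ~/.datamint/datasets
```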
datamint/entities/__init__.py
CHANGED

@@ -7,14 +7,16 @@ from .project import Project
 from .resource import Resource
 from .user import User  # new export
 from .datasetinfo import DatasetInfo
+from .cache_manager import CacheManager

 __all__ = [
     'Annotation',
     'BaseEntity',
+    'CacheManager',
     'Channel',
     'ChannelResourceData',
+    'DatasetInfo',
     'Project',
     'Resource',
-
-    'DatasetInfo',
+    'User',
 ]
datamint/entities/annotation.py
CHANGED

@@ -5,11 +5,20 @@ This module defines the Annotation model used to represent annotation
 records returned by the DataMint API.
 """

-from typing import Any
+from typing import TYPE_CHECKING, Any
 import logging
+import os
+
 from .base_entity import BaseEntity, MISSING_FIELD
-from
+from .cache_manager import CacheManager
+from pydantic import PrivateAttr
 from datetime import datetime
+from datamint.api.dto import AnnotationType
+from datamint.types import ImagingData
+
+if TYPE_CHECKING:
+    from datamint.api.endpoints.annotations_api import AnnotationsApi
+    from .resource import Resource

 logger = logging.getLogger(__name__)

@@ -21,6 +30,8 @@ _FIELD_MAPPING = {
     'index': 'frame_index',
 }

+_ANNOTATION_CACHE_KEY = "annotation_data"
+

 class Annotation(BaseEntity):
     """Pydantic Model representing a DataMint annotation.
@@ -60,7 +71,7 @@ class Annotation(BaseEntity):
     identifier: str
     scope: str
     frame_index: int | None
-    annotation_type:
+    annotation_type: AnnotationType
     text_value: str | None
     numeric_value: float | int | None
     units: str | None
@@ -83,7 +94,66 @@ class Annotation(BaseEntity):
     annotation_worklist_name: str | None
     user_info: dict | None
     values: list | None = MISSING_FIELD
-    file: str | None = None
+    file: str | None = None
+
+    _api: 'AnnotationsApi' = PrivateAttr()
+
+    def __init__(self, **data):
+        """Initialize the annotation entity."""
+        super().__init__(**data)
+        self._cache: CacheManager = CacheManager('annotations')
+        self._resource: 'Resource | None' = None
+
+    @property
+    def resource(self) -> 'Resource':
+        """Lazily load and cache the associated Resource entity."""
+        if self._resource is None:
+            self._resource = self._api._get_resource(self)
+        return self._resource
+
+    def fetch_file_data(
+        self,
+        save_path: os.PathLike | str | None = None,
+        auto_convert: bool = True,
+        use_cache: bool = False,
+    ) -> bytes | ImagingData:
+        # Version info for cache validation
+        version_info = self._generate_version_info()
+
+        # Try to get from cache
+        img_data = None
+        if use_cache:
+            img_data = self._cache.get(self.id, _ANNOTATION_CACHE_KEY, version_info)
+
+        if img_data is None:
+            # Fetch from server using download_resource_file
+            logger.debug(f"Fetching image data from server for resource {self.id}")
+            img_data = self._api.download_file(
+                self,
+                fpath_out=save_path
+            )
+            # Cache the data
+            if use_cache:
+                self._cache.set(self.id, _ANNOTATION_CACHE_KEY, img_data, version_info)
+
+        if auto_convert:
+            return self._api.convert_format(img_data)
+
+        return img_data
+
+    def _generate_version_info(self) -> dict:
+        """Helper to generate version info for caching."""
+        return {
+            'created_at': self.created_at,
+            'deleted_at': self.deleted_at,
+            'associated_file': self.associated_file,
+        }
+
+    def invalidate_cache(self) -> None:
+        """Invalidate all cached data for this annotation."""
+        self._cache.invalidate(self.id)
+        self._resource = None
+        logger.debug(f"Invalidated cache for annotation {self.id}")

     @classmethod
     def from_dict(cls, data: dict[str, Any]) -> 'Annotation':
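A hedged usage sketch of the new cache-aside flow in `fetch_file_data` (the `api.annotations.get_by_id` lookup is assumed from the endpoint layout; it is not shown in this diff):

```python
# `api` is assumed to be an authenticated datamint.api.client.Api instance.
ann = api.annotations.get_by_id("some-annotation-id")  # hypothetical lookup

# With use_cache=True, the first call downloads and stores the file under
# ~/.datamint/annotations/<id>/; later calls are served locally as long as
# created_at/deleted_at/associated_file (the version info) are unchanged.
data = ann.fetch_file_data(use_cache=True)

# Drop the cached file and the lazily loaded Resource when they may be stale:
ann.invalidate_cache()
```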
datamint/entities/base_entity.py
CHANGED

@@ -1,7 +1,11 @@
 import logging
 import sys
-from typing import Any
-from pydantic import ConfigDict, BaseModel
+from typing import Any, TYPE_CHECKING
+from pydantic import ConfigDict, BaseModel, PrivateAttr
+
+if TYPE_CHECKING:
+    from datamint.api.client import Api
+    from datamint.api.entity_base_api import EntityBaseApi

 if sys.version_info >= (3, 11):
     from typing import Self
@@ -22,9 +26,14 @@ class BaseEntity(BaseModel):
     This class provides common functionality for all entities, such as
     serialization and deserialization from dictionaries, as well as
     handling unknown fields gracefully.
+
+    The API client is automatically injected by the Api class when entities
+    are created through API endpoints.
     """

-    model_config = ConfigDict(extra='allow')  # Allow extra fields
+    model_config = ConfigDict(extra='allow', arbitrary_types_allowed=True)  # Allow extra fields and arbitrary types
+
+    _api: 'EntityBaseApi[Self] | EntityBaseApi' = PrivateAttr()

     def asdict(self) -> dict[str, Any]:
         """Convert the entity to a dictionary, including unknown fields."""
@@ -38,14 +47,46 @@
         """Handle unknown fields by logging a warning once per class/field combination in debug mode."""
         if self.__pydantic_extra__ and _LOGGER.isEnabledFor(logging.DEBUG):
             class_name = self.__class__.__name__
-
+
             have_to_log = False
             for key in self.__pydantic_extra__.keys():
                 warning_key = (class_name, key)
-
+
                 if warning_key not in _LOGGED_WARNINGS:
                     _LOGGED_WARNINGS.add(warning_key)
                     have_to_log = True
-
+
             if have_to_log:
                 _LOGGER.warning(f"Unknown fields {list(self.__pydantic_extra__.keys())} found in {class_name}")
+
+    @staticmethod
+    def is_attr_missing(value: Any) -> bool:
+        """Check if a value is the MISSING_FIELD sentinel."""
+        return value == MISSING_FIELD
+
+    def _refresh(self) -> Self:
+        """Refresh the entity data from the server.
+
+        This method fetches the latest data from the server and updates
+        the current instance with any missing or updated fields.
+
+        Returns:
+            The updated Entity instance (self)
+        """
+        updated_ent = self._api.get_by_id(self._api._entid(self))
+
+        # Update all fields from the fresh data
+        for field_name, field_value in updated_ent.model_dump().items():
+            if field_value != MISSING_FIELD:
+                setattr(self, field_name, field_value)
+
+        return self
+
+    def _ensure_attr(self, attr_name: str) -> None:
+        """Ensure that a given attribute is not MISSING_FIELD, refreshing if necessary.
+
+        Args:
+            attr_name: Name of the attribute to check and ensure
+        """
+        if self.is_attr_missing(getattr(self, attr_name)):
+            self._refresh()
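The `MISSING_FIELD` sentinel plus `_ensure_attr`/`_refresh` let partially populated entities (e.g., from list endpoints) fetch their full record on demand. A sketch with a hypothetical subclass (`Report` and `summary` are made up for illustration):

```python
from datamint.entities.base_entity import BaseEntity, MISSING_FIELD

class Report(BaseEntity):  # hypothetical entity, not part of the package
    id: str
    summary: str | None = MISSING_FIELD  # may be omitted by list endpoints

    @property
    def full_summary(self) -> str | None:
        # If the field was never populated, refresh the whole entity once
        # via the injected _api client, then return the fresh value.
        self._ensure_attr('summary')
        return self.summary
```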
datamint/entities/cache_manager.py
ADDED

@@ -0,0 +1,302 @@
+"""Cache manager for storing and retrieving entity-related data locally.
+
+This module provides caching functionality for resource data (images, segmentations, etc.)
+with automatic validation against server versions to ensure data freshness.
+"""
+
+import hashlib
+import json
+import logging
+import pickle
+from datetime import datetime
+from pathlib import Path
+from typing import Any, TypeVar, Generic
+from pydantic import BaseModel
+# import appdirs
+import datamint.configs
+
+_LOGGER = logging.getLogger(__name__)
+
+T = TypeVar('T')
+
+
+class CacheManager(Generic[T]):
+    """Manages local caching of entity data with versioning support.
+
+    This class handles storing and retrieving cached data with automatic
+    validation against server versions to ensure data consistency.
+
+    The cache uses a directory structure:
+    - cache_root/
+        - resources/
+            - {resource_id}/
+                - image_data.pkl
+                - metadata.json
+        - annotations/
+            - {annotation_id}/
+                - segmentation_data.pkl
+                - metadata.json
+
+    Attributes:
+        cache_root: Root directory for cache storage
+        entity_type: Type of entity being cached (e.g., 'resources', 'annotations')
+    """
+
+    class ItemMetadata(BaseModel):
+        cached_at: datetime
+        data_path: str
+        data_type: str
+        mimetype: str
+        version_hash: str | None = None
+        version_info: dict | None = None
+        entity_id: str | None = None
+
+    def __init__(self, entity_type: str, cache_root: Path | str | None = None):
+        """Initialize the cache manager.
+
+        Args:
+            entity_type: Type of entity (e.g., 'resources', 'annotations')
+            cache_root: Root directory for cache. If None, uses system cache directory.
+        """
+        self.entity_type = entity_type
+
+        if cache_root is None:
+            # Use platform-specific cache directory
+            # app_cache_dir = appdirs.user_cache_dir('datamint', 'sonance')
+            # cache_root = Path(app_cache_dir) / 'entity_cache'
+            cache_root = Path(datamint.configs.DATAMINT_DATA_DIR)
+        else:
+            cache_root = Path(cache_root)
+
+        self.cache_root = cache_root / entity_type
+
+    def _get_entity_cache_dir(self, entity_id: str) -> Path:
+        """Get the cache directory for a specific entity.
+
+        Args:
+            entity_id: Unique identifier for the entity
+
+        Returns:
+            Path to the entity's cache directory
+        """
+        entity_dir = self.cache_root / entity_id
+        entity_dir = entity_dir.resolve().absolute()
+        entity_dir.mkdir(parents=True, exist_ok=True)
+        return entity_dir
+
+    def _get_metadata_path(self, entity_id: str) -> Path:
+        """Get the path to the metadata file for an entity.
+
+        Args:
+            entity_id: Unique identifier for the entity
+
+        Returns:
+            Path to the metadata file
+        """
+        return self._get_entity_cache_dir(entity_id) / 'metadata.json'
+
+    def _get_data_path(self, entity_id: str, data_key: str) -> Path:
+        """Get the path to a data file for an entity.
+
+        Args:
+            entity_id: Unique identifier for the entity
+            data_key: Key identifying the type of data (e.g., 'image_data', 'segmentation')
+
+        Returns:
+            Path to the data file
+        """
+        return self._get_entity_cache_dir(entity_id) / f"{data_key}.pkl"
+
+    def _compute_version_hash(self, version_info: dict[str, Any]) -> str:
+        """Compute a hash from version information.
+
+        Args:
+            version_info: Dictionary containing version information (e.g., updated_at, size)
+
+        Returns:
+            Hash string representing the version
+        """
+        # Sort keys for consistent hashing
+        sorted_info = json.dumps(version_info, sort_keys=True)
+        return hashlib.sha256(sorted_info.encode()).hexdigest()
+
+    def get(
+        self,
+        entity_id: str,
+        data_key: str,
+        version_info: dict[str, Any] | None = None
+    ) -> T | None:
+        """Retrieve cached data for an entity.
+
+        Args:
+            entity_id: Unique identifier for the entity
+            data_key: Key identifying the type of data
+            version_info: Optional version information from server to validate cache
+
+        Returns:
+            Cached data if valid, None if cache miss or invalid
+        """
+        metadata_path = self._get_metadata_path(entity_id)
+        data_path = self._get_data_path(entity_id, data_key)
+
+        # Check if cache exists
+        if not metadata_path.exists() or not data_path.exists():
+            _LOGGER.debug(f"Cache miss for {entity_id}/{data_key}")
+            return None
+
+        try:
+            # Load or create metadata
+            with open(metadata_path, 'r') as f:
+                jsondata = f.read()
+            cached_metadata = CacheManager.ItemMetadata.model_validate_json(jsondata)
+
+            # Validate version if provided
+            if version_info is not None:
+                server_version = self._compute_version_hash(version_info)
+
+                if server_version != cached_metadata.version_hash:
+                    _LOGGER.debug(
+                        f"Cache version mismatch for {entity_id}/{data_key}. "
+                        f"Server: {server_version}, Cached: {cached_metadata.version_hash}"
+                    )
+                    return None
+
+            data = self._load_data(cached_metadata)
+
+            _LOGGER.debug(f"Cache hit for {entity_id}/{data_key}")
+            return data
+
+        except Exception as e:
+            _LOGGER.warning(f"Error reading cache for {entity_id}/{data_key}: {e}")
+            return None
+
+    def set(
+        self,
+        entity_id: str,
+        data_key: str,
+        data: T,
+        version_info: dict[str, Any] | None = None
+    ) -> None:
+        """Store data in cache for an entity.
+
+        Args:
+            entity_id: Unique identifier for the entity
+            data_key: Key identifying the type of data
+            data: Data to cache
+            version_info: Optional version information from server
+        """
+        metadata_path = self._get_metadata_path(entity_id)
+        data_path = self._get_data_path(entity_id, data_key)
+
+        try:
+            mimetype = self._save_data(data_path, data)
+
+            metadata = CacheManager.ItemMetadata(
+                cached_at=datetime.now(),
+                data_path=str(data_path.absolute()),
+                data_type=type(data).__name__,
+                mimetype=mimetype,
+                entity_id=entity_id
+            )
+
+            # Update metadata for this data key
+
+            if version_info is not None:
+                metadata.version_hash = self._compute_version_hash(version_info)
+                # Store version_info as JSON string to ensure metadata is JSON-serializable
+                metadata.version_info = version_info
+
+            # Save metadata
+            with open(metadata_path, 'w') as f:
+                f.write(metadata.model_dump_json(indent=2))
+
+            _LOGGER.debug(f"Cached data for {entity_id}/{data_key}")
+
+        except Exception as e:
+            _LOGGER.warning(f"Error writing cache for {entity_id}/{data_key}: {e}")
+
+    def _load_data(self,
+                   metadata: 'CacheManager.ItemMetadata') -> T:
+        path = metadata.data_path
+        if metadata.mimetype == 'application/octet-stream':
+            with open(path, 'rb') as f:
+                return f.read()
+        else:
+            with open(path, 'rb') as f:
+                return pickle.load(f)
+
+
+    def _save_data(self, path: Path, data: T) -> str:
+        """
+        Save data and returns the mimetype
+        """
+        if isinstance(data, bytes):
+            with open(path, 'wb') as f:
+                f.write(data)
+            return 'application/octet-stream'
+        else:
+            with open(path, 'wb') as f:
+                pickle.dump(data, f)
+            return 'application/x-python-serialize'
+
+    def invalidate(self, entity_id: str, data_key: str | None = None) -> None:
+        """Invalidate cached data for an entity.
+
+        Args:
+            entity_id: Unique identifier for the entity
+            data_key: Optional key for specific data. If None, invalidates all data for entity.
+        """
+        if data_key is None:
+            # Invalidate entire entity cache
+            entity_dir = self._get_entity_cache_dir(entity_id)
+            if entity_dir.exists():
+                import shutil
+                shutil.rmtree(entity_dir)
+            _LOGGER.debug(f"Invalidated all cache for {entity_id}")
+        else:
+            # Invalidate specific data
+            data_path = self._get_data_path(entity_id, data_key)
+            if data_path.exists():
+                data_path.unlink()
+                _LOGGER.debug(f"Invalidated cache for {entity_id}/{data_key}")
+
+            # Update metadata
+            metadata_path = self._get_metadata_path(entity_id)
+            if metadata_path.exists():
+                with open(metadata_path, 'r') as f:
+                    metadata = json.load(f)
+
+                if data_key in metadata:
+                    del metadata[data_key]
+
+                with open(metadata_path, 'w') as f:
+                    json.dump(metadata, f, indent=2)
+
+    def clear_all(self) -> None:
+        """Clear all cached data for this entity type."""
+        if self.cache_root.exists():
+            import shutil
+            shutil.rmtree(self.cache_root)
+        self.cache_root.mkdir(parents=True, exist_ok=True)
+        _LOGGER.info(f"Cleared all cache for {self.entity_type}")
+
+    def get_cache_info(self, entity_id: str) -> dict[str, Any]:
+        """Get information about cached data for an entity.
+
+        Args:
+            entity_id: Unique identifier for the entity
+
+        Returns:
+            Dictionary containing cache information
+        """
+        metadata_path = self._get_metadata_path(entity_id)
+
+        if not metadata_path.exists():
+            return {}
+
+        try:
+            with open(metadata_path, 'r') as f:
+                return json.load(f)
+        except Exception as e:
+            _LOGGER.warning(f"Error reading cache info for {entity_id}: {e}")
+            return {}
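A minimal usage sketch of the new class (IDs and payload are made up). One design note: `metadata.json` is stored per entity rather than per data key, so the most recent `set()` determines the version hash used for validation.

```python
from datamint.entities.cache_manager import CacheManager

cache = CacheManager('resources')        # stored under ~/.datamint/resources/
version = {'updated_at': '2024-01-01'}   # any JSON-serializable version info

# Cache miss returns None; then store raw bytes alongside a version hash.
if cache.get('res-123', 'image_data', version) is None:
    cache.set('res-123', 'image_data', b'\x00\x01', version)

# Same version info -> hit; changed version info -> treated as stale (None).
assert cache.get('res-123', 'image_data', version) == b'\x00\x01'
assert cache.get('res-123', 'image_data', {'updated_at': '2024-02-02'}) is None
```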
datamint/entities/datasetinfo.py
CHANGED

@@ -1,14 +1,24 @@
-"""
+"""Dataset entity module for DataMint API."""

 from datetime import datetime
 import logging
+from typing import TYPE_CHECKING, Sequence
+
 from .base_entity import BaseEntity, MISSING_FIELD

+if TYPE_CHECKING:
+    from datamint.api.client import Api
+    from .resource import Resource
+    from .project import Project
+
 logger = logging.getLogger(__name__)


 class DatasetInfo(BaseEntity):
     """Pydantic Model representing a DataMint dataset.
+
+    This class provides access to dataset information and related entities
+    like resources and projects.
     """

     id: str
@@ -20,3 +30,100 @@ class DatasetInfo(BaseEntity):
     updated_at: str | None
     total_resource: int
     resource_ids: list[str]
+
+    def __init__(self, **data):
+        """Initialize the dataset info entity."""
+        super().__init__(**data)
+        self._manager: EntityManager['DatasetInfo'] = EntityManager(self)
+
+        # Cache for lazy-loaded data
+        self._resources_cache: Sequence['Resource'] | None = None
+        self._projects_cache: Sequence['Project'] | None = None
+
+    def _inject_api(self, api: 'Api') -> None:
+        """Inject API client into this dataset (called automatically by Api class)."""
+        self._manager.set_api(api)
+
+    def get_resources(
+        self,
+        refresh: bool = False,
+        limit: int | None = None
+    ) -> Sequence['Resource']:
+        """Get all resources in this dataset.
+
+        Results are cached after the first call unless refresh=True.
+
+        Args:
+            api: Optional API client. Uses the one from set_api() if not provided.
+            refresh: If True, bypass cache and fetch fresh data
+
+        Returns:
+            List of Resource instances in this dataset
+
+        Raises:
+            RuntimeError: If no API client is available
+
+        Example:
+            >>> dataset = api._datasetsinfo.get_by_id("dataset-id")
+            >>> dataset.set_api(api)
+            >>> resources = dataset.get_resources()
+        """
+        if refresh or self._resources_cache is None:
+            api_client = self._manager._ensure_api(api)
+
+            # Fetch resources by their IDs
+            resources = []
+            for resource_id in self.resource_ids:
+                try:
+                    resource = api_client.resources.get_by_id(resource_id)
+                    resource.set_api(api_client)
+                    resources.append(resource)
+                except Exception as e:
+                    logger.warning(f"Failed to fetch resource {resource_id}: {e}")
+
+            self._resources_cache = resources
+
+        return self._resources_cache
+
+    def get_projects(
+        self,
+        api: 'Api | None' = None,
+        refresh: bool = False
+    ) -> Sequence['Project']:
+        """Get all projects associated with this dataset.
+
+        Results are cached after the first call unless refresh=True.
+
+        Args:
+            refresh: If True, bypass cache and fetch fresh data
+
+        Returns:
+            List of Project instances
+
+        Raises:
+            RuntimeError: If no API client is available
+
+        Example:
+            >>> dataset = api.datasetsinfo.get_by_id("dataset-id")
+            >>> projects = dataset.get_projects()
+        """
+        if refresh or self._projects_cache is None:
+            api_client = self._manager.api
+
+            # Get all projects and filter by dataset_id
+            all_projects = api_client.projects.get_all()
+            projects = [p for p in all_projects if p.dataset_id == self.id]
+
+            self._projects_cache = projects
+
+        return self._projects_cache
+
+    def invalidate_cache(self) -> None:
+        """Invalidate all cached relationship data.
+
+        This forces fresh data fetches on the next access.
+        """
+        self._resources_cache = None
+        self._projects_cache = None
+        logger.debug(f"Invalidated cache for dataset {self.id}")
+
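A hedged end-to-end sketch of the lazy relations added above. Note that `get_resources()` as added references an `api` name and an `EntityManager` type that are not defined or imported in this diff, so the sketch follows the documented intent rather than a verified call path:

```python
# `api` is assumed to be an authenticated datamint.api.client.Api instance.
dataset = api.datasetsinfo.get_by_id("dataset-id")  # per the docstring example

resources = dataset.get_resources()   # fetched once, then served from cache
projects = dataset.get_projects()     # projects whose dataset_id matches

dataset.invalidate_cache()            # drop cached relations
fresh = dataset.get_resources(refresh=True)  # bypass cache explicitly
```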