datamint 2.3.3__py3-none-any.whl → 2.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamint/__init__.py +1 -3
- datamint/api/__init__.py +0 -3
- datamint/api/base_api.py +286 -54
- datamint/api/client.py +76 -13
- datamint/api/endpoints/__init__.py +2 -2
- datamint/api/endpoints/annotations_api.py +186 -28
- datamint/api/endpoints/deploy_model_api.py +78 -0
- datamint/api/endpoints/models_api.py +1 -0
- datamint/api/endpoints/projects_api.py +38 -7
- datamint/api/endpoints/resources_api.py +227 -100
- datamint/api/entity_base_api.py +66 -7
- datamint/apihandler/base_api_handler.py +0 -1
- datamint/apihandler/dto/annotation_dto.py +2 -0
- datamint/client_cmd_tools/datamint_config.py +0 -1
- datamint/client_cmd_tools/datamint_upload.py +3 -1
- datamint/configs.py +11 -7
- datamint/dataset/base_dataset.py +24 -4
- datamint/dataset/dataset.py +1 -1
- datamint/entities/__init__.py +1 -1
- datamint/entities/annotations/__init__.py +13 -0
- datamint/entities/{annotation.py → annotations/annotation.py} +81 -47
- datamint/entities/annotations/image_classification.py +12 -0
- datamint/entities/annotations/image_segmentation.py +252 -0
- datamint/entities/annotations/volume_segmentation.py +273 -0
- datamint/entities/base_entity.py +100 -6
- datamint/entities/cache_manager.py +129 -15
- datamint/entities/datasetinfo.py +60 -65
- datamint/entities/deployjob.py +18 -0
- datamint/entities/project.py +39 -0
- datamint/entities/resource.py +310 -46
- datamint/lightning/__init__.py +1 -0
- datamint/lightning/datamintdatamodule.py +103 -0
- datamint/mlflow/__init__.py +65 -0
- datamint/mlflow/artifact/__init__.py +1 -0
- datamint/mlflow/artifact/datamint_artifacts_repo.py +8 -0
- datamint/mlflow/env_utils.py +131 -0
- datamint/mlflow/env_vars.py +5 -0
- datamint/mlflow/flavors/__init__.py +17 -0
- datamint/mlflow/flavors/datamint_flavor.py +150 -0
- datamint/mlflow/flavors/model.py +877 -0
- datamint/mlflow/lightning/callbacks/__init__.py +1 -0
- datamint/mlflow/lightning/callbacks/modelcheckpoint.py +410 -0
- datamint/mlflow/models/__init__.py +93 -0
- datamint/mlflow/tracking/datamint_store.py +76 -0
- datamint/mlflow/tracking/default_experiment.py +27 -0
- datamint/mlflow/tracking/fluent.py +91 -0
- datamint/utils/env.py +27 -0
- datamint/utils/visualization.py +21 -13
- datamint-2.9.0.dist-info/METADATA +220 -0
- datamint-2.9.0.dist-info/RECORD +73 -0
- {datamint-2.3.3.dist-info → datamint-2.9.0.dist-info}/WHEEL +1 -1
- datamint-2.9.0.dist-info/entry_points.txt +18 -0
- datamint/apihandler/exp_api_handler.py +0 -204
- datamint/experiment/__init__.py +0 -1
- datamint/experiment/_patcher.py +0 -570
- datamint/experiment/experiment.py +0 -1049
- datamint-2.3.3.dist-info/METADATA +0 -125
- datamint-2.3.3.dist-info/RECORD +0 -54
- datamint-2.3.3.dist-info/entry_points.txt +0 -4
datamint/entities/cache_manager.py
CHANGED

@@ -105,7 +105,11 @@ class CacheManager(Generic[T]):
         Returns:
             Path to the data file
         """
-
+
+        datapath = self._get_entity_cache_dir(entity_id) / f"{data_key}"
+        if datapath.with_suffix('.pkl').exists():
+            return datapath.with_suffix('.pkl')
+        return datapath
 
     def _compute_version_hash(self, version_info: dict[str, Any]) -> str:
         """Compute a hash from version information.
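The reworked `_get_data_path` now prefers a pickled variant of a cache entry when one exists on disk. A minimal standalone sketch of that lookup rule; the directory and key in the usage comment are illustrative, not the package's actual cache layout:

```python
from pathlib import Path

def resolve_cached_file(cache_dir: Path, data_key: str) -> Path:
    """Prefer the serialized .pkl variant of a cache entry if it exists."""
    datapath = cache_dir / data_key
    pickled = datapath.with_suffix('.pkl')
    return pickled if pickled.exists() else datapath

# e.g. resolve_cached_file(Path('~/.cache/datamint').expanduser(), 'file_data')
```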
@@ -120,13 +124,13 @@ class CacheManager(Generic[T]):
         sorted_info = json.dumps(version_info, sort_keys=True)
         return hashlib.sha256(sorted_info.encode()).hexdigest()
 
-    def
+    def _get_validated_metadata(
         self,
         entity_id: str,
         data_key: str,
         version_info: dict[str, Any] | None = None
-    ) ->
-        """
+    ) -> tuple['CacheManager.ItemMetadata', Path] | tuple[None, None]:
+        """Get and validate cached metadata for an entity.
 
         Args:
             entity_id: Unique identifier for the entity
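The rewritten signature returns a pair rather than a single value, so callers can always tuple-unpack and then test one element. A small illustration of this `(None, None)` sentinel pattern, under no particular API:

```python
def lookup(key: str) -> tuple[str, int] | tuple[None, None]:
    table = {"a": ("alpha", 1)}
    return table.get(key, (None, None))

meta, size = lookup("missing")  # unpacking never raises on a miss
if meta is None:
    print("cache miss")
```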
@@ -134,42 +138,152 @@ class CacheManager(Generic[T]):
             version_info: Optional version information from server to validate cache
 
         Returns:
-
+            Tuple of (metadata, data_path) if valid, (None, None) if cache miss or invalid
         """
         metadata_path = self._get_metadata_path(entity_id)
-        data_path = self._get_data_path(entity_id, data_key)
 
-
-
-
-            return None
+        if not metadata_path.exists():
+            _LOGGER.debug(f"Cache miss for {entity_id}/{data_key} - no metadata")
+            return None, None
 
         try:
-            #
+            # Read metadata first to get the actual data path (could be external)
             with open(metadata_path, 'r') as f:
                 jsondata = f.read()
             cached_metadata = CacheManager.ItemMetadata.model_validate_json(jsondata)
+
+            # Use the data_path from metadata (supports external file locations)
+            data_path = Path(cached_metadata.data_path)
+
+            # Check if the actual data file exists
+            if not data_path.exists():
+                _LOGGER.debug(f"Cache miss for {entity_id}/{data_key} - data file not found at {data_path}")
+                return None, None
 
             # Validate version if provided
             if version_info is not None:
                 server_version = self._compute_version_hash(version_info)
-
                 if server_version != cached_metadata.version_hash:
                     _LOGGER.debug(
                         f"Cache version mismatch for {entity_id}/{data_key}. "
                         f"Server: {server_version}, Cached: {cached_metadata.version_hash}"
                     )
-                    return None
+                    return None, None
 
-
+            return cached_metadata, data_path
+        except Exception as e:
+            _LOGGER.warning(f"Error reading cache metadata for {entity_id}/{data_key}: {e}")
+            return None, None
 
+    def get(
+        self,
+        entity_id: str,
+        data_key: str,
+        version_info: dict[str, Any] | None = None
+    ) -> T | None:
+        """Retrieve cached data for an entity.
+
+        Args:
+            entity_id: Unique identifier for the entity
+            data_key: Key identifying the type of data
+            version_info: Optional version information from server to validate cache
+
+        Returns:
+            Cached data if valid, None if cache miss or invalid
+        """
+        cached_metadata, data_path = self._get_validated_metadata(entity_id, data_key, version_info)
+
+        if cached_metadata is None:
+            return None
+
+        try:
+            data = self._load_data(cached_metadata)
             _LOGGER.debug(f"Cache hit for {entity_id}/{data_key}")
             return data
-
         except Exception as e:
             _LOGGER.warning(f"Error reading cache for {entity_id}/{data_key}: {e}")
             return None
 
+    def get_path(
+        self,
+        entity_id: str,
+        data_key: str,
+        version_info: dict[str, Any] | None = None
+    ) -> Path | None:
+        """Get the path to cached data for an entity if valid.
+
+        Args:
+            entity_id: Unique identifier for the entity
+            data_key: Key identifying the type of data
+            version_info: Optional version information from server to validate cache
+
+        Returns:
+            Path to cached data if valid, None if cache miss or invalid
+        """
+        cached_metadata, data_path = self._get_validated_metadata(entity_id, data_key, version_info)
+        return data_path
+
+    def get_expected_path(self, entity_id: str, data_key: str) -> Path:
+        """Get the expected cache path for an entity (even if not yet cached).
+
+        This is useful for downloading directly to the cache location.
+
+        Args:
+            entity_id: Unique identifier for the entity
+            data_key: Key identifying the type of data
+
+        Returns:
+            Path where data will be cached
+        """
+        return self._get_data_path(entity_id, data_key)
+
+    def register_file_location(
+        self,
+        entity_id: str,
+        data_key: str,
+        file_path: str | Path,
+        version_info: dict[str, Any] | None = None,
+        mimetype: str = 'application/octet-stream'
+    ) -> None:
+        """Register an external file location in cache metadata without copying data.
+
+        This allows tracking a file stored at an arbitrary location (e.g., user's save_path)
+        while keeping version metadata in the cache directory.
+
+        Args:
+            entity_id: Unique identifier for the entity
+            data_key: Key identifying the type of data
+            file_path: Path to the external file to register
+            version_info: Optional version information from server
+            mimetype: MIME type of the file data
+        """
+        metadata_path = self._get_metadata_path(entity_id)
+        file_path = Path(file_path).resolve().absolute()
+
+        if not file_path.exists():
+            raise FileNotFoundError(f"Cannot register non-existent file: {file_path}")
+
+        try:
+            metadata = CacheManager.ItemMetadata(
+                cached_at=datetime.now(),
+                data_path=str(file_path),
+                data_type='bytes',
+                mimetype=mimetype,
+                entity_id=entity_id
+            )
+
+            if version_info is not None:
+                metadata.version_hash = self._compute_version_hash(version_info)
+                metadata.version_info = version_info
+
+            with open(metadata_path, 'w') as f:
+                f.write(metadata.model_dump_json(indent=2))
+
+            _LOGGER.debug(f"Registered external file for {entity_id}/{data_key}: {file_path}")
+
+        except Exception as e:
+            _LOGGER.warning(f"Error registering file location for {entity_id}/{data_key}: {e}")
+
     def set(
         self,
         entity_id: str,
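Taken together, `get_path`, `get_expected_path`, and `register_file_location` support a download-or-reuse flow around the cache. A hedged sketch of how they might compose; the `'file_data'` key, the mimetype, and the `download_entity_file` helper are illustrative assumptions, not part of the package:

```python
from pathlib import Path

def fetch_with_cache(cache, entity_id: str, version_info: dict) -> Path:
    # Cache hit: metadata exists, the file exists, and the version hash matches.
    cached = cache.get_path(entity_id, 'file_data', version_info=version_info)
    if cached is not None:
        return cached

    # Cache miss: download straight into the expected cache location...
    target = cache.get_expected_path(entity_id, 'file_data')
    download_entity_file(entity_id, target)  # hypothetical download helper

    # ...then record it so later lookups validate against this server version.
    cache.register_file_location(entity_id, 'file_data', target,
                                 version_info=version_info,
                                 mimetype='application/dicom')
    return target
```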
datamint/entities/datasetinfo.py
CHANGED

@@ -1,8 +1,8 @@
 """Dataset entity module for DataMint API."""
 
-from datetime import datetime
 import logging
 from typing import TYPE_CHECKING, Sequence
+from pydantic import PrivateAttr
 
 from .base_entity import BaseEntity, MISSING_FIELD
 
@@ -10,6 +10,7 @@ if TYPE_CHECKING:
     from datamint.api.client import Api
     from .resource import Resource
     from .project import Project
+    from datamint.api.endpoints.datasetsinfo_api import DatasetsInfoApi
 
 logger = logging.getLogger(__name__)
 
@@ -31,92 +32,86 @@ class DatasetInfo(BaseEntity):
     total_resource: int
     resource_ids: list[str]
 
+    _api: 'DatasetsInfoApi' = PrivateAttr()
+
     def __init__(self, **data):
         """Initialize the dataset info entity."""
         super().__init__(**data)
-        self._manager: EntityManager['DatasetInfo'] = EntityManager(self)
 
         # Cache for lazy-loaded data
-        self._resources_cache: Sequence['Resource'] | None = None
-        self._projects_cache: Sequence['Project'] | None = None
+        # self._resources_cache: Sequence['Resource'] | None = None
+        # self._projects_cache: Sequence['Project'] | None = None
 
-    def
-
-
-
-
-        refresh: bool = False,
-        limit: int | None = None
-    ) -> Sequence['Resource']:
-        """Get all resources in this dataset.
+    # def get_resources(
+    #     self,
+    #     refresh: bool = False,
+    #     limit: int | None = None
+    # ) -> Sequence['Resource']:
+    #     """Get all resources in this dataset.
 
-
+    #     Results are cached after the first call unless refresh=True.
 
-
-
-
+    #     Args:
+    #         api: Optional API client. Uses the one from set_api() if not provided.
+    #         refresh: If True, bypass cache and fetch fresh data
 
-
-
+    #     Returns:
+    #         List of Resource instances in this dataset
 
-
-
+    #     Raises:
+    #         RuntimeError: If no API client is available
 
-
-
-
-
-
-
-
+    #     Example:
+    #         >>> dataset = api._datasetsinfo.get_by_id("dataset-id")
+    #         >>> dataset.set_api(api)
+    #         >>> resources = dataset.get_resources()
+    #     """
+    #     if refresh or self._resources_cache is None:
+    #         # Fetch resources by their IDs
+    #         resources = []
+    #         for resource_id in self.resource_ids:
+    #             try:
+    #                 resource = self._api.get.get_by_id(resource_id)
+    #                 resource.set_api(self._api)
+    #                 resources.append(resource)
+    #             except Exception as e:
+    #                 logger.warning(f"Failed to fetch resource {resource_id}: {e}")
 
-
-            resources = []
-            for resource_id in self.resource_ids:
-                try:
-                    resource = api_client.resources.get_by_id(resource_id)
-                    resource.set_api(api_client)
-                    resources.append(resource)
-                except Exception as e:
-                    logger.warning(f"Failed to fetch resource {resource_id}: {e}")
-
-            self._resources_cache = resources
+    #         self._resources_cache = resources
 
-
+    #     return self._resources_cache
 
-    def get_projects(
-
-
-
-    ) -> Sequence['Project']:
-
+    # def get_projects(
+    #     self,
+    #     api: 'Api | None' = None,
+    #     refresh: bool = False
+    # ) -> Sequence['Project']:
+    #     """Get all projects associated with this dataset.
 
-
+    #     Results are cached after the first call unless refresh=True.
 
-
-
+    #     Args:
+    #         refresh: If True, bypass cache and fetch fresh data
 
-
-
+    #     Returns:
+    #         List of Project instances
 
-
-
+    #     Raises:
+    #         RuntimeError: If no API client is available
 
-
-
-
-
-
-        api_client = self._manager.api
+    #     Example:
+    #         >>> dataset = api.datasetsinfo.get_by_id("dataset-id")
+    #         >>> projects = dataset.get_projects()
+    #     """
+    #     if refresh or self._projects_cache is None:
 
-
-
-
+    #         # Get all projects and filter by dataset_id
+    #         all_projects = api_client.projects.get_all()
+    #         projects = [p for p in all_projects if p.dataset_id == self.id]
 
-
+    #         self._projects_cache = projects
 
-
+    #     return self._projects_cache
 
     def invalidate_cache(self) -> None:
         """Invalidate all cached relationship data.
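The `_api` handle on `DatasetInfo` is now declared as a pydantic `PrivateAttr`, which keeps it out of field validation and serialization. A generic sketch of that pattern (standard pydantic v2 behavior, not DataMint-specific code):

```python
from pydantic import BaseModel, PrivateAttr

class Entity(BaseModel):
    id: str
    _api: object = PrivateAttr(default=None)  # private: no validation, no dump

entity = Entity(id='abc')
entity._api = 'client placeholder'  # attached after construction, like set_api()
print(entity.model_dump())          # {'id': 'abc'} - the private attr never leaks
```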
datamint/entities/deployjob.py
ADDED

@@ -0,0 +1,18 @@
+from datamint.entities.base_entity import BaseEntity
+
+
+class DeployJob(BaseEntity):
+    id: str
+    status: str
+    model_name: str
+    model_version: int | None = None
+    model_alias: str | None = None
+    image_name: str | None = None
+    image_tag: str | None = None
+    error_message: str | None = None
+    progress_percentage: int = 0
+    current_step: str | None = None
+    with_gpu: bool = False
+    recent_logs: list[str] | None = None
+    started_at: str | None = None
+    completed_at: str | None = None
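`DeployJob` is a plain entity model for deployment status. Assuming `BaseEntity` is a pydantic model (consistent with the `model_validate_json` calls elsewhere in this diff), a server payload could be parsed like this; the payload values are invented for illustration:

```python
payload = {
    "id": "job-123",
    "status": "running",
    "model_name": "chest-ct-classifier",
    "model_version": 3,
    "progress_percentage": 40,
    "current_step": "building image",
    "with_gpu": True,
}
job = DeployJob.model_validate(payload)  # assumes pydantic v2-style BaseEntity
if job.status == "failed":
    print(job.error_message)
```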
datamint/entities/project.py
CHANGED

@@ -75,6 +75,45 @@ class Project(BaseEntity):
         """
         return self._api.get_project_resources(self.id)
 
+    def download_resources_datas(self, progress_bar: bool = True) -> None:
+        """Downloads all project resources in parallel for faster subsequent access.
+
+        This method downloads and caches all resource file data concurrently,
+        skipping resources that are already cached. This dramatically improves
+        performance when working with large projects.
+
+        Args:
+            progress_bar: Whether to show a progress bar. Default is True.
+
+        Example:
+            >>> proj = api.projects.get_by_name("My Project")
+            >>> proj.download_resources()  # Cache all resources in parallel
+            >>> # Now fetch_file_data() will be instantaneous for cached resources
+            >>> for res in proj.fetch_resources():
+            ...     data = res.fetch_file_data(use_cache=True)
+        """
+        return self.cache_resources(progress_bar=progress_bar)
+
+    def cache_resources(self, progress_bar: bool = True) -> None:
+        """Cache all project resources in parallel for faster subsequent access.
+
+        This method downloads and caches all resource file data concurrently,
+        skipping resources that are already cached. This dramatically improves
+        performance when working with large projects.
+
+        Args:
+            progress_bar: Whether to show a progress bar. Default is True.
+
+        Example:
+            >>> proj = api.projects.get_by_name("My Project")
+            >>> proj.cache_resources()  # Cache all resources in parallel
+            >>> # Now fetch_file_data() will be instantaneous for cached resources
+            >>> for res in proj.fetch_resources():
+            ...     data = res.fetch_file_data(use_cache=True)
+        """
+        resources = self.fetch_resources()
+        self._api.resources_api.cache_resources(resources, progress_bar=progress_bar)
+
     def set_work_status(self, resource: 'Resource', status: Literal['opened', 'annotated', 'closed']) -> None:
         """Set the status of a resource.
 
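Note that `download_resources_datas` simply delegates to `cache_resources`. A short warm-up sketch built from the docstring examples above; the project name is illustrative and `api` is assumed to be an authenticated client:

```python
proj = api.projects.get_by_name("My Project")
proj.cache_resources(progress_bar=True)  # parallel download into the local cache

for res in proj.fetch_resources():
    data = res.fetch_file_data(use_cache=True)  # served from cache, no re-download
```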