datamint 1.9.3__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamint/__init__.py +2 -0
- datamint/api/__init__.py +3 -0
- datamint/api/base_api.py +430 -0
- datamint/api/client.py +91 -0
- datamint/api/dto/__init__.py +10 -0
- datamint/api/endpoints/__init__.py +17 -0
- datamint/api/endpoints/annotations_api.py +984 -0
- datamint/api/endpoints/channels_api.py +28 -0
- datamint/api/endpoints/datasetsinfo_api.py +16 -0
- datamint/api/endpoints/projects_api.py +203 -0
- datamint/api/endpoints/resources_api.py +1013 -0
- datamint/api/endpoints/users_api.py +38 -0
- datamint/api/entity_base_api.py +347 -0
- datamint/apihandler/api_handler.py +3 -6
- datamint/apihandler/base_api_handler.py +6 -28
- datamint/apihandler/dto/__init__.py +0 -0
- datamint/apihandler/dto/annotation_dto.py +1 -1
- datamint/client_cmd_tools/datamint_upload.py +19 -30
- datamint/dataset/base_dataset.py +83 -86
- datamint/dataset/dataset.py +2 -2
- datamint/entities/__init__.py +20 -0
- datamint/entities/annotation.py +178 -0
- datamint/entities/base_entity.py +51 -0
- datamint/entities/channel.py +46 -0
- datamint/entities/datasetinfo.py +22 -0
- datamint/entities/project.py +64 -0
- datamint/entities/resource.py +130 -0
- datamint/entities/user.py +21 -0
- datamint/examples/example_projects.py +41 -44
- datamint/exceptions.py +27 -1
- {datamint-1.9.3.dist-info → datamint-2.0.1.dist-info}/METADATA +13 -9
- datamint-2.0.1.dist-info/RECORD +50 -0
- {datamint-1.9.3.dist-info → datamint-2.0.1.dist-info}/WHEEL +1 -1
- datamint-1.9.3.dist-info/RECORD +0 -29
- {datamint-1.9.3.dist-info → datamint-2.0.1.dist-info}/entry_points.txt +0 -0
datamint/dataset/base_dataset.py
CHANGED
```diff
@@ -13,14 +13,16 @@ from datamint import configs
 from torch.utils.data import DataLoader
 import torch
 from torch import Tensor
-from datamint.
+from datamint.exceptions import DatamintException
 from medimgkit.dicom_utils import is_dicom
 from medimgkit.readers import read_array_normalized
-from medimgkit.format_detection import guess_extension
+from medimgkit.format_detection import guess_extension, guess_typez
+from medimgkit.nifti_utils import NIFTI_MIMES, get_nifti_shape
 from datetime import datetime
 from pathlib import Path
-from datamint.
+from datamint.entities import Annotation, DatasetInfo
 import cv2
+from datamint.entities import Resource
 
 _LOGGER = logging.getLogger(__name__)
 
```
```diff
@@ -174,23 +176,12 @@ class DatamintBaseDataset:
 
     def _setup_api_handler(self, server_url: Optional[str], api_key: Optional[str], auto_update: bool) -> None:
         """Setup API handler and validate connection."""
-        from datamint
-
-
-            root_url=server_url,
+        from datamint import Api
+        self.api = Api(
+            server_url=server_url,
             api_key=api_key,
-            check_connection=auto_update
+            check_connection=self.auto_update
         )
-        self.server_url = self.api_handler.root_url
-        self.api_key = self.api_handler.api_key
-
-        if self.api_key is None:
-            _LOGGER.warning(
-                "API key not provided. If you want to download data, please provide an API key, "
-                f"either by passing it as an argument, "
-                f"setting environment variable {configs.ENV_VARS[configs.APIKEY_KEY]} or "
-                "using datamint-config command line tool."
-            )
 
     def _setup_directories(self, root: str | None) -> None:
         """Setup root and dataset directories."""
```
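The hunk above replaces the 1.x `APIHandler` wiring with the new top-level `Api` client and drops the manual API-key warning. A minimal construction sketch, using only the import and keyword names visible in this diff; the URL and key values are placeholders, not library defaults:

```python
from datamint import Api  # 2.x exports the client at the package root

# Keyword names mirror the call in _setup_api_handler above.
api = Api(
    server_url="https://datamint.example.com",  # placeholder URL
    api_key="YOUR_API_KEY",                     # placeholder key
    check_connection=True,
)
```

The removed `self.server_url`/`self.api_key` mirrors suggest callers are now expected to reach connection details through the `Api` object itself.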
```diff
@@ -234,26 +225,34 @@ class DatamintBaseDataset:
         else:
             self._check_version()
 
+    def _init_metainfo(self) -> None:
+        # get the server info
+        self.project_info = self.get_info()
+        self.metainfo = self._get_datasetinfo().asdict().copy()
+        self.metainfo['updated_at'] = None
+        self.metainfo['resources'] = []
+        self.metainfo['all_annotations'] = self.all_annotations
+        self.images_metainfo = self.metainfo['resources']
+
     def _load_metadata(self) -> bool:
         """Load and process dataset metadata."""
         if hasattr(self, 'metainfo'):
             _LOGGER.warning("Metadata already loaded.")
         metadata_path = os.path.join(self.dataset_dir, 'dataset.json')
         if not os.path.isfile(metadata_path):
-
-            self.project_info = self.get_info()
-            self.metainfo = self._get_datasetinfo().copy()
-            self.metainfo['updated_at'] = None
-            self.metainfo['resources'] = []
-            self.metainfo['all_annotations'] = self.all_annotations
-            self.images_metainfo = self.metainfo['resources']
+            self._init_metainfo()
             return False
         else:
             with open(metadata_path, 'r') as file:
                 self.metainfo = json.load(file)
             self.images_metainfo = self.metainfo['resources']
             # Convert annotations from dict to Annotation objects
-            self._convert_metainfo_to_clsobj()
+            try:
+                self._convert_metainfo_to_clsobj()
+            except Exception as e:
+                _LOGGER.warning(f"Failed to convert annotations. Redownloading dataset. {type(e)}")
+                self._init_metainfo()
+                return False
             return True
 
     def _convert_metainfo_to_clsobj(self):
```
```diff
@@ -412,19 +411,33 @@ class DatamintBaseDataset:
     @staticmethod
     def read_number_of_frames(filepath: str) -> int:
         """Read the number of frames in a file."""
-        if is_dicom(filepath):
+
+        mimetypes, ext = guess_typez(filepath)
+        mimetype = mimetypes[0]
+        if mimetype is None:
+            raise ValueError(f"Could not determine MIME type for file: {filepath}")
+
+        if mimetype == 'application/dicom':
             ds = pydicom.dcmread(filepath)
             return getattr(ds, 'NumberOfFrames', 1)
-        elif
+        elif mimetype.startswith('video/'):
             cap = cv2.VideoCapture(filepath)
             try:
                 return int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
             finally:
                 cap.release()
-        elif
+        elif mimetype in ('image/png', 'image/jpeg', 'image/jpg', 'image/bmp', 'image/tiff'):
             return 1
+        elif mimetype in NIFTI_MIMES:
+            shape = get_nifti_shape(filepath)
+            if len(shape) == 3:
+                return shape[-1]
+            elif len(shape) > 3:
+                return shape[3]
+            else:
+                return 1
         else:
-            raise ValueError(f"Unsupported file type
+            raise ValueError(f"Unsupported file type '{mimetype}' for file {filepath}")
 
     def get_resources_ids(self) -> list[str]:
         """Get list of resource IDs."""
```
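`read_number_of_frames` now dispatches on the first MIME type reported by `medimgkit.format_detection.guess_typez` (which, per the import and call above, returns candidate MIME types plus an extension guess) and gains a NIfTI branch. A usage sketch with hypothetical file paths:

```python
from datamint.dataset.base_dataset import DatamintBaseDataset

# DICOM: the NumberOfFrames tag, defaulting to 1
DatamintBaseDataset.read_number_of_frames("study/slice001.dcm")
# video/*: frame count queried through cv2.VideoCapture
DatamintBaseDataset.read_number_of_frames("clips/cine.mp4")
# NIfTI: derived from the header shape (3rd or 4th dimension)
DatamintBaseDataset.read_number_of_frames("volumes/ct.nii.gz")
```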
```diff
@@ -526,18 +539,18 @@ class DatamintBaseDataset:
         if missing_files:
             raise DatamintDatasetException(f"Image files not found: {missing_files}")
 
-    def _get_datasetinfo(self) -> dict:
+    def _get_datasetinfo(self) -> DatasetInfo:
         """Get dataset information from API."""
         if self._server_dataset_info is not None:
             return self._server_dataset_info
-        all_datasets = self.
+        all_datasets = self.api._datasetsinfo.get_all()
 
         for dataset in all_datasets:
-            if dataset['id'] == self.dataset_id:
+            if dataset.id == self.dataset_id:
                 self._server_dataset_info = dataset
                 return dataset
 
-        available_datasets = [(d['name'], d['id']) for d in all_datasets]
+        available_datasets = [(d.name, d.id) for d in all_datasets]
         raise DatamintDatasetException(
             f"Dataset with id '{self.dataset_id}' not found. "
             f"Available datasets: {available_datasets}"
```
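This hunk shows the migration pattern repeated throughout the file: API calls now return typed entities (`DatasetInfo` here) instead of plain dicts, so key lookups become attribute access. A sketch of both styles using a stand-in subclass, since the real `DatasetInfo` field list is not part of this diff:

```python
from datamint.entities import BaseEntity

class DatasetInfoSketch(BaseEntity):  # stand-in; only id/name/updated_at are attested in this diff
    id: str
    name: str
    updated_at: str | None = None

info = DatasetInfoSketch(id='ds-1', name='chest-xrays')
info.id               # 2.x style: attribute access on the entity
info.asdict()['id']   # dict bridge for code still expecting the 1.x shape
```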
```diff
@@ -547,13 +560,12 @@ class DatamintBaseDataset:
         """Get project information from API."""
         if hasattr(self, 'project_info') and self.project_info is not None:
             return self.project_info
-        project = self.
-        if
-            available_projects = project['all_projects']
+        project = self.api.projects.get_by_name(self.project_name)
+        if project is None:
             raise DatamintDatasetException(
-                f"Project with name '{self.project_name}' not found.
-                f"Available projects: {available_projects}"
+                f"Project with name '{self.project_name}' not found."
             )
+        project = project.asdict()
         self.project_info = project
         self.dataset_id = project['dataset_id']
         return project
```
```diff
@@ -592,31 +604,10 @@ class DatamintBaseDataset:
         lines = [head] + [" " * 4 + line for line in body]
         return "\n".join(lines)
 
-    def download_project(self) -> None:
-        """Download project data from API."""
-
-        dataset_info = self._get_datasetinfo()
-        self.dataset_id = dataset_info['id']
-        self.last_updaded_at = dataset_info['updated_at']
-
-        self.api_handler.download_project(
-            self.project_info['id'],
-            self.dataset_zippath,
-            all_annotations=self.all_annotations,
-            include_unannotated=self.include_unannotated
-        )
-
-        _LOGGER.debug("Downloaded dataset")
-
-        if os.path.getsize(self.dataset_zippath) == 0:
-            raise DatamintDatasetException("Download failed.")
-
-        self._extract_and_update_metadata()
-
     def _get_dataset_id(self) -> str:
         if self.dataset_id is None:
             dataset_info = self._get_datasetinfo()
-            self.dataset_id = dataset_info['id']
+            self.dataset_id = dataset_info.id
         return self.dataset_id
 
     def _extract_and_update_metadata(self) -> None:
```
```diff
@@ -638,7 +629,7 @@ class DatamintBaseDataset:
 
         # Save updated metadata
         with open(datasetjson_path, 'w') as file:
-            json.dump(self.metainfo, file, default=lambda o: o.
+            json.dump(self.metainfo, file, default=lambda o: o.asdict() if hasattr(o, 'asdict') else o)
 
         self.images_metainfo = self.metainfo['resources']
         # self._convert_metainfo_to_clsobj()
```
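The `default=` hook matters because the 2.0 metadata tree mixes plain dicts with entity objects; `json.dump` only invokes `default` for values it cannot encode natively, routing entities through `asdict()`. The same idea as a standalone sketch, with a stricter fallback than the inline lambda:

```python
import json

def entity_default(o):
    # Entities expose asdict() (see BaseEntity at the end of this diff);
    # anything else is a genuine serialization error.
    if hasattr(o, 'asdict'):
        return o.asdict()
    raise TypeError(f"{type(o).__name__} is not JSON serializable")

metainfo = {'updated_at': None, 'resources': []}  # stand-in metadata tree
print(json.dumps(metainfo, default=entity_default))
```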
```diff
@@ -646,19 +637,19 @@ class DatamintBaseDataset:
     def _update_metadata_timestamps(self) -> None:
         """Update metadata with correct timestamps."""
         if 'updated_at' not in self.metainfo:
-            self.metainfo['updated_at'] = self.last_updaded_at
+            self.metainfo['updated_at'] = self.last_updated_at
         else:
             try:
                 local_time = datetime.fromisoformat(self.metainfo['updated_at'])
-                server_time = datetime.fromisoformat(self.last_updaded_at)
+                server_time = datetime.fromisoformat(self.last_updated_at)
 
                 if local_time < server_time:
                     _LOGGER.warning(
                         f"Inconsistent updated_at dates detected "
-                        f"({self.metainfo['updated_at']} < {self.last_updaded_at}). "
-                        f"Fixing it to {self.last_updaded_at}"
+                        f"({self.metainfo['updated_at']} < {self.last_updated_at}). "
+                        f"Fixing it to {self.last_updated_at}"
                     )
-                    self.metainfo['updated_at'] = self.last_updaded_at
+                    self.metainfo['updated_at'] = self.last_updated_at
             except Exception as e:
                 _LOGGER.warning(f"Failed to parse updated_at date: {e}")
```
```diff
@@ -690,6 +681,9 @@ class DatamintBaseDataset:
             img = (img - min_val) / (img.max() - min_val) * 255
             img = img.astype(np.uint8)
 
+        if not img.flags.writeable:
+            img = img.copy()
+
         img_tensor = torch.from_numpy(img).contiguous()
 
         if isinstance(img_tensor, torch.ByteTensor):
```
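The added guard addresses a real `torch.from_numpy` constraint: the resulting tensor shares memory with the array, and PyTorch warns on read-only arrays (common with memory-mapped files) because writes through the tensor would be unsafe. A standalone reproduction of the guarded conversion:

```python
import numpy as np
import torch

img = np.zeros((4, 4), dtype=np.uint8)
img.flags.writeable = False       # simulate e.g. a read-only memory map

if not img.flags.writeable:       # same guard as in the hunk above
    img = img.copy()              # private, writable copy
tensor = torch.from_numpy(img).contiguous()
```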
```diff
@@ -829,7 +823,7 @@ class DatamintBaseDataset:
 
         try:
             external_metadata_info = self._get_datasetinfo()
-            server_updated_at = external_metadata_info['updated_at']
+            server_updated_at = external_metadata_info.updated_at
         except Exception as e:
             _LOGGER.warning(f"Failed to check for updates in {self.project_name}: {e}")
             return
```
```diff
@@ -856,20 +850,21 @@ class DatamintBaseDataset:
             _LOGGER.info('Local version is up to date with the latest version.')
 
     def _fetch_new_resources(self,
-                             all_uptodate_resources: list[dict]) -> list[dict]:
+                             all_uptodate_resources: list[Resource]) -> list[dict]:
         local_resources = self.images_metainfo
         local_resources_ids = [res['id'] for res in local_resources]
         new_resources = []
         for resource in all_uptodate_resources:
+            resource = resource.asdict()
             if resource['id'] not in local_resources_ids:
                 resource['file'] = str(self._get_resource_file_path(resource))
                 resource['annotations'] = []
                 new_resources.append(resource)
         return new_resources
 
-    def _fetch_deleted_resources(self, all_uptodate_resources: list[dict]) -> list[dict]:
+    def _fetch_deleted_resources(self, all_uptodate_resources: list[Resource]) -> list[dict]:
         local_resources = self.images_metainfo
-        all_uptodate_resources_ids = [res['id'] for res in all_uptodate_resources]
+        all_uptodate_resources_ids = [res.id for res in all_uptodate_resources]
         deleted_resources = []
         for resource in local_resources:
             try:
```
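Both `_fetch_*` helpers amount to an ID set difference between the local cache and server state, with the extra `asdict()` step because the server side now yields `Resource` entities. The core logic, sketched with stand-in data rather than the library's types:

```python
from types import SimpleNamespace

local_resources = [{'id': 'a'}, {'id': 'b'}]                            # cached dicts on disk
server_resources = [SimpleNamespace(id='b'), SimpleNamespace(id='c')]   # entity-like objects

local_ids = {r['id'] for r in local_resources}
server_ids = {r.id for r in server_resources}

print(server_ids - local_ids)   # {'c'}: new on the server -> download
print(local_ids - server_ids)   # {'a'}: gone from the server -> delete locally
```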
```diff
@@ -888,7 +883,7 @@ class DatamintBaseDataset:
         # server_updated_at = external_metadata_info['updated_at']
 
         ### RESOURCES ###
-        all_uptodate_resources = self.
+        all_uptodate_resources = self.api.projects.get_project_resources(self.get_info()['id'])
         new_resources = self._fetch_new_resources(all_uptodate_resources)
         deleted_resources = self._fetch_deleted_resources(all_uptodate_resources)
 
```
```diff
@@ -898,9 +893,9 @@ class DatamintBaseDataset:
         new_resources_path = [Path(self.dataset_dir) / r['file'] for r in new_resources]
         new_resources_ids = [r['id'] for r in new_resources]
         _LOGGER.info(f"Downloading {len(new_resources)} new resources...")
-        new_res_paths = self.
-
-
+        new_res_paths = self.api.resources.download_multiple_resources(new_resources_ids,
+                                                                       save_path=new_resources_path,
+                                                                       add_extension=True)
         for new_rpath, r in zip(new_res_paths, new_resources):
             r['file'] = str(Path(new_rpath).relative_to(self.dataset_dir))
         _LOGGER.info(f"Downloaded {len(new_resources)} new resources.")
```
```diff
@@ -910,16 +905,17 @@ class DatamintBaseDataset:
         ################
 
         ### ANNOTATIONS ###
-        all_annotations = self.
-
+        all_annotations = self.api.annotations.get_list(worklist_id=None if self.all_annotations else self.project_info['worklist_id'],
+                                                        status=None if self.all_annotations else 'published')
+
         # group annotations by resource ID
-        annotations_by_resource = {}
+        annotations_by_resource: dict[str, list[Annotation]] = {}
         for ann in all_annotations:
             # add the local filepath
             filepath = self._get_annotation_file_path(ann)
             if filepath is not None:
-                ann['file'] = str(filepath)
-            resource_id = ann['resource_id']
+                ann.file = str(filepath)
+            resource_id = ann.resource_id
             if resource_id not in annotations_by_resource:
                 annotations_by_resource[resource_id] = []
             annotations_by_resource[resource_id].append(ann)
```
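The grouping loop above is the manual form of a bucket-by-key `defaultdict`; the 2.0 change is only that `resource_id` and the file path are now attributes on the `Annotation` entity rather than dict keys. An equivalent sketch with stand-in objects:

```python
from collections import defaultdict
from types import SimpleNamespace

anns = [SimpleNamespace(resource_id='r1'), SimpleNamespace(resource_id='r2'),
        SimpleNamespace(resource_id='r1')]   # stand-ins for Annotation entities

by_resource = defaultdict(list)
for ann in anns:
    by_resource[ann.resource_id].append(ann)
print({k: len(v) for k, v in by_resource.items()})   # {'r1': 2, 'r2': 1}
```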
```diff
@@ -937,11 +933,11 @@ class DatamintBaseDataset:
             # check if segmentation annotations need to be downloaded
             # Also check if annotations need to be deleted
             old_ann_ids = set([ann.id for ann in old_resource_annotations if hasattr(ann, 'id')])
-            new_ann_ids = set([ann['id'] for ann in new_resource_annotations])
+            new_ann_ids = set([ann.id for ann in new_resource_annotations])
 
             # Find annotations to add, update, or remove
             annotations_to_add = [ann for ann in new_resource_annotations
-                                  if ann['id'] not in old_ann_ids]
+                                  if ann.id not in old_ann_ids]
             annotations_to_remove = [ann for ann in old_resource_annotations
                                      if getattr(ann, 'id', 'NA') not in new_ann_ids]
 
```
```diff
@@ -970,22 +966,23 @@ class DatamintBaseDataset:
                     _LOGGER.error(f"Error deleting annotation file {filepath}: {e}")
 
             # Update resource annotations list - convert to Annotation objects
-            resource['annotations'] = [Annotation.from_dict(ann) for ann in new_resource_annotations]
+            # resource['annotations'] = [Annotation.from_dict(ann) for ann in new_resource_annotations]
+            resource['annotations'] = new_resource_annotations
 
         # Batch download all segmentation files
         if segmentations_to_download:
             _LOGGER.info(f"Downloading {len(segmentations_to_download)} segmentation files...")
-            self.
+            self.api.annotations.download_multiple_files(segmentations_to_download, segmentation_paths)
             _LOGGER.info(f"Downloaded {len(segmentations_to_download)} segmentation files.")
 
         ###################
         # update metadata
-        self.metainfo['updated_at'] = self._get_datasetinfo()['updated_at']
+        self.metainfo['updated_at'] = self._get_datasetinfo().updated_at
         self.metainfo['all_annotations'] = self.all_annotations
         # save updated metadata
         datasetjson_path = os.path.join(self.dataset_dir, 'dataset.json')
         with open(datasetjson_path, 'w') as file:
-            json.dump(self.metainfo, file, default=lambda o: o.
+            json.dump(self.metainfo, file, default=lambda o: o.asdict() if hasattr(o, 'asdict') else o)
 
     def _get_resource_file_path(self, resource: dict) -> Path:
         """Get the local file path for a resource."""
```
datamint/dataset/dataset.py
CHANGED
```diff
@@ -7,7 +7,7 @@ import numpy as np
 import logging
 from PIL import Image
 import albumentations
-from datamint.
+from datamint.entities.annotation import Annotation
 
 _LOGGER = logging.getLogger(__name__)
 
```
```diff
@@ -155,7 +155,7 @@ class DatamintDataset(DatamintBaseDataset):
         # FIXME: avoid enforcing resizing the mask
         seg = (Image.open(segfilepath)
                .convert('L')
-               .resize((w, h), Image.NEAREST)
+               .resize((w, h), Image.Resampling.NEAREST)
               )
         seg = np.array(seg)
 
```
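`Image.Resampling.NEAREST` is the enum form introduced in Pillow 9.1.0, where the bare top-level constants such as `Image.NEAREST` were deprecated; the change keeps the resize warning-free on current Pillow. For reference:

```python
from PIL import Image

im = Image.new('L', (64, 64))
# Pillow >= 9.1: resampling filters live on the Image.Resampling enum
small = im.resize((32, 32), Image.Resampling.NEAREST)
print(small.size)   # (32, 32)
```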
datamint/entities/__init__.py
ADDED
```diff
@@ -0,0 +1,20 @@
+"""DataMint entities package."""
+
+from .annotation import Annotation
+from .base_entity import BaseEntity
+from .channel import Channel, ChannelResourceData
+from .project import Project
+from .resource import Resource
+from .user import User  # new export
+from .datasetinfo import DatasetInfo
+
+__all__ = [
+    'Annotation',
+    'BaseEntity',
+    'Channel',
+    'ChannelResourceData',
+    'Project',
+    'Resource',
+    "User",
+    'DatasetInfo',
+]
```
datamint/entities/annotation.py
ADDED
```diff
@@ -0,0 +1,178 @@
+# filepath: datamint/entities/annotation.py
+"""Annotation entity module for DataMint API.
+
+This module defines the Annotation model used to represent annotation
+records returned by the DataMint API.
+"""
+
+from typing import Any
+import logging
+from .base_entity import BaseEntity, MISSING_FIELD
+from pydantic import Field
+from datetime import datetime
+
+logger = logging.getLogger(__name__)
+
+# Map API field names to class attributes
+_FIELD_MAPPING = {
+    'type': 'annotation_type',
+    'name': 'identifier',
+    'added_by': 'created_by',
+    'index': 'frame_index',
+}
+
+
+class Annotation(BaseEntity):
+    """Pydantic Model representing a DataMint annotation.
+
+    Attributes:
+        id: Unique identifier for the annotation.
+        identifier: User-friendly identifier or label for the annotation.
+        scope: Scope of the annotation (e.g., "frame", "image").
+        frame_index: Index of the frame if scope is frame-based.
+        annotation_type: Type of annotation (e.g., "segmentation", "bbox", "label").
+        text_value: Optional text value associated with the annotation.
+        numeric_value: Optional numeric value associated with the annotation.
+        units: Optional units for numeric_value.
+        geometry: Optional geometry payload (e.g., polygons, masks) as a list.
+        created_at: ISO timestamp for when the annotation was created.
+        created_by: Email or identifier of the creating user.
+        annotation_worklist_id: Optional worklist ID associated with the annotation.
+        status: Lifecycle status of the annotation (e.g., "new", "approved").
+        approved_at: Optional ISO timestamp for approval time.
+        approved_by: Optional identifier of the approver.
+        resource_id: ID of the resource this annotation belongs to.
+        associated_file: Path or identifier of any associated file artifact.
+        deleted: Whether the annotation is marked as deleted.
+        deleted_at: Optional ISO timestamp for deletion time.
+        deleted_by: Optional identifier of the user who deleted the annotation.
+        created_by_model: Optional identifier of the model that created this annotation.
+        old_geometry: Optional previous geometry payload for change tracking.
+        set_name: Optional set name this annotation belongs to.
+        resource_filename: Optional filename of the resource.
+        resource_modality: Optional modality of the resource (e.g., CT, MR).
+        annotation_worklist_name: Optional worklist name associated with the annotation.
+        user_info: Optional user information with keys like firstname and lastname.
+        values: Optional extra values payload for flexible schemas.
+    """
+
+    id: str
+    identifier: str
+    scope: str
+    frame_index: int | None
+    annotation_type: str
+    text_value: str | None
+    numeric_value: float | int | None
+    units: str | None
+    geometry: list | dict | None
+    created_at: str  # ISO timestamp string
+    created_by: str
+    annotation_worklist_id: str | None
+    status: str
+    approved_at: str | None  # ISO timestamp string
+    approved_by: str | None
+    resource_id: str
+    associated_file: str | None
+    deleted: bool
+    deleted_at: str | None  # ISO timestamp string
+    deleted_by: str | None
+    created_by_model: str | None
+    set_name: str | None
+    resource_filename: str | None
+    resource_modality: str | None
+    annotation_worklist_name: str | None
+    user_info: dict | None
+    values: list | None = MISSING_FIELD
+    file: str | None = None  # Add file field for segmentations
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> 'Annotation':
+        """Create an Annotation instance from a dictionary.
+
+        Args:
+            data: Dictionary containing annotation data from API
+
+        Returns:
+            Annotation instance
+        """
+        # Convert field names and filter valid fields
+        converted_data = {}
+        for key, value in data.items():
+            # Map field names if needed
+            mapped_key = _FIELD_MAPPING.get(key, key)
+            converted_data[mapped_key] = value
+
+        if 'scope' not in converted_data:
+            converted_data['scope'] = 'image' if converted_data.get('frame_index') is None else 'frame'
+
+        if converted_data['annotation_type'] in ['segmentation']:
+            if converted_data.get('file') is None:
+                raise ValueError(f"Segmentation annotations must have an associated file. {data}")
+
+        # Create instance with only valid fields
+        valid_fields = {f for f in cls.model_fields.keys()}
+        filtered_data = {k: v for k, v in converted_data.items() if k in valid_fields}
+
+        return cls(**filtered_data)
+
+    @property
+    def type(self) -> str:
+        """Alias for :attr:`annotation_type`."""
+        return self.annotation_type
+
+    @property
+    def name(self) -> str:
+        """Get the annotation name (alias for identifier)."""
+        return self.identifier
+
+    @property
+    def index(self) -> int | None:
+        """Get the frame index (alias for frame_index)."""
+        return self.frame_index
+
+    @property
+    def value(self) -> str | None:
+        """Get the annotation value (for category annotations)."""
+        return self.text_value
+
+    @property
+    def added_by(self) -> str:
+        """Get the creator email (alias for created_by)."""
+        return self.created_by
+
+    def is_segmentation(self) -> bool:
+        """Check if this is a segmentation annotation."""
+        return self.annotation_type == 'segmentation'
+
+    def is_label(self) -> bool:
+        """Check if this is a label annotation."""
+        return self.annotation_type == 'label'
+
+    def is_category(self) -> bool:
+        """Check if this is a category annotation."""
+        return self.annotation_type == 'category'
+
+    def is_frame_scoped(self) -> bool:
+        """Check if this annotation is frame-scoped."""
+        return self.scope == 'frame'
+
+    def is_image_scoped(self) -> bool:
+        """Check if this annotation is image-scoped."""
+        return self.scope == 'image'
+
+    def get_created_datetime(self) -> datetime | None:
+        """
+        Get the creation datetime as a datetime object.
+
+        Returns:
+            datetime object or None if created_at is not set
+        """
+        if isinstance(self.created_at, datetime):
+            return self.created_at
+
+        if self.created_at:
+            try:
+                return datetime.fromisoformat(self.created_at.replace('Z', '+00:00'))
+            except ValueError:
+                logger.warning(f"Could not parse created_at datetime: {self.created_at}")
+        return None
```
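`from_dict` is the API-boundary constructor: it remaps the server's field names (`type` → `annotation_type`, `name` → `identifier`, `added_by` → `created_by`, `index` → `frame_index`), infers `scope` when absent, and drops unknown keys. A usage sketch; the payload values are fabricated but cover every required field of the model above:

```python
from datamint.entities import Annotation

payload = {  # field names as the API would send them
    'id': 'ann-1', 'name': 'lesion', 'type': 'label', 'index': 3,
    'text_value': None, 'numeric_value': None, 'units': None, 'geometry': None,
    'created_at': '2024-01-01T00:00:00Z', 'added_by': 'user@example.com',
    'annotation_worklist_id': None, 'status': 'new', 'approved_at': None,
    'approved_by': None, 'resource_id': 'res-1', 'associated_file': None,
    'deleted': False, 'deleted_at': None, 'deleted_by': None,
    'created_by_model': None, 'set_name': None, 'resource_filename': None,
    'resource_modality': None, 'annotation_worklist_name': None, 'user_info': None,
}
ann = Annotation.from_dict(payload)
assert ann.scope == 'frame'                 # inferred: frame_index is set
assert ann.name == 'lesion'                 # alias property over identifier
assert ann.is_label() and not ann.is_segmentation()
assert ann.get_created_datetime() is not None
```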
datamint/entities/base_entity.py
ADDED
```diff
@@ -0,0 +1,51 @@
+import logging
+import sys
+from typing import Any
+from pydantic import ConfigDict, BaseModel
+
+if sys.version_info >= (3, 11):
+    from typing import Self
+else:
+    from typing_extensions import Self
+_LOGGER = logging.getLogger(__name__)
+
+MISSING_FIELD = 'MISSING_FIELD'  # Used when a field is sometimes missing for one endpoint but not on another endpoint
+
+# Track logged warnings to avoid duplicates
+_LOGGED_WARNINGS: set[tuple[str, str]] = set()
+
+
+class BaseEntity(BaseModel):
+    """
+    Base class for all entities in the Datamint system.
+
+    This class provides common functionality for all entities, such as
+    serialization and deserialization from dictionaries, as well as
+    handling unknown fields gracefully.
+    """
+
+    model_config = ConfigDict(extra='allow')  # Allow extra fields not defined in the model
+
+    def asdict(self) -> dict[str, Any]:
+        """Convert the entity to a dictionary, including unknown fields."""
+        return self.model_dump(warnings='none')
+
+    def asjson(self) -> str:
+        """Convert the entity to a JSON string, including unknown fields."""
+        return self.model_dump_json(warnings='none')
+
+    def model_post_init(self, __context: Any) -> None:
+        """Handle unknown fields by logging a warning once per class/field combination in debug mode."""
+        if self.__pydantic_extra__ and _LOGGER.isEnabledFor(logging.DEBUG):
+            class_name = self.__class__.__name__
+
+            have_to_log = False
+            for key in self.__pydantic_extra__.keys():
+                warning_key = (class_name, key)
+
+                if warning_key not in _LOGGED_WARNINGS:
+                    _LOGGED_WARNINGS.add(warning_key)
+                    have_to_log = True
+
+            if have_to_log:
+                _LOGGER.warning(f"Unknown fields {list(self.__pydantic_extra__.keys())} found in {class_name}")
```