datamint 1.9.3__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
This version of datamint has been flagged as potentially problematic.
- datamint/__init__.py +2 -0
- datamint/api/__init__.py +3 -0
- datamint/api/base_api.py +430 -0
- datamint/api/client.py +91 -0
- datamint/api/dto/__init__.py +10 -0
- datamint/api/endpoints/__init__.py +17 -0
- datamint/api/endpoints/annotations_api.py +984 -0
- datamint/api/endpoints/channels_api.py +28 -0
- datamint/api/endpoints/datasetsinfo_api.py +16 -0
- datamint/api/endpoints/projects_api.py +203 -0
- datamint/api/endpoints/resources_api.py +1013 -0
- datamint/api/endpoints/users_api.py +38 -0
- datamint/api/entity_base_api.py +347 -0
- datamint/apihandler/api_handler.py +3 -6
- datamint/apihandler/base_api_handler.py +6 -28
- datamint/apihandler/dto/__init__.py +0 -0
- datamint/apihandler/dto/annotation_dto.py +1 -1
- datamint/client_cmd_tools/datamint_upload.py +19 -30
- datamint/dataset/base_dataset.py +65 -75
- datamint/dataset/dataset.py +2 -2
- datamint/entities/__init__.py +20 -0
- datamint/entities/annotation.py +178 -0
- datamint/entities/base_entity.py +51 -0
- datamint/entities/channel.py +46 -0
- datamint/entities/datasetinfo.py +22 -0
- datamint/entities/project.py +64 -0
- datamint/entities/resource.py +130 -0
- datamint/entities/user.py +21 -0
- datamint/examples/example_projects.py +41 -44
- datamint/exceptions.py +27 -1
- {datamint-1.9.3.dist-info → datamint-2.0.0.dist-info}/METADATA +13 -9
- datamint-2.0.0.dist-info/RECORD +50 -0
- {datamint-1.9.3.dist-info → datamint-2.0.0.dist-info}/WHEEL +1 -1
- datamint-1.9.3.dist-info/RECORD +0 -29
- {datamint-1.9.3.dist-info → datamint-2.0.0.dist-info}/entry_points.txt +0 -0
datamint/dataset/base_dataset.py
CHANGED
@@ -13,14 +13,16 @@ from datamint import configs
 from torch.utils.data import DataLoader
 import torch
 from torch import Tensor
-from datamint.
+from datamint.exceptions import DatamintException
 from medimgkit.dicom_utils import is_dicom
 from medimgkit.readers import read_array_normalized
-from medimgkit.format_detection import guess_extension
+from medimgkit.format_detection import guess_extension, guess_typez
+from medimgkit.nifti_utils import NIFTI_MIMES, get_nifti_shape
 from datetime import datetime
 from pathlib import Path
-from datamint.
+from datamint.entities import Annotation, DatasetInfo
 import cv2
+from datamint.entities import Resource
 
 _LOGGER = logging.getLogger(__name__)
 
@@ -174,23 +176,12 @@ class DatamintBaseDataset:
 
     def _setup_api_handler(self, server_url: Optional[str], api_key: Optional[str], auto_update: bool) -> None:
         """Setup API handler and validate connection."""
-        from datamint
-
-
-            root_url=server_url,
+        from datamint import Api
+        self.api = Api(
+            server_url=server_url,
             api_key=api_key,
-            check_connection=auto_update
+            check_connection=self.auto_update
         )
-        self.server_url = self.api_handler.root_url
-        self.api_key = self.api_handler.api_key
-
-        if self.api_key is None:
-            _LOGGER.warning(
-                "API key not provided. If you want to download data, please provide an API key, "
-                f"either by passing it as an argument, "
-                f"setting environment variable {configs.ENV_VARS[configs.APIKEY_KEY]} or "
-                "using datamint-config command line tool."
-            )
 
     def _setup_directories(self, root: str | None) -> None:
         """Setup root and dataset directories."""
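The old `APIHandler` construction plus its manual API-key warning collapses into a single `Api` entry point. A minimal construction sketch using only the keyword arguments visible in this hunk (the URL is a placeholder, and falling back to credentials configured via `datamint-config` when `api_key=None` is an assumption):

    from datamint import Api

    api = Api(
        server_url="https://example.datamint.io",  # placeholder, not a real endpoint
        api_key=None,            # assumption: falls back to configured credentials
        check_connection=True,   # validate the connection eagerly, as the dataset does
    )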
@@ -242,7 +233,7 @@ class DatamintBaseDataset:
         if not os.path.isfile(metadata_path):
             # get the server info
             self.project_info = self.get_info()
-            self.metainfo = self._get_datasetinfo().copy()
+            self.metainfo = self._get_datasetinfo().asdict().copy()
             self.metainfo['updated_at'] = None
             self.metainfo['resources'] = []
             self.metainfo['all_annotations'] = self.all_annotations
@@ -412,19 +403,33 @@ class DatamintBaseDataset:
     @staticmethod
     def read_number_of_frames(filepath: str) -> int:
         """Read the number of frames in a file."""
-
+
+        mimetypes, ext = guess_typez(filepath)
+        mimetype = mimetypes[0]
+        if mimetype is None:
+            raise ValueError(f"Could not determine MIME type for file: {filepath}")
+
+        if mimetype == 'application/dicom':
             ds = pydicom.dcmread(filepath)
             return getattr(ds, 'NumberOfFrames', 1)
-        elif
+        elif mimetype.startswith('video/'):
             cap = cv2.VideoCapture(filepath)
             try:
                 return int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
             finally:
                 cap.release()
-        elif
+        elif mimetype in ('image/png', 'image/jpeg', 'image/jpg', 'image/bmp', 'image/tiff'):
             return 1
+        elif mimetype in NIFTI_MIMES:
+            shape = get_nifti_shape(filepath)
+            if len(shape) == 3:
+                return shape[-1]
+            elif len(shape) > 3:
+                return shape[3]
+            else:
+                return 1
         else:
-            raise ValueError(f"Unsupported file type
+            raise ValueError(f"Unsupported file type '{mimetype}' for file {filepath}")
 
     def get_resources_ids(self) -> list[str]:
         """Get list of resource IDs."""
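Frame counting now dispatches on the MIME type returned by `guess_typez` instead of ad-hoc extension checks, adding NIfTI support. A usage sketch (file names are hypothetical):

    from datamint.dataset.base_dataset import DatamintBaseDataset

    DatamintBaseDataset.read_number_of_frames("scan.dcm")    # DICOM: NumberOfFrames tag, default 1
    DatamintBaseDataset.read_number_of_frames("clip.mp4")    # video: CAP_PROP_FRAME_COUNT via OpenCV
    DatamintBaseDataset.read_number_of_frames("slide.png")   # still image: always 1
    DatamintBaseDataset.read_number_of_frames("vol.nii.gz")  # NIfTI: third or fourth axis of the volume shape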
@@ -526,18 +531,18 @@ class DatamintBaseDataset:
         if missing_files:
             raise DatamintDatasetException(f"Image files not found: {missing_files}")
 
-    def _get_datasetinfo(self) ->
+    def _get_datasetinfo(self) -> DatasetInfo:
         """Get dataset information from API."""
         if self._server_dataset_info is not None:
             return self._server_dataset_info
-        all_datasets = self.
+        all_datasets = self.api._datasetsinfo.get_all()
 
         for dataset in all_datasets:
-            if dataset
+            if dataset.id == self.dataset_id:
                 self._server_dataset_info = dataset
                 return dataset
 
-        available_datasets = [(d
+        available_datasets = [(d.name, d.id) for d in all_datasets]
         raise DatamintDatasetException(
             f"Dataset with id '{self.dataset_id}' not found. "
             f"Available datasets: {available_datasets}"
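`_get_datasetinfo` now returns a typed `DatasetInfo` entity rather than a raw dict, so callers switch from `info['id']` to `info.id`; `asdict()` recovers the old dict shape where JSON-backed metadata still expects it. A self-contained sketch with hypothetical values:

    from datamint.entities import DatasetInfo

    info = DatasetInfo(
        id='ds-1', name='demo', created_at='2025-01-01T00:00:00Z',
        created_by='user@example.com', description='', customer_id='c-1',
        updated_at=None, total_resource=0, resource_ids=[],
    )
    assert info.asdict()['id'] == info.id  # asdict() recovers the legacy dict shape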
@@ -547,7 +552,7 @@ class DatamintBaseDataset:
         """Get project information from API."""
         if hasattr(self, 'project_info') and self.project_info is not None:
             return self.project_info
-        project = self.
+        project = self.api.projects.get_by_name(self.project_name).asdict()
         if 'error' in project:
             available_projects = project['all_projects']
             raise DatamintDatasetException(
@@ -592,31 +597,10 @@ class DatamintBaseDataset:
         lines = [head] + [" " * 4 + line for line in body]
         return "\n".join(lines)
 
-    def download_project(self) -> None:
-        """Download project data from API."""
-
-        dataset_info = self._get_datasetinfo()
-        self.dataset_id = dataset_info['id']
-        self.last_updaded_at = dataset_info['updated_at']
-
-        self.api_handler.download_project(
-            self.project_info['id'],
-            self.dataset_zippath,
-            all_annotations=self.all_annotations,
-            include_unannotated=self.include_unannotated
-        )
-
-        _LOGGER.debug("Downloaded dataset")
-
-        if os.path.getsize(self.dataset_zippath) == 0:
-            raise DatamintDatasetException("Download failed.")
-
-        self._extract_and_update_metadata()
-
     def _get_dataset_id(self) -> str:
         if self.dataset_id is None:
             dataset_info = self._get_datasetinfo()
-            self.dataset_id = dataset_info
+            self.dataset_id = dataset_info.id
         return self.dataset_id
 
     def _extract_and_update_metadata(self) -> None:
@@ -638,7 +622,7 @@ class DatamintBaseDataset:
 
         # Save updated metadata
         with open(datasetjson_path, 'w') as file:
-            json.dump(self.metainfo, file, default=lambda o: o.
+            json.dump(self.metainfo, file, default=lambda o: o.asdict() if hasattr(o, 'asdict') else o)
 
         self.images_metainfo = self.metainfo['resources']
         # self._convert_metainfo_to_clsobj()
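Since `metainfo` can now hold entity objects alongside plain values, the `default=` hook serializes anything exposing `asdict()`. The same idea in a self-contained sketch (`Pt` is a hypothetical stand-in for an entity):

    import json

    class Pt:  # hypothetical stand-in for a pydantic entity
        def __init__(self, x):
            self.x = x
        def asdict(self):
            return {'x': self.x}

    meta = {'updated_at': None, 'resources': [Pt(1), Pt(2)]}
    s = json.dumps(meta, default=lambda o: o.asdict() if hasattr(o, 'asdict') else o)
    assert s == '{"updated_at": null, "resources": [{"x": 1}, {"x": 2}]}'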
@@ -646,19 +630,19 @@ class DatamintBaseDataset:
     def _update_metadata_timestamps(self) -> None:
         """Update metadata with correct timestamps."""
         if 'updated_at' not in self.metainfo:
-            self.metainfo['updated_at'] = self.
+            self.metainfo['updated_at'] = self.last_updated_at
         else:
             try:
                 local_time = datetime.fromisoformat(self.metainfo['updated_at'])
-                server_time = datetime.fromisoformat(self.
+                server_time = datetime.fromisoformat(self.last_updated_at)
 
                 if local_time < server_time:
                     _LOGGER.warning(
                         f"Inconsistent updated_at dates detected "
-                        f"({self.metainfo['updated_at']} < {self.
-                        f"Fixing it to {self.
+                        f"({self.metainfo['updated_at']} < {self.last_updated_at}). "
+                        f"Fixing it to {self.last_updated_at}"
                     )
-                    self.metainfo['updated_at'] = self.
+                    self.metainfo['updated_at'] = self.last_updated_at
             except Exception as e:
                 _LOGGER.warning(f"Failed to parse updated_at date: {e}")
 
@@ -690,6 +674,9 @@ class DatamintBaseDataset:
             img = (img - min_val) / (img.max() - min_val) * 255
             img = img.astype(np.uint8)
 
+        if not img.flags.writeable:
+            img = img.copy()
+
         img_tensor = torch.from_numpy(img).contiguous()
 
         if isinstance(img_tensor, torch.ByteTensor):
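`torch.from_numpy` shares memory with the source array and complains when that array is read-only (as decoded image buffers sometimes are), hence the defensive copy. A minimal reproduction sketch:

    import numpy as np
    import torch

    img = np.zeros((4, 4), dtype=np.uint8)
    img.flags.writeable = False   # simulate a read-only decode buffer

    if not img.flags.writeable:   # same guard as in the hunk above
        img = img.copy()
    t = torch.from_numpy(img).contiguous()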
@@ -829,7 +816,7 @@ class DatamintBaseDataset:
 
         try:
             external_metadata_info = self._get_datasetinfo()
-            server_updated_at = external_metadata_info
+            server_updated_at = external_metadata_info.updated_at
         except Exception as e:
             _LOGGER.warning(f"Failed to check for updates in {self.project_name}: {e}")
             return
@@ -856,20 +843,21 @@ class DatamintBaseDataset:
         _LOGGER.info('Local version is up to date with the latest version.')
 
     def _fetch_new_resources(self,
-                             all_uptodate_resources: list[
+                             all_uptodate_resources: list[Resource]) -> list[dict]:
         local_resources = self.images_metainfo
         local_resources_ids = [res['id'] for res in local_resources]
         new_resources = []
         for resource in all_uptodate_resources:
+            resource = resource.asdict()
             if resource['id'] not in local_resources_ids:
                 resource['file'] = str(self._get_resource_file_path(resource))
                 resource['annotations'] = []
                 new_resources.append(resource)
         return new_resources
 
-    def _fetch_deleted_resources(self, all_uptodate_resources: list[
+    def _fetch_deleted_resources(self, all_uptodate_resources: list[Resource]) -> list[dict]:
         local_resources = self.images_metainfo
-        all_uptodate_resources_ids = [res
+        all_uptodate_resources_ids = [res.id for res in all_uptodate_resources]
         deleted_resources = []
         for resource in local_resources:
             try:
@@ -888,7 +876,7 @@ class DatamintBaseDataset:
         # server_updated_at = external_metadata_info['updated_at']
 
         ### RESOURCES ###
-        all_uptodate_resources = self.
+        all_uptodate_resources = self.api.projects.get_project_resources(self.get_info()['id'])
         new_resources = self._fetch_new_resources(all_uptodate_resources)
         deleted_resources = self._fetch_deleted_resources(all_uptodate_resources)
 
@@ -898,9 +886,9 @@ class DatamintBaseDataset:
         new_resources_path = [Path(self.dataset_dir) / r['file'] for r in new_resources]
         new_resources_ids = [r['id'] for r in new_resources]
         _LOGGER.info(f"Downloading {len(new_resources)} new resources...")
-        new_res_paths = self.
-
-
+        new_res_paths = self.api.resources.download_multiple_resources(new_resources_ids,
+                                                                       save_path=new_resources_path,
+                                                                       add_extension=True)
         for new_rpath, r in zip(new_res_paths, new_resources):
             r['file'] = str(Path(new_rpath).relative_to(self.dataset_dir))
         _LOGGER.info(f"Downloaded {len(new_resources)} new resources.")
@@ -910,16 +898,17 @@ class DatamintBaseDataset:
         ################
 
         ### ANNOTATIONS ###
-        all_annotations = self.
-
+        all_annotations = self.api.annotations.get_list(worklist_id=self.project_info['worklist_id'],
+                                                        status='published' if self.all_annotations else None)
+
         # group annotations by resource ID
-        annotations_by_resource = {}
+        annotations_by_resource: dict[str, list[Annotation]] = {}
         for ann in all_annotations:
             # add the local filepath
             filepath = self._get_annotation_file_path(ann)
             if filepath is not None:
-                ann
-            resource_id = ann
+                ann.file = str(filepath)
+            resource_id = ann.resource_id
             if resource_id not in annotations_by_resource:
                 annotations_by_resource[resource_id] = []
             annotations_by_resource[resource_id].append(ann)
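The grouping loop is the standard group-by-key pattern; an equivalent self-contained sketch with hypothetical IDs, using `defaultdict` instead of the explicit membership check:

    from collections import defaultdict

    anns = [{'id': 'a1', 'resource_id': 'r1'},
            {'id': 'a2', 'resource_id': 'r1'},
            {'id': 'a3', 'resource_id': 'r2'}]
    by_resource: dict[str, list[dict]] = defaultdict(list)
    for ann in anns:
        by_resource[ann['resource_id']].append(ann)
    assert sorted(by_resource) == ['r1', 'r2'] and len(by_resource['r1']) == 2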
@@ -937,11 +926,11 @@ class DatamintBaseDataset:
             # check if segmentation annotations need to be downloaded
             # Also check if annotations need to be deleted
             old_ann_ids = set([ann.id for ann in old_resource_annotations if hasattr(ann, 'id')])
-            new_ann_ids = set([ann
+            new_ann_ids = set([ann.id for ann in new_resource_annotations])
 
             # Find annotations to add, update, or remove
             annotations_to_add = [ann for ann in new_resource_annotations
-                                  if ann
+                                  if ann.id not in old_ann_ids]
             annotations_to_remove = [ann for ann in old_resource_annotations
                                      if getattr(ann, 'id', 'NA') not in new_ann_ids]
 
@@ -970,22 +959,23 @@ class DatamintBaseDataset:
                     _LOGGER.error(f"Error deleting annotation file {filepath}: {e}")
 
             # Update resource annotations list - convert to Annotation objects
-            resource['annotations'] = [Annotation.from_dict(ann) for ann in new_resource_annotations]
+            # resource['annotations'] = [Annotation.from_dict(ann) for ann in new_resource_annotations]
+            resource['annotations'] = new_resource_annotations
 
         # Batch download all segmentation files
         if segmentations_to_download:
             _LOGGER.info(f"Downloading {len(segmentations_to_download)} segmentation files...")
-            self.
+            self.api.annotations.download_multiple_files(segmentations_to_download, segmentation_paths)
             _LOGGER.info(f"Downloaded {len(segmentations_to_download)} segmentation files.")
 
         ###################
         # update metadata
-        self.metainfo['updated_at'] = self._get_datasetinfo()
+        self.metainfo['updated_at'] = self._get_datasetinfo().updated_at
         self.metainfo['all_annotations'] = self.all_annotations
         # save updated metadata
         datasetjson_path = os.path.join(self.dataset_dir, 'dataset.json')
         with open(datasetjson_path, 'w') as file:
-            json.dump(self.metainfo, file, default=lambda o: o.
+            json.dump(self.metainfo, file, default=lambda o: o.asdict() if hasattr(o, 'asdict') else o)
 
     def _get_resource_file_path(self, resource: dict) -> Path:
         """Get the local file path for a resource."""
datamint/dataset/dataset.py
CHANGED
@@ -7,7 +7,7 @@ import numpy as np
 import logging
 from PIL import Image
 import albumentations
-from datamint.
+from datamint.entities.annotation import Annotation
 
 _LOGGER = logging.getLogger(__name__)
 
@@ -155,7 +155,7 @@ class DatamintDataset(DatamintBaseDataset):
         # FIXME: avoid enforcing resizing the mask
         seg = (Image.open(segfilepath)
                .convert('L')
-               .resize((w, h), Image.NEAREST)
+               .resize((w, h), Image.Resampling.NEAREST)
               )
         seg = np.array(seg)
 
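`Image.NEAREST` has been deprecated since Pillow 9.1 in favor of the `Image.Resampling` enum; nearest-neighbour matters here because interpolation would mix discrete mask labels. A sketch (the file name is hypothetical):

    from PIL import Image

    seg = (Image.open('mask.png')   # hypothetical segmentation mask
           .convert('L')
           .resize((256, 256), Image.Resampling.NEAREST))  # no label mixing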
datamint/entities/__init__.py
ADDED
@@ -0,0 +1,20 @@
+"""DataMint entities package."""
+
+from .annotation import Annotation
+from .base_entity import BaseEntity
+from .channel import Channel, ChannelResourceData
+from .project import Project
+from .resource import Resource
+from .user import User  # new export
+from .datasetinfo import DatasetInfo
+
+__all__ = [
+    'Annotation',
+    'BaseEntity',
+    'Channel',
+    'ChannelResourceData',
+    'Project',
+    'Resource',
+    "User",
+    'DatasetInfo',
+]
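With these re-exports, downstream code imports every entity from the package root:

    from datamint.entities import (
        Annotation, BaseEntity, Channel, ChannelResourceData,
        DatasetInfo, Project, Resource, User,
    )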
datamint/entities/annotation.py
ADDED
@@ -0,0 +1,178 @@
+# filepath: datamint/entities/annotation.py
+"""Annotation entity module for DataMint API.
+
+This module defines the Annotation model used to represent annotation
+records returned by the DataMint API.
+"""
+
+from typing import Any
+import logging
+from .base_entity import BaseEntity, MISSING_FIELD
+from pydantic import Field
+from datetime import datetime
+
+logger = logging.getLogger(__name__)
+
+# Map API field names to class attributes
+_FIELD_MAPPING = {
+    'type': 'annotation_type',
+    'name': 'identifier',
+    'added_by': 'created_by',
+    'index': 'frame_index',
+}
+
+
+class Annotation(BaseEntity):
+    """Pydantic Model representing a DataMint annotation.
+
+    Attributes:
+        id: Unique identifier for the annotation.
+        identifier: User-friendly identifier or label for the annotation.
+        scope: Scope of the annotation (e.g., "frame", "image").
+        frame_index: Index of the frame if scope is frame-based.
+        annotation_type: Type of annotation (e.g., "segmentation", "bbox", "label").
+        text_value: Optional text value associated with the annotation.
+        numeric_value: Optional numeric value associated with the annotation.
+        units: Optional units for numeric_value.
+        geometry: Optional geometry payload (e.g., polygons, masks) as a list.
+        created_at: ISO timestamp for when the annotation was created.
+        created_by: Email or identifier of the creating user.
+        annotation_worklist_id: Optional worklist ID associated with the annotation.
+        status: Lifecycle status of the annotation (e.g., "new", "approved").
+        approved_at: Optional ISO timestamp for approval time.
+        approved_by: Optional identifier of the approver.
+        resource_id: ID of the resource this annotation belongs to.
+        associated_file: Path or identifier of any associated file artifact.
+        deleted: Whether the annotation is marked as deleted.
+        deleted_at: Optional ISO timestamp for deletion time.
+        deleted_by: Optional identifier of the user who deleted the annotation.
+        created_by_model: Optional identifier of the model that created this annotation.
+        old_geometry: Optional previous geometry payload for change tracking.
+        set_name: Optional set name this annotation belongs to.
+        resource_filename: Optional filename of the resource.
+        resource_modality: Optional modality of the resource (e.g., CT, MR).
+        annotation_worklist_name: Optional worklist name associated with the annotation.
+        user_info: Optional user information with keys like firstname and lastname.
+        values: Optional extra values payload for flexible schemas.
+    """
+
+    id: str
+    identifier: str
+    scope: str
+    frame_index: int | None
+    annotation_type: str
+    text_value: str | None
+    numeric_value: float | int | None
+    units: str | None
+    geometry: list | dict | None
+    created_at: str  # ISO timestamp string
+    created_by: str
+    annotation_worklist_id: str | None
+    status: str
+    approved_at: str | None  # ISO timestamp string
+    approved_by: str | None
+    resource_id: str
+    associated_file: str | None
+    deleted: bool
+    deleted_at: str | None  # ISO timestamp string
+    deleted_by: str | None
+    created_by_model: str | None
+    set_name: str | None
+    resource_filename: str | None
+    resource_modality: str | None
+    annotation_worklist_name: str | None
+    user_info: dict | None
+    values: list | None = MISSING_FIELD
+    file: str | None = None  # Add file field for segmentations
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> 'Annotation':
+        """Create an Annotation instance from a dictionary.
+
+        Args:
+            data: Dictionary containing annotation data from API
+
+        Returns:
+            Annotation instance
+        """
+        # Convert field names and filter valid fields
+        converted_data = {}
+        for key, value in data.items():
+            # Map field names if needed
+            mapped_key = _FIELD_MAPPING.get(key, key)
+            converted_data[mapped_key] = value
+
+        if 'scope' not in converted_data:
+            converted_data['scope'] = 'image' if converted_data.get('frame_index') is None else 'frame'
+
+        if converted_data['annotation_type'] in ['segmentation']:
+            if converted_data.get('file') is None:
+                raise ValueError(f"Segmentation annotations must have an associated file. {data}")
+
+        # Create instance with only valid fields
+        valid_fields = {f for f in cls.model_fields.keys()}
+        filtered_data = {k: v for k, v in converted_data.items() if k in valid_fields}
+
+        return cls(**filtered_data)
+
+    @property
+    def type(self) -> str:
+        """Alias for :attr:`annotation_type`."""
+        return self.annotation_type
+
+    @property
+    def name(self) -> str:
+        """Get the annotation name (alias for identifier)."""
+        return self.identifier
+
+    @property
+    def index(self) -> int | None:
+        """Get the frame index (alias for frame_index)."""
+        return self.frame_index
+
+    @property
+    def value(self) -> str | None:
+        """Get the annotation value (for category annotations)."""
+        return self.text_value
+
+    @property
+    def added_by(self) -> str:
+        """Get the creator email (alias for created_by)."""
+        return self.created_by
+
+    def is_segmentation(self) -> bool:
+        """Check if this is a segmentation annotation."""
+        return self.annotation_type == 'segmentation'
+
+    def is_label(self) -> bool:
+        """Check if this is a label annotation."""
+        return self.annotation_type == 'label'
+
+    def is_category(self) -> bool:
+        """Check if this is a category annotation."""
+        return self.annotation_type == 'category'
+
+    def is_frame_scoped(self) -> bool:
+        """Check if this annotation is frame-scoped."""
+        return self.scope == 'frame'
+
+    def is_image_scoped(self) -> bool:
+        """Check if this annotation is image-scoped."""
+        return self.scope == 'image'
+
+    def get_created_datetime(self) -> datetime | None:
+        """
+        Get the creation datetime as a datetime object.
+
+        Returns:
+            datetime object or None if created_at is not set
+        """
+        if isinstance(self.created_at, datetime):
+            return self.created_at
+
+        if self.created_at:
+            try:
+                return datetime.fromisoformat(self.created_at.replace('Z', '+00:00'))
+            except ValueError:
+                logger.warning(f"Could not parse created_at datetime: {self.created_at}")
+        return None
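`from_dict` renames API keys via `_FIELD_MAPPING`, infers `scope` from `frame_index`, and drops unknown keys before validation. A sketch with a hypothetical payload (all required model fields supplied):

    from datamint.entities import Annotation

    payload = {
        'id': 'ann-001',
        'name': 'tumor',                 # mapped to identifier
        'type': 'category',              # mapped to annotation_type
        'index': 3,                      # mapped to frame_index
        'added_by': 'user@example.com',  # mapped to created_by
        'text_value': 'malignant', 'numeric_value': None, 'units': None,
        'geometry': None, 'created_at': '2025-01-01T00:00:00Z',
        'annotation_worklist_id': None, 'status': 'published',
        'approved_at': None, 'approved_by': None, 'resource_id': 'res-123',
        'associated_file': None, 'deleted': False, 'deleted_at': None,
        'deleted_by': None, 'created_by_model': None, 'set_name': None,
        'resource_filename': None, 'resource_modality': 'CT',
        'annotation_worklist_name': None, 'user_info': None,
    }
    ann = Annotation.from_dict(payload)
    assert ann.name == 'tumor' and ann.type == 'category' and ann.index == 3
    assert ann.scope == 'frame'  # inferred because frame_index is set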
datamint/entities/base_entity.py
ADDED
@@ -0,0 +1,51 @@
+import logging
+import sys
+from typing import Any
+from pydantic import ConfigDict, BaseModel
+
+if sys.version_info >= (3, 11):
+    from typing import Self
+else:
+    from typing_extensions import Self
+_LOGGER = logging.getLogger(__name__)
+
+MISSING_FIELD = 'MISSING_FIELD'  # Used when a field is sometimes missing for one endpoint but not on another endpoint
+
+# Track logged warnings to avoid duplicates
+_LOGGED_WARNINGS: set[tuple[str, str]] = set()
+
+
+class BaseEntity(BaseModel):
+    """
+    Base class for all entities in the Datamint system.
+
+    This class provides common functionality for all entities, such as
+    serialization and deserialization from dictionaries, as well as
+    handling unknown fields gracefully.
+    """
+
+    model_config = ConfigDict(extra='allow')  # Allow extra fields not defined in the model
+
+    def asdict(self) -> dict[str, Any]:
+        """Convert the entity to a dictionary, including unknown fields."""
+        return self.model_dump(warnings='none')
+
+    def asjson(self) -> str:
+        """Convert the entity to a JSON string, including unknown fields."""
+        return self.model_dump_json(warnings='none')
+
+    def model_post_init(self, __context: Any) -> None:
+        """Handle unknown fields by logging a warning once per class/field combination in debug mode."""
+        if self.__pydantic_extra__ and _LOGGER.isEnabledFor(logging.DEBUG):
+            class_name = self.__class__.__name__
+
+            have_to_log = False
+            for key in self.__pydantic_extra__.keys():
+                warning_key = (class_name, key)
+
+                if warning_key not in _LOGGED_WARNINGS:
+                    _LOGGED_WARNINGS.add(warning_key)
+                    have_to_log = True
+
+            if have_to_log:
+                _LOGGER.warning(f"Unknown fields {list(self.__pydantic_extra__.keys())} found in {class_name}")
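Because of `extra='allow'`, undeclared payload keys are preserved rather than rejected, and each unknown class/field pair is logged at most once (debug mode only). A sketch with a hypothetical entity subclass:

    import logging
    from datamint.entities.base_entity import BaseEntity

    logging.basicConfig(level=logging.DEBUG)

    class Thing(BaseEntity):  # hypothetical entity, for illustration only
        id: str

    t = Thing(id='t-1', surprise='kept')   # 'surprise' is undeclared
    assert t.asdict() == {'id': 't-1', 'surprise': 'kept'}
    # Logs "Unknown fields ['surprise'] found in Thing" once, in debug mode only.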
datamint/entities/channel.py
ADDED
@@ -0,0 +1,46 @@
+from pydantic import ConfigDict, BaseModel
+from datetime import datetime
+from datamint.entities.base_entity import BaseEntity
+
+
+class ChannelResourceData(BaseModel):
+    """Represents resource data within a channel.
+
+    Attributes:
+        created_by: Email of the user who created the resource.
+        customer_id: UUID of the customer.
+        resource_id: UUID of the resource.
+        resource_file_name: Original filename of the resource.
+        resource_mimetype: MIME type of the resource.
+    """
+    model_config = ConfigDict(extra='allow')
+
+    created_by: str
+    customer_id: str
+    resource_id: str
+    resource_file_name: str
+    resource_mimetype: str
+
+
+class Channel(BaseEntity):
+    """Represents a channel containing multiple resources.
+
+    A channel is a collection of resources grouped together,
+    typically for batch processing or organization purposes.
+
+    Attributes:
+        channel_name: Name identifier for the channel.
+        resource_data: List of resources contained in this channel.
+        deleted: Whether the channel has been marked as deleted.
+        created_at: Timestamp when the channel was created.
+        updated_at: Timestamp when the channel was last updated.
+    """
+    channel_name: str
+    resource_data: list[ChannelResourceData]
+    deleted: bool = False
+    created_at: str | None = None
+    updated_at: str | None = None
+
+    def get_resource_ids(self) -> list[str]:
+        """Get list of all resource IDs in this channel."""
+        return [resource.resource_id for resource in self.resource_data] if self.resource_data else []
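Pydantic coerces plain dicts into `ChannelResourceData`, so a raw API payload validates directly. A sketch with hypothetical values:

    from datamint.entities import Channel

    ch = Channel(
        channel_name='batch-2025-01',
        resource_data=[{
            'created_by': 'user@example.com',
            'customer_id': 'c-1',
            'resource_id': 'res-123',
            'resource_file_name': 'scan.dcm',
            'resource_mimetype': 'application/dicom',
        }],
    )
    assert ch.get_resource_ids() == ['res-123']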
datamint/entities/datasetinfo.py
ADDED
@@ -0,0 +1,22 @@
+"""Project entity module for DataMint API."""
+
+from datetime import datetime
+import logging
+from .base_entity import BaseEntity, MISSING_FIELD
+
+logger = logging.getLogger(__name__)
+
+
+class DatasetInfo(BaseEntity):
+    """Pydantic Model representing a DataMint dataset.
+    """
+
+    id: str
+    name: str
+    created_at: str  # ISO timestamp string
+    created_by: str
+    description: str
+    customer_id: str
+    updated_at: str | None
+    total_resource: int
+    resource_ids: list[str]