datamint 1.9.3__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.

Files changed (35)
  1. datamint/__init__.py +2 -0
  2. datamint/api/__init__.py +3 -0
  3. datamint/api/base_api.py +430 -0
  4. datamint/api/client.py +91 -0
  5. datamint/api/dto/__init__.py +10 -0
  6. datamint/api/endpoints/__init__.py +17 -0
  7. datamint/api/endpoints/annotations_api.py +984 -0
  8. datamint/api/endpoints/channels_api.py +28 -0
  9. datamint/api/endpoints/datasetsinfo_api.py +16 -0
  10. datamint/api/endpoints/projects_api.py +203 -0
  11. datamint/api/endpoints/resources_api.py +1013 -0
  12. datamint/api/endpoints/users_api.py +38 -0
  13. datamint/api/entity_base_api.py +347 -0
  14. datamint/apihandler/api_handler.py +3 -6
  15. datamint/apihandler/base_api_handler.py +6 -28
  16. datamint/apihandler/dto/__init__.py +0 -0
  17. datamint/apihandler/dto/annotation_dto.py +1 -1
  18. datamint/client_cmd_tools/datamint_upload.py +19 -30
  19. datamint/dataset/base_dataset.py +83 -86
  20. datamint/dataset/dataset.py +2 -2
  21. datamint/entities/__init__.py +20 -0
  22. datamint/entities/annotation.py +178 -0
  23. datamint/entities/base_entity.py +51 -0
  24. datamint/entities/channel.py +46 -0
  25. datamint/entities/datasetinfo.py +22 -0
  26. datamint/entities/project.py +64 -0
  27. datamint/entities/resource.py +130 -0
  28. datamint/entities/user.py +21 -0
  29. datamint/examples/example_projects.py +41 -44
  30. datamint/exceptions.py +27 -1
  31. {datamint-1.9.3.dist-info → datamint-2.0.1.dist-info}/METADATA +13 -9
  32. datamint-2.0.1.dist-info/RECORD +50 -0
  33. {datamint-1.9.3.dist-info → datamint-2.0.1.dist-info}/WHEEL +1 -1
  34. datamint-1.9.3.dist-info/RECORD +0 -29
  35. {datamint-1.9.3.dist-info → datamint-2.0.1.dist-info}/entry_points.txt +0 -0
datamint/dataset/base_dataset.py

@@ -13,14 +13,16 @@ from datamint import configs
 from torch.utils.data import DataLoader
 import torch
 from torch import Tensor
-from datamint.apihandler.base_api_handler import DatamintException
+from datamint.exceptions import DatamintException
 from medimgkit.dicom_utils import is_dicom
 from medimgkit.readers import read_array_normalized
-from medimgkit.format_detection import guess_extension
+from medimgkit.format_detection import guess_extension, guess_typez
+from medimgkit.nifti_utils import NIFTI_MIMES, get_nifti_shape
 from datetime import datetime
 from pathlib import Path
-from datamint.dataset.annotation import Annotation
+from datamint.entities import Annotation, DatasetInfo
 import cv2
+from datamint.entities import Resource
 
 _LOGGER = logging.getLogger(__name__)
 
@@ -174,23 +176,12 @@ class DatamintBaseDataset:
 
     def _setup_api_handler(self, server_url: Optional[str], api_key: Optional[str], auto_update: bool) -> None:
         """Setup API handler and validate connection."""
-        from datamint.apihandler.api_handler import APIHandler
-
-        self.api_handler = APIHandler(
-            root_url=server_url,
+        from datamint import Api
+        self.api = Api(
+            server_url=server_url,
             api_key=api_key,
-            check_connection=auto_update
+            check_connection=self.auto_update
         )
-        self.server_url = self.api_handler.root_url
-        self.api_key = self.api_handler.api_key
-
-        if self.api_key is None:
-            _LOGGER.warning(
-                "API key not provided. If you want to download data, please provide an API key, "
-                f"either by passing it as an argument, "
-                f"setting environment variable {configs.ENV_VARS[configs.APIKEY_KEY]} or "
-                "using datamint-config command line tool."
-            )
 
     def _setup_directories(self, root: str | None) -> None:
         """Setup root and dataset directories."""
@@ -234,26 +225,34 @@ class DatamintBaseDataset:
         else:
             self._check_version()
 
+    def _init_metainfo(self) -> None:
+        # get the server info
+        self.project_info = self.get_info()
+        self.metainfo = self._get_datasetinfo().asdict().copy()
+        self.metainfo['updated_at'] = None
+        self.metainfo['resources'] = []
+        self.metainfo['all_annotations'] = self.all_annotations
+        self.images_metainfo = self.metainfo['resources']
+
     def _load_metadata(self) -> bool:
         """Load and process dataset metadata."""
         if hasattr(self, 'metainfo'):
             _LOGGER.warning("Metadata already loaded.")
         metadata_path = os.path.join(self.dataset_dir, 'dataset.json')
         if not os.path.isfile(metadata_path):
-            # get the server info
-            self.project_info = self.get_info()
-            self.metainfo = self._get_datasetinfo().copy()
-            self.metainfo['updated_at'] = None
-            self.metainfo['resources'] = []
-            self.metainfo['all_annotations'] = self.all_annotations
-            self.images_metainfo = self.metainfo['resources']
+            self._init_metainfo()
             return False
         else:
             with open(metadata_path, 'r') as file:
                 self.metainfo = json.load(file)
             self.images_metainfo = self.metainfo['resources']
             # Convert annotations from dict to Annotation objects
-            self._convert_metainfo_to_clsobj()
+            try:
+                self._convert_metainfo_to_clsobj()
+            except Exception as e:
+                _LOGGER.warning(f"Failed to convert annotations. Redownloading dataset. {type(e)}")
+                self._init_metainfo()
+                return False
         return True
 
     def _convert_metainfo_to_clsobj(self):
@@ -412,19 +411,33 @@ class DatamintBaseDataset:
     @staticmethod
    def read_number_of_frames(filepath: str) -> int:
         """Read the number of frames in a file."""
-        if is_dicom(filepath):
+
+        mimetypes, ext = guess_typez(filepath)
+        mimetype = mimetypes[0]
+        if mimetype is None:
+            raise ValueError(f"Could not determine MIME type for file: {filepath}")
+
+        if mimetype == 'application/dicom':
             ds = pydicom.dcmread(filepath)
             return getattr(ds, 'NumberOfFrames', 1)
-        elif filepath.lower().endswith(('.mp4', '.avi')):
+        elif mimetype.startswith('video/'):
             cap = cv2.VideoCapture(filepath)
             try:
                 return int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
             finally:
                 cap.release()
-        elif filepath.lower().endswith(('.png', '.jpg', '.jpeg')):
+        elif mimetype in ('image/png', 'image/jpeg', 'image/jpg', 'image/bmp', 'image/tiff'):
             return 1
+        elif mimetype in NIFTI_MIMES:
+            shape = get_nifti_shape(filepath)
+            if len(shape) == 3:
+                return shape[-1]
+            elif len(shape) > 3:
+                return shape[3]
+            else:
+                return 1
         else:
-            raise ValueError(f"Unsupported file type: {filepath}")
+            raise ValueError(f"Unsupported file type '{mimetype}' for file {filepath}")
 
     def get_resources_ids(self) -> list[str]:
         """Get list of resource IDs."""
@@ -526,18 +539,18 @@ class DatamintBaseDataset:
         if missing_files:
             raise DatamintDatasetException(f"Image files not found: {missing_files}")
 
-    def _get_datasetinfo(self) -> dict:
+    def _get_datasetinfo(self) -> DatasetInfo:
         """Get dataset information from API."""
         if self._server_dataset_info is not None:
             return self._server_dataset_info
-        all_datasets = self.api_handler.get_datasets()
+        all_datasets = self.api._datasetsinfo.get_all()
 
         for dataset in all_datasets:
-            if dataset['id'] == self.dataset_id:
+            if dataset.id == self.dataset_id:
                 self._server_dataset_info = dataset
                 return dataset
 
-        available_datasets = [(d['name'], d['id']) for d in all_datasets]
+        available_datasets = [(d.name, d.id) for d in all_datasets]
         raise DatamintDatasetException(
             f"Dataset with id '{self.dataset_id}' not found. "
             f"Available datasets: {available_datasets}"
@@ -547,13 +560,12 @@ class DatamintBaseDataset:
         """Get project information from API."""
         if hasattr(self, 'project_info') and self.project_info is not None:
             return self.project_info
-        project = self.api_handler.get_project_by_name(self.project_name)
-        if 'error' in project:
-            available_projects = project['all_projects']
+        project = self.api.projects.get_by_name(self.project_name)
+        if project is None:
             raise DatamintDatasetException(
-                f"Project with name '{self.project_name}' not found. "
-                f"Available projects: {available_projects}"
+                f"Project with name '{self.project_name}' not found."
             )
+        project = project.asdict()
         self.project_info = project
         self.dataset_id = project['dataset_id']
         return project
@@ -592,31 +604,10 @@ class DatamintBaseDataset:
         lines = [head] + [" " * 4 + line for line in body]
         return "\n".join(lines)
 
-    def download_project(self) -> None:
-        """Download project data from API."""
-
-        dataset_info = self._get_datasetinfo()
-        self.dataset_id = dataset_info['id']
-        self.last_updaded_at = dataset_info['updated_at']
-
-        self.api_handler.download_project(
-            self.project_info['id'],
-            self.dataset_zippath,
-            all_annotations=self.all_annotations,
-            include_unannotated=self.include_unannotated
-        )
-
-        _LOGGER.debug("Downloaded dataset")
-
-        if os.path.getsize(self.dataset_zippath) == 0:
-            raise DatamintDatasetException("Download failed.")
-
-        self._extract_and_update_metadata()
-
     def _get_dataset_id(self) -> str:
         if self.dataset_id is None:
             dataset_info = self._get_datasetinfo()
-            self.dataset_id = dataset_info['id']
+            self.dataset_id = dataset_info.id
         return self.dataset_id
 
     def _extract_and_update_metadata(self) -> None:
@@ -638,7 +629,7 @@ class DatamintBaseDataset:
 
         # Save updated metadata
         with open(datasetjson_path, 'w') as file:
-            json.dump(self.metainfo, file, default=lambda o: o.to_dict() if hasattr(o, 'to_dict') else o)
+            json.dump(self.metainfo, file, default=lambda o: o.asdict() if hasattr(o, 'asdict') else o)
 
         self.images_metainfo = self.metainfo['resources']
         # self._convert_metainfo_to_clsobj()
@@ -646,19 +637,19 @@ class DatamintBaseDataset:
     def _update_metadata_timestamps(self) -> None:
         """Update metadata with correct timestamps."""
         if 'updated_at' not in self.metainfo:
-            self.metainfo['updated_at'] = self.last_updaded_at
+            self.metainfo['updated_at'] = self.last_updated_at
         else:
             try:
                 local_time = datetime.fromisoformat(self.metainfo['updated_at'])
-                server_time = datetime.fromisoformat(self.last_updaded_at)
+                server_time = datetime.fromisoformat(self.last_updated_at)
 
                 if local_time < server_time:
                     _LOGGER.warning(
                         f"Inconsistent updated_at dates detected "
-                        f"({self.metainfo['updated_at']} < {self.last_updaded_at}). "
-                        f"Fixing it to {self.last_updaded_at}"
+                        f"({self.metainfo['updated_at']} < {self.last_updated_at}). "
+                        f"Fixing it to {self.last_updated_at}"
                     )
-                    self.metainfo['updated_at'] = self.last_updaded_at
+                    self.metainfo['updated_at'] = self.last_updated_at
             except Exception as e:
                 _LOGGER.warning(f"Failed to parse updated_at date: {e}")
 
@@ -690,6 +681,9 @@ class DatamintBaseDataset:
             img = (img - min_val) / (img.max() - min_val) * 255
             img = img.astype(np.uint8)
 
+        if not img.flags.writeable:
+            img = img.copy()
+
         img_tensor = torch.from_numpy(img).contiguous()
 
         if isinstance(img_tensor, torch.ByteTensor):
@@ -829,7 +823,7 @@ class DatamintBaseDataset:
 
         try:
             external_metadata_info = self._get_datasetinfo()
-            server_updated_at = external_metadata_info['updated_at']
+            server_updated_at = external_metadata_info.updated_at
         except Exception as e:
             _LOGGER.warning(f"Failed to check for updates in {self.project_name}: {e}")
             return
@@ -856,20 +850,21 @@ class DatamintBaseDataset:
            _LOGGER.info('Local version is up to date with the latest version.')
 
     def _fetch_new_resources(self,
-                             all_uptodate_resources: list[dict]) -> list[dict]:
+                             all_uptodate_resources: list[Resource]) -> list[dict]:
         local_resources = self.images_metainfo
         local_resources_ids = [res['id'] for res in local_resources]
         new_resources = []
         for resource in all_uptodate_resources:
+            resource = resource.asdict()
            if resource['id'] not in local_resources_ids:
                 resource['file'] = str(self._get_resource_file_path(resource))
                 resource['annotations'] = []
                 new_resources.append(resource)
         return new_resources
 
-    def _fetch_deleted_resources(self, all_uptodate_resources: list[dict]) -> list[dict]:
+    def _fetch_deleted_resources(self, all_uptodate_resources: list[Resource]) -> list[dict]:
         local_resources = self.images_metainfo
-        all_uptodate_resources_ids = [res['id'] for res in all_uptodate_resources]
+        all_uptodate_resources_ids = [res.id for res in all_uptodate_resources]
         deleted_resources = []
         for resource in local_resources:
             try:
@@ -888,7 +883,7 @@ class DatamintBaseDataset:
         # server_updated_at = external_metadata_info['updated_at']
 
         ### RESOURCES ###
-        all_uptodate_resources = self.api_handler.get_project_resources(self.get_info()['id'])
+        all_uptodate_resources = self.api.projects.get_project_resources(self.get_info()['id'])
         new_resources = self._fetch_new_resources(all_uptodate_resources)
         deleted_resources = self._fetch_deleted_resources(all_uptodate_resources)
 
@@ -898,9 +893,9 @@ class DatamintBaseDataset:
         new_resources_path = [Path(self.dataset_dir) / r['file'] for r in new_resources]
         new_resources_ids = [r['id'] for r in new_resources]
         _LOGGER.info(f"Downloading {len(new_resources)} new resources...")
-        new_res_paths = self.api_handler.download_multiple_resources(new_resources_ids,
-                                                                     save_path=new_resources_path,
-                                                                     add_extension=True)
+        new_res_paths = self.api.resources.download_multiple_resources(new_resources_ids,
+                                                                       save_path=new_resources_path,
+                                                                       add_extension=True)
         for new_rpath, r in zip(new_res_paths, new_resources):
             r['file'] = str(Path(new_rpath).relative_to(self.dataset_dir))
         _LOGGER.info(f"Downloaded {len(new_resources)} new resources.")
@@ -910,16 +905,17 @@ class DatamintBaseDataset:
         ################
 
         ### ANNOTATIONS ###
-        all_annotations = self.api_handler.get_annotations(worklist_id=self.project_info['worklist_id'],
-                                                           status='published' if self.all_annotations else None)
+        all_annotations = self.api.annotations.get_list(worklist_id=None if self.all_annotations else self.project_info['worklist_id'],
+                                                        status=None if self.all_annotations else 'published')
+
         # group annotations by resource ID
-        annotations_by_resource = {}
+        annotations_by_resource: dict[str, list[Annotation]] = {}
         for ann in all_annotations:
             # add the local filepath
             filepath = self._get_annotation_file_path(ann)
             if filepath is not None:
-                ann['file'] = str(filepath)
-            resource_id = ann['resource_id']
+                ann.file = str(filepath)
+            resource_id = ann.resource_id
             if resource_id not in annotations_by_resource:
                 annotations_by_resource[resource_id] = []
             annotations_by_resource[resource_id].append(ann)
@@ -937,11 +933,11 @@ class DatamintBaseDataset:
             # check if segmentation annotations need to be downloaded
             # Also check if annotations need to be deleted
             old_ann_ids = set([ann.id for ann in old_resource_annotations if hasattr(ann, 'id')])
-            new_ann_ids = set([ann['id'] for ann in new_resource_annotations])
+            new_ann_ids = set([ann.id for ann in new_resource_annotations])
 
             # Find annotations to add, update, or remove
             annotations_to_add = [ann for ann in new_resource_annotations
-                                  if ann['id'] not in old_ann_ids]
+                                  if ann.id not in old_ann_ids]
             annotations_to_remove = [ann for ann in old_resource_annotations
                                      if getattr(ann, 'id', 'NA') not in new_ann_ids]
 
@@ -970,22 +966,23 @@ class DatamintBaseDataset:
                     _LOGGER.error(f"Error deleting annotation file {filepath}: {e}")
 
             # Update resource annotations list - convert to Annotation objects
-            resource['annotations'] = [Annotation.from_dict(ann) for ann in new_resource_annotations]
+            # resource['annotations'] = [Annotation.from_dict(ann) for ann in new_resource_annotations]
+            resource['annotations'] = new_resource_annotations
 
         # Batch download all segmentation files
         if segmentations_to_download:
             _LOGGER.info(f"Downloading {len(segmentations_to_download)} segmentation files...")
-            self.api_handler.download_multiple_segmentations(segmentations_to_download, segmentation_paths)
+            self.api.annotations.download_multiple_files(segmentations_to_download, segmentation_paths)
             _LOGGER.info(f"Downloaded {len(segmentations_to_download)} segmentation files.")
 
         ###################
         # update metadata
-        self.metainfo['updated_at'] = self._get_datasetinfo()['updated_at']
+        self.metainfo['updated_at'] = self._get_datasetinfo().updated_at
         self.metainfo['all_annotations'] = self.all_annotations
         # save updated metadata
         datasetjson_path = os.path.join(self.dataset_dir, 'dataset.json')
         with open(datasetjson_path, 'w') as file:
-            json.dump(self.metainfo, file, default=lambda o: o.to_dict() if hasattr(o, 'to_dict') else o)
+            json.dump(self.metainfo, file, default=lambda o: o.asdict() if hasattr(o, 'asdict') else o)
 
     def _get_resource_file_path(self, resource: dict) -> Path:
         """Get the local file path for a resource."""
datamint/dataset/dataset.py

@@ -7,7 +7,7 @@ import numpy as np
 import logging
 from PIL import Image
 import albumentations
-from datamint.dataset.annotation import Annotation
+from datamint.entities.annotation import Annotation
 
 _LOGGER = logging.getLogger(__name__)
 
@@ -155,7 +155,7 @@ class DatamintDataset(DatamintBaseDataset):
         # FIXME: avoid enforcing resizing the mask
         seg = (Image.open(segfilepath)
                .convert('L')
-               .resize((w, h), Image.NEAREST)
+               .resize((w, h), Image.Resampling.NEAREST)
                )
         seg = np.array(seg)
 
datamint/entities/__init__.py (new file)

@@ -0,0 +1,20 @@
+"""DataMint entities package."""
+
+from .annotation import Annotation
+from .base_entity import BaseEntity
+from .channel import Channel, ChannelResourceData
+from .project import Project
+from .resource import Resource
+from .user import User  # new export
+from .datasetinfo import DatasetInfo
+
+__all__ = [
+    'Annotation',
+    'BaseEntity',
+    'Channel',
+    'ChannelResourceData',
+    'Project',
+    'Resource',
+    "User",
+    'DatasetInfo',
+]
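
For context, these are exactly the names the updated base_dataset.py above imports, and every entity shares the pydantic base class added below:

    from datamint.entities import Annotation, DatasetInfo, Resource

    from datamint.entities import BaseEntity
    assert issubclass(Annotation, BaseEntity)
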
datamint/entities/annotation.py (new file)

@@ -0,0 +1,178 @@
+# filepath: datamint/entities/annotation.py
+"""Annotation entity module for DataMint API.
+
+This module defines the Annotation model used to represent annotation
+records returned by the DataMint API.
+"""
+
+from typing import Any
+import logging
+from .base_entity import BaseEntity, MISSING_FIELD
+from pydantic import Field
+from datetime import datetime
+
+logger = logging.getLogger(__name__)
+
+# Map API field names to class attributes
+_FIELD_MAPPING = {
+    'type': 'annotation_type',
+    'name': 'identifier',
+    'added_by': 'created_by',
+    'index': 'frame_index',
+}
+
+
+class Annotation(BaseEntity):
+    """Pydantic Model representing a DataMint annotation.
+
+    Attributes:
+        id: Unique identifier for the annotation.
+        identifier: User-friendly identifier or label for the annotation.
+        scope: Scope of the annotation (e.g., "frame", "image").
+        frame_index: Index of the frame if scope is frame-based.
+        annotation_type: Type of annotation (e.g., "segmentation", "bbox", "label").
+        text_value: Optional text value associated with the annotation.
+        numeric_value: Optional numeric value associated with the annotation.
+        units: Optional units for numeric_value.
+        geometry: Optional geometry payload (e.g., polygons, masks) as a list.
+        created_at: ISO timestamp for when the annotation was created.
+        created_by: Email or identifier of the creating user.
+        annotation_worklist_id: Optional worklist ID associated with the annotation.
+        status: Lifecycle status of the annotation (e.g., "new", "approved").
+        approved_at: Optional ISO timestamp for approval time.
+        approved_by: Optional identifier of the approver.
+        resource_id: ID of the resource this annotation belongs to.
+        associated_file: Path or identifier of any associated file artifact.
+        deleted: Whether the annotation is marked as deleted.
+        deleted_at: Optional ISO timestamp for deletion time.
+        deleted_by: Optional identifier of the user who deleted the annotation.
+        created_by_model: Optional identifier of the model that created this annotation.
+        old_geometry: Optional previous geometry payload for change tracking.
+        set_name: Optional set name this annotation belongs to.
+        resource_filename: Optional filename of the resource.
+        resource_modality: Optional modality of the resource (e.g., CT, MR).
+        annotation_worklist_name: Optional worklist name associated with the annotation.
+        user_info: Optional user information with keys like firstname and lastname.
+        values: Optional extra values payload for flexible schemas.
+    """
+
+    id: str
+    identifier: str
+    scope: str
+    frame_index: int | None
+    annotation_type: str
+    text_value: str | None
+    numeric_value: float | int | None
+    units: str | None
+    geometry: list | dict | None
+    created_at: str  # ISO timestamp string
+    created_by: str
+    annotation_worklist_id: str | None
+    status: str
+    approved_at: str | None  # ISO timestamp string
+    approved_by: str | None
+    resource_id: str
+    associated_file: str | None
+    deleted: bool
+    deleted_at: str | None  # ISO timestamp string
+    deleted_by: str | None
+    created_by_model: str | None
+    set_name: str | None
+    resource_filename: str | None
+    resource_modality: str | None
+    annotation_worklist_name: str | None
+    user_info: dict | None
+    values: list | None = MISSING_FIELD
+    file: str | None = None  # Add file field for segmentations
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> 'Annotation':
+        """Create an Annotation instance from a dictionary.
+
+        Args:
+            data: Dictionary containing annotation data from API
+
+        Returns:
+            Annotation instance
+        """
+        # Convert field names and filter valid fields
+        converted_data = {}
+        for key, value in data.items():
+            # Map field names if needed
+            mapped_key = _FIELD_MAPPING.get(key, key)
+            converted_data[mapped_key] = value
+
+        if 'scope' not in converted_data:
+            converted_data['scope'] = 'image' if converted_data.get('frame_index') is None else 'frame'
+
+        if converted_data['annotation_type'] in ['segmentation']:
+            if converted_data.get('file') is None:
+                raise ValueError(f"Segmentation annotations must have an associated file. {data}")
+
+        # Create instance with only valid fields
+        valid_fields = {f for f in cls.model_fields.keys()}
+        filtered_data = {k: v for k, v in converted_data.items() if k in valid_fields}
+
+        return cls(**filtered_data)
+
+    @property
+    def type(self) -> str:
+        """Alias for :attr:`annotation_type`."""
+        return self.annotation_type
+
+    @property
+    def name(self) -> str:
+        """Get the annotation name (alias for identifier)."""
+        return self.identifier
+
+    @property
+    def index(self) -> int | None:
+        """Get the frame index (alias for frame_index)."""
+        return self.frame_index
+
+    @property
+    def value(self) -> str | None:
+        """Get the annotation value (for category annotations)."""
+        return self.text_value
+
+    @property
+    def added_by(self) -> str:
+        """Get the creator email (alias for created_by)."""
+        return self.created_by
+
+    def is_segmentation(self) -> bool:
+        """Check if this is a segmentation annotation."""
+        return self.annotation_type == 'segmentation'
+
+    def is_label(self) -> bool:
+        """Check if this is a label annotation."""
+        return self.annotation_type == 'label'
+
+    def is_category(self) -> bool:
+        """Check if this is a category annotation."""
+        return self.annotation_type == 'category'
+
+    def is_frame_scoped(self) -> bool:
+        """Check if this annotation is frame-scoped."""
+        return self.scope == 'frame'
+
+    def is_image_scoped(self) -> bool:
+        """Check if this annotation is image-scoped."""
+        return self.scope == 'image'
+
+    def get_created_datetime(self) -> datetime | None:
+        """
+        Get the creation datetime as a datetime object.
+
+        Returns:
+            datetime object or None if created_at is not set
+        """
+        if isinstance(self.created_at, datetime):
+            return self.created_at
+
+        if self.created_at:
+            try:
+                return datetime.fromisoformat(self.created_at.replace('Z', '+00:00'))
+            except ValueError:
+                logger.warning(f"Could not parse created_at datetime: {self.created_at}")
+        return None
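
To illustrate from_dict: API field names (type, name, added_by, index) are remapped via _FIELD_MAPPING, scope is inferred from frame_index when absent, and unknown keys are filtered against model_fields. A sketch with an entirely invented payload:

    payload = {
        'id': 'ann-1', 'type': 'label', 'name': 'fracture',
        'added_by': 'user@example.com', 'index': 3,
        'text_value': None, 'numeric_value': None, 'units': None,
        'geometry': None, 'created_at': '2024-01-01T12:00:00Z',
        'annotation_worklist_id': None, 'status': 'new',
        'approved_at': None, 'approved_by': None, 'resource_id': 'res-9',
        'associated_file': None, 'deleted': False, 'deleted_at': None,
        'deleted_by': None, 'created_by_model': None, 'set_name': None,
        'resource_filename': None, 'resource_modality': None,
        'annotation_worklist_name': None, 'user_info': None,
    }
    ann = Annotation.from_dict(payload)
    assert ann.annotation_type == 'label' and ann.type == 'label'
    assert ann.scope == 'frame'        # inferred: frame_index is not None
    assert ann.is_label() and not ann.is_segmentation()
    print(ann.get_created_datetime())  # 2024-01-01 12:00:00+00:00
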
datamint/entities/base_entity.py (new file)

@@ -0,0 +1,51 @@
+import logging
+import sys
+from typing import Any
+from pydantic import ConfigDict, BaseModel
+
+if sys.version_info >= (3, 11):
+    from typing import Self
+else:
+    from typing_extensions import Self
+_LOGGER = logging.getLogger(__name__)
+
+MISSING_FIELD = 'MISSING_FIELD'  # Used when a field is sometimes missing for one endpoint but not on another endpoint
+
+# Track logged warnings to avoid duplicates
+_LOGGED_WARNINGS: set[tuple[str, str]] = set()
+
+
+class BaseEntity(BaseModel):
+    """
+    Base class for all entities in the Datamint system.
+
+    This class provides common functionality for all entities, such as
+    serialization and deserialization from dictionaries, as well as
+    handling unknown fields gracefully.
+    """
+
+    model_config = ConfigDict(extra='allow')  # Allow extra fields not defined in the model
+
+    def asdict(self) -> dict[str, Any]:
+        """Convert the entity to a dictionary, including unknown fields."""
+        return self.model_dump(warnings='none')
+
+    def asjson(self) -> str:
+        """Convert the entity to a JSON string, including unknown fields."""
+        return self.model_dump_json(warnings='none')
+
+    def model_post_init(self, __context: Any) -> None:
+        """Handle unknown fields by logging a warning once per class/field combination in debug mode."""
+        if self.__pydantic_extra__ and _LOGGER.isEnabledFor(logging.DEBUG):
+            class_name = self.__class__.__name__
+
+            have_to_log = False
+            for key in self.__pydantic_extra__.keys():
+                warning_key = (class_name, key)
+
+                if warning_key not in _LOGGED_WARNINGS:
+                    _LOGGED_WARNINGS.add(warning_key)
+                    have_to_log = True
+
+            if have_to_log:
+                _LOGGER.warning(f"Unknown fields {list(self.__pydantic_extra__.keys())} found in {class_name}")