datamint 1.9.3__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of datamint might be problematic.

Files changed (35)
  1. datamint/__init__.py +2 -0
  2. datamint/api/__init__.py +3 -0
  3. datamint/api/base_api.py +430 -0
  4. datamint/api/client.py +91 -0
  5. datamint/api/dto/__init__.py +10 -0
  6. datamint/api/endpoints/__init__.py +17 -0
  7. datamint/api/endpoints/annotations_api.py +984 -0
  8. datamint/api/endpoints/channels_api.py +28 -0
  9. datamint/api/endpoints/datasetsinfo_api.py +16 -0
  10. datamint/api/endpoints/projects_api.py +203 -0
  11. datamint/api/endpoints/resources_api.py +1013 -0
  12. datamint/api/endpoints/users_api.py +38 -0
  13. datamint/api/entity_base_api.py +347 -0
  14. datamint/apihandler/api_handler.py +3 -6
  15. datamint/apihandler/base_api_handler.py +6 -28
  16. datamint/apihandler/dto/__init__.py +0 -0
  17. datamint/apihandler/dto/annotation_dto.py +1 -1
  18. datamint/client_cmd_tools/datamint_upload.py +19 -30
  19. datamint/dataset/base_dataset.py +65 -75
  20. datamint/dataset/dataset.py +2 -2
  21. datamint/entities/__init__.py +20 -0
  22. datamint/entities/annotation.py +178 -0
  23. datamint/entities/base_entity.py +51 -0
  24. datamint/entities/channel.py +46 -0
  25. datamint/entities/datasetinfo.py +22 -0
  26. datamint/entities/project.py +64 -0
  27. datamint/entities/resource.py +130 -0
  28. datamint/entities/user.py +21 -0
  29. datamint/examples/example_projects.py +41 -44
  30. datamint/exceptions.py +27 -1
  31. {datamint-1.9.3.dist-info → datamint-2.0.0.dist-info}/METADATA +13 -9
  32. datamint-2.0.0.dist-info/RECORD +50 -0
  33. {datamint-1.9.3.dist-info → datamint-2.0.0.dist-info}/WHEEL +1 -1
  34. datamint-1.9.3.dist-info/RECORD +0 -29
  35. {datamint-1.9.3.dist-info → datamint-2.0.0.dist-info}/entry_points.txt +0 -0
--- a/datamint/dataset/base_dataset.py
+++ b/datamint/dataset/base_dataset.py
@@ -13,14 +13,16 @@ from datamint import configs
 from torch.utils.data import DataLoader
 import torch
 from torch import Tensor
-from datamint.apihandler.base_api_handler import DatamintException
+from datamint.exceptions import DatamintException
 from medimgkit.dicom_utils import is_dicom
 from medimgkit.readers import read_array_normalized
-from medimgkit.format_detection import guess_extension
+from medimgkit.format_detection import guess_extension, guess_typez
+from medimgkit.nifti_utils import NIFTI_MIMES, get_nifti_shape
 from datetime import datetime
 from pathlib import Path
-from datamint.dataset.annotation import Annotation
+from datamint.entities import Annotation, DatasetInfo
 import cv2
+from datamint.entities import Resource
 
 _LOGGER = logging.getLogger(__name__)
 
@@ -174,23 +176,12 @@ class DatamintBaseDataset:
 
     def _setup_api_handler(self, server_url: Optional[str], api_key: Optional[str], auto_update: bool) -> None:
         """Setup API handler and validate connection."""
-        from datamint.apihandler.api_handler import APIHandler
-
-        self.api_handler = APIHandler(
-            root_url=server_url,
+        from datamint import Api
+        self.api = Api(
+            server_url=server_url,
             api_key=api_key,
-            check_connection=auto_update
+            check_connection=self.auto_update
         )
-        self.server_url = self.api_handler.root_url
-        self.api_key = self.api_handler.api_key
-
-        if self.api_key is None:
-            _LOGGER.warning(
-                "API key not provided. If you want to download data, please provide an API key, "
-                f"either by passing it as an argument, "
-                f"setting environment variable {configs.ENV_VARS[configs.APIKEY_KEY]} or "
-                "using datamint-config command line tool."
-            )
 
     def _setup_directories(self, root: str | None) -> None:
         """Setup root and dataset directories."""
@@ -242,7 +233,7 @@ class DatamintBaseDataset:
         if not os.path.isfile(metadata_path):
             # get the server info
             self.project_info = self.get_info()
-            self.metainfo = self._get_datasetinfo().copy()
+            self.metainfo = self._get_datasetinfo().asdict().copy()
             self.metainfo['updated_at'] = None
             self.metainfo['resources'] = []
             self.metainfo['all_annotations'] = self.all_annotations
@@ -412,19 +403,33 @@ class DatamintBaseDataset:
     @staticmethod
     def read_number_of_frames(filepath: str) -> int:
         """Read the number of frames in a file."""
-        if is_dicom(filepath):
+
+        mimetypes, ext = guess_typez(filepath)
+        mimetype = mimetypes[0]
+        if mimetype is None:
+            raise ValueError(f"Could not determine MIME type for file: {filepath}")
+
+        if mimetype == 'application/dicom':
             ds = pydicom.dcmread(filepath)
             return getattr(ds, 'NumberOfFrames', 1)
-        elif filepath.lower().endswith(('.mp4', '.avi')):
+        elif mimetype.startswith('video/'):
             cap = cv2.VideoCapture(filepath)
             try:
                 return int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
             finally:
                 cap.release()
-        elif filepath.lower().endswith(('.png', '.jpg', '.jpeg')):
+        elif mimetype in ('image/png', 'image/jpeg', 'image/jpg', 'image/bmp', 'image/tiff'):
             return 1
+        elif mimetype in NIFTI_MIMES:
+            shape = get_nifti_shape(filepath)
+            if len(shape) == 3:
+                return shape[-1]
+            elif len(shape) > 3:
+                return shape[3]
+            else:
+                return 1
         else:
-            raise ValueError(f"Unsupported file type: {filepath}")
+            raise ValueError(f"Unsupported file type '{mimetype}' for file {filepath}")
 
     def get_resources_ids(self) -> list[str]:
         """Get list of resource IDs."""
@@ -526,18 +531,18 @@ class DatamintBaseDataset:
         if missing_files:
             raise DatamintDatasetException(f"Image files not found: {missing_files}")
 
-    def _get_datasetinfo(self) -> dict:
+    def _get_datasetinfo(self) -> DatasetInfo:
         """Get dataset information from API."""
         if self._server_dataset_info is not None:
             return self._server_dataset_info
-        all_datasets = self.api_handler.get_datasets()
+        all_datasets = self.api._datasetsinfo.get_all()
 
         for dataset in all_datasets:
-            if dataset['id'] == self.dataset_id:
+            if dataset.id == self.dataset_id:
                 self._server_dataset_info = dataset
                 return dataset
 
-        available_datasets = [(d['name'], d['id']) for d in all_datasets]
+        available_datasets = [(d.name, d.id) for d in all_datasets]
         raise DatamintDatasetException(
             f"Dataset with id '{self.dataset_id}' not found. "
             f"Available datasets: {available_datasets}"
@@ -547,7 +552,7 @@ class DatamintBaseDataset:
         """Get project information from API."""
         if hasattr(self, 'project_info') and self.project_info is not None:
             return self.project_info
-        project = self.api_handler.get_project_by_name(self.project_name)
+        project = self.api.projects.get_by_name(self.project_name).asdict()
         if 'error' in project:
             available_projects = project['all_projects']
             raise DatamintDatasetException(
@@ -592,31 +597,10 @@ class DatamintBaseDataset:
         lines = [head] + [" " * 4 + line for line in body]
         return "\n".join(lines)
 
-    def download_project(self) -> None:
-        """Download project data from API."""
-
-        dataset_info = self._get_datasetinfo()
-        self.dataset_id = dataset_info['id']
-        self.last_updaded_at = dataset_info['updated_at']
-
-        self.api_handler.download_project(
-            self.project_info['id'],
-            self.dataset_zippath,
-            all_annotations=self.all_annotations,
-            include_unannotated=self.include_unannotated
-        )
-
-        _LOGGER.debug("Downloaded dataset")
-
-        if os.path.getsize(self.dataset_zippath) == 0:
-            raise DatamintDatasetException("Download failed.")
-
-        self._extract_and_update_metadata()
-
     def _get_dataset_id(self) -> str:
         if self.dataset_id is None:
             dataset_info = self._get_datasetinfo()
-            self.dataset_id = dataset_info['id']
+            self.dataset_id = dataset_info.id
         return self.dataset_id
 
     def _extract_and_update_metadata(self) -> None:
@@ -638,7 +622,7 @@ class DatamintBaseDataset:
 
         # Save updated metadata
         with open(datasetjson_path, 'w') as file:
-            json.dump(self.metainfo, file, default=lambda o: o.to_dict() if hasattr(o, 'to_dict') else o)
+            json.dump(self.metainfo, file, default=lambda o: o.asdict() if hasattr(o, 'asdict') else o)
 
         self.images_metainfo = self.metainfo['resources']
         # self._convert_metainfo_to_clsobj()
@@ -646,19 +630,19 @@ class DatamintBaseDataset:
     def _update_metadata_timestamps(self) -> None:
         """Update metadata with correct timestamps."""
         if 'updated_at' not in self.metainfo:
-            self.metainfo['updated_at'] = self.last_updaded_at
+            self.metainfo['updated_at'] = self.last_updated_at
         else:
             try:
                 local_time = datetime.fromisoformat(self.metainfo['updated_at'])
-                server_time = datetime.fromisoformat(self.last_updaded_at)
+                server_time = datetime.fromisoformat(self.last_updated_at)
 
                 if local_time < server_time:
                     _LOGGER.warning(
                         f"Inconsistent updated_at dates detected "
-                        f"({self.metainfo['updated_at']} < {self.last_updaded_at}). "
-                        f"Fixing it to {self.last_updaded_at}"
+                        f"({self.metainfo['updated_at']} < {self.last_updated_at}). "
+                        f"Fixing it to {self.last_updated_at}"
                     )
-                    self.metainfo['updated_at'] = self.last_updaded_at
+                    self.metainfo['updated_at'] = self.last_updated_at
             except Exception as e:
                 _LOGGER.warning(f"Failed to parse updated_at date: {e}")
 
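Besides fixing the last_updaded_at typo, this hunk's staleness check reduces to comparing two ISO-8601 timestamps; a minimal illustration with made-up values:

    from datetime import datetime

    local_time = datetime.fromisoformat("2024-01-01T10:00:00+00:00")
    server_time = datetime.fromisoformat("2024-01-02T10:00:00+00:00")
    assert local_time < server_time  # local copy is stale, so updated_at gets overwritten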
@@ -690,6 +674,9 @@ class DatamintBaseDataset:
             img = (img - min_val) / (img.max() - min_val) * 255
             img = img.astype(np.uint8)
 
+        if not img.flags.writeable:
+            img = img.copy()
+
         img_tensor = torch.from_numpy(img).contiguous()
 
         if isinstance(img_tensor, torch.ByteTensor):
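The new guard exists because torch.from_numpy shares memory with the NumPy array and warns on (and cannot safely write through) read-only arrays. A small self-contained repro of the same check:

    import numpy as np
    import torch

    img = np.zeros((4, 4), dtype=np.uint8)
    img.flags.writeable = False   # e.g. an array backed by a read-only buffer

    if not img.flags.writeable:   # same guard as the hunk above
        img = img.copy()          # copying restores writability before sharing with torch

    t = torch.from_numpy(img).contiguous()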
@@ -829,7 +816,7 @@ class DatamintBaseDataset:
 
         try:
             external_metadata_info = self._get_datasetinfo()
-            server_updated_at = external_metadata_info['updated_at']
+            server_updated_at = external_metadata_info.updated_at
         except Exception as e:
             _LOGGER.warning(f"Failed to check for updates in {self.project_name}: {e}")
             return
@@ -856,20 +843,21 @@ class DatamintBaseDataset:
             _LOGGER.info('Local version is up to date with the latest version.')
 
     def _fetch_new_resources(self,
-                             all_uptodate_resources: list[dict]) -> list[dict]:
+                             all_uptodate_resources: list[Resource]) -> list[dict]:
         local_resources = self.images_metainfo
         local_resources_ids = [res['id'] for res in local_resources]
         new_resources = []
         for resource in all_uptodate_resources:
+            resource = resource.asdict()
             if resource['id'] not in local_resources_ids:
                 resource['file'] = str(self._get_resource_file_path(resource))
                 resource['annotations'] = []
                 new_resources.append(resource)
         return new_resources
 
-    def _fetch_deleted_resources(self, all_uptodate_resources: list[dict]) -> list[dict]:
+    def _fetch_deleted_resources(self, all_uptodate_resources: list[Resource]) -> list[dict]:
         local_resources = self.images_metainfo
-        all_uptodate_resources_ids = [res['id'] for res in all_uptodate_resources]
+        all_uptodate_resources_ids = [res.id for res in all_uptodate_resources]
         deleted_resources = []
        for resource in local_resources:
             try:
@@ -888,7 +876,7 @@ class DatamintBaseDataset:
         # server_updated_at = external_metadata_info['updated_at']
 
         ### RESOURCES ###
-        all_uptodate_resources = self.api_handler.get_project_resources(self.get_info()['id'])
+        all_uptodate_resources = self.api.projects.get_project_resources(self.get_info()['id'])
         new_resources = self._fetch_new_resources(all_uptodate_resources)
         deleted_resources = self._fetch_deleted_resources(all_uptodate_resources)
 
@@ -898,9 +886,9 @@ class DatamintBaseDataset:
             new_resources_path = [Path(self.dataset_dir) / r['file'] for r in new_resources]
             new_resources_ids = [r['id'] for r in new_resources]
             _LOGGER.info(f"Downloading {len(new_resources)} new resources...")
-            new_res_paths = self.api_handler.download_multiple_resources(new_resources_ids,
-                                                                         save_path=new_resources_path,
-                                                                         add_extension=True)
+            new_res_paths = self.api.resources.download_multiple_resources(new_resources_ids,
+                                                                           save_path=new_resources_path,
+                                                                           add_extension=True)
             for new_rpath, r in zip(new_res_paths, new_resources):
                 r['file'] = str(Path(new_rpath).relative_to(self.dataset_dir))
             _LOGGER.info(f"Downloaded {len(new_resources)} new resources.")
@@ -910,16 +898,17 @@ class DatamintBaseDataset:
         ################
 
         ### ANNOTATIONS ###
-        all_annotations = self.api_handler.get_annotations(worklist_id=self.project_info['worklist_id'],
-                                                           status='published' if self.all_annotations else None)
+        all_annotations = self.api.annotations.get_list(worklist_id=self.project_info['worklist_id'],
+                                                        status='published' if self.all_annotations else None)
+
         # group annotations by resource ID
-        annotations_by_resource = {}
+        annotations_by_resource: dict[str, list[Annotation]] = {}
         for ann in all_annotations:
             # add the local filepath
             filepath = self._get_annotation_file_path(ann)
             if filepath is not None:
-                ann['file'] = str(filepath)
-            resource_id = ann['resource_id']
+                ann.file = str(filepath)
+            resource_id = ann.resource_id
             if resource_id not in annotations_by_resource:
                 annotations_by_resource[resource_id] = []
             annotations_by_resource[resource_id].append(ann)
@@ -937,11 +926,11 @@ class DatamintBaseDataset:
             # check if segmentation annotations need to be downloaded
             # Also check if annotations need to be deleted
             old_ann_ids = set([ann.id for ann in old_resource_annotations if hasattr(ann, 'id')])
-            new_ann_ids = set([ann['id'] for ann in new_resource_annotations])
+            new_ann_ids = set([ann.id for ann in new_resource_annotations])
 
             # Find annotations to add, update, or remove
             annotations_to_add = [ann for ann in new_resource_annotations
-                                  if ann['id'] not in old_ann_ids]
+                                  if ann.id not in old_ann_ids]
             annotations_to_remove = [ann for ann in old_resource_annotations
                                      if getattr(ann, 'id', 'NA') not in new_ann_ids]
 
@@ -970,22 +959,23 @@ class DatamintBaseDataset:
                     _LOGGER.error(f"Error deleting annotation file {filepath}: {e}")
 
             # Update resource annotations list - convert to Annotation objects
-            resource['annotations'] = [Annotation.from_dict(ann) for ann in new_resource_annotations]
+            # resource['annotations'] = [Annotation.from_dict(ann) for ann in new_resource_annotations]
+            resource['annotations'] = new_resource_annotations
 
         # Batch download all segmentation files
         if segmentations_to_download:
             _LOGGER.info(f"Downloading {len(segmentations_to_download)} segmentation files...")
-            self.api_handler.download_multiple_segmentations(segmentations_to_download, segmentation_paths)
+            self.api.annotations.download_multiple_files(segmentations_to_download, segmentation_paths)
             _LOGGER.info(f"Downloaded {len(segmentations_to_download)} segmentation files.")
 
         ###################
         # update metadata
-        self.metainfo['updated_at'] = self._get_datasetinfo()['updated_at']
+        self.metainfo['updated_at'] = self._get_datasetinfo().updated_at
         self.metainfo['all_annotations'] = self.all_annotations
         # save updated metadata
         datasetjson_path = os.path.join(self.dataset_dir, 'dataset.json')
         with open(datasetjson_path, 'w') as file:
-            json.dump(self.metainfo, file, default=lambda o: o.to_dict() if hasattr(o, 'to_dict') else o)
+            json.dump(self.metainfo, file, default=lambda o: o.asdict() if hasattr(o, 'asdict') else o)
 
     def _get_resource_file_path(self, resource: dict) -> Path:
         """Get the local file path for a resource."""
--- a/datamint/dataset/dataset.py
+++ b/datamint/dataset/dataset.py
@@ -7,7 +7,7 @@ import numpy as np
 import logging
 from PIL import Image
 import albumentations
-from datamint.dataset.annotation import Annotation
+from datamint.entities.annotation import Annotation
 
 _LOGGER = logging.getLogger(__name__)
 
@@ -155,7 +155,7 @@ class DatamintDataset(DatamintBaseDataset):
             # FIXME: avoid enforcing resizing the mask
             seg = (Image.open(segfilepath)
                    .convert('L')
-                   .resize((w, h), Image.NEAREST)
+                   .resize((w, h), Image.Resampling.NEAREST)
                    )
             seg = np.array(seg)
 
--- /dev/null
+++ b/datamint/entities/__init__.py
@@ -0,0 +1,20 @@
+"""DataMint entities package."""
+
+from .annotation import Annotation
+from .base_entity import BaseEntity
+from .channel import Channel, ChannelResourceData
+from .project import Project
+from .resource import Resource
+from .user import User  # new export
+from .datasetinfo import DatasetInfo
+
+__all__ = [
+    'Annotation',
+    'BaseEntity',
+    'Channel',
+    'ChannelResourceData',
+    'Project',
+    'Resource',
+    "User",
+    'DatasetInfo',
+]
--- /dev/null
+++ b/datamint/entities/annotation.py
@@ -0,0 +1,178 @@
+# filepath: datamint/entities/annotation.py
+"""Annotation entity module for DataMint API.
+
+This module defines the Annotation model used to represent annotation
+records returned by the DataMint API.
+"""
+
+from typing import Any
+import logging
+from .base_entity import BaseEntity, MISSING_FIELD
+from pydantic import Field
+from datetime import datetime
+
+logger = logging.getLogger(__name__)
+
+# Map API field names to class attributes
+_FIELD_MAPPING = {
+    'type': 'annotation_type',
+    'name': 'identifier',
+    'added_by': 'created_by',
+    'index': 'frame_index',
+}
+
+
+class Annotation(BaseEntity):
+    """Pydantic Model representing a DataMint annotation.
+
+    Attributes:
+        id: Unique identifier for the annotation.
+        identifier: User-friendly identifier or label for the annotation.
+        scope: Scope of the annotation (e.g., "frame", "image").
+        frame_index: Index of the frame if scope is frame-based.
+        annotation_type: Type of annotation (e.g., "segmentation", "bbox", "label").
+        text_value: Optional text value associated with the annotation.
+        numeric_value: Optional numeric value associated with the annotation.
+        units: Optional units for numeric_value.
+        geometry: Optional geometry payload (e.g., polygons, masks) as a list.
+        created_at: ISO timestamp for when the annotation was created.
+        created_by: Email or identifier of the creating user.
+        annotation_worklist_id: Optional worklist ID associated with the annotation.
+        status: Lifecycle status of the annotation (e.g., "new", "approved").
+        approved_at: Optional ISO timestamp for approval time.
+        approved_by: Optional identifier of the approver.
+        resource_id: ID of the resource this annotation belongs to.
+        associated_file: Path or identifier of any associated file artifact.
+        deleted: Whether the annotation is marked as deleted.
+        deleted_at: Optional ISO timestamp for deletion time.
+        deleted_by: Optional identifier of the user who deleted the annotation.
+        created_by_model: Optional identifier of the model that created this annotation.
+        old_geometry: Optional previous geometry payload for change tracking.
+        set_name: Optional set name this annotation belongs to.
+        resource_filename: Optional filename of the resource.
+        resource_modality: Optional modality of the resource (e.g., CT, MR).
+        annotation_worklist_name: Optional worklist name associated with the annotation.
+        user_info: Optional user information with keys like firstname and lastname.
+        values: Optional extra values payload for flexible schemas.
+    """
+
+    id: str
+    identifier: str
+    scope: str
+    frame_index: int | None
+    annotation_type: str
+    text_value: str | None
+    numeric_value: float | int | None
+    units: str | None
+    geometry: list | dict | None
+    created_at: str  # ISO timestamp string
+    created_by: str
+    annotation_worklist_id: str | None
+    status: str
+    approved_at: str | None  # ISO timestamp string
+    approved_by: str | None
+    resource_id: str
+    associated_file: str | None
+    deleted: bool
+    deleted_at: str | None  # ISO timestamp string
+    deleted_by: str | None
+    created_by_model: str | None
+    set_name: str | None
+    resource_filename: str | None
+    resource_modality: str | None
+    annotation_worklist_name: str | None
+    user_info: dict | None
+    values: list | None = MISSING_FIELD
+    file: str | None = None  # Add file field for segmentations
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> 'Annotation':
+        """Create an Annotation instance from a dictionary.
+
+        Args:
+            data: Dictionary containing annotation data from API
+
+        Returns:
+            Annotation instance
+        """
+        # Convert field names and filter valid fields
+        converted_data = {}
+        for key, value in data.items():
+            # Map field names if needed
+            mapped_key = _FIELD_MAPPING.get(key, key)
+            converted_data[mapped_key] = value
+
+        if 'scope' not in converted_data:
+            converted_data['scope'] = 'image' if converted_data.get('frame_index') is None else 'frame'
+
+        if converted_data['annotation_type'] in ['segmentation']:
+            if converted_data.get('file') is None:
+                raise ValueError(f"Segmentation annotations must have an associated file. {data}")
+
+        # Create instance with only valid fields
+        valid_fields = {f for f in cls.model_fields.keys()}
+        filtered_data = {k: v for k, v in converted_data.items() if k in valid_fields}
+
+        return cls(**filtered_data)
+
+    @property
+    def type(self) -> str:
+        """Alias for :attr:`annotation_type`."""
+        return self.annotation_type
+
+    @property
+    def name(self) -> str:
+        """Get the annotation name (alias for identifier)."""
+        return self.identifier
+
+    @property
+    def index(self) -> int | None:
+        """Get the frame index (alias for frame_index)."""
+        return self.frame_index
+
+    @property
+    def value(self) -> str | None:
+        """Get the annotation value (for category annotations)."""
+        return self.text_value
+
+    @property
+    def added_by(self) -> str:
+        """Get the creator email (alias for created_by)."""
+        return self.created_by
+
+    def is_segmentation(self) -> bool:
+        """Check if this is a segmentation annotation."""
+        return self.annotation_type == 'segmentation'
+
+    def is_label(self) -> bool:
+        """Check if this is a label annotation."""
+        return self.annotation_type == 'label'
+
+    def is_category(self) -> bool:
+        """Check if this is a category annotation."""
+        return self.annotation_type == 'category'
+
+    def is_frame_scoped(self) -> bool:
+        """Check if this annotation is frame-scoped."""
+        return self.scope == 'frame'
+
+    def is_image_scoped(self) -> bool:
+        """Check if this annotation is image-scoped."""
+        return self.scope == 'image'
+
+    def get_created_datetime(self) -> datetime | None:
+        """
+        Get the creation datetime as a datetime object.
+
+        Returns:
+            datetime object or None if created_at is not set
+        """
+        if isinstance(self.created_at, datetime):
+            return self.created_at
+
+        if self.created_at:
+            try:
+                return datetime.fromisoformat(self.created_at.replace('Z', '+00:00'))
+            except ValueError:
+                logger.warning(f"Could not parse created_at datetime: {self.created_at}")
+        return None
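A hedged sketch of building an Annotation from an API-style payload: the key renames ('type' -> annotation_type, 'name' -> identifier, 'added_by' -> created_by, 'index' -> frame_index) come from _FIELD_MAPPING above, while every field value here is invented:

    from datamint.entities import Annotation

    payload = {
        'id': 'ann-1', 'name': 'tumor', 'type': 'label', 'added_by': 'user@example.com',
        'index': 3, 'text_value': 'present', 'numeric_value': None, 'units': None,
        'geometry': None, 'created_at': '2024-01-01T00:00:00Z', 'status': 'new',
        'annotation_worklist_id': None, 'approved_at': None, 'approved_by': None,
        'resource_id': 'res-1', 'associated_file': None, 'deleted': False,
        'deleted_at': None, 'deleted_by': None, 'created_by_model': None,
        'set_name': None, 'resource_filename': None, 'resource_modality': None,
        'annotation_worklist_name': None, 'user_info': None,
    }
    ann = Annotation.from_dict(payload)
    assert ann.annotation_type == ann.type == 'label'  # alias property
    assert ann.scope == 'frame'                        # inferred: frame_index is not None
    assert ann.is_label() and ann.is_frame_scoped()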
--- /dev/null
+++ b/datamint/entities/base_entity.py
@@ -0,0 +1,51 @@
+import logging
+import sys
+from typing import Any
+from pydantic import ConfigDict, BaseModel
+
+if sys.version_info >= (3, 11):
+    from typing import Self
+else:
+    from typing_extensions import Self
+_LOGGER = logging.getLogger(__name__)
+
+MISSING_FIELD = 'MISSING_FIELD'  # Used when a field is sometimes missing for one endpoint but not on another endpoint
+
+# Track logged warnings to avoid duplicates
+_LOGGED_WARNINGS: set[tuple[str, str]] = set()
+
+
+class BaseEntity(BaseModel):
+    """
+    Base class for all entities in the Datamint system.
+
+    This class provides common functionality for all entities, such as
+    serialization and deserialization from dictionaries, as well as
+    handling unknown fields gracefully.
+    """
+
+    model_config = ConfigDict(extra='allow')  # Allow extra fields not defined in the model
+
+    def asdict(self) -> dict[str, Any]:
+        """Convert the entity to a dictionary, including unknown fields."""
+        return self.model_dump(warnings='none')
+
+    def asjson(self) -> str:
+        """Convert the entity to a JSON string, including unknown fields."""
+        return self.model_dump_json(warnings='none')
+
+    def model_post_init(self, __context: Any) -> None:
+        """Handle unknown fields by logging a warning once per class/field combination in debug mode."""
+        if self.__pydantic_extra__ and _LOGGER.isEnabledFor(logging.DEBUG):
+            class_name = self.__class__.__name__
+
+            have_to_log = False
+            for key in self.__pydantic_extra__.keys():
+                warning_key = (class_name, key)
+
+                if warning_key not in _LOGGED_WARNINGS:
+                    _LOGGED_WARNINGS.add(warning_key)
+                    have_to_log = True
+
+            if have_to_log:
+                _LOGGER.warning(f"Unknown fields {list(self.__pydantic_extra__.keys())} found in {class_name}")
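What the extra='allow' configuration buys: unknown server fields are kept instead of raising a validation error, and asdict()/asjson() include them. A minimal demonstration (Demo is a stand-in subclass, not part of the package):

    from datamint.entities.base_entity import BaseEntity

    class Demo(BaseEntity):
        id: str

    d = Demo(id='x', brand_new_server_field=42)  # unknown field tolerated; warned once in debug mode
    print(d.asdict())  # {'id': 'x', 'brand_new_server_field': 42}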
--- /dev/null
+++ b/datamint/entities/channel.py
@@ -0,0 +1,46 @@
+from pydantic import ConfigDict, BaseModel
+from datetime import datetime
+from datamint.entities.base_entity import BaseEntity
+
+
+class ChannelResourceData(BaseModel):
+    """Represents resource data within a channel.
+
+    Attributes:
+        created_by: Email of the user who created the resource.
+        customer_id: UUID of the customer.
+        resource_id: UUID of the resource.
+        resource_file_name: Original filename of the resource.
+        resource_mimetype: MIME type of the resource.
+    """
+    model_config = ConfigDict(extra='allow')
+
+    created_by: str
+    customer_id: str
+    resource_id: str
+    resource_file_name: str
+    resource_mimetype: str
+
+
+class Channel(BaseEntity):
+    """Represents a channel containing multiple resources.
+
+    A channel is a collection of resources grouped together,
+    typically for batch processing or organization purposes.
+
+    Attributes:
+        channel_name: Name identifier for the channel.
+        resource_data: List of resources contained in this channel.
+        deleted: Whether the channel has been marked as deleted.
+        created_at: Timestamp when the channel was created.
+        updated_at: Timestamp when the channel was last updated.
+    """
+    channel_name: str
+    resource_data: list[ChannelResourceData]
+    deleted: bool = False
+    created_at: str | None = None
+    updated_at: str | None = None
+
+    def get_resource_ids(self) -> list[str]:
+        """Get list of all resource IDs in this channel."""
+        return [resource.resource_id for resource in self.resource_data] if self.resource_data else []
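A hedged usage sketch for Channel: pydantic coerces plain dicts into ChannelResourceData, so a channel can be built directly from an API-style payload (all values here are invented):

    from datamint.entities.channel import Channel

    ch = Channel(
        channel_name='batch-2024-01',
        resource_data=[{
            'created_by': 'user@example.com',
            'customer_id': 'cust-1',
            'resource_id': 'res-1',
            'resource_file_name': 'scan.dcm',
            'resource_mimetype': 'application/dicom',
        }],
    )
    print(ch.get_resource_ids())  # ['res-1']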
--- /dev/null
+++ b/datamint/entities/datasetinfo.py
@@ -0,0 +1,22 @@
+"""Project entity module for DataMint API."""
+
+from datetime import datetime
+import logging
+from .base_entity import BaseEntity, MISSING_FIELD
+
+logger = logging.getLogger(__name__)
+
+
+class DatasetInfo(BaseEntity):
+    """Pydantic Model representing a DataMint dataset.
+    """
+
+    id: str
+    name: str
+    created_at: str  # ISO timestamp string
+    created_by: str
+    description: str
+    customer_id: str
+    updated_at: str | None
+    total_resource: int
+    resource_ids: list[str]
+ resource_ids: list[str]