datamint-1.9.2-py3-none-any.whl → datamint-2.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of datamint might be problematic.

Files changed (40)
  1. datamint/__init__.py +2 -0
  2. datamint/api/__init__.py +3 -0
  3. datamint/api/base_api.py +430 -0
  4. datamint/api/client.py +91 -0
  5. datamint/api/dto/__init__.py +10 -0
  6. datamint/api/endpoints/__init__.py +17 -0
  7. datamint/api/endpoints/annotations_api.py +984 -0
  8. datamint/api/endpoints/channels_api.py +28 -0
  9. datamint/api/endpoints/datasetsinfo_api.py +16 -0
  10. datamint/api/endpoints/projects_api.py +203 -0
  11. datamint/api/endpoints/resources_api.py +1013 -0
  12. datamint/api/endpoints/users_api.py +38 -0
  13. datamint/api/entity_base_api.py +347 -0
  14. datamint/apihandler/annotation_api_handler.py +5 -5
  15. datamint/apihandler/api_handler.py +3 -6
  16. datamint/apihandler/base_api_handler.py +6 -28
  17. datamint/apihandler/dto/__init__.py +0 -0
  18. datamint/apihandler/dto/annotation_dto.py +1 -1
  19. datamint/apihandler/root_api_handler.py +53 -28
  20. datamint/client_cmd_tools/datamint_config.py +6 -37
  21. datamint/client_cmd_tools/datamint_upload.py +84 -58
  22. datamint/dataset/base_dataset.py +65 -75
  23. datamint/dataset/dataset.py +2 -2
  24. datamint/entities/__init__.py +20 -0
  25. datamint/entities/annotation.py +178 -0
  26. datamint/entities/base_entity.py +51 -0
  27. datamint/entities/channel.py +46 -0
  28. datamint/entities/datasetinfo.py +22 -0
  29. datamint/entities/project.py +64 -0
  30. datamint/entities/resource.py +130 -0
  31. datamint/entities/user.py +21 -0
  32. datamint/examples/example_projects.py +41 -44
  33. datamint/exceptions.py +27 -1
  34. datamint/logging.yaml +1 -1
  35. datamint/utils/logging_utils.py +75 -0
  36. {datamint-1.9.2.dist-info → datamint-2.0.0.dist-info}/METADATA +13 -9
  37. datamint-2.0.0.dist-info/RECORD +50 -0
  38. {datamint-1.9.2.dist-info → datamint-2.0.0.dist-info}/WHEEL +1 -1
  39. datamint-1.9.2.dist-info/RECORD +0 -29
  40. {datamint-1.9.2.dist-info → datamint-2.0.0.dist-info}/entry_points.txt +0 -0
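
The headline change in 2.0.0 is the new datamint/api package: the old APIHandler entry point gives way to an Api client that groups endpoints under attributes such as api.resources, api.projects and api.annotations and returns typed entities from datamint.entities instead of plain dicts. A minimal sketch of that surface, inferred only from the call sites in the hunks below (exact constructor signatures and return types are assumptions, not documented API):

    from datamint import Api

    api = Api(check_connection=True)                 # replaces APIHandler(check_connection=True)
    project = api.projects.get_by_name("MyProject")  # returns a Project entity
    print(project.asdict())                          # entities expose asdict() instead of being dicts

    resources = api.projects.get_project_resources(project.asdict()["id"])
    for resource in resources:
        print(resource.id)                           # attribute access instead of resource['id']
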
@@ -1,46 +1,13 @@
  import argparse
  import logging
- import os
- import platform
  from datamint import configs
- from datamint.utils.logging_utils import load_cmdline_logging_config
+ from datamint.utils.logging_utils import load_cmdline_logging_config, ConsoleWrapperHandler
  from rich.prompt import Prompt, Confirm
  from rich.console import Console
- from rich.theme import Theme
-
- # Create a custom theme that works well on both dark and blue backgrounds
- def _create_console_theme() -> Theme:
-     """Create a custom Rich theme optimized for cross-platform terminals."""
-     # Detect if we're likely on PowerShell (Windows + PowerShell)
-     is_powershell = (
-         platform.system() == "Windows" and
-         os.environ.get("PSModulePath") is not None
-     )
-
-     if is_powershell:
-         # PowerShell blue background - use high contrast colors
-         return Theme({
-             "warning": "bright_yellow",
-             "error": "bright_red on white",
-             "success": "bright_green",
-             "key": "bright_cyan",
-             "accent": "bright_cyan",
-             "title": "bold"
-         })
-     else:
-         # Linux/Unix terminals - standard colors
-         return Theme({
-             "warning": "yellow",
-             "error": "red",
-             "success": "green",
-             "key": "cyan",
-             "accent": "bright_blue",
-             "title": "bold"
-         })
-
- # Create console with custom theme
- console = Console(theme=_create_console_theme())
+
  _LOGGER = logging.getLogger(__name__)
+ _USER_LOGGER = logging.getLogger('user_logger')
+ console: Console
  
  
  def configure_default_url():
@@ -170,7 +137,9 @@ def interactive_mode():
  
  def main():
      """Main entry point for the configuration tool."""
+     global console
      load_cmdline_logging_config()
+     console = [h for h in _USER_LOGGER.handlers if isinstance(h, ConsoleWrapperHandler)][0].console
      parser = argparse.ArgumentParser(
          description='🔧 Datamint API Configuration Tool',
          epilog="""
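
Both command-line tools drop the locally built, PowerShell-aware Rich theme and instead pull a shared Console from the handler that load_cmdline_logging_config() installs on the user logger. A minimal sketch of that pattern, with the handler lookup copied from the diff (the ConsoleWrapperHandler internals and the available style names are assumptions):

    import logging
    from rich.console import Console
    from datamint.utils.logging_utils import load_cmdline_logging_config, ConsoleWrapperHandler

    _USER_LOGGER = logging.getLogger('user_logger')

    def get_user_console() -> Console:
        # The 2.0.0 logging config is expected to attach a ConsoleWrapperHandler
        # to 'user_logger'; its .console carries the shared theme.
        load_cmdline_logging_config()
        handlers = [h for h in _USER_LOGGER.handlers if isinstance(h, ConsoleWrapperHandler)]
        return handlers[0].console

    console = get_user_console()
    console.print("configured", style="success")  # style name assumed from the old theme keys
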
@@ -1,6 +1,7 @@
  from datamint.exceptions import DatamintException
  import argparse
- from datamint.apihandler.api_handler import APIHandler
+ # from datamint.apihandler.api_handler import APIHandler
+ from datamint import Api
  import os
  from humanize import naturalsize
  import logging
@@ -12,8 +13,8 @@ from typing import Generator, Optional, Any
  from collections import defaultdict
  from datamint import __version__ as datamint_version
  from datamint import configs
- from datamint.client_cmd_tools.datamint_config import ask_api_key
- from datamint.utils.logging_utils import load_cmdline_logging_config
+ from datamint.utils.logging_utils import load_cmdline_logging_config, ConsoleWrapperHandler
+ from rich.console import Console
  import yaml
  from collections.abc import Iterable
  import pandas as pd
@@ -22,32 +23,42 @@ import pydicom.errors
  # Create two loggings: one for the user and one for the developer
  _LOGGER = logging.getLogger(__name__)
  _USER_LOGGER = logging.getLogger('user_logger')
+ logging.getLogger('pydicom').setLevel(logging.ERROR)
+ CONSOLE: Console
  
  MAX_RECURSION_LIMIT = 1000
  
+ # Default extensions to exclude when --include-extensions is not specified
+ DEFAULT_EXCLUDED_EXTENSIONS = [
+     '.txt', '.json', '.xml', '.docx', '.doc', '.pdf', '.xlsx', '.xls', '.csv', '.tsv',
+     '.log', '.ini', '.cfg', '.conf', '.yaml', '.yml', '.md', '.rst', '.html', '.htm',
+     '.exe', '.bat', '.sh', '.py', '.js', '.css',
+     '.sql', '.bak', '.tmp', '.temp', '.lock', '.DS_Store', '.gitignore'
+ ]
+
  
  def _get_minimal_distinguishing_paths(file_paths: list[str]) -> dict[str, str]:
      """
      Generate minimal distinguishing paths for files to avoid ambiguity when multiple files have the same name.
-
+
      Args:
          file_paths: List of file paths
-
+
      Returns:
          Dictionary mapping full path to minimal distinguishing path
      """
      if not file_paths:
          return {}
-
+
      # Convert to Path objects and get absolute paths
      paths = [Path(fp).resolve() for fp in file_paths]
      result = {}
-
+
      # Group files by basename
      basename_groups = defaultdict(list)
      for i, path in enumerate(paths):
          basename_groups[path.name].append((i, path))
-
+
      for basename, path_list in basename_groups.items():
          if len(path_list) == 1:
              # Only one file with this name, use just the basename
@@ -56,7 +67,7 @@ def _get_minimal_distinguishing_paths(file_paths: list[str]) -> dict[str, str]:
          else:
              # Multiple files with same name, need to distinguish them
              path_parts_list = [path.parts for _, path in path_list]
-
+
              # Find the minimum number of parent directories needed to distinguish
              max_depth_needed = 1
              for depth in range(1, max(len(parts) for parts in path_parts_list) + 1):
@@ -67,12 +78,12 @@ def _get_minimal_distinguishing_paths(file_paths: list[str]) -> dict[str, str]:
                          suffixes.append('/'.join(parts))
                      else:
                          suffixes.append('/'.join(parts[-depth:]))
-
+
                  if len(set(suffixes)) == len(suffixes):
                      # All suffixes are unique at this depth
                      max_depth_needed = depth
                      break
-
+
              # Apply the minimal distinguishing paths
              for (idx, path), parts in zip(path_list, path_parts_list):
                  if max_depth_needed >= len(parts):
@@ -80,10 +91,8 @@ def _get_minimal_distinguishing_paths(file_paths: list[str]) -> dict[str, str]:
                  else:
                      distinguishing_path = '/'.join(parts[-max_depth_needed:])
                  result[file_paths[idx]] = distinguishing_path
-
-     return result
-
  
+     return result
  
  
  def _read_segmentation_names(segmentation_names_path: str | Path) -> dict:
@@ -257,6 +266,7 @@ def handle_api_key() -> str | None:
      If it does not exist, it asks the user to input it.
      Then, it asks the user if he wants to save the API key at a proper location in the machine
      """
+     from datamint.client_cmd_tools.datamint_config import ask_api_key
      api_key = configs.get_value(configs.APIKEY_KEY)
      if api_key is None:
          _USER_LOGGER.info("API key not found. Please provide it:")
@@ -524,6 +534,8 @@ def _parse_args() -> tuple[Any, list[str], Optional[list[dict]], Optional[list[s
      parser.add_argument('--channel', '--name', type=str, required=False,
                          help='Channel name (arbritary) to upload the resources to. \
                          Useful for organizing the resources in the platform.')
+     parser.add_argument('--project', type=str, required=False,
+                         help='Project name to add the uploaded resources to after successful upload.')
      parser.add_argument('--retain-pii', action='store_true', help='Do not anonymize DICOMs')
      parser.add_argument('--retain-attribute', type=_tuple_int_type, action='append',
                          default=[],
@@ -541,7 +553,8 @@ def _parse_args() -> tuple[Any, list[str], Optional[list[dict]], Optional[list[s
                          help='File extensions to be considered for uploading. Default: all file extensions.' +
                          ' Example: --include-extensions dcm jpg png')
      parser.add_argument('--exclude-extensions', type=str, nargs='+',
-                         help='File extensions to be excluded from uploading. Default: none.' +
+                         help='File extensions to be excluded from uploading. ' +
+                         'Default: common non-medical file extensions (.txt, .json, .xml, .docx, etc.) when --include-extensions is not specified.' +
                          ' Example: --exclude-extensions txt csv'
                          )
      parser.add_argument('--segmentation_path', type=_is_valid_path_argparse, metavar="FILE",
@@ -581,7 +594,6 @@ def _parse_args() -> tuple[Any, list[str], Optional[list[dict]], Optional[list[s
  
      if args.verbose:
          # Get the console handler and set to debug
-         print(logging.getLogger().handlers)
          logging.getLogger().handlers[0].setLevel(logging.DEBUG)
          logging.getLogger('datamint').setLevel(logging.DEBUG)
          _LOGGER.setLevel(logging.DEBUG)
@@ -594,6 +606,11 @@ def _parse_args() -> tuple[Any, list[str], Optional[list[dict]], Optional[list[s
      if args.include_extensions is not None and args.exclude_extensions is not None:
          raise ValueError("--include-extensions and --exclude-extensions are mutually exclusive.")
  
+     # Apply default excluded extensions if neither include nor exclude extensions are specified
+     if args.include_extensions is None and args.exclude_extensions is None:
+         args.exclude_extensions = DEFAULT_EXCLUDED_EXTENSIONS
+         _LOGGER.debug(f"Applied default excluded extensions: {args.exclude_extensions}")
+
      try:
          if os.path.isfile(args.path):
              file_path = [args.path]
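
With the new DEFAULT_EXCLUDED_EXTENSIONS list, running the uploader without --include-extensions or --exclude-extensions now skips common non-medical files by default. A small sketch of how the resulting filter behaves; the _filter_files helper is hypothetical and only illustrates the effect (the real filtering lives elsewhere in datamint_upload.py):

    from pathlib import Path

    # Subset of DEFAULT_EXCLUDED_EXTENSIONS copied from the hunk above.
    DEFAULT_EXCLUDED_EXTENSIONS = ['.txt', '.json', '.csv', '.py', '.md']

    def _filter_files(paths: list[str], exclude_extensions: list[str]) -> list[str]:
        # Hypothetical helper: keep files whose suffix is not excluded (case-insensitive).
        excluded = {e.lower() for e in exclude_extensions}
        return [p for p in paths if Path(p).suffix.lower() not in excluded]

    files = ['scan.dcm', 'notes.txt', 'volume.nii.gz', 'report.json']
    print(_filter_files(files, DEFAULT_EXCLUDED_EXTENSIONS))
    # ['scan.dcm', 'volume.nii.gz']
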
@@ -720,24 +737,26 @@ def print_results_summary(files_path: list[str],
      failure_files = [f for f, r in zip(files_path, results) if isinstance(r, Exception)]
      # Get distinguishing paths for better error reporting
      distinguishing_paths = _get_minimal_distinguishing_paths(files_path)
-
+
      _USER_LOGGER.info(f"\nUpload summary:")
      _USER_LOGGER.info(f"\tTotal files: {len(files_path)}")
      _USER_LOGGER.info(f"\tSuccessful uploads: {len(files_path) - len(failure_files)}")
      if len(failure_files) > 0:
-         _USER_LOGGER.info(f"\t❌ Failed uploads: {len(failure_files)}")
+         _USER_LOGGER.warning(f"\tFailed uploads: {len(failure_files)}")
          _USER_LOGGER.warning(f"\tFailed files: {[distinguishing_paths[f] for f in failure_files]}")
          _USER_LOGGER.warning(f"\nFailures:")
          for f, r in zip(files_path, results):
              if isinstance(r, Exception):
                  _USER_LOGGER.warning(f"\t{distinguishing_paths[f]}: {r}")
      else:
-         _USER_LOGGER.info(f'✅ All uploads successful!')
+         CONSOLE.print(f'✅ All uploads successful!', style='success')
      return len(failure_files)
  
  
  def main():
+     global CONSOLE
      load_cmdline_logging_config()
+     CONSOLE = [h for h in _USER_LOGGER.handlers if isinstance(h, ConsoleWrapperHandler)][0].console
  
      try:
          args, files_path, segfiles, metadata_files = _parse_args()
@@ -745,48 +764,55 @@ def main():
          _USER_LOGGER.error(f'Error validating arguments. {e}')
          sys.exit(1)
  
-     print_input_summary(files_path,
-                         args=args,
-                         segfiles=segfiles,
-                         metadata_files=metadata_files,
-                         include_extensions=args.include_extensions)
+     try:
+         print_input_summary(files_path,
+                             args=args,
+                             segfiles=segfiles,
+                             metadata_files=metadata_files,
+                             include_extensions=args.include_extensions)
  
-     if not args.yes:
-         confirmation = input("Do you want to proceed with the upload? (y/n): ")
-         if confirmation.lower() != "y":
-             _USER_LOGGER.info("Upload cancelled.")
-             return
-     #######################################
+         if not args.yes:
+             confirmation = input("Do you want to proceed with the upload? (y/n): ")
+             if confirmation.lower() != "y":
+                 _USER_LOGGER.info("Upload cancelled.")
+                 return
+         #######################################
  
-     has_a_dicom_file = any(is_dicom(f) for f in files_path)
+         has_a_dicom_file = any(is_dicom(f) for f in files_path)
  
-     try:
-         api_handler = APIHandler(check_connection=True)
-     except DatamintException as e:
-         _USER_LOGGER.error(f'❌ Connection failed: {e}')
-         return
-     try:
-         results = api_handler.upload_resources(channel=args.channel,
-                                                files_path=files_path,
-                                                tags=args.tag,
-                                                on_error='skip',
-                                                anonymize=args.retain_pii == False and has_a_dicom_file,
-                                                anonymize_retain_codes=args.retain_attribute,
-                                                mung_filename=args.mungfilename,
-                                                publish=args.publish,
-                                                segmentation_files=segfiles,
-                                                transpose_segmentation=args.transpose_segmentation,
-                                                assemble_dicoms=True,
-                                                metadata=metadata_files
-                                                )
-     except pydicom.errors.InvalidDicomError as e:
-         _USER_LOGGER.error(f'❌ Invalid DICOM file: {e}')
-         return
-     _USER_LOGGER.info('Upload finished!')
-     _LOGGER.debug(f"Number of results: {len(results)}")
-
-     num_failures = print_results_summary(files_path, results)
-     if num_failures > 0:
+         try:
+             api = Api(check_connection=True)
+         except DatamintException as e:
+             _USER_LOGGER.error(f'❌ Connection failed: {e}')
+             return
+         try:
+             print('>>>', segfiles)
+             results = api.resources.upload_resources(channel=args.channel,
+                                                       files_path=files_path,
+                                                       tags=args.tag,
+                                                       on_error='skip',
+                                                       anonymize=args.retain_pii == False and has_a_dicom_file,
+                                                       anonymize_retain_codes=args.retain_attribute,
+                                                       mung_filename=args.mungfilename,
+                                                       publish=args.publish,
+                                                       publish_to=args.project,
+                                                       segmentation_files=segfiles,
+                                                       transpose_segmentation=args.transpose_segmentation,
+                                                       assemble_dicoms=True,
+                                                       metadata=metadata_files,
+                                                       progress_bar=True
+                                                       )
+         except pydicom.errors.InvalidDicomError as e:
+             _USER_LOGGER.error(f'❌ Invalid DICOM file: {e}')
+             return
+         _USER_LOGGER.info('Upload finished!')
+         _LOGGER.debug(f"Number of results: {len(results)}")
+
+         num_failures = print_results_summary(files_path, results)
+         if num_failures > 0:
+             sys.exit(1)
+     except KeyboardInterrupt:
+         CONSOLE.print("\nUpload cancelled by user.", style='warning')
          sys.exit(1)
  
  
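
The rewritten main() above swaps APIHandler.upload_resources for api.resources.upload_resources and wires the new --project flag into publish_to. A minimal programmatic sketch of the same call, using only the parameters visible in the diff (defaults and the full signature are assumptions):

    from datamint import Api
    from datamint.exceptions import DatamintException

    try:
        api = Api(check_connection=True)
    except DatamintException as e:
        raise SystemExit(f"Connection failed: {e}")

    results = api.resources.upload_resources(
        channel="my-channel",           # --channel
        files_path=["scan1.dcm", "scan2.dcm"],
        on_error="skip",
        anonymize=True,                 # DICOMs are anonymized unless --retain-pii is passed
        publish=False,
        publish_to="MyProject",         # --project: add resources to this project after upload
        assemble_dicoms=True,
        progress_bar=True,
    )
    # With on_error='skip', per-file failures come back as Exception entries.
    print(sum(not isinstance(r, Exception) for r in results), "uploads succeeded")
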
@@ -13,14 +13,16 @@ from datamint import configs
  from torch.utils.data import DataLoader
  import torch
  from torch import Tensor
- from datamint.apihandler.base_api_handler import DatamintException
+ from datamint.exceptions import DatamintException
  from medimgkit.dicom_utils import is_dicom
  from medimgkit.readers import read_array_normalized
- from medimgkit.format_detection import guess_extension
+ from medimgkit.format_detection import guess_extension, guess_typez
+ from medimgkit.nifti_utils import NIFTI_MIMES, get_nifti_shape
  from datetime import datetime
  from pathlib import Path
- from datamint.dataset.annotation import Annotation
+ from datamint.entities import Annotation, DatasetInfo
  import cv2
+ from datamint.entities import Resource
  
  _LOGGER = logging.getLogger(__name__)
  
@@ -174,23 +176,12 @@ class DatamintBaseDataset:
  
      def _setup_api_handler(self, server_url: Optional[str], api_key: Optional[str], auto_update: bool) -> None:
          """Setup API handler and validate connection."""
-         from datamint.apihandler.api_handler import APIHandler
-
-         self.api_handler = APIHandler(
-             root_url=server_url,
+         from datamint import Api
+         self.api = Api(
+             server_url=server_url,
              api_key=api_key,
-             check_connection=auto_update
+             check_connection=self.auto_update
          )
-         self.server_url = self.api_handler.root_url
-         self.api_key = self.api_handler.api_key
-
-         if self.api_key is None:
-             _LOGGER.warning(
-                 "API key not provided. If you want to download data, please provide an API key, "
-                 f"either by passing it as an argument, "
-                 f"setting environment variable {configs.ENV_VARS[configs.APIKEY_KEY]} or "
-                 "using datamint-config command line tool."
-             )
  
      def _setup_directories(self, root: str | None) -> None:
          """Setup root and dataset directories."""
@@ -242,7 +233,7 @@ class DatamintBaseDataset:
          if not os.path.isfile(metadata_path):
              # get the server info
              self.project_info = self.get_info()
-             self.metainfo = self._get_datasetinfo().copy()
+             self.metainfo = self._get_datasetinfo().asdict().copy()
              self.metainfo['updated_at'] = None
              self.metainfo['resources'] = []
              self.metainfo['all_annotations'] = self.all_annotations
@@ -412,19 +403,33 @@ class DatamintBaseDataset:
      @staticmethod
      def read_number_of_frames(filepath: str) -> int:
          """Read the number of frames in a file."""
-         if is_dicom(filepath):
+
+         mimetypes, ext = guess_typez(filepath)
+         mimetype = mimetypes[0]
+         if mimetype is None:
+             raise ValueError(f"Could not determine MIME type for file: {filepath}")
+
+         if mimetype == 'application/dicom':
              ds = pydicom.dcmread(filepath)
              return getattr(ds, 'NumberOfFrames', 1)
-         elif filepath.lower().endswith(('.mp4', '.avi')):
+         elif mimetype.startswith('video/'):
              cap = cv2.VideoCapture(filepath)
              try:
                  return int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
              finally:
                  cap.release()
-         elif filepath.lower().endswith(('.png', '.jpg', '.jpeg')):
+         elif mimetype in ('image/png', 'image/jpeg', 'image/jpg', 'image/bmp', 'image/tiff'):
              return 1
+         elif mimetype in NIFTI_MIMES:
+             shape = get_nifti_shape(filepath)
+             if len(shape) == 3:
+                 return shape[-1]
+             elif len(shape) > 3:
+                 return shape[3]
+             else:
+                 return 1
          else:
-             raise ValueError(f"Unsupported file type: {filepath}")
+             raise ValueError(f"Unsupported file type '{mimetype}' for file {filepath}")
  
      def get_resources_ids(self) -> list[str]:
          """Get list of resource IDs."""
@@ -526,18 +531,18 @@ class DatamintBaseDataset:
          if missing_files:
              raise DatamintDatasetException(f"Image files not found: {missing_files}")
  
-     def _get_datasetinfo(self) -> dict:
+     def _get_datasetinfo(self) -> DatasetInfo:
          """Get dataset information from API."""
          if self._server_dataset_info is not None:
              return self._server_dataset_info
-         all_datasets = self.api_handler.get_datasets()
+         all_datasets = self.api._datasetsinfo.get_all()
  
          for dataset in all_datasets:
-             if dataset['id'] == self.dataset_id:
+             if dataset.id == self.dataset_id:
                  self._server_dataset_info = dataset
                  return dataset
  
-         available_datasets = [(d['name'], d['id']) for d in all_datasets]
+         available_datasets = [(d.name, d.id) for d in all_datasets]
          raise DatamintDatasetException(
              f"Dataset with id '{self.dataset_id}' not found. "
              f"Available datasets: {available_datasets}"
@@ -547,7 +552,7 @@ class DatamintBaseDataset:
          """Get project information from API."""
          if hasattr(self, 'project_info') and self.project_info is not None:
              return self.project_info
-         project = self.api_handler.get_project_by_name(self.project_name)
+         project = self.api.projects.get_by_name(self.project_name).asdict()
          if 'error' in project:
              available_projects = project['all_projects']
              raise DatamintDatasetException(
@@ -592,31 +597,10 @@ class DatamintBaseDataset:
          lines = [head] + [" " * 4 + line for line in body]
          return "\n".join(lines)
  
-     def download_project(self) -> None:
-         """Download project data from API."""
-
-         dataset_info = self._get_datasetinfo()
-         self.dataset_id = dataset_info['id']
-         self.last_updaded_at = dataset_info['updated_at']
-
-         self.api_handler.download_project(
-             self.project_info['id'],
-             self.dataset_zippath,
-             all_annotations=self.all_annotations,
-             include_unannotated=self.include_unannotated
-         )
-
-         _LOGGER.debug("Downloaded dataset")
-
-         if os.path.getsize(self.dataset_zippath) == 0:
-             raise DatamintDatasetException("Download failed.")
-
-         self._extract_and_update_metadata()
-
      def _get_dataset_id(self) -> str:
          if self.dataset_id is None:
              dataset_info = self._get_datasetinfo()
-             self.dataset_id = dataset_info['id']
+             self.dataset_id = dataset_info.id
          return self.dataset_id
  
      def _extract_and_update_metadata(self) -> None:
@@ -638,7 +622,7 @@ class DatamintBaseDataset:
  
          # Save updated metadata
          with open(datasetjson_path, 'w') as file:
-             json.dump(self.metainfo, file, default=lambda o: o.to_dict() if hasattr(o, 'to_dict') else o)
+             json.dump(self.metainfo, file, default=lambda o: o.asdict() if hasattr(o, 'asdict') else o)
  
          self.images_metainfo = self.metainfo['resources']
          # self._convert_metainfo_to_clsobj()
@@ -646,19 +630,19 @@ class DatamintBaseDataset:
      def _update_metadata_timestamps(self) -> None:
          """Update metadata with correct timestamps."""
          if 'updated_at' not in self.metainfo:
-             self.metainfo['updated_at'] = self.last_updaded_at
+             self.metainfo['updated_at'] = self.last_updated_at
          else:
              try:
                  local_time = datetime.fromisoformat(self.metainfo['updated_at'])
-                 server_time = datetime.fromisoformat(self.last_updaded_at)
+                 server_time = datetime.fromisoformat(self.last_updated_at)
  
                  if local_time < server_time:
                      _LOGGER.warning(
                          f"Inconsistent updated_at dates detected "
-                         f"({self.metainfo['updated_at']} < {self.last_updaded_at}). "
-                         f"Fixing it to {self.last_updaded_at}"
+                         f"({self.metainfo['updated_at']} < {self.last_updated_at}). "
+                         f"Fixing it to {self.last_updated_at}"
                      )
-                     self.metainfo['updated_at'] = self.last_updaded_at
+                     self.metainfo['updated_at'] = self.last_updated_at
              except Exception as e:
                  _LOGGER.warning(f"Failed to parse updated_at date: {e}")
  
@@ -690,6 +674,9 @@ class DatamintBaseDataset:
          img = (img - min_val) / (img.max() - min_val) * 255
          img = img.astype(np.uint8)
  
+         if not img.flags.writeable:
+             img = img.copy()
+
          img_tensor = torch.from_numpy(img).contiguous()
  
          if isinstance(img_tensor, torch.ByteTensor):
@@ -829,7 +816,7 @@ class DatamintBaseDataset:
  
          try:
              external_metadata_info = self._get_datasetinfo()
-             server_updated_at = external_metadata_info['updated_at']
+             server_updated_at = external_metadata_info.updated_at
          except Exception as e:
              _LOGGER.warning(f"Failed to check for updates in {self.project_name}: {e}")
              return
@@ -856,20 +843,21 @@ class DatamintBaseDataset:
              _LOGGER.info('Local version is up to date with the latest version.')
  
      def _fetch_new_resources(self,
-                              all_uptodate_resources: list[dict]) -> list[dict]:
+                              all_uptodate_resources: list[Resource]) -> list[dict]:
          local_resources = self.images_metainfo
          local_resources_ids = [res['id'] for res in local_resources]
          new_resources = []
          for resource in all_uptodate_resources:
+             resource = resource.asdict()
              if resource['id'] not in local_resources_ids:
                  resource['file'] = str(self._get_resource_file_path(resource))
                  resource['annotations'] = []
                  new_resources.append(resource)
          return new_resources
  
-     def _fetch_deleted_resources(self, all_uptodate_resources: list[dict]) -> list[dict]:
+     def _fetch_deleted_resources(self, all_uptodate_resources: list[Resource]) -> list[dict]:
          local_resources = self.images_metainfo
-         all_uptodate_resources_ids = [res['id'] for res in all_uptodate_resources]
+         all_uptodate_resources_ids = [res.id for res in all_uptodate_resources]
          deleted_resources = []
          for resource in local_resources:
              try:
@@ -888,7 +876,7 @@ class DatamintBaseDataset:
          # server_updated_at = external_metadata_info['updated_at']
  
          ### RESOURCES ###
-         all_uptodate_resources = self.api_handler.get_project_resources(self.get_info()['id'])
+         all_uptodate_resources = self.api.projects.get_project_resources(self.get_info()['id'])
          new_resources = self._fetch_new_resources(all_uptodate_resources)
          deleted_resources = self._fetch_deleted_resources(all_uptodate_resources)
  
@@ -898,9 +886,9 @@ class DatamintBaseDataset:
              new_resources_path = [Path(self.dataset_dir) / r['file'] for r in new_resources]
              new_resources_ids = [r['id'] for r in new_resources]
              _LOGGER.info(f"Downloading {len(new_resources)} new resources...")
-             new_res_paths = self.api_handler.download_multiple_resources(new_resources_ids,
-                                                                          save_path=new_resources_path,
-                                                                          add_extension=True)
+             new_res_paths = self.api.resources.download_multiple_resources(new_resources_ids,
+                                                                            save_path=new_resources_path,
+                                                                            add_extension=True)
              for new_rpath, r in zip(new_res_paths, new_resources):
                  r['file'] = str(Path(new_rpath).relative_to(self.dataset_dir))
              _LOGGER.info(f"Downloaded {len(new_resources)} new resources.")
@@ -910,16 +898,17 @@ class DatamintBaseDataset:
          ################
  
          ### ANNOTATIONS ###
-         all_annotations = self.api_handler.get_annotations(worklist_id=self.project_info['worklist_id'],
-                                                            status='published' if self.all_annotations else None)
+         all_annotations = self.api.annotations.get_list(worklist_id=self.project_info['worklist_id'],
+                                                         status='published' if self.all_annotations else None)
+
          # group annotations by resource ID
-         annotations_by_resource = {}
+         annotations_by_resource: dict[str, list[Annotation]] = {}
          for ann in all_annotations:
              # add the local filepath
              filepath = self._get_annotation_file_path(ann)
              if filepath is not None:
-                 ann['file'] = str(filepath)
-             resource_id = ann['resource_id']
+                 ann.file = str(filepath)
+             resource_id = ann.resource_id
              if resource_id not in annotations_by_resource:
                  annotations_by_resource[resource_id] = []
              annotations_by_resource[resource_id].append(ann)
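
Annotation handling follows the same pattern: api.annotations.get_list returns Annotation entities with id, resource_id and file attributes, and segmentation payloads are fetched in batch with download_multiple_files. A sketch based on the calls above; the worklist id, the grouping helper and the argument types of download_multiple_files are illustrative assumptions:

    from collections import defaultdict
    from datamint import Api
    from datamint.entities import Annotation

    api = Api(check_connection=True)
    annotations = api.annotations.get_list(worklist_id="<worklist-id>", status="published")

    # Group by resource, mirroring the dataset-update loop in base_dataset.py.
    by_resource: dict[str, list[Annotation]] = defaultdict(list)
    for ann in annotations:
        by_resource[ann.resource_id].append(ann)

    # Batch-download segmentation files for a chosen subset of annotations
    # (assumes the first argument takes Annotation entities and the second local paths).
    subset = [ann for anns in by_resource.values() for ann in anns][:10]
    api.annotations.download_multiple_files(subset, [f"seg_{a.id}.nii.gz" for a in subset])
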
@@ -937,11 +926,11 @@ class DatamintBaseDataset:
              # check if segmentation annotations need to be downloaded
              # Also check if annotations need to be deleted
              old_ann_ids = set([ann.id for ann in old_resource_annotations if hasattr(ann, 'id')])
-             new_ann_ids = set([ann['id'] for ann in new_resource_annotations])
+             new_ann_ids = set([ann.id for ann in new_resource_annotations])
  
              # Find annotations to add, update, or remove
              annotations_to_add = [ann for ann in new_resource_annotations
-                                   if ann['id'] not in old_ann_ids]
+                                   if ann.id not in old_ann_ids]
              annotations_to_remove = [ann for ann in old_resource_annotations
                                       if getattr(ann, 'id', 'NA') not in new_ann_ids]
  
@@ -970,22 +959,23 @@ class DatamintBaseDataset:
                      _LOGGER.error(f"Error deleting annotation file {filepath}: {e}")
  
              # Update resource annotations list - convert to Annotation objects
-             resource['annotations'] = [Annotation.from_dict(ann) for ann in new_resource_annotations]
+             # resource['annotations'] = [Annotation.from_dict(ann) for ann in new_resource_annotations]
+             resource['annotations'] = new_resource_annotations
  
          # Batch download all segmentation files
          if segmentations_to_download:
              _LOGGER.info(f"Downloading {len(segmentations_to_download)} segmentation files...")
-             self.api_handler.download_multiple_segmentations(segmentations_to_download, segmentation_paths)
+             self.api.annotations.download_multiple_files(segmentations_to_download, segmentation_paths)
              _LOGGER.info(f"Downloaded {len(segmentations_to_download)} segmentation files.")
  
          ###################
          # update metadata
-         self.metainfo['updated_at'] = self._get_datasetinfo()['updated_at']
+         self.metainfo['updated_at'] = self._get_datasetinfo().updated_at
          self.metainfo['all_annotations'] = self.all_annotations
          # save updated metadata
          datasetjson_path = os.path.join(self.dataset_dir, 'dataset.json')
          with open(datasetjson_path, 'w') as file:
-             json.dump(self.metainfo, file, default=lambda o: o.to_dict() if hasattr(o, 'to_dict') else o)
+             json.dump(self.metainfo, file, default=lambda o: o.asdict() if hasattr(o, 'asdict') else o)
  
      def _get_resource_file_path(self, resource: dict) -> Path:
          """Get the local file path for a resource."""