datamint 1.9.2__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- datamint/__init__.py +2 -0
- datamint/api/__init__.py +3 -0
- datamint/api/base_api.py +430 -0
- datamint/api/client.py +91 -0
- datamint/api/dto/__init__.py +10 -0
- datamint/api/endpoints/__init__.py +17 -0
- datamint/api/endpoints/annotations_api.py +984 -0
- datamint/api/endpoints/channels_api.py +28 -0
- datamint/api/endpoints/datasetsinfo_api.py +16 -0
- datamint/api/endpoints/projects_api.py +203 -0
- datamint/api/endpoints/resources_api.py +1013 -0
- datamint/api/endpoints/users_api.py +38 -0
- datamint/api/entity_base_api.py +347 -0
- datamint/apihandler/annotation_api_handler.py +5 -5
- datamint/apihandler/api_handler.py +3 -6
- datamint/apihandler/base_api_handler.py +6 -28
- datamint/apihandler/dto/__init__.py +0 -0
- datamint/apihandler/dto/annotation_dto.py +1 -1
- datamint/apihandler/root_api_handler.py +53 -28
- datamint/client_cmd_tools/datamint_config.py +6 -37
- datamint/client_cmd_tools/datamint_upload.py +84 -58
- datamint/dataset/base_dataset.py +65 -75
- datamint/dataset/dataset.py +2 -2
- datamint/entities/__init__.py +20 -0
- datamint/entities/annotation.py +178 -0
- datamint/entities/base_entity.py +51 -0
- datamint/entities/channel.py +46 -0
- datamint/entities/datasetinfo.py +22 -0
- datamint/entities/project.py +64 -0
- datamint/entities/resource.py +130 -0
- datamint/entities/user.py +21 -0
- datamint/examples/example_projects.py +41 -44
- datamint/exceptions.py +27 -1
- datamint/logging.yaml +1 -1
- datamint/utils/logging_utils.py +75 -0
- {datamint-1.9.2.dist-info → datamint-2.0.0.dist-info}/METADATA +13 -9
- datamint-2.0.0.dist-info/RECORD +50 -0
- {datamint-1.9.2.dist-info → datamint-2.0.0.dist-info}/WHEEL +1 -1
- datamint-1.9.2.dist-info/RECORD +0 -29
- {datamint-1.9.2.dist-info → datamint-2.0.0.dist-info}/entry_points.txt +0 -0
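
The file list above shows the old `apihandler`-based client being superseded by the new `datamint/api` package and typed `datamint/entities`. Below is a minimal sketch of the 2.0.0 entry point as it is actually used in the diffs that follow; only names and parameters that appear in the changed code are shown, and the placeholder values ("scan.dcm", "my-channel", "my-project") are not part of the package.

    # Illustrative only: names taken from call sites in this diff, not full signatures.
    from datamint import Api
    from datamint.exceptions import DatamintException

    try:
        api = Api(check_connection=True)   # verifies connectivity/credentials up front
    except DatamintException as e:
        raise SystemExit(f"Connection failed: {e}")

    results = api.resources.upload_resources(
        files_path=["scan.dcm"],
        channel="my-channel",
        on_error="skip",
        progress_bar=True,
    )
    project = api.projects.get_by_name("my-project")   # entities expose .asdict()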
datamint/client_cmd_tools/datamint_config.py
CHANGED

@@ -1,46 +1,13 @@
 import argparse
 import logging
-import os
-import platform
 from datamint import configs
-from datamint.utils.logging_utils import load_cmdline_logging_config
+from datamint.utils.logging_utils import load_cmdline_logging_config, ConsoleWrapperHandler
 from rich.prompt import Prompt, Confirm
 from rich.console import Console
-
-
-# Create a custom theme that works well on both dark and blue backgrounds
-def _create_console_theme() -> Theme:
-    """Create a custom Rich theme optimized for cross-platform terminals."""
-    # Detect if we're likely on PowerShell (Windows + PowerShell)
-    is_powershell = (
-        platform.system() == "Windows" and
-        os.environ.get("PSModulePath") is not None
-    )
-
-    if is_powershell:
-        # PowerShell blue background - use high contrast colors
-        return Theme({
-            "warning": "bright_yellow",
-            "error": "bright_red on white",
-            "success": "bright_green",
-            "key": "bright_cyan",
-            "accent": "bright_cyan",
-            "title": "bold"
-        })
-    else:
-        # Linux/Unix terminals - standard colors
-        return Theme({
-            "warning": "yellow",
-            "error": "red",
-            "success": "green",
-            "key": "cyan",
-            "accent": "bright_blue",
-            "title": "bold"
-        })
-
-# Create console with custom theme
-console = Console(theme=_create_console_theme())
+
 _LOGGER = logging.getLogger(__name__)
+_USER_LOGGER = logging.getLogger('user_logger')
+console: Console
 
 
 def configure_default_url():

@@ -170,7 +137,9 @@ def interactive_mode():
 
 def main():
     """Main entry point for the configuration tool."""
+    global console
     load_cmdline_logging_config()
+    console = [h for h in _USER_LOGGER.handlers if isinstance(h, ConsoleWrapperHandler)][0].console
     parser = argparse.ArgumentParser(
         description='🔧 Datamint API Configuration Tool',
         epilog="""
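
The removed `_create_console_theme()` helper is replaced by a single Rich `Console` owned by the logging configuration: `load_cmdline_logging_config()` is expected to attach a `ConsoleWrapperHandler` to the `user_logger` logger, and the CLI simply reuses that handler's console. A minimal sketch of that retrieval pattern, under the same assumption; the `get_user_console` helper is ours, and no fallback is shown for the case where the handler is missing.

    import logging
    from datamint.utils.logging_utils import load_cmdline_logging_config, ConsoleWrapperHandler

    def get_user_console():
        # Assumes load_cmdline_logging_config() attaches a ConsoleWrapperHandler,
        # as the datamint_config and datamint_upload diffs suggest.
        load_cmdline_logging_config()
        user_logger = logging.getLogger('user_logger')
        handlers = [h for h in user_logger.handlers if isinstance(h, ConsoleWrapperHandler)]
        return handlers[0].console   # the shared rich.console.Console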
datamint/client_cmd_tools/datamint_upload.py
CHANGED

@@ -1,6 +1,7 @@
 from datamint.exceptions import DatamintException
 import argparse
-from datamint.apihandler.api_handler import APIHandler
+# from datamint.apihandler.api_handler import APIHandler
+from datamint import Api
 import os
 from humanize import naturalsize
 import logging

@@ -12,8 +13,8 @@ from typing import Generator, Optional, Any
 from collections import defaultdict
 from datamint import __version__ as datamint_version
 from datamint import configs
-from datamint.
-from
+from datamint.utils.logging_utils import load_cmdline_logging_config, ConsoleWrapperHandler
+from rich.console import Console
 import yaml
 from collections.abc import Iterable
 import pandas as pd

@@ -22,32 +23,42 @@ import pydicom.errors
 # Create two loggings: one for the user and one for the developer
 _LOGGER = logging.getLogger(__name__)
 _USER_LOGGER = logging.getLogger('user_logger')
+logging.getLogger('pydicom').setLevel(logging.ERROR)
+CONSOLE: Console
 
 MAX_RECURSION_LIMIT = 1000
 
+# Default extensions to exclude when --include-extensions is not specified
+DEFAULT_EXCLUDED_EXTENSIONS = [
+    '.txt', '.json', '.xml', '.docx', '.doc', '.pdf', '.xlsx', '.xls', '.csv', '.tsv',
+    '.log', '.ini', '.cfg', '.conf', '.yaml', '.yml', '.md', '.rst', '.html', '.htm',
+    '.exe', '.bat', '.sh', '.py', '.js', '.css',
+    '.sql', '.bak', '.tmp', '.temp', '.lock', '.DS_Store', '.gitignore'
+]
+
 
 def _get_minimal_distinguishing_paths(file_paths: list[str]) -> dict[str, str]:
     """
     Generate minimal distinguishing paths for files to avoid ambiguity when multiple files have the same name.
-
+
     Args:
         file_paths: List of file paths
-
+
     Returns:
         Dictionary mapping full path to minimal distinguishing path
     """
     if not file_paths:
         return {}
-
+
     # Convert to Path objects and get absolute paths
     paths = [Path(fp).resolve() for fp in file_paths]
     result = {}
-
+
     # Group files by basename
     basename_groups = defaultdict(list)
     for i, path in enumerate(paths):
         basename_groups[path.name].append((i, path))
-
+
     for basename, path_list in basename_groups.items():
         if len(path_list) == 1:
             # Only one file with this name, use just the basename

@@ -56,7 +67,7 @@ def _get_minimal_distinguishing_paths(file_paths: list[str]) -> dict[str, str]:
         else:
             # Multiple files with same name, need to distinguish them
             path_parts_list = [path.parts for _, path in path_list]
-
+
             # Find the minimum number of parent directories needed to distinguish
             max_depth_needed = 1
             for depth in range(1, max(len(parts) for parts in path_parts_list) + 1):

@@ -67,12 +78,12 @@ def _get_minimal_distinguishing_paths(file_paths: list[str]) -> dict[str, str]:
                         suffixes.append('/'.join(parts))
                     else:
                         suffixes.append('/'.join(parts[-depth:]))
-
+
                 if len(set(suffixes)) == len(suffixes):
                     # All suffixes are unique at this depth
                     max_depth_needed = depth
                     break
-
+
             # Apply the minimal distinguishing paths
             for (idx, path), parts in zip(path_list, path_parts_list):
                 if max_depth_needed >= len(parts):

@@ -80,10 +91,8 @@ def _get_minimal_distinguishing_paths(file_paths: list[str]) -> dict[str, str]:
                 else:
                     distinguishing_path = '/'.join(parts[-max_depth_needed:])
                 result[file_paths[idx]] = distinguishing_path
-
-    return result
-
 
+    return result
 
 
 def _read_segmentation_names(segmentation_names_path: str | Path) -> dict:

@@ -257,6 +266,7 @@ def handle_api_key() -> str | None:
     If it does not exist, it asks the user to input it.
     Then, it asks the user if he wants to save the API key at a proper location in the machine
     """
+    from datamint.client_cmd_tools.datamint_config import ask_api_key
     api_key = configs.get_value(configs.APIKEY_KEY)
     if api_key is None:
         _USER_LOGGER.info("API key not found. Please provide it:")

@@ -524,6 +534,8 @@ def _parse_args() -> tuple[Any, list[str], Optional[list[dict]], Optional[list[s
     parser.add_argument('--channel', '--name', type=str, required=False,
                         help='Channel name (arbritary) to upload the resources to. \
                         Useful for organizing the resources in the platform.')
+    parser.add_argument('--project', type=str, required=False,
+                        help='Project name to add the uploaded resources to after successful upload.')
     parser.add_argument('--retain-pii', action='store_true', help='Do not anonymize DICOMs')
     parser.add_argument('--retain-attribute', type=_tuple_int_type, action='append',
                         default=[],

@@ -541,7 +553,8 @@ def _parse_args() -> tuple[Any, list[str], Optional[list[dict]], Optional[list[s
                         help='File extensions to be considered for uploading. Default: all file extensions.' +
                         ' Example: --include-extensions dcm jpg png')
     parser.add_argument('--exclude-extensions', type=str, nargs='+',
-                        help='File extensions to be excluded from uploading.
+                        help='File extensions to be excluded from uploading. ' +
+                        'Default: common non-medical file extensions (.txt, .json, .xml, .docx, etc.) when --include-extensions is not specified.' +
                         ' Example: --exclude-extensions txt csv'
                         )
     parser.add_argument('--segmentation_path', type=_is_valid_path_argparse, metavar="FILE",

@@ -581,7 +594,6 @@ def _parse_args() -> tuple[Any, list[str], Optional[list[dict]], Optional[list[s
 
     if args.verbose:
         # Get the console handler and set to debug
-        print(logging.getLogger().handlers)
         logging.getLogger().handlers[0].setLevel(logging.DEBUG)
         logging.getLogger('datamint').setLevel(logging.DEBUG)
         _LOGGER.setLevel(logging.DEBUG)

@@ -594,6 +606,11 @@ def _parse_args() -> tuple[Any, list[str], Optional[list[dict]], Optional[list[s
     if args.include_extensions is not None and args.exclude_extensions is not None:
         raise ValueError("--include-extensions and --exclude-extensions are mutually exclusive.")
 
+    # Apply default excluded extensions if neither include nor exclude extensions are specified
+    if args.include_extensions is None and args.exclude_extensions is None:
+        args.exclude_extensions = DEFAULT_EXCLUDED_EXTENSIONS
+        _LOGGER.debug(f"Applied default excluded extensions: {args.exclude_extensions}")
+
     try:
         if os.path.isfile(args.path):
             file_path = [args.path]

@@ -720,24 +737,26 @@ def print_results_summary(files_path: list[str],
     failure_files = [f for f, r in zip(files_path, results) if isinstance(r, Exception)]
     # Get distinguishing paths for better error reporting
     distinguishing_paths = _get_minimal_distinguishing_paths(files_path)
-
+
     _USER_LOGGER.info(f"\nUpload summary:")
     _USER_LOGGER.info(f"\tTotal files: {len(files_path)}")
     _USER_LOGGER.info(f"\tSuccessful uploads: {len(files_path) - len(failure_files)}")
     if len(failure_files) > 0:
-        _USER_LOGGER.
+        _USER_LOGGER.warning(f"\tFailed uploads: {len(failure_files)}")
        _USER_LOGGER.warning(f"\tFailed files: {[distinguishing_paths[f] for f in failure_files]}")
        _USER_LOGGER.warning(f"\nFailures:")
        for f, r in zip(files_path, results):
            if isinstance(r, Exception):
                _USER_LOGGER.warning(f"\t{distinguishing_paths[f]}: {r}")
     else:
-
+        CONSOLE.print(f'✅ All uploads successful!', style='success')
     return len(failure_files)
 
 
 def main():
+    global CONSOLE
     load_cmdline_logging_config()
+    CONSOLE = [h for h in _USER_LOGGER.handlers if isinstance(h, ConsoleWrapperHandler)][0].console
 
     try:
         args, files_path, segfiles, metadata_files = _parse_args()

@@ -745,48 +764,55 @@ def main():
         _USER_LOGGER.error(f'Error validating arguments. {e}')
         sys.exit(1)
 
-
-
-
-
-
+    try:
+        print_input_summary(files_path,
+                            args=args,
+                            segfiles=segfiles,
+                            metadata_files=metadata_files,
+                            include_extensions=args.include_extensions)
 
-
-
-
-
-
-
+        if not args.yes:
+            confirmation = input("Do you want to proceed with the upload? (y/n): ")
+            if confirmation.lower() != "y":
+                _USER_LOGGER.info("Upload cancelled.")
+                return
+        #######################################
 
-
+        has_a_dicom_file = any(is_dicom(f) for f in files_path)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        try:
+            api = Api(check_connection=True)
+        except DatamintException as e:
+            _USER_LOGGER.error(f'❌ Connection failed: {e}')
+            return
+        try:
+            print('>>>', segfiles)
+            results = api.resources.upload_resources(channel=args.channel,
+                                                     files_path=files_path,
+                                                     tags=args.tag,
+                                                     on_error='skip',
+                                                     anonymize=args.retain_pii == False and has_a_dicom_file,
+                                                     anonymize_retain_codes=args.retain_attribute,
+                                                     mung_filename=args.mungfilename,
+                                                     publish=args.publish,
+                                                     publish_to=args.project,
+                                                     segmentation_files=segfiles,
+                                                     transpose_segmentation=args.transpose_segmentation,
+                                                     assemble_dicoms=True,
+                                                     metadata=metadata_files,
+                                                     progress_bar=True
+                                                     )
+        except pydicom.errors.InvalidDicomError as e:
+            _USER_LOGGER.error(f'❌ Invalid DICOM file: {e}')
+            return
+        _USER_LOGGER.info('Upload finished!')
+        _LOGGER.debug(f"Number of results: {len(results)}")
+
+        num_failures = print_results_summary(files_path, results)
+        if num_failures > 0:
+            sys.exit(1)
+    except KeyboardInterrupt:
+        CONSOLE.print("\nUpload cancelled by user.", style='warning')
         sys.exit(1)
 
 
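
One behavioural change worth noting above: when neither `--include-extensions` nor `--exclude-extensions` is passed, `datamint-upload` now falls back to `DEFAULT_EXCLUDED_EXTENSIONS`. A small illustrative filter showing the effect of that default; the `keep_for_upload` helper is ours, not the tool's actual filtering code, and the extension list is copied from the diff.

    from pathlib import Path

    DEFAULT_EXCLUDED_EXTENSIONS = [
        '.txt', '.json', '.xml', '.docx', '.doc', '.pdf', '.xlsx', '.xls', '.csv', '.tsv',
        '.log', '.ini', '.cfg', '.conf', '.yaml', '.yml', '.md', '.rst', '.html', '.htm',
        '.exe', '.bat', '.sh', '.py', '.js', '.css',
        '.sql', '.bak', '.tmp', '.temp', '.lock', '.DS_Store', '.gitignore'
    ]

    def keep_for_upload(path: str, exclude=tuple(DEFAULT_EXCLUDED_EXTENSIONS)) -> bool:
        """Illustrative: drop files whose suffix is in the default exclusion list."""
        return Path(path).suffix.lower() not in {e.lower() for e in exclude}

    files = ['scan.dcm', 'notes.txt', 'report.pdf', 'image.png']
    print([f for f in files if keep_for_upload(f)])   # ['scan.dcm', 'image.png']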
datamint/dataset/base_dataset.py
CHANGED
@@ -13,14 +13,16 @@ from datamint import configs
 from torch.utils.data import DataLoader
 import torch
 from torch import Tensor
-from datamint.
+from datamint.exceptions import DatamintException
 from medimgkit.dicom_utils import is_dicom
 from medimgkit.readers import read_array_normalized
-from medimgkit.format_detection import guess_extension
+from medimgkit.format_detection import guess_extension, guess_typez
+from medimgkit.nifti_utils import NIFTI_MIMES, get_nifti_shape
 from datetime import datetime
 from pathlib import Path
-from datamint.
+from datamint.entities import Annotation, DatasetInfo
 import cv2
+from datamint.entities import Resource
 
 _LOGGER = logging.getLogger(__name__)
 

@@ -174,23 +176,12 @@ class DatamintBaseDataset:
 
     def _setup_api_handler(self, server_url: Optional[str], api_key: Optional[str], auto_update: bool) -> None:
         """Setup API handler and validate connection."""
-        from datamint
-
-
-            root_url=server_url,
+        from datamint import Api
+        self.api = Api(
+            server_url=server_url,
             api_key=api_key,
-            check_connection=auto_update
+            check_connection=self.auto_update
         )
-        self.server_url = self.api_handler.root_url
-        self.api_key = self.api_handler.api_key
-
-        if self.api_key is None:
-            _LOGGER.warning(
-                "API key not provided. If you want to download data, please provide an API key, "
-                f"either by passing it as an argument, "
-                f"setting environment variable {configs.ENV_VARS[configs.APIKEY_KEY]} or "
-                "using datamint-config command line tool."
-            )
 
     def _setup_directories(self, root: str | None) -> None:
         """Setup root and dataset directories."""

@@ -242,7 +233,7 @@
         if not os.path.isfile(metadata_path):
             # get the server info
             self.project_info = self.get_info()
-            self.metainfo = self._get_datasetinfo().copy()
+            self.metainfo = self._get_datasetinfo().asdict().copy()
             self.metainfo['updated_at'] = None
             self.metainfo['resources'] = []
             self.metainfo['all_annotations'] = self.all_annotations

@@ -412,19 +403,33 @@
     @staticmethod
     def read_number_of_frames(filepath: str) -> int:
         """Read the number of frames in a file."""
-
+
+        mimetypes, ext = guess_typez(filepath)
+        mimetype = mimetypes[0]
+        if mimetype is None:
+            raise ValueError(f"Could not determine MIME type for file: {filepath}")
+
+        if mimetype == 'application/dicom':
             ds = pydicom.dcmread(filepath)
             return getattr(ds, 'NumberOfFrames', 1)
-        elif
+        elif mimetype.startswith('video/'):
             cap = cv2.VideoCapture(filepath)
             try:
                 return int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
             finally:
                 cap.release()
-        elif
+        elif mimetype in ('image/png', 'image/jpeg', 'image/jpg', 'image/bmp', 'image/tiff'):
             return 1
+        elif mimetype in NIFTI_MIMES:
+            shape = get_nifti_shape(filepath)
+            if len(shape) == 3:
+                return shape[-1]
+            elif len(shape) > 3:
+                return shape[3]
+            else:
+                return 1
         else:
-            raise ValueError(f"Unsupported file type
+            raise ValueError(f"Unsupported file type '{mimetype}' for file {filepath}")
 
     def get_resources_ids(self) -> list[str]:
         """Get list of resource IDs."""
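
`read_number_of_frames` now dispatches on the detected MIME type and, for NIfTI files, derives the frame count from the array shape: the last axis of a 3-D volume, or the fourth axis of anything higher-dimensional. A worked illustration of that rule on plain shape tuples (no file I/O; this mirrors the branch added above rather than calling `medimgkit`):

    def frames_from_nifti_shape(shape: tuple[int, ...]) -> int:
        # Mirrors the NIfTI branch added to read_number_of_frames above.
        if len(shape) == 3:
            return shape[-1]      # e.g. (512, 512, 40) -> 40 slices
        elif len(shape) > 3:
            return shape[3]       # e.g. (512, 512, 40, 12) -> 12 time points
        return 1                  # 2-D image: a single frame

    assert frames_from_nifti_shape((512, 512, 40)) == 40
    assert frames_from_nifti_shape((512, 512, 40, 12)) == 12
    assert frames_from_nifti_shape((512, 512)) == 1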
@@ -526,18 +531,18 @@
         if missing_files:
             raise DatamintDatasetException(f"Image files not found: {missing_files}")
 
-    def _get_datasetinfo(self) ->
+    def _get_datasetinfo(self) -> DatasetInfo:
         """Get dataset information from API."""
         if self._server_dataset_info is not None:
             return self._server_dataset_info
-        all_datasets = self.
+        all_datasets = self.api._datasetsinfo.get_all()
 
         for dataset in all_datasets:
-            if dataset
+            if dataset.id == self.dataset_id:
                 self._server_dataset_info = dataset
                 return dataset
 
-        available_datasets = [(d
+        available_datasets = [(d.name, d.id) for d in all_datasets]
         raise DatamintDatasetException(
             f"Dataset with id '{self.dataset_id}' not found. "
             f"Available datasets: {available_datasets}"

@@ -547,7 +552,7 @@
         """Get project information from API."""
         if hasattr(self, 'project_info') and self.project_info is not None:
             return self.project_info
-        project = self.
+        project = self.api.projects.get_by_name(self.project_name).asdict()
         if 'error' in project:
             available_projects = project['all_projects']
             raise DatamintDatasetException(

@@ -592,31 +597,10 @@
         lines = [head] + [" " * 4 + line for line in body]
         return "\n".join(lines)
 
-    def download_project(self) -> None:
-        """Download project data from API."""
-
-        dataset_info = self._get_datasetinfo()
-        self.dataset_id = dataset_info['id']
-        self.last_updaded_at = dataset_info['updated_at']
-
-        self.api_handler.download_project(
-            self.project_info['id'],
-            self.dataset_zippath,
-            all_annotations=self.all_annotations,
-            include_unannotated=self.include_unannotated
-        )
-
-        _LOGGER.debug("Downloaded dataset")
-
-        if os.path.getsize(self.dataset_zippath) == 0:
-            raise DatamintDatasetException("Download failed.")
-
-        self._extract_and_update_metadata()
-
     def _get_dataset_id(self) -> str:
         if self.dataset_id is None:
             dataset_info = self._get_datasetinfo()
-            self.dataset_id = dataset_info
+            self.dataset_id = dataset_info.id
         return self.dataset_id
 
     def _extract_and_update_metadata(self) -> None:

@@ -638,7 +622,7 @@
 
         # Save updated metadata
         with open(datasetjson_path, 'w') as file:
-            json.dump(self.metainfo, file, default=lambda o: o.
+            json.dump(self.metainfo, file, default=lambda o: o.asdict() if hasattr(o, 'asdict') else o)
 
         self.images_metainfo = self.metainfo['resources']
         # self._convert_metainfo_to_clsobj()

@@ -646,19 +630,19 @@
     def _update_metadata_timestamps(self) -> None:
         """Update metadata with correct timestamps."""
         if 'updated_at' not in self.metainfo:
-            self.metainfo['updated_at'] = self.
+            self.metainfo['updated_at'] = self.last_updated_at
         else:
             try:
                 local_time = datetime.fromisoformat(self.metainfo['updated_at'])
-                server_time = datetime.fromisoformat(self.
+                server_time = datetime.fromisoformat(self.last_updated_at)
 
                 if local_time < server_time:
                     _LOGGER.warning(
                         f"Inconsistent updated_at dates detected "
-                        f"({self.metainfo['updated_at']} < {self.
-                        f"Fixing it to {self.
+                        f"({self.metainfo['updated_at']} < {self.last_updated_at}). "
+                        f"Fixing it to {self.last_updated_at}"
                     )
-                    self.metainfo['updated_at'] = self.
+                    self.metainfo['updated_at'] = self.last_updated_at
             except Exception as e:
                 _LOGGER.warning(f"Failed to parse updated_at date: {e}")
 

@@ -690,6 +674,9 @@
             img = (img - min_val) / (img.max() - min_val) * 255
             img = img.astype(np.uint8)
 
+        if not img.flags.writeable:
+            img = img.copy()
+
         img_tensor = torch.from_numpy(img).contiguous()
 
         if isinstance(img_tensor, torch.ByteTensor):

@@ -829,7 +816,7 @@
 
         try:
             external_metadata_info = self._get_datasetinfo()
-            server_updated_at = external_metadata_info
+            server_updated_at = external_metadata_info.updated_at
         except Exception as e:
             _LOGGER.warning(f"Failed to check for updates in {self.project_name}: {e}")
             return

@@ -856,20 +843,21 @@
             _LOGGER.info('Local version is up to date with the latest version.')
 
     def _fetch_new_resources(self,
-                             all_uptodate_resources: list[
+                             all_uptodate_resources: list[Resource]) -> list[dict]:
         local_resources = self.images_metainfo
         local_resources_ids = [res['id'] for res in local_resources]
         new_resources = []
         for resource in all_uptodate_resources:
+            resource = resource.asdict()
             if resource['id'] not in local_resources_ids:
                 resource['file'] = str(self._get_resource_file_path(resource))
                 resource['annotations'] = []
                 new_resources.append(resource)
         return new_resources
 
-    def _fetch_deleted_resources(self, all_uptodate_resources: list[
+    def _fetch_deleted_resources(self, all_uptodate_resources: list[Resource]) -> list[dict]:
         local_resources = self.images_metainfo
-        all_uptodate_resources_ids = [res
+        all_uptodate_resources_ids = [res.id for res in all_uptodate_resources]
         deleted_resources = []
         for resource in local_resources:
             try:

@@ -888,7 +876,7 @@
         # server_updated_at = external_metadata_info['updated_at']
 
         ### RESOURCES ###
-        all_uptodate_resources = self.
+        all_uptodate_resources = self.api.projects.get_project_resources(self.get_info()['id'])
         new_resources = self._fetch_new_resources(all_uptodate_resources)
         deleted_resources = self._fetch_deleted_resources(all_uptodate_resources)
 

@@ -898,9 +886,9 @@
         new_resources_path = [Path(self.dataset_dir) / r['file'] for r in new_resources]
         new_resources_ids = [r['id'] for r in new_resources]
         _LOGGER.info(f"Downloading {len(new_resources)} new resources...")
-        new_res_paths = self.
-
-
+        new_res_paths = self.api.resources.download_multiple_resources(new_resources_ids,
+                                                                       save_path=new_resources_path,
+                                                                       add_extension=True)
         for new_rpath, r in zip(new_res_paths, new_resources):
             r['file'] = str(Path(new_rpath).relative_to(self.dataset_dir))
         _LOGGER.info(f"Downloaded {len(new_resources)} new resources.")

@@ -910,16 +898,17 @@
         ################
 
         ### ANNOTATIONS ###
-        all_annotations = self.
-
+        all_annotations = self.api.annotations.get_list(worklist_id=self.project_info['worklist_id'],
+                                                        status='published' if self.all_annotations else None)
+
         # group annotations by resource ID
-        annotations_by_resource = {}
+        annotations_by_resource: dict[str, list[Annotation]] = {}
         for ann in all_annotations:
             # add the local filepath
             filepath = self._get_annotation_file_path(ann)
             if filepath is not None:
-                ann
-                resource_id = ann
+                ann.file = str(filepath)
+                resource_id = ann.resource_id
             if resource_id not in annotations_by_resource:
                 annotations_by_resource[resource_id] = []
             annotations_by_resource[resource_id].append(ann)

@@ -937,11 +926,11 @@
             # check if segmentation annotations need to be downloaded
             # Also check if annotations need to be deleted
             old_ann_ids = set([ann.id for ann in old_resource_annotations if hasattr(ann, 'id')])
-            new_ann_ids = set([ann
+            new_ann_ids = set([ann.id for ann in new_resource_annotations])
 
             # Find annotations to add, update, or remove
             annotations_to_add = [ann for ann in new_resource_annotations
-                                  if ann
+                                  if ann.id not in old_ann_ids]
             annotations_to_remove = [ann for ann in old_resource_annotations
                                      if getattr(ann, 'id', 'NA') not in new_ann_ids]
 

@@ -970,22 +959,23 @@
                     _LOGGER.error(f"Error deleting annotation file {filepath}: {e}")
 
             # Update resource annotations list - convert to Annotation objects
-            resource['annotations'] = [Annotation.from_dict(ann) for ann in new_resource_annotations]
+            # resource['annotations'] = [Annotation.from_dict(ann) for ann in new_resource_annotations]
+            resource['annotations'] = new_resource_annotations
 
         # Batch download all segmentation files
         if segmentations_to_download:
             _LOGGER.info(f"Downloading {len(segmentations_to_download)} segmentation files...")
-            self.
+            self.api.annotations.download_multiple_files(segmentations_to_download, segmentation_paths)
             _LOGGER.info(f"Downloaded {len(segmentations_to_download)} segmentation files.")
 
         ###################
         # update metadata
-        self.metainfo['updated_at'] = self._get_datasetinfo()
+        self.metainfo['updated_at'] = self._get_datasetinfo().updated_at
         self.metainfo['all_annotations'] = self.all_annotations
         # save updated metadata
         datasetjson_path = os.path.join(self.dataset_dir, 'dataset.json')
         with open(datasetjson_path, 'w') as file:
-            json.dump(self.metainfo, file, default=lambda o: o.
+            json.dump(self.metainfo, file, default=lambda o: o.asdict() if hasattr(o, 'asdict') else o)
 
     def _get_resource_file_path(self, resource: dict) -> Path:
         """Get the local file path for a resource."""