datamint 1.9.1__tar.gz → 1.9.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of datamint might be problematic.

Files changed (29)
  1. {datamint-1.9.1 → datamint-1.9.3}/PKG-INFO +2 -2
  2. {datamint-1.9.1 → datamint-1.9.3}/datamint/apihandler/annotation_api_handler.py +5 -5
  3. {datamint-1.9.1 → datamint-1.9.3}/datamint/apihandler/base_api_handler.py +4 -10
  4. {datamint-1.9.1 → datamint-1.9.3}/datamint/apihandler/root_api_handler.py +97 -39
  5. {datamint-1.9.1 → datamint-1.9.3}/datamint/client_cmd_tools/datamint_config.py +7 -38
  6. {datamint-1.9.1 → datamint-1.9.3}/datamint/client_cmd_tools/datamint_upload.py +158 -44
  7. datamint-1.9.3/datamint/exceptions.py +5 -0
  8. {datamint-1.9.1 → datamint-1.9.3}/datamint/logging.yaml +1 -1
  9. datamint-1.9.3/datamint/utils/logging_utils.py +130 -0
  10. {datamint-1.9.1 → datamint-1.9.3}/pyproject.toml +2 -2
  11. datamint-1.9.1/datamint/utils/logging_utils.py +0 -55
  12. {datamint-1.9.1 → datamint-1.9.3}/README.md +0 -0
  13. {datamint-1.9.1 → datamint-1.9.3}/datamint/__init__.py +0 -0
  14. {datamint-1.9.1 → datamint-1.9.3}/datamint/apihandler/api_handler.py +0 -0
  15. {datamint-1.9.1 → datamint-1.9.3}/datamint/apihandler/dto/annotation_dto.py +0 -0
  16. {datamint-1.9.1 → datamint-1.9.3}/datamint/apihandler/exp_api_handler.py +0 -0
  17. {datamint-1.9.1 → datamint-1.9.3}/datamint/client_cmd_tools/__init__.py +0 -0
  18. {datamint-1.9.1 → datamint-1.9.3}/datamint/configs.py +0 -0
  19. {datamint-1.9.1 → datamint-1.9.3}/datamint/dataset/__init__.py +0 -0
  20. {datamint-1.9.1 → datamint-1.9.3}/datamint/dataset/annotation.py +0 -0
  21. {datamint-1.9.1 → datamint-1.9.3}/datamint/dataset/base_dataset.py +0 -0
  22. {datamint-1.9.1 → datamint-1.9.3}/datamint/dataset/dataset.py +0 -0
  23. {datamint-1.9.1 → datamint-1.9.3}/datamint/examples/__init__.py +0 -0
  24. {datamint-1.9.1 → datamint-1.9.3}/datamint/examples/example_projects.py +0 -0
  25. {datamint-1.9.1 → datamint-1.9.3}/datamint/experiment/__init__.py +0 -0
  26. {datamint-1.9.1 → datamint-1.9.3}/datamint/experiment/_patcher.py +0 -0
  27. {datamint-1.9.1 → datamint-1.9.3}/datamint/experiment/experiment.py +0 -0
  28. {datamint-1.9.1 → datamint-1.9.3}/datamint/utils/torchmetrics.py +0 -0
  29. {datamint-1.9.1 → datamint-1.9.3}/datamint/utils/visualization.py +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: datamint
-Version: 1.9.1
+Version: 1.9.3
 Summary: A library for interacting with the Datamint API, designed for efficient data management, processing and Deep Learning workflows.
 Requires-Python: >=3.10
 Classifier: Programming Language :: Python :: 3
@@ -19,7 +19,7 @@ Requires-Dist: humanize (>=4.0.0,<5.0.0)
 Requires-Dist: lazy-loader (>=0.3.0)
 Requires-Dist: lightning
 Requires-Dist: matplotlib
-Requires-Dist: medimgkit (>=0.4.4)
+Requires-Dist: medimgkit (>=0.5.0)
 Requires-Dist: nest-asyncio (>=1.0.0,<2.0.0)
 Requires-Dist: nibabel (>=4.0.0)
 Requires-Dist: numpy
@@ -995,11 +995,11 @@ class AnnotationAPIHandler(BaseAPIHandler):
 
     def update_annotation_worklist(self,
                                    worklist_id: str,
-                                   frame_labels: list[str] = None,
-                                   image_labels: list[str] = None,
-                                   annotations: list[dict] = None,
-                                   status: Literal['new', 'updating', 'active', 'completed'] = None,
-                                   name: str = None,
+                                   frame_labels: list[str] | None = None,
+                                   image_labels: list[str] | None = None,
+                                   annotations: list[dict] | None = None,
+                                   status: Literal['new', 'updating', 'active', 'completed'] | None = None,
+                                   name: str | None = None,
                                    ):
         """
         Update the status of an annotation worklist.
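The optional parameters above are now explicitly typed as `... | None`. A minimal usage sketch, assuming `APIHandler` (datamint/apihandler/api_handler.py) exposes the `AnnotationAPIHandler` methods as the module layout suggests; the worklist id and labels are placeholders:

# Hedged sketch: placeholder ids/labels, and the APIHandler aggregation is an assumption.
from datamint.apihandler.api_handler import APIHandler

api = APIHandler()  # assumes the API key is already configured
api.update_annotation_worklist(
    worklist_id="wl-example-id",   # placeholder
    image_labels=["fracture"],     # placeholder labels
    status="completed",            # one of 'new', 'updating', 'active', 'completed'
)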
@@ -1,4 +1,4 @@
-from typing import Optional, Literal, Generator, TypeAlias, Dict, Union, List
+from typing import Optional, Literal, Generator, TypeAlias
 import pydicom.dataset
 from requests import Session
 from requests.exceptions import HTTPError
@@ -15,6 +15,7 @@ import nibabel as nib
 from nibabel.filebasedimages import FileBasedImage as nib_FileBasedImage
 from datamint import configs
 import gzip
+from datamint.exceptions import DatamintException
 
 _LOGGER = logging.getLogger(__name__)
 
@@ -29,13 +30,6 @@ ResourceFields: TypeAlias = Literal['modality', 'created_by', 'published_by', 'p
 _PAGE_LIMIT = 5000
 
 
-class DatamintException(Exception):
-    """
-    Base class for exceptions in this module.
-    """
-    pass
-
-
 class ResourceNotFoundError(DatamintException):
     """
     Exception raised when a resource is not found.
@@ -196,10 +190,10 @@ class BaseAPIHandler:
             _LOGGER.error(f"Error in request to {request_args['url']}: {e}")
             if status_code >= 400 and status_code < 500:
                 try:
-                    _LOGGER.error(f"Error response: {response.text}")
+                    _LOGGER.info(f"Error response: {response.text}")
                     error_data = response.json()
                 except Exception as e2:
-                    _LOGGER.error(f"Error parsing the response. {e2}")
+                    _LOGGER.info(f"Error parsing the response. {e2}")
                 else:
                     if isinstance(error_data['message'], str) and ' not found' in error_data['message'].lower():
                         # Will be caught by the caller and properly initialized:
@@ -6,7 +6,7 @@ from requests.exceptions import HTTPError
 import logging
 import asyncio
 import aiohttp
-from medimgkit.dicom_utils import anonymize_dicom, to_bytesio, is_dicom, is_dicom_report
+from medimgkit.dicom_utils import anonymize_dicom, to_bytesio, is_dicom, is_dicom_report, GeneratorWithLength
 from medimgkit import dicom_utils, standardize_mimetype
 from medimgkit.io_utils import is_io_object, peek
 from medimgkit.format_detection import guess_typez, guess_extension, DEFAULT_MIME_TYPE
@@ -185,9 +185,7 @@ class RootAPIHandler(BaseAPIHandler):
             resp_data = await self._run_request_async(request_params, session)
             if 'error' in resp_data:
                 raise DatamintException(resp_data['error'])
-            _LOGGER.info(f"Response on uploading {name}: {resp_data}")
-
-            _USER_LOGGER.info(f'"{name}" uploaded')
+            _LOGGER.debug(f"Response on uploading {name}: {resp_data}")
             return resp_data['id']
         except Exception as e:
             if 'name' in locals():
@@ -212,6 +210,7 @@ class RootAPIHandler(BaseAPIHandler):
                                        segmentation_files: Optional[list[dict]] = None,
                                        transpose_segmentation: bool = False,
                                        metadata_files: Optional[list[str | dict | None]] = None,
+                                       progress_bar: tqdm | None = None,
                                        ) -> list[str]:
         if on_error not in ['raise', 'skip']:
             raise ValueError("on_error must be either 'raise' or 'skip'")
@@ -223,7 +222,10 @@
            metadata_files = _infinite_gen(None)
 
         async with aiohttp.ClientSession() as session:
-            async def __upload_single_resource(file_path, segfiles: dict[str, list | dict], metadata_file: str | dict | None):
+            async def __upload_single_resource(file_path, segfiles: dict[str, list | dict],
+                                               metadata_file: str | dict | None):
+                name = file_path.name if is_io_object(file_path) else file_path
+                name = os.path.basename(name)
                 rid = await self._upload_single_resource_async(
                     file_path=file_path,
                     mimetype=mimetype,
@@ -237,6 +239,12 @@
                     publish=publish,
                     metadata_file=metadata_file,
                 )
+                if progress_bar:
+                    progress_bar.update(1)
+                    progress_bar.set_postfix(file=name)
+                else:
+                    _USER_LOGGER.info(f'"{name}" uploaded')
+
                 if segfiles is not None:
                     fpaths = segfiles['files']
                     names = segfiles.get('names', _infinite_gen(None))
@@ -258,30 +266,55 @@
                      for f, segfiles, metadata_file in zip(files_path, segmentation_files, metadata_files)]
             return await asyncio.gather(*tasks, return_exceptions=on_error == 'skip')
 
-    def _assemble_dicoms(self, files_path: Sequence[str | IO]) -> tuple[Sequence[str | IO], bool]:
+    def _assemble_dicoms(self, files_path: Sequence[str | IO]
+                         ) -> tuple[Sequence[str | IO], bool, Sequence[int]]:
+        """
+        Assembles DICOM files into a single file.
+
+        Args:
+            files_path: The paths to the DICOM files to assemble.
+
+        Returns:
+            A tuple containing:
+            - The paths to the assembled DICOM files.
+            - A boolean indicating whether the assembly was successful.
+            - same length as the output assembled DICOMs, mapping assembled DICOM to original DICOMs.
+        """
         dicoms_files_path = []
         other_files_path = []
-        for f in files_path:
+        dicom_original_idxs = []
+        others_original_idxs = []
+        for i, f in enumerate(files_path):
             if is_dicom(f):
                 dicoms_files_path.append(f)
+                dicom_original_idxs.append(i)
             else:
                 other_files_path.append(f)
+                others_original_idxs.append(i)
 
         orig_len = len(dicoms_files_path)
         if orig_len == 0:
             _LOGGER.debug("No DICOM files found to assemble.")
-            return files_path, False
+            return files_path, False, []
         dicoms_files_path = dicom_utils.assemble_dicoms(dicoms_files_path, return_as_IO=True)
 
         new_len = len(dicoms_files_path)
         if new_len != orig_len:
             _LOGGER.info(f"Assembled {new_len} dicom files out of {orig_len} files.")
-            files_path = itertools.chain(dicoms_files_path, other_files_path)
+            mapping_idx = [None] * len(files_path)
+
+            files_path = GeneratorWithLength(itertools.chain(dicoms_files_path, other_files_path),
+                                             length=new_len + len(other_files_path))
             assembled = True
+            for orig_idx, value in zip(dicom_original_idxs, dicoms_files_path.inverse_mapping_idx):
+                mapping_idx[orig_idx] = value
+            for i, orig_idx in enumerate(others_original_idxs):
+                mapping_idx[orig_idx] = new_len + i
         else:
             assembled = False
+            mapping_idx = [i for i in range(len(files_path))]
 
-        return files_path, assembled
+        return files_path, assembled, mapping_idx
 
     def upload_resource(self,
                         file_path: str | IO | pydicom.dataset.Dataset,
@@ -364,7 +397,8 @@
                                        transpose_segmentation=transpose_segmentation,
                                        modality=modality,
                                        assemble_dicoms=assemble_dicoms,
-                                       metadata=metadata
+                                       metadata=metadata,
+                                       progress_bar=False
                                        )
 
         return result[0]
@@ -385,7 +419,8 @@
                          modality: Optional[str] = None,
                          assemble_dicoms: bool = True,
                          metadata: list[str | dict | None] | dict | str | None = None,
-                         discard_dicom_reports: bool = True
+                         discard_dicom_reports: bool = True,
+                         progress_bar: bool = False
                          ) -> list[str | Exception] | str | Exception:
         """
         Upload resources.
@@ -433,17 +468,17 @@
            # Create filtered lists maintaining index correspondence
            filtered_files = []
            filtered_metadata = []
-
+
            for i, f in enumerate(files_path):
                if not is_dicom_report(f):
                    filtered_files.append(f)
                    if metadata is not None:
                        filtered_metadata.append(metadata[i])
-
+
            files_path = filtered_files
            if metadata is not None:
                metadata = filtered_metadata
-
+
            if old_size is not None and old_size != len(files_path):
                _LOGGER.info(f"Discarded {old_size - len(files_path)} DICOM report files from upload.")
 
@@ -454,9 +489,16 @@
         if metadata is not None and len(metadata) != len(files_path):
             raise ValueError("The number of metadata files must match the number of resources.")
         if assemble_dicoms:
-            files_path, assembled = self._assemble_dicoms(files_path)
+            files_path, assembled, mapping_idx = self._assemble_dicoms(files_path)
             assemble_dicoms = assembled
-
+        else:
+            mapping_idx = [i for i in range(len(files_path))]
+        n_files = len(files_path)
+
+        if n_files <= 1:
+            # Disable progress bar for single file uploads
+            progress_bar = False
+
         if segmentation_files is not None:
             if assemble_dicoms:
                 raise NotImplementedError("Segmentation files cannot be uploaded when assembling dicoms yet.")
@@ -484,22 +526,32 @@
                    "segmentation_files['names'] must have the same length as segmentation_files['files'].")
 
         loop = asyncio.get_event_loop()
-        task = self._upload_resources_async(files_path=files_path,
-                                            mimetype=mimetype,
-                                            anonymize=anonymize,
-                                            anonymize_retain_codes=anonymize_retain_codes,
-                                            on_error=on_error,
-                                            tags=tags,
-                                            mung_filename=mung_filename,
-                                            channel=channel,
-                                            publish=publish,
-                                            segmentation_files=segmentation_files,
-                                            transpose_segmentation=transpose_segmentation,
-                                            modality=modality,
-                                            metadata_files=metadata,
-                                            )
-
-        resource_ids = loop.run_until_complete(task)
+        pbar = None
+        try:
+            if progress_bar:
+                pbar = tqdm(total=n_files, desc="Uploading resources", unit="file")
+
+            task = self._upload_resources_async(files_path=files_path,
+                                                mimetype=mimetype,
+                                                anonymize=anonymize,
+                                                anonymize_retain_codes=anonymize_retain_codes,
+                                                on_error=on_error,
+                                                tags=tags,
+                                                mung_filename=mung_filename,
+                                                channel=channel,
+                                                publish=publish,
+                                                segmentation_files=segmentation_files,
+                                                transpose_segmentation=transpose_segmentation,
+                                                modality=modality,
+                                                metadata_files=metadata,
+                                                progress_bar=pbar
+                                                )
+
+            resource_ids = loop.run_until_complete(task)
+        finally:
+            if pbar:
+                pbar.close()
+
         _LOGGER.info(f"Resources uploaded: {resource_ids}")
 
         if publish_to is not None:
@@ -512,6 +564,10 @@
             if on_error == 'raise':
                 raise e
 
+        if mapping_idx:
+            _LOGGER.debug(f"Mapping indices for DICOM files: {mapping_idx}")
+            resource_ids = [resource_ids[idx] for idx in mapping_idx]
+
         if is_multiple_resources:
             return resource_ids
         return resource_ids[0]
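For context, a minimal sketch of how the new progress_bar flag on upload_resources is meant to be used from the public API; the file paths and channel name below are placeholders:

# Hedged sketch: paths and channel name are placeholders.
from datamint.apihandler.api_handler import APIHandler

api = APIHandler()
resource_ids = api.upload_resources(
    files_path=["scan_001.dcm", "scan_002.dcm"],  # placeholder files
    channel="example-channel",
    on_error="skip",
    assemble_dicoms=True,   # assembled series are mapped back to the original input order
    progress_bar=True,      # shows a tqdm bar instead of one log line per file
)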
@@ -590,7 +646,9 @@
         # get the project id by its name
         project = self.get_project_by_name(project_name)
         if 'error' in project:
-            raise ResourceNotFoundError('project', {'project_name': project_name})
+            project = self.get_project_by_id(project_name)
+            if 'error' in project:
+                raise ResourceNotFoundError('project', {'project_name': project_name})
 
         dataset_id = project['dataset_id']
 
@@ -652,7 +710,8 @@
             raise e
 
     @staticmethod
-    def __process_files_parameter(file_path: str | IO | Sequence[str | IO] | pydicom.dataset.Dataset) -> tuple[Sequence[str | IO], bool]:
+    def __process_files_parameter(file_path: str | IO | Sequence[str | IO] | pydicom.dataset.Dataset
+                                  ) -> tuple[Sequence[str | IO], bool]:
         """
         Process the file_path parameter to ensure it is a list of file paths or IO objects.
         """
@@ -1123,11 +1182,10 @@
 
         loop = asyncio.get_event_loop()
         loop.run_until_complete(_delete_all_resources_async())
-
-
+
     async def _delete_resource_async(self,
-                                     resource_id: str,
-                                     session: aiohttp.ClientSession | None = None) -> None:
+                                     resource_id: str,
+                                     session: aiohttp.ClientSession | None = None) -> None:
         """
         Asynchronously delete a resource by its unique id.
 
@@ -1,46 +1,13 @@
 import argparse
 import logging
-import os
-import platform
 from datamint import configs
-from datamint.utils.logging_utils import load_cmdline_logging_config
+from datamint.utils.logging_utils import load_cmdline_logging_config, ConsoleWrapperHandler
 from rich.prompt import Prompt, Confirm
 from rich.console import Console
-from rich.theme import Theme
-
-# Create a custom theme that works well on both dark and blue backgrounds
-def _create_console_theme() -> Theme:
-    """Create a custom Rich theme optimized for cross-platform terminals."""
-    # Detect if we're likely on PowerShell (Windows + PowerShell)
-    is_powershell = (
-        platform.system() == "Windows" and
-        os.environ.get("PSModulePath") is not None
-    )
-
-    if is_powershell:
-        # PowerShell blue background - use high contrast colors
-        return Theme({
-            "warning": "bright_yellow",
-            "error": "bright_red on white",
-            "success": "bright_green",
-            "key": "bright_cyan",
-            "accent": "bright_cyan",
-            "title": "bold"
-        })
-    else:
-        # Linux/Unix terminals - standard colors
-        return Theme({
-            "warning": "yellow",
-            "error": "red",
-            "success": "green",
-            "key": "cyan",
-            "accent": "bright_blue",
-            "title": "bold"
-        })
-
-# Create console with custom theme
-console = Console(theme=_create_console_theme())
+
 _LOGGER = logging.getLogger(__name__)
+_USER_LOGGER = logging.getLogger('user_logger')
+console: Console
 
 
 def configure_default_url():
@@ -125,7 +92,7 @@ def test_connection():
         projects = api.get_projects()
         console.print(f"[success]✅ Connection successful! Found {len(projects)} projects.[/success]")
     except ImportError:
-        console.print("[error]❌ Full API not available. Install with: pip install datamint-python-api[full][/error]")
+        console.print("[error]❌ Full API not available. Install with: pip install datamint[/error]")
     except Exception as e:
         console.print(f"[error]❌ Connection failed: {e}[/error]")
 
@@ -170,7 +137,9 @@ def interactive_mode():
 
 def main():
     """Main entry point for the configuration tool."""
+    global console
     load_cmdline_logging_config()
+    console = [h for h in _USER_LOGGER.handlers if isinstance(h, ConsoleWrapperHandler)][0].console
     parser = argparse.ArgumentParser(
         description='🔧 Datamint API Configuration Tool',
         epilog="""
@@ -1,3 +1,4 @@
+from datamint.exceptions import DatamintException
 import argparse
 from datamint.apihandler.api_handler import APIHandler
 import os
@@ -11,18 +12,87 @@ from typing import Generator, Optional, Any
 from collections import defaultdict
 from datamint import __version__ as datamint_version
 from datamint import configs
-from datamint.client_cmd_tools.datamint_config import ask_api_key
-from datamint.utils.logging_utils import load_cmdline_logging_config
+from datamint.utils.logging_utils import load_cmdline_logging_config, ConsoleWrapperHandler
+from rich.console import Console
 import yaml
 from collections.abc import Iterable
 import pandas as pd
+import pydicom.errors
 
 # Create two loggings: one for the user and one for the developer
 _LOGGER = logging.getLogger(__name__)
 _USER_LOGGER = logging.getLogger('user_logger')
+logging.getLogger('pydicom').setLevel(logging.ERROR)
+CONSOLE: Console
 
 MAX_RECURSION_LIMIT = 1000
 
+# Default extensions to exclude when --include-extensions is not specified
+DEFAULT_EXCLUDED_EXTENSIONS = [
+    '.txt', '.json', '.xml', '.docx', '.doc', '.pdf', '.xlsx', '.xls', '.csv', '.tsv',
+    '.log', '.ini', '.cfg', '.conf', '.yaml', '.yml', '.md', '.rst', '.html', '.htm',
+    '.exe', '.bat', '.sh', '.py', '.js', '.css',
+    '.sql', '.bak', '.tmp', '.temp', '.lock', '.DS_Store', '.gitignore'
+]
+
+
+def _get_minimal_distinguishing_paths(file_paths: list[str]) -> dict[str, str]:
+    """
+    Generate minimal distinguishing paths for files to avoid ambiguity when multiple files have the same name.
+
+    Args:
+        file_paths: List of file paths
+
+    Returns:
+        Dictionary mapping full path to minimal distinguishing path
+    """
+    if not file_paths:
+        return {}
+
+    # Convert to Path objects and get absolute paths
+    paths = [Path(fp).resolve() for fp in file_paths]
+    result = {}
+
+    # Group files by basename
+    basename_groups = defaultdict(list)
+    for i, path in enumerate(paths):
+        basename_groups[path.name].append((i, path))
+
+    for basename, path_list in basename_groups.items():
+        if len(path_list) == 1:
+            # Only one file with this name, use just the basename
+            idx, path = path_list[0]
+            result[file_paths[idx]] = basename
+        else:
+            # Multiple files with same name, need to distinguish them
+            path_parts_list = [path.parts for _, path in path_list]
+
+            # Find the minimum number of parent directories needed to distinguish
+            max_depth_needed = 1
+            for depth in range(1, max(len(parts) for parts in path_parts_list) + 1):
+                # Check if this depth is enough to distinguish all files
+                suffixes = []
+                for parts in path_parts_list:
+                    if depth >= len(parts):
+                        suffixes.append('/'.join(parts))
+                    else:
+                        suffixes.append('/'.join(parts[-depth:]))
+
+                if len(set(suffixes)) == len(suffixes):
+                    # All suffixes are unique at this depth
+                    max_depth_needed = depth
+                    break
+
+            # Apply the minimal distinguishing paths
+            for (idx, path), parts in zip(path_list, path_parts_list):
+                if max_depth_needed >= len(parts):
+                    distinguishing_path = '/'.join(parts)
+                else:
+                    distinguishing_path = '/'.join(parts[-max_depth_needed:])
+                result[file_paths[idx]] = distinguishing_path
+
+    return result
+
 
 def _read_segmentation_names(segmentation_names_path: str | Path) -> dict:
     """
@@ -124,7 +194,7 @@ def walk_to_depth(path: str | Path,
                   depth: int,
                   exclude_pattern: str | None = None) -> Generator[Path, None, None]:
     path = Path(path)
-
+
     # Check for DICOMDIR first at current directory level
     dicomdir_path = detect_dicomdir(path)
     if dicomdir_path is not None:
@@ -138,7 +208,7 @@
         except Exception as e:
             _USER_LOGGER.warning(f"Failed to parse DICOMDIR at {path}: {e}. Falling back to directory scan.")
             # Continue with regular directory scanning below
-
+
     # Regular directory scanning
     for child in path.iterdir():
         if _is_system_file(child):
@@ -195,6 +265,7 @@ def handle_api_key() -> str | None:
     If it does not exist, it asks the user to input it.
     Then, it asks the user if he wants to save the API key at a proper location in the machine
     """
+    from datamint.client_cmd_tools.datamint_config import ask_api_key
     api_key = configs.get_value(configs.APIKEY_KEY)
     if api_key is None:
         _USER_LOGGER.info("API key not found. Please provide it:")
@@ -462,6 +533,8 @@ def _parse_args() -> tuple[Any, list[str], Optional[list[dict]], Optional[list[s
     parser.add_argument('--channel', '--name', type=str, required=False,
                         help='Channel name (arbritary) to upload the resources to. \
                         Useful for organizing the resources in the platform.')
+    parser.add_argument('--project', type=str, required=False,
+                        help='Project name to add the uploaded resources to after successful upload.')
     parser.add_argument('--retain-pii', action='store_true', help='Do not anonymize DICOMs')
     parser.add_argument('--retain-attribute', type=_tuple_int_type, action='append',
                         default=[],
@@ -479,7 +552,8 @@ def _parse_args() -> tuple[Any, list[str], Optional[list[dict]], Optional[list[s
                         help='File extensions to be considered for uploading. Default: all file extensions.' +
                         ' Example: --include-extensions dcm jpg png')
     parser.add_argument('--exclude-extensions', type=str, nargs='+',
-                        help='File extensions to be excluded from uploading. Default: none.' +
+                        help='File extensions to be excluded from uploading. ' +
+                        'Default: common non-medical file extensions (.txt, .json, .xml, .docx, etc.) when --include-extensions is not specified.' +
                         ' Example: --exclude-extensions txt csv'
                         )
     parser.add_argument('--segmentation_path', type=_is_valid_path_argparse, metavar="FILE",
@@ -519,7 +593,6 @@ def _parse_args() -> tuple[Any, list[str], Optional[list[dict]], Optional[list[s
 
     if args.verbose:
         # Get the console handler and set to debug
-        print(logging.getLogger().handlers)
         logging.getLogger().handlers[0].setLevel(logging.DEBUG)
         logging.getLogger('datamint').setLevel(logging.DEBUG)
         _LOGGER.setLevel(logging.DEBUG)
@@ -532,6 +605,11 @@ def _parse_args() -> tuple[Any, list[str], Optional[list[dict]], Optional[list[s
     if args.include_extensions is not None and args.exclude_extensions is not None:
         raise ValueError("--include-extensions and --exclude-extensions are mutually exclusive.")
 
+    # Apply default excluded extensions if neither include nor exclude extensions are specified
+    if args.include_extensions is None and args.exclude_extensions is None:
+        args.exclude_extensions = DEFAULT_EXCLUDED_EXTENSIONS
+        _LOGGER.debug(f"Applied default excluded extensions: {args.exclude_extensions}")
+
     try:
         if os.path.isfile(args.path):
             file_path = [args.path]
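Taken together with the new --project option above, a typical invocation of the upload tool would look roughly like the sketch below. The entry-point name datamint-upload, the folder, the channel and the project names are assumptions/placeholders:

# Hedged sketch: command name, paths and names are placeholders.
# With neither --include-extensions nor --exclude-extensions given, common non-medical
# extensions (.txt, .json, .xml, ...) are now skipped by default.
datamint-upload /path/to/dicom_folder \
    --channel "example-channel" \
    --project "Example Project"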
@@ -606,12 +684,15 @@ def print_input_summary(files_path: list[str],
     ext_counts = [(ext, count) for ext, count in ext_dict.items()]
     ext_counts.sort(key=lambda x: x[1], reverse=True)
 
+    # Get distinguishing paths for better display
+    distinguishing_paths = _get_minimal_distinguishing_paths(files_path)
+
     _USER_LOGGER.info(f"Number of files to be uploaded: {total_files}")
-    _USER_LOGGER.info(f"\t{files_path[0]}")
+    _USER_LOGGER.info(f"\t{distinguishing_paths[files_path[0]]}")
     if total_files >= 2:
         if total_files >= 3:
             _USER_LOGGER.info("\t(...)")
-        _USER_LOGGER.info(f"\t{files_path[-1]}")
+        _USER_LOGGER.info(f"\t{distinguishing_paths[files_path[-1]]}")
     _USER_LOGGER.info(f"Total size of the upload: {naturalsize(total_size)}")
     _USER_LOGGER.info(f"Number of files per extension:")
     for ext, count in ext_counts:
@@ -653,22 +734,28 @@ def print_results_summary(files_path: list[str],
                           results: list[str | Exception]) -> int:
     # Check for failed uploads
     failure_files = [f for f, r in zip(files_path, results) if isinstance(r, Exception)]
+    # Get distinguishing paths for better error reporting
+    distinguishing_paths = _get_minimal_distinguishing_paths(files_path)
+
     _USER_LOGGER.info(f"\nUpload summary:")
     _USER_LOGGER.info(f"\tTotal files: {len(files_path)}")
     _USER_LOGGER.info(f"\tSuccessful uploads: {len(files_path) - len(failure_files)}")
-    _USER_LOGGER.info(f"\tFailed uploads: {len(failure_files)}")
     if len(failure_files) > 0:
-        _USER_LOGGER.warning(f"\tFailed files: {[os.path.basename(f) for f in failure_files]}")
+        _USER_LOGGER.warning(f"\tFailed uploads: {len(failure_files)}")
+        _USER_LOGGER.warning(f"\tFailed files: {[distinguishing_paths[f] for f in failure_files]}")
         _USER_LOGGER.warning(f"\nFailures:")
         for f, r in zip(files_path, results):
-            _LOGGER.debug(f"Failure: {f} - {r}")
             if isinstance(r, Exception):
-                _USER_LOGGER.warning(f"\t{os.path.basename(f)}: {r}")
+                _USER_LOGGER.warning(f"\t{distinguishing_paths[f]}: {r}")
+    else:
+        CONSOLE.print(f'✅ All uploads successful!', style='success')
     return len(failure_files)
 
 
 def main():
+    global CONSOLE
     load_cmdline_logging_config()
+    CONSOLE = [h for h in _USER_LOGGER.handlers if isinstance(h, ConsoleWrapperHandler)][0].console
 
     try:
         args, files_path, segfiles, metadata_files = _parse_args()
@@ -676,40 +763,67 @@ def main():
        _USER_LOGGER.error(f'Error validating arguments. {e}')
        sys.exit(1)
 
-    print_input_summary(files_path,
-                        args=args,
-                        segfiles=segfiles,
-                        metadata_files=metadata_files,
-                        include_extensions=args.include_extensions)
+    try:
+        print_input_summary(files_path,
+                            args=args,
+                            segfiles=segfiles,
+                            metadata_files=metadata_files,
+                            include_extensions=args.include_extensions)
+
+        if not args.yes:
+            confirmation = input("Do you want to proceed with the upload? (y/n): ")
+            if confirmation.lower() != "y":
+                _USER_LOGGER.info("Upload cancelled.")
+                return
+        #######################################
 
-    if not args.yes:
-        confirmation = input("Do you want to proceed with the upload? (y/n): ")
-        if confirmation.lower() != "y":
-            _USER_LOGGER.info("Upload cancelled.")
+        has_a_dicom_file = any(is_dicom(f) for f in files_path)
+
+        try:
+            api_handler = APIHandler(check_connection=True)
+        except DatamintException as e:
+            _USER_LOGGER.error(f'❌ Connection failed: {e}')
+            return
+        try:
+            results = api_handler.upload_resources(channel=args.channel,
+                                                   files_path=files_path,
+                                                   tags=args.tag,
+                                                   on_error='skip',
+                                                   anonymize=args.retain_pii == False and has_a_dicom_file,
+                                                   anonymize_retain_codes=args.retain_attribute,
+                                                   mung_filename=args.mungfilename,
+                                                   publish=args.publish,
+                                                   segmentation_files=segfiles,
+                                                   transpose_segmentation=args.transpose_segmentation,
+                                                   assemble_dicoms=True,
+                                                   metadata=metadata_files,
+                                                   progress_bar=True
+                                                   )
+        except pydicom.errors.InvalidDicomError as e:
+            _USER_LOGGER.error(f'❌ Invalid DICOM file: {e}')
             return
-    #######################################
-
-    has_a_dicom_file = any(is_dicom(f) for f in files_path)
-
-    api_handler = APIHandler()
-    results = api_handler.upload_resources(channel=args.channel,
-                                           files_path=files_path,
-                                           tags=args.tag,
-                                           on_error='skip',
-                                           anonymize=args.retain_pii == False and has_a_dicom_file,
-                                           anonymize_retain_codes=args.retain_attribute,
-                                           mung_filename=args.mungfilename,
-                                           publish=args.publish,
-                                           segmentation_files=segfiles,
-                                           transpose_segmentation=args.transpose_segmentation,
-                                           assemble_dicoms=True,
-                                           metadata=metadata_files
-                                           )
-    _USER_LOGGER.info('Upload finished!')
-    _LOGGER.debug(f"Number of results: {len(results)}")
-
-    num_failures = print_results_summary(files_path, results)
-    if num_failures > 0:
+        _USER_LOGGER.info('Upload finished!')
+        _LOGGER.debug(f"Number of results: {len(results)}")
+
+        # Add resources to project if specified
+        if args.project is not None:
+            _USER_LOGGER.info(f"Adding uploaded resources to project '{args.project}'...")
+            try:
+                # Filter successful uploads to get resource IDs
+                successful_resource_ids = [r for r in results if not isinstance(r, Exception)]
+                if successful_resource_ids:
+                    api_handler.add_to_project(project_name=args.project, resource_ids=successful_resource_ids)
+                    _USER_LOGGER.info(f"✅ Successfully added {len(successful_resource_ids)} resources to project '{args.project}'")
+                else:
+                    _USER_LOGGER.warning("No successful uploads to add to project")
+            except Exception as e:
+                _USER_LOGGER.error(f"❌ Failed to add resources to project '{args.project}': {e}")
+
+        num_failures = print_results_summary(files_path, results)
+        if num_failures > 0:
+            sys.exit(1)
+    except KeyboardInterrupt:
+        CONSOLE.print("\nUpload cancelled by user.", style='warning')
         sys.exit(1)
 
 
@@ -0,0 +1,5 @@
+class DatamintException(Exception):
+    """
+    Base class for exceptions in this module.
+    """
+    pass
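With the base exception now living in its own module, callers can import it directly. A minimal sketch, mirroring the connection check used by datamint_upload above (API key configuration is assumed):

# Sketch: demonstrates the new import location of DatamintException.
from datamint.exceptions import DatamintException
from datamint.apihandler.api_handler import APIHandler

try:
    api = APIHandler(check_connection=True)
except DatamintException as e:
    print(f"Connection failed: {e}")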
@@ -7,7 +7,7 @@ handlers:
     level: WARNING
     show_time: False
   console_user:
-    class: datamint.utils.logging_utils.ConditionalRichHandler
+    class: datamint.utils.logging_utils.ConsoleWrapperHandler
     level: INFO
     show_path: False
     show_time: False
@@ -0,0 +1,130 @@
+from rich.theme import Theme
+from logging import Logger, DEBUG, INFO, WARNING, ERROR, CRITICAL
+from rich.console import Console
+import platform
+import os
+import logging
+import logging.config
+from rich.console import ConsoleRenderable
+from rich.logging import RichHandler
+from rich.traceback import Traceback
+import yaml
+import importlib
+
+_LOGGER = logging.getLogger(__name__)
+
+
+class ConditionalRichHandler(RichHandler):
+    """
+    Class that uses 'show_level=True' only if the message level is WARNING or higher.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    def handle(self, record):
+        if record.levelno >= logging.WARNING:
+            self.show_level = True
+        else:
+            self.show_level = False
+        super().handle(record)
+
+    def render(self, *, record: logging.LogRecord,
+               traceback: Traceback | None,
+               message_renderable: ConsoleRenderable) -> ConsoleRenderable:
+        # if level is WARNING or higher, add the level column
+        try:
+            self._log_render.show_level = record.levelno >= logging.WARNING
+            ret = super().render(record=record, traceback=traceback, message_renderable=message_renderable)
+            self._log_render.show_level = False
+        except Exception as e:
+            _LOGGER.error(f"Error rendering log. {e}")
+        return ret
+
+
+def load_cmdline_logging_config():
+    # Load the logging configuration file
+    try:
+        try:
+            # try loading the developer's logging config
+            with open('logging_dev.yaml', 'r') as f:
+                config = yaml.safe_load(f)
+        except:
+            with importlib.resources.open_text('datamint', 'logging.yaml') as f:
+                config = yaml.safe_load(f.read())
+
+        logging.config.dictConfig(config)
+    except Exception as e:
+        print(f"Warning: Error loading logging configuration file: {e}")
+        _LOGGER.exception(e)
+        logging.basicConfig(level=logging.INFO)
+
+
+LEVELS_MAPPING = {
+    DEBUG: None,
+    INFO: None,
+    WARNING: "warning",
+    ERROR: "error",
+    CRITICAL: "error"
+}
+
+
+def _create_console_theme() -> Theme:
+    """Create a custom Rich theme optimized for cross-platform terminals."""
+    # Detect if we're likely on PowerShell (Windows + PowerShell)
+    is_powershell = (
+        platform.system() == "Windows" and
+        os.environ.get("PSModulePath") is not None
+    )
+
+    if is_powershell:
+        # PowerShell blue background - use high contrast colors
+        return Theme({
+            "warning": "bright_yellow",
+            "error": "bright_red on white",
+            "success": "bright_green",
+            "key": "bright_cyan",
+            "accent": "bright_cyan",
+            "title": "bold"
+        })
+    else:
+        # Linux/Unix terminals - standard colors
+        return Theme({
+            "warning": "yellow",
+            "error": "red",
+            "success": "green",
+            "key": "cyan",
+            "accent": "bright_blue",
+            "title": "bold"
+        })
+
+
+class ConsoleWrapperHandler(ConditionalRichHandler):
+    """
+    A logging handler that uses a rich.console.Console to print log messages.
+    """
+    def __init__(self, *args, console: Console | None = None, **kwargs):
+        """
+        Initializes the ConsoleWrapperHandler.
+
+        Args:
+            console (Console | None): A rich Console instance. If None, a new one is created.
+        """
+        super().__init__(*args, **kwargs)
+        if console is None:
+            console = Console(theme=_create_console_theme())
+        self.console = console
+
+    def emit(self, record: logging.LogRecord) -> None:
+        """
+        Emits a log record.
+
+        Args:
+            record (logging.LogRecord): The log record to emit.
+        """
+        try:
+            msg = self.format(record)
+            style = LEVELS_MAPPING.get(record.levelno)
+            self.console.print(msg, style=style)
+        except Exception:
+            self.handleError(record)
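A minimal sketch of how the CLI tools are expected to wire this new handler up, mirroring the pattern used in datamint_config.py and datamint_upload.py above:

# Sketch mirroring the CLI tools shown in this diff.
import logging
from datamint.utils.logging_utils import load_cmdline_logging_config, ConsoleWrapperHandler

load_cmdline_logging_config()  # installs ConsoleWrapperHandler on 'user_logger' via logging.yaml
user_logger = logging.getLogger('user_logger')

# Reuse the handler's themed rich Console for direct, styled output:
console = [h for h in user_logger.handlers if isinstance(h, ConsoleWrapperHandler)][0].console
console.print("✅ Ready", style="success")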
@@ -1,7 +1,7 @@
 [project]
 name = "datamint"
 description = "A library for interacting with the Datamint API, designed for efficient data management, processing and Deep Learning workflows."
-version = "1.9.1"
+version = "1.9.3"
 dynamic = ["dependencies"]
 requires-python = ">=3.10"
 readme = "README.md"
@@ -40,7 +40,7 @@ matplotlib = "*"
 lightning = "*"
 albumentations = ">=2.0.0"
 lazy-loader = ">=0.3.0"
-medimgkit = ">=0.4.4"
+medimgkit = ">=0.5.0"
 # For compatibility with the datamintapi package
 datamintapi = "0.0.*"
 # Extra dependencies for docs
@@ -1,55 +0,0 @@
-import logging
-import logging.config
-from rich.console import ConsoleRenderable
-from rich.logging import RichHandler
-from rich.traceback import Traceback
-import yaml
-import importlib
-
-_LOGGER = logging.getLogger(__name__)
-
-
-class ConditionalRichHandler(RichHandler):
-    """
-    Class that uses 'show_level=True' only if the message level is WARNING or higher.
-    """
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-
-    def handle(self, record):
-        if record.levelno >= logging.WARNING:
-            self.show_level = True
-        else:
-            self.show_level = False
-        super().handle(record)
-
-    def render(self, *, record: logging.LogRecord,
-               traceback: Traceback | None,
-               message_renderable: ConsoleRenderable) -> ConsoleRenderable:
-        # if level is WARNING or higher, add the level column
-        try:
-            self._log_render.show_level = record.levelno >= logging.WARNING
-            ret = super().render(record=record, traceback=traceback, message_renderable=message_renderable)
-            self._log_render.show_level = False
-        except Exception as e:
-            _LOGGER.error(f"Error rendering log. {e}")
-        return ret
-
-
-def load_cmdline_logging_config():
-    # Load the logging configuration file
-    try:
-        try:
-            # try loading the developer's logging config
-            with open('logging_dev.yaml', 'r') as f:
-                config = yaml.safe_load(f)
-        except:
-            with importlib.resources.open_text('datamint', 'logging.yaml') as f:
-                config = yaml.safe_load(f.read())
-
-        logging.config.dictConfig(config)
-    except Exception as e:
-        print(f"Warning: Error loading logging configuration file: {e}")
-        _LOGGER.exception(e)
-        logging.basicConfig(level=logging.INFO)