dbdicom 0.3.7__py3-none-any.whl → 0.3.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dbdicom might be problematic. Click here for more details.

dbdicom/dbd.py CHANGED
@@ -1,19 +1,25 @@
1
1
  import os
2
- from datetime import datetime
3
2
  import json
4
3
  from typing import Union
5
4
  import zipfile
5
+ import re
6
6
 
7
7
  from tqdm import tqdm
8
8
  import numpy as np
9
9
  import vreg
10
10
  from pydicom.dataset import Dataset
11
+ import pydicom
11
12
 
12
13
  import dbdicom.utils.arrays
13
14
  import dbdicom.dataset as dbdataset
14
15
  import dbdicom.database as dbdatabase
15
16
  import dbdicom.register as register
16
17
  import dbdicom.const as const
18
+ from dbdicom.utils.pydicom_dataset import (
19
+ get_values,
20
+ set_values,
21
+ set_value,
22
+ )
17
23
 
18
24
 
19
25
 
@@ -200,12 +206,13 @@ class DataBaseDicom():
200
206
  return register.series(self.register, entity, desc, contains, isin)
201
207
 
202
208
 
203
- def volume(self, entity:Union[list, str], dims:list=None) -> Union[vreg.Volume3D, list]:
209
+ def volume(self, entity:Union[list, str], dims:list=None, verbose=1) -> Union[vreg.Volume3D, list]:
204
210
  """Read volume or volumes.
205
211
 
206
212
  Args:
207
213
  entity (list, str): DICOM entity to read
208
214
  dims (list, optional): Non-spatial dimensions of the volume. Defaults to None.
215
+ verbose (bool, optional): If set to 1, shows progress bar. Defaults to 1.
209
216
 
210
217
  Returns:
211
218
  vreg.Volume3D | list: If the entity is a series this returns
@@ -228,12 +235,14 @@ class DataBaseDicom():
228
235
  # Read dicom files
229
236
  values = []
230
237
  volumes = []
231
- for f in tqdm(files, desc='Reading volume..'):
232
- ds = dbdataset.read_dataset(f)
233
- values.append(dbdataset.get_values(ds, dims))
238
+ for f in tqdm(files, desc='Reading volume..', disable=(verbose==0)):
239
+ ds = pydicom.dcmread(f)
240
+ values.append(get_values(ds, dims))
234
241
  volumes.append(dbdataset.volume(ds))
235
242
 
236
243
  # Format as mesh
244
+ # coords = np.stack(values, axis=-1, dtype=object)
245
+ values = [np.array(v, dtype=object) for v in values] # object array to allow for mixed types
237
246
  coords = np.stack(values, axis=-1)
238
247
  coords, inds = dbdicom.utils.arrays.meshvals(coords)
239
248
  vols = np.array(volumes)
@@ -291,7 +300,7 @@ class DataBaseDicom():
291
300
  ref_mgr = DataBaseDicom(ref[0])
292
301
  files = register.files(ref_mgr.register, ref)
293
302
  ref_mgr.close()
294
- ds = dbdataset.read_dataset(files[0])
303
+ ds = pydicom.dcmread(files[0])
295
304
 
296
305
  # Get the attributes of the destination series
297
306
  attr = self._series_attributes(series)
@@ -306,15 +315,17 @@ class DataBaseDicom():
306
315
  i=0
307
316
  vols = vol.separate().reshape(-1)
308
317
  for vt in tqdm(vols, desc='Writing volume..'):
309
- for sl in vt.split():
318
+ slices = vt.split()
319
+ for sl in slices:
310
320
  dbdataset.set_volume(ds, sl)
311
- dbdataset.set_value(ds, sl.dims, sl.coords[:,...])
321
+ sl_coords = [sl.coords[i,...].ravel()[0] for i in range(len(sl.dims))]
322
+ set_value(ds, sl.dims, sl_coords)
312
323
  self._write_dataset(ds, attr, n + 1 + i)
313
324
  i+=1
314
325
  return self
315
326
 
316
327
 
317
- def to_nifti(self, series:list, file:str, dims=None):
328
+ def to_nifti(self, series:list, file:str, dims=None, verbose=1):
318
329
  """Save a DICOM series in nifti format.
319
330
 
320
331
  Args:
@@ -322,8 +333,10 @@ class DataBaseDicom():
322
333
  file (str): file path of the nifti file.
323
334
  dims (list, optional): Non-spatial dimensions of the volume.
324
335
  Defaults to None.
336
+ verbose (bool, optional): If set to 1, shows progress bar. Defaults to 1.
337
+
325
338
  """
326
- vol = self.volume(series, dims)
339
+ vol = self.volume(series, dims, verbose)
327
340
  vreg.write_nifti(vol, file)
328
341
  return self
329
342
 
@@ -389,12 +402,12 @@ class DataBaseDicom():
389
402
  if attr is not None:
390
403
  values = np.empty(len(files), dtype=dict)
391
404
  for i, f in tqdm(enumerate(files), desc='Reading pixel data..'):
392
- ds = dbdataset.read_dataset(f)
393
- coords_array.append(dbdataset.get_values(ds, dims))
405
+ ds = pydicom.dcmread(f)
406
+ coords_array.append(get_values(ds, dims))
394
407
  # save as dict so numpy does not stack as arrays
395
408
  arrays[i] = {'pixel_data': dbdataset.pixel_data(ds)}
396
409
  if attr is not None:
397
- values[i] = {'values': dbdataset.get_values(ds, params)}
410
+ values[i] = {'values': get_values(ds, params)}
398
411
 
399
412
  # Format as mesh
400
413
  coords_array = np.stack([v for v in coords_array], axis=-1)
@@ -482,7 +495,7 @@ class DataBaseDicom():
482
495
  if attr is None:
483
496
  # If attributes are not provided, read all
484
497
  # attributes from the first file
485
- ds = dbdataset.read_dataset(files[0])
498
+ ds = pydicom.dcmread(files[0])
486
499
  exclude = ['PixelData', 'FloatPixelData', 'DoubleFloatPixelData']
487
500
  params = []
488
501
  param_labels = []
@@ -503,10 +516,10 @@ class DataBaseDicom():
503
516
  coords_array = []
504
517
  values = np.empty(len(files), dtype=dict)
505
518
  for i, f in tqdm(enumerate(files), desc='Reading values..'):
506
- ds = dbdataset.read_dataset(f)
507
- coords_array.append(dbdataset.get_values(ds, dims))
519
+ ds = pydicom.dcmread(f)
520
+ coords_array.append(get_values(ds, dims))
508
521
  # save as dict so numpy does not stack as arrays
509
- values[i] = {'values': dbdataset.get_values(ds, params)}
522
+ values[i] = {'values': get_values(ds, params)}
510
523
 
511
524
  # Format as mesh
512
525
  coords_array = np.stack([v for v in coords_array], axis=-1)
@@ -651,37 +664,43 @@ class DataBaseDicom():
651
664
  self.delete(from_entity)
652
665
  return self
653
666
 
654
- def split_series(self, series:list, attr:Union[str, tuple]) -> dict:
667
+ def split_series(self, series:list, attr:Union[str, tuple], key=None) -> list:
655
668
  """
656
669
  Split a series into multiple series
657
670
 
658
671
  Args:
659
672
  series (list): series to split.
660
673
  attr (str or tuple): dicom attribute to split the series by.
674
+ key (function): split by key(attr)
661
675
  Returns:
662
- dict: dictionary with keys the unique values found (ascending)
663
- and as values the series corresponding to that value.
676
+ list: list of two-element tuples, where the first element is
677
+ the value and the second element is the series corresponding to that value.
664
678
  """
665
679
 
666
680
  # Find all values of the attr and list files per value
667
681
  all_files = register.files(self.register, series)
668
- files = {}
682
+ files = []
683
+ values = []
669
684
  for f in tqdm(all_files, desc=f'Reading {attr}'):
670
- ds = dbdataset.read_dataset(f)
671
- v = dbdataset.get_values(ds, attr)
672
- if v in files:
673
- files[v].append(f)
685
+ ds = pydicom.dcmread(f)
686
+ v = get_values(ds, attr)
687
+ if key is not None:
688
+ v = key(v)
689
+ if v in values:
690
+ index = values.index(v)
691
+ files[index].append(f)
674
692
  else:
675
- files[v] = [f]
693
+ values.append(v)
694
+ files.append([f])
676
695
 
677
696
  # Copy the files for each value (sorted) to new series
678
- values = sorted(list(files.keys()))
679
- split_series = {}
680
- for v in tqdm(values, desc='Writing new series'):
697
+ split_series = []
698
+ for index, v in tqdm(enumerate(values), desc='Writing new series'):
681
699
  series_desc = series[-1] if isinstance(series, str) else series[-1][0]
682
- series_v = series[:3] + [f'{series_desc}_{attr}_{v}']
683
- self._files_to_series(files[v], series_v)
684
- split_series[v] = series_v
700
+ series_desc = clean_folder_name(f'{series_desc}_{attr}_{v}')
701
+ series_v = series[:3] + [(series_desc, 0)]
702
+ self._files_to_series(files[index], series_v)
703
+ split_series.append((v, series_v))
685
704
  return split_series
686
705
 
687
706
 
@@ -694,8 +713,8 @@ class DataBaseDicom():
694
713
  files = register.files(self.register, entity)
695
714
  v = np.empty((len(files), len(attributes)), dtype=object)
696
715
  for i, f in enumerate(files):
697
- ds = dbdataset.read_dataset(f)
698
- v[i,:] = dbdataset.get_values(ds, attributes)
716
+ ds = pydicom.dcmread(f)
717
+ v[i,:] = get_values(ds, attributes)
699
718
  return v
700
719
 
701
720
  def _copy_patient(self, from_patient, to_patient):
@@ -750,7 +769,7 @@ class DataBaseDicom():
750
769
  # Copy the files to the new series
751
770
  for i, f in tqdm(enumerate(files), total=len(files), desc=f'Copying series {to_series[1:]}'):
752
771
  # Read dataset and assign new properties
753
- ds = dbdataset.read_dataset(f)
772
+ ds = pydicom.dcmread(f)
754
773
  self._write_dataset(ds, attr, n + 1 + i)
755
774
 
756
775
  def _max_study_id(self, patient_id):
@@ -800,8 +819,8 @@ class DataBaseDicom():
800
819
  # If the patient exists and has files, read from file
801
820
  files = register.files(self.register, patient)
802
821
  attr = const.PATIENT_MODULE
803
- ds = dbdataset.read_dataset(files[0])
804
- vals = dbdataset.get_values(ds, attr)
822
+ ds = pydicom.dcmread(files[0])
823
+ vals = get_values(ds, attr)
805
824
  except:
806
825
  # If the patient does not exist, generate values
807
826
  if patient in self.patients():
@@ -820,8 +839,8 @@ class DataBaseDicom():
820
839
  # If the study exists and has files, read from file
821
840
  files = register.files(self.register, study)
822
841
  attr = const.STUDY_MODULE
823
- ds = dbdataset.read_dataset(files[0])
824
- vals = dbdataset.get_values(ds, attr)
842
+ ds = pydicom.dcmread(files[0])
843
+ vals = get_values(ds, attr)
825
844
  except register.AmbiguousError as e:
826
845
  raise register.AmbiguousError(e)
827
846
  except:
@@ -829,9 +848,9 @@ class DataBaseDicom():
829
848
  if study[:-1] not in self.patients():
830
849
  study_id = 1
831
850
  else:
832
- study_id = 1 + self._max_study_id(study[-1])
851
+ study_id = 1 + self._max_study_id(study[1])
833
852
  attr = ['StudyInstanceUID', 'StudyDescription', 'StudyID']
834
- study_uid = dbdataset.new_uid()
853
+ study_uid = pydicom.uid.generate_uid()
835
854
  study_desc = study[-1] if isinstance(study[-1], str) else study[-1][0]
836
855
  #study_date = datetime.today().strftime('%Y%m%d')
837
856
  vals = [study_uid, study_desc, str(study_id)]
@@ -844,8 +863,8 @@ class DataBaseDicom():
844
863
  # If the series exists and has files, read from file
845
864
  files = register.files(self.register, series)
846
865
  attr = const.SERIES_MODULE
847
- ds = dbdataset.read_dataset(files[0])
848
- vals = dbdataset.get_values(ds, attr)
866
+ ds = pydicom.dcmread(files[0])
867
+ vals = get_values(ds, attr)
849
868
  except register.AmbiguousError as e:
850
869
  raise register.AmbiguousError(e)
851
870
  except:
@@ -857,7 +876,7 @@ class DataBaseDicom():
857
876
  else:
858
877
  series_number = 1 + self._max_series_number(study_uid)
859
878
  attr = ['SeriesInstanceUID', 'SeriesDescription', 'SeriesNumber']
860
- series_uid = dbdataset.new_uid()
879
+ series_uid = pydicom.uid.generate_uid()
861
880
  series_desc = series[-1] if isinstance(series[-1], str) else series[-1][0]
862
881
  vals = [series_uid, series_desc, int(series_number)]
863
882
  return study_attr | {attr[i]:vals[i] for i in range(len(attr)) if vals[i] is not None}
@@ -865,9 +884,9 @@ class DataBaseDicom():
865
884
 
866
885
  def _write_dataset(self, ds:Dataset, attr:dict, instance_nr:int):
867
886
  # Set new attributes
868
- attr['SOPInstanceUID'] = dbdataset.new_uid()
887
+ attr['SOPInstanceUID'] = pydicom.uid.generate_uid()
869
888
  attr['InstanceNumber'] = str(instance_nr)
870
- dbdataset.set_values(ds, list(attr.keys()), list(attr.values()))
889
+ set_values(ds, list(attr.keys()), list(attr.values()))
871
890
  # Save results in a new file
872
891
  rel_dir = os.path.join(
873
892
  f"Patient__{attr['PatientID']}",
@@ -875,7 +894,7 @@ class DataBaseDicom():
875
894
  f"Series__{attr['SeriesNumber']}__{attr['SeriesDescription']}",
876
895
  )
877
896
  os.makedirs(os.path.join(self.path, rel_dir), exist_ok=True)
878
- rel_path = os.path.join(rel_dir, dbdataset.new_uid() + '.dcm')
897
+ rel_path = os.path.join(rel_dir, pydicom.uid.generate_uid() + '.dcm')
879
898
  dbdataset.write(ds, os.path.join(self.path, rel_path))
880
899
  # Add an entry in the register
881
900
  register.add_instance(self.register, attr, rel_path)
@@ -892,11 +911,13 @@ class DataBaseDicom():
892
911
  )
893
912
  os.makedirs(zip_dir, exist_ok=True)
894
913
  for sr in st['series']:
914
+ zip_file = os.path.join(
915
+ zip_dir,
916
+ f"Series__{sr['SeriesNumber']}__{sr['SeriesDescription']}.zip",
917
+ )
918
+ if os.path.exists(zip_file):
919
+ continue
895
920
  try:
896
- zip_file = os.path.join(
897
- zip_dir,
898
- f"Series__{sr['SeriesNumber']}__{sr['SeriesDescription']}.zip",
899
- )
900
921
  with zipfile.ZipFile(zip_file, 'w') as zipf:
901
922
  for rel_path in sr['instances'].values():
902
923
  file = os.path.join(self.path, rel_path)
@@ -910,6 +931,29 @@ class DataBaseDicom():
910
931
 
911
932
 
912
933
 
934
+ def clean_folder_name(name, replacement="", max_length=255):
935
+ # Strip leading/trailing whitespace
936
+ name = name.strip()
937
+
938
+ # Replace invalid characters (Windows, macOS, Linux-safe)
939
+ illegal_chars = r'[<>:"/\\|?*\[\]\x00-\x1F\x7F]'
940
+ name = re.sub(illegal_chars, replacement, name)
941
+
942
+ # Replace reserved Windows names
943
+ reserved = {
944
+ "CON", "PRN", "AUX", "NUL",
945
+ *(f"COM{i}" for i in range(1, 10)),
946
+ *(f"LPT{i}" for i in range(1, 10))
947
+ }
948
+ name_upper = name.upper().split(".")[0] # Just base name
949
+ if name_upper in reserved:
950
+ name = f"{name}_folder"
951
+
952
+ # Truncate to max length (common max: 255 bytes)
953
+ return name[:max_length] or "folder"
954
+
955
+
956
+
913
957
  def infer_slice_spacing(vols):
914
958
  # In case spacing between slices is not (correctly) encoded in
915
959
  # DICOM it can be inferred from the slice locations.