dbdicom 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

dbdicom/dbd.py CHANGED
@@ -1,5 +1,6 @@
  import os
  from datetime import datetime
+ import json

  from tqdm import tqdm
  import numpy as np
@@ -8,9 +9,9 @@ import vreg
  from pydicom.dataset import Dataset

  import dbdicom.utils.arrays
- import dbdicom.utils.files as filetools
- import dbdicom.utils.dcm4che as dcm4che
+
  import dbdicom.dataset as dbdataset
+ import dbdicom.database as dbdatabase
  import dbdicom.register as register
  import dbdicom.const as const

@@ -32,7 +33,8 @@ class DataBaseDicom():
          file = self._register_file()
          if os.path.exists(file):
              try:
-                 self.register = pd.read_pickle(file)
+                 with open(file, 'r') as f:
+                     self.register = json.load(f)
              except:
                  # If the file is corrupted, delete it and load again
                  os.remove(file)
@@ -44,75 +46,45 @@ class DataBaseDicom():
      def read(self):
          """Read the DICOM folder again
          """
-
-         files = filetools.all_files(self.path)
-         self.register = dbdataset.read_dataframe(
-             files,
-             register.COLUMNS + ['NumberOfFrames','SOPClassUID'],
-             path=self.path,
-             images_only = True)
-         self.register['removed'] = False
-         self.register['created'] = False
-         # No support for multiframe data at the moment
-         self._multiframe_to_singleframe()
+         self.register = dbdatabase.read(self.path)
          # For now ensure all series have just a single CIOD
-         self._split_series()
+         # Leaving this out for now until the issue occurs again
+         # self._split_series()
          return self
-

-     def close(self):
-         """Close the DICOM folder
-
-         This also saves changes in the header file to disk.
-         """
+

-         created = self.register.created & (self.register.removed==False)
-         removed = self.register.removed
-         created = created[created].index
-         removed = removed[removed].index
+     def delete(self, entity):
+         """Delete a DICOM entity from the database

+         Args:
+             entity (list): entity to delete
+         """
+         removed = register.index(self.register, entity)
          # delete datasets marked for removal
          for index in removed.tolist():
              file = os.path.join(self.path, index)
              if os.path.exists(file):
                  os.remove(file)
          # and drop then from the register
-         self.register.drop(index=removed, inplace=True)
-
-         # for new or edited data, mark as saved.
-         self.register.loc[created, 'created'] = False
-
-         # save register
-         file = self._register_file()
-         self.register.to_pickle(file)
+         self.register = register.drop(removed)
          return self


-     def restore(self):
-         """Restore the DICOM folder to the last saved state."""
-
-         created = self.register.created
-         removed = self.register.removed & (self.register.created==False)
-         created = created[created].index
-         removed = removed[removed].index
-
-         # permanently delete newly created datasets
-         for index in created.tolist():
-             file = os.path.join(self.path, index)
-             if os.path.exists(file):
-                 os.remove(file)
-
-         # and drop then from the register
-         self.register.drop(index=created, inplace=True)
-
-         # Restore those that were marked for removal
-         self.register.loc[removed, 'removed'] = False
-
-         # save register
+     def close(self):
+         """Close the DICOM folder
+
+         This also saves changes in the header file to disk.
+         """
+         # Save df as pkl
          file = self._register_file()
-         self.register.to_pickle(file)
-         return self
+         with open(file, 'w') as f:
+             json.dump(self.register, f, indent=4)
+         return self

+     def _register_file(self):
+         return os.path.join(self.path, 'dbtree.json')
+

      def summary(self):
          """Return a summary of the contents of the database.
@@ -122,6 +94,7 @@ class DataBaseDicom():
          """
          return register.summary(self.register)

+
      def print(self):
          """Print the contents of the DICOM folder
          """
@@ -277,10 +250,11 @@ class DataBaseDicom():
              dims (list, optional): Non-spatial dimensions of the volume. Defaults to None.
              multislice (bool, optional): Whether the data are to be read
                  as multislice or not. In multislice data the voxel size
-                 is taken from the slice gap rather thsan the slice thickness. Defaults to False.
+                 is taken from the slice gap rather than the slice thickness. Defaults to False.
          """
          if ref is None:
              ds = dbdataset.new_dataset('MRImage')
+             #ds = dbdataset.new_dataset('ParametricMap')
          else:
              if ref[0] == series[0]:
                  ref_mgr = self
@@ -293,12 +267,11 @@ class DataBaseDicom():
          attr = self._attributes(series)
          n = self._max_instance_number(attr['SeriesInstanceUID'])

-         new_instances = {}
          if vol.ndim==3:
              slices = vol.split()
              for i, sl in tqdm(enumerate(slices), desc='Writing volume..'):
                  dbdataset.set_volume(ds, sl, multislice)
-                 self._write_dataset(ds, attr, n + 1 + i, new_instances)
+                 self._write_dataset(ds, attr, n + 1 + i)
          else:
              i=0
              vols = vol.separate().reshape(-1)
@@ -306,10 +279,9 @@ class DataBaseDicom():
                  for sl in vt.split():
                      dbdataset.set_volume(ds, sl, multislice)
                      dbdataset.set_value(ds, sl.dims, sl.coords[:,...])
-                     self._write_dataset(ds, attr, n + 1 + i, new_instances)
+                     self._write_dataset(ds, attr, n + 1 + i)
                      i+=1
-         return self
-         self._update_register(new_instances)
+         return self


      def to_nifti(self, series:list, file:str, dims=None, multislice=False):
@@ -343,26 +315,38 @@ class DataBaseDicom():
          self.write_volume(vol, series, ref, multislice)
          return self

-     def pixel_data(self, series:list, dims:list=None, include=None) -> np.ndarray:
+     def pixel_data(self, series:list, dims:list=None, coords=False, include=None) -> np.ndarray:
          """Read the pixel data from a DICOM series

          Args:
              series (list): DICOM series to read
              dims (list, optional): Dimensions of the array.
+             coords (bool): If set to Trye, the coordinates of the
+                 arrays are returned alongside the pixel data
              include (list, optional): list of DICOM attributes that are
                  read on the fly to avoid reading the data twice.

          Returns:
-             tuple: numpy array with pixel values and an array with
+             numpy.ndarray or tuple: numpy array with pixel values, with
+                 at least 3 dimensions (x,y,z). If
+                 coords is set these are returned too as an array with
                  coordinates of the slices according to dims. If include
-                 is provide these are returned as a dictionary in a third
+                 is provide the values are returned as a dictionary in the last
                  return value.
          """
+         if coords:
+             if dims is None:
+                 raise ValueError(
+                     "Coordinates can only be returned if dimensions are specified."
+                 )

-         if np.isscalar(dims):
+         if dims is None:
+             dims = []
+         elif np.isscalar(dims):
              dims = [dims]
          else:
              dims = list(dims)
+         dims = ['SliceLocation'] + dims

          # Ensure return_vals is a list
          if include is None:
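
The new signature changes both the inputs and the return shape of pixel_data: coordinates must now be requested with coords=True, which in turn requires dims, and 'SliceLocation' is prepended internally (and dropped again from the returned coordinate array, as the next hunk shows). A usage sketch, assuming db is an open DataBaseDicom instance and series is a series entity from it (both hypothetical; the attribute names are ordinary DICOM keywords chosen for illustration):

    # 0.3.2 default: pixel values only, as an array with at least 3 dimensions (x, y, z)
    array = db.pixel_data(series)

    # coordinates are only returned on request, and require dims
    array, coords = db.pixel_data(series, dims=['AcquisitionTime'], coords=True)

    # coords=True without dims now raises:
    # db.pixel_data(series, coords=True)   # ValueError

    # attributes read on the fly come back as the last return value
    array, values = db.pixel_data(series, include=['RescaleSlope', 'RescaleIntercept'])
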
@@ -375,34 +359,40 @@ class DataBaseDicom():
          files = register.files(self.register, series)

          # Read dicom files
-         coords = []
+         coords_array = []
          arrays = np.empty(len(files), dtype=dict)
          if include is not None:
              values = np.empty(len(files), dtype=dict)
          for i, f in tqdm(enumerate(files), desc='Reading pixel data..'):
              ds = dbdataset.read_dataset(f)
-             coords.append(dbdataset.get_values(ds, dims))
+             coords_array.append(dbdataset.get_values(ds, dims))
              # save as dict so numpy does not stack as arrays
              arrays[i] = {'pixel_data': dbdataset.pixel_data(ds)}
              if include is not None:
                  values[i] = {'values': dbdataset.get_values(ds, params)}

          # Format as mesh
-         coords = np.stack([v for v in coords], axis=-1)
-         coords, inds = dbdicom.utils.arrays.meshvals(coords)
+         coords_array = np.stack([v for v in coords_array], axis=-1)
+         coords_array, inds = dbdicom.utils.arrays.meshvals(coords_array)

-         arrays = arrays[inds].reshape(coords.shape[1:])
+         arrays = arrays[inds].reshape(coords_array.shape[1:])
          arrays = np.stack([a['pixel_data'] for a in arrays.reshape(-1)], axis=-1)
-         arrays = arrays.reshape(arrays.shape[:2] + coords.shape[1:])
+         arrays = arrays.reshape(arrays.shape[:2] + coords_array.shape[1:])

          if include is None:
-             return arrays, coords
+             if coords:
+                 return arrays, coords_array[1:,...]
+             else:
+                 return arrays

-         values = values[inds].reshape(coords.shape[1:])
+         values = values[inds].reshape(coords_array.shape[1:])
          values = np.stack([a['values'] for a in values.reshape(-1)], axis=-1)
-         values = values.reshape((len(params), ) + coords.shape[1:])
+         values = values.reshape((len(params), ) + coords_array.shape[1:])

-         return arrays, coords, values
+         if coords:
+             return arrays, coords_array[1:,...], values
+         else:
+             return arrays, values


      def unique(self, pars:list, entity:list) -> dict:
@@ -470,16 +460,6 @@ class DataBaseDicom():
                  f"Cannot copy {from_entity} to {to_entity}. "
              )

-     def delete(self, entity):
-         """Delete a DICOM entity from the database
-
-         Args:
-             entity (list): entity to delete
-         """
-         index = register.index(self.register, entity)
-         self.register.loc[index,'removed'] = True
-         return self
-
      def move(self, from_entity, to_entity):
          """Move a DICOM entity

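delete() is not dropped here, only relocated: it now sits earlier in the class (see the hunk above) with changed semantics. In 0.3.0 it merely set the 'removed' flag and the files disappeared at close(); in 0.3.2 the matching files are deleted from disk and dropped from the register immediately, with no restore() to undo it. A behavioural sketch with a hypothetical entity; the [path, patient, study, series] form is inferred from how entities are used elsewhere in this file:

    # removes the series' .dcm files on the spot in 0.3.2,
    # instead of marking them 'removed' until close() as in 0.3.0
    db.delete([db.path, 'Patient-007', 'Baseline', 'T1w'])
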
@@ -492,15 +472,15 @@ class DataBaseDicom():

      def _values(self, attributes:list, entity:list):
          # Create a np array v with values for each instance and attribute
-         if set(attributes) <= set(self.register.columns):
-             index = register.index(self.register, entity)
-             v = self.register.loc[index, attributes].values
-         else:
-             files = register.files(self.register, entity)
-             v = np.empty((len(files), len(attributes)), dtype=object)
-             for i, f in enumerate(files):
-                 ds = dbdataset.read_dataset(f)
-                 v[i,:] = dbdataset.get_values(ds, attributes)
+         # if set(attributes) <= set(dbdatabase.COLUMNS):
+         #     index = register.index(self.register, entity)
+         #     v = self.register.loc[index, attributes].values
+         # else:
+         files = register.files(self.register, entity)
+         v = np.empty((len(files), len(attributes)), dtype=object)
+         for i, f in enumerate(files):
+             ds = dbdataset.read_dataset(f)
+             v[i,:] = dbdataset.get_values(ds, attributes)
          return v

      def _copy_patient(self, from_patient, to_patient):
@@ -544,30 +524,28 @@ class DataBaseDicom():
          n = self._max_instance_number(attr['SeriesInstanceUID'])

          # Copy the files to the new series
-         new_instances = {}
          for i, f in tqdm(enumerate(files), total=len(files), desc=f'Copying series {to_series[1:]}'):
              # Read dataset and assign new properties
              ds = dbdataset.read_dataset(f)
-             self._write_dataset(ds, attr, n + 1 + i, new_instances)
-         self._update_register(new_instances)
+             self._write_dataset(ds, attr, n + 1 + i)


      def _max_series_number(self, study_uid):
-         df = self.register
-         df = df[(df.StudyInstanceUID==study_uid) & (df.removed==False)]
-         n = df['SeriesNumber'].values
-         n = n[n != -1]
-         max_number=0 if n.size==0 else np.amax(n)
-         return max_number
+         for pt in self.register:
+             for st in pt['studies']:
+                 if st['StudyInstanceUID'] == study_uid:
+                     n = [sr['SeriesNumber'] for sr in st['studies']]
+                     return np.amax(n)
+         return 0

      def _max_instance_number(self, series_uid):
-         df = self.register
-         df = df[(df.SeriesInstanceUID==series_uid) & (df.removed==False)]
-         n = df['InstanceNumber'].values
-         n = n[n != -1]
-         max_number=0 if n.size==0 else np.amax(n)
-         return max_number
-
+         for pt in self.register:
+             for st in pt['studies']:
+                 for sr in st['series']:
+                     if sr['SeriesInstanceUID'] == series_uid:
+                         n = list(sr['instances'].keys())
+                         return np.amax([int(i) for i in n])
+         return 0

      def _attributes(self, entity):
          if len(entity)==4:
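
The rewritten lookups above expose the shape of the new JSON register: a list of patient entries, each holding a 'studies' list, each study holding a 'series' list, and each series an 'instances' mapping keyed by instance number stored as a string (consistent with _write_dataset casting InstanceNumber to str later in this diff). A sketch of that layout with a standalone version of the instance lookup; any field not literally read in the code above is an assumption for illustration:

    # assumed register layout (only the keys read by the lookups above are certain)
    register = [
        {
            'PatientID': '007',          # assumed field
            'studies': [
                {
                    'StudyInstanceUID': '1.2.3.4',
                    'series': [
                        {
                            'SeriesInstanceUID': '1.2.3.4.5',
                            'SeriesNumber': 2,
                            # relative file paths keyed by InstanceNumber as a string
                            'instances': {'1': 'dbdicom/a.dcm', '2': 'dbdicom/b.dcm'},
                        },
                    ],
                },
            ],
        },
    ]

    def max_instance_number(register, series_uid):
        # standalone equivalent of the new _max_instance_number
        for pt in register:
            for st in pt['studies']:
                for sr in st['series']:
                    if sr['SeriesInstanceUID'] == series_uid:
                        return max(int(i) for i in sr['instances'])
        return 0

    print(max_instance_number(register, '1.2.3.4.5'))   # -> 2

Under this layout, the st['studies'] lookup inside the new _max_series_number would be expected to read st['series'] instead.
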
@@ -635,85 +613,49 @@ class DataBaseDicom():
          return study_attr | {attr[i]:vals[i] for i in range(len(attr)) if vals[i] is not None}


-     def _write_dataset(self, ds:Dataset, attr:dict, instance_nr:int, register:dict):
+     def _write_dataset(self, ds:Dataset, attr:dict, instance_nr:int):
          # Set new attributes
          attr['SOPInstanceUID'] = dbdataset.new_uid()
-         attr['InstanceNumber'] = instance_nr
+         attr['InstanceNumber'] = str(instance_nr)
          dbdataset.set_values(ds, list(attr.keys()), list(attr.values()))
          # Save results in a new file
          rel_path = os.path.join('dbdicom', dbdataset.new_uid() + '.dcm')
          dbdataset.write(ds, os.path.join(self.path, rel_path))
-         # Add a row to the register
-         register[rel_path] = dbdataset.get_values(ds, self.register.columns)
-
+         # Add an entry in the register
+         register.add_instance(self.register, attr, rel_path)
+
+
+
+     # def _split_series(self):
+     #     """
+     #     Split series with multiple SOP Classes.
+
+     #     If a series contain instances from different SOP Classes,
+     #     these are separated out into multiple series with identical SOP Classes.
+     #     """
+     #     # For each series, check if there are multiple
+     #     # SOP Classes in the series and split them if yes.
+     #     for series in self.series():
+     #         series_index = register.index(self.register, series)
+     #         df_series = self.register.loc[series_index]
+     #         sop_classes = df_series.SOPClassUID.unique()
+     #         if len(sop_classes) > 1:
+     #             # For each sop_class, create a new series and move all
+     #             # instances of that sop_class to the new series
+     #             desc = series[-1] if isinstance(series, str) else series[0]
+     #             for i, sop_class in tqdm(enumerate(sop_classes[1:]), desc='Splitting series with multiple SOP Classes.'):
+     #                 df_sop_class = df_series[df_series.SOPClassUID == sop_class]
+     #                 relpaths = df_sop_class.index.tolist()
+     #                 sop_class_files = [os.path.join(self.path, p) for p in relpaths]
+     #                 sop_class_series = series[:-1] + [desc + f' [{i+1}]']
+     #                 self._files_to_series(sop_class_files, sop_class_series)
+     #             # Delete original files permanently
+     #             self.register.drop(relpaths)
+     #             for f in sop_class_files:
+     #                 os.remove(f)
+     #     self.register.drop('SOPClassUID', axis=1, inplace=True)

-     def _update_register(self, new_instances:dict):
-         # A new instances to the register
-         df = pd.DataFrame.from_dict(new_instances, orient='index', columns=self.register.columns)
-         df['removed'] = False
-         df['created'] = True
-         self.register = pd.concat([self.register, df])


-     def _register_file(self):
-         filename = os.path.basename(os.path.normpath(self.path)) + ".pkl"
-         return os.path.join(self.path, filename)
-
-
-     def _multiframe_to_singleframe(self):
-         """Converts all multiframe files in the folder into single-frame files.
-
-         Reads all the multi-frame files in the folder,
-         converts them to singleframe files, and delete the original multiframe file.
-         """
-         singleframe = self.register.NumberOfFrames.isnull()
-         multiframe = singleframe == False
-         nr_multiframe = multiframe.sum()
-         if nr_multiframe != 0:
-             for relpath in tqdm(self.register[multiframe].index.values, desc="Converting multiframe file " + relpath):
-                 filepath = os.path.join(self.path, relpath)
-                 singleframe_files = dcm4che.split_multiframe(filepath)
-                 if singleframe_files != []:
-                     # add the single frame files to the dataframe
-                     df = dbdataset.read_dataframe(singleframe_files, self.register.columns, path=self.path)
-                     df['removed'] = False
-                     df['created'] = False
-                     self.register = pd.concat([self.register, df])
-                     # delete the original multiframe
-                     os.remove(filepath)
-                 # drop the file also if the conversion has failed
-                 self.register.drop(index=relpath, inplace=True)
-             self.register.drop('NumberOfFrames', axis=1, inplace=True)
-
-
-     def _split_series(self):
-         """
-         Split series with multiple SOP Classes.
-
-         If a series contain instances from different SOP Classes,
-         these are separated out into multiple series with identical SOP Classes.
-         """
-         # For each series, check if there are multiple
-         # SOP Classes in the series and split them if yes.
-         all_series = self.series()
-         for series in tqdm(all_series, desc='Splitting series with multiple SOP Classes.'):
-             series_index = register.index(self.register, series)
-             df_series = self.register.loc[series_index]
-             sop_classes = df_series.SOPClassUID.unique()
-             if len(sop_classes) > 1:
-                 # For each sop_class, create a new series and move all
-                 # instances of that sop_class to the new series
-                 desc = series[-1] if isinstance(series, str) else series[0]
-                 for i, sop_class in enumerate(sop_classes[1:]):
-                     df_sop_class = df_series[df_series.SOPClassUID == sop_class]
-                     relpaths = df_sop_class.index.tolist()
-                     sop_class_files = [os.path.join(self.path, p) for p in relpaths]
-                     sop_class_series = series[:-1] + [desc + f' [{i+1}]']
-                     self._files_to_series(sop_class_files, sop_class_series)
-                 # Delete original files permanently
-                 self.register.drop(relpaths)
-                 for f in sop_class_files:
-                     os.remove(f)
-         self.register.drop('SOPClassUID', axis=1, inplace=True)


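
_write_dataset now delegates register bookkeeping to register.add_instance(self.register, attr, rel_path); only that call is visible in this diff. A hypothetical sketch of what such a helper could do under the nested layout assumed earlier (this is an illustration, not dbdicom.register's actual implementation):

    def add_instance(register, attr, rel_path):
        # file the new relative path under its series, keyed by InstanceNumber,
        # matching the string keys read back by _max_instance_number
        for pt in register:
            for st in pt.get('studies', []):
                if st.get('StudyInstanceUID') != attr.get('StudyInstanceUID'):
                    continue
                for sr in st.get('series', []):
                    if sr.get('SeriesInstanceUID') == attr.get('SeriesInstanceUID'):
                        sr.setdefault('instances', {})[attr['InstanceNumber']] = rel_path
                        return
        # creating missing patient/study/series entries is left out of this sketch
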