brukerapi 0.1.9__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
brukerapi/folders.py CHANGED
@@ -1,29 +1,35 @@
1
- from .dataset import Dataset
2
- from .jcampdx import JCAMPDX
3
- from .exceptions import *
4
- from pathlib import Path
5
1
  import copy
6
- import operator as op
7
2
  import json
8
- from random import random
9
3
  from copy import deepcopy
4
+ from pathlib import Path
10
5
 
11
- DEFAULT_DATASET_STATE = {
12
- "parameter_files" : [],
13
- "property_files" : [],
14
- "load": False
15
- }
6
+ from .dataset import Dataset
7
+ from .exceptions import (
8
+ FilterEvalFalse,
9
+ IncompleteDataset,
10
+ InvalidJcampdxFile,
11
+ JcampdxVersionError,
12
+ NotADatasetDir,
13
+ NotExperimentFolder,
14
+ NotProcessingFolder,
15
+ NotStudyFolder,
16
+ UnsuportedDatasetType,
17
+ )
18
+ from .jcampdx import JCAMPDX
19
+
20
+ DEFAULT_DATASET_STATE = {"parameter_files": [], "property_files": [], "load": False}
16
21
 
17
22
 
18
23
  class Folder:
19
24
  """A representation of a generic folder. It implements several functions to simplify the folder manipulation."""
25
+
20
26
  def __init__(
21
- self,
22
- path: str,
23
- parent: 'Folder' = None,
24
- recursive: bool = True,
25
- dataset_index: list = ['fid','2dseq','ser','rawdata'],
26
- dataset_state: dict = DEFAULT_DATASET_STATE
27
+ self,
28
+ path: str,
29
+ parent: "Folder" = None,
30
+ recursive: bool | None = None, # noqa: FBT001
31
+ dataset_index: list | None = None,
32
+ dataset_state: dict = DEFAULT_DATASET_STATE,
27
33
  ):
28
34
  """The constructor for Folder class.
29
35
 
@@ -33,6 +39,13 @@ class Folder:
33
39
  :param dataset_index: only data sets listed here will be indexed
34
40
  :return:
35
41
  """
42
+
43
+ if recursive is None:
44
+ recursive = True
45
+
46
+ if dataset_index is None:
47
+ dataset_index = ["fid", "2dseq", "ser", "rawdata"]
48
+
36
49
  self.path = Path(path)
37
50
 
38
51
  self.validate()
@@ -41,6 +54,7 @@ class Folder:
41
54
  self._dataset_index = dataset_index
42
55
  self._set_dataset_state(dataset_state)
43
56
  self.children = self.make_tree(recursive=recursive)
57
+ self.make_children_map() # build lookup map after children exist
44
58
 
45
59
  def validate(self):
46
60
  """Validate whether the given path exists an leads to a folder.
@@ -53,11 +67,11 @@ class Folder:
53
67
  def _set_dataset_state(self, passed):
54
68
  result = deepcopy(DEFAULT_DATASET_STATE)
55
69
 
56
- if 'parameter_files' in passed.keys():
57
- passed['parameter_files'] = result['parameter_files'] + passed['parameter_files']
70
+ if "parameter_files" in passed:
71
+ passed["parameter_files"] = result["parameter_files"] + passed["parameter_files"]
58
72
 
59
- if 'property_files' in passed.keys():
60
- passed['property_files'] = result['property_files'] + passed['property_files']
73
+ if "property_files" in passed:
74
+ passed["property_files"] = result["property_files"] + passed["property_files"]
61
75
 
62
76
  result.update(passed)
63
77
  self._dataset_state = result
@@ -65,10 +79,11 @@ class Folder:
65
79
  def __str__(self) -> str:
66
80
  return str(self.path)
67
81
 
68
- def __getattr__(
69
- self,
70
- name: str
71
- ):
82
+ def make_children_map(self):
83
+ """Build a dictionary for fast name lookups."""
84
+ self._children_map = {child.path.name: child for child in self.children}
85
+
86
+ def __getattr__(self, name: str):
72
87
  """Access individual files in folder. :obj:`.Dataset` and :obj:`.JCAMPDX` instances are not loaded, to access the
73
88
  data and parameters, to load the data, use context manager, or the `load()` function.
74
89
 
@@ -83,10 +98,17 @@ class Folder:
83
98
  :param name: Name of Dataset, JCAMPDX, or Folder
84
99
  :return:
85
100
  """
86
- for child in self.children:
87
- if child.path.name == name:
88
- return child
89
- raise KeyError
101
+ if hasattr(self, "_children_map"):
102
+ try:
103
+ return self._children_map[name]
104
+ except KeyError:
105
+ pass
106
+ else:
107
+ self.make_children_map()
108
+ if name in self._children_map:
109
+ return self._children_map[name]
110
+
111
+ raise KeyError(f"Child '{name}' not found in {self.path}")
90
112
 
91
113
  def __getitem__(self, name):
92
114
  """Access individual files in folder, dict style. :obj:`.Dataset` and :obj:`.JCAMPDX` instances are not loaded, to access the
@@ -117,7 +139,7 @@ class Folder:
117
139
 
118
140
  self.clean(node=self)
119
141
 
120
- def query_pass(self, query: str, node: 'Folder' = None):
142
+ def query_pass(self, query: str, node: "Folder" = None):
121
143
  children_out = []
122
144
  for child in node.children:
123
145
  if isinstance(child, Folder):
@@ -134,7 +156,7 @@ class Folder:
134
156
  node.children = children_out
135
157
  return node
136
158
 
137
- def clean(self, node: 'Folder' = None) -> 'Folder':
159
+ def clean(self, node: "Folder" = None) -> "Folder":
138
160
  """Remove empty folders from the tree
139
161
 
140
162
  :param node:
@@ -176,144 +198,100 @@ class Folder:
176
198
  """List of :obj:`.Study` instances contained in folder and its sub-folders"""
177
199
  return TypeFilter(Study).list(self)
178
200
 
179
- def make_tree(
180
- self,
181
- recursive: bool = True
182
- ) -> list:
183
- """Make a directory tree containing brukerapi objects only
184
-
185
- :param self:
186
- :param recursive: explore all levels of hierarchy
187
- :return:
188
- """
201
+ def make_tree(self, *, recursive: bool = True) -> list:
202
+ """Build a folder tree with optimized traversal."""
189
203
  children = []
190
- for file in self.path.iterdir():
191
- path = file
204
+ entries = list(self.path.iterdir())
192
205
 
206
+ for path in entries:
193
207
  if path.is_dir() and recursive:
194
- # try create Study
195
- try:
196
- children.append(Study(path, parent=self, recursive=recursive, dataset_index=self._dataset_index,
197
- dataset_state=self._dataset_state))
208
+ if Study.contains(path, ["subject"]):
209
+ children.append(Study(path, parent=self, recursive=recursive, dataset_index=self._dataset_index, dataset_state=self._dataset_state))
198
210
  continue
199
- except NotStudyFolder:
200
- pass
201
- # try create Experiment
202
- try:
203
- children.append(Experiment(path, parent=self, recursive=recursive, dataset_index=self._dataset_index,
204
- dataset_state=self._dataset_state))
211
+ if Experiment.contains(path, ["acqp"]):
212
+ children.append(Experiment(path, parent=self, recursive=recursive, dataset_index=self._dataset_index, dataset_state=self._dataset_state))
205
213
  continue
206
- except NotExperimentFolder:
207
- pass
208
- #try create Processing
209
- try:
210
- children.append(Processing(path, parent=self, recursive=recursive, dataset_index=self._dataset_index,
211
- dataset_state=self._dataset_state))
214
+ if Processing.contains(path, ["visu_pars"]):
215
+ children.append(Processing(path, parent=self, recursive=recursive, dataset_index=self._dataset_index, dataset_state=self._dataset_state))
212
216
  continue
213
- except NotProcessingFolder:
214
- pass
215
- children.append(Folder(path, parent=self, recursive=recursive, dataset_index=self._dataset_index,
216
- dataset_state=self._dataset_state))
217
+ children.append(Folder(path, parent=self, recursive=recursive, dataset_index=self._dataset_index, dataset_state=self._dataset_state))
217
218
  continue
218
- try:
219
- if path.name in self._dataset_index:
219
+
220
+ if path.name in self._dataset_index or (path.name.partition(".")[0] in self._dataset_index and "rawdata" in path.name):
221
+ try:
220
222
  children.append(Dataset(path, **self._dataset_state))
223
+ except (UnsuportedDatasetType, IncompleteDataset, NotADatasetDir):
221
224
  continue
222
- except (UnsuportedDatasetType, IncompleteDataset, NotADatasetDir):
223
- pass
225
+
224
226
  try:
225
227
  children.append(JCAMPDX(path, load=False))
226
- continue
227
228
  except (InvalidJcampdxFile, JcampdxVersionError):
228
- pass
229
+ continue
230
+
229
231
  return children
230
232
 
231
233
  @staticmethod
232
- def contains(
233
- path: str,
234
- required: list
235
- ) -> bool:
236
- """Checks whether folder specified by path contains files listed in required.
237
-
238
- :param path: path to a folder
239
- :param required: list of required files
240
- :return:
241
- """
242
- for file in path.iterdir():
243
- try:
244
- required.remove(file.name)
245
- except ValueError:
246
- pass
247
-
248
- if required:
249
- return False
250
- else:
251
- return True
252
-
253
- def print(self, level=0, recursive=True):
234
+ def contains(path: str | Path, required: list) -> bool:
235
+ """Checks whether folder specified by path contains all required files."""
236
+ path = Path(path)
237
+ required_set = set(required)
238
+ existing_files = {f.name for f in path.iterdir()}
239
+ return required_set.issubset(existing_files)
240
+
241
+ def print(self, level=0, recursive=None):
254
242
  """Print structure of the :obj:`.Folder` instance.
255
243
 
256
244
  :param level: level of hierarchy
257
245
  :param recursive: print recursively
258
246
  :return:
259
247
  """
248
+
249
+ if recursive is None:
250
+ recursive = True
251
+
260
252
  if level == 0:
261
- prefix=''
253
+ prefix = ""
262
254
  else:
263
- prefix = '{} └--'.format(' ' * level)
255
+ prefix = "{} └--".format(" " * level)
264
256
 
265
- print('{} {} [{}]'.format(prefix,self.path.name, self.__class__.__name__))
257
+ print(f"{prefix} {self.path.name} [{self.__class__.__name__}]")
266
258
 
267
259
  for child in self.children:
268
260
  if isinstance(child, Folder) and recursive:
269
- child.print(level=level+1)
261
+ child.print(level=level + 1)
270
262
  else:
271
- print('{} {} [{}]'.format(' '+prefix,child.path.name, child.__class__.__name__))
272
-
273
- def clean(self, node: 'Folder' = None) -> 'Folder':
274
- """Remove empty folders from the tree
275
-
276
- :param node:
277
- :return: tree without empty folders
278
- """
279
- if node is None:
280
- node = self
281
-
282
- remove = []
283
- for child in node.children:
284
- if isinstance(child, Folder):
285
- self.clean(child)
286
- if not child.children:
287
- remove.append(child)
288
- for child in remove:
289
- node.children.remove(child)
263
+ print("{} {} [{}]".format(" " + prefix, child.path.name, child.__class__.__name__))
290
264
 
291
265
  def to_json(self, path=None):
292
266
  if path:
293
- with open(path, 'w') as json_file:
267
+ with open(path, "w") as json_file:
294
268
  json.dump(self.to_json(), json_file, sort_keys=True, indent=4)
295
269
  else:
296
270
  return json.dumps(self.to_json(), sort_keys=True, indent=4)
271
+ return None
297
272
 
298
- def report(self, path_out=None, format_=None, write=True, props=None, verbose=None):
273
+ def report(self, path_out=None, format_=None, write=None, props=None, verbose=None):
274
+ if write is None:
275
+ write = True
299
276
 
300
277
  out = {}
301
278
 
302
279
  if format_ is None:
303
- format_ = 'json'
280
+ format_ = "json"
304
281
 
305
282
  for dataset in self.get_dataset_list_rec():
306
- with dataset(add_parameters=['subject']) as d:
283
+ with dataset(add_parameters=["subject"]) as d:
307
284
  if write:
308
285
  if path_out:
309
- d.report(path=path_out/'{}.{}'.format(d.id, format_), props=props, verbose=verbose)
286
+ d.report(path=path_out / f"{d.id}.{format_}", props=props, verbose=verbose)
310
287
  else:
311
- d.report(path=d.path.parent/'{}.{}'.format(d.id, format_), props=props, verbose=verbose)
288
+ d.report(path=d.path.parent / f"{d.id}.{format_}", props=props, verbose=verbose)
312
289
  else:
313
- out[d.id]=d.to_json(props=props)
290
+ out[d.id] = d.to_json(props=props)
314
291
 
315
292
  if not write:
316
293
  return out
294
+ return None
317
295
 
318
296
 
319
297
  class Study(Folder):
@@ -322,13 +300,14 @@ class Study(Folder):
322
300
  Tutorial :doc:`tutorials/how-to-study`
323
301
 
324
302
  """
303
+
325
304
  def __init__(
326
- self,
327
- path: str,
328
- parent: 'Folder' = None,
329
- recursive: bool = True,
330
- dataset_index: list = ['fid', '2dseq', 'ser', 'rawdata'],
331
- dataset_state: dict = DEFAULT_DATASET_STATE
305
+ self,
306
+ path: str,
307
+ parent: "Folder" = None,
308
+ recursive: bool | None = None, # noqa: FBT001
309
+ dataset_index: list | None = None,
310
+ dataset_state: dict = DEFAULT_DATASET_STATE,
332
311
  ):
333
312
  """The constructor for Study class.
334
313
 
@@ -337,10 +316,16 @@ class Study(Folder):
337
316
  :param recursive: recursively create sub-folders
338
317
  :return:
339
318
  """
319
+
320
+ if recursive is None:
321
+ recursive = True
322
+
323
+ if dataset_index is None:
324
+ dataset_index = ["fid", "2dseq", "ser", "rawdata"]
325
+
340
326
  self.path = Path(path)
341
327
  self.validate()
342
- super(Study, self).__init__(path, parent=parent, recursive=recursive, dataset_index=dataset_index,
343
- dataset_state=dataset_state)
328
+ super().__init__(path, parent=parent, recursive=recursive, dataset_index=dataset_index, dataset_state=dataset_state)
344
329
 
345
330
  def validate(self):
346
331
  """Validate whether the given path exists an leads to a :class:`Study` folder.
@@ -350,14 +335,15 @@ class Study(Folder):
350
335
  if not self.path.is_dir():
351
336
  raise NotStudyFolder
352
337
 
353
- if not self.contains(self.path, ['subject',]):
338
+ if not self.contains(
339
+ self.path,
340
+ [
341
+ "subject",
342
+ ],
343
+ ):
354
344
  raise NotStudyFolder
355
345
 
356
- def get_dataset(
357
- self,
358
- exp_id: str = None,
359
- proc_id: str = None
360
- ) -> Dataset:
346
+ def get_dataset(self, exp_id: str | None = None, proc_id: str | None = None) -> Dataset:
361
347
  """Get a :obj:`.Dataset` from the study folder. Fid data set is returned if `exp_id` is specified, 2dseq data set
362
348
  is returned if `exp_id` and `proc_id` are specified.
363
349
 
@@ -369,27 +355,28 @@ class Study(Folder):
369
355
  exp = self._get_exp(exp_id)
370
356
 
371
357
  if proc_id:
372
- return exp._get_proc(proc_id)['2dseq']
373
- else:
374
- return exp['fid']
358
+ return exp._get_proc(proc_id)["2dseq"]
359
+ return exp["fid"]
375
360
 
376
361
  def _get_exp(self, exp_id):
377
362
  for exp in self.experiment_list:
378
363
  if exp.path.name == exp_id:
379
364
  return exp
365
+ return None
380
366
 
381
367
 
382
368
  class Experiment(Folder):
383
369
  """Representation of the Bruker Experiment folder. The folder can contain *fid*, *ser* a *rawdata.SUBTYPE* data sets.
384
370
  It can contain multiple :obj:`.Processing` instances.
385
371
  """
372
+
386
373
  def __init__(
387
- self,
388
- path: str,
389
- parent: 'Folder' = None,
390
- recursive: bool = True,
391
- dataset_index: list = ['fid','ser', 'rawdata'],
392
- dataset_state: dict = DEFAULT_DATASET_STATE
374
+ self,
375
+ path: str,
376
+ parent: "Folder" = None,
377
+ recursive: bool | None = None, # noqa: FBT001
378
+ dataset_index: list | None = None,
379
+ dataset_state: dict = DEFAULT_DATASET_STATE,
393
380
  ):
394
381
  """The constructor for Experiment class.
395
382
 
@@ -398,10 +385,16 @@ class Experiment(Folder):
398
385
  :param recursive: recursively create sub-folders
399
386
  :return:
400
387
  """
388
+
389
+ if recursive is None:
390
+ recursive = True
391
+
392
+ if dataset_index is None:
393
+ dataset_index = ["fid", "ser", "rawdata"]
394
+
401
395
  self.path = Path(path)
402
396
  self.validate()
403
- super(Experiment, self).__init__(path, parent=parent, recursive=recursive, dataset_index=dataset_index,
404
- dataset_state=dataset_state)
397
+ super().__init__(path, parent=parent, recursive=recursive, dataset_index=dataset_index, dataset_state=dataset_state)
405
398
 
406
399
  def validate(self):
407
400
  """Validate whether the given path exists an leads to a :class:`Experiment` folder.
@@ -411,18 +404,23 @@ class Experiment(Folder):
411
404
  if not self.path.is_dir():
412
405
  raise NotExperimentFolder
413
406
 
414
- if not self.contains(self.path, ['acqp', ]):
407
+ if not self.contains(
408
+ self.path,
409
+ [
410
+ "acqp",
411
+ ],
412
+ ):
415
413
  raise NotExperimentFolder
416
414
 
417
415
  def _get_proc(self, proc_id):
418
416
  for proc in self.processing_list:
419
417
  if proc.path.name == proc_id:
420
418
  return proc
419
+ return None
421
420
 
422
421
 
423
422
  class Processing(Folder):
424
- def __init__(self, path, parent=None, recursive=True, dataset_index=['2dseq','1r','1i'],
425
- dataset_state: dict = DEFAULT_DATASET_STATE):
423
+ def __init__(self, path, parent=None, recursive=None, dataset_index=None, dataset_state: dict = DEFAULT_DATASET_STATE):
426
424
  """The constructor for Processing class.
427
425
 
428
426
  :param path: path to a folder
@@ -430,10 +428,16 @@ class Processing(Folder):
430
428
  :param recursive: recursively create sub-folders
431
429
  :return:
432
430
  """
431
+
432
+ if recursive is None:
433
+ recursive = True
434
+
435
+ if dataset_index is None:
436
+ dataset_index = ["2dseq", "1r", "1i"]
437
+
433
438
  self.path = Path(path)
434
439
  self.validate()
435
- super(Processing, self).__init__(path, parent=parent, recursive=recursive, dataset_index=dataset_index,
436
- dataset_state=dataset_state)
440
+ super().__init__(path, parent=parent, recursive=recursive, dataset_index=dataset_index, dataset_state=dataset_state)
437
441
 
438
442
  def validate(self):
439
443
  """Validate whether the given path exists an leads to a :class:`Processing` folder.
@@ -443,22 +447,30 @@ class Processing(Folder):
443
447
  if not self.path.is_dir():
444
448
  raise NotProcessingFolder
445
449
 
446
- if not self.contains(self.path, ['visu_pars',]):
450
+ if not self.contains(
451
+ self.path,
452
+ [
453
+ "visu_pars",
454
+ ],
455
+ ):
447
456
  raise NotProcessingFolder
448
457
 
449
458
 
450
459
  class Filter:
451
- def __init__(self, query, in_place=True, recursive=True):
460
+ def __init__(self, query, in_place=None, recursive=None):
461
+ if in_place is None:
462
+ in_place = True
463
+
464
+ if recursive is None:
465
+ recursive = True
466
+
452
467
  self.in_place = in_place
453
468
  self.recursive = recursive
454
469
  self.query = query
455
470
 
456
471
  def filter(self, folder):
457
-
458
472
  # either perform the filtering of the original folder, or make a copy
459
- if self.in_place:
460
- folder = folder
461
- else:
473
+ if not self.in_place:
462
474
  folder = copy.deepcopy(folder)
463
475
 
464
476
  # perform filtering
@@ -475,13 +487,12 @@ class Filter:
475
487
  node = q.pop()
476
488
  try:
477
489
  self.filter_eval(node)
478
- count +=1
490
+ count += 1
479
491
  except FilterEvalFalse:
480
492
  pass
481
493
  finally:
482
- if self.recursive:
483
- if isinstance(node, Folder) or isinstance(node, Study):
484
- q += node.children
494
+ if self.recursive and (isinstance(node, (Folder, Study))):
495
+ q += node.children
485
496
  return count
486
497
 
487
498
  def list(self, folder):
@@ -496,15 +507,13 @@ class Filter:
496
507
  except FilterEvalFalse:
497
508
  pass
498
509
  finally:
499
- if self.recursive:
500
- if isinstance(node, Folder):
501
- q += node.children
510
+ if self.recursive and isinstance(node, Folder):
511
+ q += node.children
502
512
  return list
503
513
 
504
514
  def filter_pass(self, node):
505
515
  children_out = []
506
516
  for child in node.children:
507
-
508
517
  if isinstance(child, Folder):
509
518
  children_out.append(self.filter_pass(child))
510
519
  else:
@@ -518,17 +527,22 @@ class Filter:
518
527
 
519
528
  def filter_eval(self, node):
520
529
  if isinstance(node, Dataset):
521
- with node(add_properties=['subject']) as n:
530
+ with node(add_properties=["subject"]) as n:
522
531
  n.query(self.query)
523
532
  else:
524
533
  raise FilterEvalFalse
525
534
 
526
535
 
527
536
  class TypeFilter(Filter):
528
- def __init__(self, value, in_place=True, recursive=True):
529
- super(TypeFilter, self).__init__(in_place, recursive)
537
+ def __init__(self, value, in_place=None, recursive=None):
538
+ if in_place is None:
539
+ in_place = True
540
+ if recursive is None:
541
+ recursive = True
542
+
543
+ super().__init__(in_place, recursive)
530
544
  self.type = value
531
545
 
532
546
  def filter_eval(self, node):
533
547
  if not isinstance(node, self.type):
534
- raise FilterEvalFalse
548
+ raise FilterEvalFalse