brkraw 0.3.11__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. brkraw/__init__.py +9 -3
  2. brkraw/apps/__init__.py +12 -0
  3. brkraw/apps/addon/__init__.py +30 -0
  4. brkraw/apps/addon/core.py +35 -0
  5. brkraw/apps/addon/dependencies.py +402 -0
  6. brkraw/apps/addon/installation.py +500 -0
  7. brkraw/apps/addon/io.py +21 -0
  8. brkraw/apps/hook/__init__.py +25 -0
  9. brkraw/apps/hook/core.py +636 -0
  10. brkraw/apps/loader/__init__.py +10 -0
  11. brkraw/apps/loader/core.py +622 -0
  12. brkraw/apps/loader/formatter.py +288 -0
  13. brkraw/apps/loader/helper.py +797 -0
  14. brkraw/apps/loader/info/__init__.py +11 -0
  15. brkraw/apps/loader/info/scan.py +85 -0
  16. brkraw/apps/loader/info/scan.yaml +90 -0
  17. brkraw/apps/loader/info/study.py +69 -0
  18. brkraw/apps/loader/info/study.yaml +156 -0
  19. brkraw/apps/loader/info/transform.py +92 -0
  20. brkraw/apps/loader/types.py +220 -0
  21. brkraw/cli/__init__.py +5 -0
  22. brkraw/cli/commands/__init__.py +2 -0
  23. brkraw/cli/commands/addon.py +327 -0
  24. brkraw/cli/commands/config.py +205 -0
  25. brkraw/cli/commands/convert.py +903 -0
  26. brkraw/cli/commands/hook.py +348 -0
  27. brkraw/cli/commands/info.py +74 -0
  28. brkraw/cli/commands/init.py +214 -0
  29. brkraw/cli/commands/params.py +106 -0
  30. brkraw/cli/commands/prune.py +288 -0
  31. brkraw/cli/commands/session.py +371 -0
  32. brkraw/cli/hook_args.py +80 -0
  33. brkraw/cli/main.py +83 -0
  34. brkraw/cli/utils.py +60 -0
  35. brkraw/core/__init__.py +13 -0
  36. brkraw/core/config.py +380 -0
  37. brkraw/core/entrypoints.py +25 -0
  38. brkraw/core/formatter.py +367 -0
  39. brkraw/core/fs.py +495 -0
  40. brkraw/core/jcamp.py +600 -0
  41. brkraw/core/layout.py +451 -0
  42. brkraw/core/parameters.py +781 -0
  43. brkraw/core/zip.py +1121 -0
  44. brkraw/dataclasses/__init__.py +14 -0
  45. brkraw/dataclasses/node.py +139 -0
  46. brkraw/dataclasses/reco.py +33 -0
  47. brkraw/dataclasses/scan.py +61 -0
  48. brkraw/dataclasses/study.py +131 -0
  49. brkraw/default/__init__.py +3 -0
  50. brkraw/default/pruner_specs/deid4share.yaml +42 -0
  51. brkraw/default/rules/00_default.yaml +4 -0
  52. brkraw/default/specs/metadata_dicom.yaml +236 -0
  53. brkraw/default/specs/metadata_transforms.py +92 -0
  54. brkraw/resolver/__init__.py +7 -0
  55. brkraw/resolver/affine.py +539 -0
  56. brkraw/resolver/datatype.py +69 -0
  57. brkraw/resolver/fid.py +90 -0
  58. brkraw/resolver/helpers.py +36 -0
  59. brkraw/resolver/image.py +188 -0
  60. brkraw/resolver/nifti.py +370 -0
  61. brkraw/resolver/shape.py +235 -0
  62. brkraw/schema/__init__.py +3 -0
  63. brkraw/schema/context_map.yaml +62 -0
  64. brkraw/schema/meta.yaml +57 -0
  65. brkraw/schema/niftiheader.yaml +95 -0
  66. brkraw/schema/pruner.yaml +55 -0
  67. brkraw/schema/remapper.yaml +128 -0
  68. brkraw/schema/rules.yaml +154 -0
  69. brkraw/specs/__init__.py +10 -0
  70. brkraw/specs/hook/__init__.py +12 -0
  71. brkraw/specs/hook/logic.py +31 -0
  72. brkraw/specs/hook/validator.py +22 -0
  73. brkraw/specs/meta/__init__.py +5 -0
  74. brkraw/specs/meta/validator.py +156 -0
  75. brkraw/specs/pruner/__init__.py +15 -0
  76. brkraw/specs/pruner/logic.py +361 -0
  77. brkraw/specs/pruner/validator.py +119 -0
  78. brkraw/specs/remapper/__init__.py +27 -0
  79. brkraw/specs/remapper/logic.py +924 -0
  80. brkraw/specs/remapper/validator.py +314 -0
  81. brkraw/specs/rules/__init__.py +6 -0
  82. brkraw/specs/rules/logic.py +263 -0
  83. brkraw/specs/rules/validator.py +103 -0
  84. brkraw-0.5.0.dist-info/METADATA +81 -0
  85. brkraw-0.5.0.dist-info/RECORD +88 -0
  86. {brkraw-0.3.11.dist-info → brkraw-0.5.0.dist-info}/WHEEL +1 -2
  87. brkraw-0.5.0.dist-info/entry_points.txt +13 -0
  88. brkraw/lib/__init__.py +0 -4
  89. brkraw/lib/backup.py +0 -641
  90. brkraw/lib/bids.py +0 -0
  91. brkraw/lib/errors.py +0 -125
  92. brkraw/lib/loader.py +0 -1220
  93. brkraw/lib/orient.py +0 -194
  94. brkraw/lib/parser.py +0 -48
  95. brkraw/lib/pvobj.py +0 -301
  96. brkraw/lib/reference.py +0 -245
  97. brkraw/lib/utils.py +0 -471
  98. brkraw/scripts/__init__.py +0 -0
  99. brkraw/scripts/brk_backup.py +0 -106
  100. brkraw/scripts/brkraw.py +0 -744
  101. brkraw/ui/__init__.py +0 -0
  102. brkraw/ui/config.py +0 -17
  103. brkraw/ui/main_win.py +0 -214
  104. brkraw/ui/previewer.py +0 -225
  105. brkraw/ui/scan_info.py +0 -72
  106. brkraw/ui/scan_list.py +0 -73
  107. brkraw/ui/subj_info.py +0 -128
  108. brkraw-0.3.11.dist-info/METADATA +0 -25
  109. brkraw-0.3.11.dist-info/RECORD +0 -28
  110. brkraw-0.3.11.dist-info/entry_points.txt +0 -3
  111. brkraw-0.3.11.dist-info/top_level.txt +0 -2
  112. tests/__init__.py +0 -0
  113. {brkraw-0.3.11.dist-info → brkraw-0.5.0.dist-info/licenses}/LICENSE +0 -0
brkraw/lib/backup.py DELETED
@@ -1,641 +0,0 @@
- from .errors import *
- from .loader import BrukerLoader
- from .utils import get_dirsize, get_filesize, yes_or_no, TimeCounter
- import os
- import sys
- import tqdm
- import pickle
- import zipfile
- import datetime
- import getpass
- _bar_fmt = '{l_bar}{bar:20}{r_bar}{bar:-20b}'
- _user = getpass.getuser()
- _width = 80
- _line_sep_1 = '-' * _width
- _line_sep_2 = '=' * _width
- _empty_sep = ''
-
-
- class NamedTuple(object):
-     def __init__(self, **kwargs):
-         self.__dict__.update(kwargs)
-
-
- class BackupCache:
-     def __init__(self):
-         self._init_dataset_class()
-
-     def logging(self, message, method):
-         now = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
-         self.log_data.append(NamedTuple(datetime=now, method=method, message=message))
-
-     @property
-     def num_raw(self):
-         return len(self.raw_data)
-     #TODO: need to check if the space enough to perform backup, as well as handle the crash event
-     #during the backup (the cache updated even the backup failed)
-
-     @property
-     def num_arc(self):
-         return len(self.arc_data)
-
-     def _init_dataset_class(self):
-         # dataset
-         self.raw_data = []
-         self.arc_data = []
-         self.log_data = []
-
-     def get_rpath_obj(self, path, by_arc=False):
-         if len(self.raw_data):
-             if by_arc:
-                 data_pid = [b.data_pid for b in self.arc_data if b.path == path]
-                 if len(data_pid):
-                     rpath_obj = [r for r in self.raw_data if r.data_pid == data_pid[0]]
-                     if len(rpath_obj):
-                         return rpath_obj[0]
-                     else:
-                         return None
-                 else:
-                     return None
-             else:
-                 rpath_obj = [r for r in self.raw_data if r.path == path]
-                 if len(rpath_obj):
-                     return rpath_obj[0]
-                 else:
-                     return None
-         else:
-             return None
-
-     def get_bpath_obj(self, path, by_raw=False):
-         if len(self.arc_data):
-             if by_raw:
-                 r = self.get_rpath_obj(path)
-                 if r is None:
-                     return []
-                 else:
-                     return [b for b in self.arc_data if b.data_pid == r.data_pid]
-             else:
-                 data_pid = [b for b in self.arc_data if b.path == path][0].data_pid
-                 return [b for b in self.arc_data if b.data_pid == data_pid]
-         else:
-             return []
-
-     def isin(self, path, raw=True):
-         if raw:
-             list_data = self.raw_data
-         else:
-             list_data = self.arc_data
-         _history = [d for d in list_data if d.path == path]
-         if len(_history):
-             return True
-         else:
-             return False
-
-     def set_raw(self, dirname, raw_dir, removed=False):
-         # rawobj: data_pid, path, garbage, removed, backup
-         if not removed:
-             dir_path = os.path.join(raw_dir, dirname)
-             if not self.isin(dirname, raw=True): # continue if the path is not saved in this cache obj
-                 if os.path.isdir(dir_path):
-                     raw = BrukerLoader(dir_path)
-                     garbage = False if raw.is_pvdataset else True
-                     rawobj = NamedTuple(data_pid=self.num_raw,
-                                         path=dirname,
-                                         garbage=garbage,
-                                         removed=removed,
-                                         backup=False)
-                     self.raw_data.append(rawobj)
-                 else:
-                     self.logging('{} is not a valid directory. [raw dataset must be a directory]'.format(dir_path),
-                                  'set_raw')
-         else:
-             rawobj = NamedTuple(data_pid=self.num_raw,
-                                 path=dirname,
-                                 garbage=None,
-                                 removed=removed,
-                                 backup=True)
-             self.raw_data.append(rawobj)
-
-     def set_arc(self, arc_fname, arc_dir, raw_dir):
-         # arcobj: data_pid, path, garbage, crashed, issued
-         arc_path = os.path.join(arc_dir, arc_fname)
-
-         if not self.isin(arc_fname, raw=False): # continue if the path is not saved in this cache obj
-             issued = False
-             try:
-                 arc = BrukerLoader(arc_path)
-                 raw_dname = arc.pvobj.path
-                 raw_path = os.path.join(raw_dir, raw_dname)
-                 garbage = False if arc.is_pvdataset else True
-                 crashed = False
-             except:
-                 self.logging('{} is crashed.'.format(arc_path),
-                              'set_arc')
-                 arc = None
-                 raw_dname = None
-                 raw_path = None
-                 garbage = True
-                 crashed = True
-
-             if raw_dname != None:
-                 r = self.get_rpath_obj(raw_dname)
-             else:
-                 r = None
-
-             if r is None:
-                 raw_dname = os.path.splitext(arc_fname)[0]
-                 self.set_raw(raw_dname, raw_dir, removed=True)
-                 r = self.get_rpath_obj(raw_dname)
-                 r.garbage = garbage
-                 if crashed:
-                     issued = True
-             else:
-                 if arc is None:
-                     issued = True
-                 else:
-                     if not r.removed:
-                         if not r.backup:
-                             pass
-                         else:
-                             raw = BrukerLoader(raw_path)
-                             if raw.num_recos != arc.num_recos:
-                                 issued = True
-             arcobj = NamedTuple(data_pid=r.data_pid,
-                                 path=arc_fname,
-                                 garbage=garbage,
-                                 crashed=crashed,
-                                 issued=issued)
-             if not crashed:
-                 if not issued:
-                     # backup completed data must has no issue
-                     r.backup = True
-
-             self.arc_data.append(arcobj)
-
-     def is_duplicated(self, file_path, by_arc=False):
-         if by_arc:
-             b = self.get_bpath_obj(file_path, by_raw=False)
-         else:
-             b = self.get_bpath_obj(file_path, by_raw=True)
-         if len(b) > 1:
-             return True
-         else:
-             return False
-
-
- class BackupCacheHandler:
-     def __init__(self, raw_path, backup_path, fname='.brk-backup_cache'):
-         """ Handler class for backup data
-
-         Args:
-             raw_path: path for raw dataset
-             backup_path: path for backup dataset
-             fname: file name to pickle cache data
-         """
-         self._cache = None
-         self._rpath = os.path.expanduser(raw_path)
-         self._apath = os.path.expanduser(backup_path)
-         self._cache_path = os.path.join(self._apath, fname)
-         self._load_pickle()
-         # self._parse_info()
-
-     def _load_pickle(self):
-         if os.path.exists(self._cache_path):
-             try:
-                 with open(self._cache_path, 'rb') as cache:
-                     self._cache = pickle.load(cache)
-             except EOFError:
-                 os.remove(self._cache_path)
-                 self._cache = BackupCache()
-         else:
-             self._cache = BackupCache()
-             self._save_pickle()
-
-     def _save_pickle(self):
-         with open(self._cache_path, 'wb') as f:
-             pickle.dump(self._cache, f)
-
-     def logging(self, message, method):
-         method = 'Handler.{}'.format(method)
-         self._cache.logging(message, method)
-
-     @property
-     def is_duplicated(self):
-         return self._cache.is_duplicated
-
-     @property
-     def get_rpath_obj(self):
-         return self._cache.get_rpath_obj
-
-     @property
-     def get_bpath_obj(self):
-         return self._cache.get_bpath_obj
-
-     @property
-     def arc_data(self):
-         return self._cache.arc_data
-
-     @property
-     def raw_data(self):
-         return self._cache.raw_data
-
-     @property
-     def scan(self):
-         return self._parse_info
-
-     def _parse_info(self):
-         print('\n-- Parsing metadata from the raw and archived directories --')
-         list_of_raw = sorted([d for d in os.listdir(self._rpath) if
-                               os.path.isdir(os.path.join(self._rpath, d)) and 'import' not in d])
-         list_of_brk = sorted([d for d in os.listdir(self._apath) if
-                               (os.path.isfile(os.path.join(self._apath, d)) and
-                                (d.endswith('zip') or d.endswith('PvDatasets')))])
-
-         # parse dataset
-         print('\nScanning raw datasets and update cache...')
-         for r in tqdm.tqdm(list_of_raw, bar_format=_bar_fmt):
-             self._cache.set_raw(r, raw_dir=self._rpath)
-         self._save_pickle()
-
-         print('\nScanning archived datasets and update cache...')
-         for b in tqdm.tqdm(list_of_brk, bar_format=_bar_fmt):
-             self._cache.set_arc(b, arc_dir=self._apath, raw_dir=self._rpath)
-         self._save_pickle()
-
-         # update raw dataset information (raw dataset cache will remain even its removed)
-         print('\nScanning raw dataset cache...')
-         for r in tqdm.tqdm(self.raw_data[:], bar_format=_bar_fmt):
-             if r.path != None:
-                 if not os.path.exists(os.path.join(self._rpath, r.path)):
-                     if not r.removed:
-                         r.removed = True
-         self._save_pickle()
-
-         print('\nReviewing the cached information...')
-         for b in tqdm.tqdm(self.arc_data[:], bar_format=_bar_fmt):
-             arc_path = os.path.join(self._apath, b.path)
-             if not os.path.exists(arc_path): # backup dataset is not existing, remove the cache
-                 self.arc_data.remove(b)
-             else: # backup dataset is existing then check status again
-                 if b.issued: # check if the issue has benn resolved.
-                     if b.crashed: # check if the dataset re-backed up.
-                         if zipfile.is_zipfile(arc_path):
-                             b.crashed = False # backup success!
-                             b.issued = False if self.is_same_as_raw(b.path) else True
-                             if b.issued:
-                                 if b.garbage:
-                                     if BrukerLoader(arc_path).is_pvdataset:
-                                         b.garbage = False
-                         # else the backup dataset it still crashed.
-                     else: # the dataset has an issue but not crashed, so check if the issue has been resolved.
-                         b.issued = False if self.is_same_as_raw(b.path) else True
-                     if not b.issued: # if issue resolved
-                         r = self.get_rpath_obj(b.path, by_arc=True)
-                         r.backup = True
-                 else: # if no issue with the dataset, do nothing.
-                     r = self.get_rpath_obj(b.path, by_arc=True)
-                     if not r.backup:
-                         r.backup = True
-         self._save_pickle()
-
-     def is_same_as_raw(self, filename):
-         arc = BrukerLoader(os.path.join(self._apath, filename))
-         if arc.pvobj.path != None:
-             raw_path = os.path.join(self._rpath, arc.pvobj.path)
-             if os.path.exists(raw_path):
-                 raw = BrukerLoader(raw_path)
-                 return arc.num_recos == raw.num_recos
-             else:
-                 return None
-         else:
-             return None
-
-     def get_duplicated(self):
-         duplicated = dict()
-         for b in self.arc_data:
-             if self.is_duplicated(b.path, by_arc=True):
-                 rpath = self.get_rpath_obj(b.path, by_arc=True).path
-                 if rpath in duplicated.keys():
-                     duplicated[rpath].append(b.path)
-                 else:
-                     duplicated[rpath] = [b.path]
-             else:
-                 pass
-         return duplicated
-
-     def get_list_for_backup(self):
-         return [r for r in self.get_incompleted() if not r.garbage]
-
-     def get_issued(self):
-         return [b for b in self.arc_data if b.issued]
-
-     def get_crashed(self):
-         return [b for b in self.arc_data if b.crashed]
-
-     def get_incompleted(self):
-         return [r for r in self.raw_data if not r.backup]
-
-     def get_completed(self):
-         return [r for r in self.raw_data if r.backup]
-
-     def get_garbage(self):
-         return [b for b in self.arc_data if b.garbage]
-
-     @staticmethod
-     def _gen_header(title, width=_width):
-         lines = []
-         gen_by = 'Generated by {}'.format(_user).rjust(width)
-
-         lines.append(_empty_sep)
-         lines.append(_line_sep_2)
-         lines.append(_empty_sep)
-         lines.append(title.center(width))
-         lines.append(gen_by)
-         lines.append(_line_sep_2)
-         lines.append(_empty_sep)
-         return lines
-
-     def _get_backup_status(self):
-         now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-         lines = self._gen_header('Report of the status of archived data [{}]'.format(now))
-         list_need_to_be_backup = self.get_list_for_backup()[:]
-         total_list = len(list_need_to_be_backup)
-         if len(list_need_to_be_backup):
-             lines.append('>> The list of raw data need to be archived.')
-             lines.append('[Note: The list exclude the raw data does not contain any binary file]')
-             lines.append(_line_sep_1)
-             lines.append('{}{}'.format('Rawdata Path'.center(_width-10), 'Size'.rjust(10)))
-             for r in list_need_to_be_backup:
-                 if len(r.path) > _width-10:
-                     path_name = '{}... '.format(r.path[:_width-14])
-                 else:
-                     path_name = r.path
-                 raw_path = os.path.join(self._rpath, r.path)
-                 dir_size, unit = get_dirsize(raw_path)
-                 if unit == 'B':
-                     dir_size = '{} {}'.format(dir_size, unit).rjust(10)
-                 else:
-                     dir_size = '{0:.2f}{1}'.format(dir_size, unit).rjust(10)
-                 lines.append('{}{}'.format(path_name.ljust(_width-10), dir_size))
-             lines.append(_line_sep_1)
-             lines.append(_empty_sep)
-
-         list_issued = self.get_issued()
-         total_list += len(list_issued)
-         if len(list_issued):
-             lines.append('>> Failed or incompleted archived data.')
-             lines.append('[Note: The listed data are either crashed or incompleted]')
-             lines.append(_line_sep_1)
-             lines.append('{}{}{}'.format('Archived Path'.center(60),
-                                          'Condition'.rjust(10),
-                                          'Size'.rjust(10)))
-             for b in self.get_issued():
-                 if len(b.path) > _width-20:
-                     path_name = '{}... '.format(b.path[:_width-24])
-                 else:
-                     path_name = b.path
-                 arc_path = os.path.join(self._apath, b.path)
-                 file_size, unit = get_filesize(arc_path)
-                 if b.crashed:
-                     raw_path = self.get_rpath_obj(b.path, by_arc=True).path
-                     if raw_path is None:
-                         condition = 'Failed'
-                     else:
-                         condition = 'Crashed'
-                 else:
-                     condition = 'Issued'
-                 if unit == 'B':
-                     file_size = '{} {}'.format(file_size, unit).rjust(10)
-                 else:
-                     file_size = '{0:.2f}{1}'.format(file_size, unit).rjust(10)
-                 lines.append('{}{}{}'.format(path_name.ljust(_width-20),
-                                              condition.center(10),
-                                              file_size))
-             lines.append(_line_sep_1)
-             lines.append(_empty_sep)
-
-         list_duplicated = self.get_duplicated()
-         total_list += len(list_duplicated)
-         if len(list_duplicated.keys()):
-             lines.append('>> List of duplicated archived data.')
-             lines.append('[Note: The listed raw data has been archived into multiple files]')
-             lines.append(_line_sep_1)
-             lines.append('{} {}'.format('Raw Path'.center(int(_width/2)-1),
-                                         'Archived'.center(int(_width/2)-1)))
-             for rpath, bpaths in list_duplicated.items():
-                 if rpath is None:
-                     rpath = '-- Removed --'
-                 if len(rpath) > int(_width/2)-1:
-                     rpath = '{}... '.format(rpath[:int(_width/2)-5])
-                 for i, bpath in enumerate(bpaths):
-                     if len(bpath) > int(_width/2)-1:
-                         bpath = '{}... '.format(bpath[:int(_width/2)-5])
-                     if i == 0:
-                         lines.append('{}:-{}'.format(rpath.ljust(int(_width/2)-1),
-                                                      bpath.ljust(int(_width/2)-1)))
-                     else:
-                         lines.append('{} -{}'.format(''.center(int(_width/2)-1),
-                                                      bpath.ljust(int(_width/2)-1)))
-             lines.append(_line_sep_1)
-             lines.append(_empty_sep)
-
-         if total_list == 0:
-             lines.append(_empty_sep)
-             lines.append('The status of archived data is up-to-date...'.center(80))
-             lines.append(_empty_sep)
-             lines.append(_line_sep_1)
-         return '\n'.join(lines)
-
-     def print_status(self, fobj=sys.stdout):
-         summary = self._get_backup_status()
-         print(summary, file=fobj)
-
-     def print_completed(self, fobj=sys.stdout):
-         now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-         lines = self._gen_header('List of archived dataset [{}]'.format(now))
-         list_of_completed = self.get_completed()
-         if len(list_of_completed):
-             lines.append(_line_sep_1)
-             lines.append('{}{}{}'.format('Rawdata Path'.center(_width - 20),
-                                          'Removed'.rjust(10),
-                                          'Archived'.rjust(10)))
-             for r in list_of_completed:
-                 if len(r.path) > _width - 20:
-                     path_name = '{}... '.format(r.path[:_width - 24])
-                 else:
-                     path_name = r.path
-                 removed = 'True' if r.removed else 'False'
-                 archived = 'True' if r.backup else 'False'
-                 lines.append('{}{}{}'.format(path_name.ljust(_width - 20),
-                                              removed.center(10),
-                                              archived.center(10)))
-             lines.append(_line_sep_1)
-             lines.append(_empty_sep)
-         else:
-             lines.append(_empty_sep)
-             lines.append('No archived data...'.center(80))
-             lines.append(_empty_sep)
-             lines.append(_line_sep_1)
-         summary = '\n'.join(lines)
-         print(summary, file=fobj)
-
-     def clean(self):
-         print('\n[Warning] The archived data that contains any issue will be deleted by this command '
-               'and it cannot be revert.')
-         print(' Prior to run this, please update the cache for data status using "review" function.\n')
-         ans = yes_or_no('Are you sure to continue?')
-
-         if ans:
-             list_data = dict(issued=self.get_issued()[:],
-                              garbage=self.get_garbage()[:],
-                              crashed=self.get_crashed()[:],
-                              duplicated=self.get_duplicated().copy())
-             for label, dset in list_data.items():
-                 if label == 'duplicated':
-                     print('\nStart removing {} archived data...'.format(label.upper()))
-                     if len(dset.items()):
-                         for raw_dname, arcs in dset.items():
-                             if raw_dname != None:
-                                 raw_path = os.path.join(self._rpath, raw_dname)
-                                 if os.path.exists(raw_path):
-                                     r_size, r_unit = get_dirsize(raw_path)
-                                     r_size = '{0:.2f} {1}'.format(r_size, r_unit)
-                                 else:
-                                     r_size = 'Removed'
-                                 if len(raw_dname) < 60:
-                                     raw_dname = '{}...'.format(raw_dname[:56])
-                             else:
-                                 r_size = 'Removed'
-                                 raw_dname = 'No name'
-                             print('Raw dataset: [{}] {}'.format(raw_dname.ljust(60), r_size.rjust(10)))
-                             num_dup = len(arcs)
-                             dup_list = [' +-{}'] * num_dup
-                             print('\n'.join(dup_list).format(*arcs))
-                             for arc_fname in arcs:
-                                 path_to_clean = os.path.join(self._apath, arc_fname)
-                                 ans_4rm = yes_or_no(' - Are you sure to remove [{}] ?\n '.format(arc_fname))
-                                 if ans_4rm:
-                                     try:
-                                         os.remove(path_to_clean)
-                                         a = self.get_bpath_obj(arc_fname)
-                                         if len(a):
-                                             self.arc_data.remove(a[0])
-                                     except OSError:
-                                         error = RemoveFailedError(path_to_clean)
-                                         self.logging(error.message, 'clean')
-                                         print(' Failed! The file is locked.')
-                                 else:
-                                     raise UnexpectedError
-                 else:
-                     if len(dset):
-                         print('\nStart removing {} archived data...'.format(label.upper()))
-
-                         def ask_to_remove():
-                             ans_4rm = yes_or_no(' - Are you sure to remove [{}] ?\n '.format(path_to_clean))
-                             if ans_4rm:
-                                 try:
-                                     os.remove(path_to_clean)
-                                     self.arc_data.remove(a)
-                                 except OSError:
-                                     error = RemoveFailedError(path_to_clean)
-                                     self.logging(error.message, 'clean')
-                                     print(' Failed! The file is locked.')
-                             else:
-                                 raise UnexpectedError
-                         for a in dset:
-                             path_to_clean = os.path.join(self._apath, a.path)
-                             if label == 'issued':
-                                 if a.garbages or a.crashed:
-                                     pass
-                                 else:
-                                     ask_to_remove()
-                             elif label == 'garbage':
-                                 if a.crashed:
-                                     pass
-                                 else:
-                                     ask_to_remove()
-             self._save_pickle()
-
-     def backup(self, fobj=sys.stdout):
-         list_raws = self.get_list_for_backup()[:]
-         list_issued = self.get_issued()[:]
-         print('\nStarting backup for raw data not listed in the cache...')
-         self.logging('Archiving process starts...', 'backup')
-
-         for i, dlist in enumerate([list_raws, list_issued]):
-             if i == 0:
-                 print('\n[step1] Archiving the raw data that has not been archived.')
-                 self.logging('Archive the raw data has not been archived...', 'backup')
-             elif i == 1:
-                 print('\n[step2] Archiving the data that has issued on archived data.')
-                 self.logging('Archive the raw data contains any issue...', 'backup')
-
-             for r in tqdm.tqdm(dlist, unit=' dataset(s)', bar_format=_bar_fmt):
-                 run_backup = True
-                 raw_path = os.path.join(self._rpath, r.path)
-                 arc_path = os.path.join(self._apath, '{}.zip'.format(r.path))
-                 tmp_path = os.path.join(self._apath, '{}.part'.format(r.path))
-                 if os.path.exists(raw_path):
-                     if os.path.exists(tmp_path):
-                         print(' -[{}] is detected and removed...'.format(tmp_path), file=fobj)
-                         os.unlink(tmp_path)
-                     if os.path.exists(arc_path):
-                         if not zipfile.is_zipfile(arc_path):
-                             print(' -[{}] is crashed file, removing...'.format(arc_path), file=fobj)
-                             os.unlink(arc_path)
-                         else:
-                             arc = BrukerLoader(arc_path)
-                             raw = BrukerLoader(raw_path)
-                             if arc.is_pvdataset:
-                                 if arc.num_recos != raw.num_recos:
-                                     print(' - [{}] is mismatching with the corresponding raw data, '
-                                           'removing...'.format(arc_path), file=fobj)
-                                     os.unlink(arc_path)
-                                 else:
-                                     run_backup = False
-                             else:
-                                 print(' - [{}] is mismatching with the corresponding raw data, '
-                                       'removing...'.format(arc_path), file=fobj)
-                                 os.unlink(arc_path)
-                     if run_backup:
-                         print('\n :: Compressing [{}]...'.format(raw_path), file=fobj)
-                         # Compressing
-                         timer = TimeCounter()
-                         try: # exception handling in case compression is failed
-                             with zipfile.ZipFile(tmp_path, 'w') as zip:
-                                 # prepare file counters for use of tqdm
-                                 file_counter = 0
-                                 for _ in os.walk(raw_path):
-                                     file_counter += 1
-
-                                 for i, (root, dirs, files) in tqdm.tqdm(enumerate(os.walk(raw_path)),
-                                                                         bar_format=_bar_fmt,
-                                                                         total=file_counter,
-                                                                         unit=' file(s)'):
-                                     splitted_root = root.split(os.sep)
-                                     if i == 0:
-                                         root_idx = splitted_root.index(r.path)
-                                     for f in files:
-                                         arc_name = os.sep.join(splitted_root[root_idx:] + [f])
-                                         zip.write(os.path.join(root, f), arcname=arc_name)
-                             print(' - [{}] is created.'.format(os.path.basename(arc_path)), file=fobj)
-
-                         except Exception:
-                             print_internal_error(fobj)
-                             error = ArchiveFailedError(raw_path)
-                             self.logging(error.message, 'backup')
-                             raise error
-
-                         print(' - processed time: {} sec'.format(timer.time()), file=fobj)
-
-                         # Backup validation
-                         if not os.path.exists(tmp_path): # Check if the file is generated
-                             error = ArchiveFailedError(raw_path)
-                             self.logging(error.message, 'backup')
-                             raise error
-                         else:
-                             try:
-                                 os.rename(tmp_path, arc_path)
-                             except:
-                                 print_internal_error(fobj)
-                                 raise UnexpectedError
brkraw/lib/bids.py DELETED
File without changes