brukerapi 0.1.9__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- brukerapi/cli.py +21 -30
- brukerapi/config/properties_fid_core.json +63 -6
- brukerapi/config/properties_rawdata_core.json +16 -9
- brukerapi/config/properties_rawdata_custom.json +65 -1
- brukerapi/data.py +2 -3
- brukerapi/dataset.py +159 -158
- brukerapi/exceptions.py +57 -84
- brukerapi/folders.py +183 -169
- brukerapi/jcampdx.py +223 -237
- brukerapi/mergers.py +15 -22
- brukerapi/schemas.py +222 -279
- brukerapi/splitters.py +100 -87
- brukerapi/utils.py +35 -36
- brukerapi-0.2.0.dist-info/METADATA +244 -0
- brukerapi-0.2.0.dist-info/RECORD +25 -0
- {brukerapi-0.1.9.dist-info → brukerapi-0.2.0.dist-info}/WHEEL +1 -1
- brukerapi-0.1.9.dist-info/METADATA +0 -13
- brukerapi-0.1.9.dist-info/RECORD +0 -25
- {brukerapi-0.1.9.dist-info → brukerapi-0.2.0.dist-info}/entry_points.txt +0 -0
- {brukerapi-0.1.9.dist-info → brukerapi-0.2.0.dist-info/licenses}/LICENSE +0 -0
- {brukerapi-0.1.9.dist-info → brukerapi-0.2.0.dist-info}/top_level.txt +0 -0
brukerapi/jcampdx.py
CHANGED
|
@@ -1,32 +1,50 @@
|
|
|
1
|
-
import
|
|
2
|
-
from .exceptions import *
|
|
3
|
-
from pathlib import Path
|
|
1
|
+
import json
|
|
4
2
|
import re
|
|
5
|
-
import ast
|
|
6
3
|
from collections import OrderedDict
|
|
7
|
-
import
|
|
4
|
+
from pathlib import Path
|
|
8
5
|
|
|
9
|
-
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
from .exceptions import InvalidJcampdxFile, JcampdxFileError, JcampdxVersionError, ParameterNotFound
|
|
9
|
+
|
|
10
|
+
SUPPORTED_VERSIONS = ["4.24", "5.0", "5.00 Bruker JCAMP library", "5.00 BRUKER JCAMP library", "5.01"]
|
|
10
11
|
GRAMMAR = {
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
12
|
+
"COMMENT_LINE": r"\$\$[^\n]*\n",
|
|
13
|
+
"PARAMETER": "##",
|
|
14
|
+
"USER_DEFINED": r"\$",
|
|
15
|
+
"TRAILING_EOL": r"\n$",
|
|
16
|
+
"DATA_LABEL": r"\(XY..XY\)",
|
|
17
|
+
"DATA_DELIMETERS": r", |\n",
|
|
18
|
+
"SIZE_BRACKET": r"^\([^\(\)<>]*\)(?!$)",
|
|
19
|
+
"LIST_DELIMETER": ", ",
|
|
20
|
+
"EQUAL_SIGN": "=",
|
|
21
|
+
"SINGLE_NUMBER": r"-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?",
|
|
22
|
+
"PARALLEL_BRACKET": r"\) ",
|
|
23
|
+
"GEO_OBJ": r"\(\(\([\s\S]*\)[\s\S]*\)[\s\S]*\)",
|
|
24
|
+
"HEADER": "TITLE|JCAMPDX|JCAMP-DX|DATA TYPE|DATATYPE|ORIGIN|OWNER",
|
|
25
|
+
"VERSION_TITLE": "JCAMPDX|JCAMP-DX",
|
|
26
|
+
}
|
|
27
|
+
|
|
26
28
|
MAX_LINE_LEN = 78
|
|
27
29
|
|
|
30
|
+
# Precompile all regexes
|
|
31
|
+
_COMPILED_GRAMMAR = {k: re.compile(v) if k not in ["LIST_DELIMETER", "EQUAL_SIGN"] else v for k, v in GRAMMAR.items()}
|
|
32
|
+
|
|
33
|
+
# Module-level aliases of the precompiled patterns used by the parser below
|
|
34
|
+
_COMMENT_RE = _COMPILED_GRAMMAR["COMMENT_LINE"]
|
|
35
|
+
_USER_DEFINED_RE = _COMPILED_GRAMMAR["USER_DEFINED"]
|
|
36
|
+
_TRAILING_EOL_RE = _COMPILED_GRAMMAR["TRAILING_EOL"]
|
|
37
|
+
_DATA_LABEL_RE = _COMPILED_GRAMMAR["DATA_LABEL"]
|
|
38
|
+
_SIZE_BRACKET_RE = _COMPILED_GRAMMAR["SIZE_BRACKET"]
|
|
39
|
+
_SINGLE_NUMBER_RE = _COMPILED_GRAMMAR["SINGLE_NUMBER"]
|
|
40
|
+
_PARALLEL_BRACKET_RE = _COMPILED_GRAMMAR["PARALLEL_BRACKET"]
|
|
41
|
+
_GEO_OBJ_RE = _COMPILED_GRAMMAR["GEO_OBJ"]
|
|
42
|
+
_HEADER_RE = _COMPILED_GRAMMAR["HEADER"]
|
|
43
|
+
_VERSION_TITLE_RE = _COMPILED_GRAMMAR["VERSION_TITLE"]
|
|
44
|
+
_PARAMETER_RE = _COMPILED_GRAMMAR["PARAMETER"]
|
|
28
45
|
|
|
29
|
-
|
|
46
|
+
|
|
47
|
+
class Parameter:
|
|
30
48
|
"""
|
|
31
49
|
Data model of a single jcamp-dx parameter.
|
|
32
50
|
|
|
@@ -48,6 +66,7 @@ class Parameter(object):
|
|
|
48
66
|
|
|
49
67
|
The value is parsed once it is requested. Parse methods are different for individual subclasses.
|
|
50
68
|
"""
|
|
69
|
+
|
|
51
70
|
def __init__(self, key_str, size_str, val_str, version):
|
|
52
71
|
"""
|
|
53
72
|
:param key_str: key part of the parameter e.g. ##$ACQ_ReceiverSelect
|
|
@@ -61,18 +80,17 @@ class Parameter(object):
|
|
|
61
80
|
self.version = version
|
|
62
81
|
|
|
63
82
|
def __str__(self):
|
|
83
|
+
str_ = f"{self.key_str}"
|
|
64
84
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
if self.version == '4.24':
|
|
68
|
-
str_ += '='
|
|
85
|
+
if self.version == "4.24":
|
|
86
|
+
str_ += "="
|
|
69
87
|
else:
|
|
70
|
-
str_ +=
|
|
88
|
+
str_ += "= "
|
|
71
89
|
|
|
72
|
-
if self.size_str !=
|
|
73
|
-
str_ +=
|
|
90
|
+
if self.size_str != "":
|
|
91
|
+
str_ += f"{self.size_str}\n"
|
|
74
92
|
|
|
75
|
-
str_ +=
|
|
93
|
+
str_ += f"{self.val_str}"
|
|
76
94
|
|
|
77
95
|
return str_
|
|
78
96
|
|
|
@@ -80,64 +98,56 @@ class Parameter(object):
|
|
|
80
98
|
return self.key_str
|
|
81
99
|
|
|
82
100
|
def to_dict(self):
|
|
83
|
-
|
|
84
|
-
result = {'value': self._encode_parameter(self.value)}
|
|
101
|
+
result = {"value": self._encode_parameter(self.value)}
|
|
85
102
|
|
|
86
103
|
if self.size:
|
|
87
|
-
result[
|
|
104
|
+
result["size"] = self._encode_parameter(self.size)
|
|
88
105
|
|
|
89
106
|
return result
|
|
90
107
|
|
|
91
108
|
def _encode_parameter(self, var):
|
|
92
|
-
if isinstance(var, np.integer
|
|
109
|
+
if isinstance(var, (np.integer, np.int32)):
|
|
93
110
|
return int(var)
|
|
94
|
-
|
|
111
|
+
if isinstance(var, np.floating):
|
|
95
112
|
return float(var)
|
|
96
|
-
|
|
113
|
+
if isinstance(var, np.ndarray):
|
|
97
114
|
return var.tolist()
|
|
98
|
-
|
|
115
|
+
if isinstance(var, np.dtype):
|
|
99
116
|
return var.name
|
|
100
|
-
|
|
117
|
+
if isinstance(var, list):
|
|
101
118
|
return [self._encode_parameter(var_) for var_ in var]
|
|
102
|
-
|
|
119
|
+
if isinstance(var, tuple):
|
|
103
120
|
return self._encode_parameter(list(var))
|
|
104
|
-
|
|
105
|
-
return var
|
|
106
|
-
|
|
121
|
+
return var
|
|
107
122
|
|
|
108
123
|
@property
|
|
109
124
|
def key(self):
|
|
110
|
-
return
|
|
125
|
+
return self.key_str.replace("##", "").replace("$", "").rstrip()
|
|
111
126
|
|
|
112
127
|
@key.setter
|
|
113
128
|
def key(self, key):
|
|
114
|
-
#Throw error
|
|
129
|
+
# Throw error
|
|
115
130
|
pass
|
|
116
131
|
|
|
117
132
|
@property
|
|
118
133
|
def user_defined(self):
|
|
119
|
-
|
|
120
|
-
return True
|
|
121
|
-
else:
|
|
122
|
-
return False
|
|
134
|
+
return bool(_USER_DEFINED_RE.search(self.key_str))
|
|
123
135
|
|
|
124
136
|
@property
|
|
125
137
|
def tuple(self):
|
|
126
138
|
value = self.value
|
|
127
|
-
if isinstance(value, int
|
|
139
|
+
if isinstance(value, (int, float)):
|
|
128
140
|
return (value,)
|
|
129
|
-
|
|
130
|
-
return tuple(value)
|
|
141
|
+
return tuple(value)
|
|
131
142
|
|
|
132
143
|
@property
|
|
133
144
|
def list(self):
|
|
134
145
|
value = self.value
|
|
135
146
|
if isinstance(value, list):
|
|
136
147
|
return value
|
|
137
|
-
|
|
148
|
+
if isinstance(value, (float, int, str)):
|
|
138
149
|
return [value]
|
|
139
|
-
|
|
140
|
-
return list(value)
|
|
150
|
+
return list(value)
|
|
141
151
|
|
|
142
152
|
@property
|
|
143
153
|
def nested(self):
|
|
@@ -157,9 +167,7 @@ class Parameter(object):
|
|
|
157
167
|
value = self.list
|
|
158
168
|
if isinstance(value[0], list):
|
|
159
169
|
return value
|
|
160
|
-
|
|
161
|
-
return [value]
|
|
162
|
-
|
|
170
|
+
return [value]
|
|
163
171
|
|
|
164
172
|
@property
|
|
165
173
|
def array(self):
|
|
@@ -170,10 +178,7 @@ class Parameter(object):
|
|
|
170
178
|
value = self.value
|
|
171
179
|
if isinstance(value, np.ndarray):
|
|
172
180
|
return value.shape
|
|
173
|
-
|
|
174
|
-
raise AttributeError
|
|
175
|
-
|
|
176
|
-
|
|
181
|
+
raise AttributeError
|
|
177
182
|
|
|
178
183
|
@classmethod
|
|
179
184
|
def pack_key(cls, value, usr_defined):
|
|
@@ -182,31 +187,25 @@ class Parameter(object):
|
|
|
182
187
|
val_str = value
|
|
183
188
|
|
|
184
189
|
if usr_defined:
|
|
185
|
-
val_str =
|
|
190
|
+
val_str = "$" + val_str
|
|
186
191
|
|
|
187
|
-
return
|
|
192
|
+
return "##" + val_str
|
|
188
193
|
|
|
189
194
|
|
|
190
195
|
class GenericParameter(Parameter):
|
|
191
196
|
def __init__(self, version, key, size_bracket, value):
|
|
192
|
-
super(
|
|
197
|
+
super().__init__(version, key, size_bracket, value)
|
|
193
198
|
|
|
194
199
|
@classmethod
|
|
195
200
|
def from_values(cls, version, key, size, value, user_defined):
|
|
196
|
-
|
|
197
|
-
key_str = key
|
|
198
|
-
size_str = size
|
|
199
|
-
value_str = value
|
|
200
|
-
|
|
201
|
-
super(GenericParameter, cls).__init__(version, key_str, size_str, value_str)
|
|
201
|
+
return cls(version, key, size, value)
|
|
202
202
|
|
|
203
203
|
@property
|
|
204
|
-
def value(self
|
|
205
|
-
|
|
206
|
-
val_str = re.sub('\n', '', self.val_str)
|
|
204
|
+
def value(self):
|
|
205
|
+
val_str = self.val_str.replace("\n", "")
|
|
207
206
|
|
|
208
207
|
# unwrap wrapped list
|
|
209
|
-
if re.match(
|
|
208
|
+
if re.match(r"@[0-9]*\*", val_str) is not None:
|
|
210
209
|
val_str = self._unwrap_list(val_str)
|
|
211
210
|
|
|
212
211
|
val_str_list = GenericParameter.split_parallel_lists(val_str)
|
|
@@ -219,12 +218,10 @@ class GenericParameter(Parameter):
|
|
|
219
218
|
value.append(GenericParameter.parse_value(val_str))
|
|
220
219
|
|
|
221
220
|
if isinstance(value, np.ndarray) and self.size:
|
|
222
|
-
if not
|
|
223
|
-
return np.reshape(value, self.size, order=
|
|
224
|
-
else:
|
|
225
|
-
return value
|
|
226
|
-
else:
|
|
221
|
+
if "str" not in value.dtype.name:
|
|
222
|
+
return np.reshape(value, self.size, order="C")
|
|
227
223
|
return value
|
|
224
|
+
return value
|
|
228
225
|
|
|
229
226
|
@value.setter
|
|
230
227
|
def value(self, value):
|
|
@@ -246,7 +243,7 @@ class GenericParameter(Parameter):
|
|
|
246
243
|
val_str = value
|
|
247
244
|
|
|
248
245
|
self.size = size
|
|
249
|
-
self.val_str= val_str
|
|
246
|
+
self.val_str = val_str
|
|
250
247
|
|
|
251
248
|
def primed_dict(self, index):
|
|
252
249
|
nested_list = self.nested
|
|
@@ -265,20 +262,17 @@ class GenericParameter(Parameter):
|
|
|
265
262
|
|
|
266
263
|
return sub_list
|
|
267
264
|
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
265
|
@property
|
|
272
266
|
def size(self):
|
|
273
267
|
size_str = self.size_str[1:-2]
|
|
274
268
|
|
|
275
|
-
if size_str ==
|
|
269
|
+
if size_str == "":
|
|
276
270
|
return None
|
|
277
271
|
|
|
278
|
-
#"(3,3)\n" -> 3,3
|
|
272
|
+
# "(3,3)\n" -> 3,3
|
|
279
273
|
if ".." in size_str:
|
|
280
274
|
try:
|
|
281
|
-
size_str = np.array(size_str.split(".."), dtype=
|
|
275
|
+
size_str = np.array(size_str.split(".."), dtype="int32")
|
|
282
276
|
size = range(size_str[0], size_str[1])
|
|
283
277
|
except ValueError:
|
|
284
278
|
# size bracket is returned as string
|
|
@@ -287,7 +281,7 @@ class GenericParameter(Parameter):
|
|
|
287
281
|
|
|
288
282
|
elif "," in size_str:
|
|
289
283
|
size_str = size_str.split(",")
|
|
290
|
-
size = tuple(np.array(size_str, dtype=
|
|
284
|
+
size = tuple(np.array(size_str, dtype="int32"))
|
|
291
285
|
else:
|
|
292
286
|
size = (int(size_str),)
|
|
293
287
|
|
|
@@ -296,55 +290,55 @@ class GenericParameter(Parameter):
|
|
|
296
290
|
@size.setter
def size(self, size):
    """Serialize ``size`` into the size bracket string stored in ``size_str``.

    :param size: ``None`` clears the bracket; a ``tuple`` holds the individual dimensions; a ``range`` is
        written as ``(start..stop)``; an ``int`` is a single dimension; any other object is formatted
        with ``str()`` between parentheses
    """
    if size is None:
        self.size_str = ""
        return

    if isinstance(size, tuple):
        if len(size) > 1:
            # (1, 3, 3) -> "( 1, 3, 3 )"
            size_str = f"( {str(size)[1:-1]} )"
        else:
            # (1,) -> "( 1 )", the slice also drops the trailing comma of a one-element tuple
            size_str = f"( {str(size)[1:-2]} )"
    elif isinstance(size, range):
        # range(0, 5) -> "(0..5)". The f prefix was missing, so the placeholders were stored literally.
        size_str = f"({size.start}..{size.stop})"
    elif isinstance(size, int):
        size_str = f"( {size!s} )"
    else:
        size_str = f"({size})"

    self.size_str = size_str
|
|
317
311
|
|
|
318
312
|
@classmethod
|
|
319
313
|
def parse_value(cls, val_str, size_bracket=None):
|
|
320
314
|
# remove \n
|
|
321
|
-
val_str =
|
|
315
|
+
val_str = val_str.replace("\n", "")
|
|
322
316
|
|
|
323
317
|
# sharp string
|
|
324
|
-
if val_str.startswith(
|
|
325
|
-
|
|
326
|
-
val_strs = re.findall('<[^<>]*>', val_str)
|
|
318
|
+
if val_str.startswith("<") and val_str.endswith(">"):
|
|
319
|
+
val_strs = re.findall("<[^<>]*>", val_str)
|
|
327
320
|
|
|
328
321
|
if len(val_strs) == 1:
|
|
329
322
|
return val_strs[0]
|
|
330
|
-
|
|
331
|
-
return np.array(val_strs)
|
|
332
|
-
|
|
323
|
+
return np.array(val_strs)
|
|
333
324
|
|
|
334
325
|
# int/float
|
|
335
|
-
if
|
|
326
|
+
if _SINGLE_NUMBER_RE.fullmatch(val_str):
|
|
336
327
|
try:
|
|
337
|
-
|
|
328
|
+
try:
|
|
329
|
+
value = int(val_str)
|
|
330
|
+
except ValueError:
|
|
331
|
+
value = float(val_str)
|
|
338
332
|
|
|
339
333
|
# if value is int, or float, return, tuple will be parsed as list later on
|
|
340
|
-
if isinstance(value, float
|
|
334
|
+
if isinstance(value, (float, int)):
|
|
341
335
|
return value
|
|
342
336
|
except (ValueError, SyntaxError):
|
|
343
337
|
pass
|
|
344
338
|
|
|
345
339
|
# list
|
|
346
|
-
if val_str.startswith(
|
|
347
|
-
val_strs =
|
|
340
|
+
if val_str.startswith("(") and val_str.endswith(""):
|
|
341
|
+
val_strs = val_str[1:-1].split(", ")
|
|
348
342
|
value = []
|
|
349
343
|
|
|
350
344
|
for val_str in val_strs:
|
|
@@ -352,27 +346,25 @@ class GenericParameter(Parameter):
|
|
|
352
346
|
|
|
353
347
|
return value
|
|
354
348
|
|
|
355
|
-
val_strs =
|
|
349
|
+
val_strs = val_str.split(" ")
|
|
356
350
|
|
|
357
351
|
if len(val_strs) > 1:
|
|
358
352
|
# try casting into int, or float array, if both of casts fail, it should be string array
|
|
359
353
|
try:
|
|
360
|
-
return np.array(val_strs).astype(
|
|
354
|
+
return np.array(val_strs).astype("int")
|
|
361
355
|
except ValueError:
|
|
362
356
|
pass
|
|
363
357
|
|
|
364
358
|
try:
|
|
365
|
-
return np.array(val_strs).astype(
|
|
359
|
+
return np.array(val_strs).astype("float")
|
|
366
360
|
except ValueError:
|
|
367
361
|
pass
|
|
368
362
|
|
|
369
363
|
return np.array(val_strs)
|
|
370
|
-
|
|
371
|
-
return val_strs[0]
|
|
364
|
+
return val_strs[0]
|
|
372
365
|
|
|
373
366
|
@classmethod
|
|
374
367
|
def serialize_value(cls, value):
|
|
375
|
-
|
|
376
368
|
if isinstance(value, float):
|
|
377
369
|
val_str = cls.serialize_float(value)
|
|
378
370
|
elif isinstance(value, int):
|
|
@@ -388,65 +380,60 @@ class GenericParameter(Parameter):
|
|
|
388
380
|
@classmethod
def serialize_float(cls, value, version=None):
    """Serialize a float into its jcamp-dx string form.

    :param value: number to serialize
    :param version: jcamp-dx version of the file. Both the string ``"4.24"`` and the number ``4.24`` select
        the exponential notation. Optional, because ``serialize_value`` calls this method with the value only.
    :return: ``f"{value:.6e}"`` for version 4.24, ``str(value)`` otherwise
    """
    # Versions are kept as strings ("4.24") in this module, a comparison with the float 4.24 never matched them.
    if str(version).strip() == "4.24":
        return f"{value:.6e}"
    return str(value)
|
|
394
385
|
|
|
395
386
|
@classmethod
|
|
396
387
|
def serialize_list(cls, value):
|
|
397
|
-
|
|
398
388
|
if isinstance(value[0], list):
|
|
399
|
-
|
|
400
|
-
val_str = ''
|
|
389
|
+
val_str = ""
|
|
401
390
|
|
|
402
391
|
for value_ in value:
|
|
403
392
|
val_str += cls.serialize_list(value_)
|
|
404
|
-
val_str +=
|
|
393
|
+
val_str += " "
|
|
405
394
|
|
|
406
395
|
return val_str
|
|
407
396
|
|
|
408
|
-
|
|
409
|
-
val_str = '('
|
|
397
|
+
val_str = "("
|
|
410
398
|
|
|
411
399
|
for item in value:
|
|
412
400
|
val_str += cls.serialize_value(item)
|
|
413
|
-
val_str +=
|
|
401
|
+
val_str += ", "
|
|
414
402
|
|
|
415
|
-
return val_str[:-2] +
|
|
403
|
+
return val_str[:-2] + ")"
|
|
416
404
|
|
|
417
405
|
@classmethod
|
|
418
406
|
def serialize_nested_list(cls, values):
|
|
419
|
-
val_str =
|
|
407
|
+
val_str = ""
|
|
420
408
|
|
|
421
409
|
for value in values:
|
|
422
410
|
val_str += GenericParameter.serialize_list(value)
|
|
423
|
-
val_str +=
|
|
411
|
+
val_str += " "
|
|
424
412
|
|
|
425
413
|
return val_str[0:-1]
|
|
426
414
|
|
|
427
415
|
@classmethod
|
|
428
416
|
def serialize_ndarray(cls, value):
|
|
429
|
-
val_str =
|
|
417
|
+
val_str = ""
|
|
430
418
|
|
|
431
419
|
for value_ in value:
|
|
432
420
|
val_str_ = str(value_)
|
|
433
421
|
val_str += val_str_
|
|
434
|
-
val_str +=
|
|
422
|
+
val_str += " "
|
|
435
423
|
|
|
436
424
|
return val_str[:-1]
|
|
437
425
|
|
|
438
426
|
@classmethod
|
|
439
427
|
def split_parallel_lists(cls, val_str):
|
|
440
|
-
lst =
|
|
428
|
+
lst = _PARALLEL_BRACKET_RE.split(val_str)
|
|
441
429
|
|
|
442
430
|
if len(lst) == 1:
|
|
443
431
|
return lst[0]
|
|
444
432
|
|
|
445
433
|
def restore_right_bra(string):
|
|
446
|
-
if string.endswith(
|
|
434
|
+
if string.endswith(")"):
|
|
447
435
|
return string
|
|
448
|
-
|
|
449
|
-
return string + ')'
|
|
436
|
+
return string + ")"
|
|
450
437
|
|
|
451
438
|
for i in range(len(lst)):
|
|
452
439
|
lst[i] = restore_right_bra(lst[i])
|
|
@@ -454,17 +441,16 @@ class GenericParameter(Parameter):
|
|
|
454
441
|
return lst
|
|
455
442
|
|
|
456
443
|
def _unwrap_list(self, val_str):
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
size, value = re.split('\*', sub)
|
|
444
|
+
while re.search(r"@[0-9]*\*\(-?\d*\.?\d*\)", val_str):
|
|
445
|
+
match = re.search(r"@[0-9]*\*\(-?\d*\.?\d*\)", val_str)
|
|
446
|
+
left = val_str[0 : match.start()]
|
|
447
|
+
right = val_str[match.end() :]
|
|
448
|
+
sub = val_str[match.start() : match.end()]
|
|
449
|
+
size, value = re.split(r"\*", sub)
|
|
464
450
|
size = int(size[1:])
|
|
465
|
-
middle =
|
|
466
|
-
for
|
|
467
|
-
middle +=
|
|
451
|
+
middle = ""
|
|
452
|
+
for _ in range(size):
|
|
453
|
+
middle += f"{value[1:-1]} "
|
|
468
454
|
val_str = left + middle[0:-1] + right
|
|
469
455
|
|
|
470
456
|
return val_str
|
|
@@ -472,7 +458,7 @@ class GenericParameter(Parameter):
|
|
|
472
458
|
|
|
473
459
|
class HeaderParameter(Parameter):
|
|
474
460
|
def __init__(self, key_str, size_str, val_str, version):
|
|
475
|
-
super(
|
|
461
|
+
super().__init__(key_str, size_str, val_str, version)
|
|
476
462
|
|
|
477
463
|
@property
|
|
478
464
|
def value(self):
|
|
@@ -489,7 +475,7 @@ class HeaderParameter(Parameter):
|
|
|
489
475
|
|
|
490
476
|
class GeometryParameter(Parameter):
|
|
491
477
|
def __init__(self, key_str, size_str, val_str, version):
|
|
492
|
-
super(
|
|
478
|
+
super().__init__(key_str, size_str, val_str, version)
|
|
493
479
|
|
|
494
480
|
@property
|
|
495
481
|
def value(self):
|
|
@@ -506,7 +492,7 @@ class GeometryParameter(Parameter):
|
|
|
506
492
|
# :return: 4x4 3D Affine Transformation Matrix
|
|
507
493
|
# """
|
|
508
494
|
# # TODO support for multiple slice packages
|
|
509
|
-
# match = re.match('\(\(\([^\)]*\)', self.val_str)
|
|
495
|
+
# match = re.match(r'\(\(\([^\)]*\)', self.val_str)
|
|
510
496
|
# affine_str = self.val_str[match.start() + 3: match.end() - 1]
|
|
511
497
|
# orient, shift = affine_str.split(', ')
|
|
512
498
|
#
|
|
@@ -519,7 +505,6 @@ class GeometryParameter(Parameter):
|
|
|
519
505
|
# return affine
|
|
520
506
|
|
|
521
507
|
def to_dict(self):
|
|
522
|
-
|
|
523
508
|
# result = {'affine': self._encode_parameter(self.affine)}
|
|
524
509
|
result = {}
|
|
525
510
|
return result
|
|
@@ -527,11 +512,11 @@ class GeometryParameter(Parameter):
|
|
|
527
512
|
|
|
528
513
|
class DataParameter(Parameter):
|
|
529
514
|
def __init__(self, version, key, size_bracket, value):
|
|
530
|
-
super(
|
|
515
|
+
super().__init__(version, key, size_bracket, value)
|
|
531
516
|
|
|
532
517
|
@property
def value(self):
    """Parse the data points stored in ``val_str``.

    Entries are separated by ``", "`` or by an end of line, which is the ``DATA_DELIMETERS`` pattern of
    ``GRAMMAR``. Every entry is parsed by ``GenericParameter.parse_value``.

    :return: the parsed entries reshaped to ``(2, -1)``
    """
    # Split on both delimiters. Turning "\n" into "," and splitting on ", " only fused the last entry of a line
    # with the first entry of the following line ("1, 2\n3, 4" -> "1", "2,3", "4").
    entries = (entry.strip() for entry in re.split(GRAMMAR["DATA_DELIMETERS"], self.val_str))

    # A trailing delimiter produces an empty entry, which is not a data point.
    data = [GenericParameter.parse_value(entry) for entry in entries if entry]
    return np.reshape(data, (2, -1))
|
|
537
522
|
|
|
@@ -540,11 +525,11 @@ class DataParameter(Parameter):
|
|
|
540
525
|
val_str = ""
|
|
541
526
|
|
|
542
527
|
for i in range(len(value)):
|
|
543
|
-
val_str += "{:.6e}"
|
|
528
|
+
val_str += f"{value[i]:.6e}"
|
|
544
529
|
if np.mod(i, 2) == 0:
|
|
545
|
-
val_str +=
|
|
530
|
+
val_str += ", "
|
|
546
531
|
else:
|
|
547
|
-
val_str +=
|
|
532
|
+
val_str += "\n"
|
|
548
533
|
|
|
549
534
|
self.value = val_str
|
|
550
535
|
|
|
@@ -554,10 +539,10 @@ class DataParameter(Parameter):
|
|
|
554
539
|
|
|
555
540
|
@size.setter
|
|
556
541
|
def size(self, value):
|
|
557
|
-
self.size_str =
|
|
542
|
+
self.size_str = f"({value})"
|
|
558
543
|
|
|
559
544
|
|
|
560
|
-
class JCAMPDX
|
|
545
|
+
class JCAMPDX:
|
|
561
546
|
"""Representation of a single jcamp-dx file.
|
|
562
547
|
|
|
563
548
|
Its main component is a dictionary of parameters.
|
|
@@ -569,17 +554,19 @@ class JCAMPDX(object):
|
|
|
569
554
|
|
|
570
555
|
from bruker.jcampdx import JCAMPDX
|
|
571
556
|
|
|
572
|
-
visu_pars = JCAMPDX(
|
|
573
|
-
size = visu_pars.get_value(
|
|
557
|
+
visu_pars = JCAMPDX("path/visu_pars")
|
|
558
|
+
size = visu_pars.get_value("VisuCoreSize")
|
|
574
559
|
|
|
575
560
|
"""
|
|
576
561
|
|
|
577
|
-
def __init__(self, path, load=
|
|
562
|
+
def __init__(self, path, load=None, **kwargs):
|
|
578
563
|
"""JCAMPDX constructor
|
|
579
564
|
|
|
580
565
|
JCAMPDX object is constructed by passing a path to a valid jcamp-dx file. It is possible to construct an
|
|
581
566
|
empty object.
|
|
582
567
|
"""
|
|
568
|
+
if load is None:
|
|
569
|
+
load = True
|
|
583
570
|
|
|
584
571
|
# If path is directory
|
|
585
572
|
self.path = Path(path)
|
|
@@ -601,21 +588,18 @@ class JCAMPDX(object):
|
|
|
601
588
|
return self.path.name
|
|
602
589
|
|
|
603
590
|
def __str__(self, file=None):
|
|
604
|
-
|
|
605
|
-
|
|
606
591
|
if self.params == {}:
|
|
607
592
|
return self.type
|
|
608
593
|
|
|
609
|
-
jcampdx_serial =
|
|
594
|
+
jcampdx_serial = ""
|
|
610
595
|
|
|
611
596
|
for param in self.params.values():
|
|
612
|
-
|
|
613
597
|
param_str = str(param)
|
|
614
598
|
|
|
615
599
|
if len(param_str) > 78:
|
|
616
600
|
param_str = JCAMPDX.wrap_lines(param_str)
|
|
617
601
|
|
|
618
|
-
jcampdx_serial +=
|
|
602
|
+
jcampdx_serial += f"{param_str}\n"
|
|
619
603
|
|
|
620
604
|
return jcampdx_serial[0:-1] + "\n##END= "
|
|
621
605
|
|
|
@@ -633,6 +617,9 @@ class JCAMPDX(object):
|
|
|
633
617
|
def __getitem__(self, key):
|
|
634
618
|
return self.params[key]
|
|
635
619
|
|
|
620
|
+
def __contains__(self, item):
|
|
621
|
+
return item in self.params
|
|
622
|
+
|
|
636
623
|
def __delitem__(self, key):
|
|
637
624
|
del self.params[key]
|
|
638
625
|
|
|
@@ -646,7 +633,7 @@ class JCAMPDX(object):
|
|
|
646
633
|
self.params = {}
|
|
647
634
|
|
|
648
635
|
def to_dict(self):
|
|
649
|
-
parameters =
|
|
636
|
+
parameters = {}
|
|
650
637
|
|
|
651
638
|
for param in self.params.items():
|
|
652
639
|
parameters[param[0]] = param[1].to_dict()
|
|
@@ -661,34 +648,27 @@ class JCAMPDX(object):
|
|
|
661
648
|
:param names: *list* names of properties to be exported
|
|
662
649
|
"""
|
|
663
650
|
if path:
|
|
664
|
-
with open(path,
|
|
665
|
-
|
|
651
|
+
with open(path, "w") as json_file:
|
|
652
|
+
json.dump(self.to_dict(), json_file, indent=4)
|
|
666
653
|
else:
|
|
667
654
|
return json.dumps(self.to_dict(), indent=4)
|
|
655
|
+
return None
|
|
668
656
|
|
|
669
657
|
@property
def version(self):
    """JCAMP-DX version of the file.

    The version is taken from the loaded parameters when one of the version keys is present. Otherwise the
    first 10 lines of the file are scanned for an entry starting with ``##JCAMPDX=`` or ``##JCAMP-DX=``.

    :return: value of the version parameter, or the version string read from the file, e.g. ``"4.24"``
    :raises InvalidJcampdxFile: if the file cannot be read as text, or no version entry is found
    """
    # Both spellings of the version key are accepted, as in VERSION_TITLE of GRAMMAR.
    for key in ("JCAMPDX", "JCAMP-DX"):
        if key in self.params:
            # Return the value and not the parameter object, the branch below yields a plain string too.
            return self.params[key].value

    try:
        with self.path.open("r") as f:
            for _ in range(10):
                line = f.readline()
                if line.startswith(("##JCAMPDX=", "##JCAMP-DX=")):
                    # strip the white space which may follow the equal sign as well as the end of line
                    return line.split("=", 1)[1].strip()
    except (UnicodeDecodeError, OSError) as e:
        # keep the path in the exception, like every other place raising InvalidJcampdxFile
        raise InvalidJcampdxFile(self.path) from e

    raise InvalidJcampdxFile(self.path)
|
|
694
674
|
|
|
@@ -702,6 +682,7 @@ class JCAMPDX(object):
|
|
|
702
682
|
"""
|
|
703
683
|
PUBLIC INTERFACE
|
|
704
684
|
"""
|
|
685
|
+
|
|
705
686
|
def get_parameters(self):
|
|
706
687
|
return self.params
|
|
707
688
|
|
|
@@ -715,33 +696,37 @@ class JCAMPDX(object):
|
|
|
715
696
|
return self.params[key].value
|
|
716
697
|
|
|
717
698
|
def get_list(self, key):
|
|
718
|
-
"""Idea is to ensure, that a parameter will be a list even if parameter only contains one entry
|
|
719
|
-
"""
|
|
699
|
+
"""Idea is to ensure, that a parameter will be a list even if parameter only contains one entry"""
|
|
720
700
|
value = self.get_value(key)
|
|
721
701
|
if isinstance(value, list):
|
|
722
702
|
return value
|
|
723
|
-
|
|
703
|
+
if isinstance(value, np.ndarray):
|
|
724
704
|
return list(value)
|
|
725
|
-
|
|
726
|
-
|
|
705
|
+
return [
|
|
706
|
+
value,
|
|
707
|
+
]
|
|
727
708
|
|
|
728
709
|
def get_nested_list(self, key):
|
|
729
710
|
value = self.get_value(key)
|
|
730
711
|
if not isinstance(value, list):
|
|
731
|
-
value =[
|
|
712
|
+
value = [
|
|
713
|
+
value,
|
|
714
|
+
]
|
|
732
715
|
|
|
733
716
|
if not isinstance(value[0], list):
|
|
734
|
-
value = [
|
|
717
|
+
value = [
|
|
718
|
+
value,
|
|
719
|
+
]
|
|
735
720
|
|
|
736
721
|
return value
|
|
737
722
|
|
|
738
|
-
def set_nested_list(self,key, value):
|
|
723
|
+
def set_nested_list(self, key, value):
|
|
739
724
|
self.params[key].value = value
|
|
740
725
|
|
|
741
726
|
def get_int(self, key):
|
|
742
727
|
return int(self.get_value(key))
|
|
743
728
|
|
|
744
|
-
def set_int(self,key, value):
|
|
729
|
+
def set_int(self, key, value):
|
|
745
730
|
self.params[key].value = value
|
|
746
731
|
|
|
747
732
|
def get_float(self, key):
|
|
@@ -750,13 +735,12 @@ class JCAMPDX(object):
|
|
|
750
735
|
def get_tuple(self, key):
    """Return the value of a parameter as a tuple.

    :param key: name of the parameter
    :return: one-element tuple for a scalar value (number or string), otherwise the value converted by
        ``tuple()``
    """
    value = self.get_value(key)

    # Scalars are wrapped. A string counts as a scalar, as it does in Parameter.list: tuple("abc") would split it
    # into characters. Numpy scalars are not iterable at all.
    if isinstance(value, (int, float, str, np.generic)):
        return (value,)
    return tuple(value)
|
|
757
741
|
|
|
758
|
-
def get_array(self, key, dtype=None, shape=(-1,), order=
|
|
759
|
-
parameter=self.get_parameter(key)
|
|
742
|
+
def get_array(self, key, dtype=None, shape=(-1,), order="C"):
|
|
743
|
+
parameter = self.get_parameter(key)
|
|
760
744
|
value = parameter.value
|
|
761
745
|
size = parameter.size
|
|
762
746
|
|
|
@@ -771,17 +755,19 @@ class JCAMPDX(object):
|
|
|
771
755
|
|
|
772
756
|
return np.reshape(value, shape, order=order)
|
|
773
757
|
|
|
774
|
-
def set_array(self, key, value, file=None
|
|
775
|
-
|
|
776
|
-
parameter = self.get_parameter(key, file)
|
|
758
|
+
def set_array(self, key, value, file=None, order="C"):
|
|
759
|
+
self.get_parameter(key, file)
|
|
777
760
|
|
|
778
|
-
value = np.reshape(value,(-1,), order=order)
|
|
761
|
+
value = np.reshape(value, (-1,), order=order)
|
|
779
762
|
self.__setattr__(key, value.tolist())
|
|
780
763
|
|
|
781
|
-
def get_str(self, key, strip_sharp=
|
|
764
|
+
def get_str(self, key, strip_sharp=None):
|
|
765
|
+
if strip_sharp is None:
|
|
766
|
+
strip_sharp = True
|
|
767
|
+
|
|
782
768
|
value = str(self.get_value(key))
|
|
783
769
|
|
|
784
|
-
if strip_sharp and value.startswith(
|
|
770
|
+
if strip_sharp and value.startswith("<") and value.endswith(">"):
|
|
785
771
|
value = value[1:-1]
|
|
786
772
|
|
|
787
773
|
return value
|
|
@@ -796,22 +782,21 @@ class JCAMPDX(object):
|
|
|
796
782
|
with open(path) as f:
|
|
797
783
|
try:
|
|
798
784
|
content = f.read()
|
|
799
|
-
except:
|
|
800
|
-
raise InvalidJcampdxFile(path)
|
|
785
|
+
except (UnicodeDecodeError, OSError) as e:
|
|
786
|
+
raise InvalidJcampdxFile(path) from e
|
|
801
787
|
|
|
802
|
-
match = re.search(
|
|
788
|
+
match = re.search(rf"##{key}[^\#\$]+|##\${key}[^\#\$]+", content)
|
|
803
789
|
|
|
804
|
-
if match
|
|
790
|
+
if match is None:
|
|
805
791
|
raise ParameterNotFound(key, path)
|
|
806
792
|
|
|
807
|
-
line = content[match.start():match.end()-1]
|
|
793
|
+
line = content[match.start() : match.end() - 1] # strip trailing EOL
|
|
808
794
|
key, parameter = JCAMPDX.handle_jcampdx_line(line, None)
|
|
809
795
|
|
|
810
796
|
return key, parameter
|
|
811
797
|
|
|
812
798
|
@classmethod
|
|
813
799
|
def read_jcampdx(cls, path):
|
|
814
|
-
|
|
815
800
|
path = Path(path)
|
|
816
801
|
|
|
817
802
|
params = {}
|
|
@@ -819,25 +804,26 @@ class JCAMPDX(object):
|
|
|
819
804
|
with path.open() as f:
|
|
820
805
|
try:
|
|
821
806
|
content = f.read()
|
|
822
|
-
except:
|
|
823
|
-
raise JcampdxFileError(
|
|
807
|
+
except (UnicodeDecodeError, OSError) as e:
|
|
808
|
+
raise JcampdxFileError(f"file {path} is not a text file") from e
|
|
824
809
|
|
|
825
810
|
# remove all comments
|
|
826
|
-
content =
|
|
811
|
+
content = _COMMENT_RE.sub("", content)
|
|
827
812
|
|
|
828
813
|
# split into individual entries
|
|
829
|
-
content =
|
|
814
|
+
content = _PARAMETER_RE.split(content)[1:-1]
|
|
830
815
|
|
|
831
816
|
# strip trailing EOL
|
|
832
|
-
content = [
|
|
817
|
+
content = [_TRAILING_EOL_RE.sub("", x) for x in content]
|
|
833
818
|
|
|
834
819
|
# ASSUMPTION the jcampdx version string is in the second row
|
|
835
820
|
try:
|
|
836
821
|
version_line = content[1]
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
822
|
+
except IndexError:
|
|
823
|
+
raise JcampdxFileError(f"file {path} is too short or not a text file") from IndexError
|
|
824
|
+
|
|
825
|
+
if re.search(GRAMMAR["VERSION_TITLE"], version_line) is None:
|
|
826
|
+
raise JcampdxFileError(f"file {path} is not a JCAMP-DX file")
|
|
841
827
|
|
|
842
828
|
_, _, version = JCAMPDX.divide_jcampdx_line(version_line)
|
|
843
829
|
|
|
@@ -846,18 +832,19 @@ class JCAMPDX(object):
|
|
|
846
832
|
|
|
847
833
|
for line in content:
|
|
848
834
|
# Restore the ##
|
|
849
|
-
key, parameter = JCAMPDX.handle_jcampdx_line(
|
|
835
|
+
key, parameter = JCAMPDX.handle_jcampdx_line(f"##{line}", version)
|
|
850
836
|
params[key] = parameter
|
|
851
837
|
return params
|
|
852
838
|
|
|
853
839
|
@classmethod
|
|
854
840
|
def handle_jcampdx_line(cls, line, version):
|
|
855
841
|
key_str, size_str, val_str = cls.divide_jcampdx_line(line)
|
|
856
|
-
|
|
842
|
+
|
|
843
|
+
if _GEO_OBJ_RE.search(line) is not None:
|
|
857
844
|
parameter = GeometryParameter(key_str, size_str, val_str, version)
|
|
858
|
-
elif
|
|
845
|
+
elif _DATA_LABEL_RE.search(line):
|
|
859
846
|
parameter = DataParameter(key_str, size_str, val_str, version)
|
|
860
|
-
elif
|
|
847
|
+
elif _HEADER_RE.search(key_str):
|
|
861
848
|
parameter = HeaderParameter(key_str, size_str, val_str, version)
|
|
862
849
|
else:
|
|
863
850
|
parameter = GenericParameter(key_str, size_str, val_str, version)
|
|
@@ -872,11 +859,11 @@ class JCAMPDX(object):
|
|
|
872
859
|
|
|
873
860
|
@classmethod
def split_key_value_pair(cls, line):
    """Split a jcamp-dx entry into its key and value strings.

    ASSUMPTION the first occurrence of = in the jcamp-dx line divides the key and the value.

    example: ``"##$ACQ_size=( 2 )\\n128 64"`` -> ``("##$ACQ_size", "( 2 )\\n128 64")``

    :param line: single jcamp-dx entry
    :return key: part of the line in front of the first =
    :return val_str: part of the line behind the first =, without leading white space
    :raises JcampdxFileError: if the line does not contain =
    """
    # "=" is the EQUAL_SIGN of GRAMMAR; it is a plain character, so no regular expression is needed
    key, equal_sign, val_str = line.partition("=")

    if not equal_sign:
        # Without this check the missing match surfaced as an AttributeError on None.
        raise JcampdxFileError(f"jcamp-dx entry {line!r} does not contain '='")

    return key, val_str.lstrip()
|
|
881
868
|
|
|
882
869
|
@classmethod
|
|
@@ -891,36 +878,35 @@ class JCAMPDX(object):
|
|
|
891
878
|
:return value: value string without bracket in case, size bracket is found, otherwise returns unmodified val_str
|
|
892
879
|
:return size: size bracket str
|
|
893
880
|
"""
|
|
894
|
-
match = re.search(GRAMMAR[
|
|
881
|
+
match = re.search(GRAMMAR["SIZE_BRACKET"], val_str)
|
|
895
882
|
|
|
896
883
|
if match is None:
|
|
897
|
-
return val_str,
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
val_str = val_str[match.end():].lstrip()
|
|
884
|
+
return val_str, ""
|
|
885
|
+
size_bracket = val_str[match.start() : match.end()]
|
|
886
|
+
val_str = val_str[match.end() :].lstrip()
|
|
901
887
|
|
|
902
888
|
return val_str, size_bracket
|
|
903
889
|
|
|
904
890
|
@classmethod
|
|
905
891
|
def wrap_lines(cls, line):
|
|
906
|
-
line_wraps = re.split(
|
|
892
|
+
line_wraps = re.split(r"\n", line)
|
|
907
893
|
tail = line_wraps[-1]
|
|
908
894
|
|
|
909
|
-
tail_bits = re.split(
|
|
895
|
+
tail_bits = re.split(r"\s", tail)
|
|
910
896
|
|
|
911
897
|
lines = 1
|
|
912
|
-
tail =
|
|
898
|
+
tail = ""
|
|
913
899
|
|
|
914
900
|
for tail_bit in tail_bits:
|
|
915
901
|
if len(tail + tail_bit) > lines * MAX_LINE_LEN:
|
|
916
|
-
tail +=
|
|
902
|
+
tail += "\n"
|
|
917
903
|
lines += 1
|
|
918
904
|
tail += tail_bit
|
|
919
|
-
tail +=
|
|
905
|
+
tail += " "
|
|
920
906
|
|
|
921
907
|
line_wraps[-1] = tail[:-1]
|
|
922
908
|
|
|
923
|
-
return
|
|
909
|
+
return "\n".join(line_wraps)
|
|
924
910
|
|
|
925
911
|
def write(self, path):
|
|
926
912
|
"""
|
|
@@ -928,5 +914,5 @@ class JCAMPDX(object):
|
|
|
928
914
|
:param path:
|
|
929
915
|
:return:
|
|
930
916
|
"""
|
|
931
|
-
with Path(path).open(
|
|
917
|
+
with Path(path).open("w") as f:
|
|
932
918
|
f.write(str(self))
|