brkraw 0.3.11__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- brkraw/__init__.py +9 -3
- brkraw/apps/__init__.py +12 -0
- brkraw/apps/addon/__init__.py +30 -0
- brkraw/apps/addon/core.py +35 -0
- brkraw/apps/addon/dependencies.py +402 -0
- brkraw/apps/addon/installation.py +500 -0
- brkraw/apps/addon/io.py +21 -0
- brkraw/apps/hook/__init__.py +25 -0
- brkraw/apps/hook/core.py +636 -0
- brkraw/apps/loader/__init__.py +10 -0
- brkraw/apps/loader/core.py +622 -0
- brkraw/apps/loader/formatter.py +288 -0
- brkraw/apps/loader/helper.py +797 -0
- brkraw/apps/loader/info/__init__.py +11 -0
- brkraw/apps/loader/info/scan.py +85 -0
- brkraw/apps/loader/info/scan.yaml +90 -0
- brkraw/apps/loader/info/study.py +69 -0
- brkraw/apps/loader/info/study.yaml +156 -0
- brkraw/apps/loader/info/transform.py +92 -0
- brkraw/apps/loader/types.py +220 -0
- brkraw/cli/__init__.py +5 -0
- brkraw/cli/commands/__init__.py +2 -0
- brkraw/cli/commands/addon.py +327 -0
- brkraw/cli/commands/config.py +205 -0
- brkraw/cli/commands/convert.py +903 -0
- brkraw/cli/commands/hook.py +348 -0
- brkraw/cli/commands/info.py +74 -0
- brkraw/cli/commands/init.py +214 -0
- brkraw/cli/commands/params.py +106 -0
- brkraw/cli/commands/prune.py +288 -0
- brkraw/cli/commands/session.py +371 -0
- brkraw/cli/hook_args.py +80 -0
- brkraw/cli/main.py +83 -0
- brkraw/cli/utils.py +60 -0
- brkraw/core/__init__.py +13 -0
- brkraw/core/config.py +380 -0
- brkraw/core/entrypoints.py +25 -0
- brkraw/core/formatter.py +367 -0
- brkraw/core/fs.py +495 -0
- brkraw/core/jcamp.py +600 -0
- brkraw/core/layout.py +451 -0
- brkraw/core/parameters.py +781 -0
- brkraw/core/zip.py +1121 -0
- brkraw/dataclasses/__init__.py +14 -0
- brkraw/dataclasses/node.py +139 -0
- brkraw/dataclasses/reco.py +33 -0
- brkraw/dataclasses/scan.py +61 -0
- brkraw/dataclasses/study.py +131 -0
- brkraw/default/__init__.py +3 -0
- brkraw/default/pruner_specs/deid4share.yaml +42 -0
- brkraw/default/rules/00_default.yaml +4 -0
- brkraw/default/specs/metadata_dicom.yaml +236 -0
- brkraw/default/specs/metadata_transforms.py +92 -0
- brkraw/resolver/__init__.py +7 -0
- brkraw/resolver/affine.py +539 -0
- brkraw/resolver/datatype.py +69 -0
- brkraw/resolver/fid.py +90 -0
- brkraw/resolver/helpers.py +36 -0
- brkraw/resolver/image.py +188 -0
- brkraw/resolver/nifti.py +370 -0
- brkraw/resolver/shape.py +235 -0
- brkraw/schema/__init__.py +3 -0
- brkraw/schema/context_map.yaml +62 -0
- brkraw/schema/meta.yaml +57 -0
- brkraw/schema/niftiheader.yaml +95 -0
- brkraw/schema/pruner.yaml +55 -0
- brkraw/schema/remapper.yaml +128 -0
- brkraw/schema/rules.yaml +154 -0
- brkraw/specs/__init__.py +10 -0
- brkraw/specs/hook/__init__.py +12 -0
- brkraw/specs/hook/logic.py +31 -0
- brkraw/specs/hook/validator.py +22 -0
- brkraw/specs/meta/__init__.py +5 -0
- brkraw/specs/meta/validator.py +156 -0
- brkraw/specs/pruner/__init__.py +15 -0
- brkraw/specs/pruner/logic.py +361 -0
- brkraw/specs/pruner/validator.py +119 -0
- brkraw/specs/remapper/__init__.py +27 -0
- brkraw/specs/remapper/logic.py +924 -0
- brkraw/specs/remapper/validator.py +314 -0
- brkraw/specs/rules/__init__.py +6 -0
- brkraw/specs/rules/logic.py +263 -0
- brkraw/specs/rules/validator.py +103 -0
- brkraw-0.5.0.dist-info/METADATA +81 -0
- brkraw-0.5.0.dist-info/RECORD +88 -0
- {brkraw-0.3.11.dist-info → brkraw-0.5.0.dist-info}/WHEEL +1 -2
- brkraw-0.5.0.dist-info/entry_points.txt +13 -0
- brkraw/lib/__init__.py +0 -4
- brkraw/lib/backup.py +0 -641
- brkraw/lib/bids.py +0 -0
- brkraw/lib/errors.py +0 -125
- brkraw/lib/loader.py +0 -1220
- brkraw/lib/orient.py +0 -194
- brkraw/lib/parser.py +0 -48
- brkraw/lib/pvobj.py +0 -301
- brkraw/lib/reference.py +0 -245
- brkraw/lib/utils.py +0 -471
- brkraw/scripts/__init__.py +0 -0
- brkraw/scripts/brk_backup.py +0 -106
- brkraw/scripts/brkraw.py +0 -744
- brkraw/ui/__init__.py +0 -0
- brkraw/ui/config.py +0 -17
- brkraw/ui/main_win.py +0 -214
- brkraw/ui/previewer.py +0 -225
- brkraw/ui/scan_info.py +0 -72
- brkraw/ui/scan_list.py +0 -73
- brkraw/ui/subj_info.py +0 -128
- brkraw-0.3.11.dist-info/METADATA +0 -25
- brkraw-0.3.11.dist-info/RECORD +0 -28
- brkraw-0.3.11.dist-info/entry_points.txt +0 -3
- brkraw-0.3.11.dist-info/top_level.txt +0 -2
- tests/__init__.py +0 -0
- {brkraw-0.3.11.dist-info → brkraw-0.5.0.dist-info/licenses}/LICENSE +0 -0
brkraw/core/jcamp.py
ADDED
@@ -0,0 +1,600 @@
"""
Low-level JCAMP-DX parser for Bruker Paravision parameter files.

This module provides functional utilities to parse Paravision JCAMP-DX
formatted text (e.g., `method`, `acqp`, `reco` files) into a raw but structured
representation based on OrderedDicts. The goal is to preserve the original
hierarchical format as much as possible, while making it accessible to
downstream code.

Design choices:
- This module focuses on syntactic parsing only and keeps values in a
  minimally processed form.
- Higher-level normalization, type conversion, and object-oriented access
  are delegated to the `Parameters` class in `parameters.py`.
- The API is intentionally function-based (no classes) to keep the parsing
  logic small, composable, and easier to maintain.

The main entry point is `parse_jcamp_from_path`, which returns:
- `params`: an OrderedDict of parameter keys mapped to `{"shape", "data"}`
- `comments`: JCAMP comment lines (prefixed by `$$`)
- `exceptions`: raw lines or entries that could not be parsed cleanly

A simple smoke test utility `run_smoke_test` is provided to validate all
`.jdx` fixture files within a directory.
"""
from __future__ import annotations

import logging
import re
from collections import OrderedDict
from pathlib import Path
from typing import IO, Iterable, Union, Optional, List, Tuple, Any

logger = logging.getLogger(__name__)


REGEX_PATTERNS = {
    'value': re.compile(r'^\((?P<left>[^()]*)\)\s*(?P<right>.*)$'),
    'comment': re.compile(r'\$\$.*'),
}


def split_shape_and_data(string: Optional[str]):
    """Split a raw `(shape) value` string into shape metadata and payload.

    This function detects a leading parenthesized shape specification such as
    `(1, 2, 3) rest-of-value` and splits it into a shape tuple and the
    remaining string.

    Args:
        string: Raw JCAMP value that may start with a parenthesized shape.

    Returns:
        Tuple[Optional[Tuple[int, ...]], Optional[str]]:
            A pair `(shape, data)` where:

            - `shape` is a tuple of ints when a valid shape is present,
              otherwise None.
            - `data` is the remaining value string, or None if empty.
    """
    if not string:
        return None, None

    s = string.strip()
    if not s:
        return None, None

    m = REGEX_PATTERNS['value'].match(s)
    if not m:
        return None, s

    left_raw = m.group('left').strip()
    right_raw = m.group('right').strip()
    right_raw = right_raw or None

    shape_candidate = is_shape(left_raw)

    if shape_candidate is not None:
        return shape_candidate, right_raw
    return None, s


def is_shape(string: str):
    """Convert a comma-separated shape string into a tuple of ints if valid.

    This helper is used to interpret text such as `'1, 2, 3'` as a shape
    description.

    Args:
        string: Candidate shape string (for example `'1,2,3'`).

    Returns:
        Optional[Tuple[int, ...]]: A tuple of ints when parsing succeeds,
        otherwise None.
    """
    parts = [p.strip() for p in string.split(',')]
    int_values = []
    for p in parts:
        try:
            value = int(p)
        except ValueError:
            return None
        int_values.append(value)
    return tuple(int_values)


def to_number(token: str):
    """Convert a string token to an int or float when possible.

    Handles plain integers, floating point values, and exponential notation.
    If conversion fails, the original string is returned unchanged.

    Args:
        token: Raw token that may represent a number.

    Returns:
        Parsed numeric value (int/float) or the original token string.
    """
    token = token.strip()
    if not token:
        return token
    try:
        # Handle floats or exponential notation.
        if '.' in token or 'e' in token.lower():
            return float(token)
        return int(token)
    except ValueError:
        return token


def split_top_level_commas(s: str) -> List[str]:
    """Split a string on top-level commas while respecting nesting.

    Commas inside parentheses `(...)` or angle-bracketed blocks `<...>` are
    ignored. Only commas at depth 0 are treated as separators.

    Args:
        s: Input string that may contain parenthesized groups and angle
            brackets.

    Returns:
        List[str]: Substrings separated by top-level commas.
    """
    parts: List[str] = []
    buf: List[str] = []
    depth = 0
    angle_depth = 0
    escape = False

    for ch in s:
        if escape:
            buf.append(ch)
            escape = False
            continue
        if ch == '\\':
            buf.append(ch)
            escape = True
            continue

        if ch == '<':
            angle_depth += 1
            buf.append(ch)
        elif ch == '>':
            buf.append(ch)
            angle_depth = max(angle_depth - 1, 0)
        elif angle_depth == 0 and ch == '(':
            depth += 1
            buf.append(ch)
        elif angle_depth == 0 and ch == ')':
            depth -= 1
            buf.append(ch)
        elif angle_depth == 0 and ch == ',' and depth == 0:
            parts.append(''.join(buf).strip())
            buf = []
        else:
            buf.append(ch)

    if buf:
        parts.append(''.join(buf).strip())

    return parts


def split_tokens_angle_aware(s: str) -> List[str]:
    """Tokenize by whitespace while keeping `<...>` blocks intact.

    Angle-bracketed sections such as `<PVM_SliceGeoObj>` are treated as
    indivisible tokens, even when they contain spaces.

    Args:
        s: Raw string possibly containing angle-bracketed sections.

    Returns:
        List[str]: Token list with angle-bracketed content preserved.
    """
    tokens: List[str] = []
    buf: List[str] = []
    angle_depth = 0
    escape = False

    for ch in s:
        if escape:
            buf.append(ch)
            escape = False
            continue
        if ch == '\\':
            buf.append(ch)
            escape = True
            continue

        if ch == '<':
            if buf and angle_depth == 0:
                tokens.append(''.join(buf))
                buf = []
            angle_depth += 1
            buf.append(ch)
        elif ch == '>':
            buf.append(ch)
            if angle_depth > 0:
                angle_depth -= 1
                if angle_depth == 0:
                    tokens.append(''.join(buf))
                    buf = []
        elif ch.isspace() and angle_depth == 0:
            if buf:
                tokens.append(''.join(buf))
                buf = []
        else:
            buf.append(ch)

    if buf:
        tokens.append(''.join(buf))

    return [t.strip() for t in tokens if t.strip()]


def is_single_outer_paren(s: str) -> bool:
    """Check whether the entire string is wrapped by a single outer pair.

    This function distinguishes between a single outer group:
        "(a b c)"
    and multiple outer groups:
        "(a b)(c d)"

    Angle-bracketed blocks `<...>` are ignored for the purpose of depth
    tracking.

    Args:
        s: Input string.

    Returns:
        bool: True if the string is wrapped by exactly one outer pair of
        parentheses, False otherwise.
    """
    s = s.strip()
    if not (s.startswith('(') and s.endswith(')')):
        return False

    depth = 0
    angle_depth = 0
    escape = False

    for i, ch in enumerate(s):
        if escape:
            escape = False
            continue
        if ch == '\\':
            escape = True
            continue

        if ch == '<':
            angle_depth += 1
        elif ch == '>':
            angle_depth = max(angle_depth - 1, 0)
        elif angle_depth == 0:
            if ch == '(':
                depth += 1
            elif ch == ')':
                depth -= 1
                if depth == 0 and i != len(s) - 1:
                    # If depth hits 0 before the end, there are multiple outer groups.
                    return False
    return depth == 0


def parse_leaf_tokens(text: str):
    """Parse whitespace-delimited tokens at a leaf level.

    This function expects a string without parentheses. It splits on whitespace,
    preserves `<...>` blocks as single tokens, and converts numeric tokens to
    int or float where possible.

    Args:
        text: Leaf-level string without parentheses.

    Returns:
        Any: Parsed value, which may be:
            - None for empty input
            - a single value (scalar or string)
            - a list of parsed values
    """
    tokens = split_tokens_angle_aware(text)
    values = [to_number(t) for t in tokens]

    if len(values) == 0:
        return None
    if len(values) == 1:
        return values[0]
    return values


def parse_segment(seg: str):
    """Parse a segment containing nested parentheses and leaf tokens.

    This function decomposes a substring that may include nested groups of
    parentheses and free-form tokens. Text at depth 0 is parsed as leaf
    tokens, while each nested `( ... )` group is parsed recursively via
    `parse_nested`.

    Args:
        seg: Substring potentially containing nested groups and leaf tokens.

    Returns:
        Any: Parsed object that may be:
            - None for empty segments
            - a single value
            - a list of values and/or nested structures
    """
    seg = seg.strip()
    if not seg:
        return None

    items: List[Any] = []
    buf: List[str] = []  # Text outside parentheses at depth 0.
    depth = 0
    start_idx = None
    angle_depth = 0
    escape = False

    for i, ch in enumerate(seg):
        if escape:
            if depth == 0:
                buf.append(ch)
            escape = False
            continue
        if ch == '\\':
            if depth == 0:
                buf.append(ch)
            escape = True
            continue

        if ch == '<':
            angle_depth += 1
            if depth == 0:
                buf.append(ch)

        elif ch == '>':
            if depth == 0:
                buf.append(ch)
            if angle_depth > 0:
                angle_depth -= 1

        elif angle_depth == 0 and ch == '(':
            if depth == 0:
                # Process buffered text before an opening parenthesis at depth 0.
                if buf:
                    leaf_text = ''.join(buf).strip()
                    if leaf_text:
                        leaf_val = parse_leaf_tokens(leaf_text)
                        if leaf_val is not None:
                            items.append(leaf_val)
                    buf = []
                start_idx = i
            depth += 1

        elif angle_depth == 0 and ch == ')':
            depth -= 1
            if depth == 0 and start_idx is not None:
                group_str = seg[start_idx:i+1]
                items.append(parse_nested(group_str))
                start_idx = None

        else:
            if depth == 0:
                buf.append(ch)
            # Content at depth > 0 will be handled in the group string.

    # Process any trailing text.
    if buf:
        leaf_text = ''.join(buf).strip()
        if leaf_text:
            leaf_val = parse_leaf_tokens(leaf_text)
            if leaf_val is not None:
                items.append(leaf_val)

    if len(items) == 0:
        return None
    if len(items) == 1:
        return items[0]
    return items


def parse_nested(s: str):
    """Parse JCAMP-style nested parentheses and comma-separated structures.

    This is the core recursive parser for Paravision/JCAMP text. It handles:
    - Optional outer parentheses
    - Top-level comma separation
    - Nested groups via `parse_segment`
    - Numeric token conversion via `to_number`

    Args:
        s: Raw string containing parentheses and comma-separated segments.

    Returns:
        Any: Parsed Python object corresponding to the nested structure, or
        None for empty input.
    """
    if s is None:
        return None

    s = s.strip()
    if not s:
        return None

    if "(" not in s and ")" not in s and "," not in s and " " not in s:
        return to_number(s)

    # 1) Strip outer parentheses if they wrap the entire content.
    while is_single_outer_paren(s):
        s = s[1:-1].strip()
        if not s:
            return []

    # 2) Split on top-level commas.
    parts = split_top_level_commas(s)

    # If there is no comma, treat the whole string as a single segment.
    if len(parts) == 1:
        return parse_segment(parts[0])

    # When multiple commas exist, parse each segment and return a list.
    results = []
    for part in parts:
        if not part.strip():
            continue
        val = parse_segment(part)
        if val is not None:
            results.append(val)

    return results


def _parse_lines(lines: Iterable[str]) -> dict:
    """Core parser that operates on an iterable of lines."""
    params = OrderedDict()
    comments: List[str] = []
    raw_params: List[str] = []
    exceptions: List[str] = []

    for raw in lines:
        line = raw.rstrip("\n")
        if REGEX_PATTERNS["comment"].match(line):
            comments.append(line.lstrip("$$").strip())
        else:
            raw_params.append(line)

    for param in " ".join(raw_params).split("##"):
        if not param:
            continue
        key, sep, value = param.strip().partition("=")
        if sep == "=":
            shape, data = split_shape_and_data(value)
            if isinstance(data, str):
                data = parse_nested(data)
            params[key] = {"shape": shape, "data": data}
        else:
            stripped = param.strip()
            if stripped:
                exceptions.append(stripped)

    return {"params": params, "comments": comments, "exceptions": exceptions}


def parse_jcamp_from_path(path: Path) -> dict:
    """Read a JCAMP/Paravision file from disk and parse it."""
    with open(path, "r", encoding="utf-8", errors="ignore") as fp:
        return parse_jcamp(fp)


def parse_jcamp_from_text(text: str) -> dict:
    """Parse JCAMP text already loaded into memory (string)."""
    return _parse_lines(text.splitlines())


def parse_jcamp_from_bytes(data: Union[bytes, bytearray], *, encoding: str = "utf-8") -> dict:
    """Parse JCAMP content supplied as bytes."""
    return parse_jcamp_from_text(data.decode(encoding, errors="ignore"))


def parse_jcamp(stream: Union[IO[str], IO[bytes], Path, str, bytes, bytearray]) -> dict:
    """Generic parser that accepts path, str/bytes, or file-like objects."""
    # Path-like or str path
    if isinstance(stream, (str, Path)):
        return parse_jcamp_from_path(Path(stream))

    # Raw bytes/bytearray
    if isinstance(stream, (bytes, bytearray)):
        return parse_jcamp_from_bytes(stream)

    # File-like: attempt to read, resetting position if possible
    if hasattr(stream, "read"):
        reader = stream  # type: ignore[assignment]
        try:
            pos = reader.tell()  # type: ignore[attr-defined]
        except Exception:
            pos = None

        content = reader.read()  # type: ignore[call-arg]

        if pos is not None:
            try:
                reader.seek(pos)  # type: ignore[attr-defined]
            except Exception:
                pass

        if isinstance(content, (bytes, bytearray)):
            return parse_jcamp_from_bytes(content)
        elif isinstance(content, str):
            return parse_jcamp_from_text(content)

    raise TypeError(
        "Unsupported JCAMP source. Provide a Path/str, bytes, or file-like object."
    )


def run_smoke_test(fixtures_dir: Path) -> dict:
    """Run a smoke test over all `.jdx` files in a fixtures directory.

    For each `.jdx` file, this function attempts to parse JCAMP content and
    records whether parsing completed successfully and whether any exceptions
    were produced.

    This is intended as a lightweight regression check for the JCAMP parser.

    Args:
        fixtures_dir: Directory containing one or more `.jdx` JCAMP files.

    Returns:
        dict: Summary of the smoke test results with keys:

            - `total_files` (int):
                Number of `.jdx` files processed.
            - `ok_files` (List[Path]):
                Files that parsed without any recorded exceptions.
            - `files_with_exceptions` (List[Tuple[Path, List[str]]]):
                Files that parsed but produced non-empty `exceptions`.
            - `parse_errors` (List[Tuple[Path, Exception]]):
                Files that raised an exception during parsing.
    """
    summary = {
        "total_files": 0,
        "ok_files": [],
        "files_with_exceptions": [],
        "parse_errors": [],
    }

    for jdx_path in sorted(fixtures_dir.glob("*.jdx")):
        summary["total_files"] += 1
        logger.info(f"Parsing {jdx_path}")

        try:
            result = parse_jcamp_from_path(jdx_path)
        except Exception as exc:
            logger.error(f"Failed to parse {jdx_path}: {exc}")
            summary["parse_errors"].append((jdx_path, exc))
            continue

        exceptions = result.get("exceptions") or []
        if exceptions:
            logger.warning(
                f"Found {len(exceptions)} exceptions in {jdx_path}"
            )
            summary["files_with_exceptions"].append((jdx_path, exceptions))
        else:
            summary["ok_files"].append(jdx_path)

    return summary


__all__ = [
    "parse_jcamp_from_path",
    "parse_jcamp_from_text",
    "parse_jcamp_from_bytes",
    "parse_jcamp",
    "run_smoke_test"
]


def __dir__() -> List[str]:
    return sorted(__all__)
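For orientation, a minimal sketch of how this new module could be exercised. The JCAMP snippet below is a made-up example of the `##$KEY=value` / `$$ comment` syntax the parser targets, not a real Paravision file; the expected outputs in the comments follow from the parsing logic above.

    # Minimal usage sketch (hypothetical sample input).
    from brkraw.core.jcamp import parse_jcamp_from_text

    sample = (
        "$$ Example comment line\n"
        "##$PVM_SpatResol=( 3 ) 0.1 0.1 0.5\n"
        "##$ACQ_scan_name=( 64 ) <1_Localizer>\n"
        "##END=\n"
    )

    result = parse_jcamp_from_text(sample)
    # Expected: {'shape': (3,), 'data': [0.1, 0.1, 0.5]}
    print(result["params"]["$PVM_SpatResol"])
    # Expected: ['Example comment line']
    print(result["comments"])

`parse_jcamp` accepts a path, str/bytes, or file-like object and dispatches to the same parser, and `run_smoke_test(Path("fixtures/"))` applies it over a directory of `.jdx` files, returning the summary dict described in its docstring.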