brkraw 0.3.11__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. brkraw/__init__.py +9 -3
  2. brkraw/apps/__init__.py +12 -0
  3. brkraw/apps/addon/__init__.py +30 -0
  4. brkraw/apps/addon/core.py +35 -0
  5. brkraw/apps/addon/dependencies.py +402 -0
  6. brkraw/apps/addon/installation.py +500 -0
  7. brkraw/apps/addon/io.py +21 -0
  8. brkraw/apps/hook/__init__.py +25 -0
  9. brkraw/apps/hook/core.py +636 -0
  10. brkraw/apps/loader/__init__.py +10 -0
  11. brkraw/apps/loader/core.py +622 -0
  12. brkraw/apps/loader/formatter.py +288 -0
  13. brkraw/apps/loader/helper.py +797 -0
  14. brkraw/apps/loader/info/__init__.py +11 -0
  15. brkraw/apps/loader/info/scan.py +85 -0
  16. brkraw/apps/loader/info/scan.yaml +90 -0
  17. brkraw/apps/loader/info/study.py +69 -0
  18. brkraw/apps/loader/info/study.yaml +156 -0
  19. brkraw/apps/loader/info/transform.py +92 -0
  20. brkraw/apps/loader/types.py +220 -0
  21. brkraw/cli/__init__.py +5 -0
  22. brkraw/cli/commands/__init__.py +2 -0
  23. brkraw/cli/commands/addon.py +327 -0
  24. brkraw/cli/commands/config.py +205 -0
  25. brkraw/cli/commands/convert.py +903 -0
  26. brkraw/cli/commands/hook.py +348 -0
  27. brkraw/cli/commands/info.py +74 -0
  28. brkraw/cli/commands/init.py +214 -0
  29. brkraw/cli/commands/params.py +106 -0
  30. brkraw/cli/commands/prune.py +288 -0
  31. brkraw/cli/commands/session.py +371 -0
  32. brkraw/cli/hook_args.py +80 -0
  33. brkraw/cli/main.py +83 -0
  34. brkraw/cli/utils.py +60 -0
  35. brkraw/core/__init__.py +13 -0
  36. brkraw/core/config.py +380 -0
  37. brkraw/core/entrypoints.py +25 -0
  38. brkraw/core/formatter.py +367 -0
  39. brkraw/core/fs.py +495 -0
  40. brkraw/core/jcamp.py +600 -0
  41. brkraw/core/layout.py +451 -0
  42. brkraw/core/parameters.py +781 -0
  43. brkraw/core/zip.py +1121 -0
  44. brkraw/dataclasses/__init__.py +14 -0
  45. brkraw/dataclasses/node.py +139 -0
  46. brkraw/dataclasses/reco.py +33 -0
  47. brkraw/dataclasses/scan.py +61 -0
  48. brkraw/dataclasses/study.py +131 -0
  49. brkraw/default/__init__.py +3 -0
  50. brkraw/default/pruner_specs/deid4share.yaml +42 -0
  51. brkraw/default/rules/00_default.yaml +4 -0
  52. brkraw/default/specs/metadata_dicom.yaml +236 -0
  53. brkraw/default/specs/metadata_transforms.py +92 -0
  54. brkraw/resolver/__init__.py +7 -0
  55. brkraw/resolver/affine.py +539 -0
  56. brkraw/resolver/datatype.py +69 -0
  57. brkraw/resolver/fid.py +90 -0
  58. brkraw/resolver/helpers.py +36 -0
  59. brkraw/resolver/image.py +188 -0
  60. brkraw/resolver/nifti.py +370 -0
  61. brkraw/resolver/shape.py +235 -0
  62. brkraw/schema/__init__.py +3 -0
  63. brkraw/schema/context_map.yaml +62 -0
  64. brkraw/schema/meta.yaml +57 -0
  65. brkraw/schema/niftiheader.yaml +95 -0
  66. brkraw/schema/pruner.yaml +55 -0
  67. brkraw/schema/remapper.yaml +128 -0
  68. brkraw/schema/rules.yaml +154 -0
  69. brkraw/specs/__init__.py +10 -0
  70. brkraw/specs/hook/__init__.py +12 -0
  71. brkraw/specs/hook/logic.py +31 -0
  72. brkraw/specs/hook/validator.py +22 -0
  73. brkraw/specs/meta/__init__.py +5 -0
  74. brkraw/specs/meta/validator.py +156 -0
  75. brkraw/specs/pruner/__init__.py +15 -0
  76. brkraw/specs/pruner/logic.py +361 -0
  77. brkraw/specs/pruner/validator.py +119 -0
  78. brkraw/specs/remapper/__init__.py +27 -0
  79. brkraw/specs/remapper/logic.py +924 -0
  80. brkraw/specs/remapper/validator.py +314 -0
  81. brkraw/specs/rules/__init__.py +6 -0
  82. brkraw/specs/rules/logic.py +263 -0
  83. brkraw/specs/rules/validator.py +103 -0
  84. brkraw-0.5.0.dist-info/METADATA +81 -0
  85. brkraw-0.5.0.dist-info/RECORD +88 -0
  86. {brkraw-0.3.11.dist-info → brkraw-0.5.0.dist-info}/WHEEL +1 -2
  87. brkraw-0.5.0.dist-info/entry_points.txt +13 -0
  88. brkraw/lib/__init__.py +0 -4
  89. brkraw/lib/backup.py +0 -641
  90. brkraw/lib/bids.py +0 -0
  91. brkraw/lib/errors.py +0 -125
  92. brkraw/lib/loader.py +0 -1220
  93. brkraw/lib/orient.py +0 -194
  94. brkraw/lib/parser.py +0 -48
  95. brkraw/lib/pvobj.py +0 -301
  96. brkraw/lib/reference.py +0 -245
  97. brkraw/lib/utils.py +0 -471
  98. brkraw/scripts/__init__.py +0 -0
  99. brkraw/scripts/brk_backup.py +0 -106
  100. brkraw/scripts/brkraw.py +0 -744
  101. brkraw/ui/__init__.py +0 -0
  102. brkraw/ui/config.py +0 -17
  103. brkraw/ui/main_win.py +0 -214
  104. brkraw/ui/previewer.py +0 -225
  105. brkraw/ui/scan_info.py +0 -72
  106. brkraw/ui/scan_list.py +0 -73
  107. brkraw/ui/subj_info.py +0 -128
  108. brkraw-0.3.11.dist-info/METADATA +0 -25
  109. brkraw-0.3.11.dist-info/RECORD +0 -28
  110. brkraw-0.3.11.dist-info/entry_points.txt +0 -3
  111. brkraw-0.3.11.dist-info/top_level.txt +0 -2
  112. tests/__init__.py +0 -0
  113. {brkraw-0.3.11.dist-info → brkraw-0.5.0.dist-info/licenses}/LICENSE +0 -0
brkraw/core/jcamp.py ADDED
@@ -0,0 +1,600 @@
1
+ """
2
+ Low-level JCAMP-DX parser for Bruker Paravision parameter files.
3
+
4
+ This module provides functional utilities to parse Paravision JCAMP-DX
5
+ formatted text (e.g., `method`, `acqp`, `reco` files) into a raw but structured
6
+ representation based on OrderedDicts. The goal is to preserve the original
7
+ hierarchical format as much as possible, while making it accessible to
8
+ downstream code.
9
+
10
+ Design choices:
11
+ - This module focuses on syntactic parsing only and keeps values in a
12
+ minimally processed form.
13
+ - Higher-level normalization, type conversion, and object-oriented access
14
+ are delegated to the `Parameters` class in `parameters.py`.
15
+ - The API is intentionally function-based (no classes) to keep the parsing
16
+ logic small, composable, and easier to maintain.
17
+
18
+ The main entry point is `parse_jcamp_from_path`, which returns:
19
+ - `params`: an OrderedDict of parameter keys mapped to `{"shape", "data"}`
20
+ - `comments`: JCAMP comment lines (prefixed by `$$`)
21
+ - `exceptions`: raw lines or entries that could not be parsed cleanly
22
+
23
+ A simple smoke test utility `run_smoke_test` is provided to validate all
24
+ `.jdx` fixture files within a directory.
25
+ """
26
+ from __future__ import annotations
27
+
28
+ import logging
29
+ import re
30
+ from collections import OrderedDict
31
+ from pathlib import Path
32
+ from typing import IO, Iterable, Union, Optional, List, Tuple, Any
33
+
34
logger = logging.getLogger(__name__)


# Pre-compiled patterns shared by the parsing helpers below.
REGEX_PATTERNS = dict(
    # A leading "(...)" group that contains no nested parentheses (named
    # group "left"), followed by the remainder of the value ("right").
    value=re.compile(r'^\((?P<left>[^()]*)\)\s*(?P<right>.*)$'),
    # The JCAMP comment marker "$$" followed by any text.
    comment=re.compile(r'\$\$.*'),
)
41
+
42
+
43
def split_shape_and_data(string: Optional[str]):
    """Separate an optional leading `(shape)` prefix from the rest of a value.

    A value such as `(1, 2, 3) rest-of-value` is split into the integer tuple
    `(1, 2, 3)` and the string `rest-of-value`. When the leading parenthesized
    group is absent or is not a comma-separated list of integers, the whole
    stripped value is returned as data with no shape.

    Args:
        string: Raw JCAMP value that may start with a parenthesized shape.

    Returns:
        A pair `(shape, data)`:

        - `shape` is a tuple of ints when a valid shape prefix is present,
          otherwise None.
        - `data` is the remaining (stripped) value string, or None when
          nothing remains.
    """
    text = (string or '').strip()
    if not text:
        return None, None

    match = REGEX_PATTERNS['value'].match(text)
    if match is None:
        # No leading "(...)" group at all: everything is payload.
        return None, text

    shape = is_shape(match.group('left').strip())
    if shape is None:
        # The parenthesized group is not a shape, so keep the value intact.
        return None, text

    remainder = match.group('right').strip()
    return shape, (remainder if remainder else None)
81
+
82
+
83
def is_shape(string: str):
    """Interpret a comma-separated string as a tuple of ints.

    Text such as `'1, 2, 3'` is read as a shape description. Every
    comma-separated field must be accepted by `int()`; a single field that is
    not (including an empty field) makes the whole string invalid.

    Args:
        string: Candidate shape string (for example `'1,2,3'`).

    Returns:
        Optional[Tuple[int, ...]]: The parsed tuple when every field is an
        integer, otherwise None.
    """
    try:
        return tuple(int(field.strip()) for field in string.split(','))
    except ValueError:
        return None
105
+
106
+
107
def to_number(token: str):
    """Convert a string token to an int or float when possible.

    Handles plain integers, floating point values, and exponential notation.
    If conversion fails, the stripped token string is returned instead.

    Tokens containing an underscore are never converted. Python's `int()` and
    `float()` accept underscores as digit separators (PEP 515), so without
    this guard an identifier-like token such as `2020_01_01` would silently
    become the number `20200101`.

    Args:
        token: Raw token that may represent a number.

    Returns:
        Parsed numeric value (int/float) or the stripped token string.
    """
    token = token.strip()
    if not token:
        return token
    if '_' in token:
        # Digit-group underscores are a Python literal feature, not JCAMP
        # number syntax; keep such tokens as text.
        return token
    try:
        # A decimal point or an exponent marker means float; otherwise int.
        if '.' in token or 'e' in token.lower():
            return float(token)
        return int(token)
    except ValueError:
        return token
129
+
130
+
131
def split_top_level_commas(s: str) -> List[str]:
    """Split a string on commas that are not nested inside any group.

    A comma is a separator only when it is outside every `(...)` group and
    outside every `<...>` block, and is not preceded by a backslash. All other
    characters, including the backslash itself, are kept verbatim.

    Args:
        s: Input string that may contain parenthesized groups and angle
            brackets.

    Returns:
        List[str]: The stripped substrings between top-level commas. A
        zero-length piece after the last comma is omitted.
    """
    pieces: List[str] = []
    paren_level = 0
    angle_level = 0
    skip_next = False
    piece_start = 0

    for pos, char in enumerate(s):
        if skip_next:
            # Character following a backslash: never structural.
            skip_next = False
            continue
        if char == '\\':
            skip_next = True
            continue
        if char == '<':
            angle_level += 1
            continue
        if char == '>':
            angle_level = max(angle_level - 1, 0)
            continue
        if angle_level:
            # Parentheses and commas inside <...> are plain text.
            continue

        if char == '(':
            paren_level += 1
        elif char == ')':
            paren_level -= 1
        elif char == ',' and paren_level == 0:
            pieces.append(s[piece_start:pos].strip())
            piece_start = pos + 1

    tail = s[piece_start:]
    if tail:
        pieces.append(tail.strip())

    return pieces
182
+
183
+
184
def split_tokens_angle_aware(s: str) -> List[str]:
    """Split on whitespace, treating each `<...>` block as one token.

    Whitespace inside an angle-bracketed section such as `<some text>` does
    not split it. A `<` met outside any block ends the token being built, and
    the `>` that closes the outermost block ends the block token. A backslash
    and the character after it are copied through unchanged.

    Args:
        s: Raw string possibly containing angle-bracketed sections.

    Returns:
        List[str]: Non-empty, stripped tokens in order of appearance.
    """
    found: List[str] = []
    pending: List[str] = []
    nesting = 0
    escaped = False

    def emit() -> None:
        # Move the characters gathered so far into the token list.
        if pending:
            found.append(''.join(pending))
            pending.clear()

    for char in s:
        if escaped or char == '\\':
            # Either the backslash itself or the character it protects.
            pending.append(char)
            escaped = not escaped
            continue

        if char == '<':
            if nesting == 0:
                emit()
            nesting += 1
            pending.append(char)
        elif char == '>':
            pending.append(char)
            if nesting > 0:
                nesting -= 1
                if nesting == 0:
                    emit()
        elif nesting == 0 and char.isspace():
            emit()
        else:
            pending.append(char)

    emit()

    trimmed = (token.strip() for token in found)
    return [token for token in trimmed if token]
235
+
236
+
237
def is_single_outer_paren(s: str) -> bool:
    """Tell whether one pair of parentheses encloses the whole string.

    `"(a b c)"` is a single outer group, while `"(a b)(c d)"` consists of two
    groups and is rejected. Parentheses inside `<...>` blocks and characters
    preceded by a backslash do not take part in the depth count.

    Args:
        s: Input string; surrounding whitespace is ignored.

    Returns:
        bool: True if the stripped string starts with `(`, ends with `)`, and
        the opening parenthesis is closed only by the final character.
    """
    text = s.strip()
    if not text.startswith('(') or not text.endswith(')'):
        return False

    final_pos = len(text) - 1
    level = 0
    angle_level = 0
    skip_next = False

    for pos, char in enumerate(text):
        if skip_next:
            skip_next = False
        elif char == '\\':
            skip_next = True
        elif char == '<':
            angle_level += 1
        elif char == '>':
            angle_level = max(angle_level - 1, 0)
        elif angle_level:
            continue
        elif char == '(':
            level += 1
        elif char == ')':
            level -= 1
            if level == 0 and pos != final_pos:
                # Closed before the end: more than one outer group.
                return False

    return level == 0
284
+
285
+
286
def parse_leaf_tokens(text: str):
    """Turn a parenthesis-free string into parsed token value(s).

    The text is tokenized with `split_tokens_angle_aware` (whitespace
    separated, `<...>` blocks kept whole) and each token is passed through
    `to_number`.

    Args:
        text: Leaf-level string without parentheses.

    Returns:
        Any: None when there are no tokens, the single parsed value when there
        is exactly one token, otherwise the list of parsed values.
    """
    parsed = list(map(to_number, split_tokens_angle_aware(text)))
    if not parsed:
        return None
    return parsed[0] if len(parsed) == 1 else parsed
310
+
311
+
312
def parse_segment(seg: str):
    """Parse one comma-free segment made of leaf text and `( ... )` groups.

    The segment is scanned once. Text lying outside every parenthesized group
    is collected and parsed with `parse_leaf_tokens`; each outermost `( ... )`
    group is handed, parentheses included, to `parse_nested`. Parentheses
    inside `<...>` blocks or following a backslash are treated as plain text.

    Args:
        seg: Substring potentially containing nested groups and leaf tokens.

    Returns:
        Any: None for an empty segment or one that yields no items, the single
        item when exactly one is produced, otherwise the list of items in
        order of appearance.
    """
    seg = seg.strip()
    if not seg:
        return None

    collected: List[Any] = []
    outside: List[str] = []  # Characters seen while outside every group.

    def flush_outside() -> None:
        # Parse the text gathered outside groups and reset the collector.
        leaf_text = ''.join(outside).strip()
        outside.clear()
        if leaf_text:
            leaf_val = parse_leaf_tokens(leaf_text)
            if leaf_val is not None:
                collected.append(leaf_val)

    level = 0
    group_start = None
    angle_level = 0
    escaped = False

    for pos, char in enumerate(seg):
        if escaped:
            escaped = False
        elif char == '\\':
            escaped = True
        elif char == '<':
            angle_level += 1
        elif char == '>':
            angle_level = max(angle_level - 1, 0)
        elif angle_level == 0 and char == '(':
            if level == 0:
                # An outermost group opens: emit the text that preceded it.
                flush_outside()
                group_start = pos
            level += 1
            continue
        elif angle_level == 0 and char == ')':
            level -= 1
            if level == 0 and group_start is not None:
                collected.append(parse_nested(seg[group_start:pos + 1]))
                group_start = None
            continue

        # Anything that is not a structural parenthesis is kept only while
        # outside groups; inner content is covered by the group slice.
        if level == 0:
            outside.append(char)

    flush_outside()

    if not collected:
        return None
    return collected[0] if len(collected) == 1 else collected
401
+
402
+
403
def parse_nested(s: str):
    """Recursively parse JCAMP-style parenthesized, comma-separated text.

    Steps:
    - A value with no parenthesis, comma, or space goes straight to
      `to_number`.
    - Parentheses that wrap the entire value are removed, repeatedly.
    - The remainder is split on top-level commas and each piece is parsed
      with `parse_segment`.

    Args:
        s: Raw string containing parentheses and comma-separated segments.

    Returns:
        Any: None for None/blank input; an empty list when only empty
        parentheses remain; the parsed segment when there is no top-level
        comma; otherwise a list of the parsed segments, with blank segments
        and segments that parse to None left out.
    """
    if s is None:
        return None

    text = s.strip()
    if not text:
        return None

    if not any(mark in text for mark in '(), '):
        return to_number(text)

    # Peel off every pair of parentheses that encloses the whole value.
    while is_single_outer_paren(text):
        text = text[1:-1].strip()
        if not text:
            return []

    segments = split_top_level_commas(text)
    if len(segments) == 1:
        # No top-level comma: the value is a single segment.
        return parse_segment(segments[0])

    parsed = (parse_segment(piece) for piece in segments if piece.strip())
    return [value for value in parsed if value is not None]
452
+
453
+
454
def _parse_lines(lines: Iterable[str]) -> dict:
    """Core parser that operates on an iterable of lines.

    Lines that start with the `$$` marker are collected as comments. All other
    lines are joined with single spaces and split on `##` into records. A
    record containing `=` becomes a parameter whose value is split into an
    optional shape and parsed data; a non-empty record without `=` is
    reported in `exceptions`.

    Args:
        lines: Text lines, with or without a trailing newline.

    Returns:
        dict: A mapping with keys:

        - `params`: OrderedDict of key -> `{"shape": ..., "data": ...}`. A
          key that occurs more than once keeps its last value.
        - `comments`: Comment texts with the `$$` marker removed.
        - `exceptions`: Stripped records that contain no `=`.
    """
    params = OrderedDict()
    comments: List[str] = []
    raw_params: List[str] = []
    exceptions: List[str] = []

    for raw in lines:
        line = raw.rstrip("\n")
        if REGEX_PATTERNS["comment"].match(line):
            # The match guarantees the line starts with "$$"; drop exactly
            # those two characters. str.lstrip("$$") treats its argument as a
            # character set and would also eat a "$" that begins the comment
            # text itself.
            comments.append(line[2:].strip())
        else:
            raw_params.append(line)

    for param in " ".join(raw_params).split("##"):
        entry = param.strip()
        if not entry:
            continue
        key, sep, value = entry.partition("=")
        if sep == "=":
            shape, data = split_shape_and_data(value)
            if isinstance(data, str):
                data = parse_nested(data)
            params[key] = {"shape": shape, "data": data}
        else:
            exceptions.append(entry)

    return {"params": params, "comments": comments, "exceptions": exceptions}
483
+
484
def parse_jcamp_from_path(path: Path) -> dict:
    """Open a JCAMP/Paravision file and return its parsed content.

    The file is read as UTF-8 text; bytes that cannot be decoded are dropped.

    Args:
        path: Location of the file on disk.

    Returns:
        dict: The result of `parse_jcamp` applied to the opened file.
    """
    handle = open(path, "r", encoding="utf-8", errors="ignore")
    with handle:
        parsed = parse_jcamp(handle)
    return parsed
488
+
489
+
490
def parse_jcamp_from_text(text: str) -> dict:
    """Parse JCAMP content that is already held in memory as a string.

    Args:
        text: Complete JCAMP text; it is split into lines before parsing.

    Returns:
        dict: The result of `_parse_lines` for those lines.
    """
    lines = text.splitlines()
    return _parse_lines(lines)
493
+
494
+
495
def parse_jcamp_from_bytes(data: Union[bytes, bytearray], *, encoding: str = "utf-8") -> dict:
    """Decode raw bytes and parse them as JCAMP text.

    Args:
        data: Encoded JCAMP content.
        encoding: Codec used for decoding; undecodable bytes are dropped.

    Returns:
        dict: The result of `parse_jcamp_from_text` for the decoded text.
    """
    decoded = data.decode(encoding, errors="ignore")
    return parse_jcamp_from_text(decoded)
498
+
499
+
500
def parse_jcamp(stream: Union[IO[str], IO[bytes], Path, str, bytes, bytearray]) -> dict:
    """Parse JCAMP content from a path, raw bytes, or a file-like object.

    Dispatch rules:
    - `str` or `Path`: treated as a filesystem path (a `str` is never
      interpreted as JCAMP text).
    - `bytes` or `bytearray`: decoded and parsed.
    - Any object with a `read` attribute: read in full. When `tell()` works,
      the stream is afterwards moved back to where it was on a best-effort
      basis.

    Args:
        stream: The JCAMP source.

    Returns:
        dict: Parsed result as produced by the source-specific parser.

    Raises:
        TypeError: If the source is none of the supported kinds, or if
            `read()` returns something other than str/bytes/bytearray.
    """
    if isinstance(stream, (str, Path)):
        return parse_jcamp_from_path(Path(stream))

    if isinstance(stream, (bytes, bytearray)):
        return parse_jcamp_from_bytes(stream)

    if hasattr(stream, "read"):
        reader = stream  # type: ignore[assignment]

        # Remember the current offset; not every stream can report one.
        try:
            start = reader.tell()  # type: ignore[attr-defined]
        except Exception:
            start = None

        payload = reader.read()  # type: ignore[call-arg]

        # Restoring the offset is a courtesy to the caller, so a failure here
        # is deliberately ignored.
        if start is not None:
            try:
                reader.seek(start)  # type: ignore[attr-defined]
            except Exception:
                pass

        if isinstance(payload, (bytes, bytearray)):
            return parse_jcamp_from_bytes(payload)
        if isinstance(payload, str):
            return parse_jcamp_from_text(payload)

    raise TypeError(
        "Unsupported JCAMP source. Provide a Path/str, bytes, or file-like object."
    )
534
+
535
+
536
def run_smoke_test(fixtures_dir: Union[str, Path]) -> dict:
    """Run a smoke test over all `.jdx` files in a fixtures directory.

    Each `.jdx` file directly inside the directory is parsed with
    `parse_jcamp_from_path`. The function records whether parsing completed
    and whether the parser reported any unparsed entries. Errors raised while
    parsing one file are logged and recorded, and do not stop the run.

    This is intended as a lightweight regression check for the JCAMP parser.

    Args:
        fixtures_dir: Directory containing one or more `.jdx` JCAMP files,
            given as a `Path` or as a string path.

    Returns:
        dict: Summary of the smoke test results with keys:

        - `total_files` (int):
            Number of `.jdx` files processed.
        - `ok_files` (List[Path]):
            Files that parsed without any recorded exceptions.
        - `files_with_exceptions` (List[Tuple[Path, List[str]]]):
            Files that parsed but produced non-empty `exceptions`.
        - `parse_errors` (List[Tuple[Path, Exception]]):
            Files that raised an exception during parsing.
    """
    # Accept plain string paths as well, as `parse_jcamp` does.
    fixtures_dir = Path(fixtures_dir)

    summary = {
        "total_files": 0,
        "ok_files": [],
        "files_with_exceptions": [],
        "parse_errors": [],
    }

    for jdx_path in sorted(fixtures_dir.glob("*.jdx")):
        summary["total_files"] += 1
        # %-style arguments defer message formatting until a handler actually
        # emits the record.
        logger.info("Parsing %s", jdx_path)

        try:
            result = parse_jcamp_from_path(jdx_path)
        except Exception as exc:
            # A smoke test must survive any single bad file, hence the broad
            # catch; the error is logged and kept in the summary.
            logger.error("Failed to parse %s: %s", jdx_path, exc)
            summary["parse_errors"].append((jdx_path, exc))
            continue

        exceptions = result.get("exceptions") or []
        if exceptions:
            logger.warning(
                "Found %d exceptions in %s", len(exceptions), jdx_path
            )
            summary["files_with_exceptions"].append((jdx_path, exceptions))
        else:
            summary["ok_files"].append(jdx_path)

    return summary
588
+
589
+
590
# Names that make up the public API of this module.
__all__ = [
    "parse_jcamp_from_path",
    "parse_jcamp_from_text",
    "parse_jcamp_from_bytes",
    "parse_jcamp",
    "run_smoke_test",
]


def __dir__() -> List[str]:
    """Return the public names from `__all__` in sorted order."""
    exported = list(__all__)
    exported.sort()
    return exported