chromstream 0.0.2__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {chromstream-0.0.2/src/chromstream.egg-info → chromstream-0.2.0}/PKG-INFO +26 -6
- {chromstream-0.0.2 → chromstream-0.2.0}/README.md +24 -5
- {chromstream-0.0.2 → chromstream-0.2.0}/pyproject.toml +2 -1
- {chromstream-0.0.2 → chromstream-0.2.0}/src/chromstream/__init__.py +9 -7
- {chromstream-0.0.2 → chromstream-0.2.0}/src/chromstream/data_processing.py +39 -8
- chromstream-0.2.0/src/chromstream/hdf5_common.py +260 -0
- {chromstream-0.0.2 → chromstream-0.2.0}/src/chromstream/objects.py +182 -7
- chromstream-0.2.0/src/chromstream/parsers/__init__.py +27 -0
- chromstream-0.2.0/src/chromstream/parsers/agilent.py +450 -0
- chromstream-0.2.0/src/chromstream/parsers/chromeleon.py +278 -0
- chromstream-0.2.0/src/chromstream/parsers/dispatch.py +80 -0
- chromstream-0.2.0/src/chromstream/parsers/hdf5.py +166 -0
- chromstream-0.0.2/src/chromstream/parsers.py → chromstream-0.2.0/src/chromstream/parsers/other_files.py +0 -266
- chromstream-0.2.0/src/chromstream/writers/__init__.py +1 -0
- chromstream-0.2.0/src/chromstream/writers/hdf5_writer.py +152 -0
- {chromstream-0.0.2 → chromstream-0.2.0/src/chromstream.egg-info}/PKG-INFO +26 -6
- chromstream-0.2.0/src/chromstream.egg-info/SOURCES.txt +30 -0
- {chromstream-0.0.2 → chromstream-0.2.0}/src/chromstream.egg-info/requires.txt +1 -0
- chromstream-0.2.0/tests/test_data_processing.py +31 -0
- chromstream-0.2.0/tests/test_hdf5_channel.py +105 -0
- chromstream-0.2.0/tests/test_hdf5_chromatogram.py +167 -0
- chromstream-0.2.0/tests/test_hdf5_parser.py +178 -0
- chromstream-0.2.0/tests/test_hdf5_reassembly.py +85 -0
- chromstream-0.2.0/tests/test_hdf5_writer.py +155 -0
- {chromstream-0.0.2 → chromstream-0.2.0}/tests/test_objects.py +11 -0
- {chromstream-0.0.2 → chromstream-0.2.0}/tests/test_parsers.py +74 -0
- chromstream-0.0.2/src/chromstream/__pycache__/__init__.cpython-310.pyc +0 -0
- chromstream-0.0.2/src/chromstream/__pycache__/__init__.cpython-311.pyc +0 -0
- chromstream-0.0.2/src/chromstream/__pycache__/data_processing.cpython-310.pyc +0 -0
- chromstream-0.0.2/src/chromstream/__pycache__/data_processing.cpython-311.pyc +0 -0
- chromstream-0.0.2/src/chromstream/__pycache__/objects.cpython-310.pyc +0 -0
- chromstream-0.0.2/src/chromstream/__pycache__/objects.cpython-311.pyc +0 -0
- chromstream-0.0.2/src/chromstream/__pycache__/parsers.cpython-310.pyc +0 -0
- chromstream-0.0.2/src/chromstream/__pycache__/parsers.cpython-311.pyc +0 -0
- chromstream-0.0.2/src/chromstream.egg-info/SOURCES.txt +0 -24
- {chromstream-0.0.2 → chromstream-0.2.0}/LICENSE.md +0 -0
- {chromstream-0.0.2 → chromstream-0.2.0}/MANIFEST.in +0 -0
- {chromstream-0.0.2 → chromstream-0.2.0}/setup.cfg +0 -0
- {chromstream-0.0.2 → chromstream-0.2.0}/src/chromstream/py.typed +0 -0
- {chromstream-0.0.2 → chromstream-0.2.0}/src/chromstream.egg-info/dependency_links.txt +0 -0
- {chromstream-0.0.2 → chromstream-0.2.0}/src/chromstream.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: chromstream
|
|
3
|
-
Version: 0.0.2
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: A Python package for online gas chromatography.
|
|
5
5
|
Author-email: Sebastian Rejman <s.rejman@uu.nl>
|
|
6
6
|
License: MIT
|
|
@@ -23,6 +23,7 @@ Classifier: Operating System :: MacOS
|
|
|
23
23
|
Requires-Python: >=3.9
|
|
24
24
|
Description-Content-Type: text/markdown
|
|
25
25
|
License-File: LICENSE.md
|
|
26
|
+
Requires-Dist: h5py>=3.14.0
|
|
26
27
|
Requires-Dist: matplotlib>=3.9.4
|
|
27
28
|
Requires-Dist: numpy
|
|
28
29
|
Requires-Dist: pandas>=2.2.2
|
|
@@ -57,12 +58,17 @@ A Python package for processing on-line gas chromatography data. ChromStream pro
|
|
|
57
58
|
|
|
58
59
|
## Features
|
|
59
60
|
|
|
60
|
-
- Parse chromatographic data from multiple formats
|
|
61
|
+
- Parse chromatographic data from multiple formats:
|
|
62
|
+
- Chromeleon (exported txt)
|
|
63
|
+
- Agilent .d directories
|
|
64
|
+
- Agilent .dx files
|
|
65
|
+
- ChromStream HDF5 experiment files
|
|
61
66
|
- Access to data at experiment, channel and chromatogram level
|
|
62
67
|
- Quick plotting of chromatograms
|
|
63
68
|
- Small selection of baseline corrections, possibility to use custom ones
|
|
64
69
|
- Integration using a dict of peaks
|
|
65
70
|
- Addition of logfiles
|
|
71
|
+
- Export experiments to a compact HDF5 format
|
|
66
72
|
|
|
67
73
|
## Installation
|
|
68
74
|
|
|
@@ -108,9 +114,22 @@ exp.channels['channel-name'].chromatograms[0].plot()
|
|
|
108
114
|
ChromStream currently supports parsing data from:
|
|
109
115
|
|
|
110
116
|
- Chromeleon software exports (`.txt`)
|
|
111
|
-
-
|
|
117
|
+
- Agilent .d directories and .dx files
|
|
118
|
+
- ChromStream HDF5 experiment files (`.h5`)
|
|
112
119
|
- simple log files (e.g. exported from labview)
|
|
113
120
|
|
|
121
|
+
ChromStream can also export `Experiment` objects to HDF5 and load them back again:
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
import chromstream as cs
|
|
125
|
+
|
|
126
|
+
exp = cs.Experiment(name="example")
|
|
127
|
+
exp.add_mult_chromatograms("path-to-run.dx")
|
|
128
|
+
exp.to_hdf5("example.h5")
|
|
129
|
+
|
|
130
|
+
loaded = cs.parse_experiment_hdf5("example.h5")
|
|
131
|
+
```
|
|
132
|
+
|
|
114
133
|
## Documentation
|
|
115
134
|
|
|
116
135
|
- You can find the full documentation of the package [here](https://myonics.github.io/ChromStream/).
|
|
@@ -120,12 +139,14 @@ ChromStream currently supports parsing data from:
|
|
|
120
139
|
Check out the `example_notebooks/` directory for comprehensive examples:
|
|
121
140
|
|
|
122
141
|
- `example_calibration.ipynb` - GC calibration procedures
|
|
142
|
+
- `cracking_example.ipynb` - full procedure for analyzing a cracking dataset
|
|
143
|
+
- `exporting_hdf5.ipynb` - brief example showing HDF5 export and re-loading
|
|
123
144
|
|
|
124
145
|
|
|
125
146
|
## Roadmap
|
|
126
147
|
- Support for more file formats
|
|
127
148
|
- Addition of more data sources such as spectroscopy
|
|
128
|
-
- JSON
|
|
149
|
+
- JSON persistence
|
|
129
150
|
- tests
|
|
130
151
|
|
|
131
152
|
## Contributing
|
|
@@ -135,6 +156,5 @@ PRs are more than welcome.
|
|
|
135
156
|
|
|
136
157
|
## Authors
|
|
137
158
|
|
|
138
|
-
Sebastian Rejman - Utrecht University
|
|
139
|
-
|
|
159
|
+
Sebastian Rejman - Fritz-Haber-Institute / Utrecht University
|
|
140
160
|
|
|
@@ -8,12 +8,17 @@ A Python package for processing on-line gas chromatography data. ChromStream pro
|
|
|
8
8
|
|
|
9
9
|
## Features
|
|
10
10
|
|
|
11
|
-
- Parse chromatographic data from multiple formats
|
|
11
|
+
- Parse chromatographic data from multiple formats:
|
|
12
|
+
- Chromeleon (exported txt)
|
|
13
|
+
- Agilent .d directories
|
|
14
|
+
- Agilent .dx files
|
|
15
|
+
- ChromStream HDF5 experiment files
|
|
12
16
|
- Access to data at experiment, channel and chromatogram level
|
|
13
17
|
- Quick plotting of chromatograms
|
|
14
18
|
- Small selection of baseline corrections, possibility to use custom ones
|
|
15
19
|
- Integration using a dict of peaks
|
|
16
20
|
- Addition of logfiles
|
|
21
|
+
- Export experiments to a compact HDF5 format
|
|
17
22
|
|
|
18
23
|
## Installation
|
|
19
24
|
|
|
@@ -59,9 +64,22 @@ exp.channels['channel-name'].chromatograms[0].plot()
|
|
|
59
64
|
ChromStream currently supports parsing data from:
|
|
60
65
|
|
|
61
66
|
- Chromeleon software exports (`.txt`)
|
|
62
|
-
-
|
|
67
|
+
- Agilent .d directories and .dx files
|
|
68
|
+
- ChromStream HDF5 experiment files (`.h5`)
|
|
63
69
|
- simple log files (e.g. exported from labview)
|
|
64
70
|
|
|
71
|
+
ChromStream can also export `Experiment` objects to HDF5 and load them back again:
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
import chromstream as cs
|
|
75
|
+
|
|
76
|
+
exp = cs.Experiment(name="example")
|
|
77
|
+
exp.add_mult_chromatograms("path-to-run.dx")
|
|
78
|
+
exp.to_hdf5("example.h5")
|
|
79
|
+
|
|
80
|
+
loaded = cs.parse_experiment_hdf5("example.h5")
|
|
81
|
+
```
|
|
82
|
+
|
|
65
83
|
## Documentation
|
|
66
84
|
|
|
67
85
|
- You can find the full documentation of the package [here](https://myonics.github.io/ChromStream/).
|
|
@@ -71,12 +89,14 @@ ChromStream currently supports parsing data from:
|
|
|
71
89
|
Check out the `example_notebooks/` directory for comprehensive examples:
|
|
72
90
|
|
|
73
91
|
- `example_calibration.ipynb` - GC calibration procedures
|
|
92
|
+
- `cracking_example.ipynb` - full procedure for analyzing a cracking dataset
|
|
93
|
+
- `exporting_hdf5.ipynb` - brief example showing HDF5 export and re-loading
|
|
74
94
|
|
|
75
95
|
|
|
76
96
|
## Roadmap
|
|
77
97
|
- Support for more file formats
|
|
78
98
|
- Addition of more data sources such as spectroscopy
|
|
79
|
-
- JSON
|
|
99
|
+
- JSON persistence
|
|
80
100
|
- tests
|
|
81
101
|
|
|
82
102
|
## Contributing
|
|
@@ -86,6 +106,5 @@ PRs are more than welcome.
|
|
|
86
106
|
|
|
87
107
|
## Authors
|
|
88
108
|
|
|
89
|
-
Sebastian Rejman - Utrecht University
|
|
90
|
-
|
|
109
|
+
Sebastian Rejman - Fritz-Haber-Institute / Utrecht University
|
|
91
110
|
|
|
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "chromstream"
|
|
7
7
|
description="A Python package for online gas chromatography."
|
|
8
|
-
version = "0.0.2"
|
|
8
|
+
version = "0.2.0"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = { text = "MIT" }
|
|
11
11
|
authors = [{ name = "Sebastian Rejman", email = "s.rejman@uu.nl" }]
|
|
@@ -26,6 +26,7 @@ classifiers = [
|
|
|
26
26
|
]
|
|
27
27
|
requires-python = ">=3.9"
|
|
28
28
|
dependencies = [
|
|
29
|
+
"h5py>=3.14.0",
|
|
29
30
|
"matplotlib>=3.9.4",
|
|
30
31
|
"numpy",
|
|
31
32
|
"pandas>=2.2.2",
|
|
@@ -6,10 +6,12 @@ from importlib.metadata import version
|
|
|
6
6
|
|
|
7
7
|
from .parsers import *
|
|
8
8
|
|
|
9
|
-
from .objects import *
|
|
10
|
-
|
|
11
|
-
from .data_processing import *
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
9
|
+
from .objects import *
|
|
10
|
+
|
|
11
|
+
from .data_processing import *
|
|
12
|
+
|
|
13
|
+
from .writers import *
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Load the version
|
|
17
|
+
__version__ = version("chromstream")
|
|
@@ -4,7 +4,9 @@ Data processing functions for chromatogram analysis
|
|
|
4
4
|
|
|
5
5
|
from __future__ import annotations
|
|
6
6
|
|
|
7
|
+
import inspect
|
|
7
8
|
from typing import TYPE_CHECKING
|
|
9
|
+
from typing import Callable
|
|
8
10
|
|
|
9
11
|
import pandas as pd
|
|
10
12
|
from scipy.integrate import trapezoid
|
|
@@ -12,8 +14,18 @@ from scipy.integrate import trapezoid
|
|
|
12
14
|
if TYPE_CHECKING:
|
|
13
15
|
from .objects import ChannelChromatograms, Chromatogram
|
|
14
16
|
|
|
17
|
+
# Signature shared by all baseline corrections: any arguments in, a
# ``pd.Series`` out.
BaselineFunction = Callable[..., pd.Series]

# Name -> function registry populated by ``register_baseline``.
_BASELINE_FUNCTIONS: dict[str, BaselineFunction] = {}


def register_baseline(func: BaselineFunction) -> BaselineFunction:
    """Record ``func`` in the baseline registry and hand it back unchanged.

    The function is stored under its ``__name__``; registering another
    function with the same name replaces the earlier entry. Returning the
    function itself makes this usable as a plain decorator.

    Args:
        func: Baseline function to register.

    Returns:
        The same ``func`` object that was passed in.
    """
    registry_key = func.__name__
    _BASELINE_FUNCTIONS[registry_key] = func
    return func
|
|
25
|
+
|
|
15
26
|
|
|
16
27
|
# Baseline functions
|
|
28
|
+
@register_baseline
|
|
17
29
|
def min_subtract(data: pd.DataFrame) -> pd.Series:
|
|
18
30
|
"""
|
|
19
31
|
Simple minimum subtraction baseline correction
|
|
@@ -28,6 +40,7 @@ def min_subtract(data: pd.DataFrame) -> pd.Series:
|
|
|
28
40
|
return signal - signal.min()
|
|
29
41
|
|
|
30
42
|
|
|
43
|
+
@register_baseline
|
|
31
44
|
def time_window_baseline(
|
|
32
45
|
data: pd.DataFrame, time_window: tuple[float, float] = (0, 1)
|
|
33
46
|
) -> pd.Series:
|
|
@@ -52,6 +65,7 @@ def time_window_baseline(
|
|
|
52
65
|
return data[signal_col] - baseline_value # type: ignore[operator]
|
|
53
66
|
|
|
54
67
|
|
|
68
|
+
@register_baseline
|
|
55
69
|
def time_point_baseline(data: pd.DataFrame, time_point: float) -> pd.Series:
|
|
56
70
|
"""
|
|
57
71
|
Use signal value at a specific time point as baseline
|
|
@@ -74,6 +88,7 @@ def time_point_baseline(data: pd.DataFrame, time_point: float) -> pd.Series:
|
|
|
74
88
|
return data[signal_col] - baseline_value # type: ignore[operator]
|
|
75
89
|
|
|
76
90
|
|
|
91
|
+
@register_baseline
|
|
77
92
|
def linear_baseline(
|
|
78
93
|
data: pd.DataFrame, start_time: float, end_time: float
|
|
79
94
|
) -> pd.Series:
|
|
@@ -353,11 +368,27 @@ def split_chromatogram(
|
|
|
353
368
|
return split_chromatograms
|
|
354
369
|
|
|
355
370
|
|
|
356
|
-
def list_baseline_functions():
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
371
|
+
def list_baseline_functions(verbose: bool = False) -> str:
    """Print and return the names of the registered baseline functions.

    Args:
        verbose: When True, each name is followed by the function's
            docstring (or a placeholder if it has none), and entries are
            separated by a blank line. When False, only the names are
            listed, one per line.

    Returns:
        The exact text that was printed.
    """
    if verbose:
        entries = []
        for name, function in _BASELINE_FUNCTIONS.items():
            doc_text = inspect.getdoc(function) or "No docstring provided."
            indented_doc = "\n".join(f" {line}" for line in doc_text.splitlines())
            entries.append(f"{name}\n{indented_doc}")
        output = "\n\n".join(entries)
    else:
        output = "\n".join(_BASELINE_FUNCTIONS)

    print(output)
    return output
|
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import h5py
|
|
7
|
+
import numpy as np
|
|
8
|
+
import pandas as pd
|
|
9
|
+
|
|
10
|
+
from chromstream.objects import ChannelChromatograms, Chromatogram
|
|
11
|
+
|
|
12
|
+
# Matches injection group names of the form ``inj-<digits>``; the digits are
# captured in the named group ``index``.
_INJECTION_NAME_PATTERN = re.compile(r"inj-(?P<index>\d+)$")
# Message prefix for errors raised when a file's schema is missing or does not
# identify it as a ChromStream file.
_UNSUPPORTED_FILE_MESSAGE = "Only parsing of ChromStream HDF5 files is supported."

# Root attribute names owned by each file type; excluded from the free-form
# metadata dict on read and rejected as metadata keys on write.
RESERVED_EXPERIMENT_ATTRS = frozenset({"schema", "label", "creation_date", "author"})
RESERVED_CHANNEL_ATTRS = frozenset({"schema", "label", "name"})
RESERVED_CHROMATOGRAM_ATTRS = frozenset(
    {"schema", "label", "channel", "injection_index", "injection_time"}
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _to_hdf5_attr(value: object) -> str | int | float | bool | bytes:
    """Convert a Python value to an HDF5-compatible scalar attribute.

    Conversions applied:

    * ``pd.Timestamp``, ``datetime.datetime`` and ``np.datetime64`` values
      become ISO 8601 strings.
    * ``Path`` objects become strings.
    * Other NumPy scalars are unwrapped to native Python scalars.
    * ``str``, ``int``, ``float``, ``bool`` and ``bytes`` pass through.

    Args:
        value: Metadata value to store as an attribute.

    Returns:
        A scalar that can be written as an HDF5 attribute.

    Raises:
        TypeError: If the value is not one of the supported scalar kinds
            (this includes missing timestamps such as ``pd.NaT``).
    """
    # Function-scope import so the module's import block needs no change.
    import datetime as _datetime

    # ``np.datetime64`` must be handled before the generic NumPy branch:
    # ``.item()`` on a nanosecond-resolution value yields a bare integer, so
    # the timestamp would silently be stored as a number.
    if isinstance(value, np.datetime64):
        value = pd.Timestamp(value)
    # ``pd.Timestamp`` subclasses ``datetime.datetime``; NaT is excluded so it
    # still ends in the TypeError below instead of being stored as "NaT".
    if isinstance(value, _datetime.datetime) and not pd.isna(value):
        return value.isoformat()
    if isinstance(value, Path):
        return str(value)
    if isinstance(value, np.generic):
        value = value.item()
    if isinstance(value, (str, int, float, bool, bytes)):
        return value
    raise TypeError(
        "Metadata values must be scalar HDF5-compatible values. "
        f"Unsupported value {value!r} of type {type(value).__name__}."
    )
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _from_hdf5_attr(value: object) -> object:
    """Turn a raw HDF5 attribute value into a plain Python value.

    ``bytes`` are decoded as UTF-8, NumPy scalars are unwrapped with
    ``.item()``, and anything else is returned untouched.
    """
    if isinstance(value, bytes):
        decoded = value.decode("utf-8")
        return decoded
    return value.item() if isinstance(value, np.generic) else value
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _require_attr(attrs: h5py.AttributeManager, key: str, context: str) -> object:
    """Fetch a mandatory HDF5 attribute, converted to a plain Python value.

    Args:
        attrs: Attribute collection to look the key up in.
        key: Name of the required attribute.
        context: Human-readable location used in the error message.

    Returns:
        The attribute value after conversion by ``_from_hdf5_attr``.

    Raises:
        ValueError: If ``key`` is not present in ``attrs``.
    """
    if key in attrs:
        raw_value = attrs[key]
        return _from_hdf5_attr(raw_value)
    raise ValueError(f"Missing required attribute {key!r} in {context}.")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _parse_timestamp(value: object, context: str) -> pd.Timestamp:
    """Build a ``pd.Timestamp`` from an attribute value.

    Any failure to construct the timestamp, as well as a result that is a
    missing value, is reported as the same ``ValueError``.

    Args:
        value: Raw value to convert.
        context: Human-readable location used in the error message.

    Returns:
        The parsed timestamp.

    Raises:
        ValueError: If ``value`` cannot be parsed or parses to a missing value.
    """
    try:
        parsed = pd.Timestamp(value)
    except Exception as exc:
        raise ValueError(f"Invalid timestamp {value!r} in {context}.") from exc
    else:
        if not pd.isna(parsed):
            return parsed
    # Parsing succeeded but produced a missing value (e.g. NaT).
    raise ValueError(f"Invalid timestamp {value!r} in {context}.")
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _require_schema(hdf: h5py.File, expected: str, context: str = "file root") -> str:
    """Check the root ``schema`` attribute and return it as a string.

    Three conditions are checked in order: the attribute must exist, its text
    must contain "chromstream" (case-insensitive), and it must equal
    ``expected``.

    Args:
        hdf: Open file whose root attributes are inspected.
        expected: Schema string the file must carry.
        context: Location named in the "missing attribute" error.

    Returns:
        The stored schema converted with ``str``.

    Raises:
        ValueError: If any of the three checks fails.
    """
    root_attrs = hdf.attrs
    if "schema" not in root_attrs:
        raise ValueError(
            f"{_UNSUPPORTED_FILE_MESSAGE} Missing required attribute 'schema' in {context}."
        )

    schema = _from_hdf5_attr(root_attrs["schema"])
    schema_text = str(schema)
    if "chromstream" not in schema_text.lower():
        raise ValueError(f"{_UNSUPPORTED_FILE_MESSAGE} Found schema {schema!r}.")

    # TODO: Handle different schema versions. Currently only an exact match is accepted.
    if schema != expected:
        raise ValueError(f"Unknown schema {schema!r}. Expected {expected!r}.")
    return schema_text
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _write_chromatogram_group(
    group: h5py.Group,
    chromatogram: Chromatogram,
    *,
    compression: str | None,
    context: str,
) -> None:
    """Store one chromatogram in ``group``.

    The injection time is written as an ISO-formatted ``injection_time``
    attribute. The first data column is written as the ``retention_time``
    dataset and the second as the ``signal`` dataset; each dataset carries
    ``unit`` and ``column_name`` attributes. Any further columns are ignored.

    Args:
        group: Target HDF5 group.
        chromatogram: Chromatogram to serialise.
        compression: Compression setting forwarded to ``create_dataset``.
        context: Human-readable location used in error messages.

    Raises:
        ValueError: If the data has fewer than two columns or the injection
            time is missing.
    """
    frame = chromatogram.data
    if frame.shape[1] < 2:
        raise ValueError(f"Chromatogram for {context} must have at least two columns.")
    time_column = frame.columns[0]
    value_column = frame.columns[1]

    injection_time = chromatogram.injection_time
    if injection_time is None or pd.isna(injection_time):
        raise ValueError(
            f"Chromatogram for {context} is missing a valid injection_time."
        )
    group.attrs["injection_time"] = pd.Timestamp(injection_time).isoformat()

    # The unit is looked up by attribute name so that it is read only after
    # the dataset has been created, as in a straight-line implementation.
    for dataset_name, column, unit_attribute in (
        ("retention_time", time_column, "time_unit"),
        ("signal", value_column, "signal_unit"),
    ):
        dataset = group.create_dataset(
            dataset_name,
            data=frame[column].to_numpy(),
            compression=compression,
        )
        dataset.attrs["unit"] = getattr(chromatogram, unit_attribute)
        dataset.attrs["column_name"] = column
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _read_chromatogram_group(
    group: h5py.Group,
    *,
    channel: str,
    path: Path | None,
    context: str,
    metadata: dict | None = None,
) -> Chromatogram:
    """Read a single chromatogram from ``group``.

    When ``metadata`` is None (experiment/channel layout) the metadata dict is
    rebuilt from the stored units only; standalone chromatogram files pass the
    full metadata dict explicitly.

    Args:
        group: HDF5 group holding the ``injection_time`` attribute and the
            ``retention_time`` and ``signal`` datasets.
        channel: Channel name assigned to the resulting chromatogram.
        path: Source path assigned to the resulting chromatogram.
        context: Human-readable location used in error messages.
        metadata: Metadata dict to attach, or None to rebuild it from units.

    Returns:
        The reconstructed ``Chromatogram``.

    Raises:
        ValueError: If a required attribute or dataset is missing, an entry
            is not a dataset, the injection time is invalid, a dataset is not
            one-dimensional, or the two datasets differ in length.
    """
    injection_time = _parse_timestamp(
        _require_attr(group.attrs, "injection_time", context), context
    )

    if "retention_time" not in group or "signal" not in group:
        raise ValueError(
            f"{context} must contain 'retention_time' and 'signal' datasets."
        )

    retention_time_dataset = group["retention_time"]
    signal_dataset = group["signal"]
    if not isinstance(retention_time_dataset, h5py.Dataset):
        raise ValueError(f"'retention_time' in {context} must be an HDF5 dataset.")
    if not isinstance(signal_dataset, h5py.Dataset):
        raise ValueError(f"'signal' in {context} must be an HDF5 dataset.")

    time_column_name = _require_attr(
        retention_time_dataset.attrs,
        "column_name",
        f"dataset 'retention_time' in {context}",
    )
    signal_column_name = _require_attr(
        signal_dataset.attrs, "column_name", f"dataset 'signal' in {context}"
    )
    time_unit = _require_attr(
        retention_time_dataset.attrs, "unit", f"dataset 'retention_time' in {context}"
    )
    signal_unit = _require_attr(
        signal_dataset.attrs, "unit", f"dataset 'signal' in {context}"
    )

    time_values = np.asarray(retention_time_dataset[()])
    signal_values = np.asarray(signal_dataset[()])
    # A scalar or empty dataset has no length; reject it here so a malformed
    # file produces the same kind of contextual ValueError as the other
    # checks instead of a TypeError from ``len()``.
    if time_values.ndim != 1 or signal_values.ndim != 1:
        raise ValueError(
            f"Datasets 'retention_time' and 'signal' in {context} "
            "must be one-dimensional."
        )
    if len(time_values) != len(signal_values):
        raise ValueError(f"Dataset length mismatch in {context}.")

    if metadata is None:
        metadata = {"time_unit": str(time_unit), "Signal Unit": str(signal_unit)}

    return Chromatogram(
        data=pd.DataFrame(
            {
                str(time_column_name): time_values,
                str(signal_column_name): signal_values,
            }
        ),
        injection_time=injection_time,
        metadata=metadata,
        channel=channel,
        path=path,
    )
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _write_channel_group(
    group: h5py.Group,
    channel: ChannelChromatograms,
    *,
    compression: str | None,
) -> None:
    """Serialise a channel into ``group``.

    The channel name is stored as the ``name`` attribute. Every chromatogram
    is written, in ascending key order, to its own ``inj-<key>`` subgroup
    (key zero-padded to four digits) below a new ``injections`` group.

    Args:
        group: Target HDF5 group for the channel.
        channel: Channel whose chromatograms are written.
        compression: Compression setting forwarded to the dataset writer.
    """
    group.attrs["name"] = channel.channel
    injections = group.create_group("injections")

    for key in sorted(channel.chromatograms):
        entry = channel.chromatograms[key]
        target = injections.create_group(f"inj-{key:04d}")
        location = f"channel {channel.channel!r} injection {key!r}"
        _write_chromatogram_group(
            target,
            entry,
            compression=compression,
            context=location,
        )
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def _read_channel_group(
    group: h5py.Group,
    *,
    path: Path | None,
    expected_name: str | None = None,
) -> ChannelChromatograms:
    """Rebuild a ``ChannelChromatograms`` from its HDF5 group.

    The group must carry a ``name`` attribute and an ``injections`` subgroup.
    Each child of ``injections`` must be a group named ``inj-<digits>``; its
    chromatogram is added to the channel under the integer value of the
    digits.

    Args:
        group: HDF5 group holding the channel.
        path: Source path passed on to every chromatogram.
        expected_name: If given, the stored channel name must equal it.

    Returns:
        The populated channel object.

    Raises:
        ValueError: If the name attribute or ``injections`` group is missing,
            the stored name differs from ``expected_name``, or an entry has an
            invalid name or is not a group.
    """
    stored_name = _require_attr(group.attrs, "name", f"channel group {group.name!r}")
    channel_name = str(stored_name)
    name_matches = expected_name is None or channel_name == expected_name
    if not name_matches:
        raise ValueError(
            f"Channel group name mismatch for {expected_name!r}: "
            f"stored name is {stored_name!r}."
        )

    if "injections" not in group:
        raise ValueError(
            f"Missing required group 'injections' in channel {channel_name!r}."
        )
    injections = group["injections"]
    if not isinstance(injections, h5py.Group):
        raise ValueError(
            f"'injections' in channel {channel_name!r} must be an HDF5 group."
        )

    result = ChannelChromatograms(channel=channel_name)
    for entry_name in injections:
        parsed_name = _INJECTION_NAME_PATTERN.fullmatch(entry_name)
        if parsed_name is None:
            raise ValueError(
                f"Invalid injection group name {entry_name!r} "
                f"in channel {channel_name!r}."
            )

        entry = injections[entry_name]
        if not isinstance(entry, h5py.Group):
            raise ValueError(
                f"Injection entry {entry_name!r} in channel "
                f"{channel_name!r} must be an HDF5 group."
            )

        loaded = _read_chromatogram_group(
            entry,
            channel=channel_name,
            path=path,
            context=f"injection group {entry_name!r}",
        )
        injection_index = int(parsed_name.group("index"))
        result.add_chromatogram(injection_index, loaded)

    return result
|