omh-shim 1.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. omh_shim-1.0.2/AUTHORS.md +7 -0
  2. omh_shim-1.0.2/LICENSE +21 -0
  3. omh_shim-1.0.2/PKG-INFO +153 -0
  4. omh_shim-1.0.2/README.md +121 -0
  5. omh_shim-1.0.2/omh_shim/__init__.py +111 -0
  6. omh_shim-1.0.2/omh_shim/_dispatch.py +42 -0
  7. omh_shim-1.0.2/omh_shim/_helpers.py +123 -0
  8. omh_shim-1.0.2/omh_shim/_schema_loader.py +36 -0
  9. omh_shim-1.0.2/omh_shim/_validate.py +64 -0
  10. omh_shim-1.0.2/omh_shim/errors.py +17 -0
  11. omh_shim-1.0.2/omh_shim/schemas/README.md +30 -0
  12. omh_shim-1.0.2/omh_shim/schemas/__init__.py +0 -0
  13. omh_shim-1.0.2/omh_shim/schemas/activity-name-1.x.json +17 -0
  14. omh_shim-1.0.2/omh_shim/schemas/date-time-1.x.json +16 -0
  15. omh_shim-1.0.2/omh_shim/schemas/descriptive-statistic-1.0.json +53 -0
  16. omh_shim-1.0.2/omh_shim/schemas/descriptive-statistic-1.x.json +108 -0
  17. omh_shim-1.0.2/omh_shim/schemas/descriptive-statistic-denominator-1.x.json +14 -0
  18. omh_shim-1.0.2/omh_shim/schemas/duration-unit-value-1.x.json +45 -0
  19. omh_shim-1.0.2/omh_shim/schemas/kcal-unit-value-1.x.json +32 -0
  20. omh_shim-1.0.2/omh_shim/schemas/length-unit-value-1.x.json +45 -0
  21. omh_shim-1.0.2/omh_shim/schemas/local_heart-rate-variability_1-0.json +35 -0
  22. omh_shim-1.0.2/omh_shim/schemas/omh_heart-rate_2-0.json +66 -0
  23. omh_shim-1.0.2/omh_shim/schemas/omh_oxygen-saturation_2-0.json +91 -0
  24. omh_shim-1.0.2/omh_shim/schemas/omh_physical-activity_1-2.json +58 -0
  25. omh_shim-1.0.2/omh_shim/schemas/omh_sleep-duration_2-0.json +83 -0
  26. omh_shim-1.0.2/omh_shim/schemas/omh_sleep-episode_1-1.json +80 -0
  27. omh_shim-1.0.2/omh_shim/schemas/omh_step-count_3-0.json +81 -0
  28. omh_shim-1.0.2/omh_shim/schemas/part-of-day-1.x.json +40 -0
  29. omh_shim-1.0.2/omh_shim/schemas/temporal-relationship-to-physical-activity-1.x.json +40 -0
  30. omh_shim-1.0.2/omh_shim/schemas/temporal-relationship-to-sleep-1.x.json +28 -0
  31. omh_shim-1.0.2/omh_shim/schemas/time-frame-1.x.json +46 -0
  32. omh_shim-1.0.2/omh_shim/schemas/time-interval-1.x.json +86 -0
  33. omh_shim-1.0.2/omh_shim/schemas/unit-value-1.x.json +31 -0
  34. omh_shim-1.0.2/omh_shim/sources/__init__.py +1 -0
  35. omh_shim-1.0.2/omh_shim/sources/oura_raw.py +102 -0
  36. omh_shim-1.0.2/omh_shim/sources/ow_normalized.py +114 -0
  37. omh_shim-1.0.2/omh_shim.egg-info/PKG-INFO +153 -0
  38. omh_shim-1.0.2/omh_shim.egg-info/SOURCES.txt +43 -0
  39. omh_shim-1.0.2/omh_shim.egg-info/dependency_links.txt +1 -0
  40. omh_shim-1.0.2/omh_shim.egg-info/requires.txt +8 -0
  41. omh_shim-1.0.2/omh_shim.egg-info/top_level.txt +1 -0
  42. omh_shim-1.0.2/pyproject.toml +98 -0
  43. omh_shim-1.0.2/setup.cfg +4 -0
  44. omh_shim-1.0.2/tests/test_core.py +326 -0
  45. omh_shim-1.0.2/tests/test_sources.py +113 -0
@@ -0,0 +1,7 @@
1
+ # Authors
2
+
3
+ **Primary maintainer:** JupyterHealth Exchange team
4
+
5
+ **`omh_shim/sources/oura_raw.py`** — converter mapping logic ported with permission from
6
+ [dicristea/oura-clinical-workbench/data_syn](https://github.com/dicristea/oura-clinical-workbench/tree/main/data_syn).
7
+ Each `oura_raw` source file carries an attribution comment.
omh_shim-1.0.2/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 JupyterHealth Exchange contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,153 @@
1
+ Metadata-Version: 2.4
2
+ Name: omh-shim
3
+ Version: 1.0.2
4
+ Summary: Convert wearable health data from vendor schemas to Open mHealth schemas
5
+ Author: JupyterHealth Exchange contributors
6
+ License: MIT
7
+ Project-URL: Repository, https://github.com/jupyterhealth/omh-shim
8
+ Project-URL: Issues, https://github.com/jupyterhealth/omh-shim/issues
9
+ Keywords: health,open-mhealth,wearables,fhir,schema-conversion
10
+ Classifier: Development Status :: 5 - Production/Stable
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Intended Audience :: Healthcare Industry
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Programming Language :: Python :: 3.14
19
+ Classifier: Topic :: Scientific/Engineering :: Medical Science Apps.
20
+ Requires-Python: >=3.11
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ License-File: AUTHORS.md
24
+ Requires-Dist: jsonschema<5.0,>=4.18
25
+ Requires-Dist: referencing>=0.30
26
+ Provides-Extra: dev
27
+ Requires-Dist: pytest>=8.0; extra == "dev"
28
+ Requires-Dist: pytest-cov>=5.0; extra == "dev"
29
+ Requires-Dist: ruff>=0.5; extra == "dev"
30
+ Requires-Dist: mypy>=1.10; extra == "dev"
31
+ Dynamic: license-file
32
+
33
+ # omh-shim
34
+
35
+ Convert wearable health data from vendor schemas to [Open mHealth](https://www.openmhealth.org/) schemas.
36
+
37
+ ## Status
38
+
39
+ v1.0 — initial public release. Public API is stable; converter coverage will continue to expand.
40
+
41
+ ## Install
42
+
43
+ ```bash
44
+ pip install git+https://github.com/jupyterhealth/omh-shim.git@v1.0.2
45
+ ```
46
+
47
+ ## Usage
48
+
49
+ Timestamp-based data types (one reading at a known instant):
50
+
51
+ ```python
52
+ from omh_shim import convert
53
+
54
+ omh_record = convert(
55
+ source="ow_normalized",
56
+ data_type="heart_rate",
57
+ sample={
58
+ "timestamp": "2026-04-09T08:30:00+00:00",
59
+ "type": "heart_rate",
60
+ "value": 72,
61
+ "unit": "bpm",
62
+ "source": {"source_name": "Oura Ring", "device_model": "Oura Gen 3"},
63
+ },
64
+ )
65
+ ```
66
+
67
+ Daily data types (``step_count``, ``physical_activity``, ``sleep_duration``)
68
+ aggregate over a calendar day, so they REQUIRE an explicit timezone so the day
69
+ boundaries reflect the user's local day rather than silently assuming UTC:
70
+
71
+ ```python
72
+ from datetime import UTC
73
+ from zoneinfo import ZoneInfo
74
+
75
+ # UTC-anchored upstream data
76
+ convert(
77
+ source="oura_raw",
78
+ data_type="step_count",
79
+ sample={"day": "2026-04-09", "steps": 8432},
80
+ tz=UTC,
81
+ )
82
+
83
+ # User's local timezone
84
+ convert(
85
+ source="oura_raw",
86
+ data_type="step_count",
87
+ sample={"day": "2026-04-09", "steps": 8432},
88
+ tz=ZoneInfo("America/Los_Angeles"),
89
+ )
90
+ ```
91
+
92
+ `convert` always returns the full IEEE 1752.1 data-point envelope with
93
+ UUID, schema_id components, creation timestamp, modality, and (when the
94
+ source device can be determined) `external_datasheets`:
95
+
96
+ ```python
97
+ convert(
98
+ source="oura_raw",
99
+ data_type="heart_rate",
100
+ sample={"bpm": 72, "timestamp": "2026-04-09T08:00:00Z"},
101
+ validate=True,  # the default; pass False to skip schema validation
102
+ # There are no header/external_datasheets arguments: the envelope is
103
+ # always returned, and external_datasheets is derived from the sample's
104
+ # "source" metadata or, failing that, from the source name.
105
+ )
106
+ # Returns:
107
+ # {
108
+ # "header": {
109
+ # "uuid": "...",
110
+ # "schema_id": {"namespace": "omh", "name": "heart-rate", "version": "2.0"},
111
+ # "source_creation_date_time": "...",
112
+ # "modality": "sensed",
113
+ # "external_datasheets": [{"datasheet_type": "manufacturer", "datasheet_reference": "Oura Ring"}]
114
+ # },
115
+ # "body": {
116
+ # "heart_rate": {"value": 72.0, "unit": "beats/min"},
117
+ # "effective_time_frame": {"date_time": "2026-04-09T08:00:00Z"}
118
+ # }
119
+ # }
120
+ ```
121
+
122
+ `convert` raises `ConversionError` for unknown `(source, data_type)` pairs,
123
+ invalid sample shapes, naive (timezone-less) datetimes, or a missing ``tz``
124
+ for daily data types. It raises `ValidationError` if the converter output
125
+ fails schema validation.
126
+
127
+ ## Supported sources and data types (v1.0)
128
+
129
+ | `source` | `data_type` values |
130
+ |---|---|
131
+ | `ow_normalized` | `heart_rate`, `heart_rate_variability`, `step_count`, `sleep_duration`, `sleep_episode`, `physical_activity`, `oxygen_saturation` |
132
+ | `oura_raw` | `heart_rate`, `heart_rate_variability`, `step_count`, `sleep_duration`, `sleep_episode`, `physical_activity` |
133
+
134
+ Note: `heart_rate_variability` targets the local placeholder schema
135
+ `local:heart-rate-variability:1.0` (Open mHealth has not published a canonical
136
+ HRV schema as of 2026-04). The `local:` namespace is deliberate — downstream
137
+ consumers should not assume OMH-standard interoperability for HRV records.
138
+
139
+ ## Mapping references
140
+
141
+ - [`docs/mappings/oura_raw.md`](docs/mappings/oura_raw.md) — Oura Ring v2 API → OMH (body fields)
142
+ - [`docs/mappings/ow_normalized.md`](docs/mappings/ow_normalized.md) — Open Wearables normalized API → OMH (body fields)
143
+ - [`docs/mappings/ieee-1752-header.md`](docs/mappings/ieee-1752-header.md) — IEEE 1752.1 data-point header envelope
144
+
145
+ ## Credits
146
+
147
+ `omh_shim/sources/oura_raw.py` ports converter mapping logic with permission
148
+ from [dicristea/oura-clinical-workbench](https://github.com/dicristea/oura-clinical-workbench/tree/main/data_syn).
149
+ See [AUTHORS.md](AUTHORS.md).
150
+
151
+ ## License
152
+
153
+ MIT. See [LICENSE](LICENSE).
@@ -0,0 +1,121 @@
1
+ # omh-shim
2
+
3
+ Convert wearable health data from vendor schemas to [Open mHealth](https://www.openmhealth.org/) schemas.
4
+
5
+ ## Status
6
+
7
+ v1.0 — initial public release. Public API is stable; converter coverage will continue to expand.
8
+
9
+ ## Install
10
+
11
+ ```bash
12
+ pip install git+https://github.com/jupyterhealth/omh-shim.git@v1.0.2
13
+ ```
14
+
15
+ ## Usage
16
+
17
+ Timestamp-based data types (one reading at a known instant):
18
+
19
+ ```python
20
+ from omh_shim import convert
21
+
22
+ omh_record = convert(
23
+ source="ow_normalized",
24
+ data_type="heart_rate",
25
+ sample={
26
+ "timestamp": "2026-04-09T08:30:00+00:00",
27
+ "type": "heart_rate",
28
+ "value": 72,
29
+ "unit": "bpm",
30
+ "source": {"source_name": "Oura Ring", "device_model": "Oura Gen 3"},
31
+ },
32
+ )
33
+ ```
34
+
35
+ Daily data types (``step_count``, ``physical_activity``, ``sleep_duration``)
36
+ aggregate over a calendar day, so they REQUIRE an explicit timezone so the day
37
+ boundaries reflect the user's local day rather than silently assuming UTC:
38
+
39
+ ```python
40
+ from datetime import UTC
41
+ from zoneinfo import ZoneInfo
42
+
43
+ # UTC-anchored upstream data
44
+ convert(
45
+ source="oura_raw",
46
+ data_type="step_count",
47
+ sample={"day": "2026-04-09", "steps": 8432},
48
+ tz=UTC,
49
+ )
50
+
51
+ # User's local timezone
52
+ convert(
53
+ source="oura_raw",
54
+ data_type="step_count",
55
+ sample={"day": "2026-04-09", "steps": 8432},
56
+ tz=ZoneInfo("America/Los_Angeles"),
57
+ )
58
+ ```
59
+
60
+ `convert` always returns the full IEEE 1752.1 data-point envelope with
61
+ UUID, schema_id components, creation timestamp, modality, and (when the
62
+ source device can be determined) `external_datasheets`:
63
+
64
+ ```python
65
+ convert(
66
+ source="oura_raw",
67
+ data_type="heart_rate",
68
+ sample={"bpm": 72, "timestamp": "2026-04-09T08:00:00Z"},
69
+ validate=True,  # the default; pass False to skip schema validation
70
+ # There are no header/external_datasheets arguments: the envelope is
71
+ # always returned, and external_datasheets is derived from the sample's
72
+ # "source" metadata or, failing that, from the source name.
73
+ )
74
+ # Returns:
75
+ # {
76
+ # "header": {
77
+ # "uuid": "...",
78
+ # "schema_id": {"namespace": "omh", "name": "heart-rate", "version": "2.0"},
79
+ # "source_creation_date_time": "...",
80
+ # "modality": "sensed",
81
+ # "external_datasheets": [{"datasheet_type": "manufacturer", "datasheet_reference": "Oura Ring"}]
82
+ # },
83
+ # "body": {
84
+ # "heart_rate": {"value": 72.0, "unit": "beats/min"},
85
+ # "effective_time_frame": {"date_time": "2026-04-09T08:00:00Z"}
86
+ # }
87
+ # }
88
+ ```
89
+
90
+ `convert` raises `ConversionError` for unknown `(source, data_type)` pairs,
91
+ invalid sample shapes, naive (timezone-less) datetimes, or a missing ``tz``
92
+ for daily data types. It raises `ValidationError` if the converter output
93
+ fails schema validation.
94
+
95
+ ## Supported sources and data types (v1.0)
96
+
97
+ | `source` | `data_type` values |
98
+ |---|---|
99
+ | `ow_normalized` | `heart_rate`, `heart_rate_variability`, `step_count`, `sleep_duration`, `sleep_episode`, `physical_activity`, `oxygen_saturation` |
100
+ | `oura_raw` | `heart_rate`, `heart_rate_variability`, `step_count`, `sleep_duration`, `sleep_episode`, `physical_activity` |
101
+
102
+ Note: `heart_rate_variability` targets the local placeholder schema
103
+ `local:heart-rate-variability:1.0` (Open mHealth has not published a canonical
104
+ HRV schema as of 2026-04). The `local:` namespace is deliberate — downstream
105
+ consumers should not assume OMH-standard interoperability for HRV records.
106
+
107
+ ## Mapping references
108
+
109
+ - [`docs/mappings/oura_raw.md`](docs/mappings/oura_raw.md) — Oura Ring v2 API → OMH (body fields)
110
+ - [`docs/mappings/ow_normalized.md`](docs/mappings/ow_normalized.md) — Open Wearables normalized API → OMH (body fields)
111
+ - [`docs/mappings/ieee-1752-header.md`](docs/mappings/ieee-1752-header.md) — IEEE 1752.1 data-point header envelope
112
+
113
+ ## Credits
114
+
115
+ `omh_shim/sources/oura_raw.py` ports converter mapping logic with permission
116
+ from [dicristea/oura-clinical-workbench](https://github.com/dicristea/oura-clinical-workbench/tree/main/data_syn).
117
+ See [AUTHORS.md](AUTHORS.md).
118
+
119
+ ## License
120
+
121
+ MIT. See [LICENSE](LICENSE).
@@ -0,0 +1,111 @@
1
+ """omh-shim: convert wearable health data to Open mHealth schemas."""
2
+
3
+ from collections.abc import Mapping
4
+ from datetime import tzinfo
5
+ from types import MappingProxyType
6
+ from typing import Any
7
+
8
+ from omh_shim import _dispatch, _schema_loader, _validate
9
+ from omh_shim._helpers import build_header
10
+ from omh_shim.errors import ConversionError, ValidationError
11
+
12
+ __all__ = ["convert", "ConversionError", "ValidationError", "SCHEMA_IDS"]
13
+ __version__ = "1.0.2"
14
+
15
# Public, read-only table: data_type name -> "namespace:name:version" schema id.
# Wrapped in MappingProxyType so importers cannot mutate it.
SCHEMA_IDS: Mapping[str, str] = MappingProxyType({
    "heart_rate": "omh:heart-rate:2.0",
    "heart_rate_variability": "local:heart-rate-variability:1.0",
    "step_count": "omh:step-count:3.0",
    "sleep_duration": "omh:sleep-duration:2.0",
    "sleep_episode": "omh:sleep-episode:1.1",
    "physical_activity": "omh:physical-activity:1.2",
    "oxygen_saturation": "omh:oxygen-saturation:2.0",
})
"""Read-only mapping of data_type -> schema id. ``heart_rate_variability``
uses a ``local:`` namespace placeholder (OMH has no canonical HRV schema)."""

# Fail fast if someone adds a converter without a schema id (or vice versa),
# or a schema id without a loader filename entry. Uses raise (not assert)
# so it survives python -O.
# These checks run at import time, so any drift makes ``import omh_shim`` fail.
# Set of data_type names that have at least one registered converter.
_registered = {dt for (_, dt) in _dispatch.REGISTRY}
if _registered != SCHEMA_IDS.keys():
    # The symmetric difference names exactly the entries present on one side only.
    raise RuntimeError(f"REGISTRY/SCHEMA_IDS drift: {_registered ^ SCHEMA_IDS.keys()}")
if set(SCHEMA_IDS.values()) != _schema_loader.known_ids():
    raise RuntimeError(
        f"SCHEMA_IDS/loader drift: {set(SCHEMA_IDS.values()) ^ _schema_loader.known_ids()}"
    )
# Temporary used only by the checks above; removed so it is not left in the
# module namespace.
del _registered


# Fallback device name per ``source`` identifier, used by
# ``_extract_datasheets`` when the sample itself carries no usable
# ``source`` metadata.
_SOURCE_DEVICE_MAP: Mapping[str, str] = MappingProxyType({
    "oura_raw": "Oura Ring",
})
43
+
44
+
45
+ def _extract_datasheets(
46
+ sample: Mapping[str, Any], *, source: str | None = None,
47
+ ) -> list[dict[str, str]] | None:
48
+ """Extract external_datasheets from the sample's source metadata.
49
+
50
+ OW normalized samples include ``source.provider`` and ``source.device``
51
+ as a nested dict; that more-specific metadata wins when present. Raw
52
+ samples (e.g. ``oura_raw``) don't carry a source field — for those, the
53
+ device is implicit from the ``source`` parameter and resolved via
54
+ ``_SOURCE_DEVICE_MAP``.
55
+ """
56
+ source_meta = sample.get("source") if isinstance(sample, Mapping) else None
57
+ if isinstance(source_meta, Mapping):
58
+ device = source_meta.get("device") or source_meta.get("device_model")
59
+ provider = source_meta.get("provider") or source_meta.get("source_name")
60
+ ref = device or provider
61
+ if ref:
62
+ return [{"datasheet_type": "manufacturer", "datasheet_reference": str(ref)}]
63
+ if source and source in _SOURCE_DEVICE_MAP:
64
+ return [{
65
+ "datasheet_type": "manufacturer",
66
+ "datasheet_reference": _SOURCE_DEVICE_MAP[source],
67
+ }]
68
+ return None
69
+
70
+
71
def convert(
    source: str,
    data_type: str,
    sample: Mapping[str, Any],
    *,
    tz: tzinfo | None = None,
    validate: bool = True,
) -> dict[str, Any]:
    """Convert one source sample to one Open mHealth data-point.

    Always returns the full IEEE 1752.1 data-point envelope::

        {"header": {...}, "body": {...}}

    The header includes ``uuid``, ``schema_id``, ``source_creation_date_time``,
    ``modality``, and ``external_datasheets`` (auto-populated from the sample's
    source metadata when available).

    Args:
        source: Identifier of the upstream format (e.g. ``"oura_raw"``).
        data_type: Key of ``SCHEMA_IDS`` naming the measurement type.
        sample: One source record, as a mapping.
        tz: Required for daily data types (step_count, physical_activity,
            sleep_duration) — pass ``datetime.UTC`` or a ``ZoneInfo``.
        validate: When true (default), the converted body is validated
            against the vendored schema before being returned.

    Raises:
        ConversionError: Unknown ``(source, data_type)`` pair, ``sample`` is
            not a mapping, or the converter rejected the sample.
        ValidationError: The converted body does not match the schema
            (only when ``validate=True``).
    """
    converter = _dispatch.lookup(source, data_type)
    # Reject non-mapping samples up front: converters may call ``sample.get``,
    # which would otherwise surface as an AttributeError instead of the
    # documented ConversionError.
    if not isinstance(sample, Mapping):
        raise ConversionError(
            f"{source}/{data_type}: sample must be a mapping, "
            f"got {type(sample).__name__}"
        )
    try:
        body = converter(sample, tz=tz)
    except (KeyError, ValueError, TypeError) as e:
        # Normalise the usual "bad sample shape" failures into one error type,
        # keeping the original exception as the cause.
        raise ConversionError(
            f"{source}/{data_type}: {type(e).__name__}: {e}"
        ) from e
    schema_id = SCHEMA_IDS[data_type]
    if validate:
        _validate.validate_output(body, schema_id)
    return {
        "header": build_header(
            schema_id,
            external_datasheets=_extract_datasheets(sample, source=source),
        ),
        "body": body,
    }
@@ -0,0 +1,42 @@
1
+ """(source, data_type) -> converter function lookup."""
2
+
3
+ from collections.abc import Mapping
4
+ from datetime import tzinfo
5
+ from types import MappingProxyType
6
+ from typing import Any, Protocol
7
+
8
+ from omh_shim.errors import ConversionError
9
+ from omh_shim.sources import oura_raw, ow_normalized
10
+
11
+
12
+ class _Converter(Protocol):
13
+ def __call__(
14
+ self, sample: Mapping[str, Any], *, tz: tzinfo | None
15
+ ) -> dict[str, Any]: ...
16
+
17
+
18
# Read-only dispatch table keyed by (source, data_type). ``lookup`` below is
# the intended accessor. Note that ``oxygen_saturation`` is registered for
# ``ow_normalized`` only.
REGISTRY: Mapping[tuple[str, str], _Converter] = MappingProxyType({
    ("oura_raw", "heart_rate"): oura_raw.heart_rate,
    ("oura_raw", "heart_rate_variability"): oura_raw.heart_rate_variability,
    ("oura_raw", "step_count"): oura_raw.step_count,
    ("oura_raw", "sleep_duration"): oura_raw.sleep_duration,
    ("oura_raw", "sleep_episode"): oura_raw.sleep_episode,
    ("oura_raw", "physical_activity"): oura_raw.physical_activity,
    ("ow_normalized", "heart_rate"): ow_normalized.heart_rate,
    ("ow_normalized", "heart_rate_variability"): ow_normalized.heart_rate_variability,
    ("ow_normalized", "step_count"): ow_normalized.step_count,
    ("ow_normalized", "sleep_duration"): ow_normalized.sleep_duration,
    ("ow_normalized", "sleep_episode"): ow_normalized.sleep_episode,
    ("ow_normalized", "physical_activity"): ow_normalized.physical_activity,
    ("ow_normalized", "oxygen_saturation"): ow_normalized.oxygen_saturation,
})
33
+
34
+
35
def lookup(source: str, data_type: str) -> _Converter:
    """Fetch the converter registered for ``(source, data_type)``.

    Raises ``ConversionError`` (chained to the underlying ``KeyError``) when
    no converter is registered for the pair.
    """
    key = (source, data_type)
    try:
        converter = REGISTRY[key]
    except KeyError as missing:
        message = f"No converter for source={source!r} data_type={data_type!r}"
        raise ConversionError(message) from missing
    return converter
@@ -0,0 +1,123 @@
1
+ """Shared helpers for source converters."""
2
+
3
+ import uuid
4
+ from collections.abc import Callable, Mapping
5
+ from datetime import UTC, datetime, timedelta, tzinfo
6
+ from typing import Any
7
+
8
+ from omh_shim.errors import ConversionError
9
+
10
+
11
def parse_datetime(value: Any) -> datetime:
    """Parse an ISO-8601 string into a timezone-aware datetime.

    Rejects naive datetimes — silent UTC coercion is a clinical-data footgun.

    Args:
        value: An ISO-8601 string (surrounding whitespace is ignored and a
            trailing ``Z`` is accepted as UTC) or an existing ``datetime``,
            which is returned as-is once it passes the awareness check.

    Returns:
        The aware ``datetime``.

    Raises:
        ConversionError: ``value`` is neither a ``str`` nor a ``datetime``,
            is not valid ISO-8601, or carries no UTC offset.
    """
    if isinstance(value, datetime):
        dt = value
    elif isinstance(value, str):
        s = value.strip()
        # Spell a trailing "Z" as an explicit offset before parsing.
        if s.endswith("Z"):
            s = s[:-1] + "+00:00"
        try:
            dt = datetime.fromisoformat(s)
        except ValueError as e:
            raise ConversionError(f"invalid ISO-8601 datetime: {value!r}") from e
    else:
        raise ConversionError(
            f"expected ISO-8601 datetime string, got {type(value).__name__}"
        )
    # A datetime is aware only if its tzinfo yields an offset; utcoffset()
    # returns None both when tzinfo is None and when the tzinfo object itself
    # declines to give one, so this covers both naive cases.
    if dt.utcoffset() is None:
        raise ConversionError(
            f"datetime {value!r} has no timezone; omh-shim requires explicit "
            "timezone offsets to avoid silently misaligning clinical data"
        )
    return dt
36
+
37
+
38
def isoformat(dt: datetime) -> str:
    """ISO-8601 with ``Z`` suffix when the offset is UTC.

    Only an exact trailing ``+00:00`` is rewritten. Offsets that merely start
    with it (sub-minute offsets such as ``+00:00:30``) are left untouched, as
    is the output for naive datetimes.
    """
    text = dt.isoformat()
    utc_suffix = "+00:00"
    if text.endswith(utc_suffix):
        return text[: -len(utc_suffix)] + "Z"
    return text
41
+
42
+
43
def day_interval(date_str: str, *, tz: tzinfo | None) -> dict[str, Any]:
    """OMH time_interval covering one calendar day in ``tz``.

    Args:
        date_str: ISO-8601 date (``YYYY-MM-DD``). If a time of day or offset
            is present it is discarded, so the interval always starts at
            local midnight of that date.
        tz: Timezone whose calendar day is meant. Required — a "day" in
            Tokyo is not a "day" in UTC.

    Returns:
        ``{"start_date_time": ..., "end_date_time": ...}`` where the end is
        the start plus one day.

    Raises:
        ConversionError: ``tz`` is None.
        ValueError: ``date_str`` is not valid ISO-8601.
    """
    if tz is None:
        raise ConversionError(
            "this data type requires an explicit timezone — pass tz=... to "
            "convert() so day boundaries reflect the user's local calendar day"
        )
    # Pin the start to midnight so a stray time component in the input cannot
    # shift the interval off the calendar-day boundary.
    start = datetime.fromisoformat(date_str).replace(
        hour=0, minute=0, second=0, microsecond=0, tzinfo=tz
    )
    end = start + timedelta(days=1)
    return {"start_date_time": isoformat(start), "end_date_time": isoformat(end)}
54
+
55
+
56
def interval_from_bounds(start: str, end: str) -> dict[str, Any]:
    """Build an OMH time_interval from explicit ISO-8601 start and end values.

    Both bounds go through ``parse_datetime`` and are re-serialised with
    ``isoformat``.
    """
    begin = parse_datetime(start)
    finish = parse_datetime(end)
    return {
        "start_date_time": isoformat(begin),
        "end_date_time": isoformat(finish),
    }
62
+
63
+
64
def date_time_frame(timestamp: Any) -> dict[str, Any]:
    """Build an OMH effective_time_frame holding a single ``date_time``.

    ``timestamp`` is parsed with ``parse_datetime`` and re-serialised with
    ``isoformat``.
    """
    moment = parse_datetime(timestamp)
    return {"date_time": isoformat(moment)}
67
+
68
+
69
def unit_value(
    value: Any,
    unit: str,
    cast: Callable[[Any], Any] = float,
) -> dict[str, Any]:
    """Build an OMH unit_value dict.

    ``value`` is passed through ``cast`` (``float`` by default) and ``unit``
    is stored unchanged.
    """
    converted = cast(value)
    return dict(value=converted, unit=unit)
76
+
77
+
78
def build_header(
    schema_id: str,
    *,
    external_datasheets: list[dict[str, str]] | None = None,
) -> dict[str, Any]:
    """Assemble an IEEE 1752.1 data-point header (``header-1.0.json``).

    ``schema_id`` must have the form ``namespace:name:version``; it is split
    into those three components. A fresh random UUID and the current UTC time
    are generated on every call, and ``modality`` is always ``"sensed"``.
    ``external_datasheets`` is included only when it is non-empty.

    ``acquisition_provenance`` is deliberately absent: it belongs to the older
    OMH data-point schema, not to IEEE 1752.1.
    """
    namespace, name, version = schema_id.split(":", 2)
    point_id = str(uuid.uuid4())
    created_at = isoformat(datetime.now(UTC))
    header: dict[str, Any] = {
        "uuid": point_id,
        "schema_id": {"namespace": namespace, "name": name, "version": version},
        "source_creation_date_time": created_at,
        "modality": "sensed",
    }
    if not external_datasheets:
        return header
    header["external_datasheets"] = external_datasheets
    return header
106
+
107
+
108
def set_optional(
    out: dict[str, Any],
    out_key: str,
    sample: Mapping[str, Any],
    field: str,
    *,
    unit: str,
    cast: Callable[[Any], Any] = float,
    scale: float = 1,
) -> None:
    """Set ``out[out_key]`` to a unit_value if ``sample[field]`` is present
    and not None. Scale is applied before cast (so 32.5 min * 60 -> 1950 sec).

    Args:
        out: Dict mutated in place; left untouched when the field is absent
            or None.
        out_key: Key to write in ``out``.
        sample: Source record to read from.
        field: Key to read in ``sample``.
        unit: Unit string stored alongside the value.
        cast: Applied to the scaled value (``float`` by default).
        scale: Multiplier applied before ``cast``.

    Raises:
        ValueError: The field is a string that is not a valid number.
    """
    v = sample.get(field)
    if v is None:
        return
    # Numeric strings must be converted before scaling: ``"2" * 60`` is string
    # repetition, which would otherwise be cast into a wildly wrong number.
    if isinstance(v, str):
        v = float(v)
    out[out_key] = {"value": cast(v * scale), "unit": unit}
@@ -0,0 +1,36 @@
1
+ """Load vendored OMH JSON schemas by schema id.
2
+
3
+ Uses an explicit lookup table instead of string substitution to avoid
4
+ filename collisions (``omh:a.b:1.0`` and ``omh:a-b:1.0`` would collide
5
+ under ``:``->``_``, ``.``->``-`` encoding).
6
+ """
7
+
8
+ import importlib.resources
9
+ import json
10
+ from functools import cache
11
+ from typing import Any
12
+
13
# Explicit schema id -> vendored filename table, resolved inside the
# ``omh_shim.schemas`` package by ``load``. ``known_ids`` exposes the keys.
_FILENAMES: dict[str, str] = {
    "omh:heart-rate:2.0": "omh_heart-rate_2-0.json",
    "local:heart-rate-variability:1.0": "local_heart-rate-variability_1-0.json",
    "omh:step-count:3.0": "omh_step-count_3-0.json",
    "omh:sleep-duration:2.0": "omh_sleep-duration_2-0.json",
    "omh:sleep-episode:1.1": "omh_sleep-episode_1-1.json",
    "omh:physical-activity:1.2": "omh_physical-activity_1-2.json",
    "omh:oxygen-saturation:2.0": "omh_oxygen-saturation_2-0.json",
}
22
+
23
+
24
def known_ids() -> frozenset[str]:
    """Return the schema ids that have a filename entry in this loader."""
    return frozenset(_FILENAMES.keys())
27
+
28
+
29
@cache
def load(schema_id: str) -> dict[str, Any]:
    """Return the vendored JSON schema registered under ``schema_id``.

    Results are cached per id, so repeated calls return the same dict object.
    Raises ``KeyError`` if ``schema_id`` has no filename entry.
    """
    filename = _FILENAMES[schema_id]
    schema_file = importlib.resources.files("omh_shim.schemas") / filename
    parsed: dict[str, Any] = json.loads(schema_file.read_text(encoding="utf-8"))
    return parsed
@@ -0,0 +1,64 @@
1
+ """Validate converter outputs against vendored OMH schemas.
2
+
3
+ OMH schemas use ``$ref`` to reference other schemas by relative filename
4
+ (e.g. ``"unit-value-1.x.json"``). All transitively-referenced schemas are
5
+ vendored alongside the top-level OMH schemas in ``omh_shim/schemas/`` so
6
+ ref resolution can be served from local files without network access.
7
+ """
8
+
9
+ import importlib.resources
10
+ import json
11
+ from functools import lru_cache
12
+ from typing import Any
13
+
14
+ from jsonschema import Draft7Validator
15
+ from referencing import Registry, Resource
16
+ from referencing.jsonschema import DRAFT7
17
+
18
+ from omh_shim._schema_loader import load as load_schema
19
+ from omh_shim.errors import ValidationError
20
+
21
+
22
# maxsize is a small constant rather than unbounded: omh_shim.SCHEMA_IDS
# currently lists 7 top-level schema ids, so 16 leaves headroom for new
# data types while still capping the cache.
@lru_cache(maxsize=16)
def _validator(schema_id: str) -> Draft7Validator:
    """Return the Draft7Validator for ``schema_id``, building it on first use."""
    schema = load_schema(schema_id)
    return Draft7Validator(schema, registry=_registry())
29
+
30
+
31
@lru_cache(maxsize=1)
def _registry() -> Registry:
    """Create the referencing.Registry used to resolve ``$ref`` locally.

    Every ``*.json`` file in the ``omh_shim.schemas`` package is registered
    under its bare filename; other entries are ignored. Built once and cached.
    """
    package = importlib.resources.files("omh_shim.schemas")
    named_resources = [
        (
            item.name,
            Resource.from_contents(
                json.loads(item.read_text(encoding="utf-8")),
                default_specification=DRAFT7,
            ),
        )
        for item in package.iterdir()
        if item.name.endswith(".json")
    ]
    return Registry().with_resources(named_resources)
44
+
45
+
46
def validate_output(output: dict[str, Any], schema_id: str) -> None:
    """Check ``output`` against the schema registered as ``schema_id``.

    Returns ``None`` when the document is valid. Otherwise raises
    ``ValidationError`` whose message lists every violation, ordered by the
    violation's path inside the document, as ``path: message`` pairs joined
    with ``"; "`` (the document root is shown as ``<root>``).
    """
    problems = list(_validator(schema_id).iter_errors(output))
    if not problems:
        return
    problems.sort(key=lambda err: list(err.absolute_path))

    def describe(err: Any) -> str:
        # Slash-separated location of the failing value, or a root marker.
        location = "/".join(map(str, err.absolute_path)) or "<root>"
        return f"{location}: {err.message}"

    details = "; ".join(describe(err) for err in problems)
    raise ValidationError(
        f"Output does not conform to {schema_id}: " + details
    )