linkml-redcap 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- linkml_redcap-0.1.0/LICENSE +21 -0
- linkml_redcap-0.1.0/PKG-INFO +192 -0
- linkml_redcap-0.1.0/README.md +164 -0
- linkml_redcap-0.1.0/pyproject.toml +76 -0
- linkml_redcap-0.1.0/src/linkml_redcap/__init__.py +22 -0
- linkml_redcap-0.1.0/src/linkml_redcap/_resources.py +29 -0
- linkml_redcap-0.1.0/src/linkml_redcap/_version.py +8 -0
- linkml_redcap-0.1.0/src/linkml_redcap/data_dictionary/__init__.py +25 -0
- linkml_redcap-0.1.0/src/linkml_redcap/data_dictionary/schema/README.md +3 -0
- linkml_redcap-0.1.0/src/linkml_redcap/data_dictionary/schema/redcap_data_dictionary.yaml +421 -0
- linkml_redcap-0.1.0/src/linkml_redcap/py.typed +0 -0
- linkml_redcap-0.1.0/src/linkml_redcap/record/__init__.py +39 -0
- linkml_redcap-0.1.0/src/linkml_redcap/record/grouping.py +275 -0
- linkml_redcap-0.1.0/src/linkml_redcap/record/schema/README.md +26 -0
- linkml_redcap-0.1.0/src/linkml_redcap/record/schema/redcap_record.yaml +312 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 P2GX
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: linkml-redcap
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: LinkML schemas modelling REDCap structures — the data dictionary and the record-data envelope
|
|
5
|
+
Home-page: https://github.com/linkml/linkml-redcap
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: linkml,redcap,data-dictionary,ontology,fair,rare-disease,phenopackets
|
|
8
|
+
Author: Adam SL Graefe
|
|
9
|
+
Author-email: adam.graefe@charite.de
|
|
10
|
+
Requires-Python: >=3.10,<3.13
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Intended Audience :: Healthcare Industry
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Medical Science Apps.
|
|
23
|
+
Requires-Dist: linkml-runtime (>=1.9.4)
|
|
24
|
+
Project-URL: Documentation, https://linkml.github.io/linkml-redcap
|
|
25
|
+
Project-URL: Repository, https://github.com/linkml/linkml-redcap
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
|
|
28
|
+
# linkml-redcap
|
|
29
|
+
|
|
30
|
+
[CI status](https://github.com/linkml/linkml-redcap/actions/workflows/ci.yaml)
|
|
31
|
+
[PyPI version](https://badge.fury.io/py/linkml-redcap)
|
|
32
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
33
|
+
[Documentation](https://linkml.github.io/linkml-redcap)
|
|
34
|
+
|
|
35
|
+
LinkML schemas modelling **REDCap structures**. This package is the canonical,
|
|
36
|
+
vendor-neutral source of the REDCap primitives that downstream projects build
|
|
37
|
+
on — what a valid data dictionary looks like, and what record data looks like in
|
|
38
|
+
both its flat and structured shapes. You can find more information in the [documentation](https://linkml.github.io/linkml-redcap).
|
|
39
|
+
|
|
40
|
+
It is an umbrella with two submodules:
|
|
41
|
+
|
|
42
|
+
- **`data_dictionary`** — the meta-schema that formalises a REDCap *data
|
|
43
|
+
dictionary*: the 18-column CSV structure, field-naming rules, field/validation
|
|
44
|
+
types, and matrix groups.
|
|
45
|
+
- **`record`** — a reusable envelope for REDCap *record data*: the structural
|
|
46
|
+
fields REDCap adds to every export (`record_id`, `redcap_event_name`,
|
|
47
|
+
`redcap_repeat_instrument`, `redcap_repeat_instance`, ...), the universal
|
|
48
|
+
`*_complete` / yes-no / true-false / checkbox value spaces, the REDCap
|
|
49
|
+
date/time string types, and the `RepeatedElement` wrapper — plus the generic
|
|
50
|
+
flat ⇄ structured grouping step.
|
|
51
|
+
|
|
52
|
+
It contains **no project-specific or RareLink-specific rules**. Those conventions
|
|
53
|
+
(variable naming, instrument naming, annotation profile) live in
|
|
54
|
+
[RareLink](https://github.com/BIH-CEI/rarelink) and are layered on top of these
|
|
55
|
+
primitives. Contributions and collaboration are welcome.
|
|
56
|
+
|
|
57
|
+
## Install
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
pip install linkml-redcap
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## Usage
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
# --- the data dictionary meta-schema ---
|
|
67
|
+
from linkml_redcap.data_dictionary import schema_view as dd_view
|
|
68
|
+
sv = dd_view()
|
|
69
|
+
assert "Field" in sv.all_classes()
|
|
70
|
+
assert "FieldType" in sv.all_enums()
|
|
71
|
+
|
|
72
|
+
# --- the record-data envelope ---
|
|
73
|
+
from linkml_redcap.record import schema_view as record_view, group_flat_records
|
|
74
|
+
rv = record_view()
|
|
75
|
+
assert "FlatRecord" in rv.all_classes()
|
|
76
|
+
assert "StructuredRecord" in rv.all_classes()
|
|
77
|
+
assert "FormCompleteStatus" in rv.all_enums()
|
|
78
|
+
|
|
79
|
+
# group a flat REDCap export into per-record objects (generic structural step)
|
|
80
|
+
grouped = group_flat_records(flat_rows, drop_empty=True)
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Each submodule exposes the same loader API — `schema_path() -> Path` (for LinkML
|
|
84
|
+
CLI tools) and `schema_view() -> SchemaView` (for introspection). Schemas load
|
|
85
|
+
via `importlib.resources`, so they work from any install location — regular
|
|
86
|
+
installs, zipped wheels, or air-gapped environments.
|
|
87
|
+
|
|
88
|
+
## What's in it
|
|
89
|
+
|
|
90
|
+
### `linkml_redcap.data_dictionary`
|
|
91
|
+
|
|
92
|
+
A LinkML meta-schema for the REDCap data dictionary CSV:
|
|
93
|
+
|
|
94
|
+
- **`DataDictionary`** — tree root, a complete DD
|
|
95
|
+
- **`Field`** — one CSV row, all 18 REDCap columns as typed/constrained slots
|
|
96
|
+
- **`Choice`** — structured representation of one permissible value
|
|
97
|
+
- **`Instrument`** — logical grouping of contiguous fields sharing a `form_name`
|
|
98
|
+
|
|
99
|
+
Enums cover REDCap's native value spaces (`FieldType`, `TextValidationType`
|
|
100
|
+
incl. the European comma-decimal variants, `CustomAlignment`, `IdentifierStatus`,
|
|
101
|
+
`MatrixRanking`).
|
|
102
|
+
|
|
103
|
+
> This schema is **vendor-neutral REDCap only**. RareLink ontology conventions —
|
|
104
|
+
> variable-name prefixes (`snomedct_…`), the structured `field_annotation`
|
|
105
|
+
> grammar, and BioPortal/ontology curation — are **not** defined here; they live
|
|
106
|
+
> in [rarelink](https://github.com/BIH-CEI/rarelink).
|
|
107
|
+
|
|
108
|
+
### `linkml_redcap.record`
|
|
109
|
+
|
|
110
|
+
The single source of truth for the **REDCap record representation** — the actual
|
|
111
|
+
shape of REDCap record data, in the two serializations the data uses:
|
|
112
|
+
|
|
113
|
+
- **`FlatRecord`** (abstract) — one flat-export row; a project specialises it via
|
|
114
|
+
`is_a: FlatRecord` and adds one typed slot per field variable.
|
|
115
|
+
- **`StructuredRecord`** (abstract) — the lossless record-grouped serialization:
|
|
116
|
+
one object per `record_id` with its repeating-instrument instances nested. A
|
|
117
|
+
project specialises it (`is_a`), marks itself `tree_root`, and adds its typed
|
|
118
|
+
non-repeating instrument slots.
|
|
119
|
+
- **`RepeatedElement`** — the `(redcap_repeat_instrument, redcap_repeat_instance)`
|
|
120
|
+
wrapper for one repeating-instrument instance. The canonical version of the
|
|
121
|
+
class rarelink and cieinr currently redeclare per repo — import this one.
|
|
122
|
+
- Structural slots: `record_id`, `redcap_event_name`, `redcap_repeat_instrument`,
|
|
123
|
+
`redcap_repeat_instance`, `redcap_data_access_group`,
|
|
124
|
+
`redcap_survey_identifier`, `repeated_elements`.
|
|
125
|
+
- Enums **`FormCompleteStatus`** (0/1/2), **`YesNo`**, **`TrueFalse`**,
|
|
126
|
+
**`CheckboxState`** — REDCap's universal value spaces.
|
|
127
|
+
- Types **`redcap_date`** / **`redcap_datetime`** / **`redcap_time`** /
|
|
128
|
+
**`redcap_integer`** / **`redcap_number`** / **`redcap_email`** — one typed
|
|
129
|
+
primitive per REDCap value space, so a project can give every field a defined
|
|
130
|
+
range (never a bare `string` where REDCap constrains the value, never `Any`).
|
|
131
|
+
- `grouping.group_flat_records` / `ungroup_records` — the lossless flat ⇄
|
|
132
|
+
structured conversion (the cardinality change `linkml-map` can't express),
|
|
133
|
+
depending only on REDCap-native fields.
|
|
134
|
+
|
|
135
|
+
> The structured envelope here is deliberately **generic, abstract and
|
|
136
|
+
> convention-free** — REDCap's own record-and-repeats model, serialized
|
|
137
|
+
> hierarchically. It defines **no** typed instrument classes, variable-naming
|
|
138
|
+
> rules, or ontology/Phenopacket mappings: that *semantic* layer is the
|
|
139
|
+
> consuming project's (the instrument classes that specialise these) and
|
|
140
|
+
> [rarelink](https://github.com/BIH-CEI/rarelink)'s (the conventions and the
|
|
141
|
+
> Phenopacket/FHIR engine). Nothing RareLink-specific is defined here.
|
|
142
|
+
|
|
143
|
+
See [`instructions/MAPPING_WORKFLOW.md`](instructions/MAPPING_WORKFLOW.md) for the
|
|
144
|
+
full flat → grouped → structured pipeline and how it feeds Phenopacket/FHIR export.
|
|
145
|
+
|
|
146
|
+
## Why this exists
|
|
147
|
+
|
|
148
|
+
REDCap data dictionaries are unstructured CSVs governed by implicit rules, and
|
|
149
|
+
REDCap record exports are flat and wide. Making both machine-readable lets
|
|
150
|
+
downstream tools **validate** any DD or export, **generate** DDs from higher-level
|
|
151
|
+
specs, and **transform** flat exports into structured models with `linkml-map`
|
|
152
|
+
instead of hand-written, per-instrument Python — the same envelope serving
|
|
153
|
+
RareLink-CDM, the MII KDS-SE editions, CIEINR, and any other RareLink-based model.
|
|
154
|
+
|
|
155
|
+
## Backwards compatibility
|
|
156
|
+
|
|
157
|
+
This package is an import dependency, so its schema `id`s and public
|
|
158
|
+
class/slot/enum/type names are treated as a stable API: additive minor releases
|
|
159
|
+
only, deprecate-don't-remove, breaking changes ⇒ major bump. The policy is in
|
|
160
|
+
[`instructions/COMPATIBILITY.md`](instructions/COMPATIBILITY.md) and enforced by
|
|
161
|
+
`tests/test_public_surface.py`, which fails CI if any public name disappears.
|
|
162
|
+
|
|
163
|
+
## Ecosystem
|
|
164
|
+
|
|
165
|
+
- [**RareLink**](https://github.com/BIH-CEI/rarelink) — the REDCap-based rare
|
|
166
|
+
disease interoperability framework that defines the rules built on these
|
|
167
|
+
primitives
|
|
168
|
+
- [**rd-cdm**](https://pypi.org/project/rd-cdm/) — ontology code systems & versions
|
|
169
|
+
|
|
170
|
+
## Development
|
|
171
|
+
|
|
172
|
+
```bash
|
|
173
|
+
git clone https://github.com/linkml/linkml-redcap
|
|
174
|
+
cd linkml-redcap
|
|
175
|
+
poetry install --with dev
|
|
176
|
+
|
|
177
|
+
# Validate the schemas
|
|
178
|
+
poetry run linkml-lint src/linkml_redcap/data_dictionary/schema/redcap_data_dictionary.yaml
|
|
179
|
+
poetry run linkml-lint src/linkml_redcap/record/schema/redcap_record.yaml
|
|
180
|
+
|
|
181
|
+
# Run tests
|
|
182
|
+
poetry run pytest
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
Releases are tag-triggered: push a `vX.Y.Z` tag matching the version in
|
|
186
|
+
`pyproject.toml` and GitHub Actions builds and publishes to PyPI via OIDC
|
|
187
|
+
trusted publishing.
|
|
188
|
+
|
|
189
|
+
## License
|
|
190
|
+
|
|
191
|
+
MIT
|
|
192
|
+
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
# linkml-redcap
|
|
2
|
+
|
|
3
|
+
[CI status](https://github.com/linkml/linkml-redcap/actions/workflows/ci.yaml)
|
|
4
|
+
[PyPI version](https://badge.fury.io/py/linkml-redcap)
|
|
5
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
6
|
+
[Documentation](https://linkml.github.io/linkml-redcap)
|
|
7
|
+
|
|
8
|
+
LinkML schemas modelling **REDCap structures**. This package is the canonical,
|
|
9
|
+
vendor-neutral source of the REDCap primitives that downstream projects build
|
|
10
|
+
on — what a valid data dictionary looks like, and what record data looks like in
|
|
11
|
+
both its flat and structured shapes. You can find more information in the [documentation](https://linkml.github.io/linkml-redcap).
|
|
12
|
+
|
|
13
|
+
It is an umbrella with two submodules:
|
|
14
|
+
|
|
15
|
+
- **`data_dictionary`** — the meta-schema that formalises a REDCap *data
|
|
16
|
+
dictionary*: the 18-column CSV structure, field-naming rules, field/validation
|
|
17
|
+
types, and matrix groups.
|
|
18
|
+
- **`record`** — a reusable envelope for REDCap *record data*: the structural
|
|
19
|
+
fields REDCap adds to every export (`record_id`, `redcap_event_name`,
|
|
20
|
+
`redcap_repeat_instrument`, `redcap_repeat_instance`, ...), the universal
|
|
21
|
+
`*_complete` / yes-no / true-false / checkbox value spaces, the REDCap
|
|
22
|
+
date/time string types, and the `RepeatedElement` wrapper — plus the generic
|
|
23
|
+
flat ⇄ structured grouping step.
|
|
24
|
+
|
|
25
|
+
It contains **no project-specific or RareLink-specific rules**. Those conventions
|
|
26
|
+
(variable naming, instrument naming, annotation profile) live in
|
|
27
|
+
[RareLink](https://github.com/BIH-CEI/rarelink) and are layered on top of these
|
|
28
|
+
primitives. Contributions and collaboration are welcome.
|
|
29
|
+
|
|
30
|
+
## Install
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
pip install linkml-redcap
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Usage
|
|
37
|
+
|
|
38
|
+
```python
|
|
39
|
+
# --- the data dictionary meta-schema ---
|
|
40
|
+
from linkml_redcap.data_dictionary import schema_view as dd_view
|
|
41
|
+
sv = dd_view()
|
|
42
|
+
assert "Field" in sv.all_classes()
|
|
43
|
+
assert "FieldType" in sv.all_enums()
|
|
44
|
+
|
|
45
|
+
# --- the record-data envelope ---
|
|
46
|
+
from linkml_redcap.record import schema_view as record_view, group_flat_records
|
|
47
|
+
rv = record_view()
|
|
48
|
+
assert "FlatRecord" in rv.all_classes()
|
|
49
|
+
assert "StructuredRecord" in rv.all_classes()
|
|
50
|
+
assert "FormCompleteStatus" in rv.all_enums()
|
|
51
|
+
|
|
52
|
+
# group a flat REDCap export into per-record objects (generic structural step)
|
|
53
|
+
grouped = group_flat_records(flat_rows, drop_empty=True)
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Each submodule exposes the same loader API — `schema_path() -> Path` (for LinkML
|
|
57
|
+
CLI tools) and `schema_view() -> SchemaView` (for introspection). Schemas load
|
|
58
|
+
via `importlib.resources`, so they work from any install location — regular
|
|
59
|
+
installs, zipped wheels, or air-gapped environments.
|
|
60
|
+
|
|
61
|
+
## What's in it
|
|
62
|
+
|
|
63
|
+
### `linkml_redcap.data_dictionary`
|
|
64
|
+
|
|
65
|
+
A LinkML meta-schema for the REDCap data dictionary CSV:
|
|
66
|
+
|
|
67
|
+
- **`DataDictionary`** — tree root, a complete DD
|
|
68
|
+
- **`Field`** — one CSV row, all 18 REDCap columns as typed/constrained slots
|
|
69
|
+
- **`Choice`** — structured representation of one permissible value
|
|
70
|
+
- **`Instrument`** — logical grouping of contiguous fields sharing a `form_name`
|
|
71
|
+
|
|
72
|
+
Enums cover REDCap's native value spaces (`FieldType`, `TextValidationType`
|
|
73
|
+
incl. the European comma-decimal variants, `CustomAlignment`, `IdentifierStatus`,
|
|
74
|
+
`MatrixRanking`).
|
|
75
|
+
|
|
76
|
+
> This schema is **vendor-neutral REDCap only**. RareLink ontology conventions —
|
|
77
|
+
> variable-name prefixes (`snomedct_…`), the structured `field_annotation`
|
|
78
|
+
> grammar, and BioPortal/ontology curation — are **not** defined here; they live
|
|
79
|
+
> in [rarelink](https://github.com/BIH-CEI/rarelink).
|
|
80
|
+
|
|
81
|
+
### `linkml_redcap.record`
|
|
82
|
+
|
|
83
|
+
The single source of truth for the **REDCap record representation** — the actual
|
|
84
|
+
shape of REDCap record data, in the two serializations the data uses:
|
|
85
|
+
|
|
86
|
+
- **`FlatRecord`** (abstract) — one flat-export row; a project specialises it via
|
|
87
|
+
`is_a: FlatRecord` and adds one typed slot per field variable.
|
|
88
|
+
- **`StructuredRecord`** (abstract) — the lossless record-grouped serialization:
|
|
89
|
+
one object per `record_id` with its repeating-instrument instances nested. A
|
|
90
|
+
project specialises it (`is_a`), marks itself `tree_root`, and adds its typed
|
|
91
|
+
non-repeating instrument slots.
|
|
92
|
+
- **`RepeatedElement`** — the `(redcap_repeat_instrument, redcap_repeat_instance)`
|
|
93
|
+
wrapper for one repeating-instrument instance. The canonical version of the
|
|
94
|
+
class rarelink and cieinr currently redeclare per repo — import this one.
|
|
95
|
+
- Structural slots: `record_id`, `redcap_event_name`, `redcap_repeat_instrument`,
|
|
96
|
+
`redcap_repeat_instance`, `redcap_data_access_group`,
|
|
97
|
+
`redcap_survey_identifier`, `repeated_elements`.
|
|
98
|
+
- Enums **`FormCompleteStatus`** (0/1/2), **`YesNo`**, **`TrueFalse`**,
|
|
99
|
+
**`CheckboxState`** — REDCap's universal value spaces.
|
|
100
|
+
- Types **`redcap_date`** / **`redcap_datetime`** / **`redcap_time`** /
|
|
101
|
+
**`redcap_integer`** / **`redcap_number`** / **`redcap_email`** — one typed
|
|
102
|
+
primitive per REDCap value space, so a project can give every field a defined
|
|
103
|
+
range (never a bare `string` where REDCap constrains the value, never `Any`).
|
|
104
|
+
- `grouping.group_flat_records` / `ungroup_records` — the lossless flat ⇄
|
|
105
|
+
structured conversion (the cardinality change `linkml-map` can't express),
|
|
106
|
+
depending only on REDCap-native fields.
|
|
107
|
+
|
|
108
|
+
> The structured envelope here is deliberately **generic, abstract and
|
|
109
|
+
> convention-free** — REDCap's own record-and-repeats model, serialized
|
|
110
|
+
> hierarchically. It defines **no** typed instrument classes, variable-naming
|
|
111
|
+
> rules, or ontology/Phenopacket mappings: that *semantic* layer is the
|
|
112
|
+
> consuming project's (the instrument classes that specialise these) and
|
|
113
|
+
> [rarelink](https://github.com/BIH-CEI/rarelink)'s (the conventions and the
|
|
114
|
+
> Phenopacket/FHIR engine). Nothing RareLink-specific is defined here.
|
|
115
|
+
|
|
116
|
+
See [`instructions/MAPPING_WORKFLOW.md`](instructions/MAPPING_WORKFLOW.md) for the
|
|
117
|
+
full flat → grouped → structured pipeline and how it feeds Phenopacket/FHIR export.
|
|
118
|
+
|
|
119
|
+
## Why this exists
|
|
120
|
+
|
|
121
|
+
REDCap data dictionaries are unstructured CSVs governed by implicit rules, and
|
|
122
|
+
REDCap record exports are flat and wide. Making both machine-readable lets
|
|
123
|
+
downstream tools **validate** any DD or export, **generate** DDs from higher-level
|
|
124
|
+
specs, and **transform** flat exports into structured models with `linkml-map`
|
|
125
|
+
instead of hand-written, per-instrument Python — the same envelope serving
|
|
126
|
+
RareLink-CDM, the MII KDS-SE editions, CIEINR, and any other RareLink-based model.
|
|
127
|
+
|
|
128
|
+
## Backwards compatibility
|
|
129
|
+
|
|
130
|
+
This package is an import dependency, so its schema `id`s and public
|
|
131
|
+
class/slot/enum/type names are treated as a stable API: additive minor releases
|
|
132
|
+
only, deprecate-don't-remove, breaking changes ⇒ major bump. The policy is in
|
|
133
|
+
[`instructions/COMPATIBILITY.md`](instructions/COMPATIBILITY.md) and enforced by
|
|
134
|
+
`tests/test_public_surface.py`, which fails CI if any public name disappears.
|
|
135
|
+
|
|
136
|
+
## Ecosystem
|
|
137
|
+
|
|
138
|
+
- [**RareLink**](https://github.com/BIH-CEI/rarelink) — the REDCap-based rare
|
|
139
|
+
disease interoperability framework that defines the rules built on these
|
|
140
|
+
primitives
|
|
141
|
+
- [**rd-cdm**](https://pypi.org/project/rd-cdm/) — ontology code systems & versions
|
|
142
|
+
|
|
143
|
+
## Development
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
git clone https://github.com/linkml/linkml-redcap
|
|
147
|
+
cd linkml-redcap
|
|
148
|
+
poetry install --with dev
|
|
149
|
+
|
|
150
|
+
# Validate the schemas
|
|
151
|
+
poetry run linkml-lint src/linkml_redcap/data_dictionary/schema/redcap_data_dictionary.yaml
|
|
152
|
+
poetry run linkml-lint src/linkml_redcap/record/schema/redcap_record.yaml
|
|
153
|
+
|
|
154
|
+
# Run tests
|
|
155
|
+
poetry run pytest
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
Releases are tag-triggered: push a `vX.Y.Z` tag matching the version in
|
|
159
|
+
`pyproject.toml` and GitHub Actions builds and publishes to PyPI via OIDC
|
|
160
|
+
trusted publishing.
|
|
161
|
+
|
|
162
|
+
## License
|
|
163
|
+
|
|
164
|
+
MIT
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "linkml-redcap"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "LinkML schemas modelling REDCap structures — the data dictionary and the record-data envelope"
|
|
5
|
+
authors = [
|
|
6
|
+
"Adam SL Graefe <adam.graefe@charite.de>",
|
|
7
|
+
"Corey Cox <corey@tislab.org>"
|
|
8
|
+
]
|
|
9
|
+
license = "MIT"
|
|
10
|
+
readme = "README.md"
|
|
11
|
+
homepage = "https://github.com/linkml/linkml-redcap"
|
|
12
|
+
repository = "https://github.com/linkml/linkml-redcap"
|
|
13
|
+
documentation = "https://linkml.github.io/linkml-redcap"
|
|
14
|
+
keywords = [
|
|
15
|
+
"linkml",
|
|
16
|
+
"redcap",
|
|
17
|
+
"data-dictionary",
|
|
18
|
+
"ontology",
|
|
19
|
+
"fair",
|
|
20
|
+
"rare-disease",
|
|
21
|
+
"phenopackets",
|
|
22
|
+
]
|
|
23
|
+
classifiers = [
|
|
24
|
+
"Development Status :: 3 - Alpha",
|
|
25
|
+
"Intended Audience :: Developers",
|
|
26
|
+
"Intended Audience :: Science/Research",
|
|
27
|
+
"Intended Audience :: Healthcare Industry",
|
|
28
|
+
"License :: OSI Approved :: MIT License",
|
|
29
|
+
"Operating System :: OS Independent",
|
|
30
|
+
"Programming Language :: Python :: 3",
|
|
31
|
+
"Programming Language :: Python :: 3.10",
|
|
32
|
+
"Programming Language :: Python :: 3.11",
|
|
33
|
+
"Programming Language :: Python :: 3.12",
|
|
34
|
+
"Topic :: Scientific/Engineering :: Bio-Informatics",
|
|
35
|
+
"Topic :: Scientific/Engineering :: Medical Science Apps.",
|
|
36
|
+
]
|
|
37
|
+
packages = [{ include = "linkml_redcap", from = "src" }]
|
|
38
|
+
|
|
39
|
+
# Ship schema YAMLs and the py.typed marker inside the wheel.
|
|
40
|
+
include = [
|
|
41
|
+
{ path = "src/linkml_redcap/**/schema/*.yaml", format = ["sdist", "wheel"] },
|
|
42
|
+
{ path = "src/linkml_redcap/py.typed", format = ["sdist", "wheel"] },
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
[tool.poetry.dependencies]
|
|
46
|
+
python = ">=3.10,<3.13"
|
|
47
|
+
linkml-runtime = ">=1.9.4"
|
|
48
|
+
|
|
49
|
+
[tool.poetry.group.dev.dependencies]
|
|
50
|
+
linkml = ">=1.8.0" # for gen-pydantic, gen-doc, linkml-lint, linkml-validate
|
|
51
|
+
pytest = ">=8.0"
|
|
52
|
+
pytest-cov = ">=5.0"
|
|
53
|
+
ruff = ">=0.5"
|
|
54
|
+
pre-commit = ">=3.5"
|
|
55
|
+
|
|
56
|
+
[tool.poetry.group.docs.dependencies]
|
|
57
|
+
mkdocs = ">=1.6"
|
|
58
|
+
mkdocs-material = ">=9.5"
|
|
59
|
+
mkdocs-mermaid2-plugin = ">=1.1"
|
|
60
|
+
mkdocs-include-markdown-plugin = ">=6.0" # render instructions/*.md as doc pages
|
|
61
|
+
|
|
62
|
+
[build-system]
|
|
63
|
+
requires = ["poetry-core>=1.5.0"]
|
|
64
|
+
build-backend = "poetry.core.masonry.api"
|
|
65
|
+
|
|
66
|
+
[tool.ruff]
|
|
67
|
+
line-length = 100
|
|
68
|
+
target-version = "py310"
|
|
69
|
+
|
|
70
|
+
[tool.ruff.lint]
|
|
71
|
+
select = ["E", "F", "I", "N", "UP", "B", "SIM"]
|
|
72
|
+
ignore = ["E501"] # line length handled by formatter
|
|
73
|
+
|
|
74
|
+
[tool.pytest.ini_options]
|
|
75
|
+
testpaths = ["tests"]
|
|
76
|
+
addopts = "-ra --strict-markers"
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""linkml-redcap: LinkML schemas for REDCap structures.
|
|
2
|
+
|
|
3
|
+
Two submodules are available:
|
|
4
|
+
|
|
5
|
+
* :mod:`linkml_redcap.data_dictionary` — the meta-schema describing a valid
|
|
6
|
+
REDCap *data dictionary* (the 18-column CSV).
|
|
7
|
+
* :mod:`linkml_redcap.record` — the reusable envelope for REDCap *record data*,
|
|
8
|
+
in both its flat-export and structured/nested shapes, plus structural
|
|
9
|
+
grouping helpers for flat ⇄ structured conversion.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from linkml_redcap import data_dictionary, record
|
|
13
|
+
from linkml_redcap._version import __version__
|
|
14
|
+
from linkml_redcap.data_dictionary import schema_path, schema_view
|
|
15
|
+
|
|
16
|
+
# Public API of the top-level package. ``schema_path`` and ``schema_view`` are
# the loaders imported above from ``linkml_redcap.data_dictionary``; the
# ``record`` submodule is exported as a module.
__all__ = [
    "data_dictionary",
    "record",
    "schema_path",
    "schema_view",
    "__version__",
]
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""Resolve bundled schema files to stable filesystem paths.
|
|
2
|
+
|
|
3
|
+
``importlib.resources.as_file`` only guarantees a real path *within* its context
|
|
4
|
+
manager: for a zip-imported package it extracts the resource to a temporary file
|
|
5
|
+
that is removed as soon as the context exits. Returning such a path from a public
|
|
6
|
+
``schema_path()`` would hand callers (and ``SchemaView``) a path that may already
|
|
7
|
+
be gone.
|
|
8
|
+
|
|
9
|
+
We therefore enter the ``as_file`` context under a single process-lifetime
|
|
10
|
+
``ExitStack`` that is closed at interpreter shutdown, so the path stays valid for
|
|
11
|
+
the whole session. For a normally installed (unzipped) wheel this is a no-op that
|
|
12
|
+
simply returns the real file on disk.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import atexit
|
|
18
|
+
from contextlib import ExitStack
|
|
19
|
+
from importlib.resources import as_file, files
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
|
|
22
|
+
# Process-lifetime context stack: every ``as_file`` context entered on it stays
# open until interpreter shutdown, when ``atexit`` closes the stack.
_file_manager = ExitStack()
atexit.register(_file_manager.close)

# Paths already resolved, keyed by ``(package, filename)``, so each resource is
# entered into ``_file_manager`` at most once per process.
_resolved_paths: dict[tuple[str, str], Path] = {}


def resolve_schema(package: str, filename: str) -> Path:
    """Return a stable filesystem path to ``<package>/schema/<filename>``.

    The first call for a given ``(package, filename)`` pair enters the
    resource's ``as_file`` context on the module-level ``ExitStack`` and
    remembers the resulting path; later calls return that same ``Path``
    object. Without the cache, every call would push another context onto the
    stack, so repeated lookups would grow the stack for the life of the
    process.

    Args:
        package: Importable name of the package that contains a ``schema``
            directory.
        filename: Name of the file inside that ``schema`` directory.

    Returns:
        The path produced by ``as_file`` for the resource, kept valid until
        the ``ExitStack`` is closed at interpreter shutdown.
    """
    key = (package, filename)
    cached = _resolved_paths.get(key)
    if cached is None:
        resource = files(package).joinpath("schema").joinpath(filename)
        cached = Path(_file_manager.enter_context(as_file(resource)))
        _resolved_paths[key] = cached
    return cached
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"""Resolved package version (falls back when running from a source tree)."""
|
|
2
|
+
|
|
3
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
4
|
+
|
|
5
|
+
# Prefer the version recorded in the installed distribution's metadata.
try:
    __version__ = version("linkml-redcap")
except PackageNotFoundError:  # not installed (e.g. running from a checkout)
    # Hard-coded fallback; it duplicates the ``version`` value in
    # pyproject.toml, so the two must be bumped together.
    __version__ = "0.1.0"
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""LinkML meta-schema for REDCap data dictionaries."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
from linkml_redcap._resources import resolve_schema
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from linkml_runtime.utils.schemaview import SchemaView
|
|
12
|
+
|
|
13
|
+
# File name of the bundled meta-schema; ``schema_path()`` passes it to
# ``resolve_schema`` together with this package's name.
SCHEMA_FILENAME = "redcap_data_dictionary.yaml"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def schema_path() -> Path:
    """Locate the bundled data-dictionary meta-schema YAML on disk.

    Returns:
        The path that ``resolve_schema`` yields for ``SCHEMA_FILENAME`` in
        this package.
    """
    return resolve_schema(package=__name__, filename=SCHEMA_FILENAME)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def schema_view() -> SchemaView:
    """Build a ``SchemaView`` over the bundled data-dictionary meta-schema.

    The ``linkml_runtime`` import is deferred to call time; at module level it
    only happens under ``TYPE_CHECKING``.

    Returns:
        A ``SchemaView`` constructed from the string form of ``schema_path()``.
    """
    from linkml_runtime.utils.schemaview import SchemaView

    yaml_location = str(schema_path())
    return SchemaView(yaml_location)
|