linkml-redcap 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,22 @@
1
+ """linkml-redcap: LinkML schemas for REDCap structures.
2
+
3
+ Two submodules are available:
4
+
5
+ * :mod:`linkml_redcap.data_dictionary` — the meta-schema describing a valid
6
+ REDCap *data dictionary* (the 18-column CSV).
7
+ * :mod:`linkml_redcap.record` — the reusable envelope for REDCap *record data*,
8
+ in both its flat-export and structured/nested shapes, plus structural
9
+ grouping helpers for flat ⇄ structured conversion.
10
+ """
11
+
12
+ from linkml_redcap import data_dictionary, record
13
+ from linkml_redcap._version import __version__
14
+ from linkml_redcap.data_dictionary import schema_path, schema_view
15
+
16
+ __all__ = [
17
+ "data_dictionary",
18
+ "record",
19
+ "schema_path",
20
+ "schema_view",
21
+ "__version__",
22
+ ]
@@ -0,0 +1,29 @@
1
+ """Resolve bundled schema files to stable filesystem paths.
2
+
3
+ ``importlib.resources.as_file`` only guarantees a real path *within* its context
4
+ manager: for a zip-imported package it extracts the resource to a temporary file
5
+ that is removed as soon as the context exits. Returning such a path from a public
6
+ ``schema_path()`` would hand callers (and ``SchemaView``) a path that may already
7
+ be gone.
8
+
9
+ We therefore enter the ``as_file`` context under a single process-lifetime
10
+ ``ExitStack`` that is closed at interpreter shutdown, so the path stays valid for
11
+ the whole session. For a normally installed (unzipped) wheel this is a no-op that
12
+ simply returns the real file on disk.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import atexit
18
+ from contextlib import ExitStack
19
+ from importlib.resources import as_file, files
20
+ from pathlib import Path
21
+
22
# One ExitStack for the whole process: every ``as_file`` context entered on it
# stays open until interpreter shutdown, when ``atexit`` closes the stack.
_file_manager = ExitStack()
atexit.register(_file_manager.close)

# Paths already handed out, keyed by ``(package, filename)``. Without this,
# every call would enter another ``as_file`` context on ``_file_manager`` (and,
# for a zip-imported package, extract another temporary copy) that is only
# released at shutdown.
_resolved_paths: dict[tuple[str, str], Path] = {}


def resolve_schema(package: str, filename: str) -> Path:
    """Return a stable filesystem path to ``<package>/schema/<filename>``.

    The first call for a given ``(package, filename)`` pair enters the
    ``as_file`` context on the process-lifetime ``ExitStack`` and remembers the
    resulting path; later calls for the same pair return that same ``Path``
    object instead of entering a new context.

    Args:
        package: Importable name of the package that bundles the ``schema``
            directory.
        filename: Name of the schema file inside that ``schema`` directory.

    Returns:
        The path produced by ``as_file`` for the resource. The context that
        produced it is closed only when ``_file_manager`` is closed at exit.
    """
    key = (package, filename)
    known = _resolved_paths.get(key)
    if known is not None:
        return known

    resource = files(package).joinpath("schema").joinpath(filename)
    # Entering (rather than ``with``-ing) the context keeps the path alive
    # after this function returns.
    path = Path(_file_manager.enter_context(as_file(resource)))
    _resolved_paths[key] = path
    return path
@@ -0,0 +1,8 @@
1
+ """Resolved package version (falls back when running from a source tree)."""
2
+
3
+ from importlib.metadata import PackageNotFoundError, version
4
+
5
# Prefer the version recorded in the installed distribution's metadata.
try:
    __version__ = version("linkml-redcap")
except PackageNotFoundError:  # not installed (e.g. running from a checkout)
    # No distribution metadata to query: fall back to a hard-coded value.
    # NOTE(review): presumably this literal must be bumped with each release.
    __version__ = "0.1.0"
@@ -0,0 +1,25 @@
1
+ """LinkML meta-schema for REDCap data dictionaries."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import TYPE_CHECKING
7
+
8
+ from linkml_redcap._resources import resolve_schema
9
+
10
+ if TYPE_CHECKING:
11
+ from linkml_runtime.utils.schemaview import SchemaView
12
+
13
+ SCHEMA_FILENAME = "redcap_data_dictionary.yaml"
14
+
15
+
16
def schema_path() -> Path:
    """Locate the bundled data-dictionary meta-schema YAML on disk.

    Returns:
        Whatever ``resolve_schema`` yields for ``SCHEMA_FILENAME`` when asked
        about this module's own package (``__name__``).
    """
    bundled_yaml = resolve_schema(package=__name__, filename=SCHEMA_FILENAME)
    return bundled_yaml
19
+
20
+
21
def schema_view() -> SchemaView:
    """Build a ``SchemaView`` over the bundled data-dictionary meta-schema.

    ``linkml_runtime`` is imported inside the function body, so it is only
    loaded when a view is actually requested.

    Returns:
        A ``SchemaView`` constructed from the string form of ``schema_path()``;
        each call constructs a new one.
    """
    from linkml_runtime.utils.schemaview import SchemaView

    yaml_location = str(schema_path())
    return SchemaView(yaml_location)
@@ -0,0 +1,3 @@
1
+ # Schema Directory
2
+
3
+ This folder contains the LinkML schema YAML files.
@@ -0,0 +1,421 @@
1
+ ---
2
+ id: https://w3id.org/linkml/redcap-data-dictionary
3
+ name: redcap_data_dictionary
4
+ title: REDCap Data Dictionary LinkML Schema
5
+ description: >-
6
+ A LinkML meta-schema that formally models the structure of REDCap data
7
+ dictionaries (CSV format). This schema defines the rules, constraints, and
8
+ relationships of all 18 REDCap data dictionary columns, enabling
9
+ programmatic validation, generation, and transformation of REDCap instruments.
10
+ It is a vendor-neutral model of REDCap itself; RareLink-style ontology
11
+ conventions (variable-name prefixes, coded choices, structured field
12
+ annotations, BioPortal/ontology curation) are layered on top in the rarelink
13
+ repository, not defined here.
14
+
15
+ license: MIT
16
+ version: 0.1.0
17
+
18
+ see_also:
19
+ - https://github.com/linkml/linkml-redcap
20
+ - https://w3id.org/linkml/redcap-record # sibling schema: REDCap *record* data
21
+ - https://github.com/BIH-CEI/rarelink
22
+ - https://rarelink.readthedocs.io
23
+ - https://projectredcap.org
24
+
25
+ prefixes:
26
+ linkml: https://w3id.org/linkml/
27
+ redcap_dd: https://w3id.org/linkml/redcap-data-dictionary/
28
+ schema: http://schema.org/
29
+
30
+ default_prefix: redcap_dd
31
+ default_range: string
32
+
33
+ imports:
34
+ - linkml:types
35
+
36
+
37
+ # =============================================================================
38
+ # CLASSES
39
+ # =============================================================================
40
+ classes:
41
+
42
+ # ---------------------------------------------------------------------------
43
+ # Top-Level Container
44
+ # ---------------------------------------------------------------------------
45
+ DataDictionary:
46
+ description: >-
47
+ A complete REDCap data dictionary representing one or more instruments
48
+ (forms). This is the top-level container that corresponds to the CSV file
49
+ uploaded to REDCap. Fields must be grouped contiguously by form_name.
50
+ tree_root: true
51
+ comments:
52
+ - >-
53
+ The first field in the data dictionary must be the record identifier
54
+ (typically record_id) and must be of field_type 'text'.
55
+ - >-
56
+ All fields belonging to the same form_name must be in contiguous rows;
57
+ no interleaving of forms is permitted.
58
+ attributes:
59
+ fields:
60
+ description: >-
61
+ Ordered list of all fields (variables) in the data dictionary.
62
+ Fields within the same form must be in contiguous rows.
63
+ range: Field
64
+ multivalued: true
65
+ inlined_as_list: true
66
+ required: true
67
+
68
+ # ---------------------------------------------------------------------------
69
+ # Core REDCap Field (mirrors one row of the CSV)
70
+ # ---------------------------------------------------------------------------
71
+ Field:
72
+ description: >-
73
+ A single variable (field) in a REDCap data dictionary. This class maps
74
+ directly to one row of the REDCap data dictionary CSV, with each
75
+ attribute corresponding to one of the 18 standard columns.
76
+ attributes:
77
+
78
+ variable_field_name:
79
+ description: >-
80
+ The unique variable name for this field. Must contain only lowercase
81
+ letters, numbers, and underscores, and must start with a letter.
82
+ Must be unique across the entire project.
83
+ range: string
84
+ required: true
85
+ identifier: true
86
+ pattern: "^[a-z][a-z0-9_]*$"
87
+
88
+ form_name:
89
+ description: >-
90
+ The instrument (form) this field belongs to. Must be lowercase
91
+ with underscores only. All fields for a form must be contiguous.
92
+ range: string
93
+ required: true
94
+ pattern: "^[a-z][a-z0-9_]*$"
95
+
96
+ section_header:
97
+ description: >-
98
+ Optional section header text displayed above this field to visually
99
+ group fields within a form. Supports HTML/rich text.
100
+ range: string
101
+ required: false
102
+
103
+ field_type:
104
+ description: >-
105
+ The type of input field. Determines how the field is rendered
106
+ in the data entry form and what data it can accept.
107
+ range: FieldType
108
+ required: true
109
+
110
+ field_label:
111
+ description: >-
112
+ The display label shown to users during data entry. Supports
113
+ HTML/rich text formatting. For ontology-based models, this
114
+ typically includes a section number and human-readable name
115
+ (e.g., '2.1 Date of birth').
116
+ range: string
117
+ required: true
118
+
119
+ choices_calculations_slider_labels:
120
+ description: >-
121
+ For dropdown, radio, and checkbox fields: pipe-delimited list of
122
+ 'raw_value, label' pairs (e.g., '1, Yes | 2, No | 3, Unknown').
123
+ For calc fields: the calculation expression.
124
+ For slider fields: 'left | center | right' anchor labels.
125
+ For text fields with BioPortal ontology autocomplete:
126
+ 'BIOPORTAL:ONTOLOGY_NAME' (e.g., 'BIOPORTAL:HP').
127
+ range: string
128
+ required: false
129
+
130
+ field_note:
131
+ description: >-
132
+ Supplementary text displayed below the field during data entry.
133
+ Typically provides instructions, expected formats, or context.
134
+ range: string
135
+ required: false
136
+
137
+ text_validation_type_or_show_slider_number:
138
+ description: >-
139
+ For text fields: the validation type constraining input format.
140
+ For slider fields: whether to show the numeric value ('number').
141
+ range: TextValidationType
142
+ required: false
143
+
144
+ text_validation_min:
145
+ description: >-
146
+ Minimum allowed value for validated text fields (dates, numbers).
147
+ range: string
148
+ required: false
149
+
150
+ text_validation_max:
151
+ description: >-
152
+ Maximum allowed value for validated text fields (dates, numbers).
153
+ range: string
154
+ required: false
155
+
156
+ identifier:
157
+ description: >-
158
+ Whether this field contains identifying information (PHI).
159
+ Set to 'y' for identifier fields. Affects data export behaviour.
160
+ range: IdentifierStatus
161
+ required: false
162
+
163
+ branching_logic:
164
+ description: >-
165
+ Conditional display logic. Uses REDCap's branching syntax
166
+ (e.g., "[field_name] = 'value'"). When specified, the field
167
+ is only shown if the condition evaluates to true.
168
+ range: string
169
+ required: false
170
+
171
+ required_field:
172
+ description: >-
173
+ Whether the field is required for form completion. Set to 'y'
174
+ for required fields. Can also contain conditional expressions
175
+ for conditionally required fields.
176
+ range: string
177
+ required: false
178
+
179
+ custom_alignment:
180
+ description: >-
181
+ Controls the alignment of the field on the form.
182
+ LV = Left Vertical, LH = Left Horizontal,
183
+ RV = Right Vertical (default), RH = Right Horizontal.
184
+ range: CustomAlignment
185
+ required: false
186
+
187
+ question_number:
188
+ description: >-
189
+ Custom question number for surveys. Overrides auto-numbering.
190
+ Any text entered here is displayed as the question number.
191
+ range: string
192
+ required: false
193
+
194
+ matrix_group_name:
195
+ description: >-
196
+ Groups multiple radio/checkbox fields into a matrix display.
197
+ All fields with the same matrix_group_name must be contiguous
198
+ and use the same field_type and choices.
199
+ range: string
200
+ required: false
201
+ pattern: "^[a-z0-9_]*$"
202
+
203
+ matrix_ranking:
204
+ description: >-
205
+ Whether ranking is enabled for this matrix group. When enabled,
206
+ no two fields in the matrix can have the same selected value.
207
+ range: MatrixRanking
208
+ required: false
209
+
210
+ field_annotation:
211
+ description: >-
212
+ REDCap's field annotation column — free text plus action tags
213
+ (e.g. @HIDDEN, @READONLY, @CALCTEXT). Any structured convention layered
214
+ inside it (semantic variable codes, ontology-coded choices, ontology
215
+ versions, interoperability mappings) is defined downstream by rarelink,
216
+ not by this base schema.
217
+ range: string
218
+ required: false
219
+
220
+ # ---------------------------------------------------------------------------
221
+ # Structured Choice (for programmatic use beyond flat CSV)
222
+ # ---------------------------------------------------------------------------
223
+ Choice:
224
+ description: >-
225
+ A single permissible value for a dropdown, radio, or checkbox field.
226
+ In REDCap CSV format, choices are encoded as a pipe-delimited string,
227
+ but this class provides a structured representation for programmatic
228
+ generation and validation.
229
+ attributes:
230
+ raw_value:
231
+ description: >-
232
+ The stored value (code) for this choice. In ontology-based models,
233
+ this typically encodes the ontology prefix and concept ID
234
+ (e.g., 'snomedct_248152002'). Must contain only lowercase letters,
235
+ numbers, and underscores to comply with REDCap naming rules.
236
+ range: string
237
+ required: true
238
+ pattern: "^[a-z0-9_]*$"
239
+ label:
240
+ description: >-
241
+ The human-readable display label for this choice
242
+ (e.g., 'Female', 'Male', 'Unknown').
243
+ range: string
244
+ required: true
245
+
246
+ # ---------------------------------------------------------------------------
247
+ # Instrument (logical grouping derived from form_name)
248
+ # ---------------------------------------------------------------------------
249
+ Instrument:
250
+ description: >-
251
+ A logical REDCap instrument (form) grouping related fields.
252
+ This class is not directly represented in the CSV but is derived
253
+ from the contiguous grouping of fields sharing the same form_name.
254
+ In RareLink, instruments follow the naming convention
255
+ 'rarelink_N_section_name' and each has a completion status field.
256
+ attributes:
257
+ instrument_name:
258
+ description: >-
259
+ The form_name shared by all fields in this instrument.
260
+ range: string
261
+ required: true
262
+ identifier: true
263
+ pattern: "^[a-z][a-z0-9_]*$"
264
+ instrument_label:
265
+ description: >-
266
+ Human-readable label for the instrument. REDCap derives this
267
+ by capitalising and replacing underscores with spaces.
268
+ range: string
269
+ required: false
270
+ is_repeating:
271
+ description: >-
272
+ Whether this instrument supports repeating instances.
273
+ In RareLink, sections 4-8 (care pathway, disease, genetic
274
+ findings, phenotypic features, measurements, family history)
275
+ are repeating instruments.
276
+ range: boolean
277
+ required: false
278
+ fields:
279
+ description: All fields belonging to this instrument.
280
+ range: Field
281
+ multivalued: true
282
+ inlined_as_list: true
283
+ required: true
284
+
285
+
286
+ # =============================================================================
287
+ # ENUMS
288
+ # =============================================================================
289
+ enums:
290
+
291
+ FieldType:
292
+ description: >-
293
+ The complete set of REDCap field types that determine how a
294
+ variable is rendered and what data it accepts.
295
+ permissible_values:
296
+ text:
297
+ description: >-
298
+ Single-line text input. Can be combined with validation types
299
+ (date_ymd, integer, number, email, etc.) and BioPortal
300
+ ontology autocomplete.
301
+ notes:
302
+ description: Large multi-line text area for free-text entry.
303
+ dropdown:
304
+ description: >-
305
+ Single-select dropdown menu. Requires choices to be specified
306
+ in the choices column.
307
+ radio:
308
+ description: >-
309
+ Single-select radio button group. Requires choices to be
310
+ specified in the choices column.
311
+ checkbox:
312
+ description: >-
313
+ Multi-select checkboxes. Requires choices. Each checkbox
314
+ creates a separate binary variable in the export.
315
+ yesno:
316
+ description: >-
317
+ Built-in Yes/No radio buttons (stored as 1/0).
318
+ truefalse:
319
+ description: >-
320
+ Built-in True/False radio buttons (stored as 1/0).
321
+ calc:
322
+ description: >-
323
+ Calculated field. The calculation expression is specified
324
+ in the choices column.
325
+ file:
326
+ description: File upload field for attaching documents or images.
327
+ slider:
328
+ description: >-
329
+ Visual analogue scale (0-100). Anchor labels are specified
330
+ in the choices column as 'left | center | right'.
331
+ descriptive:
332
+ description: >-
333
+ Display-only field for instructional text, images, or HTML
334
+ content. Does not collect data.
335
+ sql:
336
+ description: >-
337
+ Dynamic dropdown populated by a SQL query against the
338
+ REDCap database.
339
+
340
+ TextValidationType:
341
+ description: >-
342
+ Validation rules for text fields constraining the input format.
343
+ permissible_values:
344
+ date_ymd:
345
+ description: Date in YYYY-MM-DD format.
346
+ date_mdy:
347
+ description: Date in MM-DD-YYYY format.
348
+ date_dmy:
349
+ description: Date in DD-MM-YYYY format.
350
+ datetime_ymd:
351
+ description: Datetime in YYYY-MM-DD HH:MM format.
352
+ datetime_mdy:
353
+ description: Datetime in MM-DD-YYYY HH:MM format.
354
+ datetime_dmy:
355
+ description: Datetime in DD-MM-YYYY HH:MM format.
356
+ datetime_seconds_ymd:
357
+ description: Datetime with seconds in YYYY-MM-DD HH:MM:SS format.
358
+ datetime_seconds_mdy:
359
+ description: Datetime with seconds in MM-DD-YYYY HH:MM:SS format.
360
+ datetime_seconds_dmy:
361
+ description: Datetime with seconds in DD-MM-YYYY HH:MM:SS format.
362
+ time:
363
+ description: Time in HH:MM format.
364
+ time_mm_ss:
365
+ description: Time in MM:SS format.
366
+ integer:
367
+ description: Whole number (no decimals).
368
+ number:
369
+ description: Decimal number.
370
+ number_1dp:
371
+ description: Number with 1 decimal place.
372
+ number_2dp:
373
+ description: Number with 2 decimal places.
374
+ number_3dp:
375
+ description: Number with 3 decimal places.
376
+ number_4dp:
377
+ description: Number with 4 decimal places.
378
+ number_comma_decimal:
379
+ description: Decimal number using a comma as the decimal separator (e.g. de_DE).
380
+ number_1dp_comma_decimal:
381
+ description: Number with 1 decimal place, comma decimal separator.
382
+ number_2dp_comma_decimal:
383
+ description: Number with 2 decimal places, comma decimal separator.
384
+ phone:
385
+ description: Phone number format.
386
+ email:
387
+ description: Email address format.
388
+ zipcode:
389
+ description: US ZIP code format.
390
+ alpha_only:
391
+ description: Letters only (no numbers or special characters).
392
+
393
+ CustomAlignment:
394
+ description: Field alignment options for the data entry form.
395
+ permissible_values:
396
+ LV:
397
+ description: Left Vertical
398
+ LH:
399
+ description: Left Horizontal
400
+ RV:
401
+ description: Right Vertical (default)
402
+ RH:
403
+ description: Right Horizontal
404
+
405
+ IdentifierStatus:
406
+ description: >-
407
+ Whether a field contains Protected Health Information. The REDCap column is
408
+ 'y' for identifiers and blank otherwise; a blank cell is represented as an
409
+ absent value (the slot is optional), not as a permissible value.
410
+ permissible_values:
411
+ y:
412
+ description: Field contains identifying information (PHI).
413
+
414
+ MatrixRanking:
415
+ description: >-
416
+ Whether matrix ranking is enabled. 'y' when enabled and blank otherwise;
417
+ a blank cell is represented as an absent value (the slot is optional), not
418
+ as a permissible value.
419
+ permissible_values:
420
+ y:
421
+ description: Ranking is enabled for this matrix group.
linkml_redcap/py.typed ADDED
File without changes
@@ -0,0 +1,39 @@
1
+ """Reusable LinkML envelope for REDCap record data (flat and structured)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import TYPE_CHECKING
7
+
8
+ from linkml_redcap._resources import resolve_schema
9
+ from linkml_redcap.record.grouping import (
10
+ STRUCTURAL_KEYS,
11
+ group_flat_records,
12
+ ungroup_records,
13
+ )
14
+
15
+ if TYPE_CHECKING:
16
+ from linkml_runtime.utils.schemaview import SchemaView
17
+
18
+ SCHEMA_FILENAME = "redcap_record.yaml"
19
+
20
+ __all__ = [
21
+ "SCHEMA_FILENAME",
22
+ "STRUCTURAL_KEYS",
23
+ "schema_path",
24
+ "schema_view",
25
+ "group_flat_records",
26
+ "ungroup_records",
27
+ ]
28
+
29
+
30
def schema_path() -> Path:
    """Locate the bundled record-envelope schema YAML on disk.

    Returns:
        Whatever ``resolve_schema`` yields for ``SCHEMA_FILENAME`` when asked
        about this module's own package (``__name__``).
    """
    envelope_yaml = resolve_schema(package=__name__, filename=SCHEMA_FILENAME)
    return envelope_yaml
33
+
34
+
35
def schema_view() -> SchemaView:
    """Build a ``SchemaView`` over the bundled record-envelope schema.

    ``linkml_runtime`` is imported inside the function body, so it is only
    loaded when a view is actually requested.

    Returns:
        A ``SchemaView`` constructed from the string form of ``schema_path()``;
        each call constructs a new one.
    """
    from linkml_runtime.utils.schemaview import SchemaView

    envelope_location = str(schema_path())
    return SchemaView(envelope_location)