bead 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bead/__init__.py +11 -0
- bead/__main__.py +11 -0
- bead/active_learning/__init__.py +15 -0
- bead/active_learning/config.py +231 -0
- bead/active_learning/loop.py +566 -0
- bead/active_learning/models/__init__.py +24 -0
- bead/active_learning/models/base.py +852 -0
- bead/active_learning/models/binary.py +910 -0
- bead/active_learning/models/categorical.py +943 -0
- bead/active_learning/models/cloze.py +862 -0
- bead/active_learning/models/forced_choice.py +956 -0
- bead/active_learning/models/free_text.py +773 -0
- bead/active_learning/models/lora.py +365 -0
- bead/active_learning/models/magnitude.py +835 -0
- bead/active_learning/models/multi_select.py +795 -0
- bead/active_learning/models/ordinal_scale.py +811 -0
- bead/active_learning/models/peft_adapter.py +155 -0
- bead/active_learning/models/random_effects.py +639 -0
- bead/active_learning/selection.py +354 -0
- bead/active_learning/strategies.py +391 -0
- bead/active_learning/trainers/__init__.py +26 -0
- bead/active_learning/trainers/base.py +210 -0
- bead/active_learning/trainers/data_collator.py +172 -0
- bead/active_learning/trainers/dataset_utils.py +261 -0
- bead/active_learning/trainers/huggingface.py +304 -0
- bead/active_learning/trainers/lightning.py +324 -0
- bead/active_learning/trainers/metrics.py +424 -0
- bead/active_learning/trainers/mixed_effects.py +551 -0
- bead/active_learning/trainers/model_wrapper.py +509 -0
- bead/active_learning/trainers/registry.py +104 -0
- bead/adapters/__init__.py +11 -0
- bead/adapters/huggingface.py +61 -0
- bead/behavioral/__init__.py +116 -0
- bead/behavioral/analytics.py +646 -0
- bead/behavioral/extraction.py +343 -0
- bead/behavioral/merging.py +343 -0
- bead/cli/__init__.py +11 -0
- bead/cli/active_learning.py +513 -0
- bead/cli/active_learning_commands.py +779 -0
- bead/cli/completion.py +359 -0
- bead/cli/config.py +624 -0
- bead/cli/constraint_builders.py +286 -0
- bead/cli/deployment.py +859 -0
- bead/cli/deployment_trials.py +493 -0
- bead/cli/deployment_ui.py +332 -0
- bead/cli/display.py +378 -0
- bead/cli/items.py +960 -0
- bead/cli/items_factories.py +776 -0
- bead/cli/list_constraints.py +714 -0
- bead/cli/lists.py +490 -0
- bead/cli/main.py +430 -0
- bead/cli/models.py +877 -0
- bead/cli/resource_loaders.py +621 -0
- bead/cli/resources.py +1036 -0
- bead/cli/shell.py +356 -0
- bead/cli/simulate.py +840 -0
- bead/cli/templates.py +1158 -0
- bead/cli/training.py +1080 -0
- bead/cli/utils.py +614 -0
- bead/cli/workflow.py +1273 -0
- bead/config/__init__.py +68 -0
- bead/config/active_learning.py +1009 -0
- bead/config/config.py +192 -0
- bead/config/defaults.py +118 -0
- bead/config/deployment.py +217 -0
- bead/config/env.py +147 -0
- bead/config/item.py +45 -0
- bead/config/list.py +193 -0
- bead/config/loader.py +149 -0
- bead/config/logging.py +42 -0
- bead/config/model.py +49 -0
- bead/config/paths.py +46 -0
- bead/config/profiles.py +320 -0
- bead/config/resources.py +47 -0
- bead/config/serialization.py +210 -0
- bead/config/simulation.py +206 -0
- bead/config/template.py +238 -0
- bead/config/validation.py +267 -0
- bead/data/__init__.py +65 -0
- bead/data/base.py +87 -0
- bead/data/identifiers.py +97 -0
- bead/data/language_codes.py +61 -0
- bead/data/metadata.py +270 -0
- bead/data/range.py +123 -0
- bead/data/repository.py +358 -0
- bead/data/serialization.py +249 -0
- bead/data/timestamps.py +89 -0
- bead/data/validation.py +349 -0
- bead/data_collection/__init__.py +11 -0
- bead/data_collection/jatos.py +223 -0
- bead/data_collection/merger.py +154 -0
- bead/data_collection/prolific.py +198 -0
- bead/deployment/__init__.py +5 -0
- bead/deployment/distribution.py +402 -0
- bead/deployment/jatos/__init__.py +1 -0
- bead/deployment/jatos/api.py +200 -0
- bead/deployment/jatos/exporter.py +210 -0
- bead/deployment/jspsych/__init__.py +9 -0
- bead/deployment/jspsych/biome.json +44 -0
- bead/deployment/jspsych/config.py +411 -0
- bead/deployment/jspsych/generator.py +598 -0
- bead/deployment/jspsych/package.json +51 -0
- bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
- bead/deployment/jspsych/randomizer.py +299 -0
- bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
- bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
- bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
- bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
- bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
- bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
- bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
- bead/deployment/jspsych/src/plugins/rating.ts +248 -0
- bead/deployment/jspsych/src/slopit/index.ts +9 -0
- bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
- bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
- bead/deployment/jspsych/templates/experiment.css +1 -0
- bead/deployment/jspsych/templates/experiment.js.template +289 -0
- bead/deployment/jspsych/templates/index.html +51 -0
- bead/deployment/jspsych/templates/randomizer.js +241 -0
- bead/deployment/jspsych/templates/randomizer.js.template +313 -0
- bead/deployment/jspsych/trials.py +723 -0
- bead/deployment/jspsych/tsconfig.json +23 -0
- bead/deployment/jspsych/tsup.config.ts +30 -0
- bead/deployment/jspsych/ui/__init__.py +1 -0
- bead/deployment/jspsych/ui/components.py +383 -0
- bead/deployment/jspsych/ui/styles.py +411 -0
- bead/dsl/__init__.py +80 -0
- bead/dsl/ast.py +168 -0
- bead/dsl/context.py +178 -0
- bead/dsl/errors.py +71 -0
- bead/dsl/evaluator.py +570 -0
- bead/dsl/grammar.lark +81 -0
- bead/dsl/parser.py +231 -0
- bead/dsl/stdlib.py +929 -0
- bead/evaluation/__init__.py +13 -0
- bead/evaluation/convergence.py +485 -0
- bead/evaluation/interannotator.py +398 -0
- bead/items/__init__.py +40 -0
- bead/items/adapters/__init__.py +70 -0
- bead/items/adapters/anthropic.py +224 -0
- bead/items/adapters/api_utils.py +167 -0
- bead/items/adapters/base.py +216 -0
- bead/items/adapters/google.py +259 -0
- bead/items/adapters/huggingface.py +1074 -0
- bead/items/adapters/openai.py +323 -0
- bead/items/adapters/registry.py +202 -0
- bead/items/adapters/sentence_transformers.py +224 -0
- bead/items/adapters/togetherai.py +309 -0
- bead/items/binary.py +515 -0
- bead/items/cache.py +558 -0
- bead/items/categorical.py +593 -0
- bead/items/cloze.py +757 -0
- bead/items/constructor.py +784 -0
- bead/items/forced_choice.py +413 -0
- bead/items/free_text.py +681 -0
- bead/items/generation.py +432 -0
- bead/items/item.py +396 -0
- bead/items/item_template.py +787 -0
- bead/items/magnitude.py +573 -0
- bead/items/multi_select.py +621 -0
- bead/items/ordinal_scale.py +569 -0
- bead/items/scoring.py +448 -0
- bead/items/validation.py +723 -0
- bead/lists/__init__.py +30 -0
- bead/lists/balancer.py +263 -0
- bead/lists/constraints.py +1067 -0
- bead/lists/experiment_list.py +286 -0
- bead/lists/list_collection.py +378 -0
- bead/lists/partitioner.py +1141 -0
- bead/lists/stratification.py +254 -0
- bead/participants/__init__.py +73 -0
- bead/participants/collection.py +699 -0
- bead/participants/merging.py +312 -0
- bead/participants/metadata_spec.py +491 -0
- bead/participants/models.py +276 -0
- bead/resources/__init__.py +29 -0
- bead/resources/adapters/__init__.py +19 -0
- bead/resources/adapters/base.py +104 -0
- bead/resources/adapters/cache.py +128 -0
- bead/resources/adapters/glazing.py +508 -0
- bead/resources/adapters/registry.py +117 -0
- bead/resources/adapters/unimorph.py +796 -0
- bead/resources/classification.py +856 -0
- bead/resources/constraint_builders.py +329 -0
- bead/resources/constraints.py +165 -0
- bead/resources/lexical_item.py +223 -0
- bead/resources/lexicon.py +744 -0
- bead/resources/loaders.py +209 -0
- bead/resources/template.py +441 -0
- bead/resources/template_collection.py +707 -0
- bead/resources/template_generation.py +349 -0
- bead/simulation/__init__.py +29 -0
- bead/simulation/annotators/__init__.py +15 -0
- bead/simulation/annotators/base.py +175 -0
- bead/simulation/annotators/distance_based.py +135 -0
- bead/simulation/annotators/lm_based.py +114 -0
- bead/simulation/annotators/oracle.py +182 -0
- bead/simulation/annotators/random.py +181 -0
- bead/simulation/dsl_extension/__init__.py +3 -0
- bead/simulation/noise_models/__init__.py +13 -0
- bead/simulation/noise_models/base.py +42 -0
- bead/simulation/noise_models/random_noise.py +82 -0
- bead/simulation/noise_models/systematic.py +132 -0
- bead/simulation/noise_models/temperature.py +86 -0
- bead/simulation/runner.py +144 -0
- bead/simulation/strategies/__init__.py +23 -0
- bead/simulation/strategies/base.py +123 -0
- bead/simulation/strategies/binary.py +103 -0
- bead/simulation/strategies/categorical.py +123 -0
- bead/simulation/strategies/cloze.py +224 -0
- bead/simulation/strategies/forced_choice.py +127 -0
- bead/simulation/strategies/free_text.py +105 -0
- bead/simulation/strategies/magnitude.py +116 -0
- bead/simulation/strategies/multi_select.py +129 -0
- bead/simulation/strategies/ordinal_scale.py +131 -0
- bead/templates/__init__.py +27 -0
- bead/templates/adapters/__init__.py +17 -0
- bead/templates/adapters/base.py +128 -0
- bead/templates/adapters/cache.py +178 -0
- bead/templates/adapters/huggingface.py +312 -0
- bead/templates/combinatorics.py +103 -0
- bead/templates/filler.py +605 -0
- bead/templates/renderers.py +177 -0
- bead/templates/resolver.py +178 -0
- bead/templates/strategies.py +1806 -0
- bead/templates/streaming.py +195 -0
- bead-0.1.0.dist-info/METADATA +212 -0
- bead-0.1.0.dist-info/RECORD +231 -0
- bead-0.1.0.dist-info/WHEEL +4 -0
- bead-0.1.0.dist-info/entry_points.txt +2 -0
- bead-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
"""Stratification utilities for quantile-based item assignment.
|
|
2
|
+
|
|
3
|
+
This module provides utilities for assigning items to quantile bins based
|
|
4
|
+
on numeric properties, with optional stratification by grouping variables.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from collections import defaultdict
|
|
10
|
+
from collections.abc import Callable, Hashable
|
|
11
|
+
from typing import Any, TypeVar
|
|
12
|
+
from uuid import UUID
|
|
13
|
+
|
|
14
|
+
import numpy as np
|
|
15
|
+
|
|
16
|
+
from bead.items.item import MetadataValue
|
|
17
|
+
|
|
18
|
+
T = TypeVar("T")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def assign_quantiles(
    items: list[T],
    property_getter: Callable[[T], float],
    n_quantiles: int = 10,
    stratify_by: Callable[[T], Hashable] | None = None,
) -> dict[T, int]:
    """Map every item to a quantile bin derived from a numeric property.

    The numeric value returned by ``property_getter`` is used to split the
    items into ``n_quantiles`` bins. When ``stratify_by`` is supplied, the
    items are first partitioned by the value it returns and the bins are
    computed independently inside each partition, so a bin index is always
    relative to the item's own group.

    Parameters
    ----------
    items : list[T]
        Items to bin. They are used as keys of the returned dictionary, so
        they must be hashable; equal items share a single entry.
    property_getter : Callable[[T], float]
        Extracts the numeric value that drives the quantile computation.
    n_quantiles : int
        Number of bins to create (default: 10, i.e. deciles). Must be >= 2.
    stratify_by : Callable[[T], Hashable] | None
        Optional extractor of a grouping key. If given, quantiles are
        computed separately for each distinct key. Keys must be hashable
        (str, int, UUID, tuple, etc.).

    Returns
    -------
    dict[T, int]
        Mapping from item to its bin index (0 to n_quantiles-1).

    Raises
    ------
    ValueError
        If ``items`` is empty or ``n_quantiles`` is smaller than 2.

    Examples
    --------
    Quartiles of plain numbers:

    >>> result = assign_quantiles(
    ...     [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    ...     property_getter=lambda x: x,
    ...     n_quantiles=4,
    ... )
    >>> result[1]  # smallest value falls in the lowest quartile
    0
    >>> result[10]  # largest value falls in the highest quartile
    3

    Stratified binning, where each group gets its own bin edges:

    >>> pairs = [("A", 1.0), ("A", 2.0), ("B", 10.0), ("B", 20.0)]
    >>> result = assign_quantiles(
    ...     pairs,
    ...     property_getter=lambda pair: pair[1],
    ...     n_quantiles=2,
    ...     stratify_by=lambda pair: pair[0],
    ... )
    >>> result[("A", 2.0)]
    1
    >>> result[("B", 10.0)]
    0
    """
    # argument validation: the emptiness check deliberately comes first
    if not items:
        raise ValueError("items list cannot be empty")
    if n_quantiles < 2:
        raise ValueError(f"n_quantiles must be >= 2, got {n_quantiles}")

    if stratify_by is not None:
        # partition the items by grouping key, keeping first-seen group order
        buckets: dict[Hashable, list[T]] = {}
        for member in items:
            buckets.setdefault(stratify_by(member), []).append(member)

        # bin every partition on its own and merge the per-group mappings
        bins: dict[T, int] = {}
        for bucket in buckets.values():
            bins.update(
                _assign_quantiles_single_group(bucket, property_getter, n_quantiles)
            )
        return bins

    # unstratified: all items form one single group
    return _assign_quantiles_single_group(items, property_getter, n_quantiles)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _assign_quantiles_single_group(
|
|
126
|
+
items: list[T],
|
|
127
|
+
property_getter: Callable[[T], float],
|
|
128
|
+
n_quantiles: int,
|
|
129
|
+
) -> dict[T, int]:
|
|
130
|
+
"""Assign quantiles to items within a single group.
|
|
131
|
+
|
|
132
|
+
Parameters
|
|
133
|
+
----------
|
|
134
|
+
items : list[T]
|
|
135
|
+
List of items in this group.
|
|
136
|
+
property_getter : Callable[[T], float]
|
|
137
|
+
Function to extract numeric value.
|
|
138
|
+
n_quantiles : int
|
|
139
|
+
Number of quantile bins.
|
|
140
|
+
|
|
141
|
+
Returns
|
|
142
|
+
-------
|
|
143
|
+
dict[T, int]
|
|
144
|
+
Mapping from item to quantile bin (0 to n_quantiles-1).
|
|
145
|
+
"""
|
|
146
|
+
if not items:
|
|
147
|
+
return {}
|
|
148
|
+
|
|
149
|
+
# extract scores
|
|
150
|
+
scores: np.ndarray[Any, np.dtype[np.floating[Any]]] = np.array(
|
|
151
|
+
[property_getter(item) for item in items]
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
# compute quantile edges
|
|
155
|
+
# linspace(0, 1, n+1) gives [0, 1/n, 2/n, ..., 1]
|
|
156
|
+
quantile_edges: np.ndarray[Any, np.dtype[np.floating[Any]]] = np.quantile(
|
|
157
|
+
scores, np.linspace(0, 1, n_quantiles + 1)
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
# assign each item to a quantile bin
|
|
161
|
+
result: dict[T, int] = {}
|
|
162
|
+
for item, score in zip(items, scores.tolist(), strict=True):
|
|
163
|
+
# searchsorted finds the index where score would be inserted;
|
|
164
|
+
# we use quantile_edges[1:] to exclude the 0th edge;
|
|
165
|
+
# this maps scores to bins [0, n_quantiles-1]
|
|
166
|
+
quantile_idx = int(np.searchsorted(quantile_edges[1:], float(score)))
|
|
167
|
+
result[item] = quantile_idx
|
|
168
|
+
|
|
169
|
+
return result
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def assign_quantiles_by_uuid(
    item_ids: list[UUID],
    item_metadata: dict[UUID, dict[str, MetadataValue]],
    property_key: str,
    n_quantiles: int = 10,
    stratify_by_key: str | None = None,
) -> dict[UUID, int]:
    """Bin items identified by UUID using values stored in a metadata dict.

    Thin wrapper around ``assign_quantiles`` for the stand-off annotation
    pattern, in which items are referred to by UUID and their properties
    live in a separate ``UUID -> metadata`` dictionary.

    Parameters
    ----------
    item_ids : list[UUID]
        UUIDs of the items to bin.
    item_metadata : dict[UUID, dict[str, MetadataValue]]
        Metadata dictionary; must contain an entry for every UUID in
        ``item_ids``.
    property_key : str
        Metadata key whose value (converted with ``float``) drives the
        quantile computation.
    n_quantiles : int
        Number of quantile bins (default: 10).
    stratify_by_key : str | None
        Optional metadata key used as grouping variable. A falsy value
        (``None`` or an empty string) disables stratification.

    Returns
    -------
    dict[UUID, int]
        Mapping from UUID to quantile bin (0 to n_quantiles-1).

    Raises
    ------
    KeyError
        If a UUID from ``item_ids`` has no entry in ``item_metadata``.
    ValueError
        If ``property_key`` is missing from the metadata of any item, or if
        ``stratify_by_key`` is given but missing from the metadata of any
        item. ``assign_quantiles`` additionally raises it for an empty
        ``item_ids`` list or ``n_quantiles < 2``.

    Examples
    --------
    >>> from uuid import uuid4
    >>> uuids = [uuid4() for _ in range(100)]
    >>> metadata = {
    ...     uid: {"score": float(i), "group": "A" if i < 50 else "B"}
    ...     for i, uid in enumerate(uuids)
    ... }
    >>> result = assign_quantiles_by_uuid(
    ...     uuids,
    ...     metadata,
    ...     property_key="score",
    ...     n_quantiles=4,
    ...     stratify_by_key="group",
    ... )  # doctest: +SKIP
    """
    # every requested item needs a metadata entry holding the property;
    # both checks run per item, so the first offending UUID decides the error
    for item_id in item_ids:
        if item_id not in item_metadata:
            raise KeyError(f"UUID {item_id} not found in item_metadata")
        if property_key not in item_metadata[item_id]:
            raise ValueError(
                f"Property '{property_key}' not found in metadata for UUID {item_id}"
            )

    def score_of(item_id: UUID) -> float:
        # numeric property looked up through the metadata dictionary
        return float(item_metadata[item_id][property_key])  # type: ignore[arg-type]

    # no stratification requested: bin all items together
    if not stratify_by_key:
        return assign_quantiles(item_ids, score_of, n_quantiles, None)

    # the grouping key has to be available for every single item
    if not all(stratify_by_key in item_metadata[item_id] for item_id in item_ids):
        raise ValueError(
            f"Stratification key '{stratify_by_key}' not found in all items"
        )

    def group_of(item_id: UUID) -> int | float | str | bool:
        # grouping value looked up through the metadata dictionary
        return item_metadata[item_id][stratify_by_key]  # type: ignore[return-value]

    return assign_quantiles(item_ids, score_of, n_quantiles, group_of)
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""Participant metadata system for bead.
|
|
2
|
+
|
|
3
|
+
This module provides data models and utilities for managing participant
|
|
4
|
+
metadata with privacy-preserving external ID mapping. It supports:
|
|
5
|
+
|
|
6
|
+
- Configurable metadata fields with validation (FieldSpec, ParticipantMetadataSpec)
|
|
7
|
+
- Participant data models with UUID-based identification (Participant)
|
|
8
|
+
- Privacy-compliant external ID mapping (ParticipantIDMapping)
|
|
9
|
+
- Collection classes with JSONL I/O (ParticipantCollection, IDMappingCollection)
|
|
10
|
+
- DataFrame merge utilities for analysis (merge_participant_metadata, etc.)
|
|
11
|
+
|
|
12
|
+
All DataFrame operations support both pandas and polars backends.
|
|
13
|
+
|
|
14
|
+
Examples
|
|
15
|
+
--------
|
|
16
|
+
>>> from bead.participants import (
|
|
17
|
+
... Participant,
|
|
18
|
+
... ParticipantCollection,
|
|
19
|
+
... FieldSpec,
|
|
20
|
+
... ParticipantMetadataSpec,
|
|
21
|
+
... )
|
|
22
|
+
|
|
23
|
+
>>> # Define metadata schema
|
|
24
|
+
>>> spec = ParticipantMetadataSpec(
|
|
25
|
+
... name="study_demographics",
|
|
26
|
+
... fields=[
|
|
27
|
+
... FieldSpec(name="age", field_type="int", required=True),
|
|
28
|
+
... FieldSpec(
|
|
29
|
+
... name="education",
|
|
30
|
+
... field_type="str",
|
|
31
|
+
... allowed_values=["high_school", "bachelors", "masters", "phd"],
|
|
32
|
+
... ),
|
|
33
|
+
... ],
|
|
34
|
+
... )
|
|
35
|
+
|
|
36
|
+
>>> # Create participant with metadata
|
|
37
|
+
>>> p = Participant(
|
|
38
|
+
... participant_metadata={"age": 25, "education": "bachelors"},
|
|
39
|
+
... study_id="study_001",
|
|
40
|
+
... )
|
|
41
|
+
|
|
42
|
+
>>> # Validate against spec
|
|
43
|
+
>>> is_valid, errors = p.validate_against_spec(spec)
|
|
44
|
+
>>> is_valid
|
|
45
|
+
True
|
|
46
|
+
"""
|
|
47
|
+
|
|
48
|
+
from bead.participants.collection import IDMappingCollection, ParticipantCollection
|
|
49
|
+
from bead.participants.merging import (
|
|
50
|
+
build_participant_lookup,
|
|
51
|
+
create_analysis_dataframe,
|
|
52
|
+
merge_participant_metadata,
|
|
53
|
+
resolve_external_ids,
|
|
54
|
+
)
|
|
55
|
+
from bead.participants.metadata_spec import FieldSpec, ParticipantMetadataSpec
|
|
56
|
+
from bead.participants.models import Participant, ParticipantIDMapping
|
|
57
|
+
|
|
58
|
+
__all__ = [
|
|
59
|
+
# Models
|
|
60
|
+
"Participant",
|
|
61
|
+
"ParticipantIDMapping",
|
|
62
|
+
# Metadata specification
|
|
63
|
+
"FieldSpec",
|
|
64
|
+
"ParticipantMetadataSpec",
|
|
65
|
+
# Collections
|
|
66
|
+
"ParticipantCollection",
|
|
67
|
+
"IDMappingCollection",
|
|
68
|
+
# Merge utilities
|
|
69
|
+
"merge_participant_metadata",
|
|
70
|
+
"resolve_external_ids",
|
|
71
|
+
"create_analysis_dataframe",
|
|
72
|
+
"build_participant_lookup",
|
|
73
|
+
]
|