bead 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. bead/__init__.py +11 -0
  2. bead/__main__.py +11 -0
  3. bead/active_learning/__init__.py +15 -0
  4. bead/active_learning/config.py +231 -0
  5. bead/active_learning/loop.py +566 -0
  6. bead/active_learning/models/__init__.py +24 -0
  7. bead/active_learning/models/base.py +852 -0
  8. bead/active_learning/models/binary.py +910 -0
  9. bead/active_learning/models/categorical.py +943 -0
  10. bead/active_learning/models/cloze.py +862 -0
  11. bead/active_learning/models/forced_choice.py +956 -0
  12. bead/active_learning/models/free_text.py +773 -0
  13. bead/active_learning/models/lora.py +365 -0
  14. bead/active_learning/models/magnitude.py +835 -0
  15. bead/active_learning/models/multi_select.py +795 -0
  16. bead/active_learning/models/ordinal_scale.py +811 -0
  17. bead/active_learning/models/peft_adapter.py +155 -0
  18. bead/active_learning/models/random_effects.py +639 -0
  19. bead/active_learning/selection.py +354 -0
  20. bead/active_learning/strategies.py +391 -0
  21. bead/active_learning/trainers/__init__.py +26 -0
  22. bead/active_learning/trainers/base.py +210 -0
  23. bead/active_learning/trainers/data_collator.py +172 -0
  24. bead/active_learning/trainers/dataset_utils.py +261 -0
  25. bead/active_learning/trainers/huggingface.py +304 -0
  26. bead/active_learning/trainers/lightning.py +324 -0
  27. bead/active_learning/trainers/metrics.py +424 -0
  28. bead/active_learning/trainers/mixed_effects.py +551 -0
  29. bead/active_learning/trainers/model_wrapper.py +509 -0
  30. bead/active_learning/trainers/registry.py +104 -0
  31. bead/adapters/__init__.py +11 -0
  32. bead/adapters/huggingface.py +61 -0
  33. bead/behavioral/__init__.py +116 -0
  34. bead/behavioral/analytics.py +646 -0
  35. bead/behavioral/extraction.py +343 -0
  36. bead/behavioral/merging.py +343 -0
  37. bead/cli/__init__.py +11 -0
  38. bead/cli/active_learning.py +513 -0
  39. bead/cli/active_learning_commands.py +779 -0
  40. bead/cli/completion.py +359 -0
  41. bead/cli/config.py +624 -0
  42. bead/cli/constraint_builders.py +286 -0
  43. bead/cli/deployment.py +859 -0
  44. bead/cli/deployment_trials.py +493 -0
  45. bead/cli/deployment_ui.py +332 -0
  46. bead/cli/display.py +378 -0
  47. bead/cli/items.py +960 -0
  48. bead/cli/items_factories.py +776 -0
  49. bead/cli/list_constraints.py +714 -0
  50. bead/cli/lists.py +490 -0
  51. bead/cli/main.py +430 -0
  52. bead/cli/models.py +877 -0
  53. bead/cli/resource_loaders.py +621 -0
  54. bead/cli/resources.py +1036 -0
  55. bead/cli/shell.py +356 -0
  56. bead/cli/simulate.py +840 -0
  57. bead/cli/templates.py +1158 -0
  58. bead/cli/training.py +1080 -0
  59. bead/cli/utils.py +614 -0
  60. bead/cli/workflow.py +1273 -0
  61. bead/config/__init__.py +68 -0
  62. bead/config/active_learning.py +1009 -0
  63. bead/config/config.py +192 -0
  64. bead/config/defaults.py +118 -0
  65. bead/config/deployment.py +217 -0
  66. bead/config/env.py +147 -0
  67. bead/config/item.py +45 -0
  68. bead/config/list.py +193 -0
  69. bead/config/loader.py +149 -0
  70. bead/config/logging.py +42 -0
  71. bead/config/model.py +49 -0
  72. bead/config/paths.py +46 -0
  73. bead/config/profiles.py +320 -0
  74. bead/config/resources.py +47 -0
  75. bead/config/serialization.py +210 -0
  76. bead/config/simulation.py +206 -0
  77. bead/config/template.py +238 -0
  78. bead/config/validation.py +267 -0
  79. bead/data/__init__.py +65 -0
  80. bead/data/base.py +87 -0
  81. bead/data/identifiers.py +97 -0
  82. bead/data/language_codes.py +61 -0
  83. bead/data/metadata.py +270 -0
  84. bead/data/range.py +123 -0
  85. bead/data/repository.py +358 -0
  86. bead/data/serialization.py +249 -0
  87. bead/data/timestamps.py +89 -0
  88. bead/data/validation.py +349 -0
  89. bead/data_collection/__init__.py +11 -0
  90. bead/data_collection/jatos.py +223 -0
  91. bead/data_collection/merger.py +154 -0
  92. bead/data_collection/prolific.py +198 -0
  93. bead/deployment/__init__.py +5 -0
  94. bead/deployment/distribution.py +402 -0
  95. bead/deployment/jatos/__init__.py +1 -0
  96. bead/deployment/jatos/api.py +200 -0
  97. bead/deployment/jatos/exporter.py +210 -0
  98. bead/deployment/jspsych/__init__.py +9 -0
  99. bead/deployment/jspsych/biome.json +44 -0
  100. bead/deployment/jspsych/config.py +411 -0
  101. bead/deployment/jspsych/generator.py +598 -0
  102. bead/deployment/jspsych/package.json +51 -0
  103. bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
  104. bead/deployment/jspsych/randomizer.py +299 -0
  105. bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
  106. bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
  107. bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
  108. bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
  109. bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
  110. bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
  111. bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
  112. bead/deployment/jspsych/src/plugins/rating.ts +248 -0
  113. bead/deployment/jspsych/src/slopit/index.ts +9 -0
  114. bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
  115. bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
  116. bead/deployment/jspsych/templates/experiment.css +1 -0
  117. bead/deployment/jspsych/templates/experiment.js.template +289 -0
  118. bead/deployment/jspsych/templates/index.html +51 -0
  119. bead/deployment/jspsych/templates/randomizer.js +241 -0
  120. bead/deployment/jspsych/templates/randomizer.js.template +313 -0
  121. bead/deployment/jspsych/trials.py +723 -0
  122. bead/deployment/jspsych/tsconfig.json +23 -0
  123. bead/deployment/jspsych/tsup.config.ts +30 -0
  124. bead/deployment/jspsych/ui/__init__.py +1 -0
  125. bead/deployment/jspsych/ui/components.py +383 -0
  126. bead/deployment/jspsych/ui/styles.py +411 -0
  127. bead/dsl/__init__.py +80 -0
  128. bead/dsl/ast.py +168 -0
  129. bead/dsl/context.py +178 -0
  130. bead/dsl/errors.py +71 -0
  131. bead/dsl/evaluator.py +570 -0
  132. bead/dsl/grammar.lark +81 -0
  133. bead/dsl/parser.py +231 -0
  134. bead/dsl/stdlib.py +929 -0
  135. bead/evaluation/__init__.py +13 -0
  136. bead/evaluation/convergence.py +485 -0
  137. bead/evaluation/interannotator.py +398 -0
  138. bead/items/__init__.py +40 -0
  139. bead/items/adapters/__init__.py +70 -0
  140. bead/items/adapters/anthropic.py +224 -0
  141. bead/items/adapters/api_utils.py +167 -0
  142. bead/items/adapters/base.py +216 -0
  143. bead/items/adapters/google.py +259 -0
  144. bead/items/adapters/huggingface.py +1074 -0
  145. bead/items/adapters/openai.py +323 -0
  146. bead/items/adapters/registry.py +202 -0
  147. bead/items/adapters/sentence_transformers.py +224 -0
  148. bead/items/adapters/togetherai.py +309 -0
  149. bead/items/binary.py +515 -0
  150. bead/items/cache.py +558 -0
  151. bead/items/categorical.py +593 -0
  152. bead/items/cloze.py +757 -0
  153. bead/items/constructor.py +784 -0
  154. bead/items/forced_choice.py +413 -0
  155. bead/items/free_text.py +681 -0
  156. bead/items/generation.py +432 -0
  157. bead/items/item.py +396 -0
  158. bead/items/item_template.py +787 -0
  159. bead/items/magnitude.py +573 -0
  160. bead/items/multi_select.py +621 -0
  161. bead/items/ordinal_scale.py +569 -0
  162. bead/items/scoring.py +448 -0
  163. bead/items/validation.py +723 -0
  164. bead/lists/__init__.py +30 -0
  165. bead/lists/balancer.py +263 -0
  166. bead/lists/constraints.py +1067 -0
  167. bead/lists/experiment_list.py +286 -0
  168. bead/lists/list_collection.py +378 -0
  169. bead/lists/partitioner.py +1141 -0
  170. bead/lists/stratification.py +254 -0
  171. bead/participants/__init__.py +73 -0
  172. bead/participants/collection.py +699 -0
  173. bead/participants/merging.py +312 -0
  174. bead/participants/metadata_spec.py +491 -0
  175. bead/participants/models.py +276 -0
  176. bead/resources/__init__.py +29 -0
  177. bead/resources/adapters/__init__.py +19 -0
  178. bead/resources/adapters/base.py +104 -0
  179. bead/resources/adapters/cache.py +128 -0
  180. bead/resources/adapters/glazing.py +508 -0
  181. bead/resources/adapters/registry.py +117 -0
  182. bead/resources/adapters/unimorph.py +796 -0
  183. bead/resources/classification.py +856 -0
  184. bead/resources/constraint_builders.py +329 -0
  185. bead/resources/constraints.py +165 -0
  186. bead/resources/lexical_item.py +223 -0
  187. bead/resources/lexicon.py +744 -0
  188. bead/resources/loaders.py +209 -0
  189. bead/resources/template.py +441 -0
  190. bead/resources/template_collection.py +707 -0
  191. bead/resources/template_generation.py +349 -0
  192. bead/simulation/__init__.py +29 -0
  193. bead/simulation/annotators/__init__.py +15 -0
  194. bead/simulation/annotators/base.py +175 -0
  195. bead/simulation/annotators/distance_based.py +135 -0
  196. bead/simulation/annotators/lm_based.py +114 -0
  197. bead/simulation/annotators/oracle.py +182 -0
  198. bead/simulation/annotators/random.py +181 -0
  199. bead/simulation/dsl_extension/__init__.py +3 -0
  200. bead/simulation/noise_models/__init__.py +13 -0
  201. bead/simulation/noise_models/base.py +42 -0
  202. bead/simulation/noise_models/random_noise.py +82 -0
  203. bead/simulation/noise_models/systematic.py +132 -0
  204. bead/simulation/noise_models/temperature.py +86 -0
  205. bead/simulation/runner.py +144 -0
  206. bead/simulation/strategies/__init__.py +23 -0
  207. bead/simulation/strategies/base.py +123 -0
  208. bead/simulation/strategies/binary.py +103 -0
  209. bead/simulation/strategies/categorical.py +123 -0
  210. bead/simulation/strategies/cloze.py +224 -0
  211. bead/simulation/strategies/forced_choice.py +127 -0
  212. bead/simulation/strategies/free_text.py +105 -0
  213. bead/simulation/strategies/magnitude.py +116 -0
  214. bead/simulation/strategies/multi_select.py +129 -0
  215. bead/simulation/strategies/ordinal_scale.py +131 -0
  216. bead/templates/__init__.py +27 -0
  217. bead/templates/adapters/__init__.py +17 -0
  218. bead/templates/adapters/base.py +128 -0
  219. bead/templates/adapters/cache.py +178 -0
  220. bead/templates/adapters/huggingface.py +312 -0
  221. bead/templates/combinatorics.py +103 -0
  222. bead/templates/filler.py +605 -0
  223. bead/templates/renderers.py +177 -0
  224. bead/templates/resolver.py +178 -0
  225. bead/templates/strategies.py +1806 -0
  226. bead/templates/streaming.py +195 -0
  227. bead-0.1.0.dist-info/METADATA +212 -0
  228. bead-0.1.0.dist-info/RECORD +231 -0
  229. bead-0.1.0.dist-info/WHEEL +4 -0
  230. bead-0.1.0.dist-info/entry_points.txt +2 -0
  231. bead-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,312 @@
1
+ """Utilities for merging participant metadata with judgment data.
2
+
3
+ This module provides functions for joining participant metadata with
4
+ judgment DataFrames for analysis. All functions support both pandas
5
+ and polars DataFrames, preserving the input type.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import pandas as pd
11
+ import polars as pl
12
+
13
+ from bead.participants.collection import IDMappingCollection, ParticipantCollection
14
+
15
+ # Type alias for supported DataFrame types
16
+ DataFrame = pd.DataFrame | pl.DataFrame
17
+
18
+
19
def merge_participant_metadata(
    judgments_df: DataFrame,
    participants: ParticipantCollection,
    id_column: str = "participant_id",
    metadata_columns: list[str] | None = None,
    how: str = "left",
) -> DataFrame:
    """Attach participant metadata columns to a judgments DataFrame.

    The participant collection is first rendered as a DataFrame using the
    same backend as ``judgments_df`` (with metadata flattened), and the two
    tables are then joined on the participant ID. The return type always
    matches the input type: pandas in -> pandas out, polars in -> polars out.

    Parameters
    ----------
    judgments_df : DataFrame
        Judgment data holding one participant ID per row.
    participants : ParticipantCollection
        Participants whose metadata should be joined in.
    id_column : str
        Name of the participant ID column in ``judgments_df``
        (default: "participant_id"). It is matched against the
        "participant_id" column of the participant table.
    metadata_columns : list[str] | None
        Passed to ``participants.to_dataframe`` as ``include_fields`` to
        restrict which fields are exported. ``None`` requests all of them.
    how : str
        Join strategy, forwarded unchanged to the backend's join/merge call,
        e.g. "left", "inner", "outer" (default: "left").

    Returns
    -------
    DataFrame
        ``judgments_df`` joined with the participant table. Participant
        columns whose names clash with judgment columns receive the
        ``_participant`` suffix.

    Examples
    --------
    >>> import pandas as pd
    >>> from bead.participants.collection import ParticipantCollection
    >>> judgments = pd.DataFrame({
    ...     "participant_id": ["uuid1", "uuid2"],
    ...     "response": [5, 3],
    ... })
    >>> collection = ParticipantCollection(name="test")
    >>> # ... add participants ...
    >>> # merged = merge_participant_metadata(judgments, collection)
    """
    use_polars = isinstance(judgments_df, pl.DataFrame)

    # Export the participants with the caller's backend so the join never has
    # to convert between pandas and polars.
    participant_df = participants.to_dataframe(
        backend="polars" if use_polars else "pandas",  # type: ignore[arg-type]
        include_fields=metadata_columns,
        flatten_metadata=True,
    )

    if not use_polars:
        # The asserts narrow the union type for static type checkers.
        assert isinstance(judgments_df, pd.DataFrame)
        assert isinstance(participant_df, pd.DataFrame)

        merged = judgments_df.merge(
            participant_df,
            left_on=id_column,
            right_on="participant_id",
            how=how,  # type: ignore[arg-type]
            suffixes=("", "_participant"),
        )

        # Drop the suffixed duplicate of the participant key if the merge
        # produced one.
        duplicate_key = "participant_id_participant"
        if duplicate_key in merged.columns:
            merged = merged.drop(columns=[duplicate_key])
        return merged

    assert isinstance(judgments_df, pl.DataFrame)
    assert isinstance(participant_df, pl.DataFrame)

    return judgments_df.join(
        participant_df,
        left_on=id_column,
        right_on="participant_id",
        how=how,  # type: ignore[arg-type]
        suffix="_participant",
    )
103
+
104
+
105
def resolve_external_ids(
    df: DataFrame,
    id_mappings: IDMappingCollection,
    external_id_column: str = "PROLIFIC_PID",
    output_column: str = "participant_id",
    drop_unresolved: bool = False,
) -> DataFrame:
    """Translate external participant IDs into internal participant UUIDs.

    A lookup table is built from the active mappings in ``id_mappings`` and
    applied to ``external_id_column``. The resolved UUIDs are written, as
    strings, to ``output_column``. The input DataFrame type is preserved
    and the input itself is not modified.

    Parameters
    ----------
    df : DataFrame
        Data whose rows carry an external participant ID.
    id_mappings : IDMappingCollection
        Mappings from external IDs to participant UUIDs. Mappings whose
        ``is_active`` flag is falsy are ignored.
    external_id_column : str
        Column holding the external IDs (default: "PROLIFIC_PID").
    output_column : str
        Column that receives the resolved UUIDs (default: "participant_id").
    drop_unresolved : bool
        If True, rows whose external ID has no active mapping are removed.
        Otherwise they are kept with a missing value (default: False).

    Returns
    -------
    DataFrame
        A new DataFrame with ``output_column`` added.

    Examples
    --------
    >>> import pandas as pd
    >>> from uuid import uuid4
    >>> from bead.participants.collection import IDMappingCollection
    >>> raw_data = pd.DataFrame({
    ...     "PROLIFIC_PID": ["ABC123", "DEF456"],
    ...     "response": [5, 3],
    ... })
    >>> mappings = IDMappingCollection(name="test", source="prolific")
    >>> pid = uuid4()
    >>> mappings.add_mapping("ABC123", pid)
    >>> resolved = resolve_external_ids(raw_data, mappings)
    >>> "participant_id" in resolved.columns
    True
    """
    use_polars = isinstance(df, pl.DataFrame)

    # external ID -> stringified UUID, restricted to active mappings.
    uuid_by_external_id: dict[str, str] = {}
    for mapping in id_mappings.mappings:
        if mapping.is_active:
            uuid_by_external_id[mapping.external_id] = str(mapping.participant_id)

    if not use_polars:
        assert isinstance(df, pd.DataFrame)

        # Work on a copy so the caller's frame is left untouched.
        resolved = df.copy()
        resolved[output_column] = resolved[external_id_column].map(
            uuid_by_external_id
        )
        if drop_unresolved:
            resolved = resolved.dropna(subset=[output_column])
        return resolved

    assert isinstance(df, pl.DataFrame)

    # dict.get yields None for unknown IDs, which becomes a null in polars.
    uuid_expr = (
        pl.col(external_id_column)
        .map_elements(
            lambda external_id: uuid_by_external_id.get(external_id),
            return_dtype=pl.Utf8,
        )
        .alias(output_column)
    )
    resolved = df.with_columns(uuid_expr)

    if drop_unresolved:
        resolved = resolved.filter(pl.col(output_column).is_not_null())
    return resolved
184
+
185
+
186
def create_analysis_dataframe(
    judgments_df: DataFrame,
    participants: ParticipantCollection,
    id_mappings: IDMappingCollection | None = None,
    external_id_column: str | None = None,
    participant_id_column: str = "participant_id",
    metadata_columns: list[str] | None = None,
) -> DataFrame:
    """Build an analysis-ready DataFrame from raw judgments.

    Chains the two steps usually needed before analysis:

    1. Resolve external IDs to internal UUIDs. This step runs only when
       both ``external_id_column`` and ``id_mappings`` are supplied, and
       is skipped otherwise.
    2. Merge participant metadata on ``participant_id_column``.

    The input DataFrame type is preserved.

    Parameters
    ----------
    judgments_df : DataFrame
        Raw judgment data.
    participants : ParticipantCollection
        Participant collection with metadata.
    id_mappings : IDMappingCollection | None
        ID mappings used to resolve ``external_id_column``.
    external_id_column : str | None
        Column with external IDs to resolve.
    participant_id_column : str
        Column with participant IDs. When resolution runs, the resolved
        UUIDs are written to this column before merging.
    metadata_columns : list[str] | None
        Metadata columns to include in the merge.

    Returns
    -------
    DataFrame
        Judgments with participant metadata merged in.

    Examples
    --------
    >>> import pandas as pd
    >>> from bead.participants.collection import (
    ...     ParticipantCollection, IDMappingCollection
    ... )
    >>> raw_judgments = pd.DataFrame({
    ...     "PROLIFIC_PID": ["ABC123"],
    ...     "response": [5],
    ... })
    >>> participants = ParticipantCollection(name="test")
    >>> mappings = IDMappingCollection(name="test", source="prolific")
    >>> # analysis_df = create_analysis_dataframe(
    >>> #     raw_judgments,
    >>> #     participants,
    >>> #     id_mappings=mappings,
    >>> #     external_id_column="PROLIFIC_PID",
    >>> # )
    """
    # Step 1: resolution needs both the column name and the mappings;
    # without either one the judgments are used as they are.
    if external_id_column is None or id_mappings is None:
        with_ids = judgments_df
    else:
        with_ids = resolve_external_ids(
            judgments_df,
            id_mappings,
            external_id_column=external_id_column,
            output_column=participant_id_column,
        )

    # Step 2: join the participant metadata onto the (possibly resolved) IDs.
    return merge_participant_metadata(
        with_ids,
        participants,
        id_column=participant_id_column,
        metadata_columns=metadata_columns,
    )
262
+
263
+
264
def build_participant_lookup(
    participants: ParticipantCollection,
    key_field: str | None = None,
) -> dict[str, dict[str, str | int | float | bool | None]]:
    """Build a lookup dictionary from a participant collection.

    Useful for manual merging or custom processing.

    Parameters
    ----------
    participants : ParticipantCollection
        Collection of participants.
    key_field : str | None
        If provided, use this metadata field (converted to ``str``) as the
        key instead of the participant UUID. Participants whose metadata
        does not contain the field, or contains ``None`` for it, are left
        out of the result. If several participants share a key value, the
        last one wins.

    Returns
    -------
    dict[str, dict[str, str | int | float | bool | None]]
        Lookup from participant ID (or ``key_field`` value) to a metadata
        dict. Only scalar metadata values (``str``, ``int``, ``float``,
        ``bool`` or ``None``) are copied; other values are omitted.

    Examples
    --------
    >>> from bead.participants.collection import ParticipantCollection
    >>> from bead.participants.models import Participant
    >>> collection = ParticipantCollection(name="test")
    >>> p = Participant(participant_metadata={"age": 25})
    >>> collection.add_participant(p)
    >>> lookup = build_participant_lookup(collection)
    >>> str(p.id) in lookup
    True
    """
    lookup: dict[str, dict[str, str | int | float | bool | None]] = {}

    for participant in participants.participants:
        raw_metadata = participant.participant_metadata

        if key_field is None:
            key = str(participant.id)
        else:
            key_value = raw_metadata.get(key_field)
            if key_value is None:
                # No usable key: skipping avoids filing every such
                # participant under the same junk key ("" or "None"),
                # where they would silently overwrite one another.
                continue
            key = str(key_value)

        # Keep only simple scalar values; nested structures are dropped.
        lookup[key] = {
            name: value
            for name, value in raw_metadata.items()
            if value is None or isinstance(value, (str, int, float, bool))
        }

    return lookup