bead 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. bead/__init__.py +11 -0
  2. bead/__main__.py +11 -0
  3. bead/active_learning/__init__.py +15 -0
  4. bead/active_learning/config.py +231 -0
  5. bead/active_learning/loop.py +566 -0
  6. bead/active_learning/models/__init__.py +24 -0
  7. bead/active_learning/models/base.py +852 -0
  8. bead/active_learning/models/binary.py +910 -0
  9. bead/active_learning/models/categorical.py +943 -0
  10. bead/active_learning/models/cloze.py +862 -0
  11. bead/active_learning/models/forced_choice.py +956 -0
  12. bead/active_learning/models/free_text.py +773 -0
  13. bead/active_learning/models/lora.py +365 -0
  14. bead/active_learning/models/magnitude.py +835 -0
  15. bead/active_learning/models/multi_select.py +795 -0
  16. bead/active_learning/models/ordinal_scale.py +811 -0
  17. bead/active_learning/models/peft_adapter.py +155 -0
  18. bead/active_learning/models/random_effects.py +639 -0
  19. bead/active_learning/selection.py +354 -0
  20. bead/active_learning/strategies.py +391 -0
  21. bead/active_learning/trainers/__init__.py +26 -0
  22. bead/active_learning/trainers/base.py +210 -0
  23. bead/active_learning/trainers/data_collator.py +172 -0
  24. bead/active_learning/trainers/dataset_utils.py +261 -0
  25. bead/active_learning/trainers/huggingface.py +304 -0
  26. bead/active_learning/trainers/lightning.py +324 -0
  27. bead/active_learning/trainers/metrics.py +424 -0
  28. bead/active_learning/trainers/mixed_effects.py +551 -0
  29. bead/active_learning/trainers/model_wrapper.py +509 -0
  30. bead/active_learning/trainers/registry.py +104 -0
  31. bead/adapters/__init__.py +11 -0
  32. bead/adapters/huggingface.py +61 -0
  33. bead/behavioral/__init__.py +116 -0
  34. bead/behavioral/analytics.py +646 -0
  35. bead/behavioral/extraction.py +343 -0
  36. bead/behavioral/merging.py +343 -0
  37. bead/cli/__init__.py +11 -0
  38. bead/cli/active_learning.py +513 -0
  39. bead/cli/active_learning_commands.py +779 -0
  40. bead/cli/completion.py +359 -0
  41. bead/cli/config.py +624 -0
  42. bead/cli/constraint_builders.py +286 -0
  43. bead/cli/deployment.py +859 -0
  44. bead/cli/deployment_trials.py +493 -0
  45. bead/cli/deployment_ui.py +332 -0
  46. bead/cli/display.py +378 -0
  47. bead/cli/items.py +960 -0
  48. bead/cli/items_factories.py +776 -0
  49. bead/cli/list_constraints.py +714 -0
  50. bead/cli/lists.py +490 -0
  51. bead/cli/main.py +430 -0
  52. bead/cli/models.py +877 -0
  53. bead/cli/resource_loaders.py +621 -0
  54. bead/cli/resources.py +1036 -0
  55. bead/cli/shell.py +356 -0
  56. bead/cli/simulate.py +840 -0
  57. bead/cli/templates.py +1158 -0
  58. bead/cli/training.py +1080 -0
  59. bead/cli/utils.py +614 -0
  60. bead/cli/workflow.py +1273 -0
  61. bead/config/__init__.py +68 -0
  62. bead/config/active_learning.py +1009 -0
  63. bead/config/config.py +192 -0
  64. bead/config/defaults.py +118 -0
  65. bead/config/deployment.py +217 -0
  66. bead/config/env.py +147 -0
  67. bead/config/item.py +45 -0
  68. bead/config/list.py +193 -0
  69. bead/config/loader.py +149 -0
  70. bead/config/logging.py +42 -0
  71. bead/config/model.py +49 -0
  72. bead/config/paths.py +46 -0
  73. bead/config/profiles.py +320 -0
  74. bead/config/resources.py +47 -0
  75. bead/config/serialization.py +210 -0
  76. bead/config/simulation.py +206 -0
  77. bead/config/template.py +238 -0
  78. bead/config/validation.py +267 -0
  79. bead/data/__init__.py +65 -0
  80. bead/data/base.py +87 -0
  81. bead/data/identifiers.py +97 -0
  82. bead/data/language_codes.py +61 -0
  83. bead/data/metadata.py +270 -0
  84. bead/data/range.py +123 -0
  85. bead/data/repository.py +358 -0
  86. bead/data/serialization.py +249 -0
  87. bead/data/timestamps.py +89 -0
  88. bead/data/validation.py +349 -0
  89. bead/data_collection/__init__.py +11 -0
  90. bead/data_collection/jatos.py +223 -0
  91. bead/data_collection/merger.py +154 -0
  92. bead/data_collection/prolific.py +198 -0
  93. bead/deployment/__init__.py +5 -0
  94. bead/deployment/distribution.py +402 -0
  95. bead/deployment/jatos/__init__.py +1 -0
  96. bead/deployment/jatos/api.py +200 -0
  97. bead/deployment/jatos/exporter.py +210 -0
  98. bead/deployment/jspsych/__init__.py +9 -0
  99. bead/deployment/jspsych/biome.json +44 -0
  100. bead/deployment/jspsych/config.py +411 -0
  101. bead/deployment/jspsych/generator.py +598 -0
  102. bead/deployment/jspsych/package.json +51 -0
  103. bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
  104. bead/deployment/jspsych/randomizer.py +299 -0
  105. bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
  106. bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
  107. bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
  108. bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
  109. bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
  110. bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
  111. bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
  112. bead/deployment/jspsych/src/plugins/rating.ts +248 -0
  113. bead/deployment/jspsych/src/slopit/index.ts +9 -0
  114. bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
  115. bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
  116. bead/deployment/jspsych/templates/experiment.css +1 -0
  117. bead/deployment/jspsych/templates/experiment.js.template +289 -0
  118. bead/deployment/jspsych/templates/index.html +51 -0
  119. bead/deployment/jspsych/templates/randomizer.js +241 -0
  120. bead/deployment/jspsych/templates/randomizer.js.template +313 -0
  121. bead/deployment/jspsych/trials.py +723 -0
  122. bead/deployment/jspsych/tsconfig.json +23 -0
  123. bead/deployment/jspsych/tsup.config.ts +30 -0
  124. bead/deployment/jspsych/ui/__init__.py +1 -0
  125. bead/deployment/jspsych/ui/components.py +383 -0
  126. bead/deployment/jspsych/ui/styles.py +411 -0
  127. bead/dsl/__init__.py +80 -0
  128. bead/dsl/ast.py +168 -0
  129. bead/dsl/context.py +178 -0
  130. bead/dsl/errors.py +71 -0
  131. bead/dsl/evaluator.py +570 -0
  132. bead/dsl/grammar.lark +81 -0
  133. bead/dsl/parser.py +231 -0
  134. bead/dsl/stdlib.py +929 -0
  135. bead/evaluation/__init__.py +13 -0
  136. bead/evaluation/convergence.py +485 -0
  137. bead/evaluation/interannotator.py +398 -0
  138. bead/items/__init__.py +40 -0
  139. bead/items/adapters/__init__.py +70 -0
  140. bead/items/adapters/anthropic.py +224 -0
  141. bead/items/adapters/api_utils.py +167 -0
  142. bead/items/adapters/base.py +216 -0
  143. bead/items/adapters/google.py +259 -0
  144. bead/items/adapters/huggingface.py +1074 -0
  145. bead/items/adapters/openai.py +323 -0
  146. bead/items/adapters/registry.py +202 -0
  147. bead/items/adapters/sentence_transformers.py +224 -0
  148. bead/items/adapters/togetherai.py +309 -0
  149. bead/items/binary.py +515 -0
  150. bead/items/cache.py +558 -0
  151. bead/items/categorical.py +593 -0
  152. bead/items/cloze.py +757 -0
  153. bead/items/constructor.py +784 -0
  154. bead/items/forced_choice.py +413 -0
  155. bead/items/free_text.py +681 -0
  156. bead/items/generation.py +432 -0
  157. bead/items/item.py +396 -0
  158. bead/items/item_template.py +787 -0
  159. bead/items/magnitude.py +573 -0
  160. bead/items/multi_select.py +621 -0
  161. bead/items/ordinal_scale.py +569 -0
  162. bead/items/scoring.py +448 -0
  163. bead/items/validation.py +723 -0
  164. bead/lists/__init__.py +30 -0
  165. bead/lists/balancer.py +263 -0
  166. bead/lists/constraints.py +1067 -0
  167. bead/lists/experiment_list.py +286 -0
  168. bead/lists/list_collection.py +378 -0
  169. bead/lists/partitioner.py +1141 -0
  170. bead/lists/stratification.py +254 -0
  171. bead/participants/__init__.py +73 -0
  172. bead/participants/collection.py +699 -0
  173. bead/participants/merging.py +312 -0
  174. bead/participants/metadata_spec.py +491 -0
  175. bead/participants/models.py +276 -0
  176. bead/resources/__init__.py +29 -0
  177. bead/resources/adapters/__init__.py +19 -0
  178. bead/resources/adapters/base.py +104 -0
  179. bead/resources/adapters/cache.py +128 -0
  180. bead/resources/adapters/glazing.py +508 -0
  181. bead/resources/adapters/registry.py +117 -0
  182. bead/resources/adapters/unimorph.py +796 -0
  183. bead/resources/classification.py +856 -0
  184. bead/resources/constraint_builders.py +329 -0
  185. bead/resources/constraints.py +165 -0
  186. bead/resources/lexical_item.py +223 -0
  187. bead/resources/lexicon.py +744 -0
  188. bead/resources/loaders.py +209 -0
  189. bead/resources/template.py +441 -0
  190. bead/resources/template_collection.py +707 -0
  191. bead/resources/template_generation.py +349 -0
  192. bead/simulation/__init__.py +29 -0
  193. bead/simulation/annotators/__init__.py +15 -0
  194. bead/simulation/annotators/base.py +175 -0
  195. bead/simulation/annotators/distance_based.py +135 -0
  196. bead/simulation/annotators/lm_based.py +114 -0
  197. bead/simulation/annotators/oracle.py +182 -0
  198. bead/simulation/annotators/random.py +181 -0
  199. bead/simulation/dsl_extension/__init__.py +3 -0
  200. bead/simulation/noise_models/__init__.py +13 -0
  201. bead/simulation/noise_models/base.py +42 -0
  202. bead/simulation/noise_models/random_noise.py +82 -0
  203. bead/simulation/noise_models/systematic.py +132 -0
  204. bead/simulation/noise_models/temperature.py +86 -0
  205. bead/simulation/runner.py +144 -0
  206. bead/simulation/strategies/__init__.py +23 -0
  207. bead/simulation/strategies/base.py +123 -0
  208. bead/simulation/strategies/binary.py +103 -0
  209. bead/simulation/strategies/categorical.py +123 -0
  210. bead/simulation/strategies/cloze.py +224 -0
  211. bead/simulation/strategies/forced_choice.py +127 -0
  212. bead/simulation/strategies/free_text.py +105 -0
  213. bead/simulation/strategies/magnitude.py +116 -0
  214. bead/simulation/strategies/multi_select.py +129 -0
  215. bead/simulation/strategies/ordinal_scale.py +131 -0
  216. bead/templates/__init__.py +27 -0
  217. bead/templates/adapters/__init__.py +17 -0
  218. bead/templates/adapters/base.py +128 -0
  219. bead/templates/adapters/cache.py +178 -0
  220. bead/templates/adapters/huggingface.py +312 -0
  221. bead/templates/combinatorics.py +103 -0
  222. bead/templates/filler.py +605 -0
  223. bead/templates/renderers.py +177 -0
  224. bead/templates/resolver.py +178 -0
  225. bead/templates/strategies.py +1806 -0
  226. bead/templates/streaming.py +195 -0
  227. bead-0.1.0.dist-info/METADATA +212 -0
  228. bead-0.1.0.dist-info/RECORD +231 -0
  229. bead-0.1.0.dist-info/WHEEL +4 -0
  230. bead-0.1.0.dist-info/entry_points.txt +2 -0
  231. bead-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,343 @@
1
+ """Behavioral data extraction from slopit sessions.
2
+
3
+ This module provides functions for extracting per-judgment behavioral
4
+ analytics from slopit session data, using slopit's IO loaders and
5
+ analysis pipeline.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from pathlib import Path
11
+ from typing import TYPE_CHECKING
12
+ from uuid import UUID
13
+
14
+ from slopit import load_session, load_sessions
15
+ from slopit.behavioral import (
16
+ Analyzer,
17
+ FocusAnalyzer,
18
+ KeystrokeAnalyzer,
19
+ PasteAnalyzer,
20
+ TimingAnalyzer,
21
+ )
22
+ from slopit.pipeline import AnalysisPipeline
23
+ from slopit.schemas import AnalysisFlag, Severity, SlopitSession, SlopitTrial
24
+
25
+ from bead.behavioral.analytics import AnalyticsCollection, JudgmentAnalytics
26
+
27
+ if TYPE_CHECKING:
28
+ from bead.data.base import JsonValue
29
+
30
+
31
+ def _get_max_severity(flags: list[AnalysisFlag]) -> Severity | None:
32
+ """Get maximum severity from a list of flags.
33
+
34
+ Parameters
35
+ ----------
36
+ flags : list[AnalysisFlag]
37
+ List of analysis flags.
38
+
39
+ Returns
40
+ -------
41
+ Severity | None
42
+ Maximum severity, or None if no flags.
43
+ """
44
+ if not flags:
45
+ return None
46
+
47
+ severity_order: dict[str, int] = {"info": 0, "low": 1, "medium": 2, "high": 3}
48
+ max_level = -1
49
+ max_severity: Severity | None = None
50
+
51
+ for flag in flags:
52
+ level = severity_order.get(flag.severity, 0)
53
+ if level > max_level:
54
+ max_level = level
55
+ max_severity = flag.severity
56
+
57
+ return max_severity
58
+
59
+
60
def extract_from_trial(
    trial: SlopitTrial,
    session: SlopitSession,
    item_id_key: str = "item_id",
) -> JudgmentAnalytics | None:
    """Build a ``JudgmentAnalytics`` record for one slopit trial.

    The trial is linked to an item through ``trial.platform_data``: the value
    stored under ``item_id_key`` must be a string that parses as a UUID.
    Trials that cannot be linked this way yield ``None``.

    Parameters
    ----------
    trial : SlopitTrial
        Trial to convert.
    session : SlopitSession
        Session the trial belongs to. It supplies ``session_id`` and the
        participant ID (``participant_id``, falling back to ``session_id``
        when that is falsy).
    item_id_key : str
        Key in ``trial.platform_data`` holding the item UUID string.

    Returns
    -------
    JudgmentAnalytics | None
        The analytics record, or None when ``platform_data`` is missing,
        lacks ``item_id_key``, or holds a value that is not a valid UUID
        string.
    """
    platform_data = trial.platform_data
    if platform_data is None or item_id_key not in platform_data:
        return None

    raw_item_id = platform_data[item_id_key]
    if not isinstance(raw_item_id, str):
        return None

    try:
        item_id = UUID(raw_item_id)
    except (ValueError, TypeError):
        return None

    # A missing response is recorded as None; a missing reaction time as 0.
    response = trial.response
    response_value: JsonValue = None if response is None else response.value

    rt = trial.rt
    response_time_ms = 0 if rt is None else rt

    # Behavioral capture is optional at every level.
    keystroke_metrics = focus_metrics = timing_metrics = None
    paste_count = 0

    behavioral = trial.behavioral
    if behavioral is not None:
        metrics = behavioral.metrics
        if metrics is not None:
            keystroke_metrics = metrics.keystroke
            focus_metrics = metrics.focus
            timing_metrics = metrics.timing

        paste_events = behavioral.paste
        if paste_events is not None:
            paste_count = len(paste_events)

    # Capture flags are re-expressed as AnalysisFlag objects (analyzer
    # "capture") so the record carries a single flag type.
    capture_flags = trial.capture_flags
    flags: list[AnalysisFlag] = (
        []
        if capture_flags is None
        else [
            AnalysisFlag(
                type=capture_flag.type,
                analyzer="capture",
                severity=capture_flag.severity,
                message=capture_flag.message,
                evidence=capture_flag.details,
                trial_ids=[trial.trial_id],
            )
            for capture_flag in capture_flags
        ]
    )

    return JudgmentAnalytics(
        item_id=item_id,
        participant_id=session.participant_id or session.session_id,
        trial_index=trial.trial_index,
        session_id=session.session_id,
        response_value=response_value,
        response_time_ms=response_time_ms,
        keystroke_metrics=keystroke_metrics,
        focus_metrics=focus_metrics,
        timing_metrics=timing_metrics,
        paste_event_count=paste_count,
        flags=flags,
        max_severity=_get_max_severity(flags),
    )
147
+
148
+
149
def extract_from_session(
    session: SlopitSession,
    item_id_key: str = "item_id",
) -> list[JudgmentAnalytics]:
    """Collect analytics records for every linkable trial of a session.

    Each trial in ``session.trials`` is passed to ``extract_from_trial``;
    trials for which it returns None are left out.

    Parameters
    ----------
    session : SlopitSession
        Session whose trials are processed.
    item_id_key : str
        Key in platform_data containing the item UUID.

    Returns
    -------
    list[JudgmentAnalytics]
        One record per trial with a valid item ID, in trial order.
    """
    candidates = (
        extract_from_trial(trial, session, item_id_key) for trial in session.trials
    )
    return [record for record in candidates if record is not None]
175
+
176
+
177
def extract_from_file(
    path: Path | str,
    item_id_key: str = "item_id",
) -> list[JudgmentAnalytics]:
    """Load one session file and extract its analytics records.

    The file is read with slopit's ``load_session`` and the resulting
    session is handed to ``extract_from_session``.

    Parameters
    ----------
    path : Path | str
        Path to the session file.
    item_id_key : str
        Key in platform_data containing the item UUID.

    Returns
    -------
    list[JudgmentAnalytics]
        Analytics records from the session.

    Examples
    --------
    >>> analytics = extract_from_file("data/session_001.json")
    """
    return extract_from_session(load_session(path), item_id_key)
205
+
206
+
207
def extract_from_directory(
    path: Path | str,
    pattern: str = "*",
    item_id_key: str = "item_id",
    name: str | None = None,
) -> AnalyticsCollection:
    """Extract analytics from every session loaded from a directory.

    Sessions are loaded with slopit's ``load_sessions(path, pattern)`` and
    the records of all sessions are gathered into one collection.

    Parameters
    ----------
    path : Path | str
        Directory containing session files.
    pattern : str
        Glob pattern forwarded to ``load_sessions`` (default: "*").
    item_id_key : str
        Key in platform_data containing the item UUID.
    name : str | None
        Name for the collection. When None, the final component of ``path``
        is used.

    Returns
    -------
    AnalyticsCollection
        Collection holding the records of all loaded sessions.

    Examples
    --------
    >>> collection = extract_from_directory("data/jatos_export/")
    >>> print(f"Extracted {len(collection)} analytics records")
    """
    directory = Path(path)

    records: list[JudgmentAnalytics] = [
        record
        for session in load_sessions(directory, pattern)
        for record in extract_from_session(session, item_id_key)
    ]

    return AnalyticsCollection(
        name=directory.name if name is None else name,
        analytics=records,
    )
248
+
249
+
250
def analyze_sessions(
    sessions: list[SlopitSession],
    analyzers: list[Analyzer] | None = None,
) -> list[SlopitSession]:
    """Run a slopit ``AnalysisPipeline`` over the given sessions.

    Parameters
    ----------
    sessions : list[SlopitSession]
        Sessions to analyze.
    analyzers : list[Analyzer] | None
        Analyzers for the pipeline. When None, a fresh default set is built:
        KeystrokeAnalyzer, FocusAnalyzer, PasteAnalyzer, TimingAnalyzer.

    Returns
    -------
    list[SlopitSession]
        Whatever ``AnalysisPipeline.analyze`` returns for ``sessions``.

    Examples
    --------
    >>> from slopit import load_sessions
    >>> sessions = load_sessions("data/")
    >>> analyzed = analyze_sessions(sessions)
    """
    active_analyzers = (
        [KeystrokeAnalyzer(), FocusAnalyzer(), PasteAnalyzer(), TimingAnalyzer()]
        if analyzers is None
        else analyzers
    )
    return AnalysisPipeline(active_analyzers).analyze(sessions)
289
+
290
+
291
def extract_with_analysis(
    path: Path | str,
    pattern: str = "*",
    item_id_key: str = "item_id",
    analyzers: list[Analyzer] | None = None,
    name: str | None = None,
) -> AnalyticsCollection:
    """Load sessions, analyze them, and extract analytics in one call.

    Chains ``load_sessions``, ``analyze_sessions`` and
    ``extract_from_session``.

    Parameters
    ----------
    path : Path | str
        Location of the session data, passed to slopit's ``load_sessions``.
    pattern : str
        Glob pattern forwarded to ``load_sessions`` (default: "*").
    item_id_key : str
        Key in platform_data containing the item UUID.
    analyzers : list[Analyzer] | None
        Analyzers to run. If None, ``analyze_sessions`` uses its default set.
    name : str | None
        Name for the collection. When None, the final component of ``path``
        is used.

    Returns
    -------
    AnalyticsCollection
        Collection built from the analyzed sessions.

    Examples
    --------
    >>> collection = extract_with_analysis("data/jatos_export/")
    >>> summaries = collection.get_participant_summaries()
    >>> for s in summaries:
    ...     if s.flag_rate > 0.1:
    ...         print(f"Participant {s.participant_id}: {s.flag_rate:.1%} flagged")
    """
    source = Path(path)

    analyzed_sessions = analyze_sessions(load_sessions(source, pattern), analyzers)

    records: list[JudgmentAnalytics] = []
    for analyzed in analyzed_sessions:
        records += extract_from_session(analyzed, item_id_key)

    return AnalyticsCollection(
        name=source.name if name is None else name,
        analytics=records,
    )
@@ -0,0 +1,343 @@
1
+ """Utilities for merging behavioral analytics with judgment data.
2
+
3
+ This module provides functions for joining behavioral analytics with
4
+ judgment DataFrames for analysis. All functions support both pandas
5
+ and polars DataFrames, preserving the input type.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import TYPE_CHECKING, Literal
11
+
12
+ import pandas as pd
13
+ import polars as pl
14
+
15
+ from bead.behavioral.analytics import AnalyticsCollection
16
+
17
+ if TYPE_CHECKING:
18
+ from slopit.schemas import Severity
19
+
20
+ from bead.participants.collection import IDMappingCollection, ParticipantCollection
21
+
22
+ # Type alias for supported DataFrame types
23
+ DataFrame = pd.DataFrame | pl.DataFrame
24
+
25
+
26
def merge_behavioral_analytics(
    judgments_df: DataFrame,
    analytics: AnalyticsCollection,
    item_id_column: str = "item_id",
    participant_id_column: str = "participant_id",
    include_metrics: bool = True,
    include_flags: bool = True,
    how: str = "left",
) -> DataFrame:
    """Join behavioral analytics onto a judgments DataFrame.

    The analytics collection is converted to a DataFrame of the same backend
    as ``judgments_df`` and joined on the (item ID, participant ID) pair, so
    a pandas input yields a pandas result and a polars input a polars result.
    Analytics columns whose names clash with judgment columns receive the
    suffix ``_behavioral``.

    Parameters
    ----------
    judgments_df : DataFrame
        DataFrame containing judgment data.
    analytics : AnalyticsCollection
        Collection of behavioral analytics.
    item_id_column : str
        Column in judgments_df containing item IDs (default: "item_id").
    participant_id_column : str
        Column in judgments_df containing participant IDs.
    include_metrics : bool
        Forwarded to ``analytics.to_dataframe``.
    include_flags : bool
        Forwarded to ``analytics.to_dataframe``.
    how : str
        Join type handed to the backend's join/merge (default: "left").

    Returns
    -------
    DataFrame
        Merged DataFrame with behavioral analytics columns added.

    Examples
    --------
    >>> import pandas as pd
    >>> judgments = pd.DataFrame({
    ...     "item_id": ["uuid1", "uuid2"],
    ...     "participant_id": ["p1", "p1"],
    ...     "response": [5, 3],
    ... })
    >>> # merged = merge_behavioral_analytics(judgments, analytics_collection)
    """
    judgment_keys = [item_id_column, participant_id_column]
    analytics_keys = ["item_id", "participant_id"]

    use_polars = isinstance(judgments_df, pl.DataFrame)
    backend: Literal["pandas", "polars"] = "polars" if use_polars else "pandas"

    # Build the right-hand side with the same backend as the input frame.
    analytics_df = analytics.to_dataframe(
        backend=backend,
        include_metrics=include_metrics,
        include_flags=include_flags,
    )

    if use_polars:
        assert isinstance(judgments_df, pl.DataFrame)
        assert isinstance(analytics_df, pl.DataFrame)

        return judgments_df.join(
            analytics_df,
            left_on=judgment_keys,
            right_on=analytics_keys,
            how=how,  # type: ignore[arg-type]
            suffix="_behavioral",
        )

    assert isinstance(judgments_df, pd.DataFrame)
    assert isinstance(analytics_df, pd.DataFrame)

    merged = judgments_df.merge(
        analytics_df,
        left_on=judgment_keys,
        right_on=analytics_keys,
        how=how,  # type: ignore[arg-type]
        suffixes=("", "_behavioral"),
    )

    # Drop suffixed copies of the key columns if the merge produced any.
    redundant = [
        column
        for column in ("item_id_behavioral", "participant_id_behavioral")
        if column in merged.columns
    ]
    if redundant:
        merged = merged.drop(columns=redundant)

    return merged
114
+
115
+
116
def filter_flagged_judgments(
    judgments_df: DataFrame,
    analytics: AnalyticsCollection,
    item_id_column: str = "item_id",
    participant_id_column: str = "participant_id",
    min_severity: Severity | None = None,
    exclude_flagged: bool = True,
) -> DataFrame:
    """Filter judgments based on behavioral flags.

    A judgment row counts as flagged when its (item ID, participant ID)
    pair, compared as strings, matches a record returned by
    ``analytics.filter_flagged(min_severity=..., exclude_flagged=False)``.
    Preserves input DataFrame type.

    Parameters
    ----------
    judgments_df : DataFrame
        DataFrame containing judgment data.
    analytics : AnalyticsCollection
        Collection of behavioral analytics.
    item_id_column : str
        Column containing item IDs.
    participant_id_column : str
        Column containing participant IDs.
    min_severity : Severity | None
        Minimum severity level for filtering. If None, any flag counts.
    exclude_flagged : bool
        If True, exclude flagged judgments (default).
        If False, keep only flagged judgments.

    Returns
    -------
    DataFrame
        Filtered DataFrame (a new object; the input is not modified).

    Examples
    --------
    >>> # Keep only unflagged judgments
    >>> clean_df = filter_flagged_judgments(judgments, analytics, exclude_flagged=True)
    >>> # Keep only high-severity flagged judgments for review
    >>> flagged_df = filter_flagged_judgments(
    ...     judgments, analytics, min_severity="high", exclude_flagged=False
    ... )
    """
    is_polars = isinstance(judgments_df, pl.DataFrame)

    # Always fetch the flagged records; `exclude_flagged` is applied to the
    # judgments below, not to the analytics collection.
    flagged_analytics = analytics.filter_flagged(
        min_severity=min_severity,
        exclude_flagged=False,
    )

    # Both IDs are normalised with str() on this side because the DataFrame
    # side is normalised the same way before the membership test.
    flagged_pairs: set[tuple[str, str]] = {
        (str(a.item_id), str(a.participant_id)) for a in flagged_analytics.analytics
    }

    if is_polars:
        assert isinstance(judgments_df, pl.DataFrame)

        # Create mask column
        df_with_flag = judgments_df.with_columns(
            pl.struct([item_id_column, participant_id_column])
            .map_elements(
                lambda row: (
                    (str(row[item_id_column]), str(row[participant_id_column]))
                    in flagged_pairs
                ),
                return_dtype=pl.Boolean,
            )
            .alias("_is_flagged")
        )

        if exclude_flagged:
            result = df_with_flag.filter(~pl.col("_is_flagged"))
        else:
            result = df_with_flag.filter(pl.col("_is_flagged"))

        return result.drop("_is_flagged")

    assert isinstance(judgments_df, pd.DataFrame)

    # Build the mask from the two ID columns directly instead of a row-wise
    # DataFrame.apply: apply(axis=1) returns a DataFrame (not a boolean
    # Series) for an empty frame and coerces each row to a common dtype,
    # which can change how an ID stringifies.
    mask = pd.Series(
        [
            (str(item), str(participant)) in flagged_pairs
            for item, participant in zip(
                judgments_df[item_id_column],
                judgments_df[participant_id_column],
            )
        ],
        index=judgments_df.index,
        dtype=bool,
    )

    keep = ~mask if exclude_flagged else mask
    return judgments_df[keep].copy()
210
+
211
+
212
def create_analysis_dataframe_with_behavior(
    judgments_df: DataFrame,
    participants: ParticipantCollection,
    analytics: AnalyticsCollection,
    id_mappings: IDMappingCollection | None = None,
    external_id_column: str | None = None,
    participant_id_column: str = "participant_id",
    item_id_column: str = "item_id",
    metadata_columns: list[str] | None = None,
    include_metrics: bool = True,
    include_flags: bool = True,
) -> DataFrame:
    """Create analysis-ready DataFrame with metadata and behavioral analytics.

    Runs three steps in order: optional external-ID resolution, participant
    metadata merge, and behavioral analytics merge.

    Parameters
    ----------
    judgments_df : DataFrame
        Raw judgment data.
    participants : ParticipantCollection
        Participant collection with metadata.
    analytics : AnalyticsCollection
        Behavioral analytics collection.
    id_mappings : IDMappingCollection | None
        ID mappings (required if external_id_column is provided).
    external_id_column : str | None
        Column with external IDs to resolve. When None, no resolution is
        performed.
    participant_id_column : str
        Column with participant IDs (after resolution).
    item_id_column : str
        Column with item IDs.
    metadata_columns : list[str] | None
        Participant metadata columns to include.
    include_metrics : bool
        If True, include behavioral metrics columns.
    include_flags : bool
        If True, include flag columns.

    Returns
    -------
    DataFrame
        Analysis-ready DataFrame with both metadata and behavioral data.

    Raises
    ------
    ValueError
        If ``external_id_column`` is given without ``id_mappings``.

    Examples
    --------
    >>> analysis_df = create_analysis_dataframe_with_behavior(
    ...     judgments,
    ...     participants,
    ...     analytics,
    ...     id_mappings=mappings,
    ...     external_id_column="PROLIFIC_PID",
    ... )
    """
    # Fail fast on a contradictory request: silently skipping resolution
    # would leave external IDs unresolved and make the later merges operate
    # on the wrong (or a missing) participant column.
    if external_id_column is not None and id_mappings is None:
        raise ValueError(
            "id_mappings is required when external_id_column is provided"
        )

    # Import here to avoid circular imports
    from bead.participants.merging import (  # noqa: PLC0415
        merge_participant_metadata,
        resolve_external_ids,
    )

    df = judgments_df

    # Step 1: Resolve external IDs if needed
    if external_id_column is not None and id_mappings is not None:
        df = resolve_external_ids(
            df,
            id_mappings,
            external_id_column=external_id_column,
            output_column=participant_id_column,
        )

    # Step 2: Merge participant metadata
    df = merge_participant_metadata(
        df,
        participants,
        id_column=participant_id_column,
        metadata_columns=metadata_columns,
    )

    # Step 3: Merge behavioral analytics
    return merge_behavioral_analytics(
        df,
        analytics,
        item_id_column=item_id_column,
        participant_id_column=participant_id_column,
        include_metrics=include_metrics,
        include_flags=include_flags,
    )
303
+
304
+
305
def get_exclusion_list(
    analytics: AnalyticsCollection,
    min_flag_rate: float = 0.1,
    min_severity: Severity | None = None,
) -> list[str]:
    """Get list of participant IDs that should be excluded based on flags.

    Identifies participants whose flag rate is at or above the threshold.
    The rate is always measured against all of a participant's records in
    ``analytics``.

    Parameters
    ----------
    analytics : AnalyticsCollection
        Behavioral analytics collection.
    min_flag_rate : float
        Minimum proportion of flagged judgments for exclusion (default: 0.1).
    min_severity : Severity | None
        Only count flags at or above this severity. If None, the flag rate
        reported by ``analytics.get_participant_summaries()`` is used.

    Returns
    -------
    list[str]
        Participant IDs recommended for exclusion.

    Examples
    --------
    >>> exclude = get_exclusion_list(analytics, min_flag_rate=0.2)
    >>> clean_df = judgments_df[~judgments_df["participant_id"].isin(exclude)]
    """
    if min_severity is None:
        return [
            summary.participant_id
            for summary in analytics.get_participant_summaries()
            if summary.flag_rate >= min_flag_rate
        ]

    # Denominator: every record of each participant in the full collection.
    totals: dict[str, int] = {}
    for record in analytics.analytics:
        totals[record.participant_id] = totals.get(record.participant_id, 0) + 1

    # Numerator: records flagged at or above `min_severity`. Summarising this
    # filtered collection directly would compute the rate over flagged
    # records only, so the threshold could no longer discriminate.
    severe = analytics.filter_flagged(min_severity=min_severity, exclude_flagged=False)
    flagged: dict[str, int] = {}
    for record in severe.analytics:
        flagged[record.participant_id] = flagged.get(record.participant_id, 0) + 1

    return [
        participant_id
        for participant_id, total in totals.items()
        if flagged.get(participant_id, 0) / total >= min_flag_rate
    ]
bead/cli/__init__.py ADDED
@@ -0,0 +1,11 @@
1
+ """Command-line interface.
2
+
3
+ Provides commands for configuration management, project initialization,
4
+ and pipeline execution.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from bead.cli.main import cli
10
+
11
+ __all__ = ["cli"]