bead 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231)
  1. bead/__init__.py +11 -0
  2. bead/__main__.py +11 -0
  3. bead/active_learning/__init__.py +15 -0
  4. bead/active_learning/config.py +231 -0
  5. bead/active_learning/loop.py +566 -0
  6. bead/active_learning/models/__init__.py +24 -0
  7. bead/active_learning/models/base.py +852 -0
  8. bead/active_learning/models/binary.py +910 -0
  9. bead/active_learning/models/categorical.py +943 -0
  10. bead/active_learning/models/cloze.py +862 -0
  11. bead/active_learning/models/forced_choice.py +956 -0
  12. bead/active_learning/models/free_text.py +773 -0
  13. bead/active_learning/models/lora.py +365 -0
  14. bead/active_learning/models/magnitude.py +835 -0
  15. bead/active_learning/models/multi_select.py +795 -0
  16. bead/active_learning/models/ordinal_scale.py +811 -0
  17. bead/active_learning/models/peft_adapter.py +155 -0
  18. bead/active_learning/models/random_effects.py +639 -0
  19. bead/active_learning/selection.py +354 -0
  20. bead/active_learning/strategies.py +391 -0
  21. bead/active_learning/trainers/__init__.py +26 -0
  22. bead/active_learning/trainers/base.py +210 -0
  23. bead/active_learning/trainers/data_collator.py +172 -0
  24. bead/active_learning/trainers/dataset_utils.py +261 -0
  25. bead/active_learning/trainers/huggingface.py +304 -0
  26. bead/active_learning/trainers/lightning.py +324 -0
  27. bead/active_learning/trainers/metrics.py +424 -0
  28. bead/active_learning/trainers/mixed_effects.py +551 -0
  29. bead/active_learning/trainers/model_wrapper.py +509 -0
  30. bead/active_learning/trainers/registry.py +104 -0
  31. bead/adapters/__init__.py +11 -0
  32. bead/adapters/huggingface.py +61 -0
  33. bead/behavioral/__init__.py +116 -0
  34. bead/behavioral/analytics.py +646 -0
  35. bead/behavioral/extraction.py +343 -0
  36. bead/behavioral/merging.py +343 -0
  37. bead/cli/__init__.py +11 -0
  38. bead/cli/active_learning.py +513 -0
  39. bead/cli/active_learning_commands.py +779 -0
  40. bead/cli/completion.py +359 -0
  41. bead/cli/config.py +624 -0
  42. bead/cli/constraint_builders.py +286 -0
  43. bead/cli/deployment.py +859 -0
  44. bead/cli/deployment_trials.py +493 -0
  45. bead/cli/deployment_ui.py +332 -0
  46. bead/cli/display.py +378 -0
  47. bead/cli/items.py +960 -0
  48. bead/cli/items_factories.py +776 -0
  49. bead/cli/list_constraints.py +714 -0
  50. bead/cli/lists.py +490 -0
  51. bead/cli/main.py +430 -0
  52. bead/cli/models.py +877 -0
  53. bead/cli/resource_loaders.py +621 -0
  54. bead/cli/resources.py +1036 -0
  55. bead/cli/shell.py +356 -0
  56. bead/cli/simulate.py +840 -0
  57. bead/cli/templates.py +1158 -0
  58. bead/cli/training.py +1080 -0
  59. bead/cli/utils.py +614 -0
  60. bead/cli/workflow.py +1273 -0
  61. bead/config/__init__.py +68 -0
  62. bead/config/active_learning.py +1009 -0
  63. bead/config/config.py +192 -0
  64. bead/config/defaults.py +118 -0
  65. bead/config/deployment.py +217 -0
  66. bead/config/env.py +147 -0
  67. bead/config/item.py +45 -0
  68. bead/config/list.py +193 -0
  69. bead/config/loader.py +149 -0
  70. bead/config/logging.py +42 -0
  71. bead/config/model.py +49 -0
  72. bead/config/paths.py +46 -0
  73. bead/config/profiles.py +320 -0
  74. bead/config/resources.py +47 -0
  75. bead/config/serialization.py +210 -0
  76. bead/config/simulation.py +206 -0
  77. bead/config/template.py +238 -0
  78. bead/config/validation.py +267 -0
  79. bead/data/__init__.py +65 -0
  80. bead/data/base.py +87 -0
  81. bead/data/identifiers.py +97 -0
  82. bead/data/language_codes.py +61 -0
  83. bead/data/metadata.py +270 -0
  84. bead/data/range.py +123 -0
  85. bead/data/repository.py +358 -0
  86. bead/data/serialization.py +249 -0
  87. bead/data/timestamps.py +89 -0
  88. bead/data/validation.py +349 -0
  89. bead/data_collection/__init__.py +11 -0
  90. bead/data_collection/jatos.py +223 -0
  91. bead/data_collection/merger.py +154 -0
  92. bead/data_collection/prolific.py +198 -0
  93. bead/deployment/__init__.py +5 -0
  94. bead/deployment/distribution.py +402 -0
  95. bead/deployment/jatos/__init__.py +1 -0
  96. bead/deployment/jatos/api.py +200 -0
  97. bead/deployment/jatos/exporter.py +210 -0
  98. bead/deployment/jspsych/__init__.py +9 -0
  99. bead/deployment/jspsych/biome.json +44 -0
  100. bead/deployment/jspsych/config.py +411 -0
  101. bead/deployment/jspsych/generator.py +598 -0
  102. bead/deployment/jspsych/package.json +51 -0
  103. bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
  104. bead/deployment/jspsych/randomizer.py +299 -0
  105. bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
  106. bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
  107. bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
  108. bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
  109. bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
  110. bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
  111. bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
  112. bead/deployment/jspsych/src/plugins/rating.ts +248 -0
  113. bead/deployment/jspsych/src/slopit/index.ts +9 -0
  114. bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
  115. bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
  116. bead/deployment/jspsych/templates/experiment.css +1 -0
  117. bead/deployment/jspsych/templates/experiment.js.template +289 -0
  118. bead/deployment/jspsych/templates/index.html +51 -0
  119. bead/deployment/jspsych/templates/randomizer.js +241 -0
  120. bead/deployment/jspsych/templates/randomizer.js.template +313 -0
  121. bead/deployment/jspsych/trials.py +723 -0
  122. bead/deployment/jspsych/tsconfig.json +23 -0
  123. bead/deployment/jspsych/tsup.config.ts +30 -0
  124. bead/deployment/jspsych/ui/__init__.py +1 -0
  125. bead/deployment/jspsych/ui/components.py +383 -0
  126. bead/deployment/jspsych/ui/styles.py +411 -0
  127. bead/dsl/__init__.py +80 -0
  128. bead/dsl/ast.py +168 -0
  129. bead/dsl/context.py +178 -0
  130. bead/dsl/errors.py +71 -0
  131. bead/dsl/evaluator.py +570 -0
  132. bead/dsl/grammar.lark +81 -0
  133. bead/dsl/parser.py +231 -0
  134. bead/dsl/stdlib.py +929 -0
  135. bead/evaluation/__init__.py +13 -0
  136. bead/evaluation/convergence.py +485 -0
  137. bead/evaluation/interannotator.py +398 -0
  138. bead/items/__init__.py +40 -0
  139. bead/items/adapters/__init__.py +70 -0
  140. bead/items/adapters/anthropic.py +224 -0
  141. bead/items/adapters/api_utils.py +167 -0
  142. bead/items/adapters/base.py +216 -0
  143. bead/items/adapters/google.py +259 -0
  144. bead/items/adapters/huggingface.py +1074 -0
  145. bead/items/adapters/openai.py +323 -0
  146. bead/items/adapters/registry.py +202 -0
  147. bead/items/adapters/sentence_transformers.py +224 -0
  148. bead/items/adapters/togetherai.py +309 -0
  149. bead/items/binary.py +515 -0
  150. bead/items/cache.py +558 -0
  151. bead/items/categorical.py +593 -0
  152. bead/items/cloze.py +757 -0
  153. bead/items/constructor.py +784 -0
  154. bead/items/forced_choice.py +413 -0
  155. bead/items/free_text.py +681 -0
  156. bead/items/generation.py +432 -0
  157. bead/items/item.py +396 -0
  158. bead/items/item_template.py +787 -0
  159. bead/items/magnitude.py +573 -0
  160. bead/items/multi_select.py +621 -0
  161. bead/items/ordinal_scale.py +569 -0
  162. bead/items/scoring.py +448 -0
  163. bead/items/validation.py +723 -0
  164. bead/lists/__init__.py +30 -0
  165. bead/lists/balancer.py +263 -0
  166. bead/lists/constraints.py +1067 -0
  167. bead/lists/experiment_list.py +286 -0
  168. bead/lists/list_collection.py +378 -0
  169. bead/lists/partitioner.py +1141 -0
  170. bead/lists/stratification.py +254 -0
  171. bead/participants/__init__.py +73 -0
  172. bead/participants/collection.py +699 -0
  173. bead/participants/merging.py +312 -0
  174. bead/participants/metadata_spec.py +491 -0
  175. bead/participants/models.py +276 -0
  176. bead/resources/__init__.py +29 -0
  177. bead/resources/adapters/__init__.py +19 -0
  178. bead/resources/adapters/base.py +104 -0
  179. bead/resources/adapters/cache.py +128 -0
  180. bead/resources/adapters/glazing.py +508 -0
  181. bead/resources/adapters/registry.py +117 -0
  182. bead/resources/adapters/unimorph.py +796 -0
  183. bead/resources/classification.py +856 -0
  184. bead/resources/constraint_builders.py +329 -0
  185. bead/resources/constraints.py +165 -0
  186. bead/resources/lexical_item.py +223 -0
  187. bead/resources/lexicon.py +744 -0
  188. bead/resources/loaders.py +209 -0
  189. bead/resources/template.py +441 -0
  190. bead/resources/template_collection.py +707 -0
  191. bead/resources/template_generation.py +349 -0
  192. bead/simulation/__init__.py +29 -0
  193. bead/simulation/annotators/__init__.py +15 -0
  194. bead/simulation/annotators/base.py +175 -0
  195. bead/simulation/annotators/distance_based.py +135 -0
  196. bead/simulation/annotators/lm_based.py +114 -0
  197. bead/simulation/annotators/oracle.py +182 -0
  198. bead/simulation/annotators/random.py +181 -0
  199. bead/simulation/dsl_extension/__init__.py +3 -0
  200. bead/simulation/noise_models/__init__.py +13 -0
  201. bead/simulation/noise_models/base.py +42 -0
  202. bead/simulation/noise_models/random_noise.py +82 -0
  203. bead/simulation/noise_models/systematic.py +132 -0
  204. bead/simulation/noise_models/temperature.py +86 -0
  205. bead/simulation/runner.py +144 -0
  206. bead/simulation/strategies/__init__.py +23 -0
  207. bead/simulation/strategies/base.py +123 -0
  208. bead/simulation/strategies/binary.py +103 -0
  209. bead/simulation/strategies/categorical.py +123 -0
  210. bead/simulation/strategies/cloze.py +224 -0
  211. bead/simulation/strategies/forced_choice.py +127 -0
  212. bead/simulation/strategies/free_text.py +105 -0
  213. bead/simulation/strategies/magnitude.py +116 -0
  214. bead/simulation/strategies/multi_select.py +129 -0
  215. bead/simulation/strategies/ordinal_scale.py +131 -0
  216. bead/templates/__init__.py +27 -0
  217. bead/templates/adapters/__init__.py +17 -0
  218. bead/templates/adapters/base.py +128 -0
  219. bead/templates/adapters/cache.py +178 -0
  220. bead/templates/adapters/huggingface.py +312 -0
  221. bead/templates/combinatorics.py +103 -0
  222. bead/templates/filler.py +605 -0
  223. bead/templates/renderers.py +177 -0
  224. bead/templates/resolver.py +178 -0
  225. bead/templates/strategies.py +1806 -0
  226. bead/templates/streaming.py +195 -0
  227. bead-0.1.0.dist-info/METADATA +212 -0
  228. bead-0.1.0.dist-info/RECORD +231 -0
  229. bead-0.1.0.dist-info/WHEEL +4 -0
  230. bead-0.1.0.dist-info/entry_points.txt +2 -0
  231. bead-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,646 @@
1
+ """Behavioral analytics models for bead.
2
+
3
+ This module provides data models for per-judgment behavioral metrics
4
+ and participant-level summaries, linking slopit behavioral data to
5
+ bead's item-based experimental structure.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from collections import defaultdict
11
+ from pathlib import Path
12
+ from typing import Literal
13
+ from uuid import UUID
14
+
15
+ import pandas as pd
16
+ import polars as pl
17
+ from pydantic import ConfigDict, Field, computed_field
18
+ from slopit.schemas import (
19
+ AnalysisFlag,
20
+ FocusMetrics,
21
+ KeystrokeMetrics,
22
+ Severity,
23
+ TimingMetrics,
24
+ )
25
+
26
+ from bead.data.base import BeadBaseModel, JsonValue
27
+ from bead.data.serialization import read_jsonlines, write_jsonlines
28
+
29
# Type alias for supported DataFrame backends (pandas or polars) accepted
# and returned by the DataFrame-conversion helpers below.
DataFrame = pd.DataFrame | pl.DataFrame
31
+
32
+
33
+ def _empty_flag_list() -> list[AnalysisFlag]:
34
+ """Return empty flag list."""
35
+ return []
36
+
37
+
38
+ def _empty_analytics_list() -> list[JudgmentAnalytics]:
39
+ """Return empty analytics list."""
40
+ return []
41
+
42
+
43
+ def _empty_flag_counts() -> dict[str, int]:
44
+ """Return empty flag counts dict."""
45
+ return {}
46
+
47
+
48
class JudgmentAnalytics(BeadBaseModel):
    """Behavioral analytics for a single judgment.

    Links slopit behavioral data to a specific item judgment, enabling
    analysis of participant behavior during individual responses.

    Attributes
    ----------
    item_id : UUID
        Reference to the Item being judged.
    participant_id : str
        Participant identifier (from slopit session).
    trial_index : int
        Zero-indexed position in the experiment session.
    session_id : str
        Slopit session identifier.
    response_value : JsonValue
        The participant's response value.
    response_time_ms : int
        Response time in milliseconds.
    keystroke_metrics : KeystrokeMetrics | None
        Keystroke-derived behavioral metrics.
    focus_metrics : FocusMetrics | None
        Focus and visibility metrics.
    timing_metrics : TimingMetrics | None
        Timing-related metrics.
    paste_event_count : int
        Number of paste events during this judgment.
    flags : list[AnalysisFlag]
        Behavioral analysis flags from slopit analyzers.
    max_severity : Severity | None
        Maximum severity among all flags.

    Examples
    --------
    >>> from uuid import uuid4
    >>> analytics = JudgmentAnalytics(
    ...     item_id=uuid4(),
    ...     participant_id="participant_001",
    ...     trial_index=0,
    ...     session_id="session_001",
    ...     response_value=5,
    ...     response_time_ms=2500,
    ... )
    >>> analytics.is_flagged
    False
    """

    # Override to ignore computed_field values during deserialization
    # (computed properties are serialized into JSONL; on reload they would
    # otherwise be rejected as unknown fields).
    model_config = ConfigDict(extra="ignore")

    # Linkage to judgment
    item_id: UUID = Field(..., description="Item UUID being judged")
    participant_id: str = Field(..., description="Participant identifier")
    trial_index: int = Field(..., ge=0, description="Trial position in session")
    session_id: str = Field(..., description="Slopit session identifier")

    # Response data
    response_value: JsonValue = Field(
        default=None, description="Judgment response value"
    )
    response_time_ms: int = Field(
        ..., ge=0, description="Response time in milliseconds"
    )

    # Behavioral metrics (from slopit)
    keystroke_metrics: KeystrokeMetrics | None = Field(
        default=None, description="Keystroke dynamics metrics"
    )
    focus_metrics: FocusMetrics | None = Field(
        default=None, description="Focus and visibility metrics"
    )
    timing_metrics: TimingMetrics | None = Field(
        default=None, description="Timing metrics"
    )

    # Paste tracking
    paste_event_count: int = Field(
        default=0, ge=0, description="Number of paste events"
    )

    # Flags (from slopit analyzers)
    flags: list[AnalysisFlag] = Field(
        default_factory=_empty_flag_list, description="Analysis flags"
    )
    # NOTE(review): max_severity is a stored field, not derived from `flags`
    # here — the producer of these records must keep it in sync; verify upstream.
    max_severity: Severity | None = Field(
        default=None, description="Maximum flag severity"
    )

    @computed_field
    @property
    def has_paste_events(self) -> bool:
        """Check if this judgment had any paste events.

        Returns
        -------
        bool
            True if paste events occurred during this judgment.
        """
        return self.paste_event_count > 0

    @computed_field
    @property
    def is_flagged(self) -> bool:
        """Check if this judgment has any behavioral flags.

        Returns
        -------
        bool
            True if any analysis flags are present.
        """
        return len(self.flags) > 0

    def get_flag_types(self) -> list[str]:
        """Get list of flag types for this judgment.

        Returns
        -------
        list[str]
            List of flag type identifiers (one entry per flag; duplicates
            are preserved).
        """
        return [f.type for f in self.flags]
170
+
171
+
172
class ParticipantBehavioralSummary(BeadBaseModel):
    """Aggregated behavioral metrics for a participant across all judgments.

    Provides summary statistics useful for identifying participants
    who may require exclusion from analysis.

    Attributes
    ----------
    participant_id : str
        Participant identifier.
    session_id : str
        Slopit session identifier.
    total_judgments : int
        Total number of judgments analyzed.
    flagged_judgments : int
        Number of judgments with behavioral flags.
    mean_response_time_ms : float
        Mean response time across all judgments.
    mean_iki : float | None
        Mean inter-keystroke interval (averaged across judgments).
    total_keystrokes : int
        Total keystrokes across all judgments.
    total_paste_events : int
        Total paste events across all judgments.
    total_blur_events : int
        Total window blur events.
    total_blur_duration_ms : float
        Total time with window blurred in milliseconds.
    flag_counts : dict[str, int]
        Count of each flag type.
    max_severity : Severity | None
        Maximum flag severity across all judgments.

    Examples
    --------
    >>> summary = ParticipantBehavioralSummary(
    ...     participant_id="participant_001",
    ...     session_id="session_001",
    ...     total_judgments=50,
    ...     flagged_judgments=3,
    ...     mean_response_time_ms=2500.0,
    ... )
    >>> summary.flag_rate
    0.06
    """

    # Override to ignore computed_field values during deserialization
    # (mirrors JudgmentAnalytics: computed properties round-trip through JSON).
    model_config = ConfigDict(extra="ignore")

    participant_id: str = Field(..., description="Participant identifier")
    session_id: str = Field(..., description="Session identifier")

    # Aggregated counts
    total_judgments: int = Field(..., ge=0, description="Total judgments")
    flagged_judgments: int = Field(
        default=0, ge=0, description="Flagged judgment count"
    )
    mean_response_time_ms: float = Field(..., ge=0.0, description="Mean response time")

    # Keystroke aggregates
    # None means no per-judgment keystroke metrics contributed an IKI value.
    mean_iki: float | None = Field(
        default=None, description="Mean inter-keystroke interval"
    )
    total_keystrokes: int = Field(default=0, ge=0, description="Total keystrokes")
    total_paste_events: int = Field(default=0, ge=0, description="Total paste events")

    # Focus aggregates
    total_blur_events: int = Field(default=0, ge=0, description="Total blur events")
    total_blur_duration_ms: float = Field(
        default=0.0, ge=0.0, description="Total blur duration"
    )

    # Flag summary
    flag_counts: dict[str, int] = Field(
        default_factory=_empty_flag_counts, description="Flag type counts"
    )
    max_severity: Severity | None = Field(
        default=None, description="Maximum severity across judgments"
    )

    @computed_field
    @property
    def flag_rate(self) -> float:
        """Calculate proportion of judgments that were flagged.

        Returns
        -------
        float
            Proportion of flagged judgments (0.0 to 1.0). Returns 0.0 when
            no judgments were recorded (avoids division by zero).
        """
        if self.total_judgments == 0:
            return 0.0
        return self.flagged_judgments / self.total_judgments

    @computed_field
    @property
    def has_paste_events(self) -> bool:
        """Check if participant had any paste events.

        Returns
        -------
        bool
            True if any paste events occurred.
        """
        return self.total_paste_events > 0
277
+
278
+
279
class AnalyticsCollection(BeadBaseModel):
    """Collection of judgment analytics with I/O and filtering support.

    Provides methods for persisting analytics to JSONL files,
    converting to DataFrames, and filtering by behavioral flags.

    Attributes
    ----------
    name : str
        Collection name (e.g., study identifier).
    analytics : list[JudgmentAnalytics]
        List of per-judgment analytics records.

    Examples
    --------
    >>> from uuid import uuid4
    >>> collection = AnalyticsCollection(name="study_001")
    >>> analytics = JudgmentAnalytics(
    ...     item_id=uuid4(),
    ...     participant_id="p001",
    ...     trial_index=0,
    ...     session_id="s001",
    ...     response_time_ms=2000,
    ... )
    >>> collection.add_analytics(analytics)
    >>> len(collection)
    1
    """

    name: str = Field(..., description="Collection name")
    analytics: list[JudgmentAnalytics] = Field(
        default_factory=_empty_analytics_list, description="Analytics records"
    )

    @staticmethod
    def _severity_rank(severity: str) -> int:
        """Return the numeric rank of a severity level.

        Single source of truth for severity ordering (info < low < medium
        < high), shared by ``filter_flagged`` and
        ``get_participant_summaries`` so the two cannot drift apart.
        Unknown severity strings rank 0 (same as "info").
        """
        return {"info": 0, "low": 1, "medium": 2, "high": 3}.get(severity, 0)

    def __len__(self) -> int:
        """Return number of analytics records.

        Returns
        -------
        int
            Number of analytics records in the collection.
        """
        return len(self.analytics)

    def add_analytics(self, analytics: JudgmentAnalytics) -> None:
        """Add a single analytics record to the collection.

        Parameters
        ----------
        analytics : JudgmentAnalytics
            Analytics record to add.
        """
        self.analytics.append(analytics)
        self.update_modified_time()

    def add_many(self, analytics_list: list[JudgmentAnalytics]) -> None:
        """Add multiple analytics records to the collection.

        Parameters
        ----------
        analytics_list : list[JudgmentAnalytics]
            List of analytics records to add.
        """
        self.analytics.extend(analytics_list)
        self.update_modified_time()

    def get_by_participant(self, participant_id: str) -> list[JudgmentAnalytics]:
        """Get all analytics for a specific participant.

        Parameters
        ----------
        participant_id : str
            Participant identifier to filter by.

        Returns
        -------
        list[JudgmentAnalytics]
            Analytics records for the participant, in collection order.
        """
        return [a for a in self.analytics if a.participant_id == participant_id]

    def get_by_item(self, item_id: UUID) -> list[JudgmentAnalytics]:
        """Get all analytics for a specific item.

        Parameters
        ----------
        item_id : UUID
            Item UUID to filter by.

        Returns
        -------
        list[JudgmentAnalytics]
            Analytics records for the item, in collection order.
        """
        return [a for a in self.analytics if a.item_id == item_id]

    def filter_flagged(
        self,
        min_severity: Severity | None = None,
        exclude_flagged: bool = False,
    ) -> AnalyticsCollection:
        """Filter analytics by flag status.

        Parameters
        ----------
        min_severity : Severity | None
            If provided, only include analytics with flags at or above this severity.
            Severity order: info < low < medium < high.
        exclude_flagged : bool
            If True, exclude flagged records. If False, include only flagged records.
            Default is False (return flagged records).

        Returns
        -------
        AnalyticsCollection
            New collection (named ``"<name>_filtered"``) with filtered analytics.
        """

        def meets_criteria(a: JudgmentAnalytics) -> bool:
            has_flags = a.is_flagged

            if exclude_flagged:
                return not has_flags

            if not has_flags:
                return False

            if min_severity is None:
                return True

            # Keep the record if any flag meets the minimum severity.
            min_level = self._severity_rank(min_severity)
            return any(
                self._severity_rank(flag.severity) >= min_level for flag in a.flags
            )

        filtered = [a for a in self.analytics if meets_criteria(a)]
        return AnalyticsCollection(name=f"{self.name}_filtered", analytics=filtered)

    def get_participant_ids(self) -> list[str]:
        """Get unique participant IDs in the collection.

        Returns
        -------
        list[str]
            Unique participant IDs in first-seen order. (Deterministic —
            previously this used set iteration, whose order is arbitrary.)
        """
        return list(dict.fromkeys(a.participant_id for a in self.analytics))

    def get_participant_summaries(self) -> list[ParticipantBehavioralSummary]:
        """Generate behavioral summaries for all participants.

        Returns
        -------
        list[ParticipantBehavioralSummary]
            Summary for each participant in the collection, in first-seen
            participant order.
        """
        # Group by participant (defaultdict preserves first-seen key order).
        by_participant: dict[str, list[JudgmentAnalytics]] = defaultdict(list)
        for a in self.analytics:
            by_participant[a.participant_id].append(a)

        summaries: list[ParticipantBehavioralSummary] = []

        for participant_id, records in by_participant.items():
            total = len(records)
            flagged = sum(1 for r in records if r.is_flagged)
            mean_rt = (
                sum(r.response_time_ms for r in records) / total if total > 0 else 0.0
            )

            # Accumulators for keystroke, focus, and flag aggregates,
            # gathered in a single pass over the participant's records.
            ikis: list[float] = []
            total_keystrokes = 0
            total_pastes = 0
            blur_events = 0
            blur_duration = 0.0
            flag_counts: dict[str, int] = defaultdict(int)
            max_severity_level = -1
            max_severity: Severity | None = None

            for r in records:
                total_pastes += r.paste_event_count

                if r.keystroke_metrics is not None:
                    total_keystrokes += r.keystroke_metrics.total_keystrokes
                    # A zero mean IKI indicates no usable keystroke pairs;
                    # exclude it so it does not drag the average toward zero.
                    if r.keystroke_metrics.mean_iki > 0:
                        ikis.append(r.keystroke_metrics.mean_iki)

                if r.focus_metrics is not None:
                    blur_events += r.focus_metrics.blur_count
                    blur_duration += r.focus_metrics.total_blur_duration

                for flag in r.flags:
                    flag_counts[flag.type] += 1
                    level = self._severity_rank(flag.severity)
                    if level > max_severity_level:
                        max_severity_level = level
                        max_severity = flag.severity

            mean_iki = sum(ikis) / len(ikis) if ikis else None

            # Session ID taken from the first record; assumes one session
            # per participant within a collection — TODO confirm upstream.
            session_id = records[0].session_id if records else ""

            summaries.append(
                ParticipantBehavioralSummary(
                    participant_id=participant_id,
                    session_id=session_id,
                    total_judgments=total,
                    flagged_judgments=flagged,
                    mean_response_time_ms=mean_rt,
                    mean_iki=mean_iki,
                    total_keystrokes=total_keystrokes,
                    total_paste_events=total_pastes,
                    total_blur_events=blur_events,
                    total_blur_duration_ms=blur_duration,
                    flag_counts=dict(flag_counts),
                    max_severity=max_severity,
                )
            )

        return summaries

    # JSONL I/O

    def to_jsonl(self, path: Path | str) -> None:
        """Write analytics to JSONL file.

        Parent directories are created if missing.

        Parameters
        ----------
        path : Path | str
            Path to output file.
        """
        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)
        write_jsonlines(self.analytics, path)

    @classmethod
    def from_jsonl(
        cls,
        path: Path | str,
        name: str = "loaded_analytics",
    ) -> AnalyticsCollection:
        """Load analytics from JSONL file.

        Parameters
        ----------
        path : Path | str
            Path to JSONL file.
        name : str
            Name for the collection.

        Returns
        -------
        AnalyticsCollection
            Collection with loaded analytics.
        """
        analytics = read_jsonlines(Path(path), JudgmentAnalytics)
        return cls(name=name, analytics=analytics)

    # DataFrame conversion

    def to_dataframe(
        self,
        backend: Literal["pandas", "polars"] = "pandas",
        include_metrics: bool = True,
        include_flags: bool = True,
    ) -> DataFrame:
        """Convert to pandas or polars DataFrame.

        Parameters
        ----------
        backend : Literal["pandas", "polars"]
            DataFrame backend to use (default: "pandas").
        include_metrics : bool
            If True, flatten behavioral metrics into columns (None where a
            metrics object is absent).
        include_flags : bool
            If True, include flag-related columns.

        Returns
        -------
        DataFrame
            pandas or polars DataFrame with analytics data. An empty
            collection yields an empty frame with the base columns only.
        """
        if not self.analytics:
            columns = [
                "item_id",
                "participant_id",
                "trial_index",
                "session_id",
                "response_value",
                "response_time_ms",
            ]
            if backend == "pandas":
                return pd.DataFrame(columns=columns)
            else:
                schema: dict[str, type[pl.Utf8]] = dict.fromkeys(columns, pl.Utf8)
                return pl.DataFrame(schema=schema)

        records: list[dict[str, JsonValue]] = []

        for a in self.analytics:
            record: dict[str, JsonValue] = {
                "item_id": str(a.item_id),
                "participant_id": a.participant_id,
                "trial_index": a.trial_index,
                "session_id": a.session_id,
                "response_value": a.response_value,
                "response_time_ms": a.response_time_ms,
                "paste_event_count": a.paste_event_count,
            }

            if include_metrics:
                # Keystroke metrics (None columns keep the frame rectangular)
                if a.keystroke_metrics is not None:
                    record["keystroke_total"] = a.keystroke_metrics.total_keystrokes
                    record["keystroke_mean_iki"] = a.keystroke_metrics.mean_iki
                    record["keystroke_std_iki"] = a.keystroke_metrics.std_iki
                    record["keystroke_deletions"] = a.keystroke_metrics.deletions
                else:
                    record["keystroke_total"] = None
                    record["keystroke_mean_iki"] = None
                    record["keystroke_std_iki"] = None
                    record["keystroke_deletions"] = None

                # Focus metrics
                if a.focus_metrics is not None:
                    record["focus_blur_count"] = a.focus_metrics.blur_count
                    record["focus_blur_duration"] = a.focus_metrics.total_blur_duration
                else:
                    record["focus_blur_count"] = None
                    record["focus_blur_duration"] = None

                # Timing metrics
                if a.timing_metrics is not None:
                    record["timing_first_keystroke"] = (
                        a.timing_metrics.first_keystroke_latency
                    )
                    record["timing_total_response"] = (
                        a.timing_metrics.total_response_time
                    )
                else:
                    record["timing_first_keystroke"] = None
                    record["timing_total_response"] = None

            if include_flags:
                record["is_flagged"] = a.is_flagged
                record["flag_count"] = len(a.flags)
                record["max_severity"] = a.max_severity
                record["flag_types"] = ",".join(a.get_flag_types()) if a.flags else None

            records.append(record)

        if backend == "pandas":
            return pd.DataFrame(records)
        else:
            return pl.DataFrame(records)