bead 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bead/__init__.py +11 -0
- bead/__main__.py +11 -0
- bead/active_learning/__init__.py +15 -0
- bead/active_learning/config.py +231 -0
- bead/active_learning/loop.py +566 -0
- bead/active_learning/models/__init__.py +24 -0
- bead/active_learning/models/base.py +852 -0
- bead/active_learning/models/binary.py +910 -0
- bead/active_learning/models/categorical.py +943 -0
- bead/active_learning/models/cloze.py +862 -0
- bead/active_learning/models/forced_choice.py +956 -0
- bead/active_learning/models/free_text.py +773 -0
- bead/active_learning/models/lora.py +365 -0
- bead/active_learning/models/magnitude.py +835 -0
- bead/active_learning/models/multi_select.py +795 -0
- bead/active_learning/models/ordinal_scale.py +811 -0
- bead/active_learning/models/peft_adapter.py +155 -0
- bead/active_learning/models/random_effects.py +639 -0
- bead/active_learning/selection.py +354 -0
- bead/active_learning/strategies.py +391 -0
- bead/active_learning/trainers/__init__.py +26 -0
- bead/active_learning/trainers/base.py +210 -0
- bead/active_learning/trainers/data_collator.py +172 -0
- bead/active_learning/trainers/dataset_utils.py +261 -0
- bead/active_learning/trainers/huggingface.py +304 -0
- bead/active_learning/trainers/lightning.py +324 -0
- bead/active_learning/trainers/metrics.py +424 -0
- bead/active_learning/trainers/mixed_effects.py +551 -0
- bead/active_learning/trainers/model_wrapper.py +509 -0
- bead/active_learning/trainers/registry.py +104 -0
- bead/adapters/__init__.py +11 -0
- bead/adapters/huggingface.py +61 -0
- bead/behavioral/__init__.py +116 -0
- bead/behavioral/analytics.py +646 -0
- bead/behavioral/extraction.py +343 -0
- bead/behavioral/merging.py +343 -0
- bead/cli/__init__.py +11 -0
- bead/cli/active_learning.py +513 -0
- bead/cli/active_learning_commands.py +779 -0
- bead/cli/completion.py +359 -0
- bead/cli/config.py +624 -0
- bead/cli/constraint_builders.py +286 -0
- bead/cli/deployment.py +859 -0
- bead/cli/deployment_trials.py +493 -0
- bead/cli/deployment_ui.py +332 -0
- bead/cli/display.py +378 -0
- bead/cli/items.py +960 -0
- bead/cli/items_factories.py +776 -0
- bead/cli/list_constraints.py +714 -0
- bead/cli/lists.py +490 -0
- bead/cli/main.py +430 -0
- bead/cli/models.py +877 -0
- bead/cli/resource_loaders.py +621 -0
- bead/cli/resources.py +1036 -0
- bead/cli/shell.py +356 -0
- bead/cli/simulate.py +840 -0
- bead/cli/templates.py +1158 -0
- bead/cli/training.py +1080 -0
- bead/cli/utils.py +614 -0
- bead/cli/workflow.py +1273 -0
- bead/config/__init__.py +68 -0
- bead/config/active_learning.py +1009 -0
- bead/config/config.py +192 -0
- bead/config/defaults.py +118 -0
- bead/config/deployment.py +217 -0
- bead/config/env.py +147 -0
- bead/config/item.py +45 -0
- bead/config/list.py +193 -0
- bead/config/loader.py +149 -0
- bead/config/logging.py +42 -0
- bead/config/model.py +49 -0
- bead/config/paths.py +46 -0
- bead/config/profiles.py +320 -0
- bead/config/resources.py +47 -0
- bead/config/serialization.py +210 -0
- bead/config/simulation.py +206 -0
- bead/config/template.py +238 -0
- bead/config/validation.py +267 -0
- bead/data/__init__.py +65 -0
- bead/data/base.py +87 -0
- bead/data/identifiers.py +97 -0
- bead/data/language_codes.py +61 -0
- bead/data/metadata.py +270 -0
- bead/data/range.py +123 -0
- bead/data/repository.py +358 -0
- bead/data/serialization.py +249 -0
- bead/data/timestamps.py +89 -0
- bead/data/validation.py +349 -0
- bead/data_collection/__init__.py +11 -0
- bead/data_collection/jatos.py +223 -0
- bead/data_collection/merger.py +154 -0
- bead/data_collection/prolific.py +198 -0
- bead/deployment/__init__.py +5 -0
- bead/deployment/distribution.py +402 -0
- bead/deployment/jatos/__init__.py +1 -0
- bead/deployment/jatos/api.py +200 -0
- bead/deployment/jatos/exporter.py +210 -0
- bead/deployment/jspsych/__init__.py +9 -0
- bead/deployment/jspsych/biome.json +44 -0
- bead/deployment/jspsych/config.py +411 -0
- bead/deployment/jspsych/generator.py +598 -0
- bead/deployment/jspsych/package.json +51 -0
- bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
- bead/deployment/jspsych/randomizer.py +299 -0
- bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
- bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
- bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
- bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
- bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
- bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
- bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
- bead/deployment/jspsych/src/plugins/rating.ts +248 -0
- bead/deployment/jspsych/src/slopit/index.ts +9 -0
- bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
- bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
- bead/deployment/jspsych/templates/experiment.css +1 -0
- bead/deployment/jspsych/templates/experiment.js.template +289 -0
- bead/deployment/jspsych/templates/index.html +51 -0
- bead/deployment/jspsych/templates/randomizer.js +241 -0
- bead/deployment/jspsych/templates/randomizer.js.template +313 -0
- bead/deployment/jspsych/trials.py +723 -0
- bead/deployment/jspsych/tsconfig.json +23 -0
- bead/deployment/jspsych/tsup.config.ts +30 -0
- bead/deployment/jspsych/ui/__init__.py +1 -0
- bead/deployment/jspsych/ui/components.py +383 -0
- bead/deployment/jspsych/ui/styles.py +411 -0
- bead/dsl/__init__.py +80 -0
- bead/dsl/ast.py +168 -0
- bead/dsl/context.py +178 -0
- bead/dsl/errors.py +71 -0
- bead/dsl/evaluator.py +570 -0
- bead/dsl/grammar.lark +81 -0
- bead/dsl/parser.py +231 -0
- bead/dsl/stdlib.py +929 -0
- bead/evaluation/__init__.py +13 -0
- bead/evaluation/convergence.py +485 -0
- bead/evaluation/interannotator.py +398 -0
- bead/items/__init__.py +40 -0
- bead/items/adapters/__init__.py +70 -0
- bead/items/adapters/anthropic.py +224 -0
- bead/items/adapters/api_utils.py +167 -0
- bead/items/adapters/base.py +216 -0
- bead/items/adapters/google.py +259 -0
- bead/items/adapters/huggingface.py +1074 -0
- bead/items/adapters/openai.py +323 -0
- bead/items/adapters/registry.py +202 -0
- bead/items/adapters/sentence_transformers.py +224 -0
- bead/items/adapters/togetherai.py +309 -0
- bead/items/binary.py +515 -0
- bead/items/cache.py +558 -0
- bead/items/categorical.py +593 -0
- bead/items/cloze.py +757 -0
- bead/items/constructor.py +784 -0
- bead/items/forced_choice.py +413 -0
- bead/items/free_text.py +681 -0
- bead/items/generation.py +432 -0
- bead/items/item.py +396 -0
- bead/items/item_template.py +787 -0
- bead/items/magnitude.py +573 -0
- bead/items/multi_select.py +621 -0
- bead/items/ordinal_scale.py +569 -0
- bead/items/scoring.py +448 -0
- bead/items/validation.py +723 -0
- bead/lists/__init__.py +30 -0
- bead/lists/balancer.py +263 -0
- bead/lists/constraints.py +1067 -0
- bead/lists/experiment_list.py +286 -0
- bead/lists/list_collection.py +378 -0
- bead/lists/partitioner.py +1141 -0
- bead/lists/stratification.py +254 -0
- bead/participants/__init__.py +73 -0
- bead/participants/collection.py +699 -0
- bead/participants/merging.py +312 -0
- bead/participants/metadata_spec.py +491 -0
- bead/participants/models.py +276 -0
- bead/resources/__init__.py +29 -0
- bead/resources/adapters/__init__.py +19 -0
- bead/resources/adapters/base.py +104 -0
- bead/resources/adapters/cache.py +128 -0
- bead/resources/adapters/glazing.py +508 -0
- bead/resources/adapters/registry.py +117 -0
- bead/resources/adapters/unimorph.py +796 -0
- bead/resources/classification.py +856 -0
- bead/resources/constraint_builders.py +329 -0
- bead/resources/constraints.py +165 -0
- bead/resources/lexical_item.py +223 -0
- bead/resources/lexicon.py +744 -0
- bead/resources/loaders.py +209 -0
- bead/resources/template.py +441 -0
- bead/resources/template_collection.py +707 -0
- bead/resources/template_generation.py +349 -0
- bead/simulation/__init__.py +29 -0
- bead/simulation/annotators/__init__.py +15 -0
- bead/simulation/annotators/base.py +175 -0
- bead/simulation/annotators/distance_based.py +135 -0
- bead/simulation/annotators/lm_based.py +114 -0
- bead/simulation/annotators/oracle.py +182 -0
- bead/simulation/annotators/random.py +181 -0
- bead/simulation/dsl_extension/__init__.py +3 -0
- bead/simulation/noise_models/__init__.py +13 -0
- bead/simulation/noise_models/base.py +42 -0
- bead/simulation/noise_models/random_noise.py +82 -0
- bead/simulation/noise_models/systematic.py +132 -0
- bead/simulation/noise_models/temperature.py +86 -0
- bead/simulation/runner.py +144 -0
- bead/simulation/strategies/__init__.py +23 -0
- bead/simulation/strategies/base.py +123 -0
- bead/simulation/strategies/binary.py +103 -0
- bead/simulation/strategies/categorical.py +123 -0
- bead/simulation/strategies/cloze.py +224 -0
- bead/simulation/strategies/forced_choice.py +127 -0
- bead/simulation/strategies/free_text.py +105 -0
- bead/simulation/strategies/magnitude.py +116 -0
- bead/simulation/strategies/multi_select.py +129 -0
- bead/simulation/strategies/ordinal_scale.py +131 -0
- bead/templates/__init__.py +27 -0
- bead/templates/adapters/__init__.py +17 -0
- bead/templates/adapters/base.py +128 -0
- bead/templates/adapters/cache.py +178 -0
- bead/templates/adapters/huggingface.py +312 -0
- bead/templates/combinatorics.py +103 -0
- bead/templates/filler.py +605 -0
- bead/templates/renderers.py +177 -0
- bead/templates/resolver.py +178 -0
- bead/templates/strategies.py +1806 -0
- bead/templates/streaming.py +195 -0
- bead-0.1.0.dist-info/METADATA +212 -0
- bead-0.1.0.dist-info/RECORD +231 -0
- bead-0.1.0.dist-info/WHEEL +4 -0
- bead-0.1.0.dist-info/entry_points.txt +2 -0
- bead-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,343 @@
|
|
|
1
|
+
"""Behavioral data extraction from slopit sessions.
|
|
2
|
+
|
|
3
|
+
This module provides functions for extracting per-judgment behavioral
|
|
4
|
+
analytics from slopit session data, using slopit's IO loaders and
|
|
5
|
+
analysis pipeline.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import TYPE_CHECKING
|
|
12
|
+
from uuid import UUID
|
|
13
|
+
|
|
14
|
+
from slopit import load_session, load_sessions
|
|
15
|
+
from slopit.behavioral import (
|
|
16
|
+
Analyzer,
|
|
17
|
+
FocusAnalyzer,
|
|
18
|
+
KeystrokeAnalyzer,
|
|
19
|
+
PasteAnalyzer,
|
|
20
|
+
TimingAnalyzer,
|
|
21
|
+
)
|
|
22
|
+
from slopit.pipeline import AnalysisPipeline
|
|
23
|
+
from slopit.schemas import AnalysisFlag, Severity, SlopitSession, SlopitTrial
|
|
24
|
+
|
|
25
|
+
from bead.behavioral.analytics import AnalyticsCollection, JudgmentAnalytics
|
|
26
|
+
|
|
27
|
+
if TYPE_CHECKING:
|
|
28
|
+
from bead.data.base import JsonValue
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _get_max_severity(flags: list[AnalysisFlag]) -> Severity | None:
|
|
32
|
+
"""Get maximum severity from a list of flags.
|
|
33
|
+
|
|
34
|
+
Parameters
|
|
35
|
+
----------
|
|
36
|
+
flags : list[AnalysisFlag]
|
|
37
|
+
List of analysis flags.
|
|
38
|
+
|
|
39
|
+
Returns
|
|
40
|
+
-------
|
|
41
|
+
Severity | None
|
|
42
|
+
Maximum severity, or None if no flags.
|
|
43
|
+
"""
|
|
44
|
+
if not flags:
|
|
45
|
+
return None
|
|
46
|
+
|
|
47
|
+
severity_order: dict[str, int] = {"info": 0, "low": 1, "medium": 2, "high": 3}
|
|
48
|
+
max_level = -1
|
|
49
|
+
max_severity: Severity | None = None
|
|
50
|
+
|
|
51
|
+
for flag in flags:
|
|
52
|
+
level = severity_order.get(flag.severity, 0)
|
|
53
|
+
if level > max_level:
|
|
54
|
+
max_level = level
|
|
55
|
+
max_severity = flag.severity
|
|
56
|
+
|
|
57
|
+
return max_severity
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def extract_from_trial(
    trial: SlopitTrial,
    session: SlopitSession,
    item_id_key: str = "item_id",
) -> JudgmentAnalytics | None:
    """Extract behavioral analytics from a single slopit trial.

    Parameters
    ----------
    trial : SlopitTrial
        Slopit trial data.
    session : SlopitSession
        Parent session for participant context.
    item_id_key : str
        Key in platform_data containing the item UUID.

    Returns
    -------
    JudgmentAnalytics | None
        Analytics record, or None if item_id not found in trial.
    """
    # The item UUID lives in platform_data; without it the trial is unusable.
    platform_data = trial.platform_data
    if platform_data is None or item_id_key not in platform_data:
        return None

    raw_id = platform_data[item_id_key]
    if not isinstance(raw_id, str):
        return None

    try:
        item_id = UUID(raw_id)
    except (ValueError, TypeError):
        return None

    # Response value and reaction time, with safe fallbacks.
    response_value: JsonValue = (
        trial.response.value if trial.response is not None else None
    )
    rt_ms = 0 if trial.rt is None else trial.rt

    # Behavioral metrics are all optional on the trial.
    keystroke = None
    focus = None
    timing = None
    paste_count = 0
    behavioral = trial.behavioral
    if behavioral is not None:
        metrics = behavioral.metrics
        if metrics is not None:
            keystroke = metrics.keystroke
            focus = metrics.focus
            timing = metrics.timing
        if behavioral.paste is not None:
            paste_count = len(behavioral.paste)

    # Normalize CaptureFlags into AnalysisFlags for a uniform flag list.
    flags: list[AnalysisFlag] = []
    if trial.capture_flags is not None:
        flags = [
            AnalysisFlag(
                type=cf.type,
                analyzer="capture",
                severity=cf.severity,
                message=cf.message,
                evidence=cf.details,
                trial_ids=[trial.trial_id],
            )
            for cf in trial.capture_flags
        ]

    return JudgmentAnalytics(
        item_id=item_id,
        participant_id=session.participant_id or session.session_id,
        trial_index=trial.trial_index,
        session_id=session.session_id,
        response_value=response_value,
        response_time_ms=rt_ms,
        keystroke_metrics=keystroke,
        focus_metrics=focus,
        timing_metrics=timing,
        paste_event_count=paste_count,
        flags=flags,
        max_severity=_get_max_severity(flags),
    )
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def extract_from_session(
    session: SlopitSession,
    item_id_key: str = "item_id",
) -> list[JudgmentAnalytics]:
    """Extract behavioral analytics from all trials in a slopit session.

    Parameters
    ----------
    session : SlopitSession
        Slopit session containing trial data.
    item_id_key : str
        Key in platform_data containing the item UUID.

    Returns
    -------
    list[JudgmentAnalytics]
        Analytics records for trials with valid item_id.
    """
    # extract_from_trial returns None for trials missing a usable item_id;
    # those are silently dropped.
    candidates = (
        extract_from_trial(trial, session, item_id_key) for trial in session.trials
    )
    return [record for record in candidates if record is not None]
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def extract_from_file(
    path: Path | str,
    item_id_key: str = "item_id",
) -> list[JudgmentAnalytics]:
    """Extract behavioral analytics from a slopit session file.

    Uses slopit's load_session() to automatically detect format.

    Parameters
    ----------
    path : Path | str
        Path to session file (JSON or JATOS format).
    item_id_key : str
        Key in platform_data containing the item UUID.

    Returns
    -------
    list[JudgmentAnalytics]
        Analytics records from the session.

    Examples
    --------
    >>> analytics = extract_from_file("data/session_001.json")
    >>> len(analytics)
    50
    """
    # load_session handles format detection; extraction itself is delegated.
    return extract_from_session(load_session(path), item_id_key)
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def extract_from_directory(
    path: Path | str,
    pattern: str = "*",
    item_id_key: str = "item_id",
    name: str | None = None,
) -> AnalyticsCollection:
    """Extract behavioral analytics from all session files in a directory.

    Uses slopit's load_sessions() to load all files.

    Parameters
    ----------
    path : Path | str
        Directory containing session files.
    pattern : str
        Glob pattern for file matching (default: "*").
    item_id_key : str
        Key in platform_data containing the item UUID.
    name : str | None
        Name for the collection. Defaults to directory name.

    Returns
    -------
    AnalyticsCollection
        Collection of analytics from all sessions.

    Examples
    --------
    >>> collection = extract_from_directory("data/jatos_export/")
    >>> print(f"Extracted {len(collection)} analytics records")
    """
    directory = Path(path)

    records: list[JudgmentAnalytics] = []
    for session in load_sessions(directory, pattern):
        records.extend(extract_from_session(session, item_id_key))

    # Fall back to the directory name when no explicit name was given.
    collection_name = directory.name if name is None else name
    return AnalyticsCollection(name=collection_name, analytics=records)
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def analyze_sessions(
    sessions: list[SlopitSession],
    analyzers: list[Analyzer] | None = None,
) -> list[SlopitSession]:
    """Run slopit behavioral analyzers on sessions.

    Uses slopit's AnalysisPipeline to process sessions with
    the specified analyzers.

    Parameters
    ----------
    sessions : list[SlopitSession]
        Sessions to analyze.
    analyzers : list[Analyzer] | None
        Analyzers to run. If None, uses default set:
        KeystrokeAnalyzer, FocusAnalyzer, PasteAnalyzer, TimingAnalyzer.

    Returns
    -------
    list[SlopitSession]
        Sessions with analysis flags added.

    Examples
    --------
    >>> from slopit import load_sessions
    >>> sessions = load_sessions("data/")
    >>> analyzed = analyze_sessions(sessions)
    >>> # Sessions now have analysis flags populated
    """
    # Default analyzer set covers keystrokes, window focus, pastes, timing.
    if analyzers is None:
        analyzers = [
            KeystrokeAnalyzer(),
            FocusAnalyzer(),
            PasteAnalyzer(),
            TimingAnalyzer(),
        ]
    return AnalysisPipeline(analyzers).analyze(sessions)
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def extract_with_analysis(
    path: Path | str,
    pattern: str = "*",
    item_id_key: str = "item_id",
    analyzers: list[Analyzer] | None = None,
    name: str | None = None,
) -> AnalyticsCollection:
    """Load sessions, run analysis, and extract analytics in one step.

    Convenience function that combines loading, analysis, and extraction.

    Parameters
    ----------
    path : Path | str
        Path to session file or directory.
    pattern : str
        Glob pattern for directory (default: "*").
    item_id_key : str
        Key in platform_data containing the item UUID.
    analyzers : list[Analyzer] | None
        Analyzers to run. If None, uses default set.
    name : str | None
        Name for the collection.

    Returns
    -------
    AnalyticsCollection
        Collection with analyzed behavioral data.

    Examples
    --------
    >>> collection = extract_with_analysis("data/jatos_export/")
    >>> summaries = collection.get_participant_summaries()
    >>> for s in summaries:
    ...     if s.flag_rate > 0.1:
    ...         print(f"Participant {s.participant_id}: {s.flag_rate:.1%} flagged")
    """
    source = Path(path)

    # Load, then annotate sessions with analyzer flags before extraction.
    analyzed = analyze_sessions(load_sessions(source, pattern), analyzers)

    records: list[JudgmentAnalytics] = []
    for session in analyzed:
        records.extend(extract_from_session(session, item_id_key))

    collection_name = source.name if name is None else name
    return AnalyticsCollection(name=collection_name, analytics=records)
|
|
@@ -0,0 +1,343 @@
|
|
|
1
|
+
"""Utilities for merging behavioral analytics with judgment data.
|
|
2
|
+
|
|
3
|
+
This module provides functions for joining behavioral analytics with
|
|
4
|
+
judgment DataFrames for analysis. All functions support both pandas
|
|
5
|
+
and polars DataFrames, preserving the input type.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from typing import TYPE_CHECKING, Literal
|
|
11
|
+
|
|
12
|
+
import pandas as pd
|
|
13
|
+
import polars as pl
|
|
14
|
+
|
|
15
|
+
from bead.behavioral.analytics import AnalyticsCollection
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from slopit.schemas import Severity
|
|
19
|
+
|
|
20
|
+
from bead.participants.collection import IDMappingCollection, ParticipantCollection
|
|
21
|
+
|
|
22
|
+
# Type alias for supported DataFrame types
|
|
23
|
+
DataFrame = pd.DataFrame | pl.DataFrame
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def merge_behavioral_analytics(
    judgments_df: DataFrame,
    analytics: AnalyticsCollection,
    item_id_column: str = "item_id",
    participant_id_column: str = "participant_id",
    include_metrics: bool = True,
    include_flags: bool = True,
    how: str = "left",
) -> DataFrame:
    """Merge behavioral analytics into a judgments DataFrame.

    Preserves input DataFrame type (pandas in -> pandas out,
    polars in -> polars out).

    Parameters
    ----------
    judgments_df : DataFrame
        DataFrame containing judgment data.
    analytics : AnalyticsCollection
        Collection of behavioral analytics.
    item_id_column : str
        Column in judgments_df containing item IDs (default: "item_id").
    participant_id_column : str
        Column in judgments_df containing participant IDs.
    include_metrics : bool
        If True, include flattened behavioral metrics columns.
    include_flags : bool
        If True, include flag-related columns.
    how : str
        Merge type: "left", "inner", "outer" (default: "left").

    Returns
    -------
    DataFrame
        Merged DataFrame with behavioral analytics columns added.

    Examples
    --------
    >>> import pandas as pd
    >>> judgments = pd.DataFrame({
    ...     "item_id": ["uuid1", "uuid2"],
    ...     "participant_id": ["p1", "p1"],
    ...     "response": [5, 3],
    ... })
    >>> # merged = merge_behavioral_analytics(judgments, analytics_collection)
    """
    join_keys = [item_id_column, participant_id_column]

    if isinstance(judgments_df, pl.DataFrame):
        # Render analytics with the polars backend and join on both keys.
        analytics_df = analytics.to_dataframe(
            backend="polars",
            include_metrics=include_metrics,
            include_flags=include_flags,
        )
        assert isinstance(analytics_df, pl.DataFrame)
        return judgments_df.join(
            analytics_df,
            left_on=join_keys,
            right_on=["item_id", "participant_id"],
            how=how,  # type: ignore[arg-type]
            suffix="_behavioral",
        )

    assert isinstance(judgments_df, pd.DataFrame)
    analytics_df = analytics.to_dataframe(
        backend="pandas",
        include_metrics=include_metrics,
        include_flags=include_flags,
    )
    assert isinstance(analytics_df, pd.DataFrame)

    merged = judgments_df.merge(
        analytics_df,
        left_on=join_keys,
        right_on=["item_id", "participant_id"],
        how=how,  # type: ignore[arg-type]
        suffixes=("", "_behavioral"),
    )

    # pandas keeps both key columns when names differ; drop the duplicates.
    duplicate_keys = [
        col
        for col in ("item_id_behavioral", "participant_id_behavioral")
        if col in merged.columns
    ]
    if duplicate_keys:
        merged = merged.drop(columns=duplicate_keys)

    return merged
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def filter_flagged_judgments(
    judgments_df: DataFrame,
    analytics: AnalyticsCollection,
    item_id_column: str = "item_id",
    participant_id_column: str = "participant_id",
    min_severity: Severity | None = None,
    exclude_flagged: bool = True,
) -> DataFrame:
    """Filter judgments based on behavioral flags.

    Preserves input DataFrame type.

    Parameters
    ----------
    judgments_df : DataFrame
        DataFrame containing judgment data.
    analytics : AnalyticsCollection
        Collection of behavioral analytics.
    item_id_column : str
        Column containing item IDs.
    participant_id_column : str
        Column containing participant IDs.
    min_severity : Severity | None
        Minimum severity level for filtering. If None, any flag counts.
    exclude_flagged : bool
        If True, exclude flagged judgments (default).
        If False, keep only flagged judgments.

    Returns
    -------
    DataFrame
        Filtered DataFrame.

    Examples
    --------
    >>> # Keep only unflagged judgments
    >>> clean_df = filter_flagged_judgments(judgments, analytics, exclude_flagged=True)
    >>> # Keep only high-severity flagged judgments for review
    >>> flagged_df = filter_flagged_judgments(
    ...     judgments, analytics, min_severity="high", exclude_flagged=False
    ... )
    """
    # Collect the flagged (item_id, participant_id) pairs once up front.
    flagged = analytics.filter_flagged(
        min_severity=min_severity,
        exclude_flagged=False,  # Get flagged records
    )
    flagged_pairs: set[tuple[str, str]] = {
        (str(record.item_id), record.participant_id)
        for record in flagged.analytics
    }

    def _is_flagged(item_value: object, participant_value: object) -> bool:
        # Shared membership test for both backends.
        return (str(item_value), str(participant_value)) in flagged_pairs

    if isinstance(judgments_df, pl.DataFrame):
        marker = (
            pl.struct([item_id_column, participant_id_column])
            .map_elements(
                lambda row: _is_flagged(
                    row[item_id_column], row[participant_id_column]
                ),
                return_dtype=pl.Boolean,
            )
            .alias("_is_flagged")
        )
        tagged = judgments_df.with_columns(marker)
        predicate = (
            ~pl.col("_is_flagged") if exclude_flagged else pl.col("_is_flagged")
        )
        return tagged.filter(predicate).drop("_is_flagged")

    assert isinstance(judgments_df, pd.DataFrame)
    mask = judgments_df.apply(
        lambda row: _is_flagged(row[item_id_column], row[participant_id_column]),
        axis=1,
    )
    selected = judgments_df[~mask] if exclude_flagged else judgments_df[mask]
    return selected.copy()
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def create_analysis_dataframe_with_behavior(
    judgments_df: DataFrame,
    participants: ParticipantCollection,
    analytics: AnalyticsCollection,
    id_mappings: IDMappingCollection | None = None,
    external_id_column: str | None = None,
    participant_id_column: str = "participant_id",
    item_id_column: str = "item_id",
    metadata_columns: list[str] | None = None,
    include_metrics: bool = True,
    include_flags: bool = True,
) -> DataFrame:
    """Create analysis-ready DataFrame with metadata and behavioral analytics.

    Combines both participant and behavioral merging in one step.
    Preserves input DataFrame type.

    Parameters
    ----------
    judgments_df : DataFrame
        Raw judgment data.
    participants : ParticipantCollection
        Participant collection with metadata.
    analytics : AnalyticsCollection
        Behavioral analytics collection.
    id_mappings : IDMappingCollection | None
        ID mappings (required if external_id_column is provided).
    external_id_column : str | None
        Column with external IDs to resolve.
    participant_id_column : str
        Column with participant IDs (after resolution).
    item_id_column : str
        Column with item IDs.
    metadata_columns : list[str] | None
        Participant metadata columns to include.
    include_metrics : bool
        If True, include behavioral metrics columns.
    include_flags : bool
        If True, include flag columns.

    Returns
    -------
    DataFrame
        Analysis-ready DataFrame with both metadata and behavioral data.

    Examples
    --------
    >>> analysis_df = create_analysis_dataframe_with_behavior(
    ...     judgments,
    ...     participants,
    ...     analytics,
    ...     id_mappings=mappings,
    ...     external_id_column="PROLIFIC_PID",
    ... )
    """
    # Deferred import: bead.participants.merging imports from this package.
    from bead.participants.merging import (  # noqa: PLC0415
        merge_participant_metadata,
        resolve_external_ids,
    )

    result = judgments_df

    # 1) Map external platform IDs to internal participant IDs when possible.
    if external_id_column is not None and id_mappings is not None:
        result = resolve_external_ids(
            result,
            id_mappings,
            external_id_column=external_id_column,
            output_column=participant_id_column,
        )

    # 2) Attach participant-level metadata columns.
    result = merge_participant_metadata(
        result,
        participants,
        id_column=participant_id_column,
        metadata_columns=metadata_columns,
    )

    # 3) Attach per-judgment behavioral analytics.
    return merge_behavioral_analytics(
        result,
        analytics,
        item_id_column=item_id_column,
        participant_id_column=participant_id_column,
        include_metrics=include_metrics,
        include_flags=include_flags,
    )
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def get_exclusion_list(
    analytics: AnalyticsCollection,
    min_flag_rate: float = 0.1,
    min_severity: Severity | None = None,
) -> list[str]:
    """Get list of participant IDs that should be excluded based on flags.

    Identifies participants with flag rates above the threshold.

    Parameters
    ----------
    analytics : AnalyticsCollection
        Behavioral analytics collection.
    min_flag_rate : float
        Minimum proportion of flagged judgments for exclusion (default: 0.1).
    min_severity : Severity | None
        Only count flags at or above this severity.

    Returns
    -------
    list[str]
        Participant IDs recommended for exclusion.

    Examples
    --------
    >>> exclude = get_exclusion_list(analytics, min_flag_rate=0.2)
    >>> clean_df = judgments_df[~judgments_df["participant_id"].isin(exclude)]
    """
    # Narrow to sufficiently severe flags when a threshold is given;
    # otherwise count every flag.
    source = (
        analytics
        if min_severity is None
        else analytics.filter_flagged(
            min_severity=min_severity, exclude_flagged=False
        )
    )

    return [
        summary.participant_id
        for summary in source.get_participant_summaries()
        if summary.flag_rate >= min_flag_rate
    ]
|