bead 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. bead/__init__.py +11 -0
  2. bead/__main__.py +11 -0
  3. bead/active_learning/__init__.py +15 -0
  4. bead/active_learning/config.py +231 -0
  5. bead/active_learning/loop.py +566 -0
  6. bead/active_learning/models/__init__.py +24 -0
  7. bead/active_learning/models/base.py +852 -0
  8. bead/active_learning/models/binary.py +910 -0
  9. bead/active_learning/models/categorical.py +943 -0
  10. bead/active_learning/models/cloze.py +862 -0
  11. bead/active_learning/models/forced_choice.py +956 -0
  12. bead/active_learning/models/free_text.py +773 -0
  13. bead/active_learning/models/lora.py +365 -0
  14. bead/active_learning/models/magnitude.py +835 -0
  15. bead/active_learning/models/multi_select.py +795 -0
  16. bead/active_learning/models/ordinal_scale.py +811 -0
  17. bead/active_learning/models/peft_adapter.py +155 -0
  18. bead/active_learning/models/random_effects.py +639 -0
  19. bead/active_learning/selection.py +354 -0
  20. bead/active_learning/strategies.py +391 -0
  21. bead/active_learning/trainers/__init__.py +26 -0
  22. bead/active_learning/trainers/base.py +210 -0
  23. bead/active_learning/trainers/data_collator.py +172 -0
  24. bead/active_learning/trainers/dataset_utils.py +261 -0
  25. bead/active_learning/trainers/huggingface.py +304 -0
  26. bead/active_learning/trainers/lightning.py +324 -0
  27. bead/active_learning/trainers/metrics.py +424 -0
  28. bead/active_learning/trainers/mixed_effects.py +551 -0
  29. bead/active_learning/trainers/model_wrapper.py +509 -0
  30. bead/active_learning/trainers/registry.py +104 -0
  31. bead/adapters/__init__.py +11 -0
  32. bead/adapters/huggingface.py +61 -0
  33. bead/behavioral/__init__.py +116 -0
  34. bead/behavioral/analytics.py +646 -0
  35. bead/behavioral/extraction.py +343 -0
  36. bead/behavioral/merging.py +343 -0
  37. bead/cli/__init__.py +11 -0
  38. bead/cli/active_learning.py +513 -0
  39. bead/cli/active_learning_commands.py +779 -0
  40. bead/cli/completion.py +359 -0
  41. bead/cli/config.py +624 -0
  42. bead/cli/constraint_builders.py +286 -0
  43. bead/cli/deployment.py +859 -0
  44. bead/cli/deployment_trials.py +493 -0
  45. bead/cli/deployment_ui.py +332 -0
  46. bead/cli/display.py +378 -0
  47. bead/cli/items.py +960 -0
  48. bead/cli/items_factories.py +776 -0
  49. bead/cli/list_constraints.py +714 -0
  50. bead/cli/lists.py +490 -0
  51. bead/cli/main.py +430 -0
  52. bead/cli/models.py +877 -0
  53. bead/cli/resource_loaders.py +621 -0
  54. bead/cli/resources.py +1036 -0
  55. bead/cli/shell.py +356 -0
  56. bead/cli/simulate.py +840 -0
  57. bead/cli/templates.py +1158 -0
  58. bead/cli/training.py +1080 -0
  59. bead/cli/utils.py +614 -0
  60. bead/cli/workflow.py +1273 -0
  61. bead/config/__init__.py +68 -0
  62. bead/config/active_learning.py +1009 -0
  63. bead/config/config.py +192 -0
  64. bead/config/defaults.py +118 -0
  65. bead/config/deployment.py +217 -0
  66. bead/config/env.py +147 -0
  67. bead/config/item.py +45 -0
  68. bead/config/list.py +193 -0
  69. bead/config/loader.py +149 -0
  70. bead/config/logging.py +42 -0
  71. bead/config/model.py +49 -0
  72. bead/config/paths.py +46 -0
  73. bead/config/profiles.py +320 -0
  74. bead/config/resources.py +47 -0
  75. bead/config/serialization.py +210 -0
  76. bead/config/simulation.py +206 -0
  77. bead/config/template.py +238 -0
  78. bead/config/validation.py +267 -0
  79. bead/data/__init__.py +65 -0
  80. bead/data/base.py +87 -0
  81. bead/data/identifiers.py +97 -0
  82. bead/data/language_codes.py +61 -0
  83. bead/data/metadata.py +270 -0
  84. bead/data/range.py +123 -0
  85. bead/data/repository.py +358 -0
  86. bead/data/serialization.py +249 -0
  87. bead/data/timestamps.py +89 -0
  88. bead/data/validation.py +349 -0
  89. bead/data_collection/__init__.py +11 -0
  90. bead/data_collection/jatos.py +223 -0
  91. bead/data_collection/merger.py +154 -0
  92. bead/data_collection/prolific.py +198 -0
  93. bead/deployment/__init__.py +5 -0
  94. bead/deployment/distribution.py +402 -0
  95. bead/deployment/jatos/__init__.py +1 -0
  96. bead/deployment/jatos/api.py +200 -0
  97. bead/deployment/jatos/exporter.py +210 -0
  98. bead/deployment/jspsych/__init__.py +9 -0
  99. bead/deployment/jspsych/biome.json +44 -0
  100. bead/deployment/jspsych/config.py +411 -0
  101. bead/deployment/jspsych/generator.py +598 -0
  102. bead/deployment/jspsych/package.json +51 -0
  103. bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
  104. bead/deployment/jspsych/randomizer.py +299 -0
  105. bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
  106. bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
  107. bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
  108. bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
  109. bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
  110. bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
  111. bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
  112. bead/deployment/jspsych/src/plugins/rating.ts +248 -0
  113. bead/deployment/jspsych/src/slopit/index.ts +9 -0
  114. bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
  115. bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
  116. bead/deployment/jspsych/templates/experiment.css +1 -0
  117. bead/deployment/jspsych/templates/experiment.js.template +289 -0
  118. bead/deployment/jspsych/templates/index.html +51 -0
  119. bead/deployment/jspsych/templates/randomizer.js +241 -0
  120. bead/deployment/jspsych/templates/randomizer.js.template +313 -0
  121. bead/deployment/jspsych/trials.py +723 -0
  122. bead/deployment/jspsych/tsconfig.json +23 -0
  123. bead/deployment/jspsych/tsup.config.ts +30 -0
  124. bead/deployment/jspsych/ui/__init__.py +1 -0
  125. bead/deployment/jspsych/ui/components.py +383 -0
  126. bead/deployment/jspsych/ui/styles.py +411 -0
  127. bead/dsl/__init__.py +80 -0
  128. bead/dsl/ast.py +168 -0
  129. bead/dsl/context.py +178 -0
  130. bead/dsl/errors.py +71 -0
  131. bead/dsl/evaluator.py +570 -0
  132. bead/dsl/grammar.lark +81 -0
  133. bead/dsl/parser.py +231 -0
  134. bead/dsl/stdlib.py +929 -0
  135. bead/evaluation/__init__.py +13 -0
  136. bead/evaluation/convergence.py +485 -0
  137. bead/evaluation/interannotator.py +398 -0
  138. bead/items/__init__.py +40 -0
  139. bead/items/adapters/__init__.py +70 -0
  140. bead/items/adapters/anthropic.py +224 -0
  141. bead/items/adapters/api_utils.py +167 -0
  142. bead/items/adapters/base.py +216 -0
  143. bead/items/adapters/google.py +259 -0
  144. bead/items/adapters/huggingface.py +1074 -0
  145. bead/items/adapters/openai.py +323 -0
  146. bead/items/adapters/registry.py +202 -0
  147. bead/items/adapters/sentence_transformers.py +224 -0
  148. bead/items/adapters/togetherai.py +309 -0
  149. bead/items/binary.py +515 -0
  150. bead/items/cache.py +558 -0
  151. bead/items/categorical.py +593 -0
  152. bead/items/cloze.py +757 -0
  153. bead/items/constructor.py +784 -0
  154. bead/items/forced_choice.py +413 -0
  155. bead/items/free_text.py +681 -0
  156. bead/items/generation.py +432 -0
  157. bead/items/item.py +396 -0
  158. bead/items/item_template.py +787 -0
  159. bead/items/magnitude.py +573 -0
  160. bead/items/multi_select.py +621 -0
  161. bead/items/ordinal_scale.py +569 -0
  162. bead/items/scoring.py +448 -0
  163. bead/items/validation.py +723 -0
  164. bead/lists/__init__.py +30 -0
  165. bead/lists/balancer.py +263 -0
  166. bead/lists/constraints.py +1067 -0
  167. bead/lists/experiment_list.py +286 -0
  168. bead/lists/list_collection.py +378 -0
  169. bead/lists/partitioner.py +1141 -0
  170. bead/lists/stratification.py +254 -0
  171. bead/participants/__init__.py +73 -0
  172. bead/participants/collection.py +699 -0
  173. bead/participants/merging.py +312 -0
  174. bead/participants/metadata_spec.py +491 -0
  175. bead/participants/models.py +276 -0
  176. bead/resources/__init__.py +29 -0
  177. bead/resources/adapters/__init__.py +19 -0
  178. bead/resources/adapters/base.py +104 -0
  179. bead/resources/adapters/cache.py +128 -0
  180. bead/resources/adapters/glazing.py +508 -0
  181. bead/resources/adapters/registry.py +117 -0
  182. bead/resources/adapters/unimorph.py +796 -0
  183. bead/resources/classification.py +856 -0
  184. bead/resources/constraint_builders.py +329 -0
  185. bead/resources/constraints.py +165 -0
  186. bead/resources/lexical_item.py +223 -0
  187. bead/resources/lexicon.py +744 -0
  188. bead/resources/loaders.py +209 -0
  189. bead/resources/template.py +441 -0
  190. bead/resources/template_collection.py +707 -0
  191. bead/resources/template_generation.py +349 -0
  192. bead/simulation/__init__.py +29 -0
  193. bead/simulation/annotators/__init__.py +15 -0
  194. bead/simulation/annotators/base.py +175 -0
  195. bead/simulation/annotators/distance_based.py +135 -0
  196. bead/simulation/annotators/lm_based.py +114 -0
  197. bead/simulation/annotators/oracle.py +182 -0
  198. bead/simulation/annotators/random.py +181 -0
  199. bead/simulation/dsl_extension/__init__.py +3 -0
  200. bead/simulation/noise_models/__init__.py +13 -0
  201. bead/simulation/noise_models/base.py +42 -0
  202. bead/simulation/noise_models/random_noise.py +82 -0
  203. bead/simulation/noise_models/systematic.py +132 -0
  204. bead/simulation/noise_models/temperature.py +86 -0
  205. bead/simulation/runner.py +144 -0
  206. bead/simulation/strategies/__init__.py +23 -0
  207. bead/simulation/strategies/base.py +123 -0
  208. bead/simulation/strategies/binary.py +103 -0
  209. bead/simulation/strategies/categorical.py +123 -0
  210. bead/simulation/strategies/cloze.py +224 -0
  211. bead/simulation/strategies/forced_choice.py +127 -0
  212. bead/simulation/strategies/free_text.py +105 -0
  213. bead/simulation/strategies/magnitude.py +116 -0
  214. bead/simulation/strategies/multi_select.py +129 -0
  215. bead/simulation/strategies/ordinal_scale.py +131 -0
  216. bead/templates/__init__.py +27 -0
  217. bead/templates/adapters/__init__.py +17 -0
  218. bead/templates/adapters/base.py +128 -0
  219. bead/templates/adapters/cache.py +178 -0
  220. bead/templates/adapters/huggingface.py +312 -0
  221. bead/templates/combinatorics.py +103 -0
  222. bead/templates/filler.py +605 -0
  223. bead/templates/renderers.py +177 -0
  224. bead/templates/resolver.py +178 -0
  225. bead/templates/strategies.py +1806 -0
  226. bead/templates/streaming.py +195 -0
  227. bead-0.1.0.dist-info/METADATA +212 -0
  228. bead-0.1.0.dist-info/RECORD +231 -0
  229. bead-0.1.0.dist-info/WHEEL +4 -0
  230. bead-0.1.0.dist-info/entry_points.txt +2 -0
  231. bead-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,154 @@
1
+ """Data merger for JATOS and Prolific data.
2
+
3
+ This module provides the DataMerger class for merging experimental results
4
+ from JATOS with participant metadata from Prolific. The merger matches
5
+ records based on participant IDs and handles unmatched records gracefully.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from bead.data.base import JsonValue
11
+
12
+
13
class DataMerger:
    """Merges JATOS results with Prolific metadata.

    This class merges experimental data from JATOS with participant
    demographics and metadata from Prolific based on participant IDs.

    Parameters
    ----------
    merge_key : str
        Key under which the participant ID is stored in the ``data`` or
        ``metadata`` mapping of a JATOS result. Default is "PROLIFIC_PID".

    Attributes
    ----------
    merge_key : str
        Key to merge on (e.g., "PROLIFIC_PID").

    Examples
    --------
    Create a merger with custom key::

        merger = DataMerger(merge_key="PROLIFIC_PID")
        merged_data = merger.merge(jatos_results, prolific_submissions)
    """

    def __init__(self, merge_key: str = "PROLIFIC_PID") -> None:
        self.merge_key = merge_key

    def merge(
        self,
        jatos_results: list[dict[str, JsonValue]],
        prolific_submissions: list[dict[str, JsonValue]],
    ) -> list[dict[str, JsonValue]]:
        """Merge JATOS and Prolific data.

        Matches each JATOS result to the Prolific submission whose
        ``"participant_id"`` equals the participant ID found in the JATOS
        result (see ``_extract_prolific_pid``). One output record is
        produced per JATOS result, in input order; results without a
        matching submission are kept and flagged as unmerged.

        Prolific submissions whose ``"participant_id"`` is missing or is not
        a string are ignored: they can never match a JATOS participant ID,
        and a single malformed submission should not abort the whole merge.
        If several submissions share a participant ID, the last one wins.

        Parameters
        ----------
        jatos_results : list[dict[str, JsonValue]]
            JATOS results from JATOSDataCollector.
        prolific_submissions : list[dict[str, JsonValue]]
            Prolific submissions from ProlificDataCollector.

        Returns
        -------
        list[dict[str, JsonValue]]
            Merged data with structure:
            {
                "jatos_data": {...},
                "prolific_metadata": {...} | None,
                "merged": bool
            }

        Examples
        --------
        ::

            jatos_results = [
                {"data": {"PROLIFIC_PID": "abc123"}, "metadata": {}}
            ]
            prolific_submissions = [
                {"participant_id": "abc123", "status": "APPROVED"}
            ]
            merged = merger.merge(jatos_results, prolific_submissions)
            assert merged[0]["merged"] is True
        """
        # Index submissions by participant ID. Only string IDs are indexed:
        # the ID extracted from a JATOS result is always a string, so any
        # other key type could not be matched anyway.
        prolific_lookup: dict[str, dict[str, JsonValue]] = {}
        for submission in prolific_submissions:
            participant_id = submission.get("participant_id")
            if isinstance(participant_id, str):
                prolific_lookup[participant_id] = submission

        merged: list[dict[str, JsonValue]] = []

        for result in jatos_results:
            prolific_pid = self._extract_prolific_pid(result)

            # An empty-string ID is treated the same as a missing one.
            matched = bool(prolific_pid) and prolific_pid in prolific_lookup
            merged.append(
                {
                    "jatos_data": result,
                    "prolific_metadata": (
                        prolific_lookup[prolific_pid] if matched else None
                    ),
                    "merged": matched,
                }
            )

        return merged

    def _extract_prolific_pid(
        self,
        jatos_result: dict[str, JsonValue],
    ) -> str | None:
        """Extract Prolific PID from JATOS result.

        Looks up ``self.merge_key`` first in the ``"data"`` field and then in
        the ``"metadata"`` field of the JATOS result. A field is only
        searched when it is a dict, and a value is only accepted when it is
        a string.

        Parameters
        ----------
        jatos_result : dict[str, JsonValue]
            JATOS result dictionary.

        Returns
        -------
        str | None
            Prolific PID if found, None otherwise.

        Examples
        --------
        ::

            result = {"data": {"PROLIFIC_PID": "abc123"}}
            pid = merger._extract_prolific_pid(result)
            assert pid == "abc123"
        """
        # "data" takes precedence over "metadata".
        for field_name in ("data", "metadata"):
            section = jatos_result.get(field_name)
            if not isinstance(section, dict):
                continue
            # Extract value from untyped JSON and verify type at runtime
            value = section.get(self.merge_key)  # type: ignore[reportUnknownMemberType]
            if isinstance(value, str):
                return value

        return None
@@ -0,0 +1,198 @@
1
+ """Prolific data collection for model training.
2
+
3
+ This module provides the ProlificDataCollector class for downloading participant
4
+ metadata and submissions from Prolific. It supports:
5
+ - Downloading participant submissions with pagination
6
+ - Filtering by submission status
7
+ - Approving submissions
8
+ - Getting study metadata
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ from pathlib import Path
15
+
16
+ import requests
17
+
18
+ from bead.data.base import JsonValue
19
+ from bead.data.timestamps import now_iso8601
20
+
21
+
22
class ProlificDataCollector:
    """Collects participant data from Prolific API.

    This class interfaces with the Prolific API v1 to download participant
    submissions, demographics, and metadata for model training.

    Parameters
    ----------
    api_key : str
        Prolific API key for authentication.
    study_id : str
        Prolific study ID to collect data from.
    timeout : float | None
        Timeout in seconds passed to every HTTP request. ``None`` disables
        the timeout (requests may then block indefinitely). Default is 30.0.

    Attributes
    ----------
    api_key : str
        Prolific API key for authentication.
    study_id : str
        Prolific study ID to collect data from.
    base_url : str
        Prolific API base URL.
    timeout : float | None
        Timeout in seconds applied to every HTTP request.
    session : requests.Session
        HTTP session with authentication headers.

    Examples
    --------
    Create a collector and download submissions::

        collector = ProlificDataCollector(
            api_key="my-api-key",
            study_id="abc123"
        )
        submissions = collector.download_submissions(Path("submissions.json"))
    """

    def __init__(
        self,
        api_key: str,
        study_id: str,
        timeout: float | None = 30.0,
    ) -> None:
        self.api_key = api_key
        self.study_id = study_id
        self.base_url = "https://api.prolific.co/api/v1"
        # requests applies no timeout unless one is given, so a stalled
        # connection would otherwise hang the caller forever.
        self.timeout = timeout

        self.session = requests.Session()
        self.session.headers.update({"Authorization": f"Token {api_key}"})

    def download_submissions(
        self,
        output_path: Path,
        status: str | None = None,
    ) -> list[dict[str, JsonValue]]:
        """Download participant submissions.

        Requests the study's submissions page by page (``page=1, 2, ...``)
        until a response carries no ``"results"`` list or an empty one. Every
        dict entry is stamped with a ``"download_timestamp"`` and collected;
        non-dict entries are skipped. The collected submissions are written
        to ``output_path`` as JSON (parent directories are created) and also
        returned.

        Parameters
        ----------
        output_path : Path
            Path to save submissions (JSON format).
        status : str | None
            Filter by status (e.g., "APPROVED", "AWAITING REVIEW").

        Returns
        -------
        list[dict[str, JsonValue]]
            Downloaded submissions with metadata.

        Raises
        ------
        requests.HTTPError
            If the API request fails.
        requests.Timeout
            If the API does not respond within ``timeout`` seconds.

        Examples
        --------
        Download all submissions::

            submissions = collector.download_submissions(Path("submissions.json"))

        Download with status filter::

            submissions = collector.download_submissions(
                Path("approved.json"),
                status="APPROVED"
            )
        """
        url = f"{self.base_url}/studies/{self.study_id}/submissions/"
        submissions: list[dict[str, JsonValue]] = []
        page = 1

        # NOTE(review): this loop assumes the API answers a request past the
        # last page with an empty "results" list. If it answers with an error
        # status instead, raise_for_status() below will raise -- confirm
        # against the Prolific API documentation.
        while True:
            params: dict[str, str | int] = {"page": page}
            if status:
                params["status"] = status

            response = self.session.get(url, params=params, timeout=self.timeout)
            response.raise_for_status()

            payload = response.json()
            # Anything other than an object with a list under "results" is
            # treated as "no more submissions".
            results = payload.get("results") if isinstance(payload, dict) else None
            page_submissions: list[JsonValue] = (
                results if isinstance(results, list) else []
            )

            if not page_submissions:
                break

            # Enrich with metadata
            for sub in page_submissions:
                if isinstance(sub, dict):
                    sub["download_timestamp"] = now_iso8601().isoformat()
                    submissions.append(sub)

            page += 1

        # Save to JSON file
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(submissions, f, indent=2)

        return submissions

    def get_study_info(self) -> dict[str, JsonValue]:
        """Get study information.

        Returns
        -------
        dict[str, JsonValue]
            Study details dictionary (the decoded JSON response body).

        Raises
        ------
        requests.HTTPError
            If the API request fails.
        requests.Timeout
            If the API does not respond within ``timeout`` seconds.

        Examples
        --------
        ::

            info = collector.get_study_info()
            print(info["name"])
        """
        url = f"{self.base_url}/studies/{self.study_id}/"
        response = self.session.get(url, timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def approve_submissions(
        self,
        submission_ids: list[str],
    ) -> None:
        """Approve submissions.

        Posts an ``APPROVE`` transition for each submission ID, one request
        per ID, in the given order. Processing stops at the first failing
        request, so submissions listed before it have already been
        transitioned.

        Parameters
        ----------
        submission_ids : list[str]
            Submission IDs to approve.

        Raises
        ------
        requests.HTTPError
            If the API request fails.
        requests.Timeout
            If the API does not respond within ``timeout`` seconds.

        Examples
        --------
        ::

            collector.approve_submissions(["sub1", "sub2", "sub3"])
        """
        for submission_id in submission_ids:
            url = f"{self.base_url}/submissions/{submission_id}/transition/"
            data = {"action": "APPROVE"}
            response = self.session.post(url, json=data, timeout=self.timeout)
            response.raise_for_status()
@@ -0,0 +1,5 @@
1
+ """Deployment module for generating experiments.
2
+
3
+ Generates experiments for jsPsych/JATOS platforms with batch mode support
4
+ and configurable list distribution strategies.
5
+ """