bead 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. bead/__init__.py +11 -0
  2. bead/__main__.py +11 -0
  3. bead/active_learning/__init__.py +15 -0
  4. bead/active_learning/config.py +231 -0
  5. bead/active_learning/loop.py +566 -0
  6. bead/active_learning/models/__init__.py +24 -0
  7. bead/active_learning/models/base.py +852 -0
  8. bead/active_learning/models/binary.py +910 -0
  9. bead/active_learning/models/categorical.py +943 -0
  10. bead/active_learning/models/cloze.py +862 -0
  11. bead/active_learning/models/forced_choice.py +956 -0
  12. bead/active_learning/models/free_text.py +773 -0
  13. bead/active_learning/models/lora.py +365 -0
  14. bead/active_learning/models/magnitude.py +835 -0
  15. bead/active_learning/models/multi_select.py +795 -0
  16. bead/active_learning/models/ordinal_scale.py +811 -0
  17. bead/active_learning/models/peft_adapter.py +155 -0
  18. bead/active_learning/models/random_effects.py +639 -0
  19. bead/active_learning/selection.py +354 -0
  20. bead/active_learning/strategies.py +391 -0
  21. bead/active_learning/trainers/__init__.py +26 -0
  22. bead/active_learning/trainers/base.py +210 -0
  23. bead/active_learning/trainers/data_collator.py +172 -0
  24. bead/active_learning/trainers/dataset_utils.py +261 -0
  25. bead/active_learning/trainers/huggingface.py +304 -0
  26. bead/active_learning/trainers/lightning.py +324 -0
  27. bead/active_learning/trainers/metrics.py +424 -0
  28. bead/active_learning/trainers/mixed_effects.py +551 -0
  29. bead/active_learning/trainers/model_wrapper.py +509 -0
  30. bead/active_learning/trainers/registry.py +104 -0
  31. bead/adapters/__init__.py +11 -0
  32. bead/adapters/huggingface.py +61 -0
  33. bead/behavioral/__init__.py +116 -0
  34. bead/behavioral/analytics.py +646 -0
  35. bead/behavioral/extraction.py +343 -0
  36. bead/behavioral/merging.py +343 -0
  37. bead/cli/__init__.py +11 -0
  38. bead/cli/active_learning.py +513 -0
  39. bead/cli/active_learning_commands.py +779 -0
  40. bead/cli/completion.py +359 -0
  41. bead/cli/config.py +624 -0
  42. bead/cli/constraint_builders.py +286 -0
  43. bead/cli/deployment.py +859 -0
  44. bead/cli/deployment_trials.py +493 -0
  45. bead/cli/deployment_ui.py +332 -0
  46. bead/cli/display.py +378 -0
  47. bead/cli/items.py +960 -0
  48. bead/cli/items_factories.py +776 -0
  49. bead/cli/list_constraints.py +714 -0
  50. bead/cli/lists.py +490 -0
  51. bead/cli/main.py +430 -0
  52. bead/cli/models.py +877 -0
  53. bead/cli/resource_loaders.py +621 -0
  54. bead/cli/resources.py +1036 -0
  55. bead/cli/shell.py +356 -0
  56. bead/cli/simulate.py +840 -0
  57. bead/cli/templates.py +1158 -0
  58. bead/cli/training.py +1080 -0
  59. bead/cli/utils.py +614 -0
  60. bead/cli/workflow.py +1273 -0
  61. bead/config/__init__.py +68 -0
  62. bead/config/active_learning.py +1009 -0
  63. bead/config/config.py +192 -0
  64. bead/config/defaults.py +118 -0
  65. bead/config/deployment.py +217 -0
  66. bead/config/env.py +147 -0
  67. bead/config/item.py +45 -0
  68. bead/config/list.py +193 -0
  69. bead/config/loader.py +149 -0
  70. bead/config/logging.py +42 -0
  71. bead/config/model.py +49 -0
  72. bead/config/paths.py +46 -0
  73. bead/config/profiles.py +320 -0
  74. bead/config/resources.py +47 -0
  75. bead/config/serialization.py +210 -0
  76. bead/config/simulation.py +206 -0
  77. bead/config/template.py +238 -0
  78. bead/config/validation.py +267 -0
  79. bead/data/__init__.py +65 -0
  80. bead/data/base.py +87 -0
  81. bead/data/identifiers.py +97 -0
  82. bead/data/language_codes.py +61 -0
  83. bead/data/metadata.py +270 -0
  84. bead/data/range.py +123 -0
  85. bead/data/repository.py +358 -0
  86. bead/data/serialization.py +249 -0
  87. bead/data/timestamps.py +89 -0
  88. bead/data/validation.py +349 -0
  89. bead/data_collection/__init__.py +11 -0
  90. bead/data_collection/jatos.py +223 -0
  91. bead/data_collection/merger.py +154 -0
  92. bead/data_collection/prolific.py +198 -0
  93. bead/deployment/__init__.py +5 -0
  94. bead/deployment/distribution.py +402 -0
  95. bead/deployment/jatos/__init__.py +1 -0
  96. bead/deployment/jatos/api.py +200 -0
  97. bead/deployment/jatos/exporter.py +210 -0
  98. bead/deployment/jspsych/__init__.py +9 -0
  99. bead/deployment/jspsych/biome.json +44 -0
  100. bead/deployment/jspsych/config.py +411 -0
  101. bead/deployment/jspsych/generator.py +598 -0
  102. bead/deployment/jspsych/package.json +51 -0
  103. bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
  104. bead/deployment/jspsych/randomizer.py +299 -0
  105. bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
  106. bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
  107. bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
  108. bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
  109. bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
  110. bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
  111. bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
  112. bead/deployment/jspsych/src/plugins/rating.ts +248 -0
  113. bead/deployment/jspsych/src/slopit/index.ts +9 -0
  114. bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
  115. bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
  116. bead/deployment/jspsych/templates/experiment.css +1 -0
  117. bead/deployment/jspsych/templates/experiment.js.template +289 -0
  118. bead/deployment/jspsych/templates/index.html +51 -0
  119. bead/deployment/jspsych/templates/randomizer.js +241 -0
  120. bead/deployment/jspsych/templates/randomizer.js.template +313 -0
  121. bead/deployment/jspsych/trials.py +723 -0
  122. bead/deployment/jspsych/tsconfig.json +23 -0
  123. bead/deployment/jspsych/tsup.config.ts +30 -0
  124. bead/deployment/jspsych/ui/__init__.py +1 -0
  125. bead/deployment/jspsych/ui/components.py +383 -0
  126. bead/deployment/jspsych/ui/styles.py +411 -0
  127. bead/dsl/__init__.py +80 -0
  128. bead/dsl/ast.py +168 -0
  129. bead/dsl/context.py +178 -0
  130. bead/dsl/errors.py +71 -0
  131. bead/dsl/evaluator.py +570 -0
  132. bead/dsl/grammar.lark +81 -0
  133. bead/dsl/parser.py +231 -0
  134. bead/dsl/stdlib.py +929 -0
  135. bead/evaluation/__init__.py +13 -0
  136. bead/evaluation/convergence.py +485 -0
  137. bead/evaluation/interannotator.py +398 -0
  138. bead/items/__init__.py +40 -0
  139. bead/items/adapters/__init__.py +70 -0
  140. bead/items/adapters/anthropic.py +224 -0
  141. bead/items/adapters/api_utils.py +167 -0
  142. bead/items/adapters/base.py +216 -0
  143. bead/items/adapters/google.py +259 -0
  144. bead/items/adapters/huggingface.py +1074 -0
  145. bead/items/adapters/openai.py +323 -0
  146. bead/items/adapters/registry.py +202 -0
  147. bead/items/adapters/sentence_transformers.py +224 -0
  148. bead/items/adapters/togetherai.py +309 -0
  149. bead/items/binary.py +515 -0
  150. bead/items/cache.py +558 -0
  151. bead/items/categorical.py +593 -0
  152. bead/items/cloze.py +757 -0
  153. bead/items/constructor.py +784 -0
  154. bead/items/forced_choice.py +413 -0
  155. bead/items/free_text.py +681 -0
  156. bead/items/generation.py +432 -0
  157. bead/items/item.py +396 -0
  158. bead/items/item_template.py +787 -0
  159. bead/items/magnitude.py +573 -0
  160. bead/items/multi_select.py +621 -0
  161. bead/items/ordinal_scale.py +569 -0
  162. bead/items/scoring.py +448 -0
  163. bead/items/validation.py +723 -0
  164. bead/lists/__init__.py +30 -0
  165. bead/lists/balancer.py +263 -0
  166. bead/lists/constraints.py +1067 -0
  167. bead/lists/experiment_list.py +286 -0
  168. bead/lists/list_collection.py +378 -0
  169. bead/lists/partitioner.py +1141 -0
  170. bead/lists/stratification.py +254 -0
  171. bead/participants/__init__.py +73 -0
  172. bead/participants/collection.py +699 -0
  173. bead/participants/merging.py +312 -0
  174. bead/participants/metadata_spec.py +491 -0
  175. bead/participants/models.py +276 -0
  176. bead/resources/__init__.py +29 -0
  177. bead/resources/adapters/__init__.py +19 -0
  178. bead/resources/adapters/base.py +104 -0
  179. bead/resources/adapters/cache.py +128 -0
  180. bead/resources/adapters/glazing.py +508 -0
  181. bead/resources/adapters/registry.py +117 -0
  182. bead/resources/adapters/unimorph.py +796 -0
  183. bead/resources/classification.py +856 -0
  184. bead/resources/constraint_builders.py +329 -0
  185. bead/resources/constraints.py +165 -0
  186. bead/resources/lexical_item.py +223 -0
  187. bead/resources/lexicon.py +744 -0
  188. bead/resources/loaders.py +209 -0
  189. bead/resources/template.py +441 -0
  190. bead/resources/template_collection.py +707 -0
  191. bead/resources/template_generation.py +349 -0
  192. bead/simulation/__init__.py +29 -0
  193. bead/simulation/annotators/__init__.py +15 -0
  194. bead/simulation/annotators/base.py +175 -0
  195. bead/simulation/annotators/distance_based.py +135 -0
  196. bead/simulation/annotators/lm_based.py +114 -0
  197. bead/simulation/annotators/oracle.py +182 -0
  198. bead/simulation/annotators/random.py +181 -0
  199. bead/simulation/dsl_extension/__init__.py +3 -0
  200. bead/simulation/noise_models/__init__.py +13 -0
  201. bead/simulation/noise_models/base.py +42 -0
  202. bead/simulation/noise_models/random_noise.py +82 -0
  203. bead/simulation/noise_models/systematic.py +132 -0
  204. bead/simulation/noise_models/temperature.py +86 -0
  205. bead/simulation/runner.py +144 -0
  206. bead/simulation/strategies/__init__.py +23 -0
  207. bead/simulation/strategies/base.py +123 -0
  208. bead/simulation/strategies/binary.py +103 -0
  209. bead/simulation/strategies/categorical.py +123 -0
  210. bead/simulation/strategies/cloze.py +224 -0
  211. bead/simulation/strategies/forced_choice.py +127 -0
  212. bead/simulation/strategies/free_text.py +105 -0
  213. bead/simulation/strategies/magnitude.py +116 -0
  214. bead/simulation/strategies/multi_select.py +129 -0
  215. bead/simulation/strategies/ordinal_scale.py +131 -0
  216. bead/templates/__init__.py +27 -0
  217. bead/templates/adapters/__init__.py +17 -0
  218. bead/templates/adapters/base.py +128 -0
  219. bead/templates/adapters/cache.py +178 -0
  220. bead/templates/adapters/huggingface.py +312 -0
  221. bead/templates/combinatorics.py +103 -0
  222. bead/templates/filler.py +605 -0
  223. bead/templates/renderers.py +177 -0
  224. bead/templates/resolver.py +178 -0
  225. bead/templates/strategies.py +1806 -0
  226. bead/templates/streaming.py +195 -0
  227. bead-0.1.0.dist-info/METADATA +212 -0
  228. bead-0.1.0.dist-info/RECORD +231 -0
  229. bead-0.1.0.dist-info/WHEEL +4 -0
  230. bead-0.1.0.dist-info/entry_points.txt +2 -0
  231. bead-0.1.0.dist-info/licenses/LICENSE +21 -0
bead/lists/__init__.py ADDED
@@ -0,0 +1,30 @@
1
+ """List construction module for experimental list partitioning.
2
+
3
+ Provides data models for organizing experimental items into balanced lists
4
+ for presentation to participants. Includes ExperimentList, ListCollection,
5
+ and constraint types (uniqueness, balance, quantile, size, diversity, ordering).
6
+ """
7
+
8
+ from bead.lists.constraints import (
9
+ BalanceConstraint,
10
+ DiversityConstraint,
11
+ ListConstraint,
12
+ OrderingConstraint,
13
+ QuantileConstraint,
14
+ SizeConstraint,
15
+ UniquenessConstraint,
16
+ )
17
+ from bead.lists.experiment_list import ExperimentList
18
+ from bead.lists.list_collection import ListCollection
19
+
20
+ __all__ = [
21
+ "ExperimentList",
22
+ "ListCollection",
23
+ "ListConstraint",
24
+ "UniquenessConstraint",
25
+ "BalanceConstraint",
26
+ "QuantileConstraint",
27
+ "DiversityConstraint",
28
+ "SizeConstraint",
29
+ "OrderingConstraint",
30
+ ]
bead/lists/balancer.py ADDED
@@ -0,0 +1,263 @@
1
+ """Quantile balancing for experimental list partitioning.
2
+
3
+ This module provides the QuantileBalancer class for ensuring uniform distribution
4
+ of items across quantiles of a numeric property. Uses NumPy for efficient
5
+ quantile computation and follows the stand-off annotation pattern (operates on item UUIDs only).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from collections.abc import Callable
11
+ from typing import Any
12
+ from uuid import UUID
13
+
14
+ import numpy as np
15
+
16
+
17
+ class QuantileBalancer:
18
+ """Ensures uniform distribution of items across quantiles.
19
+
20
+ Used by stratified partitioning strategy to create balanced distribution
21
+ of numeric properties (e.g., LM probabilities, word frequencies).
22
+
23
+ Works with UUIDs only (stand-off annotation). Requires value_func callable
24
+ to extract numeric values from items via their UUIDs.
25
+
26
+ Parameters
27
+ ----------
28
+ n_quantiles : int, default=5
29
+ Number of quantiles to create (must be >= 2).
30
+ random_seed : int | None, default=None
31
+ Random seed for reproducibility. If None, uses non-deterministic RNG.
32
+
33
+ Attributes
34
+ ----------
35
+ n_quantiles : int
36
+ Number of quantiles to create.
37
+ random_seed : int | None
38
+ Random seed for reproducibility.
39
+
40
+ Examples
41
+ --------
42
+ >>> from uuid import uuid4
43
+ >>> import numpy as np
44
+ >>> balancer = QuantileBalancer(n_quantiles=5, random_seed=42)
45
+ >>> # Create items with known values
46
+ >>> items = [uuid4() for _ in range(100)]
47
+ >>> values = {item: float(i) for i, item in enumerate(items)}
48
+ >>> value_func = lambda uid: values[uid]
49
+ >>> # Balance across 4 lists, 5 items per quantile per list
50
+ >>> lists = balancer.balance(items, value_func, n_lists=4,
51
+ ... items_per_quantile_per_list=5)
52
+ >>> len(lists)
53
+ 4
54
+ """
55
+
56
+ def __init__(self, n_quantiles: int = 5, random_seed: int | None = None) -> None:
57
+ if n_quantiles < 2:
58
+ raise ValueError(f"n_quantiles must be >= 2, got {n_quantiles}")
59
+
60
+ self.n_quantiles = n_quantiles
61
+ self.random_seed = random_seed
62
+ self._rng = np.random.default_rng(random_seed)
63
+
64
+ def balance(
65
+ self,
66
+ item_ids: list[UUID],
67
+ value_func: Callable[[UUID], float],
68
+ n_lists: int,
69
+ items_per_quantile_per_list: int,
70
+ ) -> list[list[UUID]]:
71
+ """Balance items across lists and quantiles.
72
+
73
+ Distributes items uniformly across quantiles and lists to ensure
74
+ balanced representation of the numeric property across all lists.
75
+
76
+ Parameters
77
+ ----------
78
+ item_ids : list[UUID]
79
+ UUIDs of items to balance.
80
+ value_func : Callable[[UUID], float]
81
+ Function to extract numeric value from item UUID.
82
+ n_lists : int
83
+ Number of lists to create.
84
+ items_per_quantile_per_list : int
85
+ Target number of items per quantile per list.
86
+
87
+ Returns
88
+ -------
89
+ list[list[UUID]]
90
+ Balanced lists of item UUIDs.
91
+
92
+ Raises
93
+ ------
94
+ ValueError
95
+ If n_lists < 1 or items_per_quantile_per_list < 1.
96
+
97
+ Examples
98
+ --------
99
+ >>> from uuid import uuid4
100
+ >>> balancer = QuantileBalancer(n_quantiles=5, random_seed=42)
101
+ >>> items = [uuid4() for _ in range(100)]
102
+ >>> values = {item: float(i) for i, item in enumerate(items)}
103
+ >>> lists = balancer.balance(items, lambda uid: values[uid], 4, 5)
104
+ >>> all(len(lst) == 25 for lst in lists) # 5 quantiles * 5 items
105
+ True
106
+
107
+ Notes
108
+ -----
109
+ - Items are assigned to quantiles using np.percentile and np.digitize
110
+ - Within each quantile, items are shuffled before distribution
111
+ - If insufficient items exist in a quantile, fewer items are assigned
112
+ """
113
+ if n_lists < 1:
114
+ raise ValueError(f"n_lists must be >= 1, got {n_lists}")
115
+ if items_per_quantile_per_list < 1:
116
+ raise ValueError(
117
+ f"items_per_quantile_per_list must be >= 1, "
118
+ f"got {items_per_quantile_per_list}"
119
+ )
120
+
121
+ # create quantile-based strata
122
+ strata = self._create_strata(item_ids, value_func)
123
+
124
+ # initialize lists
125
+ lists: list[list[UUID]] = [[] for _ in range(n_lists)]
126
+
127
+ # distribute items from each quantile across lists
128
+ for q in range(self.n_quantiles):
129
+ q_items = strata[q]
130
+
131
+ # shuffle items in this quantile
132
+ q_items_array = np.array(q_items)
133
+ self._rng.shuffle(q_items_array)
134
+
135
+ # distribute to lists
136
+ for list_idx in range(n_lists):
137
+ # take items for this list
138
+ start_idx = list_idx * items_per_quantile_per_list
139
+ end_idx = start_idx + items_per_quantile_per_list
140
+ list_items = q_items_array[start_idx:end_idx].tolist()
141
+
142
+ lists[list_idx].extend(list_items)
143
+
144
+ return lists
145
+
146
+ def compute_balance_score(
147
+ self, item_ids: list[UUID], value_func: Callable[[UUID], float]
148
+ ) -> float:
149
+ """Compute balance score for items.
150
+
151
+ Score is 1.0 for perfect balance (uniform distribution across quantiles),
152
+ lower for imbalanced distributions. Score is based on deviation from
153
+ expected uniform distribution.
154
+
155
+ Parameters
156
+ ----------
157
+ item_ids : list[UUID]
158
+ UUIDs of items to score.
159
+ value_func : Callable[[UUID], float]
160
+ Function to extract numeric values.
161
+
162
+ Returns
163
+ -------
164
+ float
165
+ Balance score (0.0-1.0, higher is better).
166
+
167
+ Examples
168
+ --------
169
+ >>> from uuid import uuid4
170
+ >>> balancer = QuantileBalancer(n_quantiles=5)
171
+ >>> # Uniformly distributed values
172
+ >>> items = [uuid4() for _ in range(100)]
173
+ >>> values = {item: float(i) for i, item in enumerate(items)}
174
+ >>> score = balancer.compute_balance_score(items, lambda uid: values[uid])
175
+ >>> score > 0.9 # Should be close to 1.0
176
+ True
177
+
178
+ Notes
179
+ -----
180
+ - Returns 0.0 for empty item lists
181
+ - Uses mean absolute deviation from expected uniform count
182
+ """
183
+ if not item_ids:
184
+ return 0.0
185
+
186
+ # compute values
187
+ values: np.ndarray[tuple[int, ...], np.dtype[np.floating[Any]]] = np.array(
188
+ [value_func(item_id) for item_id in item_ids]
189
+ )
190
+
191
+ # create expected quantile bins
192
+ expected_quantiles: np.ndarray[tuple[int], np.dtype[np.floating[Any]]] = (
193
+ np.linspace(0, 100, self.n_quantiles + 1)
194
+ )
195
+ # percentile with array input returns array
196
+ expected_bins: np.ndarray[Any, np.dtype[np.floating[Any]]] = np.percentile(
197
+ values, expected_quantiles
198
+ )
199
+
200
+ # count items in each quantile; digitize returns array of integers
201
+ quantile_assignments: np.ndarray[Any, np.dtype[np.intp]] = (
202
+ np.digitize(values, expected_bins) - 1
203
+ )
204
+ quantile_assignments = np.clip(quantile_assignments, 0, self.n_quantiles - 1)
205
+
206
+ quantile_counts = np.bincount(quantile_assignments, minlength=self.n_quantiles)
207
+
208
+ # compute uniformity score
209
+ expected_count = len(item_ids) / self.n_quantiles
210
+ deviations = np.abs(quantile_counts - expected_count)
211
+ score = 1.0 - (np.mean(deviations) / expected_count)
212
+
213
+ return float(max(0.0, score))
214
+
215
+ def _create_strata(
216
+ self, item_ids: list[UUID], value_func: Callable[[UUID], float]
217
+ ) -> dict[int, list[UUID]]:
218
+ """Create quantile-based strata from items.
219
+
220
+ Parameters
221
+ ----------
222
+ item_ids : list[UUID]
223
+ UUIDs of items to stratify.
224
+ value_func : Callable[[UUID], float]
225
+ Function to extract numeric values.
226
+
227
+ Returns
228
+ -------
229
+ dict[int, list[UUID]]
230
+ Dictionary mapping quantile index (0 to n_quantiles-1) to list
231
+ of item UUIDs in that quantile.
232
+
233
+ Notes
234
+ -----
235
+ - Uses np.percentile to compute quantile boundaries
236
+ - Uses np.digitize to assign items to quantiles
237
+ - Edge cases are handled by clipping to valid quantile range
238
+ """
239
+ # extract values
240
+ values: np.ndarray[tuple[int, ...], np.dtype[np.floating[Any]]] = np.array(
241
+ [value_func(item_id) for item_id in item_ids]
242
+ )
243
+
244
+ # compute quantile bins
245
+ quantiles: np.ndarray[tuple[int], np.dtype[np.floating[Any]]] = np.linspace(
246
+ 0, 100, self.n_quantiles + 1
247
+ )
248
+ bins: np.ndarray[Any, np.dtype[np.floating[Any]]] = np.percentile(
249
+ values, quantiles
250
+ )
251
+
252
+ # assign items to quantiles
253
+ quantile_assignments: np.ndarray[Any, np.dtype[np.intp]] = (
254
+ np.digitize(values, bins) - 1
255
+ )
256
+ quantile_assignments = np.clip(quantile_assignments, 0, self.n_quantiles - 1)
257
+
258
+ # group items by quantile
259
+ strata: dict[int, list[UUID]] = {q: [] for q in range(self.n_quantiles)}
260
+ for item_id, q in zip(item_ids, quantile_assignments, strict=False):
261
+ strata[q].append(item_id)
262
+
263
+ return strata