bead-0.1.0-py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.
Files changed (231)
  1. bead/__init__.py +11 -0
  2. bead/__main__.py +11 -0
  3. bead/active_learning/__init__.py +15 -0
  4. bead/active_learning/config.py +231 -0
  5. bead/active_learning/loop.py +566 -0
  6. bead/active_learning/models/__init__.py +24 -0
  7. bead/active_learning/models/base.py +852 -0
  8. bead/active_learning/models/binary.py +910 -0
  9. bead/active_learning/models/categorical.py +943 -0
  10. bead/active_learning/models/cloze.py +862 -0
  11. bead/active_learning/models/forced_choice.py +956 -0
  12. bead/active_learning/models/free_text.py +773 -0
  13. bead/active_learning/models/lora.py +365 -0
  14. bead/active_learning/models/magnitude.py +835 -0
  15. bead/active_learning/models/multi_select.py +795 -0
  16. bead/active_learning/models/ordinal_scale.py +811 -0
  17. bead/active_learning/models/peft_adapter.py +155 -0
  18. bead/active_learning/models/random_effects.py +639 -0
  19. bead/active_learning/selection.py +354 -0
  20. bead/active_learning/strategies.py +391 -0
  21. bead/active_learning/trainers/__init__.py +26 -0
  22. bead/active_learning/trainers/base.py +210 -0
  23. bead/active_learning/trainers/data_collator.py +172 -0
  24. bead/active_learning/trainers/dataset_utils.py +261 -0
  25. bead/active_learning/trainers/huggingface.py +304 -0
  26. bead/active_learning/trainers/lightning.py +324 -0
  27. bead/active_learning/trainers/metrics.py +424 -0
  28. bead/active_learning/trainers/mixed_effects.py +551 -0
  29. bead/active_learning/trainers/model_wrapper.py +509 -0
  30. bead/active_learning/trainers/registry.py +104 -0
  31. bead/adapters/__init__.py +11 -0
  32. bead/adapters/huggingface.py +61 -0
  33. bead/behavioral/__init__.py +116 -0
  34. bead/behavioral/analytics.py +646 -0
  35. bead/behavioral/extraction.py +343 -0
  36. bead/behavioral/merging.py +343 -0
  37. bead/cli/__init__.py +11 -0
  38. bead/cli/active_learning.py +513 -0
  39. bead/cli/active_learning_commands.py +779 -0
  40. bead/cli/completion.py +359 -0
  41. bead/cli/config.py +624 -0
  42. bead/cli/constraint_builders.py +286 -0
  43. bead/cli/deployment.py +859 -0
  44. bead/cli/deployment_trials.py +493 -0
  45. bead/cli/deployment_ui.py +332 -0
  46. bead/cli/display.py +378 -0
  47. bead/cli/items.py +960 -0
  48. bead/cli/items_factories.py +776 -0
  49. bead/cli/list_constraints.py +714 -0
  50. bead/cli/lists.py +490 -0
  51. bead/cli/main.py +430 -0
  52. bead/cli/models.py +877 -0
  53. bead/cli/resource_loaders.py +621 -0
  54. bead/cli/resources.py +1036 -0
  55. bead/cli/shell.py +356 -0
  56. bead/cli/simulate.py +840 -0
  57. bead/cli/templates.py +1158 -0
  58. bead/cli/training.py +1080 -0
  59. bead/cli/utils.py +614 -0
  60. bead/cli/workflow.py +1273 -0
  61. bead/config/__init__.py +68 -0
  62. bead/config/active_learning.py +1009 -0
  63. bead/config/config.py +192 -0
  64. bead/config/defaults.py +118 -0
  65. bead/config/deployment.py +217 -0
  66. bead/config/env.py +147 -0
  67. bead/config/item.py +45 -0
  68. bead/config/list.py +193 -0
  69. bead/config/loader.py +149 -0
  70. bead/config/logging.py +42 -0
  71. bead/config/model.py +49 -0
  72. bead/config/paths.py +46 -0
  73. bead/config/profiles.py +320 -0
  74. bead/config/resources.py +47 -0
  75. bead/config/serialization.py +210 -0
  76. bead/config/simulation.py +206 -0
  77. bead/config/template.py +238 -0
  78. bead/config/validation.py +267 -0
  79. bead/data/__init__.py +65 -0
  80. bead/data/base.py +87 -0
  81. bead/data/identifiers.py +97 -0
  82. bead/data/language_codes.py +61 -0
  83. bead/data/metadata.py +270 -0
  84. bead/data/range.py +123 -0
  85. bead/data/repository.py +358 -0
  86. bead/data/serialization.py +249 -0
  87. bead/data/timestamps.py +89 -0
  88. bead/data/validation.py +349 -0
  89. bead/data_collection/__init__.py +11 -0
  90. bead/data_collection/jatos.py +223 -0
  91. bead/data_collection/merger.py +154 -0
  92. bead/data_collection/prolific.py +198 -0
  93. bead/deployment/__init__.py +5 -0
  94. bead/deployment/distribution.py +402 -0
  95. bead/deployment/jatos/__init__.py +1 -0
  96. bead/deployment/jatos/api.py +200 -0
  97. bead/deployment/jatos/exporter.py +210 -0
  98. bead/deployment/jspsych/__init__.py +9 -0
  99. bead/deployment/jspsych/biome.json +44 -0
  100. bead/deployment/jspsych/config.py +411 -0
  101. bead/deployment/jspsych/generator.py +598 -0
  102. bead/deployment/jspsych/package.json +51 -0
  103. bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
  104. bead/deployment/jspsych/randomizer.py +299 -0
  105. bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
  106. bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
  107. bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
  108. bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
  109. bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
  110. bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
  111. bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
  112. bead/deployment/jspsych/src/plugins/rating.ts +248 -0
  113. bead/deployment/jspsych/src/slopit/index.ts +9 -0
  114. bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
  115. bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
  116. bead/deployment/jspsych/templates/experiment.css +1 -0
  117. bead/deployment/jspsych/templates/experiment.js.template +289 -0
  118. bead/deployment/jspsych/templates/index.html +51 -0
  119. bead/deployment/jspsych/templates/randomizer.js +241 -0
  120. bead/deployment/jspsych/templates/randomizer.js.template +313 -0
  121. bead/deployment/jspsych/trials.py +723 -0
  122. bead/deployment/jspsych/tsconfig.json +23 -0
  123. bead/deployment/jspsych/tsup.config.ts +30 -0
  124. bead/deployment/jspsych/ui/__init__.py +1 -0
  125. bead/deployment/jspsych/ui/components.py +383 -0
  126. bead/deployment/jspsych/ui/styles.py +411 -0
  127. bead/dsl/__init__.py +80 -0
  128. bead/dsl/ast.py +168 -0
  129. bead/dsl/context.py +178 -0
  130. bead/dsl/errors.py +71 -0
  131. bead/dsl/evaluator.py +570 -0
  132. bead/dsl/grammar.lark +81 -0
  133. bead/dsl/parser.py +231 -0
  134. bead/dsl/stdlib.py +929 -0
  135. bead/evaluation/__init__.py +13 -0
  136. bead/evaluation/convergence.py +485 -0
  137. bead/evaluation/interannotator.py +398 -0
  138. bead/items/__init__.py +40 -0
  139. bead/items/adapters/__init__.py +70 -0
  140. bead/items/adapters/anthropic.py +224 -0
  141. bead/items/adapters/api_utils.py +167 -0
  142. bead/items/adapters/base.py +216 -0
  143. bead/items/adapters/google.py +259 -0
  144. bead/items/adapters/huggingface.py +1074 -0
  145. bead/items/adapters/openai.py +323 -0
  146. bead/items/adapters/registry.py +202 -0
  147. bead/items/adapters/sentence_transformers.py +224 -0
  148. bead/items/adapters/togetherai.py +309 -0
  149. bead/items/binary.py +515 -0
  150. bead/items/cache.py +558 -0
  151. bead/items/categorical.py +593 -0
  152. bead/items/cloze.py +757 -0
  153. bead/items/constructor.py +784 -0
  154. bead/items/forced_choice.py +413 -0
  155. bead/items/free_text.py +681 -0
  156. bead/items/generation.py +432 -0
  157. bead/items/item.py +396 -0
  158. bead/items/item_template.py +787 -0
  159. bead/items/magnitude.py +573 -0
  160. bead/items/multi_select.py +621 -0
  161. bead/items/ordinal_scale.py +569 -0
  162. bead/items/scoring.py +448 -0
  163. bead/items/validation.py +723 -0
  164. bead/lists/__init__.py +30 -0
  165. bead/lists/balancer.py +263 -0
  166. bead/lists/constraints.py +1067 -0
  167. bead/lists/experiment_list.py +286 -0
  168. bead/lists/list_collection.py +378 -0
  169. bead/lists/partitioner.py +1141 -0
  170. bead/lists/stratification.py +254 -0
  171. bead/participants/__init__.py +73 -0
  172. bead/participants/collection.py +699 -0
  173. bead/participants/merging.py +312 -0
  174. bead/participants/metadata_spec.py +491 -0
  175. bead/participants/models.py +276 -0
  176. bead/resources/__init__.py +29 -0
  177. bead/resources/adapters/__init__.py +19 -0
  178. bead/resources/adapters/base.py +104 -0
  179. bead/resources/adapters/cache.py +128 -0
  180. bead/resources/adapters/glazing.py +508 -0
  181. bead/resources/adapters/registry.py +117 -0
  182. bead/resources/adapters/unimorph.py +796 -0
  183. bead/resources/classification.py +856 -0
  184. bead/resources/constraint_builders.py +329 -0
  185. bead/resources/constraints.py +165 -0
  186. bead/resources/lexical_item.py +223 -0
  187. bead/resources/lexicon.py +744 -0
  188. bead/resources/loaders.py +209 -0
  189. bead/resources/template.py +441 -0
  190. bead/resources/template_collection.py +707 -0
  191. bead/resources/template_generation.py +349 -0
  192. bead/simulation/__init__.py +29 -0
  193. bead/simulation/annotators/__init__.py +15 -0
  194. bead/simulation/annotators/base.py +175 -0
  195. bead/simulation/annotators/distance_based.py +135 -0
  196. bead/simulation/annotators/lm_based.py +114 -0
  197. bead/simulation/annotators/oracle.py +182 -0
  198. bead/simulation/annotators/random.py +181 -0
  199. bead/simulation/dsl_extension/__init__.py +3 -0
  200. bead/simulation/noise_models/__init__.py +13 -0
  201. bead/simulation/noise_models/base.py +42 -0
  202. bead/simulation/noise_models/random_noise.py +82 -0
  203. bead/simulation/noise_models/systematic.py +132 -0
  204. bead/simulation/noise_models/temperature.py +86 -0
  205. bead/simulation/runner.py +144 -0
  206. bead/simulation/strategies/__init__.py +23 -0
  207. bead/simulation/strategies/base.py +123 -0
  208. bead/simulation/strategies/binary.py +103 -0
  209. bead/simulation/strategies/categorical.py +123 -0
  210. bead/simulation/strategies/cloze.py +224 -0
  211. bead/simulation/strategies/forced_choice.py +127 -0
  212. bead/simulation/strategies/free_text.py +105 -0
  213. bead/simulation/strategies/magnitude.py +116 -0
  214. bead/simulation/strategies/multi_select.py +129 -0
  215. bead/simulation/strategies/ordinal_scale.py +131 -0
  216. bead/templates/__init__.py +27 -0
  217. bead/templates/adapters/__init__.py +17 -0
  218. bead/templates/adapters/base.py +128 -0
  219. bead/templates/adapters/cache.py +178 -0
  220. bead/templates/adapters/huggingface.py +312 -0
  221. bead/templates/combinatorics.py +103 -0
  222. bead/templates/filler.py +605 -0
  223. bead/templates/renderers.py +177 -0
  224. bead/templates/resolver.py +178 -0
  225. bead/templates/strategies.py +1806 -0
  226. bead/templates/streaming.py +195 -0
  227. bead-0.1.0.dist-info/METADATA +212 -0
  228. bead-0.1.0.dist-info/RECORD +231 -0
  229. bead-0.1.0.dist-info/WHEEL +4 -0
  230. bead-0.1.0.dist-info/entry_points.txt +2 -0
  231. bead-0.1.0.dist-info/licenses/LICENSE +21 -0
bead/items/scoring.py ADDED
@@ -0,0 +1,448 @@
+ """Abstract base classes for item scoring with language models.
+
+ This module provides language-agnostic base classes for scoring items
+ using various metrics (log probability, perplexity, embeddings).
+ """
+
+ from __future__ import annotations
+
+ from abc import ABC, abstractmethod
+ from collections.abc import Callable
+ from pathlib import Path
+ from typing import TYPE_CHECKING
+ from uuid import UUID, uuid4
+
+ import numpy as np
+
+ from bead.items.cache import ModelOutputCache
+ from bead.items.item import Item
+
+ if TYPE_CHECKING:
+     from bead.items.adapters.huggingface import HuggingFaceLanguageModel
+
+
+ class ItemScorer(ABC):
+     """Abstract base class for item scoring.
+
+     ItemScorer provides a framework for assigning numeric scores to items
+     based on various criteria (language model probability, acceptability,
+     similarity, etc.).
+
+     Examples
+     --------
+     Implementing a custom scorer:
+
+     >>> class AcceptabilityScorer(ItemScorer):
+     ...     def score(self, item):
+     ...         # Score based on some acceptability metric
+     ...         text = item.rendered_elements.get("text", "")
+     ...         return self._compute_acceptability(text)
+     ...
+     ...     def score_batch(self, items):
+     ...         return [self.score(item) for item in items]
+     """
+
+     @abstractmethod
+     def score(self, item: Item) -> float:
+         """Compute score for a single item.
+
+         Parameters
+         ----------
+         item : Item
+             Item to score.
+
+         Returns
+         -------
+         float
+             Numeric score for the item.
+         """
+         ...
+
+     def score_batch(self, items: list[Item]) -> list[float]:
+         """Compute scores for multiple items.
+
+         The default implementation calls score() for each item sequentially.
+         Subclasses can override it for batched processing.
+
+         Parameters
+         ----------
+         items : list[Item]
+             Items to score.
+
+         Returns
+         -------
+         list[float]
+             Scores for each item.
+
+         Examples
+         --------
+         >>> scorer = ConcreteScorer()
+         >>> items = [item1, item2, item3]
+         >>> scores = scorer.score_batch(items)  # doctest: +SKIP
+         >>> len(scores) == len(items)  # doctest: +SKIP
+         True
+         """
+         return [self.score(item) for item in items]
+
+     def score_with_metadata(
+         self, items: list[Item]
+     ) -> dict[UUID, dict[str, float | str]]:
+         """Score items and return results with metadata.
+
+         Parameters
+         ----------
+         items
+             Items to score.
+
+         Returns
+         -------
+         dict[UUID, dict[str, float | str]]
+             Dictionary mapping item UUIDs to score dictionaries.
+             Each score dict contains at least a "score" key.
+
+         Examples
+         --------
+         >>> scorer = ConcreteScorer()
+         >>> results = scorer.score_with_metadata([item1, item2])  # doctest: +SKIP
+         >>> results[item1.id]["score"]  # doctest: +SKIP
+         -42.5
+         """
+         scores = self.score_batch(items)
+
+         results: dict[UUID, dict[str, float | str]] = {}
+         for item, score in zip(items, scores, strict=True):
+             results[item.id] = {"score": score}
+
+         return results
+
+
+ class LanguageModelScorer(ItemScorer):
+     """Scorer using language model log probabilities.
+
+     Scores items based on their log probability under a language model.
+     Uses HuggingFace adapters for model inference and supports caching.
+
+     Parameters
+     ----------
+     model_name : str
+         HuggingFace model identifier (e.g., "gpt2", "gpt2-medium").
+     cache_dir : Path | str | None
+         Directory for caching model outputs. If None, a default temporary
+         cache directory (.cache/temp) is used.
+     device : str
+         Device to run the model on ("cpu", "cuda", "mps").
+     text_key : str
+         Key in item.rendered_elements to use as text (default: "text").
+     model_version : str
+         Version string for cache tracking.
+
+     Examples
+     --------
+     >>> from pathlib import Path
+     >>> scorer = LanguageModelScorer(
+     ...     model_name="gpt2",
+     ...     cache_dir=Path(".cache"),
+     ...     device="cpu"
+     ... )  # doctest: +SKIP
+     >>> score = scorer.score(item)  # doctest: +SKIP
+     >>> score < 0  # log probabilities are negative  # doctest: +SKIP
+     True
+     """
+
+     def __init__(
+         self,
+         model_name: str,
+         cache_dir: Path | str | None = None,
+         device: str = "cpu",
+         text_key: str = "text",
+         model_version: str = "unknown",
+     ) -> None:
+         self.model_name = model_name
+         self.cache_dir = Path(cache_dir) if cache_dir else None
+         self.device = device
+         self.text_key = text_key
+         self.model_version = model_version
+
+         # model and cache are loaded lazily on first use
+         self._model: HuggingFaceLanguageModel | None = None
+         self._cache: ModelOutputCache | None = None
+
+     @property
+     def model(self) -> HuggingFaceLanguageModel:
+         """Get the model, loading it if necessary.
+
+         Returns
+         -------
+         HuggingFaceLanguageModel
+             The language model adapter.
+         """
+         if self._model is None:
+             # import here to avoid a circular dependency
+             from bead.items.adapters.huggingface import (  # noqa: PLC0415
+                 HuggingFaceLanguageModel,
+             )
+
+             # set up the cache
+             if self.cache_dir:
+                 self._cache = ModelOutputCache(cache_dir=self.cache_dir)
+             else:
+                 # no cache_dir configured: fall back to a temporary location
+                 self._cache = ModelOutputCache(cache_dir=Path(".cache/temp"))
+
+             self._model = HuggingFaceLanguageModel(
+                 model_name=self.model_name,
+                 cache=self._cache,
+                 device=self.device,  # type: ignore[arg-type]
+                 model_version=self.model_version,
+             )
+
+         return self._model
+
+     def score(self, item: Item) -> float:
+         """Compute log probability score for an item.
+
+         Parameters
+         ----------
+         item : Item
+             Item to score.
+
+         Returns
+         -------
+         float
+             Log probability of the item's text under the language model.
+
+         Raises
+         ------
+         KeyError
+             If text_key is not found in item.rendered_elements.
+         """
+         text = item.rendered_elements.get(self.text_key)
+         if text is None:
+             raise KeyError(f"Key '{self.text_key}' not found in item.rendered_elements")
+
+         return self.model.compute_log_probability(text)
+
+     def score_batch(
+         self, items: list[Item], batch_size: int | None = None
+     ) -> list[float]:
+         """Compute scores for multiple items efficiently using batched inference.
+
+         Parameters
+         ----------
+         items : list[Item]
+             Items to score.
+         batch_size : int | None, default=None
+             Number of items to process in each batch. If None, a batch size
+             is inferred automatically from the available resources.
+
+         Returns
+         -------
+         list[float]
+             Log probabilities for each item.
+         """
+         # extract the text for each item
+         texts: list[str] = []
+         for item in items:
+             text_val = item.rendered_elements.get(self.text_key)
+             if text_val is None:
+                 msg = (
+                     f"Key '{self.text_key}' not found in "
+                     f"item {item.id}.rendered_elements"
+                 )
+                 raise KeyError(msg)
+             # type narrowing: text_val is known to be str after this check
+             assert isinstance(text_val, str), f"Expected str, got {type(text_val)}"
+             texts.append(text_val)
+
+         # use batched scoring if available, otherwise fall back to sequential
+         if hasattr(self.model, "compute_log_probability_batch"):
+             scores = self.model.compute_log_probability_batch(
+                 texts, batch_size=batch_size
+             )
+         else:
+             # fallback for models without batch support
+             scores = [self.model.compute_log_probability(text) for text in texts]
+
+         return scores
+
+     def score_with_metadata(
+         self, items: list[Item]
+     ) -> dict[UUID, dict[str, float | str]]:
+         """Score items and return results with additional metrics.
+
+         Returns the log probability and the scoring model's name for each
+         item.
+
+         Parameters
+         ----------
+         items
+             Items to score.
+
+         Returns
+         -------
+         dict[UUID, dict[str, float | str]]
+             Dictionary with "score" (log probability), "log_probability",
+             and "model" for each item.
+         """
+         scores = self.score_batch(items)
+
+         results: dict[UUID, dict[str, float | str]] = {}
+         for item, score in zip(items, scores, strict=True):
+             # perplexity would be exp(-log_prob / num_tokens), but computing
+             # it requires the token count from the model; for now only the
+             # log probability is included
+             results[item.id] = {
+                 "score": score,
+                 "log_probability": score,
+                 "model": self.model_name,
+             }
+
+         return results
+
+
+ class ForcedChoiceScorer(ItemScorer):
+     """Scorer for N-AFC (forced-choice) items with multiple options.
+
+     Computes comparison scores for forced-choice items by scoring each
+     option and applying a comparison function (e.g., max difference,
+     variance, entropy).
+
+     Parameters
+     ----------
+     base_scorer : ItemScorer
+         Base scorer to use for individual options.
+     comparison_fn : callable | None
+         Function that takes a list of scores and returns a comparison
+         metric. Defaults to the standard deviation of the scores.
+     option_prefix : str
+         Prefix for option names in rendered_elements (default: "option").
+
+     Examples
+     --------
+     >>> base = LanguageModelScorer("gpt2", device="cpu")  # doctest: +SKIP
+     >>> fc_scorer = ForcedChoiceScorer(
+     ...     base_scorer=base,
+     ...     comparison_fn=lambda scores: max(scores) - min(scores)  # range
+     ... )  # doctest: +SKIP
+     >>> # item with option_a, option_b, option_c, ...
+     >>> score = fc_scorer.score(forced_choice_item)  # doctest: +SKIP
+     """
+
+     def __init__(
+         self,
+         base_scorer: ItemScorer,
+         comparison_fn: Callable[[list[float]], float] | None = None,
+         option_prefix: str = "option",
+     ) -> None:
+         self.base_scorer = base_scorer
+         self.option_prefix = option_prefix
+
+         if comparison_fn is None:
+             # default: standard deviation of scores
+             self.comparison_fn: Callable[[list[float]], float] = (
+                 self._default_comparison
+             )
+         else:
+             self.comparison_fn = comparison_fn
+
+     @staticmethod
+     def _default_comparison(scores: list[float]) -> float:
+         """Compute the standard deviation of the scores."""
+         return float(np.std(scores))
+
+     def score(self, item: Item) -> float:
+         """Score a forced-choice item.
+
+         Extracts all options from item.rendered_elements (option_a,
+         option_b, ...), scores each option, and applies the comparison
+         function.
+
+         Parameters
+         ----------
+         item : Item
+             Forced-choice item with multiple options.
+
+         Returns
+         -------
+         float
+             Comparison score across all options.
+
+         Raises
+         ------
+         ValueError
+             If the item has neither precomputed scores nor option elements.
+         """
+         # try precomputed scores from metadata first, looking for
+         # lm_score_0, lm_score_1, ... or lm_score_a, lm_score_b, ...
+         precomputed_scores = self._extract_precomputed_scores(item)
+         if precomputed_scores:
+             return self.comparison_fn(precomputed_scores)
+
+         # otherwise score each option element
+         option_scores: list[float] = []
+         letters = "abcdefghijklmnopqrstuvwxyz"
+
+         for letter in letters:
+             option_name = f"{self.option_prefix}_{letter}"
+             if option_name not in item.rendered_elements:
+                 break  # no more options
+
+             # create a temporary item for scoring this option
+             option_text = item.rendered_elements[option_name]
+             temp_item = Item(
+                 item_template_id=uuid4(),
+                 rendered_elements={"text": option_text},
+             )
+             score: float = self.base_scorer.score(temp_item)
+             option_scores.append(score)
+
+         if not option_scores:
+             raise ValueError(
+                 f"Item has no options with prefix '{self.option_prefix}_' "
+                 "in rendered_elements"
+             )
+
+         return self.comparison_fn(option_scores)
+
+     def _extract_precomputed_scores(self, item: Item) -> list[float] | None:
+         """Extract precomputed scores from item metadata if available.
+
+         Looks for keys like lm_score_0, lm_score_1, ... or
+         lm_score_a, lm_score_b, ...
+
+         Parameters
+         ----------
+         item : Item
+             Item to extract scores from.
+
+         Returns
+         -------
+         list[float] | None
+             List of scores if found, None otherwise.
+         """
+         scores: list[float] = []
+         letters = "abcdefghijklmnopqrstuvwxyz"
+
+         # try numeric indices first (lm_score_0, lm_score_1, ...)
+         for i in range(26):  # at most 26 options
+             key = f"lm_score_{i}"
+             if key in item.item_metadata:
+                 metadata_val = item.item_metadata[key]
+                 if not isinstance(metadata_val, int | float | str):
+                     raise TypeError(f"Expected numeric type, got {type(metadata_val)}")
+                 scores.append(float(metadata_val))
+             else:
+                 break
+
+         if scores:
+             return scores
+
+         # try letter indices (lm_score_a, lm_score_b, ...)
+         scores = []
+         for letter in letters:
+             key = f"lm_score_{letter}"
+             if key in item.item_metadata:
+                 metadata_val = item.item_metadata[key]
+                 if not isinstance(metadata_val, int | float | str):
+                     raise TypeError(f"Expected numeric type, got {type(metadata_val)}")
+                 scores.append(float(metadata_val))
+             else:
+                 break
+
+         return scores if scores else None
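As a quick orientation to how these pieces compose, here is a minimal sketch that exercises the ItemScorer contract without loading a language model. It is illustrative only, not part of the package: WordCountScorer and the two-option item are invented for the example, and it assumes an Item can be constructed from just item_template_id and rendered_elements (mirroring the temp_item construction inside ForcedChoiceScorer.score), with item_metadata defaulting to empty.

from uuid import uuid4

from bead.items.item import Item
from bead.items.scoring import ForcedChoiceScorer, ItemScorer


class WordCountScorer(ItemScorer):
    """Toy stand-in for LanguageModelScorer: longer text scores lower."""

    def score(self, item: Item) -> float:
        text = item.rendered_elements.get("text", "")
        return -float(len(text.split()))


fc_scorer = ForcedChoiceScorer(
    base_scorer=WordCountScorer(),
    comparison_fn=lambda scores: max(scores) - min(scores),  # range of scores
)

item = Item(
    item_template_id=uuid4(),
    rendered_elements={
        "option_a": "The cat sat.",
        "option_b": "The cat sat on the mat.",
    },
)

# option_a scores -3.0 (3 words) and option_b scores -6.0 (6 words), so the
# comparison function returns -3.0 - (-6.0) = 3.0
print(fc_scorer.score(item))

Because ForcedChoiceScorer consults item_metadata for lm_score_* keys before scoring any options, precomputing option scores once (for example at item generation time) lets the comparison function run without invoking the base scorer at all.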
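On the perplexity deferred in LanguageModelScorer.score_with_metadata: the in-code comment gives the relation perplexity = exp(-log_prob / num_tokens); only the token count is missing there. A worked example with made-up numbers (the -42.5 echoes the sample score in the ItemScorer docstring; the token count is hypothetical):

import math

log_prob = -42.5  # summed log probability of a sentence (natural log)
num_tokens = 10   # hypothetical token count for that sentence

# perplexity = exp(-log_prob / num_tokens), per the comment in
# score_with_metadata
perplexity = math.exp(-log_prob / num_tokens)
print(round(perplexity, 2))  # 70.11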