bead 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231)
  1. bead/__init__.py +11 -0
  2. bead/__main__.py +11 -0
  3. bead/active_learning/__init__.py +15 -0
  4. bead/active_learning/config.py +231 -0
  5. bead/active_learning/loop.py +566 -0
  6. bead/active_learning/models/__init__.py +24 -0
  7. bead/active_learning/models/base.py +852 -0
  8. bead/active_learning/models/binary.py +910 -0
  9. bead/active_learning/models/categorical.py +943 -0
  10. bead/active_learning/models/cloze.py +862 -0
  11. bead/active_learning/models/forced_choice.py +956 -0
  12. bead/active_learning/models/free_text.py +773 -0
  13. bead/active_learning/models/lora.py +365 -0
  14. bead/active_learning/models/magnitude.py +835 -0
  15. bead/active_learning/models/multi_select.py +795 -0
  16. bead/active_learning/models/ordinal_scale.py +811 -0
  17. bead/active_learning/models/peft_adapter.py +155 -0
  18. bead/active_learning/models/random_effects.py +639 -0
  19. bead/active_learning/selection.py +354 -0
  20. bead/active_learning/strategies.py +391 -0
  21. bead/active_learning/trainers/__init__.py +26 -0
  22. bead/active_learning/trainers/base.py +210 -0
  23. bead/active_learning/trainers/data_collator.py +172 -0
  24. bead/active_learning/trainers/dataset_utils.py +261 -0
  25. bead/active_learning/trainers/huggingface.py +304 -0
  26. bead/active_learning/trainers/lightning.py +324 -0
  27. bead/active_learning/trainers/metrics.py +424 -0
  28. bead/active_learning/trainers/mixed_effects.py +551 -0
  29. bead/active_learning/trainers/model_wrapper.py +509 -0
  30. bead/active_learning/trainers/registry.py +104 -0
  31. bead/adapters/__init__.py +11 -0
  32. bead/adapters/huggingface.py +61 -0
  33. bead/behavioral/__init__.py +116 -0
  34. bead/behavioral/analytics.py +646 -0
  35. bead/behavioral/extraction.py +343 -0
  36. bead/behavioral/merging.py +343 -0
  37. bead/cli/__init__.py +11 -0
  38. bead/cli/active_learning.py +513 -0
  39. bead/cli/active_learning_commands.py +779 -0
  40. bead/cli/completion.py +359 -0
  41. bead/cli/config.py +624 -0
  42. bead/cli/constraint_builders.py +286 -0
  43. bead/cli/deployment.py +859 -0
  44. bead/cli/deployment_trials.py +493 -0
  45. bead/cli/deployment_ui.py +332 -0
  46. bead/cli/display.py +378 -0
  47. bead/cli/items.py +960 -0
  48. bead/cli/items_factories.py +776 -0
  49. bead/cli/list_constraints.py +714 -0
  50. bead/cli/lists.py +490 -0
  51. bead/cli/main.py +430 -0
  52. bead/cli/models.py +877 -0
  53. bead/cli/resource_loaders.py +621 -0
  54. bead/cli/resources.py +1036 -0
  55. bead/cli/shell.py +356 -0
  56. bead/cli/simulate.py +840 -0
  57. bead/cli/templates.py +1158 -0
  58. bead/cli/training.py +1080 -0
  59. bead/cli/utils.py +614 -0
  60. bead/cli/workflow.py +1273 -0
  61. bead/config/__init__.py +68 -0
  62. bead/config/active_learning.py +1009 -0
  63. bead/config/config.py +192 -0
  64. bead/config/defaults.py +118 -0
  65. bead/config/deployment.py +217 -0
  66. bead/config/env.py +147 -0
  67. bead/config/item.py +45 -0
  68. bead/config/list.py +193 -0
  69. bead/config/loader.py +149 -0
  70. bead/config/logging.py +42 -0
  71. bead/config/model.py +49 -0
  72. bead/config/paths.py +46 -0
  73. bead/config/profiles.py +320 -0
  74. bead/config/resources.py +47 -0
  75. bead/config/serialization.py +210 -0
  76. bead/config/simulation.py +206 -0
  77. bead/config/template.py +238 -0
  78. bead/config/validation.py +267 -0
  79. bead/data/__init__.py +65 -0
  80. bead/data/base.py +87 -0
  81. bead/data/identifiers.py +97 -0
  82. bead/data/language_codes.py +61 -0
  83. bead/data/metadata.py +270 -0
  84. bead/data/range.py +123 -0
  85. bead/data/repository.py +358 -0
  86. bead/data/serialization.py +249 -0
  87. bead/data/timestamps.py +89 -0
  88. bead/data/validation.py +349 -0
  89. bead/data_collection/__init__.py +11 -0
  90. bead/data_collection/jatos.py +223 -0
  91. bead/data_collection/merger.py +154 -0
  92. bead/data_collection/prolific.py +198 -0
  93. bead/deployment/__init__.py +5 -0
  94. bead/deployment/distribution.py +402 -0
  95. bead/deployment/jatos/__init__.py +1 -0
  96. bead/deployment/jatos/api.py +200 -0
  97. bead/deployment/jatos/exporter.py +210 -0
  98. bead/deployment/jspsych/__init__.py +9 -0
  99. bead/deployment/jspsych/biome.json +44 -0
  100. bead/deployment/jspsych/config.py +411 -0
  101. bead/deployment/jspsych/generator.py +598 -0
  102. bead/deployment/jspsych/package.json +51 -0
  103. bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
  104. bead/deployment/jspsych/randomizer.py +299 -0
  105. bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
  106. bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
  107. bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
  108. bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
  109. bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
  110. bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
  111. bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
  112. bead/deployment/jspsych/src/plugins/rating.ts +248 -0
  113. bead/deployment/jspsych/src/slopit/index.ts +9 -0
  114. bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
  115. bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
  116. bead/deployment/jspsych/templates/experiment.css +1 -0
  117. bead/deployment/jspsych/templates/experiment.js.template +289 -0
  118. bead/deployment/jspsych/templates/index.html +51 -0
  119. bead/deployment/jspsych/templates/randomizer.js +241 -0
  120. bead/deployment/jspsych/templates/randomizer.js.template +313 -0
  121. bead/deployment/jspsych/trials.py +723 -0
  122. bead/deployment/jspsych/tsconfig.json +23 -0
  123. bead/deployment/jspsych/tsup.config.ts +30 -0
  124. bead/deployment/jspsych/ui/__init__.py +1 -0
  125. bead/deployment/jspsych/ui/components.py +383 -0
  126. bead/deployment/jspsych/ui/styles.py +411 -0
  127. bead/dsl/__init__.py +80 -0
  128. bead/dsl/ast.py +168 -0
  129. bead/dsl/context.py +178 -0
  130. bead/dsl/errors.py +71 -0
  131. bead/dsl/evaluator.py +570 -0
  132. bead/dsl/grammar.lark +81 -0
  133. bead/dsl/parser.py +231 -0
  134. bead/dsl/stdlib.py +929 -0
  135. bead/evaluation/__init__.py +13 -0
  136. bead/evaluation/convergence.py +485 -0
  137. bead/evaluation/interannotator.py +398 -0
  138. bead/items/__init__.py +40 -0
  139. bead/items/adapters/__init__.py +70 -0
  140. bead/items/adapters/anthropic.py +224 -0
  141. bead/items/adapters/api_utils.py +167 -0
  142. bead/items/adapters/base.py +216 -0
  143. bead/items/adapters/google.py +259 -0
  144. bead/items/adapters/huggingface.py +1074 -0
  145. bead/items/adapters/openai.py +323 -0
  146. bead/items/adapters/registry.py +202 -0
  147. bead/items/adapters/sentence_transformers.py +224 -0
  148. bead/items/adapters/togetherai.py +309 -0
  149. bead/items/binary.py +515 -0
  150. bead/items/cache.py +558 -0
  151. bead/items/categorical.py +593 -0
  152. bead/items/cloze.py +757 -0
  153. bead/items/constructor.py +784 -0
  154. bead/items/forced_choice.py +413 -0
  155. bead/items/free_text.py +681 -0
  156. bead/items/generation.py +432 -0
  157. bead/items/item.py +396 -0
  158. bead/items/item_template.py +787 -0
  159. bead/items/magnitude.py +573 -0
  160. bead/items/multi_select.py +621 -0
  161. bead/items/ordinal_scale.py +569 -0
  162. bead/items/scoring.py +448 -0
  163. bead/items/validation.py +723 -0
  164. bead/lists/__init__.py +30 -0
  165. bead/lists/balancer.py +263 -0
  166. bead/lists/constraints.py +1067 -0
  167. bead/lists/experiment_list.py +286 -0
  168. bead/lists/list_collection.py +378 -0
  169. bead/lists/partitioner.py +1141 -0
  170. bead/lists/stratification.py +254 -0
  171. bead/participants/__init__.py +73 -0
  172. bead/participants/collection.py +699 -0
  173. bead/participants/merging.py +312 -0
  174. bead/participants/metadata_spec.py +491 -0
  175. bead/participants/models.py +276 -0
  176. bead/resources/__init__.py +29 -0
  177. bead/resources/adapters/__init__.py +19 -0
  178. bead/resources/adapters/base.py +104 -0
  179. bead/resources/adapters/cache.py +128 -0
  180. bead/resources/adapters/glazing.py +508 -0
  181. bead/resources/adapters/registry.py +117 -0
  182. bead/resources/adapters/unimorph.py +796 -0
  183. bead/resources/classification.py +856 -0
  184. bead/resources/constraint_builders.py +329 -0
  185. bead/resources/constraints.py +165 -0
  186. bead/resources/lexical_item.py +223 -0
  187. bead/resources/lexicon.py +744 -0
  188. bead/resources/loaders.py +209 -0
  189. bead/resources/template.py +441 -0
  190. bead/resources/template_collection.py +707 -0
  191. bead/resources/template_generation.py +349 -0
  192. bead/simulation/__init__.py +29 -0
  193. bead/simulation/annotators/__init__.py +15 -0
  194. bead/simulation/annotators/base.py +175 -0
  195. bead/simulation/annotators/distance_based.py +135 -0
  196. bead/simulation/annotators/lm_based.py +114 -0
  197. bead/simulation/annotators/oracle.py +182 -0
  198. bead/simulation/annotators/random.py +181 -0
  199. bead/simulation/dsl_extension/__init__.py +3 -0
  200. bead/simulation/noise_models/__init__.py +13 -0
  201. bead/simulation/noise_models/base.py +42 -0
  202. bead/simulation/noise_models/random_noise.py +82 -0
  203. bead/simulation/noise_models/systematic.py +132 -0
  204. bead/simulation/noise_models/temperature.py +86 -0
  205. bead/simulation/runner.py +144 -0
  206. bead/simulation/strategies/__init__.py +23 -0
  207. bead/simulation/strategies/base.py +123 -0
  208. bead/simulation/strategies/binary.py +103 -0
  209. bead/simulation/strategies/categorical.py +123 -0
  210. bead/simulation/strategies/cloze.py +224 -0
  211. bead/simulation/strategies/forced_choice.py +127 -0
  212. bead/simulation/strategies/free_text.py +105 -0
  213. bead/simulation/strategies/magnitude.py +116 -0
  214. bead/simulation/strategies/multi_select.py +129 -0
  215. bead/simulation/strategies/ordinal_scale.py +131 -0
  216. bead/templates/__init__.py +27 -0
  217. bead/templates/adapters/__init__.py +17 -0
  218. bead/templates/adapters/base.py +128 -0
  219. bead/templates/adapters/cache.py +178 -0
  220. bead/templates/adapters/huggingface.py +312 -0
  221. bead/templates/combinatorics.py +103 -0
  222. bead/templates/filler.py +605 -0
  223. bead/templates/renderers.py +177 -0
  224. bead/templates/resolver.py +178 -0
  225. bead/templates/strategies.py +1806 -0
  226. bead/templates/streaming.py +195 -0
  227. bead-0.1.0.dist-info/METADATA +212 -0
  228. bead-0.1.0.dist-info/RECORD +231 -0
  229. bead-0.1.0.dist-info/WHEEL +4 -0
  230. bead-0.1.0.dist-info/entry_points.txt +2 -0
  231. bead-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,259 @@
1
+ """Google Generative AI adapter for item construction.
2
+
3
+ This module provides a ModelAdapter implementation for Google's Generative AI
4
+ models (Gemini), supporting natural language inference via prompting and
5
+ embeddings. Note that Gemini API does not provide direct access to log
6
+ probabilities.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import os
12
+
13
+ import numpy as np
14
+
15
+ try:
16
+ import google.generativeai as genai
17
+ except ImportError as e:
18
+ raise ImportError(
19
+ "google-generativeai package is required for Google adapter. "
20
+ "Install it with: pip install google-generativeai"
21
+ ) from e
22
+
23
+ from bead.items.adapters.api_utils import rate_limit, retry_with_backoff
24
+ from bead.items.adapters.base import ModelAdapter
25
+ from bead.items.cache import ModelOutputCache
26
+
27
+
28
class GoogleAdapter(ModelAdapter):
    """Adapter for Google Generative AI models (Gemini).

    Provides access to Gemini models for natural language inference (via
    prompting) and embeddings. The adapter does not implement log probability
    or perplexity computation; both methods always raise
    ``NotImplementedError``.

    Parameters
    ----------
    model_name : str
        Gemini model identifier (default: "gemini-pro").
    api_key : str | None
        Google API key. If None or empty, uses the GOOGLE_API_KEY environment
        variable.
    cache : ModelOutputCache | None
        Cache for model outputs. If None, creates in-memory cache.
    model_version : str
        Model version for cache tracking (default: "latest").
    embedding_model : str
        Model to use for embeddings (default: "models/embedding-001").

    Attributes
    ----------
    model_name : str
        Gemini model identifier (e.g., "gemini-pro").
    model : genai.GenerativeModel
        Google Generative AI model instance.
    embedding_model : str
        Model to use for embeddings (default: "models/embedding-001").

    Raises
    ------
    ValueError
        If no non-empty API key is provided and GOOGLE_API_KEY is not set
        (or is empty).

    Notes
    -----
    NOTE(review): ``genai.configure`` is a module-level call, so the key
    presumably applies to every adapter created in the same process --
    confirm before using several adapters with different keys.
    """

    # Order matters: it is the order in which a raw model answer is scanned
    # for a label, and the key order of the returned score dictionary.
    _NLI_LABELS = ("entailment", "neutral", "contradiction")

    def __init__(
        self,
        model_name: str = "gemini-pro",
        api_key: str | None = None,
        cache: ModelOutputCache | None = None,
        model_version: str = "latest",
        embedding_model: str = "models/embedding-001",
    ) -> None:
        if cache is None:
            cache = ModelOutputCache(backend="memory")

        super().__init__(
            model_name=model_name, cache=cache, model_version=model_version
        )

        # An explicit key wins; otherwise fall back to the environment.
        # Empty strings count as "not provided" so a blank GOOGLE_API_KEY
        # fails here with a clear message instead of on the first request.
        resolved_key = api_key or os.environ.get("GOOGLE_API_KEY")
        if not resolved_key:
            raise ValueError(
                "Google API key must be provided via api_key parameter "
                "or GOOGLE_API_KEY environment variable"
            )

        genai.configure(api_key=resolved_key)
        self.model = genai.GenerativeModel(model_name)
        self.embedding_model = embedding_model

    def compute_log_probability(self, text: str) -> float:
        """Compute log probability of text.

        Not supported by Google Generative AI API.

        Raises
        ------
        NotImplementedError
            Always raised - Gemini API does not provide log probabilities.
        """
        raise NotImplementedError(
            "Log probability computation is not supported by Google Generative AI. "
            "Gemini does not provide access to token-level probabilities."
        )

    def compute_perplexity(self, text: str) -> float:
        """Compute perplexity of text.

        Not supported by Google Generative AI API (requires log probabilities).

        Raises
        ------
        NotImplementedError
            Always raised - requires log probability support.
        """
        raise NotImplementedError(
            "Perplexity computation is not supported by Google Generative AI. "
            "This operation requires log probabilities, which Gemini does not provide."
        )

    def get_embedding(self, text: str) -> np.ndarray:
        """Get embedding vector for text using Google's embedding model.

        The cache is consulted first; only a cache miss reaches the
        rate-limited, retried API call.

        Parameters
        ----------
        text : str
            Text to embed.

        Returns
        -------
        np.ndarray
            Embedding vector for the text.
        """
        cached = self.cache.get(
            model_name=self.embedding_model, operation="embedding", text=text
        )
        if cached is not None:
            return np.array(cached)

        embedding = np.array(self._request_embedding(text))

        # Stored as a plain list so the cached value does not depend on numpy.
        self.cache.set(
            model_name=self.embedding_model,
            operation="embedding",
            result=embedding.tolist(),
            model_version=self.model_version,
            text=text,
        )

        return embedding

    @retry_with_backoff(
        max_retries=3,
        initial_delay=1.0,
        backoff_factor=2.0,
        exceptions=(Exception,),  # Google API uses generic exceptions
    )
    @rate_limit(calls_per_minute=60)
    def _request_embedding(self, text: str):
        """Call the embedding endpoint and return the raw ``"embedding"`` entry.

        Kept separate from ``get_embedding`` so that rate limiting and retries
        apply to network calls only, never to cache hits.
        """
        result = genai.embed_content(
            model=self.embedding_model,
            content=text,
            task_type="retrieval_document",
        )
        return result["embedding"]

    def compute_nli(self, premise: str, hypothesis: str) -> dict[str, float]:
        """Compute natural language inference scores via prompting.

        Uses Gemini's generation API with a prompt to classify the relationship
        between premise and hypothesis. The cache is consulted first; only a
        cache miss reaches the rate-limited, retried API call.

        Parameters
        ----------
        premise : str
            Premise text.
        hypothesis : str
            Hypothesis text.

        Returns
        -------
        dict[str, float]
            Dictionary with keys "entailment", "neutral", "contradiction".
            The result is one-hot: the label found in the model's answer gets
            1.0 and the others 0.0. An answer containing none of the labels
            is scored as "neutral".

        Raises
        ------
        ValueError
            If the API response contains no text (after retries).
        """
        cached = self.cache.get(
            model_name=self.model_name,
            operation="nli",
            premise=premise,
            hypothesis=hypothesis,
        )
        if cached is not None:
            return dict(cached)

        answer = self._request_nli_answer(premise, hypothesis)
        scores = self._scores_from_answer(answer)

        self.cache.set(
            model_name=self.model_name,
            operation="nli",
            result=scores,
            model_version=self.model_version,
            premise=premise,
            hypothesis=hypothesis,
        )

        return scores

    @retry_with_backoff(
        max_retries=3,
        initial_delay=1.0,
        backoff_factor=2.0,
        exceptions=(Exception,),  # Google API uses generic exceptions
    )
    @rate_limit(calls_per_minute=60)
    def _request_nli_answer(self, premise: str, hypothesis: str) -> str:
        """Prompt the model for an NLI label and return its normalized answer.

        Kept separate from ``compute_nli`` so that rate limiting and retries
        apply to network calls only, never to cache hits. The empty-response
        check stays inside the retried section, so an empty answer is retried
        like any other failure.

        Returns
        -------
        str
            The response text, stripped and lower-cased.

        Raises
        ------
        ValueError
            If the API response did not include text.
        """
        prompt = (
            "Given the following premise and hypothesis, "
            "determine the relationship between them.\n\n"
            f"Premise: {premise}\n"
            f"Hypothesis: {hypothesis}\n\n"
            "Choose one of the following:\n"
            "- entailment: The hypothesis is definitely true given the premise\n"
            "- neutral: The hypothesis might be true given the premise\n"
            "- contradiction: The hypothesis is definitely false given the premise\n\n"
            "Respond with only one word: entailment, neutral, or contradiction."
        )

        # Temperature 0 and a small token budget: a single label is expected.
        response = self.model.generate_content(
            prompt,
            generation_config=genai.types.GenerationConfig(
                temperature=0.0,
                max_output_tokens=10,
            ),
        )

        if not response.text:
            raise ValueError("API response did not include text")

        return response.text.strip().lower()

    @classmethod
    def _scores_from_answer(cls, answer: str) -> dict[str, float]:
        """Map a normalized model answer to one-hot NLI scores.

        Labels are searched as substrings in the order of ``_NLI_LABELS``;
        the first one found wins. Falls back to "neutral" when no label
        appears in the answer.
        """
        matched = next(
            (label for label in cls._NLI_LABELS if label in answer),
            "neutral",
        )
        return {label: 1.0 if label == matched else 0.0 for label in cls._NLI_LABELS}