bead 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bead/__init__.py +11 -0
- bead/__main__.py +11 -0
- bead/active_learning/__init__.py +15 -0
- bead/active_learning/config.py +231 -0
- bead/active_learning/loop.py +566 -0
- bead/active_learning/models/__init__.py +24 -0
- bead/active_learning/models/base.py +852 -0
- bead/active_learning/models/binary.py +910 -0
- bead/active_learning/models/categorical.py +943 -0
- bead/active_learning/models/cloze.py +862 -0
- bead/active_learning/models/forced_choice.py +956 -0
- bead/active_learning/models/free_text.py +773 -0
- bead/active_learning/models/lora.py +365 -0
- bead/active_learning/models/magnitude.py +835 -0
- bead/active_learning/models/multi_select.py +795 -0
- bead/active_learning/models/ordinal_scale.py +811 -0
- bead/active_learning/models/peft_adapter.py +155 -0
- bead/active_learning/models/random_effects.py +639 -0
- bead/active_learning/selection.py +354 -0
- bead/active_learning/strategies.py +391 -0
- bead/active_learning/trainers/__init__.py +26 -0
- bead/active_learning/trainers/base.py +210 -0
- bead/active_learning/trainers/data_collator.py +172 -0
- bead/active_learning/trainers/dataset_utils.py +261 -0
- bead/active_learning/trainers/huggingface.py +304 -0
- bead/active_learning/trainers/lightning.py +324 -0
- bead/active_learning/trainers/metrics.py +424 -0
- bead/active_learning/trainers/mixed_effects.py +551 -0
- bead/active_learning/trainers/model_wrapper.py +509 -0
- bead/active_learning/trainers/registry.py +104 -0
- bead/adapters/__init__.py +11 -0
- bead/adapters/huggingface.py +61 -0
- bead/behavioral/__init__.py +116 -0
- bead/behavioral/analytics.py +646 -0
- bead/behavioral/extraction.py +343 -0
- bead/behavioral/merging.py +343 -0
- bead/cli/__init__.py +11 -0
- bead/cli/active_learning.py +513 -0
- bead/cli/active_learning_commands.py +779 -0
- bead/cli/completion.py +359 -0
- bead/cli/config.py +624 -0
- bead/cli/constraint_builders.py +286 -0
- bead/cli/deployment.py +859 -0
- bead/cli/deployment_trials.py +493 -0
- bead/cli/deployment_ui.py +332 -0
- bead/cli/display.py +378 -0
- bead/cli/items.py +960 -0
- bead/cli/items_factories.py +776 -0
- bead/cli/list_constraints.py +714 -0
- bead/cli/lists.py +490 -0
- bead/cli/main.py +430 -0
- bead/cli/models.py +877 -0
- bead/cli/resource_loaders.py +621 -0
- bead/cli/resources.py +1036 -0
- bead/cli/shell.py +356 -0
- bead/cli/simulate.py +840 -0
- bead/cli/templates.py +1158 -0
- bead/cli/training.py +1080 -0
- bead/cli/utils.py +614 -0
- bead/cli/workflow.py +1273 -0
- bead/config/__init__.py +68 -0
- bead/config/active_learning.py +1009 -0
- bead/config/config.py +192 -0
- bead/config/defaults.py +118 -0
- bead/config/deployment.py +217 -0
- bead/config/env.py +147 -0
- bead/config/item.py +45 -0
- bead/config/list.py +193 -0
- bead/config/loader.py +149 -0
- bead/config/logging.py +42 -0
- bead/config/model.py +49 -0
- bead/config/paths.py +46 -0
- bead/config/profiles.py +320 -0
- bead/config/resources.py +47 -0
- bead/config/serialization.py +210 -0
- bead/config/simulation.py +206 -0
- bead/config/template.py +238 -0
- bead/config/validation.py +267 -0
- bead/data/__init__.py +65 -0
- bead/data/base.py +87 -0
- bead/data/identifiers.py +97 -0
- bead/data/language_codes.py +61 -0
- bead/data/metadata.py +270 -0
- bead/data/range.py +123 -0
- bead/data/repository.py +358 -0
- bead/data/serialization.py +249 -0
- bead/data/timestamps.py +89 -0
- bead/data/validation.py +349 -0
- bead/data_collection/__init__.py +11 -0
- bead/data_collection/jatos.py +223 -0
- bead/data_collection/merger.py +154 -0
- bead/data_collection/prolific.py +198 -0
- bead/deployment/__init__.py +5 -0
- bead/deployment/distribution.py +402 -0
- bead/deployment/jatos/__init__.py +1 -0
- bead/deployment/jatos/api.py +200 -0
- bead/deployment/jatos/exporter.py +210 -0
- bead/deployment/jspsych/__init__.py +9 -0
- bead/deployment/jspsych/biome.json +44 -0
- bead/deployment/jspsych/config.py +411 -0
- bead/deployment/jspsych/generator.py +598 -0
- bead/deployment/jspsych/package.json +51 -0
- bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
- bead/deployment/jspsych/randomizer.py +299 -0
- bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
- bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
- bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
- bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
- bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
- bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
- bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
- bead/deployment/jspsych/src/plugins/rating.ts +248 -0
- bead/deployment/jspsych/src/slopit/index.ts +9 -0
- bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
- bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
- bead/deployment/jspsych/templates/experiment.css +1 -0
- bead/deployment/jspsych/templates/experiment.js.template +289 -0
- bead/deployment/jspsych/templates/index.html +51 -0
- bead/deployment/jspsych/templates/randomizer.js +241 -0
- bead/deployment/jspsych/templates/randomizer.js.template +313 -0
- bead/deployment/jspsych/trials.py +723 -0
- bead/deployment/jspsych/tsconfig.json +23 -0
- bead/deployment/jspsych/tsup.config.ts +30 -0
- bead/deployment/jspsych/ui/__init__.py +1 -0
- bead/deployment/jspsych/ui/components.py +383 -0
- bead/deployment/jspsych/ui/styles.py +411 -0
- bead/dsl/__init__.py +80 -0
- bead/dsl/ast.py +168 -0
- bead/dsl/context.py +178 -0
- bead/dsl/errors.py +71 -0
- bead/dsl/evaluator.py +570 -0
- bead/dsl/grammar.lark +81 -0
- bead/dsl/parser.py +231 -0
- bead/dsl/stdlib.py +929 -0
- bead/evaluation/__init__.py +13 -0
- bead/evaluation/convergence.py +485 -0
- bead/evaluation/interannotator.py +398 -0
- bead/items/__init__.py +40 -0
- bead/items/adapters/__init__.py +70 -0
- bead/items/adapters/anthropic.py +224 -0
- bead/items/adapters/api_utils.py +167 -0
- bead/items/adapters/base.py +216 -0
- bead/items/adapters/google.py +259 -0
- bead/items/adapters/huggingface.py +1074 -0
- bead/items/adapters/openai.py +323 -0
- bead/items/adapters/registry.py +202 -0
- bead/items/adapters/sentence_transformers.py +224 -0
- bead/items/adapters/togetherai.py +309 -0
- bead/items/binary.py +515 -0
- bead/items/cache.py +558 -0
- bead/items/categorical.py +593 -0
- bead/items/cloze.py +757 -0
- bead/items/constructor.py +784 -0
- bead/items/forced_choice.py +413 -0
- bead/items/free_text.py +681 -0
- bead/items/generation.py +432 -0
- bead/items/item.py +396 -0
- bead/items/item_template.py +787 -0
- bead/items/magnitude.py +573 -0
- bead/items/multi_select.py +621 -0
- bead/items/ordinal_scale.py +569 -0
- bead/items/scoring.py +448 -0
- bead/items/validation.py +723 -0
- bead/lists/__init__.py +30 -0
- bead/lists/balancer.py +263 -0
- bead/lists/constraints.py +1067 -0
- bead/lists/experiment_list.py +286 -0
- bead/lists/list_collection.py +378 -0
- bead/lists/partitioner.py +1141 -0
- bead/lists/stratification.py +254 -0
- bead/participants/__init__.py +73 -0
- bead/participants/collection.py +699 -0
- bead/participants/merging.py +312 -0
- bead/participants/metadata_spec.py +491 -0
- bead/participants/models.py +276 -0
- bead/resources/__init__.py +29 -0
- bead/resources/adapters/__init__.py +19 -0
- bead/resources/adapters/base.py +104 -0
- bead/resources/adapters/cache.py +128 -0
- bead/resources/adapters/glazing.py +508 -0
- bead/resources/adapters/registry.py +117 -0
- bead/resources/adapters/unimorph.py +796 -0
- bead/resources/classification.py +856 -0
- bead/resources/constraint_builders.py +329 -0
- bead/resources/constraints.py +165 -0
- bead/resources/lexical_item.py +223 -0
- bead/resources/lexicon.py +744 -0
- bead/resources/loaders.py +209 -0
- bead/resources/template.py +441 -0
- bead/resources/template_collection.py +707 -0
- bead/resources/template_generation.py +349 -0
- bead/simulation/__init__.py +29 -0
- bead/simulation/annotators/__init__.py +15 -0
- bead/simulation/annotators/base.py +175 -0
- bead/simulation/annotators/distance_based.py +135 -0
- bead/simulation/annotators/lm_based.py +114 -0
- bead/simulation/annotators/oracle.py +182 -0
- bead/simulation/annotators/random.py +181 -0
- bead/simulation/dsl_extension/__init__.py +3 -0
- bead/simulation/noise_models/__init__.py +13 -0
- bead/simulation/noise_models/base.py +42 -0
- bead/simulation/noise_models/random_noise.py +82 -0
- bead/simulation/noise_models/systematic.py +132 -0
- bead/simulation/noise_models/temperature.py +86 -0
- bead/simulation/runner.py +144 -0
- bead/simulation/strategies/__init__.py +23 -0
- bead/simulation/strategies/base.py +123 -0
- bead/simulation/strategies/binary.py +103 -0
- bead/simulation/strategies/categorical.py +123 -0
- bead/simulation/strategies/cloze.py +224 -0
- bead/simulation/strategies/forced_choice.py +127 -0
- bead/simulation/strategies/free_text.py +105 -0
- bead/simulation/strategies/magnitude.py +116 -0
- bead/simulation/strategies/multi_select.py +129 -0
- bead/simulation/strategies/ordinal_scale.py +131 -0
- bead/templates/__init__.py +27 -0
- bead/templates/adapters/__init__.py +17 -0
- bead/templates/adapters/base.py +128 -0
- bead/templates/adapters/cache.py +178 -0
- bead/templates/adapters/huggingface.py +312 -0
- bead/templates/combinatorics.py +103 -0
- bead/templates/filler.py +605 -0
- bead/templates/renderers.py +177 -0
- bead/templates/resolver.py +178 -0
- bead/templates/strategies.py +1806 -0
- bead/templates/streaming.py +195 -0
- bead-0.1.0.dist-info/METADATA +212 -0
- bead-0.1.0.dist-info/RECORD +231 -0
- bead-0.1.0.dist-info/WHEEL +4 -0
- bead-0.1.0.dist-info/entry_points.txt +2 -0
- bead-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
"""Google Generative AI adapter for item construction.
|
|
2
|
+
|
|
3
|
+
This module provides a ModelAdapter implementation for Google's Generative AI
|
|
4
|
+
models (Gemini), supporting natural language inference via prompting and
|
|
5
|
+
embeddings. Note that Gemini API does not provide direct access to log
|
|
6
|
+
probabilities.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import os
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
|
|
15
|
+
try:
|
|
16
|
+
import google.generativeai as genai
|
|
17
|
+
except ImportError as e:
|
|
18
|
+
raise ImportError(
|
|
19
|
+
"google-generativeai package is required for Google adapter. "
|
|
20
|
+
"Install it with: pip install google-generativeai"
|
|
21
|
+
) from e
|
|
22
|
+
|
|
23
|
+
from bead.items.adapters.api_utils import rate_limit, retry_with_backoff
|
|
24
|
+
from bead.items.adapters.base import ModelAdapter
|
|
25
|
+
from bead.items.cache import ModelOutputCache
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class GoogleAdapter(ModelAdapter):
    """ModelAdapter backed by Google's Generative AI (Gemini) API.

    Supports prompted natural language inference and text embeddings.
    The Gemini API does not expose token-level probabilities, so the
    log-probability and perplexity operations always raise.

    Parameters
    ----------
    model_name : str
        Gemini model identifier (default: "gemini-pro").
    api_key : str | None
        Google API key. Falls back to the GOOGLE_API_KEY environment
        variable when None.
    cache : ModelOutputCache | None
        Cache for model outputs. An in-memory cache is created when None.
    model_version : str
        Model version recorded with cached results (default: "latest").
    embedding_model : str
        Identifier of the embedding model (default: "models/embedding-001").

    Attributes
    ----------
    model_name : str
        Gemini model identifier (e.g., "gemini-pro").
    model : genai.GenerativeModel
        Configured Google Generative AI model instance.
    embedding_model : str
        Identifier of the embedding model in use.

    Raises
    ------
    ValueError
        If neither ``api_key`` nor the GOOGLE_API_KEY environment
        variable supplies a key.
    """

    def __init__(
        self,
        model_name: str = "gemini-pro",
        api_key: str | None = None,
        cache: ModelOutputCache | None = None,
        model_version: str = "latest",
        embedding_model: str = "models/embedding-001",
    ) -> None:
        super().__init__(
            model_name=model_name,
            cache=ModelOutputCache(backend="memory") if cache is None else cache,
            model_version=model_version,
        )

        # Resolve the API key: explicit argument wins, then the environment.
        resolved_key = api_key if api_key is not None else os.environ.get("GOOGLE_API_KEY")
        if resolved_key is None:
            raise ValueError(
                "Google API key must be provided via api_key parameter "
                "or GOOGLE_API_KEY environment variable"
            )

        genai.configure(api_key=resolved_key)
        self.model = genai.GenerativeModel(model_name)
        self.embedding_model = embedding_model

    def compute_log_probability(self, text: str) -> float:
        """Compute log probability of text.

        Unsupported: the Gemini API exposes no token probabilities.

        Raises
        ------
        NotImplementedError
            Always raised - Gemini API does not provide log probabilities.
        """
        raise NotImplementedError(
            "Log probability computation is not supported by Google Generative AI. "
            "Gemini does not provide access to token-level probabilities."
        )

    def compute_perplexity(self, text: str) -> float:
        """Compute perplexity of text.

        Unsupported: perplexity needs log probabilities, which the
        Gemini API does not provide.

        Raises
        ------
        NotImplementedError
            Always raised - requires log probability support.
        """
        raise NotImplementedError(
            "Perplexity computation is not supported by Google Generative AI. "
            "This operation requires log probabilities, which Gemini does not provide."
        )

    @retry_with_backoff(
        max_retries=3,
        initial_delay=1.0,
        backoff_factor=2.0,
        exceptions=(Exception,),  # the Google SDK raises generic exceptions
    )
    @rate_limit(calls_per_minute=60)
    def get_embedding(self, text: str) -> np.ndarray:
        """Return the embedding vector for *text*.

        Results are cached, keyed on the embedding model and the input.

        Parameters
        ----------
        text : str
            Text to embed.

        Returns
        -------
        np.ndarray
            Embedding vector for the text.
        """
        hit = self.cache.get(
            model_name=self.embedding_model, operation="embedding", text=text
        )
        if hit is not None:
            return np.array(hit)

        # Cache miss: ask the embeddings endpoint.
        api_result = genai.embed_content(
            model=self.embedding_model,
            content=text,
            task_type="retrieval_document",
        )
        vector = np.array(api_result["embedding"])

        # Store a plain-list copy so the cache backend stays JSON-friendly.
        self.cache.set(
            model_name=self.embedding_model,
            operation="embedding",
            result=vector.tolist(),
            model_version=self.model_version,
            text=text,
        )

        return vector

    @retry_with_backoff(
        max_retries=3,
        initial_delay=1.0,
        backoff_factor=2.0,
        exceptions=(Exception,),  # the Google SDK raises generic exceptions
    )
    @rate_limit(calls_per_minute=60)
    def compute_nli(self, premise: str, hypothesis: str) -> dict[str, float]:
        """Compute natural language inference scores via prompting.

        Asks the Gemini generation API (temperature 0) to classify the
        premise/hypothesis pair with a one-word answer, then maps that
        answer onto hard 0/1 scores.

        Parameters
        ----------
        premise : str
            Premise text.
        hypothesis : str
            Hypothesis text.

        Returns
        -------
        dict[str, float]
            Dictionary with keys "entailment", "neutral", "contradiction"
            mapping to probability scores.
        """
        hit = self.cache.get(
            model_name=self.model_name,
            operation="nli",
            premise=premise,
            hypothesis=hypothesis,
        )
        if hit is not None:
            return dict(hit)

        # Build the one-word classification prompt.
        prompt = (
            "Given the following premise and hypothesis, "
            "determine the relationship between them.\n\n"
            f"Premise: {premise}\n"
            f"Hypothesis: {hypothesis}\n\n"
            "Choose one of the following:\n"
            "- entailment: The hypothesis is definitely true given the premise\n"
            "- neutral: The hypothesis might be true given the premise\n"
            "- contradiction: The hypothesis is definitely false given the premise\n\n"
            "Respond with only one word: entailment, neutral, or contradiction."
        )

        response = self.model.generate_content(
            prompt,
            generation_config=genai.types.GenerationConfig(
                temperature=0.0,
                max_output_tokens=10,
            ),
        )

        if not response.text:
            raise ValueError("API response did not include text")

        verdict = response.text.strip().lower()

        # One-hot scores; the first label found in the answer wins,
        # with "neutral" as the fallback for unparseable replies.
        label_scores: dict[str, float] = {
            "entailment": 0.0,
            "neutral": 0.0,
            "contradiction": 0.0,
        }
        for label in ("entailment", "neutral", "contradiction"):
            if label in verdict:
                label_scores[label] = 1.0
                break
        else:
            label_scores["neutral"] = 1.0

        self.cache.set(
            model_name=self.model_name,
            operation="nli",
            result=label_scores,
            model_version=self.model_version,
            premise=premise,
            hypothesis=hypothesis,
        )

        return label_scores