bead 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bead/__init__.py +11 -0
- bead/__main__.py +11 -0
- bead/active_learning/__init__.py +15 -0
- bead/active_learning/config.py +231 -0
- bead/active_learning/loop.py +566 -0
- bead/active_learning/models/__init__.py +24 -0
- bead/active_learning/models/base.py +852 -0
- bead/active_learning/models/binary.py +910 -0
- bead/active_learning/models/categorical.py +943 -0
- bead/active_learning/models/cloze.py +862 -0
- bead/active_learning/models/forced_choice.py +956 -0
- bead/active_learning/models/free_text.py +773 -0
- bead/active_learning/models/lora.py +365 -0
- bead/active_learning/models/magnitude.py +835 -0
- bead/active_learning/models/multi_select.py +795 -0
- bead/active_learning/models/ordinal_scale.py +811 -0
- bead/active_learning/models/peft_adapter.py +155 -0
- bead/active_learning/models/random_effects.py +639 -0
- bead/active_learning/selection.py +354 -0
- bead/active_learning/strategies.py +391 -0
- bead/active_learning/trainers/__init__.py +26 -0
- bead/active_learning/trainers/base.py +210 -0
- bead/active_learning/trainers/data_collator.py +172 -0
- bead/active_learning/trainers/dataset_utils.py +261 -0
- bead/active_learning/trainers/huggingface.py +304 -0
- bead/active_learning/trainers/lightning.py +324 -0
- bead/active_learning/trainers/metrics.py +424 -0
- bead/active_learning/trainers/mixed_effects.py +551 -0
- bead/active_learning/trainers/model_wrapper.py +509 -0
- bead/active_learning/trainers/registry.py +104 -0
- bead/adapters/__init__.py +11 -0
- bead/adapters/huggingface.py +61 -0
- bead/behavioral/__init__.py +116 -0
- bead/behavioral/analytics.py +646 -0
- bead/behavioral/extraction.py +343 -0
- bead/behavioral/merging.py +343 -0
- bead/cli/__init__.py +11 -0
- bead/cli/active_learning.py +513 -0
- bead/cli/active_learning_commands.py +779 -0
- bead/cli/completion.py +359 -0
- bead/cli/config.py +624 -0
- bead/cli/constraint_builders.py +286 -0
- bead/cli/deployment.py +859 -0
- bead/cli/deployment_trials.py +493 -0
- bead/cli/deployment_ui.py +332 -0
- bead/cli/display.py +378 -0
- bead/cli/items.py +960 -0
- bead/cli/items_factories.py +776 -0
- bead/cli/list_constraints.py +714 -0
- bead/cli/lists.py +490 -0
- bead/cli/main.py +430 -0
- bead/cli/models.py +877 -0
- bead/cli/resource_loaders.py +621 -0
- bead/cli/resources.py +1036 -0
- bead/cli/shell.py +356 -0
- bead/cli/simulate.py +840 -0
- bead/cli/templates.py +1158 -0
- bead/cli/training.py +1080 -0
- bead/cli/utils.py +614 -0
- bead/cli/workflow.py +1273 -0
- bead/config/__init__.py +68 -0
- bead/config/active_learning.py +1009 -0
- bead/config/config.py +192 -0
- bead/config/defaults.py +118 -0
- bead/config/deployment.py +217 -0
- bead/config/env.py +147 -0
- bead/config/item.py +45 -0
- bead/config/list.py +193 -0
- bead/config/loader.py +149 -0
- bead/config/logging.py +42 -0
- bead/config/model.py +49 -0
- bead/config/paths.py +46 -0
- bead/config/profiles.py +320 -0
- bead/config/resources.py +47 -0
- bead/config/serialization.py +210 -0
- bead/config/simulation.py +206 -0
- bead/config/template.py +238 -0
- bead/config/validation.py +267 -0
- bead/data/__init__.py +65 -0
- bead/data/base.py +87 -0
- bead/data/identifiers.py +97 -0
- bead/data/language_codes.py +61 -0
- bead/data/metadata.py +270 -0
- bead/data/range.py +123 -0
- bead/data/repository.py +358 -0
- bead/data/serialization.py +249 -0
- bead/data/timestamps.py +89 -0
- bead/data/validation.py +349 -0
- bead/data_collection/__init__.py +11 -0
- bead/data_collection/jatos.py +223 -0
- bead/data_collection/merger.py +154 -0
- bead/data_collection/prolific.py +198 -0
- bead/deployment/__init__.py +5 -0
- bead/deployment/distribution.py +402 -0
- bead/deployment/jatos/__init__.py +1 -0
- bead/deployment/jatos/api.py +200 -0
- bead/deployment/jatos/exporter.py +210 -0
- bead/deployment/jspsych/__init__.py +9 -0
- bead/deployment/jspsych/biome.json +44 -0
- bead/deployment/jspsych/config.py +411 -0
- bead/deployment/jspsych/generator.py +598 -0
- bead/deployment/jspsych/package.json +51 -0
- bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
- bead/deployment/jspsych/randomizer.py +299 -0
- bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
- bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
- bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
- bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
- bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
- bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
- bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
- bead/deployment/jspsych/src/plugins/rating.ts +248 -0
- bead/deployment/jspsych/src/slopit/index.ts +9 -0
- bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
- bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
- bead/deployment/jspsych/templates/experiment.css +1 -0
- bead/deployment/jspsych/templates/experiment.js.template +289 -0
- bead/deployment/jspsych/templates/index.html +51 -0
- bead/deployment/jspsych/templates/randomizer.js +241 -0
- bead/deployment/jspsych/templates/randomizer.js.template +313 -0
- bead/deployment/jspsych/trials.py +723 -0
- bead/deployment/jspsych/tsconfig.json +23 -0
- bead/deployment/jspsych/tsup.config.ts +30 -0
- bead/deployment/jspsych/ui/__init__.py +1 -0
- bead/deployment/jspsych/ui/components.py +383 -0
- bead/deployment/jspsych/ui/styles.py +411 -0
- bead/dsl/__init__.py +80 -0
- bead/dsl/ast.py +168 -0
- bead/dsl/context.py +178 -0
- bead/dsl/errors.py +71 -0
- bead/dsl/evaluator.py +570 -0
- bead/dsl/grammar.lark +81 -0
- bead/dsl/parser.py +231 -0
- bead/dsl/stdlib.py +929 -0
- bead/evaluation/__init__.py +13 -0
- bead/evaluation/convergence.py +485 -0
- bead/evaluation/interannotator.py +398 -0
- bead/items/__init__.py +40 -0
- bead/items/adapters/__init__.py +70 -0
- bead/items/adapters/anthropic.py +224 -0
- bead/items/adapters/api_utils.py +167 -0
- bead/items/adapters/base.py +216 -0
- bead/items/adapters/google.py +259 -0
- bead/items/adapters/huggingface.py +1074 -0
- bead/items/adapters/openai.py +323 -0
- bead/items/adapters/registry.py +202 -0
- bead/items/adapters/sentence_transformers.py +224 -0
- bead/items/adapters/togetherai.py +309 -0
- bead/items/binary.py +515 -0
- bead/items/cache.py +558 -0
- bead/items/categorical.py +593 -0
- bead/items/cloze.py +757 -0
- bead/items/constructor.py +784 -0
- bead/items/forced_choice.py +413 -0
- bead/items/free_text.py +681 -0
- bead/items/generation.py +432 -0
- bead/items/item.py +396 -0
- bead/items/item_template.py +787 -0
- bead/items/magnitude.py +573 -0
- bead/items/multi_select.py +621 -0
- bead/items/ordinal_scale.py +569 -0
- bead/items/scoring.py +448 -0
- bead/items/validation.py +723 -0
- bead/lists/__init__.py +30 -0
- bead/lists/balancer.py +263 -0
- bead/lists/constraints.py +1067 -0
- bead/lists/experiment_list.py +286 -0
- bead/lists/list_collection.py +378 -0
- bead/lists/partitioner.py +1141 -0
- bead/lists/stratification.py +254 -0
- bead/participants/__init__.py +73 -0
- bead/participants/collection.py +699 -0
- bead/participants/merging.py +312 -0
- bead/participants/metadata_spec.py +491 -0
- bead/participants/models.py +276 -0
- bead/resources/__init__.py +29 -0
- bead/resources/adapters/__init__.py +19 -0
- bead/resources/adapters/base.py +104 -0
- bead/resources/adapters/cache.py +128 -0
- bead/resources/adapters/glazing.py +508 -0
- bead/resources/adapters/registry.py +117 -0
- bead/resources/adapters/unimorph.py +796 -0
- bead/resources/classification.py +856 -0
- bead/resources/constraint_builders.py +329 -0
- bead/resources/constraints.py +165 -0
- bead/resources/lexical_item.py +223 -0
- bead/resources/lexicon.py +744 -0
- bead/resources/loaders.py +209 -0
- bead/resources/template.py +441 -0
- bead/resources/template_collection.py +707 -0
- bead/resources/template_generation.py +349 -0
- bead/simulation/__init__.py +29 -0
- bead/simulation/annotators/__init__.py +15 -0
- bead/simulation/annotators/base.py +175 -0
- bead/simulation/annotators/distance_based.py +135 -0
- bead/simulation/annotators/lm_based.py +114 -0
- bead/simulation/annotators/oracle.py +182 -0
- bead/simulation/annotators/random.py +181 -0
- bead/simulation/dsl_extension/__init__.py +3 -0
- bead/simulation/noise_models/__init__.py +13 -0
- bead/simulation/noise_models/base.py +42 -0
- bead/simulation/noise_models/random_noise.py +82 -0
- bead/simulation/noise_models/systematic.py +132 -0
- bead/simulation/noise_models/temperature.py +86 -0
- bead/simulation/runner.py +144 -0
- bead/simulation/strategies/__init__.py +23 -0
- bead/simulation/strategies/base.py +123 -0
- bead/simulation/strategies/binary.py +103 -0
- bead/simulation/strategies/categorical.py +123 -0
- bead/simulation/strategies/cloze.py +224 -0
- bead/simulation/strategies/forced_choice.py +127 -0
- bead/simulation/strategies/free_text.py +105 -0
- bead/simulation/strategies/magnitude.py +116 -0
- bead/simulation/strategies/multi_select.py +129 -0
- bead/simulation/strategies/ordinal_scale.py +131 -0
- bead/templates/__init__.py +27 -0
- bead/templates/adapters/__init__.py +17 -0
- bead/templates/adapters/base.py +128 -0
- bead/templates/adapters/cache.py +178 -0
- bead/templates/adapters/huggingface.py +312 -0
- bead/templates/combinatorics.py +103 -0
- bead/templates/filler.py +605 -0
- bead/templates/renderers.py +177 -0
- bead/templates/resolver.py +178 -0
- bead/templates/strategies.py +1806 -0
- bead/templates/streaming.py +195 -0
- bead-0.1.0.dist-info/METADATA +212 -0
- bead-0.1.0.dist-info/RECORD +231 -0
- bead-0.1.0.dist-info/WHEEL +4 -0
- bead-0.1.0.dist-info/entry_points.txt +2 -0
- bead-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
"""Lexical item models for words and multi-word expressions.
|
|
2
|
+
|
|
3
|
+
This module provides data models for representing lexical items in the bead
|
|
4
|
+
system. Lexical items are the atomic units that fill template slots during
|
|
5
|
+
sentence generation. Includes support for single words and multi-word
|
|
6
|
+
expressions (MWEs).
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from pydantic import Field, field_validator
|
|
14
|
+
|
|
15
|
+
from bead.data.base import BeadBaseModel
|
|
16
|
+
from bead.data.language_codes import LanguageCode
|
|
17
|
+
from bead.resources.constraints import Constraint
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _empty_constraint_list() -> list[Constraint]:
    """Return a new, empty list of constraints.

    Serves as a typed ``default_factory`` so that every model instance
    receives its own list object rather than sharing one.
    """
    return list()
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class LexicalItem(BeadBaseModel):
    """A word-level entry described by lemma, surface form and features.

    The layout mirrors UniMorph's (lemma, form, feature bundle) triple:

    - ``lemma`` is the base/citation form,
    - ``form`` is the inflected surface form, left as ``None`` when it is
      the same as the lemma,
    - ``features`` holds the feature bundle (pos, tense, person, number, ...).

    Attributes
    ----------
    lemma : str
        Base/citation form (e.g., "walk", "the"). Must not be empty or
        whitespace-only.
    form : str | None
        Inflected surface form when it differs from the lemma
        (e.g., "walked", "walking"). ``None`` means form equals lemma.
    language_code : LanguageCode
        ISO 639-3 language code (e.g., "eng").
    features : dict[str, Any]
        Feature bundle with grammatical/linguistic features, such as:

        - pos: str (e.g., "VERB", "DET", "NOUN", "ADJ", "ADP")
        - Morphological: tense, person, number, case, gender, etc.
        - unimorph_features: str (e.g., "V;PRS;3;SG")
        - Lexical resource info: verbnet_class, themroles, frame_info, etc.
    source : str | None
        Provenance (e.g., "VerbNet", "UniMorph", "manual").

    Examples
    --------
    >>> # Inflected verb
    >>> verb = LexicalItem(
    ...     lemma="walk",
    ...     form="walked",
    ...     language_code="eng",
    ...     features={"pos": "VERB", "tense": "PST"},
    ...     source="UniMorph"
    ... )
    >>> verb.form
    'walked'
    >>>
    >>> # Uninflected determiner
    >>> det = LexicalItem(
    ...     lemma="the",
    ...     form=None,
    ...     language_code="eng",
    ...     features={"pos": "DET"},
    ...     source="manual"
    ... )
    >>> det.form is None
    True
    """

    lemma: str
    form: str | None = None
    language_code: LanguageCode
    features: dict[str, Any] = Field(default_factory=dict)
    source: str | None = None

    @field_validator("lemma")
    @classmethod
    def validate_lemma(cls, v: str) -> str:
        """Reject lemmas that are empty or consist only of whitespace.

        Parameters
        ----------
        v : str
            Candidate lemma value.

        Returns
        -------
        str
            The lemma, unchanged (it is not stripped or normalised).

        Raises
        ------
        ValueError
            If the lemma is empty or contains only whitespace.
        """
        # Accept as soon as any non-whitespace character is present.
        if v and v.strip():
            return v
        raise ValueError("lemma must be non-empty")
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class MWEComponent(LexicalItem):
    """A component of a multi-word expression.

    Components represent individual parts of an MWE (e.g., verb and particle
    in a phrasal verb). A component is itself a ``LexicalItem``: it inherits
    ``lemma``, ``form``, ``language_code``, ``features`` and ``source``, so
    ``language_code`` is required here too and part of speech is recorded
    under ``features["pos"]``. On top of that, each component has a role
    within the MWE and can carry its own constraints.

    Attributes
    ----------
    role : str
        Role of this component in the MWE (e.g., "verb", "particle", "noun").
        Required.
    required : bool
        Whether this component must be present (default: True).
    constraints : list[Constraint]
        Constraints that apply to this component only (default: empty list).

    Examples
    --------
    >>> # Verb component of "take off"
    >>> verb = MWEComponent(
    ...     lemma="take",
    ...     language_code="eng",
    ...     features={"pos": "VERB"},
    ...     role="verb",
    ...     required=True
    ... )
    >>> verb.role
    'verb'
    >>> # Particle component
    >>> particle = MWEComponent(
    ...     lemma="off",
    ...     language_code="eng",
    ...     features={"pos": "PART"},
    ...     role="particle",
    ...     required=True
    ... )
    >>> particle.required
    True
    """

    role: str = Field(..., description="Component role in MWE")
    required: bool = Field(default=True, description="Whether component is required")
    constraints: list[Constraint] = Field(
        default_factory=_empty_constraint_list,
        description="Component-specific constraints",
    )
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
class MultiWordExpression(LexicalItem):
    """Multi-word expression as a lexical item.

    MWEs are lexical items composed of multiple components. They can be
    separable (components can be non-adjacent) or inseparable. MWEs
    support component-level constraints and adjacency patterns.

    Because an MWE is a ``LexicalItem``, it inherits ``lemma``, ``form``,
    ``language_code``, ``features`` and ``source``; ``language_code`` is
    required and part of speech is recorded under ``features["pos"]``.

    Attributes
    ----------
    components : list[MWEComponent]
        Components that make up this MWE (default: empty list).
    separable : bool
        Whether components can be separated by other words (default: False).
        Example: "take the ball off" (separable) vs "kick the bucket" (inseparable).
    adjacency_pattern : str | None
        DSL expression defining valid adjacency patterns (default: None).
        Variables: component roles, 'distance' between components.
        Example: "distance(verb, particle) <= 3"

    Examples
    --------
    >>> # Inseparable phrasal verb "look after"
    >>> mwe1 = MultiWordExpression(
    ...     lemma="look after",
    ...     language_code="eng",
    ...     features={"pos": "VERB"},
    ...     components=[
    ...         MWEComponent(
    ...             lemma="look",
    ...             language_code="eng",
    ...             features={"pos": "VERB"},
    ...             role="verb",
    ...         ),
    ...         MWEComponent(
    ...             lemma="after",
    ...             language_code="eng",
    ...             features={"pos": "ADP"},
    ...             role="particle",
    ...         ),
    ...     ],
    ...     separable=False
    ... )
    >>>
    >>> # Separable phrasal verb "take off"
    >>> mwe2 = MultiWordExpression(
    ...     lemma="take off",
    ...     language_code="eng",
    ...     features={"pos": "VERB"},
    ...     components=[
    ...         MWEComponent(
    ...             lemma="take",
    ...             language_code="eng",
    ...             features={"pos": "VERB"},
    ...             role="verb",
    ...         ),
    ...         MWEComponent(
    ...             lemma="off",
    ...             language_code="eng",
    ...             features={"pos": "PART"},
    ...             role="particle",
    ...         ),
    ...     ],
    ...     separable=True,
    ...     adjacency_pattern="distance(verb, particle) <= 3"
    ... )
    >>>
    >>> # MWE with constraints on components
    >>> mwe3 = MultiWordExpression(
    ...     lemma="break down",
    ...     language_code="eng",
    ...     features={"pos": "VERB"},
    ...     components=[
    ...         MWEComponent(
    ...             lemma="break",
    ...             language_code="eng",
    ...             features={"pos": "VERB"},
    ...             role="verb",
    ...             constraints=[
    ...                 Constraint(
    ...                     expression="self.lemma in motion_verbs",
    ...                     context={"motion_verbs": {"break", "take", "give"}}
    ...                 )
    ...             ]
    ...         ),
    ...         MWEComponent(
    ...             lemma="down",
    ...             language_code="eng",
    ...             features={"pos": "PART"},
    ...             role="particle",
    ...         ),
    ...     ],
    ...     separable=True
    ... )
    """

    components: list[MWEComponent] = Field(
        default_factory=list, description="MWE components"
    )
    separable: bool = Field(
        default=False, description="Whether components can be non-adjacent"
    )
    adjacency_pattern: str | None = Field(
        default=None, description="DSL expression for valid adjacency patterns"
    )
|