bead 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. bead/__init__.py +11 -0
  2. bead/__main__.py +11 -0
  3. bead/active_learning/__init__.py +15 -0
  4. bead/active_learning/config.py +231 -0
  5. bead/active_learning/loop.py +566 -0
  6. bead/active_learning/models/__init__.py +24 -0
  7. bead/active_learning/models/base.py +852 -0
  8. bead/active_learning/models/binary.py +910 -0
  9. bead/active_learning/models/categorical.py +943 -0
  10. bead/active_learning/models/cloze.py +862 -0
  11. bead/active_learning/models/forced_choice.py +956 -0
  12. bead/active_learning/models/free_text.py +773 -0
  13. bead/active_learning/models/lora.py +365 -0
  14. bead/active_learning/models/magnitude.py +835 -0
  15. bead/active_learning/models/multi_select.py +795 -0
  16. bead/active_learning/models/ordinal_scale.py +811 -0
  17. bead/active_learning/models/peft_adapter.py +155 -0
  18. bead/active_learning/models/random_effects.py +639 -0
  19. bead/active_learning/selection.py +354 -0
  20. bead/active_learning/strategies.py +391 -0
  21. bead/active_learning/trainers/__init__.py +26 -0
  22. bead/active_learning/trainers/base.py +210 -0
  23. bead/active_learning/trainers/data_collator.py +172 -0
  24. bead/active_learning/trainers/dataset_utils.py +261 -0
  25. bead/active_learning/trainers/huggingface.py +304 -0
  26. bead/active_learning/trainers/lightning.py +324 -0
  27. bead/active_learning/trainers/metrics.py +424 -0
  28. bead/active_learning/trainers/mixed_effects.py +551 -0
  29. bead/active_learning/trainers/model_wrapper.py +509 -0
  30. bead/active_learning/trainers/registry.py +104 -0
  31. bead/adapters/__init__.py +11 -0
  32. bead/adapters/huggingface.py +61 -0
  33. bead/behavioral/__init__.py +116 -0
  34. bead/behavioral/analytics.py +646 -0
  35. bead/behavioral/extraction.py +343 -0
  36. bead/behavioral/merging.py +343 -0
  37. bead/cli/__init__.py +11 -0
  38. bead/cli/active_learning.py +513 -0
  39. bead/cli/active_learning_commands.py +779 -0
  40. bead/cli/completion.py +359 -0
  41. bead/cli/config.py +624 -0
  42. bead/cli/constraint_builders.py +286 -0
  43. bead/cli/deployment.py +859 -0
  44. bead/cli/deployment_trials.py +493 -0
  45. bead/cli/deployment_ui.py +332 -0
  46. bead/cli/display.py +378 -0
  47. bead/cli/items.py +960 -0
  48. bead/cli/items_factories.py +776 -0
  49. bead/cli/list_constraints.py +714 -0
  50. bead/cli/lists.py +490 -0
  51. bead/cli/main.py +430 -0
  52. bead/cli/models.py +877 -0
  53. bead/cli/resource_loaders.py +621 -0
  54. bead/cli/resources.py +1036 -0
  55. bead/cli/shell.py +356 -0
  56. bead/cli/simulate.py +840 -0
  57. bead/cli/templates.py +1158 -0
  58. bead/cli/training.py +1080 -0
  59. bead/cli/utils.py +614 -0
  60. bead/cli/workflow.py +1273 -0
  61. bead/config/__init__.py +68 -0
  62. bead/config/active_learning.py +1009 -0
  63. bead/config/config.py +192 -0
  64. bead/config/defaults.py +118 -0
  65. bead/config/deployment.py +217 -0
  66. bead/config/env.py +147 -0
  67. bead/config/item.py +45 -0
  68. bead/config/list.py +193 -0
  69. bead/config/loader.py +149 -0
  70. bead/config/logging.py +42 -0
  71. bead/config/model.py +49 -0
  72. bead/config/paths.py +46 -0
  73. bead/config/profiles.py +320 -0
  74. bead/config/resources.py +47 -0
  75. bead/config/serialization.py +210 -0
  76. bead/config/simulation.py +206 -0
  77. bead/config/template.py +238 -0
  78. bead/config/validation.py +267 -0
  79. bead/data/__init__.py +65 -0
  80. bead/data/base.py +87 -0
  81. bead/data/identifiers.py +97 -0
  82. bead/data/language_codes.py +61 -0
  83. bead/data/metadata.py +270 -0
  84. bead/data/range.py +123 -0
  85. bead/data/repository.py +358 -0
  86. bead/data/serialization.py +249 -0
  87. bead/data/timestamps.py +89 -0
  88. bead/data/validation.py +349 -0
  89. bead/data_collection/__init__.py +11 -0
  90. bead/data_collection/jatos.py +223 -0
  91. bead/data_collection/merger.py +154 -0
  92. bead/data_collection/prolific.py +198 -0
  93. bead/deployment/__init__.py +5 -0
  94. bead/deployment/distribution.py +402 -0
  95. bead/deployment/jatos/__init__.py +1 -0
  96. bead/deployment/jatos/api.py +200 -0
  97. bead/deployment/jatos/exporter.py +210 -0
  98. bead/deployment/jspsych/__init__.py +9 -0
  99. bead/deployment/jspsych/biome.json +44 -0
  100. bead/deployment/jspsych/config.py +411 -0
  101. bead/deployment/jspsych/generator.py +598 -0
  102. bead/deployment/jspsych/package.json +51 -0
  103. bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
  104. bead/deployment/jspsych/randomizer.py +299 -0
  105. bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
  106. bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
  107. bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
  108. bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
  109. bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
  110. bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
  111. bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
  112. bead/deployment/jspsych/src/plugins/rating.ts +248 -0
  113. bead/deployment/jspsych/src/slopit/index.ts +9 -0
  114. bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
  115. bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
  116. bead/deployment/jspsych/templates/experiment.css +1 -0
  117. bead/deployment/jspsych/templates/experiment.js.template +289 -0
  118. bead/deployment/jspsych/templates/index.html +51 -0
  119. bead/deployment/jspsych/templates/randomizer.js +241 -0
  120. bead/deployment/jspsych/templates/randomizer.js.template +313 -0
  121. bead/deployment/jspsych/trials.py +723 -0
  122. bead/deployment/jspsych/tsconfig.json +23 -0
  123. bead/deployment/jspsych/tsup.config.ts +30 -0
  124. bead/deployment/jspsych/ui/__init__.py +1 -0
  125. bead/deployment/jspsych/ui/components.py +383 -0
  126. bead/deployment/jspsych/ui/styles.py +411 -0
  127. bead/dsl/__init__.py +80 -0
  128. bead/dsl/ast.py +168 -0
  129. bead/dsl/context.py +178 -0
  130. bead/dsl/errors.py +71 -0
  131. bead/dsl/evaluator.py +570 -0
  132. bead/dsl/grammar.lark +81 -0
  133. bead/dsl/parser.py +231 -0
  134. bead/dsl/stdlib.py +929 -0
  135. bead/evaluation/__init__.py +13 -0
  136. bead/evaluation/convergence.py +485 -0
  137. bead/evaluation/interannotator.py +398 -0
  138. bead/items/__init__.py +40 -0
  139. bead/items/adapters/__init__.py +70 -0
  140. bead/items/adapters/anthropic.py +224 -0
  141. bead/items/adapters/api_utils.py +167 -0
  142. bead/items/adapters/base.py +216 -0
  143. bead/items/adapters/google.py +259 -0
  144. bead/items/adapters/huggingface.py +1074 -0
  145. bead/items/adapters/openai.py +323 -0
  146. bead/items/adapters/registry.py +202 -0
  147. bead/items/adapters/sentence_transformers.py +224 -0
  148. bead/items/adapters/togetherai.py +309 -0
  149. bead/items/binary.py +515 -0
  150. bead/items/cache.py +558 -0
  151. bead/items/categorical.py +593 -0
  152. bead/items/cloze.py +757 -0
  153. bead/items/constructor.py +784 -0
  154. bead/items/forced_choice.py +413 -0
  155. bead/items/free_text.py +681 -0
  156. bead/items/generation.py +432 -0
  157. bead/items/item.py +396 -0
  158. bead/items/item_template.py +787 -0
  159. bead/items/magnitude.py +573 -0
  160. bead/items/multi_select.py +621 -0
  161. bead/items/ordinal_scale.py +569 -0
  162. bead/items/scoring.py +448 -0
  163. bead/items/validation.py +723 -0
  164. bead/lists/__init__.py +30 -0
  165. bead/lists/balancer.py +263 -0
  166. bead/lists/constraints.py +1067 -0
  167. bead/lists/experiment_list.py +286 -0
  168. bead/lists/list_collection.py +378 -0
  169. bead/lists/partitioner.py +1141 -0
  170. bead/lists/stratification.py +254 -0
  171. bead/participants/__init__.py +73 -0
  172. bead/participants/collection.py +699 -0
  173. bead/participants/merging.py +312 -0
  174. bead/participants/metadata_spec.py +491 -0
  175. bead/participants/models.py +276 -0
  176. bead/resources/__init__.py +29 -0
  177. bead/resources/adapters/__init__.py +19 -0
  178. bead/resources/adapters/base.py +104 -0
  179. bead/resources/adapters/cache.py +128 -0
  180. bead/resources/adapters/glazing.py +508 -0
  181. bead/resources/adapters/registry.py +117 -0
  182. bead/resources/adapters/unimorph.py +796 -0
  183. bead/resources/classification.py +856 -0
  184. bead/resources/constraint_builders.py +329 -0
  185. bead/resources/constraints.py +165 -0
  186. bead/resources/lexical_item.py +223 -0
  187. bead/resources/lexicon.py +744 -0
  188. bead/resources/loaders.py +209 -0
  189. bead/resources/template.py +441 -0
  190. bead/resources/template_collection.py +707 -0
  191. bead/resources/template_generation.py +349 -0
  192. bead/simulation/__init__.py +29 -0
  193. bead/simulation/annotators/__init__.py +15 -0
  194. bead/simulation/annotators/base.py +175 -0
  195. bead/simulation/annotators/distance_based.py +135 -0
  196. bead/simulation/annotators/lm_based.py +114 -0
  197. bead/simulation/annotators/oracle.py +182 -0
  198. bead/simulation/annotators/random.py +181 -0
  199. bead/simulation/dsl_extension/__init__.py +3 -0
  200. bead/simulation/noise_models/__init__.py +13 -0
  201. bead/simulation/noise_models/base.py +42 -0
  202. bead/simulation/noise_models/random_noise.py +82 -0
  203. bead/simulation/noise_models/systematic.py +132 -0
  204. bead/simulation/noise_models/temperature.py +86 -0
  205. bead/simulation/runner.py +144 -0
  206. bead/simulation/strategies/__init__.py +23 -0
  207. bead/simulation/strategies/base.py +123 -0
  208. bead/simulation/strategies/binary.py +103 -0
  209. bead/simulation/strategies/categorical.py +123 -0
  210. bead/simulation/strategies/cloze.py +224 -0
  211. bead/simulation/strategies/forced_choice.py +127 -0
  212. bead/simulation/strategies/free_text.py +105 -0
  213. bead/simulation/strategies/magnitude.py +116 -0
  214. bead/simulation/strategies/multi_select.py +129 -0
  215. bead/simulation/strategies/ordinal_scale.py +131 -0
  216. bead/templates/__init__.py +27 -0
  217. bead/templates/adapters/__init__.py +17 -0
  218. bead/templates/adapters/base.py +128 -0
  219. bead/templates/adapters/cache.py +178 -0
  220. bead/templates/adapters/huggingface.py +312 -0
  221. bead/templates/combinatorics.py +103 -0
  222. bead/templates/filler.py +605 -0
  223. bead/templates/renderers.py +177 -0
  224. bead/templates/resolver.py +178 -0
  225. bead/templates/strategies.py +1806 -0
  226. bead/templates/streaming.py +195 -0
  227. bead-0.1.0.dist-info/METADATA +212 -0
  228. bead-0.1.0.dist-info/RECORD +231 -0
  229. bead-0.1.0.dist-info/WHEEL +4 -0
  230. bead-0.1.0.dist-info/entry_points.txt +2 -0
  231. bead-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,223 @@
1
+ """Lexical item models for words and multi-word expressions.
2
+
3
+ This module provides data models for representing lexical items in the bead
4
+ system. Lexical items are the atomic units that fill template slots during
5
+ sentence generation. Includes support for single words and multi-word
6
+ expressions (MWEs).
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from typing import Any
12
+
13
+ from pydantic import Field, field_validator
14
+
15
+ from bead.data.base import BeadBaseModel
16
+ from bead.data.language_codes import LanguageCode
17
+ from bead.resources.constraints import Constraint
18
+
19
+
20
+ def _empty_constraint_list() -> list[Constraint]:
21
+ """Create an empty constraint list."""
22
+ return []
23
+
24
+
25
class LexicalItem(BeadBaseModel):
    """Single lexical entry described by a lemma, a form, and features.

    The layout mirrors UniMorph: a citation form (``lemma``), an optional
    inflected surface form (``form``), and a bundle of linguistic
    ``features``.

    Attributes
    ----------
    lemma : str
        Base/citation form (e.g., "walk", "the"). Must contain at least
        one non-whitespace character.
    form : str | None
        Inflected surface form when it differs from the lemma
        (e.g., "walked", "walking"). ``None`` means the form equals the
        lemma.
    language_code : LanguageCode
        ISO 639-3 language code (e.g., "eng").
    features : dict[str, Any]
        Feature bundle with grammatical/linguistic features, such as:
        - pos: str (e.g., "VERB", "DET", "NOUN", "ADJ", "ADP")
        - Morphological: tense, person, number, case, gender, etc.
        - unimorph_features: str (e.g., "V;PRS;3;SG")
        - Lexical resource info: verbnet_class, themroles, frame_info, etc.
        Defaults to an empty dict.
    source : str | None
        Provenance (e.g., "VerbNet", "UniMorph", "manual").

    Examples
    --------
    >>> # Inflected verb
    >>> verb = LexicalItem(
    ...     lemma="walk",
    ...     form="walked",
    ...     language_code="eng",
    ...     features={"pos": "VERB", "tense": "PST"},
    ...     source="UniMorph"
    ... )
    >>> verb.form
    'walked'
    >>>
    >>> # Uninflected determiner
    >>> det = LexicalItem(
    ...     lemma="the",
    ...     form=None,
    ...     language_code="eng",
    ...     features={"pos": "DET"},
    ...     source="manual"
    ... )
    >>> det.form is None
    True
    """

    lemma: str
    form: str | None = None
    language_code: LanguageCode
    features: dict[str, Any] = Field(default_factory=dict)
    source: str | None = None

    @field_validator("lemma")
    @classmethod
    def validate_lemma(cls, v: str) -> str:
        """Reject lemmas that are empty or whitespace-only.

        Parameters
        ----------
        v : str
            Candidate lemma value.

        Returns
        -------
        str
            The lemma, unchanged, when it has non-whitespace content.

        Raises
        ------
        ValueError
            If the lemma is empty or consists solely of whitespace.
        """
        # Accept first, fail last: the value is returned as given (not
        # stripped) whenever it holds at least one non-whitespace character.
        if v and v.strip():
            return v
        raise ValueError("lemma must be non-empty")
105
+
106
+
107
class MWEComponent(LexicalItem):
    """A component of a multi-word expression.

    Components represent individual parts of an MWE (e.g., verb and particle
    in a phrasal verb). Each component is itself a ``LexicalItem`` (so it
    carries ``lemma``, ``form``, ``language_code``, ``features`` and
    ``source``), has a role within the MWE, and can have its own constraints.

    Attributes
    ----------
    role : str
        Role of this component in the MWE (e.g., "verb", "particle", "noun").
        Required.
    required : bool
        Whether this component must be present (default: True).
    constraints : list[Constraint]
        Component-specific constraints. Defaults to a new empty list.

    Examples
    --------
    Part of speech is stored in the ``features`` bundle, and
    ``language_code`` is a required field inherited from ``LexicalItem``.

    >>> # Verb component of "take off"
    >>> verb = MWEComponent(
    ...     lemma="take",
    ...     language_code="eng",
    ...     features={"pos": "VERB"},
    ...     role="verb",
    ...     required=True,
    ... )
    >>> # Particle component
    >>> particle = MWEComponent(
    ...     lemma="off",
    ...     language_code="eng",
    ...     features={"pos": "PART"},
    ...     role="particle",
    ...     required=True,
    ... )
    """

    role: str = Field(..., description="Component role in MWE")
    required: bool = Field(default=True, description="Whether component is required")
    constraints: list[Constraint] = Field(
        default_factory=_empty_constraint_list,
        description="Component-specific constraints",
    )
147
+
148
+
149
class MultiWordExpression(LexicalItem):
    """Multi-word expression as a lexical item.

    MWEs are lexical items composed of multiple components. They can be
    separable (components can be non-adjacent) or inseparable. MWEs
    support component-level constraints and adjacency patterns.

    Attributes
    ----------
    components : list[MWEComponent]
        Components that make up this MWE. Defaults to an empty list.
    separable : bool
        Whether components can be separated by other words (default: False).
        Example: "take the ball off" (separable) vs "kick the bucket" (inseparable).
    adjacency_pattern : str | None
        DSL expression defining valid adjacency patterns (default: None).
        Variables: component roles, 'distance' between components.
        Example: "distance(verb, particle) <= 3"

    Examples
    --------
    Part of speech is stored in the ``features`` bundle, and
    ``language_code`` is a required field inherited from ``LexicalItem``;
    both apply to the MWE itself and to each of its components.

    >>> # Inseparable phrasal verb "look after"
    >>> mwe1 = MultiWordExpression(
    ...     lemma="look after",
    ...     language_code="eng",
    ...     features={"pos": "VERB"},
    ...     components=[
    ...         MWEComponent(
    ...             lemma="look",
    ...             language_code="eng",
    ...             features={"pos": "VERB"},
    ...             role="verb",
    ...         ),
    ...         MWEComponent(
    ...             lemma="after",
    ...             language_code="eng",
    ...             features={"pos": "ADP"},
    ...             role="particle",
    ...         ),
    ...     ],
    ...     separable=False,
    ... )
    >>>
    >>> # Separable phrasal verb "take off"
    >>> mwe2 = MultiWordExpression(
    ...     lemma="take off",
    ...     language_code="eng",
    ...     features={"pos": "VERB"},
    ...     components=[
    ...         MWEComponent(
    ...             lemma="take",
    ...             language_code="eng",
    ...             features={"pos": "VERB"},
    ...             role="verb",
    ...         ),
    ...         MWEComponent(
    ...             lemma="off",
    ...             language_code="eng",
    ...             features={"pos": "PART"},
    ...             role="particle",
    ...         ),
    ...     ],
    ...     separable=True,
    ...     adjacency_pattern="distance(verb, particle) <= 3",
    ... )
    >>>
    >>> # MWE with constraints on components
    >>> mwe3 = MultiWordExpression(
    ...     lemma="break down",
    ...     language_code="eng",
    ...     features={"pos": "VERB"},
    ...     components=[
    ...         MWEComponent(
    ...             lemma="break",
    ...             language_code="eng",
    ...             features={"pos": "VERB"},
    ...             role="verb",
    ...             constraints=[
    ...                 Constraint(
    ...                     expression="self.lemma in motion_verbs",
    ...                     context={"motion_verbs": {"break", "take", "give"}},
    ...                 )
    ...             ],
    ...         ),
    ...         MWEComponent(
    ...             lemma="down",
    ...             language_code="eng",
    ...             features={"pos": "PART"},
    ...             role="particle",
    ...         ),
    ...     ],
    ...     separable=True,
    ... )
    """

    components: list[MWEComponent] = Field(
        default_factory=list, description="MWE components"
    )
    separable: bool = Field(
        default=False, description="Whether components can be non-adjacent"
    )
    adjacency_pattern: str | None = Field(
        default=None, description="DSL expression for valid adjacency patterns"
    )