bead-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. bead/__init__.py +11 -0
  2. bead/__main__.py +11 -0
  3. bead/active_learning/__init__.py +15 -0
  4. bead/active_learning/config.py +231 -0
  5. bead/active_learning/loop.py +566 -0
  6. bead/active_learning/models/__init__.py +24 -0
  7. bead/active_learning/models/base.py +852 -0
  8. bead/active_learning/models/binary.py +910 -0
  9. bead/active_learning/models/categorical.py +943 -0
  10. bead/active_learning/models/cloze.py +862 -0
  11. bead/active_learning/models/forced_choice.py +956 -0
  12. bead/active_learning/models/free_text.py +773 -0
  13. bead/active_learning/models/lora.py +365 -0
  14. bead/active_learning/models/magnitude.py +835 -0
  15. bead/active_learning/models/multi_select.py +795 -0
  16. bead/active_learning/models/ordinal_scale.py +811 -0
  17. bead/active_learning/models/peft_adapter.py +155 -0
  18. bead/active_learning/models/random_effects.py +639 -0
  19. bead/active_learning/selection.py +354 -0
  20. bead/active_learning/strategies.py +391 -0
  21. bead/active_learning/trainers/__init__.py +26 -0
  22. bead/active_learning/trainers/base.py +210 -0
  23. bead/active_learning/trainers/data_collator.py +172 -0
  24. bead/active_learning/trainers/dataset_utils.py +261 -0
  25. bead/active_learning/trainers/huggingface.py +304 -0
  26. bead/active_learning/trainers/lightning.py +324 -0
  27. bead/active_learning/trainers/metrics.py +424 -0
  28. bead/active_learning/trainers/mixed_effects.py +551 -0
  29. bead/active_learning/trainers/model_wrapper.py +509 -0
  30. bead/active_learning/trainers/registry.py +104 -0
  31. bead/adapters/__init__.py +11 -0
  32. bead/adapters/huggingface.py +61 -0
  33. bead/behavioral/__init__.py +116 -0
  34. bead/behavioral/analytics.py +646 -0
  35. bead/behavioral/extraction.py +343 -0
  36. bead/behavioral/merging.py +343 -0
  37. bead/cli/__init__.py +11 -0
  38. bead/cli/active_learning.py +513 -0
  39. bead/cli/active_learning_commands.py +779 -0
  40. bead/cli/completion.py +359 -0
  41. bead/cli/config.py +624 -0
  42. bead/cli/constraint_builders.py +286 -0
  43. bead/cli/deployment.py +859 -0
  44. bead/cli/deployment_trials.py +493 -0
  45. bead/cli/deployment_ui.py +332 -0
  46. bead/cli/display.py +378 -0
  47. bead/cli/items.py +960 -0
  48. bead/cli/items_factories.py +776 -0
  49. bead/cli/list_constraints.py +714 -0
  50. bead/cli/lists.py +490 -0
  51. bead/cli/main.py +430 -0
  52. bead/cli/models.py +877 -0
  53. bead/cli/resource_loaders.py +621 -0
  54. bead/cli/resources.py +1036 -0
  55. bead/cli/shell.py +356 -0
  56. bead/cli/simulate.py +840 -0
  57. bead/cli/templates.py +1158 -0
  58. bead/cli/training.py +1080 -0
  59. bead/cli/utils.py +614 -0
  60. bead/cli/workflow.py +1273 -0
  61. bead/config/__init__.py +68 -0
  62. bead/config/active_learning.py +1009 -0
  63. bead/config/config.py +192 -0
  64. bead/config/defaults.py +118 -0
  65. bead/config/deployment.py +217 -0
  66. bead/config/env.py +147 -0
  67. bead/config/item.py +45 -0
  68. bead/config/list.py +193 -0
  69. bead/config/loader.py +149 -0
  70. bead/config/logging.py +42 -0
  71. bead/config/model.py +49 -0
  72. bead/config/paths.py +46 -0
  73. bead/config/profiles.py +320 -0
  74. bead/config/resources.py +47 -0
  75. bead/config/serialization.py +210 -0
  76. bead/config/simulation.py +206 -0
  77. bead/config/template.py +238 -0
  78. bead/config/validation.py +267 -0
  79. bead/data/__init__.py +65 -0
  80. bead/data/base.py +87 -0
  81. bead/data/identifiers.py +97 -0
  82. bead/data/language_codes.py +61 -0
  83. bead/data/metadata.py +270 -0
  84. bead/data/range.py +123 -0
  85. bead/data/repository.py +358 -0
  86. bead/data/serialization.py +249 -0
  87. bead/data/timestamps.py +89 -0
  88. bead/data/validation.py +349 -0
  89. bead/data_collection/__init__.py +11 -0
  90. bead/data_collection/jatos.py +223 -0
  91. bead/data_collection/merger.py +154 -0
  92. bead/data_collection/prolific.py +198 -0
  93. bead/deployment/__init__.py +5 -0
  94. bead/deployment/distribution.py +402 -0
  95. bead/deployment/jatos/__init__.py +1 -0
  96. bead/deployment/jatos/api.py +200 -0
  97. bead/deployment/jatos/exporter.py +210 -0
  98. bead/deployment/jspsych/__init__.py +9 -0
  99. bead/deployment/jspsych/biome.json +44 -0
  100. bead/deployment/jspsych/config.py +411 -0
  101. bead/deployment/jspsych/generator.py +598 -0
  102. bead/deployment/jspsych/package.json +51 -0
  103. bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
  104. bead/deployment/jspsych/randomizer.py +299 -0
  105. bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
  106. bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
  107. bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
  108. bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
  109. bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
  110. bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
  111. bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
  112. bead/deployment/jspsych/src/plugins/rating.ts +248 -0
  113. bead/deployment/jspsych/src/slopit/index.ts +9 -0
  114. bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
  115. bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
  116. bead/deployment/jspsych/templates/experiment.css +1 -0
  117. bead/deployment/jspsych/templates/experiment.js.template +289 -0
  118. bead/deployment/jspsych/templates/index.html +51 -0
  119. bead/deployment/jspsych/templates/randomizer.js +241 -0
  120. bead/deployment/jspsych/templates/randomizer.js.template +313 -0
  121. bead/deployment/jspsych/trials.py +723 -0
  122. bead/deployment/jspsych/tsconfig.json +23 -0
  123. bead/deployment/jspsych/tsup.config.ts +30 -0
  124. bead/deployment/jspsych/ui/__init__.py +1 -0
  125. bead/deployment/jspsych/ui/components.py +383 -0
  126. bead/deployment/jspsych/ui/styles.py +411 -0
  127. bead/dsl/__init__.py +80 -0
  128. bead/dsl/ast.py +168 -0
  129. bead/dsl/context.py +178 -0
  130. bead/dsl/errors.py +71 -0
  131. bead/dsl/evaluator.py +570 -0
  132. bead/dsl/grammar.lark +81 -0
  133. bead/dsl/parser.py +231 -0
  134. bead/dsl/stdlib.py +929 -0
  135. bead/evaluation/__init__.py +13 -0
  136. bead/evaluation/convergence.py +485 -0
  137. bead/evaluation/interannotator.py +398 -0
  138. bead/items/__init__.py +40 -0
  139. bead/items/adapters/__init__.py +70 -0
  140. bead/items/adapters/anthropic.py +224 -0
  141. bead/items/adapters/api_utils.py +167 -0
  142. bead/items/adapters/base.py +216 -0
  143. bead/items/adapters/google.py +259 -0
  144. bead/items/adapters/huggingface.py +1074 -0
  145. bead/items/adapters/openai.py +323 -0
  146. bead/items/adapters/registry.py +202 -0
  147. bead/items/adapters/sentence_transformers.py +224 -0
  148. bead/items/adapters/togetherai.py +309 -0
  149. bead/items/binary.py +515 -0
  150. bead/items/cache.py +558 -0
  151. bead/items/categorical.py +593 -0
  152. bead/items/cloze.py +757 -0
  153. bead/items/constructor.py +784 -0
  154. bead/items/forced_choice.py +413 -0
  155. bead/items/free_text.py +681 -0
  156. bead/items/generation.py +432 -0
  157. bead/items/item.py +396 -0
  158. bead/items/item_template.py +787 -0
  159. bead/items/magnitude.py +573 -0
  160. bead/items/multi_select.py +621 -0
  161. bead/items/ordinal_scale.py +569 -0
  162. bead/items/scoring.py +448 -0
  163. bead/items/validation.py +723 -0
  164. bead/lists/__init__.py +30 -0
  165. bead/lists/balancer.py +263 -0
  166. bead/lists/constraints.py +1067 -0
  167. bead/lists/experiment_list.py +286 -0
  168. bead/lists/list_collection.py +378 -0
  169. bead/lists/partitioner.py +1141 -0
  170. bead/lists/stratification.py +254 -0
  171. bead/participants/__init__.py +73 -0
  172. bead/participants/collection.py +699 -0
  173. bead/participants/merging.py +312 -0
  174. bead/participants/metadata_spec.py +491 -0
  175. bead/participants/models.py +276 -0
  176. bead/resources/__init__.py +29 -0
  177. bead/resources/adapters/__init__.py +19 -0
  178. bead/resources/adapters/base.py +104 -0
  179. bead/resources/adapters/cache.py +128 -0
  180. bead/resources/adapters/glazing.py +508 -0
  181. bead/resources/adapters/registry.py +117 -0
  182. bead/resources/adapters/unimorph.py +796 -0
  183. bead/resources/classification.py +856 -0
  184. bead/resources/constraint_builders.py +329 -0
  185. bead/resources/constraints.py +165 -0
  186. bead/resources/lexical_item.py +223 -0
  187. bead/resources/lexicon.py +744 -0
  188. bead/resources/loaders.py +209 -0
  189. bead/resources/template.py +441 -0
  190. bead/resources/template_collection.py +707 -0
  191. bead/resources/template_generation.py +349 -0
  192. bead/simulation/__init__.py +29 -0
  193. bead/simulation/annotators/__init__.py +15 -0
  194. bead/simulation/annotators/base.py +175 -0
  195. bead/simulation/annotators/distance_based.py +135 -0
  196. bead/simulation/annotators/lm_based.py +114 -0
  197. bead/simulation/annotators/oracle.py +182 -0
  198. bead/simulation/annotators/random.py +181 -0
  199. bead/simulation/dsl_extension/__init__.py +3 -0
  200. bead/simulation/noise_models/__init__.py +13 -0
  201. bead/simulation/noise_models/base.py +42 -0
  202. bead/simulation/noise_models/random_noise.py +82 -0
  203. bead/simulation/noise_models/systematic.py +132 -0
  204. bead/simulation/noise_models/temperature.py +86 -0
  205. bead/simulation/runner.py +144 -0
  206. bead/simulation/strategies/__init__.py +23 -0
  207. bead/simulation/strategies/base.py +123 -0
  208. bead/simulation/strategies/binary.py +103 -0
  209. bead/simulation/strategies/categorical.py +123 -0
  210. bead/simulation/strategies/cloze.py +224 -0
  211. bead/simulation/strategies/forced_choice.py +127 -0
  212. bead/simulation/strategies/free_text.py +105 -0
  213. bead/simulation/strategies/magnitude.py +116 -0
  214. bead/simulation/strategies/multi_select.py +129 -0
  215. bead/simulation/strategies/ordinal_scale.py +131 -0
  216. bead/templates/__init__.py +27 -0
  217. bead/templates/adapters/__init__.py +17 -0
  218. bead/templates/adapters/base.py +128 -0
  219. bead/templates/adapters/cache.py +178 -0
  220. bead/templates/adapters/huggingface.py +312 -0
  221. bead/templates/combinatorics.py +103 -0
  222. bead/templates/filler.py +605 -0
  223. bead/templates/renderers.py +177 -0
  224. bead/templates/resolver.py +178 -0
  225. bead/templates/strategies.py +1806 -0
  226. bead/templates/streaming.py +195 -0
  227. bead-0.1.0.dist-info/METADATA +212 -0
  228. bead-0.1.0.dist-info/RECORD +231 -0
  229. bead-0.1.0.dist-info/WHEEL +4 -0
  230. bead-0.1.0.dist-info/entry_points.txt +2 -0
  231. bead-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,856 @@
1
+ """Linguistic classification models for lexical items and templates.
2
+
3
+ This module provides models for grouping lexical items and templates by
4
+ linguistic properties. These classifications enable cross-linguistic analysis
5
+ and alignment, supporting both monolingual and multilingual classification.
6
+
7
+ LexicalItemClass and TemplateClass are NOT subclasses of Lexicon and
8
+ TemplateCollection. This is a deliberate architectural choice:
9
+ - Lexicon/TemplateCollection: Operational resource management for experiments
10
+ - LexicalItemClass/TemplateClass: Analytical linguistic classification
11
+
12
+ Primary use cases:
13
+ - Cross-linguistic analysis and comparison
14
+ - Aligning resources across languages for meta-analysis
15
+ - Combining experimental results by linguistic class
16
+ - Linguistic typology studies
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ from collections.abc import Iterator
22
+ from typing import Any
23
+ from uuid import UUID
24
+
25
+ from pydantic import Field, field_validator
26
+
27
+ from bead.data.base import BeadBaseModel
28
+ from bead.data.language_codes import validate_iso639_code
29
+ from bead.resources.lexical_item import LexicalItem
30
+ from bead.resources.template import Template
31
+
32
+
33
+ def _empty_str_list() -> list[str]:
34
+ """Create an empty string list."""
35
+ return []
36
+
37
+
38
+ def _empty_item_dict() -> dict[UUID, LexicalItem]:
39
+ """Create an empty lexical item dictionary."""
40
+ return {}
41
+
42
+
43
+ def _empty_template_dict() -> dict[UUID, Template]:
44
+ """Create an empty template dictionary."""
45
+ return {}
46
+
47
+
48
+ def _empty_metadata_dict() -> dict[str, Any]:
49
+ """Create an empty metadata dictionary."""
50
+ return {}
51
+
52
+
53
class LexicalItemClass(BeadBaseModel):
    """Analytical grouping of lexical items that share a linguistic property.

    A class may cover one language (e.g. "all causative verbs in English")
    or several (e.g. "causative verbs across English, Korean, Zulu").

    Intended for:
    - Cross-linguistic analysis and comparison
    - Aligning lexical items across languages for meta-analysis
    - Combining experimental results by lexical class
    - Linguistic typology studies

    Not intended for experiment generation or resource storage; the module
    documentation points to ``Lexicon`` for those.

    Attributes
    ----------
    name : str
        Name of the class; must not be empty or whitespace-only.
    description : str | None
        Free-text description of the classification.
    property_name : str
        Linguistic property defining the class (e.g. "causative"); must not
        be empty or whitespace-only.
    property_value : Any | None
        Optional concrete value of that property (e.g. True, "agentive").
    items : dict[UUID, LexicalItem]
        Member items keyed by their ``id``.
    tags : list[str]
        Tags for organization and search.
    class_metadata : dict[str, Any]
        Additional metadata about the classification.

    Examples
    --------
    >>> causatives = LexicalItemClass(
    ...     name="causative_verbs_crossling",
    ...     description="Causative verbs across EN, KO",
    ...     property_name="causative",
    ...     property_value=True,
    ... )
    >>> causatives.add(LexicalItem(lemma="break", language_code="en"))
    >>> causatives.add(LexicalItem(lemma="kkakta", language_code="ko"))
    >>> len(causatives)
    2
    >>> causatives.is_multilingual()
    True
    """

    name: str
    description: str | None = None
    property_name: str
    property_value: Any | None = None
    items: dict[UUID, LexicalItem] = Field(default_factory=_empty_item_dict)
    tags: list[str] = Field(default_factory=_empty_str_list)
    class_metadata: dict[str, Any] = Field(default_factory=_empty_metadata_dict)

    @field_validator("name")
    @classmethod
    def validate_name(cls, v: str) -> str:
        """Reject an empty or whitespace-only ``name``.

        Parameters
        ----------
        v : str
            Candidate value for ``name``.

        Returns
        -------
        str
            ``v`` unchanged (it is not stripped).

        Raises
        ------
        ValueError
            If ``v`` is empty or consists only of whitespace.
        """
        if v and v.strip():
            return v
        raise ValueError("name must be non-empty")

    @field_validator("property_name")
    @classmethod
    def validate_property_name(cls, v: str) -> str:
        """Reject an empty or whitespace-only ``property_name``.

        Parameters
        ----------
        v : str
            Candidate value for ``property_name``.

        Returns
        -------
        str
            ``v`` unchanged (it is not stripped).

        Raises
        ------
        ValueError
            If ``v`` is empty or consists only of whitespace.
        """
        if v and v.strip():
            return v
        raise ValueError("property_name must be non-empty")

    def languages(self) -> set[str]:
        """Collect the distinct language codes of the member items.

        Items whose ``language_code`` is ``None`` contribute nothing.

        Returns
        -------
        set[str]
            Lowercased ``language_code`` values of the members. The codes
            are otherwise returned as stored on the items.

        Examples
        --------
        >>> cls = LexicalItemClass(name="test", property_name="causative")
        >>> cls.languages()
        set()
        """
        found: set[str] = set()
        for member in self.items.values():
            code = member.language_code
            if code is None:
                continue
            found.add(code.lower())
        return found

    def get_items_by_language(self, language_code: str) -> list[LexicalItem]:
        """Return the member items whose language matches ``language_code``.

        The query is passed through ``validate_iso639_code`` and lowercased;
        stored codes are only lowercased before comparison.

        Parameters
        ----------
        language_code : str
            Language code to filter by (e.g. "en", "eng", "ko", "kor").

        Returns
        -------
        list[LexicalItem]
            Matching items in dictionary iteration order. Empty when the
            query is rejected by ``validate_iso639_code`` (``ValueError``)
            or normalizes to ``None``.

        Notes
        -----
        NOTE(review): the original author describes the normalization as
        "to ISO 639-3". Matching therefore presumably relies on
        ``LexicalItem`` storing its code in that same normalized form;
        confirm against ``LexicalItem``.
        """
        try:
            canonical = validate_iso639_code(language_code)
        except ValueError:
            # Unrecognized query code: treated as "no matches", not an error.
            return []
        if canonical is None:
            return []
        wanted = canonical.lower()

        matches: list[LexicalItem] = []
        for member in self.items.values():
            code = member.language_code
            if code is not None and code.lower() == wanted:
                matches.append(member)
        return matches

    def is_monolingual(self) -> bool:
        """Report whether at most one language is represented.

        Returns
        -------
        bool
            True when ``languages()`` has zero or one element, so an empty
            class counts as monolingual.

        Examples
        --------
        >>> cls = LexicalItemClass(name="test", property_name="causative")
        >>> cls.is_monolingual()
        True
        """
        language_count = len(self.languages())
        return language_count < 2

    def is_multilingual(self) -> bool:
        """Report whether two or more languages are represented.

        Returns
        -------
        bool
            True when ``languages()`` has more than one element.

        Examples
        --------
        >>> cls = LexicalItemClass(name="test", property_name="causative")
        >>> cls.is_multilingual()
        False
        """
        language_count = len(self.languages())
        return language_count >= 2

    def add(self, item: LexicalItem) -> None:
        """Register ``item`` under its ``id`` and bump the modified time.

        Parameters
        ----------
        item : LexicalItem
            The item to add.

        Raises
        ------
        ValueError
            If an item with the same ``id`` is already a member.

        Examples
        --------
        >>> cls = LexicalItemClass(name="test", property_name="causative")
        >>> cls.add(LexicalItem(lemma="break"))
        >>> len(cls)
        1
        """
        members = self.items
        if item.id in members:
            raise ValueError(f"Item with ID {item.id} already exists in class")
        members[item.id] = item
        self.update_modified_time()

    def remove(self, item_id: UUID) -> LexicalItem:
        """Delete the item stored under ``item_id`` and hand it back.

        The modified time is bumped only when an item was actually removed.

        Parameters
        ----------
        item_id : UUID
            ID of the item to remove.

        Returns
        -------
        LexicalItem
            The item that was removed.

        Raises
        ------
        KeyError
            If no member has that ID.

        Examples
        --------
        >>> cls = LexicalItemClass(name="test", property_name="causative")
        >>> item = LexicalItem(lemma="break")
        >>> cls.add(item)
        >>> cls.remove(item.id).lemma
        'break'
        >>> len(cls)
        0
        """
        members = self.items
        if item_id in members:
            dropped = members.pop(item_id)
            self.update_modified_time()
            return dropped
        raise KeyError(f"Item with ID {item_id} not found in class")

    def get(self, item_id: UUID) -> LexicalItem | None:
        """Look up an item by ID without raising.

        Parameters
        ----------
        item_id : UUID
            ID of the item to fetch.

        Returns
        -------
        LexicalItem | None
            The stored item, or ``None`` when the ID is unknown.

        Examples
        --------
        >>> from uuid import uuid4
        >>> cls = LexicalItemClass(name="test", property_name="causative")
        >>> cls.get(uuid4()) is None
        True
        """
        return self.items.get(item_id, None)

    def __len__(self) -> int:
        """Return how many items the class currently holds.

        Returns
        -------
        int
            Size of the ``items`` dictionary.

        Examples
        --------
        >>> cls = LexicalItemClass(name="test", property_name="causative")
        >>> len(cls)
        0
        """
        member_count = len(self.items)
        return member_count

    def __contains__(self, item_id: UUID) -> bool:
        """Support ``item_id in cls`` membership tests.

        Parameters
        ----------
        item_id : UUID
            The item ID to look for.

        Returns
        -------
        bool
            True when ``item_id`` is a key of ``items``.

        Examples
        --------
        >>> cls = LexicalItemClass(name="test", property_name="causative")
        >>> item = LexicalItem(lemma="break")
        >>> cls.add(item)
        >>> item.id in cls
        True
        """
        return item_id in self.items

    def __iter__(self) -> Iterator[LexicalItem]:  # type: ignore[override]
        """Iterate over the member items themselves (not their IDs).

        Returns
        -------
        Iterator[LexicalItem]
            Iterator over the values of ``items`` in dictionary order.

        Examples
        --------
        >>> cls = LexicalItemClass(name="test", property_name="causative")
        >>> cls.add(LexicalItem(lemma="break"))
        >>> cls.add(LexicalItem(lemma="open"))
        >>> [item.lemma for item in cls]
        ['break', 'open']
        """
        members = self.items.values()
        return iter(members)
430
+
431
+
432
+ class TemplateClass(BeadBaseModel):
433
+ """Groups templates that share linguistic properties.
434
+
435
+ TemplateClass represents a linguistic classification that can span
436
+ a single language (e.g., "transitive templates in English that vary
437
+ only in adjuncts") or multiple languages (e.g., "causative-inchoative
438
+ alternation templates across languages").
439
+
440
+ Primary use cases:
441
+ - Cross-linguistic analysis and comparison
442
+ - Identifying systematic variation patterns (e.g., adjunct variation)
443
+ - Aligning templates across languages for meta-analysis
444
+ - Combining experimental results by template class
445
+ - Linguistic typology studies
446
+
447
+ NOT typically used for:
448
+ - Experiment generation (use TemplateCollection for that)
449
+ - Operational template storage (use TemplateCollection for that)
450
+
451
+ Attributes
452
+ ----------
453
+ name : str
454
+ Name of this template class.
455
+ description : str | None
456
+ Description of the classification (e.g., "Transitive with adjunct variation").
457
+ property_name : str
458
+ The linguistic property that defines this class (e.g., "transitive",
459
+ "causative_inchoative", "wh_question").
460
+ property_value : Any | None
461
+ Optional specific value for the property.
462
+ templates : dict[UUID, Template]
463
+ Dictionary of templates in this class, indexed by UUID.
464
+ tags : list[str]
465
+ Tags for organization and search.
466
+ class_metadata : dict[str, Any]
467
+ Additional metadata about this classification.
468
+
469
+ Examples
470
+ --------
471
+ >>> from bead.resources.structures import Slot
472
+ >>> # Monolingual classification
473
+ >>> transitive_en = TemplateClass(
474
+ ... name="transitive_templates_en",
475
+ ... description="Transitive templates in English",
476
+ ... property_name="transitive",
477
+ ... property_value=True
478
+ ... )
479
+ >>> # Multilingual cross-linguistic classification
480
+ >>> transitives_multi = TemplateClass(
481
+ ... name="transitive_templates_crossling",
482
+ ... description="Transitive templates across languages",
483
+ ... property_name="transitive",
484
+ ... property_value=True
485
+ ... )
486
+ >>> en_template = Template(
487
+ ... name="svo",
488
+ ... template_string="{subject} {verb} {object}.",
489
+ ... slots={"subject": Slot(name="subject"), "verb": Slot(name="verb"),
490
+ ... "object": Slot(name="object")},
491
+ ... language_code="en"
492
+ ... )
493
+ >>> transitives_multi.add(en_template)
494
+ >>> len(transitives_multi)
495
+ 1
496
+ """
497
+
498
+ name: str
499
+ description: str | None = None
500
+ property_name: str
501
+ property_value: Any | None = None
502
+ templates: dict[UUID, Template] = Field(default_factory=_empty_template_dict)
503
+ tags: list[str] = Field(default_factory=_empty_str_list)
504
+ class_metadata: dict[str, Any] = Field(default_factory=_empty_metadata_dict)
505
+
506
@field_validator("name")
@classmethod
def validate_name(cls, v: str) -> str:
    """Reject an empty or whitespace-only ``name``.

    Parameters
    ----------
    v : str
        Candidate value for ``name``.

    Returns
    -------
    str
        ``v`` unchanged (it is not stripped).

    Raises
    ------
    ValueError
        If ``v`` is empty or consists only of whitespace.
    """
    if v and v.strip():
        return v
    raise ValueError("name must be non-empty")
529
+
530
@field_validator("property_name")
@classmethod
def validate_property_name(cls, v: str) -> str:
    """Reject an empty or whitespace-only ``property_name``.

    Parameters
    ----------
    v : str
        Candidate value for ``property_name``.

    Returns
    -------
    str
        ``v`` unchanged (it is not stripped).

    Raises
    ------
    ValueError
        If ``v`` is empty or consists only of whitespace.
    """
    if v and v.strip():
        return v
    raise ValueError("property_name must be non-empty")
553
+
554
def languages(self) -> set[str]:
    """Collect the distinct language codes of the member templates.

    Templates whose ``language_code`` is ``None`` contribute nothing.

    Returns
    -------
    set[str]
        Lowercased ``language_code`` values of the templates. The codes are
        otherwise returned as stored on the templates.
    """
    found: set[str] = set()
    for member in self.templates.values():
        code = member.language_code
        if code is None:
            continue
        found.add(code.lower())
    return found
583
+
584
def get_templates_by_language(self, language_code: str) -> list[Template]:
    """Return the member templates whose language matches ``language_code``.

    The query is passed through ``validate_iso639_code`` and lowercased;
    stored codes are only lowercased before comparison.

    Parameters
    ----------
    language_code : str
        Language code to filter by (e.g. "en", "eng", "ko", "kor").

    Returns
    -------
    list[Template]
        Matching templates in dictionary iteration order. Empty when the
        query is rejected by ``validate_iso639_code`` (``ValueError``) or
        normalizes to ``None``.

    Notes
    -----
    NOTE(review): the original author describes the normalization as
    "to ISO 639-3". Matching therefore presumably relies on ``Template``
    storing its code in that same normalized form; confirm against
    ``Template``.
    """
    try:
        canonical = validate_iso639_code(language_code)
    except ValueError:
        # Unrecognized query code: treated as "no matches", not an error.
        return []
    if canonical is None:
        return []
    wanted = canonical.lower()

    matches: list[Template] = []
    for member in self.templates.values():
        code = member.language_code
        if code is not None and code.lower() == wanted:
            matches.append(member)
    return matches
633
+
634
def is_monolingual(self) -> bool:
    """Report whether at most one language is represented.

    Returns
    -------
    bool
        True when ``languages()`` has zero or one element, so a class with
        no templates counts as monolingual.
    """
    language_count = len(self.languages())
    return language_count < 2
657
+
658
def is_multilingual(self) -> bool:
    """Report whether two or more languages are represented.

    Returns
    -------
    bool
        True when ``languages()`` has more than one element.
    """
    language_count = len(self.languages())
    return language_count >= 2
681
+
682
def add(self, template: Template) -> None:
    """Register ``template`` under its ``id`` and bump the modified time.

    Parameters
    ----------
    template : Template
        The template to add.

    Raises
    ------
    ValueError
        If a template with the same ``id`` is already a member.
    """
    members = self.templates
    if template.id in members:
        raise ValueError(f"Template with ID {template.id} already exists in class")
    members[template.id] = template
    self.update_modified_time()
712
+
713
def remove(self, template_id: UUID) -> Template:
    """Take a template out of the class and hand it back to the caller.

    On success the entry is popped from ``self.templates`` and
    ``self.update_modified_time()`` is called. When the ID is unknown,
    nothing is changed and the modification hook is not called.

    Parameters
    ----------
    template_id : UUID
        ID of the template to remove.

    Returns
    -------
    Template
        The template that was stored under ``template_id``.

    Raises
    ------
    KeyError
        If ``template_id`` is not a key of ``self.templates``.

    Examples
    --------
    >>> from bead.resources.structures import Slot
    >>> cls = TemplateClass(name="test", property_name="transitive")
    >>> t1 = Template(
    ...     name="svo",
    ...     template_string="{s} {v} {o}.",
    ...     slots={"s": Slot(name="s"), "v": Slot(name="v"), "o": Slot(name="o")}
    ... )
    >>> cls.add(t1)
    >>> removed = cls.remove(t1.id)
    >>> removed.name
    'svo'
    >>> len(cls)
    0
    """
    registry = self.templates
    if template_id in registry:
        removed = registry.pop(template_id)
        self.update_modified_time()
        return removed
    raise KeyError(f"Template with ID {template_id} not found in class")
752
+
753
def get(self, template_id: UUID) -> Template | None:
    """Look up a template by ID without raising on a miss.

    Parameters
    ----------
    template_id : UUID
        ID of the template to look up.

    Returns
    -------
    Template | None
        The entry stored in ``self.templates`` under ``template_id``,
        or None when there is no such entry.

    Examples
    --------
    >>> from bead.resources.structures import Slot
    >>> cls = TemplateClass(name="test", property_name="transitive")
    >>> t1 = Template(
    ...     name="svo",
    ...     template_string="{s} {v} {o}.",
    ...     slots={"s": Slot(name="s"), "v": Slot(name="v"), "o": Slot(name="o")}
    ... )
    >>> cls.add(t1)
    >>> retrieved = cls.get(t1.id)
    >>> retrieved.name  # doctest: +SKIP
    'svo'
    >>> from uuid import uuid4
    >>> cls.get(uuid4()) is None
    True
    """
    # Spell out the fallback so the "missing -> None" contract is explicit.
    match = self.templates.get(template_id, None)
    return match
784
+
785
def __len__(self) -> int:
    """Count the templates currently held by the class.

    Returns
    -------
    int
        The number of entries in ``self.templates``.

    Examples
    --------
    >>> cls = TemplateClass(name="test", property_name="transitive")
    >>> len(cls)
    0
    """
    template_count = len(self.templates)
    return template_count
800
+
801
def __contains__(self, template_id: UUID) -> bool:
    """Support ``template_id in cls`` membership tests.

    Parameters
    ----------
    template_id : UUID
        The template ID to look for.

    Returns
    -------
    bool
        True when ``template_id`` is a key of ``self.templates``.

    Examples
    --------
    >>> from bead.resources.structures import Slot
    >>> cls = TemplateClass(name="test", property_name="transitive")
    >>> t1 = Template(
    ...     name="svo",
    ...     template_string="{s} {v} {o}.",
    ...     slots={"s": Slot(name="s"), "v": Slot(name="v"), "o": Slot(name="o")}
    ... )
    >>> cls.add(t1)
    >>> t1.id in cls
    True
    """
    is_present = template_id in self.templates
    return is_present
828
+
829
def __iter__(self) -> Iterator[Template]:  # type: ignore[override]
    """Iterate over the stored templates (values, not IDs).

    Returns
    -------
    Iterator[Template]
        An iterator over the values of ``self.templates``.

    Examples
    --------
    >>> from bead.resources.structures import Slot
    >>> cls = TemplateClass(name="test", property_name="transitive")
    >>> t1 = Template(
    ...     name="svo1",
    ...     template_string="{s} {v} {o}.",
    ...     slots={"s": Slot(name="s"), "v": Slot(name="v"), "o": Slot(name="o")}
    ... )
    >>> t2 = Template(
    ...     name="svo2",
    ...     template_string="{s} {v} {o}.",
    ...     slots={"s": Slot(name="s"), "v": Slot(name="v"), "o": Slot(name="o")}
    ... )
    >>> cls.add(t1)
    >>> cls.add(t2)
    >>> [t.name for t in cls]
    ['svo1', 'svo2']
    """
    stored_templates = self.templates.values()
    return iter(stored_templates)