bead 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. bead/__init__.py +11 -0
  2. bead/__main__.py +11 -0
  3. bead/active_learning/__init__.py +15 -0
  4. bead/active_learning/config.py +231 -0
  5. bead/active_learning/loop.py +566 -0
  6. bead/active_learning/models/__init__.py +24 -0
  7. bead/active_learning/models/base.py +852 -0
  8. bead/active_learning/models/binary.py +910 -0
  9. bead/active_learning/models/categorical.py +943 -0
  10. bead/active_learning/models/cloze.py +862 -0
  11. bead/active_learning/models/forced_choice.py +956 -0
  12. bead/active_learning/models/free_text.py +773 -0
  13. bead/active_learning/models/lora.py +365 -0
  14. bead/active_learning/models/magnitude.py +835 -0
  15. bead/active_learning/models/multi_select.py +795 -0
  16. bead/active_learning/models/ordinal_scale.py +811 -0
  17. bead/active_learning/models/peft_adapter.py +155 -0
  18. bead/active_learning/models/random_effects.py +639 -0
  19. bead/active_learning/selection.py +354 -0
  20. bead/active_learning/strategies.py +391 -0
  21. bead/active_learning/trainers/__init__.py +26 -0
  22. bead/active_learning/trainers/base.py +210 -0
  23. bead/active_learning/trainers/data_collator.py +172 -0
  24. bead/active_learning/trainers/dataset_utils.py +261 -0
  25. bead/active_learning/trainers/huggingface.py +304 -0
  26. bead/active_learning/trainers/lightning.py +324 -0
  27. bead/active_learning/trainers/metrics.py +424 -0
  28. bead/active_learning/trainers/mixed_effects.py +551 -0
  29. bead/active_learning/trainers/model_wrapper.py +509 -0
  30. bead/active_learning/trainers/registry.py +104 -0
  31. bead/adapters/__init__.py +11 -0
  32. bead/adapters/huggingface.py +61 -0
  33. bead/behavioral/__init__.py +116 -0
  34. bead/behavioral/analytics.py +646 -0
  35. bead/behavioral/extraction.py +343 -0
  36. bead/behavioral/merging.py +343 -0
  37. bead/cli/__init__.py +11 -0
  38. bead/cli/active_learning.py +513 -0
  39. bead/cli/active_learning_commands.py +779 -0
  40. bead/cli/completion.py +359 -0
  41. bead/cli/config.py +624 -0
  42. bead/cli/constraint_builders.py +286 -0
  43. bead/cli/deployment.py +859 -0
  44. bead/cli/deployment_trials.py +493 -0
  45. bead/cli/deployment_ui.py +332 -0
  46. bead/cli/display.py +378 -0
  47. bead/cli/items.py +960 -0
  48. bead/cli/items_factories.py +776 -0
  49. bead/cli/list_constraints.py +714 -0
  50. bead/cli/lists.py +490 -0
  51. bead/cli/main.py +430 -0
  52. bead/cli/models.py +877 -0
  53. bead/cli/resource_loaders.py +621 -0
  54. bead/cli/resources.py +1036 -0
  55. bead/cli/shell.py +356 -0
  56. bead/cli/simulate.py +840 -0
  57. bead/cli/templates.py +1158 -0
  58. bead/cli/training.py +1080 -0
  59. bead/cli/utils.py +614 -0
  60. bead/cli/workflow.py +1273 -0
  61. bead/config/__init__.py +68 -0
  62. bead/config/active_learning.py +1009 -0
  63. bead/config/config.py +192 -0
  64. bead/config/defaults.py +118 -0
  65. bead/config/deployment.py +217 -0
  66. bead/config/env.py +147 -0
  67. bead/config/item.py +45 -0
  68. bead/config/list.py +193 -0
  69. bead/config/loader.py +149 -0
  70. bead/config/logging.py +42 -0
  71. bead/config/model.py +49 -0
  72. bead/config/paths.py +46 -0
  73. bead/config/profiles.py +320 -0
  74. bead/config/resources.py +47 -0
  75. bead/config/serialization.py +210 -0
  76. bead/config/simulation.py +206 -0
  77. bead/config/template.py +238 -0
  78. bead/config/validation.py +267 -0
  79. bead/data/__init__.py +65 -0
  80. bead/data/base.py +87 -0
  81. bead/data/identifiers.py +97 -0
  82. bead/data/language_codes.py +61 -0
  83. bead/data/metadata.py +270 -0
  84. bead/data/range.py +123 -0
  85. bead/data/repository.py +358 -0
  86. bead/data/serialization.py +249 -0
  87. bead/data/timestamps.py +89 -0
  88. bead/data/validation.py +349 -0
  89. bead/data_collection/__init__.py +11 -0
  90. bead/data_collection/jatos.py +223 -0
  91. bead/data_collection/merger.py +154 -0
  92. bead/data_collection/prolific.py +198 -0
  93. bead/deployment/__init__.py +5 -0
  94. bead/deployment/distribution.py +402 -0
  95. bead/deployment/jatos/__init__.py +1 -0
  96. bead/deployment/jatos/api.py +200 -0
  97. bead/deployment/jatos/exporter.py +210 -0
  98. bead/deployment/jspsych/__init__.py +9 -0
  99. bead/deployment/jspsych/biome.json +44 -0
  100. bead/deployment/jspsych/config.py +411 -0
  101. bead/deployment/jspsych/generator.py +598 -0
  102. bead/deployment/jspsych/package.json +51 -0
  103. bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
  104. bead/deployment/jspsych/randomizer.py +299 -0
  105. bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
  106. bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
  107. bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
  108. bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
  109. bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
  110. bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
  111. bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
  112. bead/deployment/jspsych/src/plugins/rating.ts +248 -0
  113. bead/deployment/jspsych/src/slopit/index.ts +9 -0
  114. bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
  115. bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
  116. bead/deployment/jspsych/templates/experiment.css +1 -0
  117. bead/deployment/jspsych/templates/experiment.js.template +289 -0
  118. bead/deployment/jspsych/templates/index.html +51 -0
  119. bead/deployment/jspsych/templates/randomizer.js +241 -0
  120. bead/deployment/jspsych/templates/randomizer.js.template +313 -0
  121. bead/deployment/jspsych/trials.py +723 -0
  122. bead/deployment/jspsych/tsconfig.json +23 -0
  123. bead/deployment/jspsych/tsup.config.ts +30 -0
  124. bead/deployment/jspsych/ui/__init__.py +1 -0
  125. bead/deployment/jspsych/ui/components.py +383 -0
  126. bead/deployment/jspsych/ui/styles.py +411 -0
  127. bead/dsl/__init__.py +80 -0
  128. bead/dsl/ast.py +168 -0
  129. bead/dsl/context.py +178 -0
  130. bead/dsl/errors.py +71 -0
  131. bead/dsl/evaluator.py +570 -0
  132. bead/dsl/grammar.lark +81 -0
  133. bead/dsl/parser.py +231 -0
  134. bead/dsl/stdlib.py +929 -0
  135. bead/evaluation/__init__.py +13 -0
  136. bead/evaluation/convergence.py +485 -0
  137. bead/evaluation/interannotator.py +398 -0
  138. bead/items/__init__.py +40 -0
  139. bead/items/adapters/__init__.py +70 -0
  140. bead/items/adapters/anthropic.py +224 -0
  141. bead/items/adapters/api_utils.py +167 -0
  142. bead/items/adapters/base.py +216 -0
  143. bead/items/adapters/google.py +259 -0
  144. bead/items/adapters/huggingface.py +1074 -0
  145. bead/items/adapters/openai.py +323 -0
  146. bead/items/adapters/registry.py +202 -0
  147. bead/items/adapters/sentence_transformers.py +224 -0
  148. bead/items/adapters/togetherai.py +309 -0
  149. bead/items/binary.py +515 -0
  150. bead/items/cache.py +558 -0
  151. bead/items/categorical.py +593 -0
  152. bead/items/cloze.py +757 -0
  153. bead/items/constructor.py +784 -0
  154. bead/items/forced_choice.py +413 -0
  155. bead/items/free_text.py +681 -0
  156. bead/items/generation.py +432 -0
  157. bead/items/item.py +396 -0
  158. bead/items/item_template.py +787 -0
  159. bead/items/magnitude.py +573 -0
  160. bead/items/multi_select.py +621 -0
  161. bead/items/ordinal_scale.py +569 -0
  162. bead/items/scoring.py +448 -0
  163. bead/items/validation.py +723 -0
  164. bead/lists/__init__.py +30 -0
  165. bead/lists/balancer.py +263 -0
  166. bead/lists/constraints.py +1067 -0
  167. bead/lists/experiment_list.py +286 -0
  168. bead/lists/list_collection.py +378 -0
  169. bead/lists/partitioner.py +1141 -0
  170. bead/lists/stratification.py +254 -0
  171. bead/participants/__init__.py +73 -0
  172. bead/participants/collection.py +699 -0
  173. bead/participants/merging.py +312 -0
  174. bead/participants/metadata_spec.py +491 -0
  175. bead/participants/models.py +276 -0
  176. bead/resources/__init__.py +29 -0
  177. bead/resources/adapters/__init__.py +19 -0
  178. bead/resources/adapters/base.py +104 -0
  179. bead/resources/adapters/cache.py +128 -0
  180. bead/resources/adapters/glazing.py +508 -0
  181. bead/resources/adapters/registry.py +117 -0
  182. bead/resources/adapters/unimorph.py +796 -0
  183. bead/resources/classification.py +856 -0
  184. bead/resources/constraint_builders.py +329 -0
  185. bead/resources/constraints.py +165 -0
  186. bead/resources/lexical_item.py +223 -0
  187. bead/resources/lexicon.py +744 -0
  188. bead/resources/loaders.py +209 -0
  189. bead/resources/template.py +441 -0
  190. bead/resources/template_collection.py +707 -0
  191. bead/resources/template_generation.py +349 -0
  192. bead/simulation/__init__.py +29 -0
  193. bead/simulation/annotators/__init__.py +15 -0
  194. bead/simulation/annotators/base.py +175 -0
  195. bead/simulation/annotators/distance_based.py +135 -0
  196. bead/simulation/annotators/lm_based.py +114 -0
  197. bead/simulation/annotators/oracle.py +182 -0
  198. bead/simulation/annotators/random.py +181 -0
  199. bead/simulation/dsl_extension/__init__.py +3 -0
  200. bead/simulation/noise_models/__init__.py +13 -0
  201. bead/simulation/noise_models/base.py +42 -0
  202. bead/simulation/noise_models/random_noise.py +82 -0
  203. bead/simulation/noise_models/systematic.py +132 -0
  204. bead/simulation/noise_models/temperature.py +86 -0
  205. bead/simulation/runner.py +144 -0
  206. bead/simulation/strategies/__init__.py +23 -0
  207. bead/simulation/strategies/base.py +123 -0
  208. bead/simulation/strategies/binary.py +103 -0
  209. bead/simulation/strategies/categorical.py +123 -0
  210. bead/simulation/strategies/cloze.py +224 -0
  211. bead/simulation/strategies/forced_choice.py +127 -0
  212. bead/simulation/strategies/free_text.py +105 -0
  213. bead/simulation/strategies/magnitude.py +116 -0
  214. bead/simulation/strategies/multi_select.py +129 -0
  215. bead/simulation/strategies/ordinal_scale.py +131 -0
  216. bead/templates/__init__.py +27 -0
  217. bead/templates/adapters/__init__.py +17 -0
  218. bead/templates/adapters/base.py +128 -0
  219. bead/templates/adapters/cache.py +178 -0
  220. bead/templates/adapters/huggingface.py +312 -0
  221. bead/templates/combinatorics.py +103 -0
  222. bead/templates/filler.py +605 -0
  223. bead/templates/renderers.py +177 -0
  224. bead/templates/resolver.py +178 -0
  225. bead/templates/strategies.py +1806 -0
  226. bead/templates/streaming.py +195 -0
  227. bead-0.1.0.dist-info/METADATA +212 -0
  228. bead-0.1.0.dist-info/RECORD +231 -0
  229. bead-0.1.0.dist-info/WHEEL +4 -0
  230. bead-0.1.0.dist-info/entry_points.txt +2 -0
  231. bead-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,276 @@
1
+ """Participant data models.
2
+
3
+ This module provides Participant and ParticipantIDMapping models for
4
+ storing participant information with privacy-preserving external ID mapping.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from datetime import datetime
10
+ from typing import TYPE_CHECKING
11
+ from uuid import UUID
12
+
13
+ from pydantic import Field, field_validator
14
+
15
+ from bead.data.base import BeadBaseModel, JsonValue
16
+ from bead.data.timestamps import now_iso8601
17
+
18
+ if TYPE_CHECKING:
19
+ from bead.participants.metadata_spec import ParticipantMetadataSpec
20
+
21
+
22
+ def _empty_metadata_dict() -> dict[str, JsonValue]:
23
+ """Return empty metadata dict."""
24
+ return {}
25
+
26
+
27
+ def _empty_session_list() -> list[str]:
28
+ """Return empty session list."""
29
+ return []
30
+
31
+
32
class Participant(BeadBaseModel):
    """Record describing one study participant.

    The UUID, timestamps, version, and metadata fields come from
    BeadBaseModel. Analysis code works with the internal ``id`` only;
    external identifiers (e.g., Prolific IDs) are kept in a separate
    model for privacy.

    Attributes
    ----------
    id : UUID
        Internal unique identifier (UUIDv7, inherited from BeadBaseModel).
    created_at : datetime
        Creation time of the participant record (inherited).
    modified_at : datetime
        Time of the most recent modification (inherited).
    participant_metadata : dict[str, JsonValue]
        Demographic and other participant attributes (e.g., age, education).
        Keys should line up with a ParticipantMetadataSpec so the record
        can be validated.
    study_id : str | None
        Identifier of the study the participant belongs to, if any.
    session_ids : list[str]
        Identifiers of the participant's sessions (for longitudinal studies).
    consent_timestamp : datetime | None
        Moment at which the participant gave consent.
    notes : str | None
        Free-text notes about the participant.

    Examples
    --------
    >>> participant = Participant(
    ...     participant_metadata={
    ...         "age": 25,
    ...         "education": "bachelors",
    ...         "native_speaker": True,
    ...     },
    ...     study_id="study_001",
    ... )
    >>> participant.participant_metadata["age"]
    25
    >>> str(participant.id)  # doctest: +SKIP
    '019...'  # UUIDv7
    """

    participant_metadata: dict[str, JsonValue] = Field(
        default_factory=_empty_metadata_dict,
        description="Participant attributes (demographics, etc.)",
    )
    study_id: str | None = Field(
        default=None,
        description="Study identifier",
    )
    session_ids: list[str] = Field(
        default_factory=_empty_session_list,
        description="Session identifiers",
    )
    consent_timestamp: datetime | None = Field(
        default=None,
        description="Consent timestamp",
    )
    notes: str | None = Field(
        default=None,
        description="Free-text notes",
    )

    def validate_against_spec(
        self, spec: ParticipantMetadataSpec
    ) -> tuple[bool, list[str]]:
        """Check ``participant_metadata`` against a metadata specification.

        Only scalar entries (str, int, float, bool, or None) are forwarded
        to the spec; list and dict values are left out before validation.

        Parameters
        ----------
        spec : ParticipantMetadataSpec
            Specification whose ``validate_metadata`` performs the check.

        Returns
        -------
        tuple[bool, list[str]]
            (is_valid, list of error messages)

        Examples
        --------
        >>> from bead.participants.metadata_spec import (
        ...     FieldSpec, ParticipantMetadataSpec
        ... )
        >>> spec = ParticipantMetadataSpec(
        ...     name="test",
        ...     fields=[FieldSpec(name="age", field_type="int", required=True)]
        ... )
        >>> p = Participant(participant_metadata={"age": 25})
        >>> p.validate_against_spec(spec)
        (True, [])
        """
        scalar_types = (str, int, float, bool)
        # Narrow the JsonValue mapping to scalars; complex values are dropped.
        flat_metadata: dict[str, str | int | float | bool | None] = {
            name: entry
            for name, entry in self.participant_metadata.items()
            if entry is None or isinstance(entry, scalar_types)
        }
        return spec.validate_metadata(flat_metadata)

    def get_attribute(self, key: str, default: JsonValue = None) -> JsonValue:
        """Look up one metadata attribute, falling back to a default.

        Parameters
        ----------
        key : str
            Attribute name.
        default : JsonValue
            Value returned when ``key`` is absent.

        Returns
        -------
        JsonValue
            The stored value, or ``default`` when the key is missing.

        Examples
        --------
        >>> p = Participant(participant_metadata={"age": 25})
        >>> p.get_attribute("age")
        25
        >>> p.get_attribute("unknown", default="N/A")
        'N/A'
        """
        attributes = self.participant_metadata
        return attributes[key] if key in attributes else default

    def set_attribute(self, key: str, value: JsonValue) -> None:
        """Store one metadata attribute and refresh the modified time.

        Parameters
        ----------
        key : str
            Attribute name.
        value : JsonValue
            Attribute value.

        Examples
        --------
        >>> p = Participant()
        >>> p.set_attribute("age", 25)
        >>> p.participant_metadata["age"]
        25
        """
        self.participant_metadata.update({key: value})
        self.update_modified_time()

    def add_session(self, session_id: str) -> None:
        """Append a session ID and refresh the modified time.

        Parameters
        ----------
        session_id : str
            Session identifier to record.

        Examples
        --------
        >>> p = Participant()
        >>> p.add_session("session_001")
        >>> p.session_ids
        ['session_001']
        """
        sessions = self.session_ids
        sessions.append(session_id)
        self.update_modified_time()
186
+
187
+
188
class ParticipantIDMapping(BeadBaseModel):
    """Link between an external participant ID and an internal UUID.

    Stored SEPARATELY from participant data for IRB/privacy compliance:
    the external ID (e.g., Prolific PID) can be removed while the internal
    UUID stays available for analysis.

    Attributes
    ----------
    id : UUID
        Unique identifier of this mapping record (inherited).
    external_id : str
        External participant identifier (e.g., Prolific PID).
    external_source : str
        Origin of the external ID (e.g., "prolific", "mturk", "sona").
    participant_id : UUID
        Internal participant UUID (references Participant.id).
    mapping_timestamp : datetime
        Creation time of the mapping.
    is_active : bool
        Whether the mapping is active (supports soft deletion).

    Examples
    --------
    >>> from uuid import UUID
    >>> mapping = ParticipantIDMapping(
    ...     external_id="PROLIFIC_ABC123",
    ...     external_source="prolific",
    ...     participant_id=UUID("01234567-89ab-cdef-0123-456789abcdef"),
    ... )
    >>> mapping.external_source
    'prolific'
    """

    external_id: str = Field(
        ...,
        description="External participant ID",
    )
    external_source: str = Field(
        ...,
        description="Source of external ID",
    )
    participant_id: UUID = Field(
        ...,
        description="Internal participant UUID",
    )
    mapping_timestamp: datetime = Field(
        default_factory=now_iso8601,
        description="When mapping was created",
    )
    is_active: bool = Field(
        default=True,
        description="Whether mapping is active",
    )

    @field_validator("external_id", "external_source")
    @classmethod
    def validate_non_empty(cls, v: str) -> str:
        """Reject blank strings and trim surrounding whitespace.

        Parameters
        ----------
        v : str
            String value to validate.

        Returns
        -------
        str
            The value with leading and trailing whitespace removed.

        Raises
        ------
        ValueError
            If the string is empty or whitespace only.
        """
        trimmed = v.strip()
        if not trimmed:
            raise ValueError("Field cannot be empty")
        return trimmed

    def deactivate(self) -> None:
        """Soft-delete this mapping (for privacy compliance).

        Flips ``is_active`` to False and refreshes the modified time; the
        record itself is kept, so it can be retained for audit purposes
        while being marked as no longer valid.

        Examples
        --------
        >>> from uuid import uuid4
        >>> mapping = ParticipantIDMapping(
        ...     external_id="ABC123",
        ...     external_source="prolific",
        ...     participant_id=uuid4(),
        ... )
        >>> mapping.is_active
        True
        >>> mapping.deactivate()
        >>> mapping.is_active
        False
        """
        self.is_active = False
        self.update_modified_time()
@@ -0,0 +1,29 @@
1
+ """Resource models.
2
+
3
+ Provides data models for lexical items, templates, constraints, and
4
+ template structures.
5
+ """
6
+
7
+ from bead.resources.constraints import Constraint
8
+ from bead.resources.lexical_item import LexicalItem, MultiWordExpression, MWEComponent
9
+ from bead.resources.lexicon import Lexicon
10
+ from bead.resources.template import Slot, Template, TemplateSequence, TemplateTree
11
+ from bead.resources.template_collection import TemplateCollection
12
+
13
+ __all__ = [
14
+ # Lexical items
15
+ "LexicalItem",
16
+ "MWEComponent",
17
+ "MultiWordExpression",
18
+ # Lexicon
19
+ "Lexicon",
20
+ # Constraints
21
+ "Constraint",
22
+ # Templates and structures
23
+ "Slot",
24
+ "Template",
25
+ "TemplateSequence",
26
+ "TemplateTree",
27
+ # Template collection
28
+ "TemplateCollection",
29
+ ]
@@ -0,0 +1,19 @@
1
+ """External resource adapters for linguistic databases.
2
+
3
+ Fetches lexical items from VerbNet, PropBank, FrameNet (via glazing), and
4
+ UniMorph morphological paradigms.
5
+ """
6
+
7
+ from bead.resources.adapters.base import ResourceAdapter
8
+ from bead.resources.adapters.cache import AdapterCache
9
+ from bead.resources.adapters.glazing import GlazingAdapter
10
+ from bead.resources.adapters.registry import AdapterRegistry
11
+ from bead.resources.adapters.unimorph import UniMorphAdapter
12
+
13
+ __all__ = [
14
+ "ResourceAdapter",
15
+ "AdapterCache",
16
+ "GlazingAdapter",
17
+ "UniMorphAdapter",
18
+ "AdapterRegistry",
19
+ ]
@@ -0,0 +1,104 @@
1
+ """Abstract base class for external resource adapters.
2
+
3
+ This module defines the interface that all resource adapters must implement
4
+ to fetch lexical items from external linguistic databases.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from abc import ABC, abstractmethod
10
+ from typing import Any
11
+
12
+ from bead.data.language_codes import LanguageCode
13
+ from bead.resources.lexical_item import LexicalItem
14
+
15
+
16
class ResourceAdapter(ABC):
    """Interface shared by all external resource adapters.

    An adapter pulls lexical items out of an external linguistic database
    and converts them to the bead LexicalItem format. Every adapter must
    support filtering by language_code so multi-language workflows work.

    Concrete subclasses provide two methods:
    - fetch_items(): retrieve items from the external resource
    - is_available(): report whether the external resource is accessible

    Examples
    --------
    >>> class MyAdapter(ResourceAdapter):
    ...     def fetch_items(self, query=None, language_code=None, **kwargs):
    ...         # Fetch from external resource
    ...         return [LexicalItem(lemma="walk", pos="VERB", language_code="en")]
    ...     def is_available(self):
    ...         return True
    >>> adapter = MyAdapter()
    >>> items = adapter.fetch_items(query="walk", language_code="en")
    >>> len(items) > 0
    True
    """

    @abstractmethod
    def fetch_items(
        self,
        query: str | None = None,
        language_code: LanguageCode = None,
        **kwargs: Any,
    ) -> list[LexicalItem]:
        """Retrieve lexical items from the external resource.

        Parameters
        ----------
        query : str | None
            Adapter-specific query string (e.g., lemma, predicate name,
            class identifier). When None, the adapter decides what to do
            (it may return all items, raise an error, or fall back to a
            default query).
        language_code : LanguageCode
            ISO 639-1 (2-letter) or ISO 639-3 (3-letter) code used to
            filter results, such as "en", "eng", "ko", "kor". When None,
            items for all available languages are returned.
        **kwargs : Any
            Extra adapter-specific parameters (e.g., pos="VERB",
            resource="verbnet", include_features=True).

        Returns
        -------
        list[LexicalItem]
            Items fetched from the external resource. Each item should
            carry its language_code when that is known.

        Raises
        ------
        ValueError
            If the query is invalid or required parameters are missing.
        RuntimeError
            If the external resource is unavailable or the request fails.

        Examples
        --------
        >>> adapter = MyAdapter()
        >>> items = adapter.fetch_items(query="break", language_code="en")
        >>> all(item.language_code == "en" for item in items)
        True
        """

    @abstractmethod
    def is_available(self) -> bool:
        """Report whether the external resource can be reached.

        Implementations should confirm the resource is usable, whether it
        is backed by installed packages, accessible data files, or network
        APIs.

        Returns
        -------
        bool
            True if the resource can be accessed, False otherwise.

        Examples
        --------
        >>> adapter = MyAdapter()
        >>> adapter.is_available()
        True
        """
@@ -0,0 +1,128 @@
1
+ """Caching for adapter fetch results.
2
+
3
+ This module provides an in-memory cache to avoid redundant fetches from
4
+ external resources when the same query is repeated.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import hashlib
10
+ import json
11
+ from typing import Any
12
+
13
+ from bead.resources.lexical_item import LexicalItem
14
+
15
+
16
class AdapterCache:
    """In-memory cache for adapter fetch results.

    Results are stored under a hash of the query parameters, which avoids
    redundant fetches when the same query is made multiple times. The cache
    keeps its own shallow copy of every stored list and returns a new copy
    on each lookup, so callers can mutate the lists they pass in or get
    back without altering the cached entry.

    Examples
    --------
    >>> cache = AdapterCache()
    >>> items = [LexicalItem(lemma="walk", pos="VERB")]
    >>> key = cache.make_key("glazing", query="walk", language_code="en")
    >>> cache.set(key, items)
    >>> cached = cache.get(key)
    >>> cached == items
    True
    """

    def __init__(self) -> None:
        """Create an empty cache."""
        self._cache: dict[str, list[LexicalItem]] = {}

    def get(self, key: str) -> list[LexicalItem] | None:
        """Get cached result.

        Parameters
        ----------
        key : str
            Cache key generated by make_key().

        Returns
        -------
        list[LexicalItem] | None
            A shallow copy of the cached items if the key exists (an empty
            list for a cached empty result), None otherwise.

        Examples
        --------
        >>> cache = AdapterCache()
        >>> cache.get("nonexistent") is None
        True
        """
        cached = self._cache.get(key)
        if cached is None:
            return None
        # Hand out a copy so callers cannot modify the stored entry.
        return list(cached)

    def set(self, key: str, items: list[LexicalItem]) -> None:
        """Cache result.

        Parameters
        ----------
        key : str
            Cache key generated by make_key().
        items : list[LexicalItem]
            Items to cache. The list is copied, so later changes to the
            caller's list do not affect the cached entry.

        Examples
        --------
        >>> cache = AdapterCache()
        >>> items = [LexicalItem(lemma="walk")]
        >>> cache.set("key1", items)
        >>> cache.get("key1") == items
        True
        """
        self._cache[key] = list(items)

    def clear(self) -> None:
        """Clear entire cache.

        Examples
        --------
        >>> cache = AdapterCache()
        >>> cache.set("key1", [])
        >>> cache.clear()
        >>> cache.get("key1") is None
        True
        """
        self._cache.clear()

    def make_key(
        self, adapter_name: str, query: str | None = None, **kwargs: Any
    ) -> str:
        """Generate cache key from query parameters.

        Create a deterministic hash key from adapter name, query, and
        additional parameters. Same inputs always produce same key,
        regardless of the order in which keyword arguments are given.

        Parameters
        ----------
        adapter_name : str
            Name of the adapter (e.g., "glazing", "unimorph").
        query : str | None
            Query string.
        **kwargs : Any
            Additional query parameters (e.g., language_code, pos). Values
            must be JSON-serializable.

        Returns
        -------
        str
            Cache key (hexadecimal SHA-256 digest).

        Raises
        ------
        TypeError
            If a parameter value cannot be serialized to JSON.

        Examples
        --------
        >>> cache = AdapterCache()
        >>> key1 = cache.make_key("glazing", query="walk", language_code="en")
        >>> key2 = cache.make_key("glazing", query="walk", language_code="en")
        >>> key1 == key2
        True
        >>> key3 = cache.make_key("glazing", query="run", language_code="en")
        >>> key1 != key3
        True
        """
        # Keep extra parameters in their own namespace: merged at the top
        # level, a keyword argument named "adapter" would overwrite the
        # adapter name and make unrelated queries share a key.
        params = {"adapter": adapter_name, "query": query, "kwargs": kwargs}
        # sort_keys applies to nested dicts too, giving a stable serialization.
        serialized = json.dumps(params, sort_keys=True)
        return hashlib.sha256(serialized.encode("utf-8")).hexdigest()