bead 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bead/__init__.py +11 -0
- bead/__main__.py +11 -0
- bead/active_learning/__init__.py +15 -0
- bead/active_learning/config.py +231 -0
- bead/active_learning/loop.py +566 -0
- bead/active_learning/models/__init__.py +24 -0
- bead/active_learning/models/base.py +852 -0
- bead/active_learning/models/binary.py +910 -0
- bead/active_learning/models/categorical.py +943 -0
- bead/active_learning/models/cloze.py +862 -0
- bead/active_learning/models/forced_choice.py +956 -0
- bead/active_learning/models/free_text.py +773 -0
- bead/active_learning/models/lora.py +365 -0
- bead/active_learning/models/magnitude.py +835 -0
- bead/active_learning/models/multi_select.py +795 -0
- bead/active_learning/models/ordinal_scale.py +811 -0
- bead/active_learning/models/peft_adapter.py +155 -0
- bead/active_learning/models/random_effects.py +639 -0
- bead/active_learning/selection.py +354 -0
- bead/active_learning/strategies.py +391 -0
- bead/active_learning/trainers/__init__.py +26 -0
- bead/active_learning/trainers/base.py +210 -0
- bead/active_learning/trainers/data_collator.py +172 -0
- bead/active_learning/trainers/dataset_utils.py +261 -0
- bead/active_learning/trainers/huggingface.py +304 -0
- bead/active_learning/trainers/lightning.py +324 -0
- bead/active_learning/trainers/metrics.py +424 -0
- bead/active_learning/trainers/mixed_effects.py +551 -0
- bead/active_learning/trainers/model_wrapper.py +509 -0
- bead/active_learning/trainers/registry.py +104 -0
- bead/adapters/__init__.py +11 -0
- bead/adapters/huggingface.py +61 -0
- bead/behavioral/__init__.py +116 -0
- bead/behavioral/analytics.py +646 -0
- bead/behavioral/extraction.py +343 -0
- bead/behavioral/merging.py +343 -0
- bead/cli/__init__.py +11 -0
- bead/cli/active_learning.py +513 -0
- bead/cli/active_learning_commands.py +779 -0
- bead/cli/completion.py +359 -0
- bead/cli/config.py +624 -0
- bead/cli/constraint_builders.py +286 -0
- bead/cli/deployment.py +859 -0
- bead/cli/deployment_trials.py +493 -0
- bead/cli/deployment_ui.py +332 -0
- bead/cli/display.py +378 -0
- bead/cli/items.py +960 -0
- bead/cli/items_factories.py +776 -0
- bead/cli/list_constraints.py +714 -0
- bead/cli/lists.py +490 -0
- bead/cli/main.py +430 -0
- bead/cli/models.py +877 -0
- bead/cli/resource_loaders.py +621 -0
- bead/cli/resources.py +1036 -0
- bead/cli/shell.py +356 -0
- bead/cli/simulate.py +840 -0
- bead/cli/templates.py +1158 -0
- bead/cli/training.py +1080 -0
- bead/cli/utils.py +614 -0
- bead/cli/workflow.py +1273 -0
- bead/config/__init__.py +68 -0
- bead/config/active_learning.py +1009 -0
- bead/config/config.py +192 -0
- bead/config/defaults.py +118 -0
- bead/config/deployment.py +217 -0
- bead/config/env.py +147 -0
- bead/config/item.py +45 -0
- bead/config/list.py +193 -0
- bead/config/loader.py +149 -0
- bead/config/logging.py +42 -0
- bead/config/model.py +49 -0
- bead/config/paths.py +46 -0
- bead/config/profiles.py +320 -0
- bead/config/resources.py +47 -0
- bead/config/serialization.py +210 -0
- bead/config/simulation.py +206 -0
- bead/config/template.py +238 -0
- bead/config/validation.py +267 -0
- bead/data/__init__.py +65 -0
- bead/data/base.py +87 -0
- bead/data/identifiers.py +97 -0
- bead/data/language_codes.py +61 -0
- bead/data/metadata.py +270 -0
- bead/data/range.py +123 -0
- bead/data/repository.py +358 -0
- bead/data/serialization.py +249 -0
- bead/data/timestamps.py +89 -0
- bead/data/validation.py +349 -0
- bead/data_collection/__init__.py +11 -0
- bead/data_collection/jatos.py +223 -0
- bead/data_collection/merger.py +154 -0
- bead/data_collection/prolific.py +198 -0
- bead/deployment/__init__.py +5 -0
- bead/deployment/distribution.py +402 -0
- bead/deployment/jatos/__init__.py +1 -0
- bead/deployment/jatos/api.py +200 -0
- bead/deployment/jatos/exporter.py +210 -0
- bead/deployment/jspsych/__init__.py +9 -0
- bead/deployment/jspsych/biome.json +44 -0
- bead/deployment/jspsych/config.py +411 -0
- bead/deployment/jspsych/generator.py +598 -0
- bead/deployment/jspsych/package.json +51 -0
- bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
- bead/deployment/jspsych/randomizer.py +299 -0
- bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
- bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
- bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
- bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
- bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
- bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
- bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
- bead/deployment/jspsych/src/plugins/rating.ts +248 -0
- bead/deployment/jspsych/src/slopit/index.ts +9 -0
- bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
- bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
- bead/deployment/jspsych/templates/experiment.css +1 -0
- bead/deployment/jspsych/templates/experiment.js.template +289 -0
- bead/deployment/jspsych/templates/index.html +51 -0
- bead/deployment/jspsych/templates/randomizer.js +241 -0
- bead/deployment/jspsych/templates/randomizer.js.template +313 -0
- bead/deployment/jspsych/trials.py +723 -0
- bead/deployment/jspsych/tsconfig.json +23 -0
- bead/deployment/jspsych/tsup.config.ts +30 -0
- bead/deployment/jspsych/ui/__init__.py +1 -0
- bead/deployment/jspsych/ui/components.py +383 -0
- bead/deployment/jspsych/ui/styles.py +411 -0
- bead/dsl/__init__.py +80 -0
- bead/dsl/ast.py +168 -0
- bead/dsl/context.py +178 -0
- bead/dsl/errors.py +71 -0
- bead/dsl/evaluator.py +570 -0
- bead/dsl/grammar.lark +81 -0
- bead/dsl/parser.py +231 -0
- bead/dsl/stdlib.py +929 -0
- bead/evaluation/__init__.py +13 -0
- bead/evaluation/convergence.py +485 -0
- bead/evaluation/interannotator.py +398 -0
- bead/items/__init__.py +40 -0
- bead/items/adapters/__init__.py +70 -0
- bead/items/adapters/anthropic.py +224 -0
- bead/items/adapters/api_utils.py +167 -0
- bead/items/adapters/base.py +216 -0
- bead/items/adapters/google.py +259 -0
- bead/items/adapters/huggingface.py +1074 -0
- bead/items/adapters/openai.py +323 -0
- bead/items/adapters/registry.py +202 -0
- bead/items/adapters/sentence_transformers.py +224 -0
- bead/items/adapters/togetherai.py +309 -0
- bead/items/binary.py +515 -0
- bead/items/cache.py +558 -0
- bead/items/categorical.py +593 -0
- bead/items/cloze.py +757 -0
- bead/items/constructor.py +784 -0
- bead/items/forced_choice.py +413 -0
- bead/items/free_text.py +681 -0
- bead/items/generation.py +432 -0
- bead/items/item.py +396 -0
- bead/items/item_template.py +787 -0
- bead/items/magnitude.py +573 -0
- bead/items/multi_select.py +621 -0
- bead/items/ordinal_scale.py +569 -0
- bead/items/scoring.py +448 -0
- bead/items/validation.py +723 -0
- bead/lists/__init__.py +30 -0
- bead/lists/balancer.py +263 -0
- bead/lists/constraints.py +1067 -0
- bead/lists/experiment_list.py +286 -0
- bead/lists/list_collection.py +378 -0
- bead/lists/partitioner.py +1141 -0
- bead/lists/stratification.py +254 -0
- bead/participants/__init__.py +73 -0
- bead/participants/collection.py +699 -0
- bead/participants/merging.py +312 -0
- bead/participants/metadata_spec.py +491 -0
- bead/participants/models.py +276 -0
- bead/resources/__init__.py +29 -0
- bead/resources/adapters/__init__.py +19 -0
- bead/resources/adapters/base.py +104 -0
- bead/resources/adapters/cache.py +128 -0
- bead/resources/adapters/glazing.py +508 -0
- bead/resources/adapters/registry.py +117 -0
- bead/resources/adapters/unimorph.py +796 -0
- bead/resources/classification.py +856 -0
- bead/resources/constraint_builders.py +329 -0
- bead/resources/constraints.py +165 -0
- bead/resources/lexical_item.py +223 -0
- bead/resources/lexicon.py +744 -0
- bead/resources/loaders.py +209 -0
- bead/resources/template.py +441 -0
- bead/resources/template_collection.py +707 -0
- bead/resources/template_generation.py +349 -0
- bead/simulation/__init__.py +29 -0
- bead/simulation/annotators/__init__.py +15 -0
- bead/simulation/annotators/base.py +175 -0
- bead/simulation/annotators/distance_based.py +135 -0
- bead/simulation/annotators/lm_based.py +114 -0
- bead/simulation/annotators/oracle.py +182 -0
- bead/simulation/annotators/random.py +181 -0
- bead/simulation/dsl_extension/__init__.py +3 -0
- bead/simulation/noise_models/__init__.py +13 -0
- bead/simulation/noise_models/base.py +42 -0
- bead/simulation/noise_models/random_noise.py +82 -0
- bead/simulation/noise_models/systematic.py +132 -0
- bead/simulation/noise_models/temperature.py +86 -0
- bead/simulation/runner.py +144 -0
- bead/simulation/strategies/__init__.py +23 -0
- bead/simulation/strategies/base.py +123 -0
- bead/simulation/strategies/binary.py +103 -0
- bead/simulation/strategies/categorical.py +123 -0
- bead/simulation/strategies/cloze.py +224 -0
- bead/simulation/strategies/forced_choice.py +127 -0
- bead/simulation/strategies/free_text.py +105 -0
- bead/simulation/strategies/magnitude.py +116 -0
- bead/simulation/strategies/multi_select.py +129 -0
- bead/simulation/strategies/ordinal_scale.py +131 -0
- bead/templates/__init__.py +27 -0
- bead/templates/adapters/__init__.py +17 -0
- bead/templates/adapters/base.py +128 -0
- bead/templates/adapters/cache.py +178 -0
- bead/templates/adapters/huggingface.py +312 -0
- bead/templates/combinatorics.py +103 -0
- bead/templates/filler.py +605 -0
- bead/templates/renderers.py +177 -0
- bead/templates/resolver.py +178 -0
- bead/templates/strategies.py +1806 -0
- bead/templates/streaming.py +195 -0
- bead-0.1.0.dist-info/METADATA +212 -0
- bead-0.1.0.dist-info/RECORD +231 -0
- bead-0.1.0.dist-info/WHEEL +4 -0
- bead-0.1.0.dist-info/entry_points.txt +2 -0
- bead-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
"""Participant data models.
|
|
2
|
+
|
|
3
|
+
This module provides Participant and ParticipantIDMapping models for
|
|
4
|
+
storing participant information with privacy-preserving external ID mapping.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from typing import TYPE_CHECKING
|
|
11
|
+
from uuid import UUID
|
|
12
|
+
|
|
13
|
+
from pydantic import Field, field_validator
|
|
14
|
+
|
|
15
|
+
from bead.data.base import BeadBaseModel, JsonValue
|
|
16
|
+
from bead.data.timestamps import now_iso8601
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from bead.participants.metadata_spec import ParticipantMetadataSpec
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _empty_metadata_dict() -> dict[str, JsonValue]:
|
|
23
|
+
"""Return empty metadata dict."""
|
|
24
|
+
return {}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _empty_session_list() -> list[str]:
|
|
28
|
+
"""Return empty session list."""
|
|
29
|
+
return []
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class Participant(BeadBaseModel):
    """Record describing one study participant.

    The model extends BeadBaseModel, from which it inherits its UUID,
    timestamps, version, and metadata. Analysis code should refer to the
    internal `id` (UUID); external identifiers (e.g., Prolific IDs) are kept
    in a separate model for privacy.

    Attributes
    ----------
    id : UUID
        Internal unique identifier (UUIDv7, inherited from BeadBaseModel).
    created_at : datetime
        Creation time of the participant record (inherited).
    modified_at : datetime
        Time of the most recent modification of the record (inherited).
    participant_metadata : dict[str, JsonValue]
        Demographic and other participant attributes (e.g., age, education).
        Keys should match a ParticipantMetadataSpec for validation.
    study_id : str | None
        Identifier of the study the participant belongs to, if any.
    session_ids : list[str]
        Identifiers of this participant's sessions (for longitudinal studies).
    consent_timestamp : datetime | None
        Time at which the participant gave consent.
    notes : str | None
        Free-text notes about the participant.

    Examples
    --------
    >>> participant = Participant(
    ...     participant_metadata={
    ...         "age": 25,
    ...         "education": "bachelors",
    ...         "native_speaker": True,
    ...     },
    ...     study_id="study_001",
    ... )
    >>> participant.participant_metadata["age"]
    25
    >>> str(participant.id)  # doctest: +SKIP
    '019...'  # UUIDv7
    """

    participant_metadata: dict[str, JsonValue] = Field(
        default_factory=_empty_metadata_dict,
        description="Participant attributes (demographics, etc.)",
    )
    study_id: str | None = Field(default=None, description="Study identifier")
    session_ids: list[str] = Field(
        default_factory=_empty_session_list, description="Session identifiers"
    )
    consent_timestamp: datetime | None = Field(
        default=None, description="Consent timestamp"
    )
    notes: str | None = Field(default=None, description="Free-text notes")

    def validate_against_spec(
        self, spec: ParticipantMetadataSpec
    ) -> tuple[bool, list[str]]:
        """Check ``participant_metadata`` against a metadata specification.

        Only scalar entries (str, int, float, bool, or None) are handed to
        the specification; list and dict values are left out because they
        cannot match a FieldSpec type.

        Parameters
        ----------
        spec : ParticipantMetadataSpec
            Specification whose ``validate_metadata`` performs the check.

        Returns
        -------
        tuple[bool, list[str]]
            (is_valid, list of error messages)

        Examples
        --------
        >>> from bead.participants.metadata_spec import (
        ...     FieldSpec, ParticipantMetadataSpec
        ... )
        >>> spec = ParticipantMetadataSpec(
        ...     name="test",
        ...     fields=[FieldSpec(name="age", field_type="int", required=True)]
        ... )
        >>> p = Participant(participant_metadata={"age": 25})
        >>> p.validate_against_spec(spec)
        (True, [])
        """
        scalar_types = (str, int, float, bool)
        # Keep scalars only; complex values (lists, dicts) are dropped.
        scalar_metadata: dict[str, str | int | float | bool | None] = {
            name: entry
            for name, entry in self.participant_metadata.items()
            if entry is None or isinstance(entry, scalar_types)
        }
        return spec.validate_metadata(scalar_metadata)

    def get_attribute(self, key: str, default: JsonValue = None) -> JsonValue:
        """Look up one entry of ``participant_metadata``.

        Parameters
        ----------
        key : str
            Attribute name.
        default : JsonValue
            Value returned when ``key`` is not present.

        Returns
        -------
        JsonValue
            The stored value, or ``default`` if the key is missing.

        Examples
        --------
        >>> p = Participant(participant_metadata={"age": 25})
        >>> p.get_attribute("age")
        25
        >>> p.get_attribute("unknown", default="N/A")
        'N/A'
        """
        attributes = self.participant_metadata
        if key in attributes:
            return attributes[key]
        return default

    def set_attribute(self, key: str, value: JsonValue) -> None:
        """Store one entry in ``participant_metadata``.

        The modification time is refreshed via ``update_modified_time``.

        Parameters
        ----------
        key : str
            Attribute name.
        value : JsonValue
            Attribute value.

        Examples
        --------
        >>> p = Participant()
        >>> p.set_attribute("age", 25)
        >>> p.participant_metadata["age"]
        25
        """
        self.participant_metadata.update({key: value})
        self.update_modified_time()

    def add_session(self, session_id: str) -> None:
        """Append a session identifier to ``session_ids``.

        The modification time is refreshed via ``update_modified_time``.

        Parameters
        ----------
        session_id : str
            Session identifier to add.

        Examples
        --------
        >>> p = Participant()
        >>> p.add_session("session_001")
        >>> p.session_ids
        ['session_001']
        """
        # Mutate the existing list in place (no attribute reassignment).
        self.session_ids.extend((session_id,))
        self.update_modified_time()
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
class ParticipantIDMapping(BeadBaseModel):
    """Link between an external participant ID and an internal UUID.

    Records of this model are stored SEPARATELY from participant data for
    IRB/privacy compliance: the external ID (e.g., Prolific PID) can be
    deleted while the internal UUID is retained for analysis.

    Attributes
    ----------
    id : UUID
        Unique identifier for this mapping record (inherited).
    external_id : str
        External participant identifier (e.g., Prolific PID).
    external_source : str
        Source of the external ID (e.g., "prolific", "mturk", "sona").
    participant_id : UUID
        Internal participant UUID (references Participant.id).
    mapping_timestamp : datetime
        When this mapping was created.
    is_active : bool
        Whether this mapping is active (for soft deletion).

    Examples
    --------
    >>> from uuid import UUID
    >>> mapping = ParticipantIDMapping(
    ...     external_id="PROLIFIC_ABC123",
    ...     external_source="prolific",
    ...     participant_id=UUID("01234567-89ab-cdef-0123-456789abcdef"),
    ... )
    >>> mapping.external_source
    'prolific'
    """

    external_id: str = Field(..., description="External participant ID")
    external_source: str = Field(..., description="Source of external ID")
    participant_id: UUID = Field(..., description="Internal participant UUID")
    mapping_timestamp: datetime = Field(
        default_factory=now_iso8601, description="When mapping was created"
    )
    is_active: bool = Field(default=True, description="Whether mapping is active")

    @field_validator("external_id", "external_source")
    @classmethod
    def validate_non_empty(cls, v: str) -> str:
        """Reject blank strings and trim surrounding whitespace.

        Applied to ``external_id`` and ``external_source``.

        Parameters
        ----------
        v : str
            String value to validate.

        Returns
        -------
        str
            The value with leading and trailing whitespace removed.

        Raises
        ------
        ValueError
            If string is empty or whitespace only.
        """
        # A falsy value is treated like an empty string.
        trimmed = v.strip() if v else ""
        if trimmed:
            return trimmed
        raise ValueError("Field cannot be empty")

    def deactivate(self) -> None:
        """Mark this mapping as inactive (soft deletion).

        ``is_active`` is set to False and the modification time is refreshed
        via ``update_modified_time``; the record itself is not deleted, so it
        can be retained for audit purposes while no longer counting as valid.

        Examples
        --------
        >>> from uuid import uuid4
        >>> mapping = ParticipantIDMapping(
        ...     external_id="ABC123",
        ...     external_source="prolific",
        ...     participant_id=uuid4(),
        ... )
        >>> mapping.is_active
        True
        >>> mapping.deactivate()
        >>> mapping.is_active
        False
        """
        self.is_active = False
        self.update_modified_time()
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""Resource models.
|
|
2
|
+
|
|
3
|
+
Provides data models for lexical items, templates, constraints, and
|
|
4
|
+
template structures.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from bead.resources.constraints import Constraint
|
|
8
|
+
from bead.resources.lexical_item import LexicalItem, MultiWordExpression, MWEComponent
|
|
9
|
+
from bead.resources.lexicon import Lexicon
|
|
10
|
+
from bead.resources.template import Slot, Template, TemplateSequence, TemplateTree
|
|
11
|
+
from bead.resources.template_collection import TemplateCollection
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
# Lexical items
|
|
15
|
+
"LexicalItem",
|
|
16
|
+
"MWEComponent",
|
|
17
|
+
"MultiWordExpression",
|
|
18
|
+
# Lexicon
|
|
19
|
+
"Lexicon",
|
|
20
|
+
# Constraints
|
|
21
|
+
"Constraint",
|
|
22
|
+
# Templates and structures
|
|
23
|
+
"Slot",
|
|
24
|
+
"Template",
|
|
25
|
+
"TemplateSequence",
|
|
26
|
+
"TemplateTree",
|
|
27
|
+
# Template collection
|
|
28
|
+
"TemplateCollection",
|
|
29
|
+
]
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""External resource adapters for linguistic databases.
|
|
2
|
+
|
|
3
|
+
Fetches lexical items from VerbNet, PropBank, FrameNet (via glazing), and
|
|
4
|
+
UniMorph morphological paradigms.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from bead.resources.adapters.base import ResourceAdapter
|
|
8
|
+
from bead.resources.adapters.cache import AdapterCache
|
|
9
|
+
from bead.resources.adapters.glazing import GlazingAdapter
|
|
10
|
+
from bead.resources.adapters.registry import AdapterRegistry
|
|
11
|
+
from bead.resources.adapters.unimorph import UniMorphAdapter
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"ResourceAdapter",
|
|
15
|
+
"AdapterCache",
|
|
16
|
+
"GlazingAdapter",
|
|
17
|
+
"UniMorphAdapter",
|
|
18
|
+
"AdapterRegistry",
|
|
19
|
+
]
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Abstract base class for external resource adapters.
|
|
2
|
+
|
|
3
|
+
This module defines the interface that all resource adapters must implement
|
|
4
|
+
to fetch lexical items from external linguistic databases.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from abc import ABC, abstractmethod
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from bead.data.language_codes import LanguageCode
|
|
13
|
+
from bead.resources.lexical_item import LexicalItem
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ResourceAdapter(ABC):
    """Interface for adapters that read external linguistic resources.

    An adapter retrieves lexical items from an external linguistic database
    and returns them as bead LexicalItem objects. Every adapter is expected
    to honour the ``language_code`` filter so that multi-language workflows
    are supported.

    Concrete subclasses must provide:

    - fetch_items(): retrieve items from the external resource
    - is_available(): report whether the external resource is accessible

    Examples
    --------
    >>> class MyAdapter(ResourceAdapter):
    ...     def fetch_items(self, query=None, language_code=None, **kwargs):
    ...         # Fetch from external resource
    ...         return [LexicalItem(lemma="walk", pos="VERB", language_code="en")]
    ...     def is_available(self):
    ...         return True
    >>> adapter = MyAdapter()
    >>> items = adapter.fetch_items(query="walk", language_code="en")
    >>> len(items) > 0
    True
    """

    @abstractmethod
    def fetch_items(
        self,
        query: str | None = None,
        language_code: LanguageCode = None,
        **kwargs: Any,
    ) -> list[LexicalItem]:
        """Retrieve lexical items from the external resource.

        Parameters
        ----------
        query : str | None
            Adapter-specific query string (e.g., lemma, predicate name,
            class identifier). What happens for None is up to the adapter
            (it may return all items, raise an error, or use a default
            query).
        language_code : LanguageCode
            ISO 639-1 (2-letter) or ISO 639-3 (3-letter) language code used
            to filter results. Examples: "en", "eng", "ko", "kor". If None,
            items for all available languages are returned.
        **kwargs : Any
            Further adapter-specific parameters (e.g., pos="VERB",
            resource="verbnet", include_features=True).

        Returns
        -------
        list[LexicalItem]
            Items fetched from the external resource. Each item should have
            language_code set if known.

        Raises
        ------
        ValueError
            If query is invalid or required parameters are missing.
        RuntimeError
            If the external resource is unavailable or the request fails.

        Examples
        --------
        ``MyAdapter`` is the illustrative subclass from the class docstring.

        >>> adapter = MyAdapter()  # doctest: +SKIP
        >>> items = adapter.fetch_items(  # doctest: +SKIP
        ...     query="break", language_code="en"
        ... )
        >>> all(item.language_code == "en" for item in items)  # doctest: +SKIP
        True
        """
        ...

    @abstractmethod
    def is_available(self) -> bool:
        """Report whether the external resource can be used.

        Implementations should verify that the resource is reachable,
        whether through installed packages, accessible data files, or
        network APIs.

        Returns
        -------
        bool
            True if the resource can be accessed, False otherwise.

        Examples
        --------
        ``MyAdapter`` is the illustrative subclass from the class docstring.

        >>> adapter = MyAdapter()  # doctest: +SKIP
        >>> adapter.is_available()  # doctest: +SKIP
        True
        """
        ...
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""Caching for adapter fetch results.
|
|
2
|
+
|
|
3
|
+
This module provides an in-memory cache to avoid redundant fetches from
|
|
4
|
+
external resources when the same query is repeated.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import hashlib
|
|
10
|
+
import json
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from bead.resources.lexical_item import LexicalItem
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class AdapterCache:
    """In-memory cache for adapter fetch results.

    Results are stored in a plain dict keyed by a hash of the query
    parameters (see ``make_key``), so repeating the same query does not
    require a second fetch from the external resource.

    Examples
    --------
    >>> cache = AdapterCache()
    >>> items = [LexicalItem(lemma="walk", pos="VERB")]
    >>> key = cache.make_key("glazing", query="walk", language_code="en")
    >>> cache.set(key, items)
    >>> cached = cache.get(key)
    >>> cached == items
    True
    """

    def __init__(self) -> None:
        # Maps cache key -> list of items stored for that key.
        self._cache: dict[str, list[LexicalItem]] = {}

    def get(self, key: str) -> list[LexicalItem] | None:
        """Get cached result.

        Parameters
        ----------
        key : str
            Cache key generated by make_key().

        Returns
        -------
        list[LexicalItem] | None
            Cached items if key exists, None otherwise. The stored list
            object itself is returned, not a copy.

        Examples
        --------
        >>> cache = AdapterCache()
        >>> cache.get("nonexistent") is None
        True
        """
        return self._cache.get(key)

    def set(self, key: str, items: list[LexicalItem]) -> None:
        """Cache result.

        Any value previously stored under ``key`` is replaced. The list is
        stored by reference, not copied.

        Parameters
        ----------
        key : str
            Cache key generated by make_key().
        items : list[LexicalItem]
            Items to cache.

        Examples
        --------
        >>> cache = AdapterCache()
        >>> items = [LexicalItem(lemma="walk")]
        >>> cache.set("key1", items)
        >>> cache.get("key1") == items
        True
        """
        self._cache[key] = items

    def clear(self) -> None:
        """Clear entire cache.

        Examples
        --------
        >>> cache = AdapterCache()
        >>> cache.set("key1", [])
        >>> cache.clear()
        >>> cache.get("key1") is None
        True
        """
        self._cache.clear()

    def make_key(
        self, adapter_name: str, query: str | None = None, **kwargs: Any
    ) -> str:
        """Generate cache key from query parameters.

        Create a deterministic hash key from adapter name, query, and
        additional parameters. Same inputs always produce same key,
        regardless of the order in which keyword arguments are passed.

        Parameters
        ----------
        adapter_name : str
            Name of the adapter (e.g., "glazing", "unimorph").
        query : str | None
            Query string.
        **kwargs : Any
            Additional query parameters (e.g., language_code, pos). Values
            must be JSON-serializable.

        Returns
        -------
        str
            Cache key (hexadecimal SHA-256 digest).

        Raises
        ------
        TypeError
            If a parameter value cannot be serialized by ``json.dumps``.

        Examples
        --------
        >>> cache = AdapterCache()
        >>> key1 = cache.make_key("glazing", query="walk", language_code="en")
        >>> key2 = cache.make_key("glazing", query="walk", language_code="en")
        >>> key1 == key2
        True
        >>> key3 = cache.make_key("glazing", query="run", language_code="en")
        >>> key1 != key3
        True
        """
        # Keep the extra parameters in their own namespace: merging them
        # into the top-level dict would let a keyword argument named
        # "adapter" overwrite the adapter name and make keys of different
        # adapters collide.
        params = {"adapter": adapter_name, "query": query, "kwargs": kwargs}
        # sort_keys (applied to nested dicts as well) makes the
        # serialization independent of keyword-argument order.
        serialized = json.dumps(params, sort_keys=True)
        return hashlib.sha256(serialized.encode()).hexdigest()
|