bead-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bead/__init__.py +11 -0
- bead/__main__.py +11 -0
- bead/active_learning/__init__.py +15 -0
- bead/active_learning/config.py +231 -0
- bead/active_learning/loop.py +566 -0
- bead/active_learning/models/__init__.py +24 -0
- bead/active_learning/models/base.py +852 -0
- bead/active_learning/models/binary.py +910 -0
- bead/active_learning/models/categorical.py +943 -0
- bead/active_learning/models/cloze.py +862 -0
- bead/active_learning/models/forced_choice.py +956 -0
- bead/active_learning/models/free_text.py +773 -0
- bead/active_learning/models/lora.py +365 -0
- bead/active_learning/models/magnitude.py +835 -0
- bead/active_learning/models/multi_select.py +795 -0
- bead/active_learning/models/ordinal_scale.py +811 -0
- bead/active_learning/models/peft_adapter.py +155 -0
- bead/active_learning/models/random_effects.py +639 -0
- bead/active_learning/selection.py +354 -0
- bead/active_learning/strategies.py +391 -0
- bead/active_learning/trainers/__init__.py +26 -0
- bead/active_learning/trainers/base.py +210 -0
- bead/active_learning/trainers/data_collator.py +172 -0
- bead/active_learning/trainers/dataset_utils.py +261 -0
- bead/active_learning/trainers/huggingface.py +304 -0
- bead/active_learning/trainers/lightning.py +324 -0
- bead/active_learning/trainers/metrics.py +424 -0
- bead/active_learning/trainers/mixed_effects.py +551 -0
- bead/active_learning/trainers/model_wrapper.py +509 -0
- bead/active_learning/trainers/registry.py +104 -0
- bead/adapters/__init__.py +11 -0
- bead/adapters/huggingface.py +61 -0
- bead/behavioral/__init__.py +116 -0
- bead/behavioral/analytics.py +646 -0
- bead/behavioral/extraction.py +343 -0
- bead/behavioral/merging.py +343 -0
- bead/cli/__init__.py +11 -0
- bead/cli/active_learning.py +513 -0
- bead/cli/active_learning_commands.py +779 -0
- bead/cli/completion.py +359 -0
- bead/cli/config.py +624 -0
- bead/cli/constraint_builders.py +286 -0
- bead/cli/deployment.py +859 -0
- bead/cli/deployment_trials.py +493 -0
- bead/cli/deployment_ui.py +332 -0
- bead/cli/display.py +378 -0
- bead/cli/items.py +960 -0
- bead/cli/items_factories.py +776 -0
- bead/cli/list_constraints.py +714 -0
- bead/cli/lists.py +490 -0
- bead/cli/main.py +430 -0
- bead/cli/models.py +877 -0
- bead/cli/resource_loaders.py +621 -0
- bead/cli/resources.py +1036 -0
- bead/cli/shell.py +356 -0
- bead/cli/simulate.py +840 -0
- bead/cli/templates.py +1158 -0
- bead/cli/training.py +1080 -0
- bead/cli/utils.py +614 -0
- bead/cli/workflow.py +1273 -0
- bead/config/__init__.py +68 -0
- bead/config/active_learning.py +1009 -0
- bead/config/config.py +192 -0
- bead/config/defaults.py +118 -0
- bead/config/deployment.py +217 -0
- bead/config/env.py +147 -0
- bead/config/item.py +45 -0
- bead/config/list.py +193 -0
- bead/config/loader.py +149 -0
- bead/config/logging.py +42 -0
- bead/config/model.py +49 -0
- bead/config/paths.py +46 -0
- bead/config/profiles.py +320 -0
- bead/config/resources.py +47 -0
- bead/config/serialization.py +210 -0
- bead/config/simulation.py +206 -0
- bead/config/template.py +238 -0
- bead/config/validation.py +267 -0
- bead/data/__init__.py +65 -0
- bead/data/base.py +87 -0
- bead/data/identifiers.py +97 -0
- bead/data/language_codes.py +61 -0
- bead/data/metadata.py +270 -0
- bead/data/range.py +123 -0
- bead/data/repository.py +358 -0
- bead/data/serialization.py +249 -0
- bead/data/timestamps.py +89 -0
- bead/data/validation.py +349 -0
- bead/data_collection/__init__.py +11 -0
- bead/data_collection/jatos.py +223 -0
- bead/data_collection/merger.py +154 -0
- bead/data_collection/prolific.py +198 -0
- bead/deployment/__init__.py +5 -0
- bead/deployment/distribution.py +402 -0
- bead/deployment/jatos/__init__.py +1 -0
- bead/deployment/jatos/api.py +200 -0
- bead/deployment/jatos/exporter.py +210 -0
- bead/deployment/jspsych/__init__.py +9 -0
- bead/deployment/jspsych/biome.json +44 -0
- bead/deployment/jspsych/config.py +411 -0
- bead/deployment/jspsych/generator.py +598 -0
- bead/deployment/jspsych/package.json +51 -0
- bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
- bead/deployment/jspsych/randomizer.py +299 -0
- bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
- bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
- bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
- bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
- bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
- bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
- bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
- bead/deployment/jspsych/src/plugins/rating.ts +248 -0
- bead/deployment/jspsych/src/slopit/index.ts +9 -0
- bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
- bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
- bead/deployment/jspsych/templates/experiment.css +1 -0
- bead/deployment/jspsych/templates/experiment.js.template +289 -0
- bead/deployment/jspsych/templates/index.html +51 -0
- bead/deployment/jspsych/templates/randomizer.js +241 -0
- bead/deployment/jspsych/templates/randomizer.js.template +313 -0
- bead/deployment/jspsych/trials.py +723 -0
- bead/deployment/jspsych/tsconfig.json +23 -0
- bead/deployment/jspsych/tsup.config.ts +30 -0
- bead/deployment/jspsych/ui/__init__.py +1 -0
- bead/deployment/jspsych/ui/components.py +383 -0
- bead/deployment/jspsych/ui/styles.py +411 -0
- bead/dsl/__init__.py +80 -0
- bead/dsl/ast.py +168 -0
- bead/dsl/context.py +178 -0
- bead/dsl/errors.py +71 -0
- bead/dsl/evaluator.py +570 -0
- bead/dsl/grammar.lark +81 -0
- bead/dsl/parser.py +231 -0
- bead/dsl/stdlib.py +929 -0
- bead/evaluation/__init__.py +13 -0
- bead/evaluation/convergence.py +485 -0
- bead/evaluation/interannotator.py +398 -0
- bead/items/__init__.py +40 -0
- bead/items/adapters/__init__.py +70 -0
- bead/items/adapters/anthropic.py +224 -0
- bead/items/adapters/api_utils.py +167 -0
- bead/items/adapters/base.py +216 -0
- bead/items/adapters/google.py +259 -0
- bead/items/adapters/huggingface.py +1074 -0
- bead/items/adapters/openai.py +323 -0
- bead/items/adapters/registry.py +202 -0
- bead/items/adapters/sentence_transformers.py +224 -0
- bead/items/adapters/togetherai.py +309 -0
- bead/items/binary.py +515 -0
- bead/items/cache.py +558 -0
- bead/items/categorical.py +593 -0
- bead/items/cloze.py +757 -0
- bead/items/constructor.py +784 -0
- bead/items/forced_choice.py +413 -0
- bead/items/free_text.py +681 -0
- bead/items/generation.py +432 -0
- bead/items/item.py +396 -0
- bead/items/item_template.py +787 -0
- bead/items/magnitude.py +573 -0
- bead/items/multi_select.py +621 -0
- bead/items/ordinal_scale.py +569 -0
- bead/items/scoring.py +448 -0
- bead/items/validation.py +723 -0
- bead/lists/__init__.py +30 -0
- bead/lists/balancer.py +263 -0
- bead/lists/constraints.py +1067 -0
- bead/lists/experiment_list.py +286 -0
- bead/lists/list_collection.py +378 -0
- bead/lists/partitioner.py +1141 -0
- bead/lists/stratification.py +254 -0
- bead/participants/__init__.py +73 -0
- bead/participants/collection.py +699 -0
- bead/participants/merging.py +312 -0
- bead/participants/metadata_spec.py +491 -0
- bead/participants/models.py +276 -0
- bead/resources/__init__.py +29 -0
- bead/resources/adapters/__init__.py +19 -0
- bead/resources/adapters/base.py +104 -0
- bead/resources/adapters/cache.py +128 -0
- bead/resources/adapters/glazing.py +508 -0
- bead/resources/adapters/registry.py +117 -0
- bead/resources/adapters/unimorph.py +796 -0
- bead/resources/classification.py +856 -0
- bead/resources/constraint_builders.py +329 -0
- bead/resources/constraints.py +165 -0
- bead/resources/lexical_item.py +223 -0
- bead/resources/lexicon.py +744 -0
- bead/resources/loaders.py +209 -0
- bead/resources/template.py +441 -0
- bead/resources/template_collection.py +707 -0
- bead/resources/template_generation.py +349 -0
- bead/simulation/__init__.py +29 -0
- bead/simulation/annotators/__init__.py +15 -0
- bead/simulation/annotators/base.py +175 -0
- bead/simulation/annotators/distance_based.py +135 -0
- bead/simulation/annotators/lm_based.py +114 -0
- bead/simulation/annotators/oracle.py +182 -0
- bead/simulation/annotators/random.py +181 -0
- bead/simulation/dsl_extension/__init__.py +3 -0
- bead/simulation/noise_models/__init__.py +13 -0
- bead/simulation/noise_models/base.py +42 -0
- bead/simulation/noise_models/random_noise.py +82 -0
- bead/simulation/noise_models/systematic.py +132 -0
- bead/simulation/noise_models/temperature.py +86 -0
- bead/simulation/runner.py +144 -0
- bead/simulation/strategies/__init__.py +23 -0
- bead/simulation/strategies/base.py +123 -0
- bead/simulation/strategies/binary.py +103 -0
- bead/simulation/strategies/categorical.py +123 -0
- bead/simulation/strategies/cloze.py +224 -0
- bead/simulation/strategies/forced_choice.py +127 -0
- bead/simulation/strategies/free_text.py +105 -0
- bead/simulation/strategies/magnitude.py +116 -0
- bead/simulation/strategies/multi_select.py +129 -0
- bead/simulation/strategies/ordinal_scale.py +131 -0
- bead/templates/__init__.py +27 -0
- bead/templates/adapters/__init__.py +17 -0
- bead/templates/adapters/base.py +128 -0
- bead/templates/adapters/cache.py +178 -0
- bead/templates/adapters/huggingface.py +312 -0
- bead/templates/combinatorics.py +103 -0
- bead/templates/filler.py +605 -0
- bead/templates/renderers.py +177 -0
- bead/templates/resolver.py +178 -0
- bead/templates/strategies.py +1806 -0
- bead/templates/streaming.py +195 -0
- bead-0.1.0.dist-info/METADATA +212 -0
- bead-0.1.0.dist-info/RECORD +231 -0
- bead-0.1.0.dist-info/WHEEL +4 -0
- bead-0.1.0.dist-info/entry_points.txt +2 -0
- bead-0.1.0.dist-info/licenses/LICENSE +21 -0
bead/cli/resource_loaders.py
@@ -0,0 +1,621 @@
"""External resource loader commands for bead CLI.

This module provides commands for importing lexical resources from external
sources like VerbNet, UniMorph, PropBank, and FrameNet.
"""

from __future__ import annotations

from pathlib import Path

import click
from pydantic import ValidationError

from bead.cli.display import create_progress, print_error, print_info, print_success
from bead.resources.adapters.cache import AdapterCache
from bead.resources.adapters.glazing import GlazingAdapter
from bead.resources.adapters.unimorph import UniMorphAdapter
from bead.resources.lexicon import Lexicon


@click.group()
def resource_loaders() -> None:
    r"""External resource loader commands.

    Commands for importing lexical items from external linguistic resources.

    \b
    Examples:
        $ bead resources import-verbnet --output lexicons/verbs.jsonl
        $ bead resources import-unimorph --language-code eng --query walk \
            --output lexicons/inflections.jsonl
        $ bead resources import-propbank --query eat.01 \
            --output lexicons/propbank.jsonl
    """


@click.command()
@click.option(
    "--output",
    "-o",
    "output_file",
    type=click.Path(path_type=Path),
    required=True,
    help="Output lexicon file path",
)
@click.option(
    "--query",
    "-q",
    help="Lemma or verb class to query (e.g., 'break', 'put-9.1'). Omit for all verbs.",
)
@click.option(
    "--verb-class",
    help="VerbNet class to filter (e.g., 'put-9.1', 'break-45.1')",
)
@click.option(
    "--language-code",
    default="eng",
    help="ISO 639 language code (default: 'eng')",
)
@click.option(
    "--include-frames",
    is_flag=True,
    help="Include detailed frame information in metadata",
)
@click.option(
    "--limit",
    type=int,
    help="Maximum number of verbs to import",
)
@click.option(
    "--cache-dir",
    type=click.Path(path_type=Path),
    default=Path(".cache/bead"),
    help="Cache directory for adapter results",
)
@click.pass_context
def import_verbnet(
    ctx: click.Context,
    output_file: Path,
    query: str | None,
    verb_class: str | None,
    language_code: str,
    include_frames: bool,
    limit: int | None,
    cache_dir: Path,
) -> None:
    r"""Import verbs from VerbNet.

    Fetches verb frame information from VerbNet and converts it to
    LexicalItem format. Frame information is stored in the features field.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    output_file : Path
        Path to output lexicon file.
    query : str | None
        Lemma or verb class to query.
    verb_class : str | None
        VerbNet class filter.
    language_code : str
        ISO 639 language code.
    include_frames : bool
        Include detailed frame information.
    limit : int | None
        Maximum number of items.
    cache_dir : Path
        Cache directory path.

    Examples
    --------
    # Import all verbs
    $ bead resources import-verbnet --output lexicons/verbnet_verbs.jsonl

    # Import specific verb
    $ bead resources import-verbnet --query break \
        --output lexicons/break_verbs.jsonl

    # Import verb class with frames
    $ bead resources import-verbnet --verb-class put-9.1 --include-frames \
        --output lexicons/put_verbs.jsonl

    # Limit results
    $ bead resources import-verbnet --limit 100 \
        --output lexicons/verbs_sample.jsonl
    """
    try:
        print_info("Initializing VerbNet adapter...")

        # Create cache (cache_dir not used by AdapterCache, it's in-memory only)
        cache_dir.mkdir(parents=True, exist_ok=True)
        cache = AdapterCache()

        # Create adapter
        adapter = GlazingAdapter(resource="verbnet", cache=cache)

        # Fetch items with progress
        verb_class_info = f" (class: {verb_class})" if verb_class else ""
        print_info(f"Fetching verbs from VerbNet{verb_class_info}...")

        with create_progress() as progress:
            task = progress.add_task("[cyan]Fetching from VerbNet...", total=None)

            items = adapter.fetch_items(
                query=query,
                language_code=language_code,
                include_frames=include_frames,
                verb_class=verb_class,
            )

            progress.update(task, completed=True, total=1)

        # Apply limit if specified
        if limit is not None and len(items) > limit:
            print_info(f"Limiting results to {limit} items (from {len(items)})")
            items = items[:limit]

        # Create lexicon
        verb_class_desc = f" for class {verb_class}" if verb_class else ""
        lexicon = Lexicon(
            name=f"verbnet_{verb_class or query or 'all'}",
            language_code=language_code,
            description=f"VerbNet verbs{verb_class_desc}",
        )

        for item in items:
            lexicon.add(item)

        # Save lexicon
        output_file.parent.mkdir(parents=True, exist_ok=True)
        lexicon.to_jsonl(str(output_file))

        print_success(f"Imported {len(items)} verbs from VerbNet: {output_file}")

    except ValidationError as e:
        print_error(f"Validation error: {e}")
        ctx.exit(1)
    except Exception as e:
        print_error(f"Failed to import from VerbNet: {e}")
        ctx.exit(1)


@click.command()
@click.option(
    "--output",
    "-o",
    "output_file",
    type=click.Path(path_type=Path),
    required=True,
    help="Output lexicon file path",
)
@click.option(
    "--query",
    "-q",
    help="Lemma to query (e.g., 'walk', 'run'). Omit for all forms in language.",
)
@click.option(
    "--language-code",
    "-l",
    required=True,
    help="ISO 639 language code (e.g., 'eng', 'spa', 'fra')",
)
@click.option(
    "--pos",
    help="Part of speech filter (e.g., 'VERB', 'NOUN', 'ADJ')",
)
@click.option(
    "--features",
    help="UniMorph features to filter (e.g., 'V;PST', 'N;PL')",
)
@click.option(
    "--limit",
    type=int,
    help="Maximum number of forms to import",
)
@click.option(
    "--cache-dir",
    type=click.Path(path_type=Path),
    default=Path(".cache/bead"),
    help="Cache directory for adapter results",
)
@click.pass_context
def import_unimorph(
    ctx: click.Context,
    output_file: Path,
    query: str | None,
    language_code: str,
    pos: str | None,
    features: str | None,
    limit: int | None,
    cache_dir: Path,
) -> None:
    r"""Import inflected forms from UniMorph.

    Fetches morphological paradigms from UniMorph and converts them to
    LexicalItem format. Morphological features are stored in the features field.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    output_file : Path
        Path to output lexicon file.
    query : str | None
        Lemma to query.
    language_code : str
        ISO 639 language code (required).
    pos : str | None
        Part of speech filter.
    features : str | None
        UniMorph features filter.
    limit : int | None
        Maximum number of items.
    cache_dir : Path
        Cache directory path.

    Examples
    --------
    # Import all English verb forms for "walk"
    $ bead resources import-unimorph --language-code eng --query walk \
        --pos VERB --output lexicons/walk_forms.jsonl

    # Import past tense forms
    $ bead resources import-unimorph --language-code eng --query run \
        --features "V;PST" --output lexicons/run_past.jsonl

    # Import all Spanish verb forms (limited)
    $ bead resources import-unimorph --language-code spa --pos VERB \
        --limit 1000 --output lexicons/spanish_verbs.jsonl
    """
    try:
        print_info("Initializing UniMorph adapter...")

        # Create cache (cache_dir not used by AdapterCache, it's in-memory only)
        cache_dir.mkdir(parents=True, exist_ok=True)
        cache = AdapterCache()

        # Create adapter
        adapter = UniMorphAdapter(cache=cache)

        # Fetch items with progress
        lemma_info = f" (lemma: {query})" if query else ""
        print_info(f"Fetching forms from UniMorph for {language_code}{lemma_info}...")

        with create_progress() as progress:
            task = progress.add_task("[cyan]Fetching from UniMorph...", total=None)

            items = adapter.fetch_items(
                query=query,
                language_code=language_code,
                pos=pos,
                features=features,
            )

            progress.update(task, completed=True, total=1)

        # Apply limit if specified
        if limit is not None and len(items) > limit:
            print_info(f"Limiting results to {limit} items (from {len(items)})")
            items = items[:limit]

        # Create lexicon
        lemma_desc = f" (lemma: {query})" if query else ""
        lexicon = Lexicon(
            name=f"unimorph_{language_code}_{query or 'all'}",
            language_code=language_code,
            description=f"UniMorph inflections for {language_code}{lemma_desc}",
        )

        for item in items:
            lexicon.add(item)

        # Save lexicon
        output_file.parent.mkdir(parents=True, exist_ok=True)
        lexicon.to_jsonl(str(output_file))

        print_success(
            f"Imported {len(items)} inflected forms from UniMorph: {output_file}"
        )

    except ValidationError as e:
        print_error(f"Validation error: {e}")
        ctx.exit(1)
    except Exception as e:
        print_error(f"Failed to import from UniMorph: {e}")
        ctx.exit(1)


@click.command()
@click.option(
    "--output",
    "-o",
    "output_file",
    type=click.Path(path_type=Path),
    required=True,
    help="Output lexicon file path",
)
@click.option(
    "--query",
    "-q",
    help="Predicate to query (e.g., 'eat.01', 'break.01'). Omit for all predicates.",
)
@click.option(
    "--frameset",
    help="PropBank frameset to filter (e.g., 'eat.01')",
)
@click.option(
    "--language-code",
    default="eng",
    help="ISO 639 language code (default: 'eng')",
)
@click.option(
    "--include-frames",
    is_flag=True,
    help="Include detailed frame information in metadata",
)
@click.option(
    "--limit",
    type=int,
    help="Maximum number of predicates to import",
)
@click.option(
    "--cache-dir",
    type=click.Path(path_type=Path),
    default=Path(".cache/bead"),
    help="Cache directory for adapter results",
)
@click.pass_context
def import_propbank(
    ctx: click.Context,
    output_file: Path,
    query: str | None,
    frameset: str | None,
    language_code: str,
    include_frames: bool,
    limit: int | None,
    cache_dir: Path,
) -> None:
    r"""Import predicates from PropBank.

    Fetches predicate-argument structure information from PropBank and
    converts it to LexicalItem format.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    output_file : Path
        Path to output lexicon file.
    query : str | None
        Predicate to query.
    frameset : str | None
        PropBank frameset filter.
    language_code : str
        ISO 639 language code.
    include_frames : bool
        Include detailed frame information.
    limit : int | None
        Maximum number of items.
    cache_dir : Path
        Cache directory path.

    Examples
    --------
    # Import specific predicate
    $ bead resources import-propbank --query eat.01 \
        --output lexicons/eat_propbank.jsonl

    # Import all predicates (limited)
    $ bead resources import-propbank --limit 500 \
        --output lexicons/propbank_sample.jsonl

    # Import with frame information
    $ bead resources import-propbank --frameset break.01 --include-frames \
        --output lexicons/break_frames.jsonl
    """
    try:
        print_info("Initializing PropBank adapter...")

        # Create cache (cache_dir not used by AdapterCache, it's in-memory only)
        cache_dir.mkdir(parents=True, exist_ok=True)
        cache = AdapterCache()

        # Create adapter
        adapter = GlazingAdapter(resource="propbank", cache=cache)

        # Fetch items with progress
        frameset_info = f" (frameset: {frameset})" if frameset else ""
        print_info(f"Fetching predicates from PropBank{frameset_info}...")

        with create_progress() as progress:
            task = progress.add_task("[cyan]Fetching from PropBank...", total=None)

            items = adapter.fetch_items(
                query=query,
                language_code=language_code,
                include_frames=include_frames,
                frameset=frameset,
            )

            progress.update(task, completed=True, total=1)

        # Apply limit if specified
        if limit is not None and len(items) > limit:
            print_info(f"Limiting results to {limit} items (from {len(items)})")
            items = items[:limit]

        # Create lexicon
        frameset_desc = f" for frameset {frameset}" if frameset else ""
        lexicon = Lexicon(
            name=f"propbank_{frameset or query or 'all'}",
            language_code=language_code,
            description=f"PropBank predicates{frameset_desc}",
        )

        for item in items:
            lexicon.add(item)

        # Save lexicon
        output_file.parent.mkdir(parents=True, exist_ok=True)
        lexicon.to_jsonl(str(output_file))

        print_success(f"Imported {len(items)} predicates from PropBank: {output_file}")

    except ValidationError as e:
        print_error(f"Validation error: {e}")
        ctx.exit(1)
    except Exception as e:
        print_error(f"Failed to import from PropBank: {e}")
        ctx.exit(1)


@click.command()
@click.option(
    "--output",
    "-o",
    "output_file",
    type=click.Path(path_type=Path),
    required=True,
    help="Output lexicon file path",
)
@click.option(
    "--query",
    "-q",
    help="Frame to query (e.g., 'Ingestion', 'Motion'). Omit for all frames.",
)
@click.option(
    "--frame",
    help="FrameNet frame to filter (e.g., 'Ingestion')",
)
@click.option(
    "--language-code",
    default="eng",
    help="ISO 639 language code (default: 'eng')",
)
@click.option(
    "--include-frames",
    is_flag=True,
    help="Include detailed frame information in metadata",
)
@click.option(
    "--limit",
    type=int,
    help="Maximum number of frames to import",
)
@click.option(
    "--cache-dir",
    type=click.Path(path_type=Path),
    default=Path(".cache/bead"),
    help="Cache directory for adapter results",
)
@click.pass_context
def import_framenet(
    ctx: click.Context,
    output_file: Path,
    query: str | None,
    frame: str | None,
    language_code: str,
    include_frames: bool,
    limit: int | None,
    cache_dir: Path,
) -> None:
    r"""Import frames from FrameNet.

    Fetches frame semantic information from FrameNet and converts it to
    LexicalItem format.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    output_file : Path
        Path to output lexicon file.
    query : str | None
        Frame to query.
    frame : str | None
        FrameNet frame filter.
    language_code : str
        ISO 639 language code.
    include_frames : bool
        Include detailed frame information.
    limit : int | None
        Maximum number of items.
    cache_dir : Path
        Cache directory path.

    Examples
    --------
    # Import specific frame
    $ bead resources import-framenet --query Ingestion \
        --output lexicons/ingestion_frame.jsonl

    # Import all frames (limited)
    $ bead resources import-framenet --limit 100 \
        --output lexicons/framenet_sample.jsonl

    # Import with frame information
    $ bead resources import-framenet --frame Motion --include-frames \
        --output lexicons/motion_frames.jsonl
    """
    try:
        print_info("Initializing FrameNet adapter...")

        # Create cache (cache_dir not used by AdapterCache, it's in-memory only)
        cache_dir.mkdir(parents=True, exist_ok=True)
        cache = AdapterCache()

        # Create adapter
        adapter = GlazingAdapter(resource="framenet", cache=cache)

        # Fetch items with progress
        frame_info = f" (frame: {frame})" if frame else ""
        print_info(f"Fetching frames from FrameNet{frame_info}...")

        with create_progress() as progress:
            task = progress.add_task("[cyan]Fetching from FrameNet...", total=None)

            items = adapter.fetch_items(
                query=query,
                language_code=language_code,
                include_frames=include_frames,
                frame=frame,
            )

            progress.update(task, completed=True, total=1)

        # Apply limit if specified
        if limit is not None and len(items) > limit:
            print_info(f"Limiting results to {limit} items (from {len(items)})")
            items = items[:limit]

        # Create lexicon
        lexicon = Lexicon(
            name=f"framenet_{frame or query or 'all'}",
            language_code=language_code,
            description=f"FrameNet frames{f' for {frame}' if frame else ''}",
        )

        for item in items:
            lexicon.add(item)

        # Save lexicon
        output_file.parent.mkdir(parents=True, exist_ok=True)
        lexicon.to_jsonl(str(output_file))

        print_success(f"Imported {len(items)} frames from FrameNet: {output_file}")

    except ValidationError as e:
        print_error(f"Validation error: {e}")
        ctx.exit(1)
    except Exception as e:
        print_error(f"Failed to import from FrameNet: {e}")
        ctx.exit(1)


# Register commands
resource_loaders.add_command(import_verbnet, name="import-verbnet")
resource_loaders.add_command(import_unimorph, name="import-unimorph")
resource_loaders.add_command(import_propbank, name="import-propbank")
resource_loaders.add_command(import_framenet, name="import-framenet")