bead 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bead/__init__.py +11 -0
- bead/__main__.py +11 -0
- bead/active_learning/__init__.py +15 -0
- bead/active_learning/config.py +231 -0
- bead/active_learning/loop.py +566 -0
- bead/active_learning/models/__init__.py +24 -0
- bead/active_learning/models/base.py +852 -0
- bead/active_learning/models/binary.py +910 -0
- bead/active_learning/models/categorical.py +943 -0
- bead/active_learning/models/cloze.py +862 -0
- bead/active_learning/models/forced_choice.py +956 -0
- bead/active_learning/models/free_text.py +773 -0
- bead/active_learning/models/lora.py +365 -0
- bead/active_learning/models/magnitude.py +835 -0
- bead/active_learning/models/multi_select.py +795 -0
- bead/active_learning/models/ordinal_scale.py +811 -0
- bead/active_learning/models/peft_adapter.py +155 -0
- bead/active_learning/models/random_effects.py +639 -0
- bead/active_learning/selection.py +354 -0
- bead/active_learning/strategies.py +391 -0
- bead/active_learning/trainers/__init__.py +26 -0
- bead/active_learning/trainers/base.py +210 -0
- bead/active_learning/trainers/data_collator.py +172 -0
- bead/active_learning/trainers/dataset_utils.py +261 -0
- bead/active_learning/trainers/huggingface.py +304 -0
- bead/active_learning/trainers/lightning.py +324 -0
- bead/active_learning/trainers/metrics.py +424 -0
- bead/active_learning/trainers/mixed_effects.py +551 -0
- bead/active_learning/trainers/model_wrapper.py +509 -0
- bead/active_learning/trainers/registry.py +104 -0
- bead/adapters/__init__.py +11 -0
- bead/adapters/huggingface.py +61 -0
- bead/behavioral/__init__.py +116 -0
- bead/behavioral/analytics.py +646 -0
- bead/behavioral/extraction.py +343 -0
- bead/behavioral/merging.py +343 -0
- bead/cli/__init__.py +11 -0
- bead/cli/active_learning.py +513 -0
- bead/cli/active_learning_commands.py +779 -0
- bead/cli/completion.py +359 -0
- bead/cli/config.py +624 -0
- bead/cli/constraint_builders.py +286 -0
- bead/cli/deployment.py +859 -0
- bead/cli/deployment_trials.py +493 -0
- bead/cli/deployment_ui.py +332 -0
- bead/cli/display.py +378 -0
- bead/cli/items.py +960 -0
- bead/cli/items_factories.py +776 -0
- bead/cli/list_constraints.py +714 -0
- bead/cli/lists.py +490 -0
- bead/cli/main.py +430 -0
- bead/cli/models.py +877 -0
- bead/cli/resource_loaders.py +621 -0
- bead/cli/resources.py +1036 -0
- bead/cli/shell.py +356 -0
- bead/cli/simulate.py +840 -0
- bead/cli/templates.py +1158 -0
- bead/cli/training.py +1080 -0
- bead/cli/utils.py +614 -0
- bead/cli/workflow.py +1273 -0
- bead/config/__init__.py +68 -0
- bead/config/active_learning.py +1009 -0
- bead/config/config.py +192 -0
- bead/config/defaults.py +118 -0
- bead/config/deployment.py +217 -0
- bead/config/env.py +147 -0
- bead/config/item.py +45 -0
- bead/config/list.py +193 -0
- bead/config/loader.py +149 -0
- bead/config/logging.py +42 -0
- bead/config/model.py +49 -0
- bead/config/paths.py +46 -0
- bead/config/profiles.py +320 -0
- bead/config/resources.py +47 -0
- bead/config/serialization.py +210 -0
- bead/config/simulation.py +206 -0
- bead/config/template.py +238 -0
- bead/config/validation.py +267 -0
- bead/data/__init__.py +65 -0
- bead/data/base.py +87 -0
- bead/data/identifiers.py +97 -0
- bead/data/language_codes.py +61 -0
- bead/data/metadata.py +270 -0
- bead/data/range.py +123 -0
- bead/data/repository.py +358 -0
- bead/data/serialization.py +249 -0
- bead/data/timestamps.py +89 -0
- bead/data/validation.py +349 -0
- bead/data_collection/__init__.py +11 -0
- bead/data_collection/jatos.py +223 -0
- bead/data_collection/merger.py +154 -0
- bead/data_collection/prolific.py +198 -0
- bead/deployment/__init__.py +5 -0
- bead/deployment/distribution.py +402 -0
- bead/deployment/jatos/__init__.py +1 -0
- bead/deployment/jatos/api.py +200 -0
- bead/deployment/jatos/exporter.py +210 -0
- bead/deployment/jspsych/__init__.py +9 -0
- bead/deployment/jspsych/biome.json +44 -0
- bead/deployment/jspsych/config.py +411 -0
- bead/deployment/jspsych/generator.py +598 -0
- bead/deployment/jspsych/package.json +51 -0
- bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
- bead/deployment/jspsych/randomizer.py +299 -0
- bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
- bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
- bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
- bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
- bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
- bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
- bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
- bead/deployment/jspsych/src/plugins/rating.ts +248 -0
- bead/deployment/jspsych/src/slopit/index.ts +9 -0
- bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
- bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
- bead/deployment/jspsych/templates/experiment.css +1 -0
- bead/deployment/jspsych/templates/experiment.js.template +289 -0
- bead/deployment/jspsych/templates/index.html +51 -0
- bead/deployment/jspsych/templates/randomizer.js +241 -0
- bead/deployment/jspsych/templates/randomizer.js.template +313 -0
- bead/deployment/jspsych/trials.py +723 -0
- bead/deployment/jspsych/tsconfig.json +23 -0
- bead/deployment/jspsych/tsup.config.ts +30 -0
- bead/deployment/jspsych/ui/__init__.py +1 -0
- bead/deployment/jspsych/ui/components.py +383 -0
- bead/deployment/jspsych/ui/styles.py +411 -0
- bead/dsl/__init__.py +80 -0
- bead/dsl/ast.py +168 -0
- bead/dsl/context.py +178 -0
- bead/dsl/errors.py +71 -0
- bead/dsl/evaluator.py +570 -0
- bead/dsl/grammar.lark +81 -0
- bead/dsl/parser.py +231 -0
- bead/dsl/stdlib.py +929 -0
- bead/evaluation/__init__.py +13 -0
- bead/evaluation/convergence.py +485 -0
- bead/evaluation/interannotator.py +398 -0
- bead/items/__init__.py +40 -0
- bead/items/adapters/__init__.py +70 -0
- bead/items/adapters/anthropic.py +224 -0
- bead/items/adapters/api_utils.py +167 -0
- bead/items/adapters/base.py +216 -0
- bead/items/adapters/google.py +259 -0
- bead/items/adapters/huggingface.py +1074 -0
- bead/items/adapters/openai.py +323 -0
- bead/items/adapters/registry.py +202 -0
- bead/items/adapters/sentence_transformers.py +224 -0
- bead/items/adapters/togetherai.py +309 -0
- bead/items/binary.py +515 -0
- bead/items/cache.py +558 -0
- bead/items/categorical.py +593 -0
- bead/items/cloze.py +757 -0
- bead/items/constructor.py +784 -0
- bead/items/forced_choice.py +413 -0
- bead/items/free_text.py +681 -0
- bead/items/generation.py +432 -0
- bead/items/item.py +396 -0
- bead/items/item_template.py +787 -0
- bead/items/magnitude.py +573 -0
- bead/items/multi_select.py +621 -0
- bead/items/ordinal_scale.py +569 -0
- bead/items/scoring.py +448 -0
- bead/items/validation.py +723 -0
- bead/lists/__init__.py +30 -0
- bead/lists/balancer.py +263 -0
- bead/lists/constraints.py +1067 -0
- bead/lists/experiment_list.py +286 -0
- bead/lists/list_collection.py +378 -0
- bead/lists/partitioner.py +1141 -0
- bead/lists/stratification.py +254 -0
- bead/participants/__init__.py +73 -0
- bead/participants/collection.py +699 -0
- bead/participants/merging.py +312 -0
- bead/participants/metadata_spec.py +491 -0
- bead/participants/models.py +276 -0
- bead/resources/__init__.py +29 -0
- bead/resources/adapters/__init__.py +19 -0
- bead/resources/adapters/base.py +104 -0
- bead/resources/adapters/cache.py +128 -0
- bead/resources/adapters/glazing.py +508 -0
- bead/resources/adapters/registry.py +117 -0
- bead/resources/adapters/unimorph.py +796 -0
- bead/resources/classification.py +856 -0
- bead/resources/constraint_builders.py +329 -0
- bead/resources/constraints.py +165 -0
- bead/resources/lexical_item.py +223 -0
- bead/resources/lexicon.py +744 -0
- bead/resources/loaders.py +209 -0
- bead/resources/template.py +441 -0
- bead/resources/template_collection.py +707 -0
- bead/resources/template_generation.py +349 -0
- bead/simulation/__init__.py +29 -0
- bead/simulation/annotators/__init__.py +15 -0
- bead/simulation/annotators/base.py +175 -0
- bead/simulation/annotators/distance_based.py +135 -0
- bead/simulation/annotators/lm_based.py +114 -0
- bead/simulation/annotators/oracle.py +182 -0
- bead/simulation/annotators/random.py +181 -0
- bead/simulation/dsl_extension/__init__.py +3 -0
- bead/simulation/noise_models/__init__.py +13 -0
- bead/simulation/noise_models/base.py +42 -0
- bead/simulation/noise_models/random_noise.py +82 -0
- bead/simulation/noise_models/systematic.py +132 -0
- bead/simulation/noise_models/temperature.py +86 -0
- bead/simulation/runner.py +144 -0
- bead/simulation/strategies/__init__.py +23 -0
- bead/simulation/strategies/base.py +123 -0
- bead/simulation/strategies/binary.py +103 -0
- bead/simulation/strategies/categorical.py +123 -0
- bead/simulation/strategies/cloze.py +224 -0
- bead/simulation/strategies/forced_choice.py +127 -0
- bead/simulation/strategies/free_text.py +105 -0
- bead/simulation/strategies/magnitude.py +116 -0
- bead/simulation/strategies/multi_select.py +129 -0
- bead/simulation/strategies/ordinal_scale.py +131 -0
- bead/templates/__init__.py +27 -0
- bead/templates/adapters/__init__.py +17 -0
- bead/templates/adapters/base.py +128 -0
- bead/templates/adapters/cache.py +178 -0
- bead/templates/adapters/huggingface.py +312 -0
- bead/templates/combinatorics.py +103 -0
- bead/templates/filler.py +605 -0
- bead/templates/renderers.py +177 -0
- bead/templates/resolver.py +178 -0
- bead/templates/strategies.py +1806 -0
- bead/templates/streaming.py +195 -0
- bead-0.1.0.dist-info/METADATA +212 -0
- bead-0.1.0.dist-info/RECORD +231 -0
- bead-0.1.0.dist-info/WHEEL +4 -0
- bead-0.1.0.dist-info/entry_points.txt +2 -0
- bead-0.1.0.dist-info/licenses/LICENSE +21 -0
bead/config/item.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""Item configuration models for the bead package."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel, Field
|
|
6
|
+
|
|
7
|
+
from bead.config.model import ModelConfig
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ItemConfig(BaseModel):
    """Settings that govern item generation.

    Attributes
    ----------
    model : ModelConfig
        Model configuration. A fresh ``ModelConfig()`` is built when omitted.
    apply_constraints : bool
        Whether to apply model-based constraints. Default: True.
    track_metadata : bool
        Whether to track item metadata. Default: True.
    parallel_processing : bool
        Whether to use parallel processing. Default: False.
    num_workers : int
        Number of workers for parallel processing; must be greater than 0.
        Default: 4.

    Examples
    --------
    >>> config = ItemConfig()
    >>> config.apply_constraints
    True
    >>> config.num_workers
    4
    """

    # default_factory gives every ItemConfig its own ModelConfig instance.
    model: ModelConfig = Field(
        description="Model configuration",
        default_factory=ModelConfig,
    )
    apply_constraints: bool = Field(
        description="Apply model-based constraints",
        default=True,
    )
    track_metadata: bool = Field(
        description="Track item metadata",
        default=True,
    )
    parallel_processing: bool = Field(
        description="Use parallel processing",
        default=False,
    )
    # gt=0 rejects zero or negative worker counts at validation time.
    num_workers: int = Field(
        description="Number of workers",
        default=4,
        gt=0,
    )
|
bead/config/list.py
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
"""List configuration models for the bead package."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING, Literal
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel, Field, field_validator, model_validator
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
pass
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class BatchConstraintConfig(BaseModel):
    """Declarative description of one batch-level constraint.

    A batch constraint is evaluated across all lists in a batch (rather than
    within a single list) to ensure global properties such as coverage,
    balance, and diversity.

    Attributes
    ----------
    type : Literal["coverage", "balance", "diversity", "min_occurrence"]
        Which kind of batch constraint this is.
    property_expression : str
        Expression used to extract the property (e.g., "item['template_id']").
        Surrounding whitespace is stripped; an empty value is rejected.
    target_values : list[str | int | float] | None
        Target values for a coverage constraint. Default: None.
    min_coverage : float
        Minimum coverage fraction for a coverage constraint, in [0.0, 1.0].
        Default: 1.0.
    target_distribution : dict[str, float] | None
        Target distribution for a balance constraint (values are expected to
        sum to 1.0; the sum is not checked here). Required when ``type`` is
        "balance". Default: None.
    tolerance : float
        Tolerance for a balance constraint, in [0.0, 1.0]. Default: 0.1.
    max_lists_per_value : int | None
        Maximum lists per value for a diversity constraint (>= 1). Required
        when ``type`` is "diversity". Default: None.
    min_occurrences : int | None
        Minimum occurrences per value for a min_occurrence constraint (>= 1).
        Required when ``type`` is "min_occurrence". Default: None.
    priority : int
        Constraint priority (higher = more important, >= 1). Default: 1.

    Examples
    --------
    >>> # Coverage constraint
    >>> config = BatchConstraintConfig(
    ...     type="coverage",
    ...     property_expression="item['template_id']",
    ...     target_values=list(range(26)),
    ...     min_coverage=1.0
    ... )
    >>> # Balance constraint
    >>> config = BatchConstraintConfig(
    ...     type="balance",
    ...     property_expression="item['pair_type']",
    ...     target_distribution={"same_verb": 0.5, "different_verb": 0.5},
    ...     tolerance=0.05
    ... )
    >>> # Diversity constraint
    >>> config = BatchConstraintConfig(
    ...     type="diversity",
    ...     property_expression="item['verb_lemma']",
    ...     max_lists_per_value=3
    ... )
    >>> # Min occurrence constraint
    >>> config = BatchConstraintConfig(
    ...     type="min_occurrence",
    ...     property_expression="item['quantile']",
    ...     min_occurrences=50
    ... )
    """

    type: Literal["coverage", "balance", "diversity", "min_occurrence"] = Field(
        ...,
        description="Type of batch constraint",
    )
    property_expression: str = Field(
        ...,
        description="Expression to extract property",
    )
    target_values: list[str | int | float] | None = Field(
        description="Target values for coverage constraint",
        default=None,
    )
    min_coverage: float = Field(
        description="Minimum coverage fraction",
        default=1.0,
        ge=0.0,
        le=1.0,
    )
    target_distribution: dict[str, float] | None = Field(
        description="Target distribution for balance constraint",
        default=None,
    )
    tolerance: float = Field(
        description="Tolerance for balance constraint",
        default=0.1,
        ge=0.0,
        le=1.0,
    )
    max_lists_per_value: int | None = Field(
        description="Maximum lists per value for diversity",
        default=None,
        ge=1,
    )
    min_occurrences: int | None = Field(
        description="Minimum occurrences for min_occurrence",
        default=None,
        ge=1,
    )
    priority: int = Field(
        description="Constraint priority",
        default=1,
        ge=1,
    )

    @field_validator("property_expression")
    @classmethod
    def validate_property_expression(cls, v: str) -> str:
        """Reject blank expressions and return the whitespace-stripped value.

        Parameters
        ----------
        v : str
            Raw property expression.

        Returns
        -------
        str
            The expression with leading/trailing whitespace removed.

        Raises
        ------
        ValueError
            If the expression is empty or whitespace-only.
        """
        stripped = v.strip()
        if not stripped:
            raise ValueError("property_expression must be non-empty")
        return stripped

    @model_validator(mode="after")
    def validate_constraint_params(self) -> BatchConstraintConfig:
        """Check that the parameter required by ``type`` has been supplied.

        Returns
        -------
        BatchConstraintConfig
            The validated instance (unchanged).

        Raises
        ------
        ValueError
            If the parameter required for the selected constraint type is None.
        """
        # Constraint type -> (attribute that must be set, error message).
        # "coverage" has no entry: target_values may stay None for
        # auto-detection.
        required_param = {
            "balance": (
                "target_distribution",
                "target_distribution required for balance constraint",
            ),
            "diversity": (
                "max_lists_per_value",
                "max_lists_per_value required for diversity constraint",
            ),
            "min_occurrence": (
                "min_occurrences",
                "min_occurrences required for min_occurrence constraint",
            ),
        }

        requirement = required_param.get(self.type)
        if requirement is not None:
            attribute, message = requirement
            if getattr(self, attribute) is None:
                raise ValueError(message)

        return self
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
class ListConfig(BaseModel):
    """Settings for partitioning items into lists.

    Attributes
    ----------
    partitioning_strategy : str
        Strategy name for partitioning. Default: "balanced".
    num_lists : int
        Number of lists to create; must be greater than 0. Default: 1.
    items_per_list : int | None
        Items per list; when given it must be positive. Default: None.
    balance_by : list[str]
        Fields to balance on. Default: empty list.
    ensure_uniqueness : bool
        Whether to ensure items are unique across lists. Default: True.
    random_seed : int | None
        Random seed for reproducibility. Default: None.
    batch_constraints : list[BatchConstraintConfig] | None
        Batch-level constraints to apply across all lists. Default: None.

    Examples
    --------
    >>> config = ListConfig()
    >>> config.partitioning_strategy
    'balanced'
    >>> config.num_lists
    1
    """

    partitioning_strategy: str = Field(
        description="Partitioning strategy",
        default="balanced",
    )
    num_lists: int = Field(
        description="Number of lists to create",
        default=1,
        gt=0,
    )
    items_per_list: int | None = Field(
        description="Items per list",
        default=None,
    )
    # default_factory=list gives each instance its own empty list.
    balance_by: list[str] = Field(
        description="Fields to balance on",
        default_factory=list,
    )
    ensure_uniqueness: bool = Field(
        description="Ensure items unique across lists",
        default=True,
    )
    random_seed: int | None = Field(
        description="Random seed",
        default=None,
    )
    batch_constraints: list[BatchConstraintConfig] | None = Field(
        description="Batch-level constraints",
        default=None,
    )

    @field_validator("items_per_list")
    @classmethod
    def validate_items_per_list(cls, v: int | None) -> int | None:
        """Ensure ``items_per_list`` is either unset or strictly positive.

        Parameters
        ----------
        v : int | None
            Items per list value.

        Returns
        -------
        int | None
            The value unchanged when it is None or positive.

        Raises
        ------
        ValueError
            If the value is zero or negative.
        """
        # None means "not specified" and is accepted as-is.
        if v is None or v > 0:
            return v
        raise ValueError(f"items_per_list must be positive, got {v}")
|
bead/config/loader.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"""Configuration loading from YAML files.
|
|
2
|
+
|
|
3
|
+
This module provides functionality for loading configurations from YAML files,
|
|
4
|
+
merging configurations from multiple sources, and applying configuration overrides.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
import yaml
|
|
11
|
+
|
|
12
|
+
from bead.config.config import BeadConfig
|
|
13
|
+
from bead.config.profiles import get_profile
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def merge_configs(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]:
    """Deep merge two configuration dictionaries.

    Recursively merges ``override`` into ``base``, with override values taking
    precedence. When both sides hold a dict under the same key the two dicts
    are merged recursively; in every other case the override value replaces
    the base value.

    The result is fully independent of both inputs: values carried over from
    either side are deep-copied, so mutating the merged configuration never
    changes ``base`` or ``override`` (and vice versa).

    Parameters
    ----------
    base : dict[str, Any]
        Base configuration dictionary. Not modified.
    override : dict[str, Any]
        Override configuration dictionary. Not modified.

    Returns
    -------
    dict[str, Any]
        Merged configuration dictionary. Keys appear in ``base`` order,
        followed by keys that only exist in ``override``.

    Examples
    --------
    >>> base = {"a": 1, "b": {"c": 2}}
    >>> override = {"b": {"d": 3}}
    >>> merge_configs(base, override)
    {'a': 1, 'b': {'c': 2, 'd': 3}}
    """
    # Local import keeps this function self-contained.
    from copy import deepcopy

    merged: dict[str, Any] = {}

    for key, base_value in base.items():
        if key not in override:
            # Copy so nested containers are not shared with ``base``.
            merged[key] = deepcopy(base_value)
            continue

        override_value = override[key]
        if isinstance(base_value, dict) and isinstance(override_value, dict):
            # The recursive call already returns an independent dict.
            merged[key] = merge_configs(base_value, override_value)
        else:
            merged[key] = deepcopy(override_value)

    # Keys that only exist in the override are appended in override order.
    for key, override_value in override.items():
        if key not in base:
            merged[key] = deepcopy(override_value)

    return merged
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def load_yaml_file(path: Path | str) -> dict[str, Any]:
|
|
50
|
+
"""Load YAML file and return as dictionary.
|
|
51
|
+
|
|
52
|
+
Parameters
|
|
53
|
+
----------
|
|
54
|
+
path : Path | str
|
|
55
|
+
Path to YAML file.
|
|
56
|
+
|
|
57
|
+
Returns
|
|
58
|
+
-------
|
|
59
|
+
dict[str, Any]
|
|
60
|
+
Parsed YAML content.
|
|
61
|
+
|
|
62
|
+
Raises
|
|
63
|
+
------
|
|
64
|
+
FileNotFoundError
|
|
65
|
+
If file doesn't exist.
|
|
66
|
+
yaml.YAMLError
|
|
67
|
+
If YAML is malformed.
|
|
68
|
+
"""
|
|
69
|
+
path = Path(path) if isinstance(path, str) else path
|
|
70
|
+
|
|
71
|
+
if not path.exists():
|
|
72
|
+
raise FileNotFoundError(f"Configuration file not found: {path}")
|
|
73
|
+
|
|
74
|
+
try:
|
|
75
|
+
with open(path) as f:
|
|
76
|
+
content = yaml.safe_load(f)
|
|
77
|
+
# handle empty files
|
|
78
|
+
return content if content is not None else {}
|
|
79
|
+
except yaml.YAMLError as e:
|
|
80
|
+
raise yaml.YAMLError(f"Failed to parse YAML file {path}: {e}") from e
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def load_config(
    config_path: Path | str | None = None,
    profile: str = "default",
    **overrides: Any,
) -> BeadConfig:
    """Build a ``BeadConfig`` from a profile, an optional YAML file and overrides.

    Sources are layered, later ones winning over earlier ones:

    1. Profile defaults
    2. YAML file values
    3. Keyword overrides

    Parameters
    ----------
    config_path : Path | str | None
        Path to YAML config file. If None, no file is read and only the
        profile defaults (plus overrides) are used.
    profile : str
        Profile to use as base (default, dev, prod, test).
    **overrides : Any
        Direct overrides for config values. A double underscore in the
        keyword name addresses a nested key, e.g. ``logging__level="DEBUG"``
        sets ``{"logging": {"level": "DEBUG"}}``.

    Returns
    -------
    BeadConfig
        Loaded and merged configuration.

    Raises
    ------
    FileNotFoundError
        If config_path is specified but doesn't exist.
    yaml.YAMLError
        If YAML file is malformed.
    ValidationError
        If configuration is invalid.

    Examples
    --------
    >>> config = load_config(profile="dev")
    >>> config.profile
    'dev'
    >>> config = load_config(config_path="config.yaml", logging__level="DEBUG")
    >>> config.logging.level
    'DEBUG'
    """
    # Layer 1: the selected profile, dumped to a plain dict.
    merged: dict[str, Any] = get_profile(profile).model_dump()

    # Layer 2: values from the YAML file, when one was given.
    if config_path is not None:
        merged = merge_configs(merged, load_yaml_file(config_path))

    # Layer 3: keyword overrides, expanded from "a__b__c" into nested dicts.
    if overrides:
        nested: dict[str, Any] = {}
        for flat_key, value in overrides.items():
            *parents, leaf = flat_key.split("__")
            node = nested
            for name in parents:
                # Descend, creating intermediate dicts on first use.
                node = node.setdefault(name, {})
            node[leaf] = value
        merged = merge_configs(merged, nested)

    # Constructing BeadConfig validates the merged values.
    return BeadConfig(**merged)
|
bead/config/logging.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Logging configuration models for the bead package."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Literal
|
|
7
|
+
|
|
8
|
+
from pydantic import BaseModel, Field
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class LoggingConfig(BaseModel):
    """Settings for logging output.

    Attributes
    ----------
    level : Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
        Log level. Default: "INFO".
    format : str
        Log format string (``%``-style placeholders such as
        ``%(levelname)s``).
    file : Path | None
        Log file path. Default: None.
    console : bool
        Whether to log to console. Default: True.

    Examples
    --------
    >>> config = LoggingConfig()
    >>> config.level
    'INFO'
    >>> config.console
    True
    """

    level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = Field(
        description="Log level",
        default="INFO",
    )
    format: str = Field(
        description="Log format",
        default="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )
    file: Path | None = Field(
        description="Log file path",
        default=None,
    )
    console: bool = Field(
        description="Log to console",
        default=True,
    )
|
bead/config/model.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""Model configuration models for the bead package."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Literal
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel, Field
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ModelConfig(BaseModel):
    """Configuration for language models.

    Attributes
    ----------
    provider : Literal["huggingface", "openai", "anthropic"]
        Model provider name. Default: "huggingface".
    model_name : str
        Model identifier. Default: "gpt2".
    batch_size : int
        Inference batch size; must be greater than 0. Default: 8.
    device : Literal["cpu", "cuda", "mps"]
        Device to use for computation. Default: "cpu".
    max_length : int
        Maximum sequence length; must be greater than 0. Default: 512.
    temperature : float
        Sampling temperature; must be >= 0. Default: 1.0.
    cache_outputs : bool
        Whether to cache model outputs. Default: True.

    Examples
    --------
    >>> config = ModelConfig()
    >>> config.provider
    'huggingface'
    >>> config.device
    'cpu'
    """

    # The ``model_name`` field starts with pydantic v2's protected ``model_``
    # prefix, which emits a UserWarning at class creation in releases where
    # that prefix is protected by default. Clearing the protected namespaces
    # keeps the public field name without the warning. A plain dict is used
    # so no extra import is needed.
    model_config = {"protected_namespaces": ()}

    provider: Literal["huggingface", "openai", "anthropic"] = Field(
        default="huggingface", description="Model provider"
    )
    model_name: str = Field(default="gpt2", description="Model identifier")
    batch_size: int = Field(default=8, description="Inference batch size", gt=0)
    device: Literal["cpu", "cuda", "mps"] = Field(
        default="cpu", description="Device to use"
    )
    max_length: int = Field(default=512, description="Max sequence length", gt=0)
    temperature: float = Field(default=1.0, description="Sampling temperature", ge=0)
    cache_outputs: bool = Field(default=True, description="Cache model outputs")
|
bead/config/paths.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Path configuration models for the bead package."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel, Field
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class PathsConfig(BaseModel):
    """Settings for file system locations.

    Attributes
    ----------
    data_dir : Path
        Base directory for data files. Default: ``Path("data")``.
    output_dir : Path
        Base directory for outputs. Default: ``Path("output")``.
    cache_dir : Path
        Cache directory. Default: ``Path(".cache")``.
    temp_dir : Path | None
        Temporary directory. If None, uses system temp. Default: None.
    create_dirs : bool
        Whether to create directories if they don't exist. Default: True.

    Examples
    --------
    >>> config = PathsConfig()
    >>> config.data_dir == Path("data")
    True
    >>> config = PathsConfig(data_dir=Path("/absolute/path"))
    >>> config.data_dir == Path("/absolute/path")
    True
    """

    data_dir: Path = Field(
        description="Base directory for data files",
        default=Path("data"),
    )
    output_dir: Path = Field(
        description="Base directory for outputs",
        default=Path("output"),
    )
    cache_dir: Path = Field(
        description="Cache directory",
        default=Path(".cache"),
    )
    temp_dir: Path | None = Field(
        description="Temporary directory",
        default=None,
    )
    create_dirs: bool = Field(
        description="Create directories if they don't exist",
        default=True,
    )
|