bead 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. bead/__init__.py +11 -0
  2. bead/__main__.py +11 -0
  3. bead/active_learning/__init__.py +15 -0
  4. bead/active_learning/config.py +231 -0
  5. bead/active_learning/loop.py +566 -0
  6. bead/active_learning/models/__init__.py +24 -0
  7. bead/active_learning/models/base.py +852 -0
  8. bead/active_learning/models/binary.py +910 -0
  9. bead/active_learning/models/categorical.py +943 -0
  10. bead/active_learning/models/cloze.py +862 -0
  11. bead/active_learning/models/forced_choice.py +956 -0
  12. bead/active_learning/models/free_text.py +773 -0
  13. bead/active_learning/models/lora.py +365 -0
  14. bead/active_learning/models/magnitude.py +835 -0
  15. bead/active_learning/models/multi_select.py +795 -0
  16. bead/active_learning/models/ordinal_scale.py +811 -0
  17. bead/active_learning/models/peft_adapter.py +155 -0
  18. bead/active_learning/models/random_effects.py +639 -0
  19. bead/active_learning/selection.py +354 -0
  20. bead/active_learning/strategies.py +391 -0
  21. bead/active_learning/trainers/__init__.py +26 -0
  22. bead/active_learning/trainers/base.py +210 -0
  23. bead/active_learning/trainers/data_collator.py +172 -0
  24. bead/active_learning/trainers/dataset_utils.py +261 -0
  25. bead/active_learning/trainers/huggingface.py +304 -0
  26. bead/active_learning/trainers/lightning.py +324 -0
  27. bead/active_learning/trainers/metrics.py +424 -0
  28. bead/active_learning/trainers/mixed_effects.py +551 -0
  29. bead/active_learning/trainers/model_wrapper.py +509 -0
  30. bead/active_learning/trainers/registry.py +104 -0
  31. bead/adapters/__init__.py +11 -0
  32. bead/adapters/huggingface.py +61 -0
  33. bead/behavioral/__init__.py +116 -0
  34. bead/behavioral/analytics.py +646 -0
  35. bead/behavioral/extraction.py +343 -0
  36. bead/behavioral/merging.py +343 -0
  37. bead/cli/__init__.py +11 -0
  38. bead/cli/active_learning.py +513 -0
  39. bead/cli/active_learning_commands.py +779 -0
  40. bead/cli/completion.py +359 -0
  41. bead/cli/config.py +624 -0
  42. bead/cli/constraint_builders.py +286 -0
  43. bead/cli/deployment.py +859 -0
  44. bead/cli/deployment_trials.py +493 -0
  45. bead/cli/deployment_ui.py +332 -0
  46. bead/cli/display.py +378 -0
  47. bead/cli/items.py +960 -0
  48. bead/cli/items_factories.py +776 -0
  49. bead/cli/list_constraints.py +714 -0
  50. bead/cli/lists.py +490 -0
  51. bead/cli/main.py +430 -0
  52. bead/cli/models.py +877 -0
  53. bead/cli/resource_loaders.py +621 -0
  54. bead/cli/resources.py +1036 -0
  55. bead/cli/shell.py +356 -0
  56. bead/cli/simulate.py +840 -0
  57. bead/cli/templates.py +1158 -0
  58. bead/cli/training.py +1080 -0
  59. bead/cli/utils.py +614 -0
  60. bead/cli/workflow.py +1273 -0
  61. bead/config/__init__.py +68 -0
  62. bead/config/active_learning.py +1009 -0
  63. bead/config/config.py +192 -0
  64. bead/config/defaults.py +118 -0
  65. bead/config/deployment.py +217 -0
  66. bead/config/env.py +147 -0
  67. bead/config/item.py +45 -0
  68. bead/config/list.py +193 -0
  69. bead/config/loader.py +149 -0
  70. bead/config/logging.py +42 -0
  71. bead/config/model.py +49 -0
  72. bead/config/paths.py +46 -0
  73. bead/config/profiles.py +320 -0
  74. bead/config/resources.py +47 -0
  75. bead/config/serialization.py +210 -0
  76. bead/config/simulation.py +206 -0
  77. bead/config/template.py +238 -0
  78. bead/config/validation.py +267 -0
  79. bead/data/__init__.py +65 -0
  80. bead/data/base.py +87 -0
  81. bead/data/identifiers.py +97 -0
  82. bead/data/language_codes.py +61 -0
  83. bead/data/metadata.py +270 -0
  84. bead/data/range.py +123 -0
  85. bead/data/repository.py +358 -0
  86. bead/data/serialization.py +249 -0
  87. bead/data/timestamps.py +89 -0
  88. bead/data/validation.py +349 -0
  89. bead/data_collection/__init__.py +11 -0
  90. bead/data_collection/jatos.py +223 -0
  91. bead/data_collection/merger.py +154 -0
  92. bead/data_collection/prolific.py +198 -0
  93. bead/deployment/__init__.py +5 -0
  94. bead/deployment/distribution.py +402 -0
  95. bead/deployment/jatos/__init__.py +1 -0
  96. bead/deployment/jatos/api.py +200 -0
  97. bead/deployment/jatos/exporter.py +210 -0
  98. bead/deployment/jspsych/__init__.py +9 -0
  99. bead/deployment/jspsych/biome.json +44 -0
  100. bead/deployment/jspsych/config.py +411 -0
  101. bead/deployment/jspsych/generator.py +598 -0
  102. bead/deployment/jspsych/package.json +51 -0
  103. bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
  104. bead/deployment/jspsych/randomizer.py +299 -0
  105. bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
  106. bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
  107. bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
  108. bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
  109. bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
  110. bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
  111. bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
  112. bead/deployment/jspsych/src/plugins/rating.ts +248 -0
  113. bead/deployment/jspsych/src/slopit/index.ts +9 -0
  114. bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
  115. bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
  116. bead/deployment/jspsych/templates/experiment.css +1 -0
  117. bead/deployment/jspsych/templates/experiment.js.template +289 -0
  118. bead/deployment/jspsych/templates/index.html +51 -0
  119. bead/deployment/jspsych/templates/randomizer.js +241 -0
  120. bead/deployment/jspsych/templates/randomizer.js.template +313 -0
  121. bead/deployment/jspsych/trials.py +723 -0
  122. bead/deployment/jspsych/tsconfig.json +23 -0
  123. bead/deployment/jspsych/tsup.config.ts +30 -0
  124. bead/deployment/jspsych/ui/__init__.py +1 -0
  125. bead/deployment/jspsych/ui/components.py +383 -0
  126. bead/deployment/jspsych/ui/styles.py +411 -0
  127. bead/dsl/__init__.py +80 -0
  128. bead/dsl/ast.py +168 -0
  129. bead/dsl/context.py +178 -0
  130. bead/dsl/errors.py +71 -0
  131. bead/dsl/evaluator.py +570 -0
  132. bead/dsl/grammar.lark +81 -0
  133. bead/dsl/parser.py +231 -0
  134. bead/dsl/stdlib.py +929 -0
  135. bead/evaluation/__init__.py +13 -0
  136. bead/evaluation/convergence.py +485 -0
  137. bead/evaluation/interannotator.py +398 -0
  138. bead/items/__init__.py +40 -0
  139. bead/items/adapters/__init__.py +70 -0
  140. bead/items/adapters/anthropic.py +224 -0
  141. bead/items/adapters/api_utils.py +167 -0
  142. bead/items/adapters/base.py +216 -0
  143. bead/items/adapters/google.py +259 -0
  144. bead/items/adapters/huggingface.py +1074 -0
  145. bead/items/adapters/openai.py +323 -0
  146. bead/items/adapters/registry.py +202 -0
  147. bead/items/adapters/sentence_transformers.py +224 -0
  148. bead/items/adapters/togetherai.py +309 -0
  149. bead/items/binary.py +515 -0
  150. bead/items/cache.py +558 -0
  151. bead/items/categorical.py +593 -0
  152. bead/items/cloze.py +757 -0
  153. bead/items/constructor.py +784 -0
  154. bead/items/forced_choice.py +413 -0
  155. bead/items/free_text.py +681 -0
  156. bead/items/generation.py +432 -0
  157. bead/items/item.py +396 -0
  158. bead/items/item_template.py +787 -0
  159. bead/items/magnitude.py +573 -0
  160. bead/items/multi_select.py +621 -0
  161. bead/items/ordinal_scale.py +569 -0
  162. bead/items/scoring.py +448 -0
  163. bead/items/validation.py +723 -0
  164. bead/lists/__init__.py +30 -0
  165. bead/lists/balancer.py +263 -0
  166. bead/lists/constraints.py +1067 -0
  167. bead/lists/experiment_list.py +286 -0
  168. bead/lists/list_collection.py +378 -0
  169. bead/lists/partitioner.py +1141 -0
  170. bead/lists/stratification.py +254 -0
  171. bead/participants/__init__.py +73 -0
  172. bead/participants/collection.py +699 -0
  173. bead/participants/merging.py +312 -0
  174. bead/participants/metadata_spec.py +491 -0
  175. bead/participants/models.py +276 -0
  176. bead/resources/__init__.py +29 -0
  177. bead/resources/adapters/__init__.py +19 -0
  178. bead/resources/adapters/base.py +104 -0
  179. bead/resources/adapters/cache.py +128 -0
  180. bead/resources/adapters/glazing.py +508 -0
  181. bead/resources/adapters/registry.py +117 -0
  182. bead/resources/adapters/unimorph.py +796 -0
  183. bead/resources/classification.py +856 -0
  184. bead/resources/constraint_builders.py +329 -0
  185. bead/resources/constraints.py +165 -0
  186. bead/resources/lexical_item.py +223 -0
  187. bead/resources/lexicon.py +744 -0
  188. bead/resources/loaders.py +209 -0
  189. bead/resources/template.py +441 -0
  190. bead/resources/template_collection.py +707 -0
  191. bead/resources/template_generation.py +349 -0
  192. bead/simulation/__init__.py +29 -0
  193. bead/simulation/annotators/__init__.py +15 -0
  194. bead/simulation/annotators/base.py +175 -0
  195. bead/simulation/annotators/distance_based.py +135 -0
  196. bead/simulation/annotators/lm_based.py +114 -0
  197. bead/simulation/annotators/oracle.py +182 -0
  198. bead/simulation/annotators/random.py +181 -0
  199. bead/simulation/dsl_extension/__init__.py +3 -0
  200. bead/simulation/noise_models/__init__.py +13 -0
  201. bead/simulation/noise_models/base.py +42 -0
  202. bead/simulation/noise_models/random_noise.py +82 -0
  203. bead/simulation/noise_models/systematic.py +132 -0
  204. bead/simulation/noise_models/temperature.py +86 -0
  205. bead/simulation/runner.py +144 -0
  206. bead/simulation/strategies/__init__.py +23 -0
  207. bead/simulation/strategies/base.py +123 -0
  208. bead/simulation/strategies/binary.py +103 -0
  209. bead/simulation/strategies/categorical.py +123 -0
  210. bead/simulation/strategies/cloze.py +224 -0
  211. bead/simulation/strategies/forced_choice.py +127 -0
  212. bead/simulation/strategies/free_text.py +105 -0
  213. bead/simulation/strategies/magnitude.py +116 -0
  214. bead/simulation/strategies/multi_select.py +129 -0
  215. bead/simulation/strategies/ordinal_scale.py +131 -0
  216. bead/templates/__init__.py +27 -0
  217. bead/templates/adapters/__init__.py +17 -0
  218. bead/templates/adapters/base.py +128 -0
  219. bead/templates/adapters/cache.py +178 -0
  220. bead/templates/adapters/huggingface.py +312 -0
  221. bead/templates/combinatorics.py +103 -0
  222. bead/templates/filler.py +605 -0
  223. bead/templates/renderers.py +177 -0
  224. bead/templates/resolver.py +178 -0
  225. bead/templates/strategies.py +1806 -0
  226. bead/templates/streaming.py +195 -0
  227. bead-0.1.0.dist-info/METADATA +212 -0
  228. bead-0.1.0.dist-info/RECORD +231 -0
  229. bead-0.1.0.dist-info/WHEEL +4 -0
  230. bead-0.1.0.dist-info/entry_points.txt +2 -0
  231. bead-0.1.0.dist-info/licenses/LICENSE +21 -0
bead/config/item.py ADDED
@@ -0,0 +1,45 @@
1
+ """Item configuration models for the bead package."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pydantic import BaseModel, Field
6
+
7
+ from bead.config.model import ModelConfig
8
+
9
+
10
class ItemConfig(BaseModel):
    """Settings that control how items are generated.

    Parameters
    ----------
    model : ModelConfig
        Language-model settings. A default ``ModelConfig`` is built when
        the field is omitted.
    apply_constraints : bool
        Whether model-based constraints are applied. Defaults to True.
    track_metadata : bool
        Whether item metadata is tracked. Defaults to True.
    parallel_processing : bool
        Whether parallel processing is used. Defaults to False.
    num_workers : int
        Worker count for parallel processing. Must be greater than 0.
        Defaults to 4.

    Examples
    --------
    >>> config = ItemConfig()
    >>> config.apply_constraints
    True
    >>> config.num_workers
    4
    """

    model: ModelConfig = Field(
        description="Model configuration",
        default_factory=ModelConfig,
    )
    apply_constraints: bool = Field(
        description="Apply model-based constraints",
        default=True,
    )
    track_metadata: bool = Field(
        description="Track item metadata",
        default=True,
    )
    parallel_processing: bool = Field(
        description="Use parallel processing",
        default=False,
    )
    # gt=0 rejects zero and negative worker counts at validation time.
    num_workers: int = Field(
        description="Number of workers",
        default=4,
        gt=0,
    )
bead/config/list.py ADDED
@@ -0,0 +1,193 @@
1
+ """List configuration models for the bead package."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING, Literal
6
+
7
+ from pydantic import BaseModel, Field, field_validator, model_validator
8
+
9
+ if TYPE_CHECKING:
10
+ pass
11
+
12
+
13
class BatchConstraintConfig(BaseModel):
    """Describe one constraint that is evaluated over a whole batch of lists.

    Batch constraints look at all lists in a batch together and enforce
    global properties such as coverage, balance, and diversity.

    Attributes
    ----------
    type : Literal["coverage", "balance", "diversity", "min_occurrence"]
        Which kind of batch constraint this is. Required.
    property_expression : str
        Expression used to extract the property (e.g., "item['template_id']").
        Required; surrounding whitespace is stripped and the result must be
        non-empty.
    target_values : list[str | int | float] | None
        Target values for a coverage constraint. Default: None.
    min_coverage : float
        Minimum coverage fraction for a coverage constraint, within
        0.0-1.0. Default: 1.0.
    target_distribution : dict[str, float] | None
        Target distribution for a balance constraint (values are expected to
        sum to 1.0; the sum is not checked here). Required when ``type`` is
        "balance". Default: None.
    tolerance : float
        Tolerance for a balance constraint, within 0.0-1.0. Default: 0.1.
    max_lists_per_value : int | None
        Maximum lists per value for a diversity constraint (at least 1).
        Required when ``type`` is "diversity". Default: None.
    min_occurrences : int | None
        Minimum occurrences per value for a min_occurrence constraint
        (at least 1). Required when ``type`` is "min_occurrence".
        Default: None.
    priority : int
        Constraint priority, at least 1 (higher = more important).
        Default: 1.

    Examples
    --------
    >>> # Coverage constraint
    >>> config = BatchConstraintConfig(
    ...     type="coverage",
    ...     property_expression="item['template_id']",
    ...     target_values=list(range(26)),
    ...     min_coverage=1.0
    ... )
    >>> # Balance constraint
    >>> config = BatchConstraintConfig(
    ...     type="balance",
    ...     property_expression="item['pair_type']",
    ...     target_distribution={"same_verb": 0.5, "different_verb": 0.5},
    ...     tolerance=0.05
    ... )
    >>> # Diversity constraint
    >>> config = BatchConstraintConfig(
    ...     type="diversity",
    ...     property_expression="item['verb_lemma']",
    ...     max_lists_per_value=3
    ... )
    >>> # Min occurrence constraint
    >>> config = BatchConstraintConfig(
    ...     type="min_occurrence",
    ...     property_expression="item['quantile']",
    ...     min_occurrences=50
    ... )
    """

    type: Literal["coverage", "balance", "diversity", "min_occurrence"] = Field(
        ...,
        description="Type of batch constraint",
    )
    property_expression: str = Field(
        ...,
        description="Expression to extract property",
    )
    target_values: list[str | int | float] | None = Field(
        description="Target values for coverage constraint",
        default=None,
    )
    min_coverage: float = Field(
        description="Minimum coverage fraction",
        default=1.0,
        ge=0.0,
        le=1.0,
    )
    target_distribution: dict[str, float] | None = Field(
        description="Target distribution for balance constraint",
        default=None,
    )
    tolerance: float = Field(
        description="Tolerance for balance constraint",
        default=0.1,
        ge=0.0,
        le=1.0,
    )
    max_lists_per_value: int | None = Field(
        description="Maximum lists per value for diversity",
        default=None,
        ge=1,
    )
    min_occurrences: int | None = Field(
        description="Minimum occurrences for min_occurrence",
        default=None,
        ge=1,
    )
    priority: int = Field(
        description="Constraint priority",
        default=1,
        ge=1,
    )

    @field_validator("property_expression")
    @classmethod
    def validate_property_expression(cls, v: str) -> str:
        """Strip the expression and reject it when nothing is left.

        Parameters
        ----------
        v : str
            Raw property expression.

        Returns
        -------
        str
            The expression without leading/trailing whitespace.

        Raises
        ------
        ValueError
            If the expression is empty or whitespace-only.
        """
        stripped = v.strip()
        if not stripped:
            raise ValueError("property_expression must be non-empty")
        return stripped

    @model_validator(mode="after")
    def validate_constraint_params(self) -> BatchConstraintConfig:
        """Check that the parameter required by ``type`` has been supplied.

        Returns
        -------
        BatchConstraintConfig
            The validated instance itself.

        Raises
        ------
        ValueError
            If the field required for the selected constraint type is None.
        """
        # Constraint type -> (field that must be set, error message).
        # "coverage" has no entry: its target_values may be left as None
        # (auto-detection), so nothing is mandatory for it.
        requirements = {
            "balance": (
                "target_distribution",
                "target_distribution required for balance constraint",
            ),
            "diversity": (
                "max_lists_per_value",
                "max_lists_per_value required for diversity constraint",
            ),
            "min_occurrence": (
                "min_occurrences",
                "min_occurrences required for min_occurrence constraint",
            ),
        }

        requirement = requirements.get(self.type)
        if requirement is not None:
            field_name, message = requirement
            if getattr(self, field_name) is None:
                raise ValueError(message)

        return self
123
+
124
+
125
class ListConfig(BaseModel):
    """Settings for partitioning items into experiment lists.

    Parameters
    ----------
    partitioning_strategy : str
        Name of the partitioning strategy. Defaults to "balanced".
    num_lists : int
        How many lists to create; must be greater than 0. Defaults to 1.
    items_per_list : int | None
        Items per list; must be positive when given. Defaults to None.
    balance_by : list[str]
        Fields to balance on. Defaults to an empty list.
    ensure_uniqueness : bool
        Whether items must be unique across lists. Defaults to True.
    random_seed : int | None
        Random seed for reproducibility. Defaults to None.
    batch_constraints : list[BatchConstraintConfig] | None
        Batch-level constraints applied across all lists. Defaults to None.

    Examples
    --------
    >>> config = ListConfig()
    >>> config.partitioning_strategy
    'balanced'
    >>> config.num_lists
    1
    """

    partitioning_strategy: str = Field(
        description="Partitioning strategy",
        default="balanced",
    )
    num_lists: int = Field(
        description="Number of lists to create",
        default=1,
        gt=0,
    )
    items_per_list: int | None = Field(
        description="Items per list",
        default=None,
    )
    balance_by: list[str] = Field(
        description="Fields to balance on",
        default_factory=list,
    )
    ensure_uniqueness: bool = Field(
        description="Ensure items unique across lists",
        default=True,
    )
    random_seed: int | None = Field(
        description="Random seed",
        default=None,
    )
    batch_constraints: list[BatchConstraintConfig] | None = Field(
        description="Batch-level constraints",
        default=None,
    )

    @field_validator("items_per_list")
    @classmethod
    def validate_items_per_list(cls, v: int | None) -> int | None:
        """Accept ``None`` or a strictly positive item count.

        Parameters
        ----------
        v : int | None
            Items per list value.

        Returns
        -------
        int | None
            The value unchanged when it is valid.

        Raises
        ------
        ValueError
            If a value is given and it is zero or negative.
        """
        # None means "no fixed size" and is passed through untouched.
        if v is None or v > 0:
            return v
        raise ValueError(f"items_per_list must be positive, got {v}")
bead/config/loader.py ADDED
@@ -0,0 +1,149 @@
1
+ """Configuration loading from YAML files.
2
+
3
+ This module provides functionality for loading configurations from YAML files,
4
+ merging configurations from multiple sources, and applying configuration overrides.
5
+ """
6
+
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ import yaml
11
+
12
+ from bead.config.config import BeadConfig
13
+ from bead.config.profiles import get_profile
14
+
15
+
16
def merge_configs(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]:
    """Deep merge two configuration dictionaries.

    Recursively merges override into base, with override values taking
    precedence. When both sides hold a dict under the same key the two dicts
    are merged; in every other case the override value replaces the base
    value.

    Neither input is modified, and the result shares no nested ``dict``
    objects with ``base`` or ``override``: every nested dict is rebuilt, so
    mutating the merged configuration cannot leak back into the inputs.
    Non-dict values (including lists) are carried over by reference.

    Parameters
    ----------
    base : dict[str, Any]
        Base configuration dictionary.
    override : dict[str, Any]
        Override configuration dictionary.

    Returns
    -------
    dict[str, Any]
        Merged configuration dictionary.

    Examples
    --------
    >>> base = {"a": 1, "b": {"c": 2}}
    >>> override = {"b": {"d": 3}}
    >>> merge_configs(base, override)
    {'a': 1, 'b': {'c': 2, 'd': 3}}
    """
    # Rebuild nested dicts from ``base`` instead of a shallow ``base.copy()``,
    # which would leave the result aliasing base's nested dictionaries.
    result: dict[str, Any] = {
        key: merge_configs(value, {}) if isinstance(value, dict) else value
        for key, value in base.items()
    }

    for key, value in override.items():
        if not isinstance(value, dict):
            # Scalars, lists, None, ... simply replace whatever was there.
            result[key] = value
            continue

        existing = result.get(key)
        # Merge into an existing dict; otherwise merge into an empty one so
        # the override's dict is copied rather than stored by reference.
        target = existing if isinstance(existing, dict) else {}
        result[key] = merge_configs(target, value)

    return result
47
+
48
+
49
+ def load_yaml_file(path: Path | str) -> dict[str, Any]:
50
+ """Load YAML file and return as dictionary.
51
+
52
+ Parameters
53
+ ----------
54
+ path : Path | str
55
+ Path to YAML file.
56
+
57
+ Returns
58
+ -------
59
+ dict[str, Any]
60
+ Parsed YAML content.
61
+
62
+ Raises
63
+ ------
64
+ FileNotFoundError
65
+ If file doesn't exist.
66
+ yaml.YAMLError
67
+ If YAML is malformed.
68
+ """
69
+ path = Path(path) if isinstance(path, str) else path
70
+
71
+ if not path.exists():
72
+ raise FileNotFoundError(f"Configuration file not found: {path}")
73
+
74
+ try:
75
+ with open(path) as f:
76
+ content = yaml.safe_load(f)
77
+ # handle empty files
78
+ return content if content is not None else {}
79
+ except yaml.YAMLError as e:
80
+ raise yaml.YAMLError(f"Failed to parse YAML file {path}: {e}") from e
81
+
82
+
83
def load_config(
    config_path: Path | str | None = None,
    profile: str = "default",
    **overrides: Any,
) -> BeadConfig:
    """Build a ``BeadConfig`` from a profile, an optional YAML file, and overrides.

    Sources are layered, later ones winning:
    1. Profile defaults
    2. YAML file values
    3. Keyword overrides

    Parameters
    ----------
    config_path : Path | str | None
        Path to YAML config file. If None, only the profile (and any
        overrides) are used.
    profile : str
        Name of the profile used as the base; passed to ``get_profile``.
    **overrides : Any
        Direct overrides for config values. A double underscore in a keyword
        name addresses a nested key, e.g. ``logging__level="DEBUG"`` sets
        ``{"logging": {"level": "DEBUG"}}``.

    Returns
    -------
    BeadConfig
        Loaded and merged configuration.

    Raises
    ------
    FileNotFoundError
        If config_path is specified but doesn't exist.
    yaml.YAMLError
        If YAML file is malformed.
    ValidationError
        If configuration is invalid.

    Examples
    --------
    >>> config = load_config(profile="dev")
    >>> config.profile
    'dev'
    >>> config = load_config(config_path="config.yaml", logging__level="DEBUG")
    >>> config.logging.level
    'DEBUG'
    """
    # layer 1: the profile's values as a plain dict
    merged: dict[str, Any] = get_profile(profile).model_dump()

    # layer 2: values from the YAML file, when one was given
    if config_path is not None:
        merged = merge_configs(merged, load_yaml_file(config_path))

    # layer 3: keyword overrides, with "a__b__c" expanded to nested dicts
    if overrides:
        nested: dict[str, Any] = {}
        for flat_key, value in overrides.items():
            *parents, leaf = flat_key.split("__")
            node = nested
            for name in parents:
                # descend, creating intermediate dicts on first use
                node = node.setdefault(name, {})
            node[leaf] = value
        merged = merge_configs(merged, nested)

    # BeadConfig performs the validation of the merged values
    return BeadConfig(**merged)
bead/config/logging.py ADDED
@@ -0,0 +1,42 @@
1
+ """Logging configuration models for the bead package."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import Literal
7
+
8
+ from pydantic import BaseModel, Field
9
+
10
+
11
class LoggingConfig(BaseModel):
    """Settings that describe how logging should be set up.

    Parameters
    ----------
    level : Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
        Log level. Defaults to "INFO".
    format : str
        Log format string. Defaults to
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s".
    file : Path | None
        Log file path. Defaults to None.
    console : bool
        Whether to log to console. Defaults to True.

    Examples
    --------
    >>> config = LoggingConfig()
    >>> config.level
    'INFO'
    >>> config.console
    True
    """

    level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = Field(
        description="Log level",
        default="INFO",
    )
    format: str = Field(
        description="Log format",
        default="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )
    file: Path | None = Field(
        description="Log file path",
        default=None,
    )
    console: bool = Field(
        description="Log to console",
        default=True,
    )
bead/config/model.py ADDED
@@ -0,0 +1,49 @@
1
+ """Model configuration models for the bead package."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Literal
6
+
7
+ from pydantic import BaseModel, Field
8
+
9
+
10
class ModelConfig(BaseModel):
    """Settings for a language model.

    Parameters
    ----------
    provider : Literal["huggingface", "openai", "anthropic"]
        Model provider name. Defaults to "huggingface".
    model_name : str
        Model identifier. Defaults to "gpt2".
    batch_size : int
        Inference batch size; must be greater than 0. Defaults to 8.
    device : Literal["cpu", "cuda", "mps"]
        Device to use for computation. Defaults to "cpu".
    max_length : int
        Maximum sequence length; must be greater than 0. Defaults to 512.
    temperature : float
        Sampling temperature; must be at least 0. Defaults to 1.0.
    cache_outputs : bool
        Whether to cache model outputs. Defaults to True.

    Examples
    --------
    >>> config = ModelConfig()
    >>> config.provider
    'huggingface'
    >>> config.device
    'cpu'
    """

    provider: Literal["huggingface", "openai", "anthropic"] = Field(
        description="Model provider",
        default="huggingface",
    )
    model_name: str = Field(
        description="Model identifier",
        default="gpt2",
    )
    batch_size: int = Field(
        description="Inference batch size",
        default=8,
        gt=0,
    )
    device: Literal["cpu", "cuda", "mps"] = Field(
        description="Device to use",
        default="cpu",
    )
    max_length: int = Field(
        description="Max sequence length",
        default=512,
        gt=0,
    )
    # ge=0 (not gt): a temperature of exactly 0 is accepted.
    temperature: float = Field(
        description="Sampling temperature",
        default=1.0,
        ge=0,
    )
    cache_outputs: bool = Field(
        description="Cache model outputs",
        default=True,
    )
bead/config/paths.py ADDED
@@ -0,0 +1,46 @@
1
+ """Path configuration models for the bead package."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ from pydantic import BaseModel, Field
8
+
9
+
10
class PathsConfig(BaseModel):
    """Settings for the file system locations the package uses.

    Parameters
    ----------
    data_dir : Path
        Base directory for data files. Defaults to ``Path("data")``.
    output_dir : Path
        Base directory for outputs. Defaults to ``Path("output")``.
    cache_dir : Path
        Cache directory. Defaults to ``Path(".cache")``.
    temp_dir : Path | None
        Temporary directory. If None, uses system temp.
    create_dirs : bool
        Whether to create directories if they don't exist. Defaults to True.

    Examples
    --------
    The reprs below are what a POSIX system prints.

    >>> config = PathsConfig()
    >>> config.data_dir
    PosixPath('data')
    >>> config = PathsConfig(data_dir=Path("/absolute/path"))
    >>> config.data_dir
    PosixPath('/absolute/path')
    """

    data_dir: Path = Field(
        description="Base directory for data files",
        default=Path("data"),
    )
    output_dir: Path = Field(
        description="Base directory for outputs",
        default=Path("output"),
    )
    cache_dir: Path = Field(
        description="Cache directory",
        default=Path(".cache"),
    )
    temp_dir: Path | None = Field(
        description="Temporary directory",
        default=None,
    )
    create_dirs: bool = Field(
        description="Create directories if they don't exist",
        default=True,
    )