bead 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bead/__init__.py +11 -0
- bead/__main__.py +11 -0
- bead/active_learning/__init__.py +15 -0
- bead/active_learning/config.py +231 -0
- bead/active_learning/loop.py +566 -0
- bead/active_learning/models/__init__.py +24 -0
- bead/active_learning/models/base.py +852 -0
- bead/active_learning/models/binary.py +910 -0
- bead/active_learning/models/categorical.py +943 -0
- bead/active_learning/models/cloze.py +862 -0
- bead/active_learning/models/forced_choice.py +956 -0
- bead/active_learning/models/free_text.py +773 -0
- bead/active_learning/models/lora.py +365 -0
- bead/active_learning/models/magnitude.py +835 -0
- bead/active_learning/models/multi_select.py +795 -0
- bead/active_learning/models/ordinal_scale.py +811 -0
- bead/active_learning/models/peft_adapter.py +155 -0
- bead/active_learning/models/random_effects.py +639 -0
- bead/active_learning/selection.py +354 -0
- bead/active_learning/strategies.py +391 -0
- bead/active_learning/trainers/__init__.py +26 -0
- bead/active_learning/trainers/base.py +210 -0
- bead/active_learning/trainers/data_collator.py +172 -0
- bead/active_learning/trainers/dataset_utils.py +261 -0
- bead/active_learning/trainers/huggingface.py +304 -0
- bead/active_learning/trainers/lightning.py +324 -0
- bead/active_learning/trainers/metrics.py +424 -0
- bead/active_learning/trainers/mixed_effects.py +551 -0
- bead/active_learning/trainers/model_wrapper.py +509 -0
- bead/active_learning/trainers/registry.py +104 -0
- bead/adapters/__init__.py +11 -0
- bead/adapters/huggingface.py +61 -0
- bead/behavioral/__init__.py +116 -0
- bead/behavioral/analytics.py +646 -0
- bead/behavioral/extraction.py +343 -0
- bead/behavioral/merging.py +343 -0
- bead/cli/__init__.py +11 -0
- bead/cli/active_learning.py +513 -0
- bead/cli/active_learning_commands.py +779 -0
- bead/cli/completion.py +359 -0
- bead/cli/config.py +624 -0
- bead/cli/constraint_builders.py +286 -0
- bead/cli/deployment.py +859 -0
- bead/cli/deployment_trials.py +493 -0
- bead/cli/deployment_ui.py +332 -0
- bead/cli/display.py +378 -0
- bead/cli/items.py +960 -0
- bead/cli/items_factories.py +776 -0
- bead/cli/list_constraints.py +714 -0
- bead/cli/lists.py +490 -0
- bead/cli/main.py +430 -0
- bead/cli/models.py +877 -0
- bead/cli/resource_loaders.py +621 -0
- bead/cli/resources.py +1036 -0
- bead/cli/shell.py +356 -0
- bead/cli/simulate.py +840 -0
- bead/cli/templates.py +1158 -0
- bead/cli/training.py +1080 -0
- bead/cli/utils.py +614 -0
- bead/cli/workflow.py +1273 -0
- bead/config/__init__.py +68 -0
- bead/config/active_learning.py +1009 -0
- bead/config/config.py +192 -0
- bead/config/defaults.py +118 -0
- bead/config/deployment.py +217 -0
- bead/config/env.py +147 -0
- bead/config/item.py +45 -0
- bead/config/list.py +193 -0
- bead/config/loader.py +149 -0
- bead/config/logging.py +42 -0
- bead/config/model.py +49 -0
- bead/config/paths.py +46 -0
- bead/config/profiles.py +320 -0
- bead/config/resources.py +47 -0
- bead/config/serialization.py +210 -0
- bead/config/simulation.py +206 -0
- bead/config/template.py +238 -0
- bead/config/validation.py +267 -0
- bead/data/__init__.py +65 -0
- bead/data/base.py +87 -0
- bead/data/identifiers.py +97 -0
- bead/data/language_codes.py +61 -0
- bead/data/metadata.py +270 -0
- bead/data/range.py +123 -0
- bead/data/repository.py +358 -0
- bead/data/serialization.py +249 -0
- bead/data/timestamps.py +89 -0
- bead/data/validation.py +349 -0
- bead/data_collection/__init__.py +11 -0
- bead/data_collection/jatos.py +223 -0
- bead/data_collection/merger.py +154 -0
- bead/data_collection/prolific.py +198 -0
- bead/deployment/__init__.py +5 -0
- bead/deployment/distribution.py +402 -0
- bead/deployment/jatos/__init__.py +1 -0
- bead/deployment/jatos/api.py +200 -0
- bead/deployment/jatos/exporter.py +210 -0
- bead/deployment/jspsych/__init__.py +9 -0
- bead/deployment/jspsych/biome.json +44 -0
- bead/deployment/jspsych/config.py +411 -0
- bead/deployment/jspsych/generator.py +598 -0
- bead/deployment/jspsych/package.json +51 -0
- bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
- bead/deployment/jspsych/randomizer.py +299 -0
- bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
- bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
- bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
- bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
- bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
- bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
- bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
- bead/deployment/jspsych/src/plugins/rating.ts +248 -0
- bead/deployment/jspsych/src/slopit/index.ts +9 -0
- bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
- bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
- bead/deployment/jspsych/templates/experiment.css +1 -0
- bead/deployment/jspsych/templates/experiment.js.template +289 -0
- bead/deployment/jspsych/templates/index.html +51 -0
- bead/deployment/jspsych/templates/randomizer.js +241 -0
- bead/deployment/jspsych/templates/randomizer.js.template +313 -0
- bead/deployment/jspsych/trials.py +723 -0
- bead/deployment/jspsych/tsconfig.json +23 -0
- bead/deployment/jspsych/tsup.config.ts +30 -0
- bead/deployment/jspsych/ui/__init__.py +1 -0
- bead/deployment/jspsych/ui/components.py +383 -0
- bead/deployment/jspsych/ui/styles.py +411 -0
- bead/dsl/__init__.py +80 -0
- bead/dsl/ast.py +168 -0
- bead/dsl/context.py +178 -0
- bead/dsl/errors.py +71 -0
- bead/dsl/evaluator.py +570 -0
- bead/dsl/grammar.lark +81 -0
- bead/dsl/parser.py +231 -0
- bead/dsl/stdlib.py +929 -0
- bead/evaluation/__init__.py +13 -0
- bead/evaluation/convergence.py +485 -0
- bead/evaluation/interannotator.py +398 -0
- bead/items/__init__.py +40 -0
- bead/items/adapters/__init__.py +70 -0
- bead/items/adapters/anthropic.py +224 -0
- bead/items/adapters/api_utils.py +167 -0
- bead/items/adapters/base.py +216 -0
- bead/items/adapters/google.py +259 -0
- bead/items/adapters/huggingface.py +1074 -0
- bead/items/adapters/openai.py +323 -0
- bead/items/adapters/registry.py +202 -0
- bead/items/adapters/sentence_transformers.py +224 -0
- bead/items/adapters/togetherai.py +309 -0
- bead/items/binary.py +515 -0
- bead/items/cache.py +558 -0
- bead/items/categorical.py +593 -0
- bead/items/cloze.py +757 -0
- bead/items/constructor.py +784 -0
- bead/items/forced_choice.py +413 -0
- bead/items/free_text.py +681 -0
- bead/items/generation.py +432 -0
- bead/items/item.py +396 -0
- bead/items/item_template.py +787 -0
- bead/items/magnitude.py +573 -0
- bead/items/multi_select.py +621 -0
- bead/items/ordinal_scale.py +569 -0
- bead/items/scoring.py +448 -0
- bead/items/validation.py +723 -0
- bead/lists/__init__.py +30 -0
- bead/lists/balancer.py +263 -0
- bead/lists/constraints.py +1067 -0
- bead/lists/experiment_list.py +286 -0
- bead/lists/list_collection.py +378 -0
- bead/lists/partitioner.py +1141 -0
- bead/lists/stratification.py +254 -0
- bead/participants/__init__.py +73 -0
- bead/participants/collection.py +699 -0
- bead/participants/merging.py +312 -0
- bead/participants/metadata_spec.py +491 -0
- bead/participants/models.py +276 -0
- bead/resources/__init__.py +29 -0
- bead/resources/adapters/__init__.py +19 -0
- bead/resources/adapters/base.py +104 -0
- bead/resources/adapters/cache.py +128 -0
- bead/resources/adapters/glazing.py +508 -0
- bead/resources/adapters/registry.py +117 -0
- bead/resources/adapters/unimorph.py +796 -0
- bead/resources/classification.py +856 -0
- bead/resources/constraint_builders.py +329 -0
- bead/resources/constraints.py +165 -0
- bead/resources/lexical_item.py +223 -0
- bead/resources/lexicon.py +744 -0
- bead/resources/loaders.py +209 -0
- bead/resources/template.py +441 -0
- bead/resources/template_collection.py +707 -0
- bead/resources/template_generation.py +349 -0
- bead/simulation/__init__.py +29 -0
- bead/simulation/annotators/__init__.py +15 -0
- bead/simulation/annotators/base.py +175 -0
- bead/simulation/annotators/distance_based.py +135 -0
- bead/simulation/annotators/lm_based.py +114 -0
- bead/simulation/annotators/oracle.py +182 -0
- bead/simulation/annotators/random.py +181 -0
- bead/simulation/dsl_extension/__init__.py +3 -0
- bead/simulation/noise_models/__init__.py +13 -0
- bead/simulation/noise_models/base.py +42 -0
- bead/simulation/noise_models/random_noise.py +82 -0
- bead/simulation/noise_models/systematic.py +132 -0
- bead/simulation/noise_models/temperature.py +86 -0
- bead/simulation/runner.py +144 -0
- bead/simulation/strategies/__init__.py +23 -0
- bead/simulation/strategies/base.py +123 -0
- bead/simulation/strategies/binary.py +103 -0
- bead/simulation/strategies/categorical.py +123 -0
- bead/simulation/strategies/cloze.py +224 -0
- bead/simulation/strategies/forced_choice.py +127 -0
- bead/simulation/strategies/free_text.py +105 -0
- bead/simulation/strategies/magnitude.py +116 -0
- bead/simulation/strategies/multi_select.py +129 -0
- bead/simulation/strategies/ordinal_scale.py +131 -0
- bead/templates/__init__.py +27 -0
- bead/templates/adapters/__init__.py +17 -0
- bead/templates/adapters/base.py +128 -0
- bead/templates/adapters/cache.py +178 -0
- bead/templates/adapters/huggingface.py +312 -0
- bead/templates/combinatorics.py +103 -0
- bead/templates/filler.py +605 -0
- bead/templates/renderers.py +177 -0
- bead/templates/resolver.py +178 -0
- bead/templates/strategies.py +1806 -0
- bead/templates/streaming.py +195 -0
- bead-0.1.0.dist-info/METADATA +212 -0
- bead-0.1.0.dist-info/RECORD +231 -0
- bead-0.1.0.dist-info/WHEEL +4 -0
- bead-0.1.0.dist-info/entry_points.txt +2 -0
- bead-0.1.0.dist-info/licenses/LICENSE +21 -0
bead/templates/filler.py
ADDED
|
@@ -0,0 +1,605 @@
|
|
|
1
|
+
"""Template filling with backtracking search and constraint propagation.
|
|
2
|
+
|
|
3
|
+
This module implements a CSP (Constraint Satisfaction Problem) solver for
|
|
4
|
+
template filling. It uses backtracking search with forward checking to
|
|
5
|
+
efficiently find valid slot fillings that satisfy all constraints.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from abc import ABC, abstractmethod
|
|
11
|
+
from collections.abc import Iterable, Iterator
|
|
12
|
+
from typing import TYPE_CHECKING
|
|
13
|
+
|
|
14
|
+
from bead.data.base import BeadBaseModel
|
|
15
|
+
from bead.data.language_codes import LanguageCode, validate_iso639_code
|
|
16
|
+
from bead.dsl import ast
|
|
17
|
+
from bead.dsl.parser import parse
|
|
18
|
+
from bead.resources.lexical_item import LexicalItem
|
|
19
|
+
from bead.resources.template import Template
|
|
20
|
+
from bead.templates.renderers import DefaultRenderer, TemplateRenderer
|
|
21
|
+
from bead.templates.resolver import ConstraintResolver
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
from bead.resources.lexicon import Lexicon
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class TemplateFiller(ABC):
    """Common interface shared by every template-filling approach.

    A concrete filler decides how the slots of a template are populated
    with lexical items drawn from a lexicon. Available approaches include
    constraint satisfaction solving (CSP) and enumeration-based strategies.

    Examples
    --------
    >>> from bead.templates.filler import CSPFiller
    >>> filler = CSPFiller(lexicon)
    >>> filled = list(filler.fill(template))
    """

    @abstractmethod
    def fill(
        self,
        template: Template,
        language_code: LanguageCode | None = None,
    ) -> Iterable[FilledTemplate]:
        """Produce filled instances of ``template``.

        Subclasses must override this method; the base implementation has
        no behavior of its own and returns ``None`` when reached through
        ``super()``.

        Parameters
        ----------
        template : Template
            The template whose slots should be filled.
        language_code : LanguageCode | None
            When given, restricts the lexical items considered to this
            language.

        Returns
        -------
        Iterable[FilledTemplate]
            The filled templates, either lazily (iterator) or eagerly (list).

        Raises
        ------
        ValueError
            If the template cannot be filled.
        """
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class ConstraintUnsatisfiableError(Exception):
    """Raised when template constraints cannot be satisfied.

    Signals that the backtracking search ended without producing a valid
    assignment for the template.

    Attributes
    ----------
    template_name : str
        Name of the template that could not be filled.
    slot_name : str | None
        Name of the slot where filling failed (if known).
    attempted_combinations : int
        Number of partial assignments tried before failure.
    message : str
        Diagnostic message explaining the failure.

    Examples
    --------
    >>> raise ConstraintUnsatisfiableError(
    ...     template_name="transitive",
    ...     slot_name="verb",
    ...     attempted_combinations=1523,
    ...     message="No VERB items satisfy agreement constraints"
    ... )
    """

    def __init__(
        self,
        template_name: str,
        slot_name: str | None = None,
        attempted_combinations: int = 0,
        message: str = "Could not satisfy all constraints",
    ) -> None:
        self.template_name = template_name
        self.slot_name = slot_name
        self.attempted_combinations = attempted_combinations
        self.message = message
        # The exception's args hold only the fully formatted diagnostic text.
        super().__init__(self._format_message())

    def __reduce__(self) -> tuple[object, ...]:
        """Describe how to rebuild this exception for ``copy`` and ``pickle``.

        The inherited implementation replays ``self.args`` into the
        constructor. ``args`` contains only the already-formatted message,
        so the rebuilt exception would treat that whole message as
        ``template_name`` and format it a second time. Replaying the real
        constructor arguments keeps ``str()`` stable across a round trip.

        Returns
        -------
        tuple
            ``(class, constructor_args, instance_state)``.
        """
        constructor_args = (
            self.template_name,
            self.slot_name,
            self.attempted_combinations,
            self.message,
        )
        # The state carries any extra attributes attached after construction.
        return (type(self), constructor_args, dict(vars(self)))

    def _format_message(self) -> str:
        """Build the diagnostic text shown by ``str(exception)``.

        Returns
        -------
        str
            The template name and message, followed by the failing slot and
            the attempt count when those are set, joined with ``". "``.
        """
        parts = [f"Template '{self.template_name}': {self.message}"]
        if self.slot_name:
            parts.append(f"Failed at slot: {self.slot_name}")
        if self.attempted_combinations > 0:
            parts.append(f"Tried {self.attempted_combinations} combinations")
        return ". ".join(parts)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class FilledTemplate(BeadBaseModel):
    """One concrete instantiation of a template.

    Records which lexical item was placed in each slot, together with the
    resulting text and the provenance of the filling.

    Attributes
    ----------
    template_id : str
        ID of the source template.
    template_name : str
        Name of the source template.
    slot_fillers : dict[str, LexicalItem]
        Mapping of slot names to the items that fill them.
    rendered_text : str
        Template string with slots replaced by item lemmas.
    strategy_name : str
        Name of the strategy used to generate this filled template.
    template_slots : dict[str, bool]
        Mapping of every slot name in the template to whether that slot is
        required. Used to work out which slots were left unfilled.

    Examples
    --------
    >>> filled = FilledTemplate(
    ...     template_id="t1",
    ...     template_name="transitive",
    ...     slot_fillers={"subject": noun_item, "verb": verb_item},
    ...     rendered_text="cat broke the object",
    ...     strategy_name="exhaustive",
    ...     template_slots={"subject": True, "verb": True, "object": True}
    ... )
    """

    template_id: str
    template_name: str
    slot_fillers: dict[str, LexicalItem]
    rendered_text: str
    strategy_name: str
    template_slots: dict[str, bool] = {}

    @property
    def unfilled_slots(self) -> set[str]:
        """Names of template slots that have no filler.

        Returns
        -------
        set[str]
            Every key of ``template_slots`` that is absent from
            ``slot_fillers``.
        """
        fillers = self.slot_fillers
        return {name for name in self.template_slots if name not in fillers}

    @property
    def unfilled_required_slots(self) -> set[str]:
        """Names of required template slots that have no filler.

        Returns
        -------
        set[str]
            The unfilled slots whose ``template_slots`` entry is truthy.
        """
        missing: set[str] = set()
        for name, required in self.template_slots.items():
            if not required:
                continue
            if name in self.slot_fillers:
                continue
            missing.add(name)
        return missing

    @property
    def is_complete(self) -> bool:
        """Whether every required slot has been filled.

        Returns
        -------
        bool
            True when no required slot is missing a filler.
        """
        return not self.unfilled_required_slots
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
class CSPFiller(TemplateFiller):
    """Fill templates using backtracking search with forward checking.

    Implements a CSP (Constraint Satisfaction Problem) solver with these
    properties:

    1. Completeness: finds a solution if one exists, provided the search is
       not cut short by the ``max_attempts`` budget.
    2. Correctness: every returned assignment has passed the slot-level and
       template-level constraint checks.
    3. Termination: halts either with solutions or with an error.

    The algorithm uses:

    - Backtracking search to explore the assignment space
    - Forward checking to prune the search space early
    - Most-constrained-first slot ordering heuristic
    - Constraint propagation for multi-slot constraints

    Use this filler when templates have multi-slot constraints
    (``Template.constraints``) that require agreement or relational checking.
    For simple templates with only single-slot constraints, StrategyFiller
    is the recommended (reportedly 10-100x faster) alternative.

    Parameters
    ----------
    lexicon : Lexicon
        Lexicon containing candidate items.
    max_attempts : int
        Maximum number of partial assignments to try (default: 10000).
    renderer : TemplateRenderer | None
        Template renderer used to produce ``rendered_text``. If None, a
        ``DefaultRenderer()`` is used.

    Examples
    --------
    >>> from bead.resources.lexicon import Lexicon
    >>> from bead.templates.filler import CSPFiller
    >>> lexicon = Lexicon(items=[...])
    >>> filler = CSPFiller(lexicon)
    >>> try:
    ...     filled = next(filler.fill(template))
    ... except ConstraintUnsatisfiableError as e:
    ...     print(f"Could not fill: {e}")
    """

    def __init__(
        self,
        lexicon: Lexicon,
        max_attempts: int = 10000,
        renderer: TemplateRenderer | None = None,
    ) -> None:
        self.lexicon = lexicon
        self.max_attempts = max_attempts
        self.resolver = ConstraintResolver()
        self.renderer = renderer if renderer is not None else DefaultRenderer()

    def fill(
        self,
        template: Template,
        language_code: LanguageCode | None = None,
        count: int = 1,
    ) -> Iterator[FilledTemplate]:
        """Fill template with lexical items using backtracking search.

        Yields filled templates one at a time as they are found and stops
        after yielding ``count`` of them or when the search ends. Because
        this is a generator, validation and errors happen on first
        iteration, not when ``fill`` is called.

        Parameters
        ----------
        template : Template
            Template to fill.
        language_code : LanguageCode | None
            Optional language code to filter lexicon items.
        count : int
            Maximum number of filled templates to generate (default: 1).
            A value of zero or less yields nothing.

        Yields
        ------
        FilledTemplate
            Filled template instance satisfying all constraints.

        Raises
        ------
        ConstraintUnsatisfiableError
            If a slot has no candidates, or if the search ends (by
            exhaustion or by reaching ``max_attempts``) without producing
            any filling.
        ValueError
            If the template has no slots.

        Examples
        --------
        >>> filler = CSPFiller(lexicon)
        >>> # Get first valid filling
        >>> filled = next(filler.fill(template))
        >>> # Get up to 10 different fillings
        >>> fillings = list(filler.fill(template, count=10))
        """
        if not template.slots:
            raise ValueError(f"Template '{template.name}' has no slots")

        # Nothing was requested. Without this guard the limit is only
        # checked after the first yield, so one result would still leak out.
        if count <= 0:
            return

        # 1. Build candidate pools for each slot
        candidate_pools = self._build_candidate_pools(template, language_code)

        # 2. A slot with no candidates makes the template unfillable
        empty_slots = [name for name, pool in candidate_pools.items() if not pool]
        if empty_slots:
            raise ConstraintUnsatisfiableError(
                template_name=template.name,
                slot_name=empty_slots[0],
                message=f"No valid candidates for slot(s): {', '.join(empty_slots)}",
            )

        # 3. Determine slot ordering (most constrained first)
        slot_order = self._order_slots(template, candidate_pools)

        # 4. Run backtracking search. The counter is a one-element list so
        #    the recursive generator can update it in place.
        generated = 0
        attempt_count = [0]

        for filled in self._backtrack_search(
            template, candidate_pools, slot_order, {}, attempt_count
        ):
            yield filled
            generated += 1
            if generated >= count:
                return

        # The search ended on its own; report a failure only if it found nothing.
        if generated == 0:
            if attempt_count[0] >= self.max_attempts:
                # The budget stopped the search, so it was not exhaustive.
                message = (
                    f"Reached the attempt limit ({self.max_attempts}) "
                    "before finding a valid assignment"
                )
            else:
                message = "Exhausted all possibilities without finding valid assignment"
            raise ConstraintUnsatisfiableError(
                template_name=template.name,
                attempted_combinations=attempt_count[0],
                message=message,
            )

    def _build_candidate_pools(
        self, template: Template, language_code: LanguageCode | None = None
    ) -> dict[str, list[LexicalItem]]:
        """Build candidate pools for each slot.

        For each slot, collect the lexicon items that pass the optional
        language filter and satisfy the slot's single-slot constraints.

        Parameters
        ----------
        template : Template
            Template with slots and constraints.
        language_code : LanguageCode | None
            Optional language code to filter items.

        Returns
        -------
        dict[str, list[LexicalItem]]
            Mapping of slot names to candidate items, in lexicon order.
        """
        # Normalize language code if provided
        normalized_lang = validate_iso639_code(language_code) if language_code else None

        if not template.slots:
            return {}

        # The language filter does not depend on the slot, so apply it once
        # rather than re-normalizing every item's code for every slot.
        if normalized_lang:
            eligible_items: list[LexicalItem] = []
            for item in self.lexicon.items.values():
                item_lang = (
                    validate_iso639_code(item.language_code)
                    if item.language_code
                    else None
                )
                if item_lang == normalized_lang:
                    eligible_items.append(item)
        else:
            eligible_items = list(self.lexicon.items.values())

        evaluate = self.resolver.evaluate_slot_constraints
        return {
            slot_name: [
                item for item in eligible_items if evaluate(item, slot.constraints)
            ]
            for slot_name, slot in template.slots.items()
        }

    def _order_slots(
        self, template: Template, candidate_pools: dict[str, list[LexicalItem]]
    ) -> list[str]:
        """Order slots using most-constrained-first heuristic.

        Slots with fewer candidates are filled first so that dead ends are
        discovered early.

        Parameters
        ----------
        template : Template
            Template with slots.
        candidate_pools : dict[str, list[LexicalItem]]
            Candidate items for each slot.

        Returns
        -------
        list[str]
            Slot names in filling order.
        """

        def slot_key(slot_name: str) -> tuple[int, int, str]:
            # Sort by: fewest candidates, then most constraints (negated so
            # larger counts sort first), then name for determinism.
            return (
                len(candidate_pools[slot_name]),
                -len(template.slots[slot_name].constraints),
                slot_name,
            )

        return sorted(template.slots, key=slot_key)

    def _backtrack_search(
        self,
        template: Template,
        candidate_pools: dict[str, list[LexicalItem]],
        slot_order: list[str],
        assignment: dict[str, LexicalItem],
        attempt_count: list[int],
    ) -> Iterator[FilledTemplate]:
        """Backtracking search with forward checking.

        Recursively fills slots one at a time in ``slot_order``, checking
        constraints at each step to prune invalid branches early.

        Parameters
        ----------
        template : Template
            Template being filled.
        candidate_pools : dict[str, list[LexicalItem]]
            Candidate items for each slot.
        slot_order : list[str]
            Order in which to fill slots.
        assignment : dict[str, LexicalItem]
            Current partial assignment; never mutated, each level works on
            an extended copy.
        attempt_count : list[int]
            One-element mutable counter of candidates tried so far, shared
            by all recursion levels.

        Yields
        ------
        FilledTemplate
            Valid complete assignments.
        """
        # Once the budget is spent, no level may explore or yield anything.
        # NOTE(review): because this check precedes the base case, the
        # assignment completed by the final counted attempt is discarded.
        # Looks like an off-by-one; confirm intent before changing.
        if attempt_count[0] >= self.max_attempts:
            return

        depth = len(assignment)

        # Base case: all slots filled
        if depth == len(slot_order):
            # Check template level multi slot constraints
            if self.resolver.evaluate_template_constraints(
                assignment, template.constraints
            ):
                yield self._create_filled_template(template, assignment)
            return

        # Recursive case: fill next slot
        slot_name = slot_order[depth]
        slot = template.slots[slot_name]
        filled_slots = slot_order[: depth + 1]

        for candidate in candidate_pools[slot_name]:
            # A deeper level may have used up the budget. Stop here instead
            # of constraint-checking candidates that can never be explored,
            # which also keeps the reported attempt count within the budget.
            if attempt_count[0] >= self.max_attempts:
                return
            attempt_count[0] += 1

            # Check single slot constraints
            if not self.resolver.evaluate_slot_constraints(candidate, slot.constraints):
                continue

            # Create extended assignment
            extended_assignment = {**assignment, slot_name: candidate}

            # Forward checking: check partial multi slot constraints
            if not self._check_partial_constraints(
                template, extended_assignment, filled_slots
            ):
                continue

            # Recurse with extended assignment
            yield from self._backtrack_search(
                template,
                candidate_pools,
                slot_order,
                extended_assignment,
                attempt_count,
            )

    def _check_partial_constraints(
        self,
        template: Template,
        partial_assignment: dict[str, LexicalItem],
        filled_slots: list[str],
    ) -> bool:
        """Check if partial assignment satisfies applicable constraints.

        Only constraints whose referenced variables have all been filled
        are evaluated; the others are deferred. The referenced variables
        are found by walking the constraint's AST.

        Parameters
        ----------
        template : Template
            Template with constraints.
        partial_assignment : dict[str, LexicalItem]
            Current partial assignment.
        filled_slots : list[str]
            Names of slots that have been filled.

        Returns
        -------
        bool
            True if all applicable constraints are satisfied.
        """
        filled_set = set(filled_slots)

        for constraint in template.constraints:
            # Reuse the pre-compiled AST when present, otherwise parse
            if constraint.compiled:
                ast_node = constraint.compiled
            else:
                ast_node = parse(constraint.expression)

            # Extract all variable names referenced in the expression
            referenced_vars = self._extract_variables(ast_node)

            # Filter to only slot related variables (exclude context variables)
            # NOTE(review): _extract_variables also collects the function
            # position of calls, so a bare function name presumably counts
            # as an unfilled slot here and defers the constraint to the
            # final check. Confirm whether that is intended.
            referenced_slots = referenced_vars - set(constraint.context.keys())

            # Some referenced slots haven't been filled yet; skip for now
            if not referenced_slots.issubset(filled_set):
                continue

            # All referenced slots are filled; evaluate the constraint
            if not self.resolver.evaluate_template_constraints(
                partial_assignment, [constraint]
            ):
                return False

        return True

    def _extract_variables(self, node: ast.ASTNode) -> set[str]:
        """Extract all variable names from an AST node.

        Recursively traverses the AST to find all Variable nodes.

        Parameters
        ----------
        node : ast.ASTNode
            AST node to traverse.

        Returns
        -------
        set[str]
            Set of all variable names referenced in the expression.
        """
        if isinstance(node, ast.Variable):
            return {node.name}

        # Gather the child nodes that can contain variables for each kind
        children: list[ast.ASTNode]
        if isinstance(node, ast.BinaryOp):
            children = [node.left, node.right]
        elif isinstance(node, ast.UnaryOp):
            children = [node.operand]
        elif isinstance(node, ast.FunctionCall):
            # The function itself (Variable or AttributeAccess for methods)
            # is traversed along with the arguments.
            children = [node.function, *node.arguments]
        elif isinstance(node, ast.AttributeAccess):
            children = [node.object]
        elif isinstance(node, ast.Subscript):
            children = [node.object, node.index]
        elif isinstance(node, ast.ListLiteral):
            children = list(node.elements)
        else:
            # Literal nodes don't contain variables
            children = []

        variables: set[str] = set()
        for child in children:
            variables.update(self._extract_variables(child))
        return variables

    def _create_filled_template(
        self, template: Template, assignment: dict[str, LexicalItem]
    ) -> FilledTemplate:
        """Create FilledTemplate from assignment.

        Parameters
        ----------
        template : Template
            Source template.
        assignment : dict[str, LexicalItem]
            Complete slot assignment.

        Returns
        -------
        FilledTemplate
            Filled template instance holding a copy of ``assignment``.
        """
        # Render template string using renderer
        rendered = self.renderer.render(
            template.template_string, assignment, template.slots
        )
        required_by_slot = {name: slot.required for name, slot in template.slots.items()}

        return FilledTemplate(
            template_id=str(template.id),
            template_name=template.name,
            slot_fillers=assignment.copy(),
            rendered_text=rendered,
            strategy_name="backtracking",
            template_slots=required_by_slot,
        )
|