bead 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bead/__init__.py +11 -0
- bead/__main__.py +11 -0
- bead/active_learning/__init__.py +15 -0
- bead/active_learning/config.py +231 -0
- bead/active_learning/loop.py +566 -0
- bead/active_learning/models/__init__.py +24 -0
- bead/active_learning/models/base.py +852 -0
- bead/active_learning/models/binary.py +910 -0
- bead/active_learning/models/categorical.py +943 -0
- bead/active_learning/models/cloze.py +862 -0
- bead/active_learning/models/forced_choice.py +956 -0
- bead/active_learning/models/free_text.py +773 -0
- bead/active_learning/models/lora.py +365 -0
- bead/active_learning/models/magnitude.py +835 -0
- bead/active_learning/models/multi_select.py +795 -0
- bead/active_learning/models/ordinal_scale.py +811 -0
- bead/active_learning/models/peft_adapter.py +155 -0
- bead/active_learning/models/random_effects.py +639 -0
- bead/active_learning/selection.py +354 -0
- bead/active_learning/strategies.py +391 -0
- bead/active_learning/trainers/__init__.py +26 -0
- bead/active_learning/trainers/base.py +210 -0
- bead/active_learning/trainers/data_collator.py +172 -0
- bead/active_learning/trainers/dataset_utils.py +261 -0
- bead/active_learning/trainers/huggingface.py +304 -0
- bead/active_learning/trainers/lightning.py +324 -0
- bead/active_learning/trainers/metrics.py +424 -0
- bead/active_learning/trainers/mixed_effects.py +551 -0
- bead/active_learning/trainers/model_wrapper.py +509 -0
- bead/active_learning/trainers/registry.py +104 -0
- bead/adapters/__init__.py +11 -0
- bead/adapters/huggingface.py +61 -0
- bead/behavioral/__init__.py +116 -0
- bead/behavioral/analytics.py +646 -0
- bead/behavioral/extraction.py +343 -0
- bead/behavioral/merging.py +343 -0
- bead/cli/__init__.py +11 -0
- bead/cli/active_learning.py +513 -0
- bead/cli/active_learning_commands.py +779 -0
- bead/cli/completion.py +359 -0
- bead/cli/config.py +624 -0
- bead/cli/constraint_builders.py +286 -0
- bead/cli/deployment.py +859 -0
- bead/cli/deployment_trials.py +493 -0
- bead/cli/deployment_ui.py +332 -0
- bead/cli/display.py +378 -0
- bead/cli/items.py +960 -0
- bead/cli/items_factories.py +776 -0
- bead/cli/list_constraints.py +714 -0
- bead/cli/lists.py +490 -0
- bead/cli/main.py +430 -0
- bead/cli/models.py +877 -0
- bead/cli/resource_loaders.py +621 -0
- bead/cli/resources.py +1036 -0
- bead/cli/shell.py +356 -0
- bead/cli/simulate.py +840 -0
- bead/cli/templates.py +1158 -0
- bead/cli/training.py +1080 -0
- bead/cli/utils.py +614 -0
- bead/cli/workflow.py +1273 -0
- bead/config/__init__.py +68 -0
- bead/config/active_learning.py +1009 -0
- bead/config/config.py +192 -0
- bead/config/defaults.py +118 -0
- bead/config/deployment.py +217 -0
- bead/config/env.py +147 -0
- bead/config/item.py +45 -0
- bead/config/list.py +193 -0
- bead/config/loader.py +149 -0
- bead/config/logging.py +42 -0
- bead/config/model.py +49 -0
- bead/config/paths.py +46 -0
- bead/config/profiles.py +320 -0
- bead/config/resources.py +47 -0
- bead/config/serialization.py +210 -0
- bead/config/simulation.py +206 -0
- bead/config/template.py +238 -0
- bead/config/validation.py +267 -0
- bead/data/__init__.py +65 -0
- bead/data/base.py +87 -0
- bead/data/identifiers.py +97 -0
- bead/data/language_codes.py +61 -0
- bead/data/metadata.py +270 -0
- bead/data/range.py +123 -0
- bead/data/repository.py +358 -0
- bead/data/serialization.py +249 -0
- bead/data/timestamps.py +89 -0
- bead/data/validation.py +349 -0
- bead/data_collection/__init__.py +11 -0
- bead/data_collection/jatos.py +223 -0
- bead/data_collection/merger.py +154 -0
- bead/data_collection/prolific.py +198 -0
- bead/deployment/__init__.py +5 -0
- bead/deployment/distribution.py +402 -0
- bead/deployment/jatos/__init__.py +1 -0
- bead/deployment/jatos/api.py +200 -0
- bead/deployment/jatos/exporter.py +210 -0
- bead/deployment/jspsych/__init__.py +9 -0
- bead/deployment/jspsych/biome.json +44 -0
- bead/deployment/jspsych/config.py +411 -0
- bead/deployment/jspsych/generator.py +598 -0
- bead/deployment/jspsych/package.json +51 -0
- bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
- bead/deployment/jspsych/randomizer.py +299 -0
- bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
- bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
- bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
- bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
- bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
- bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
- bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
- bead/deployment/jspsych/src/plugins/rating.ts +248 -0
- bead/deployment/jspsych/src/slopit/index.ts +9 -0
- bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
- bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
- bead/deployment/jspsych/templates/experiment.css +1 -0
- bead/deployment/jspsych/templates/experiment.js.template +289 -0
- bead/deployment/jspsych/templates/index.html +51 -0
- bead/deployment/jspsych/templates/randomizer.js +241 -0
- bead/deployment/jspsych/templates/randomizer.js.template +313 -0
- bead/deployment/jspsych/trials.py +723 -0
- bead/deployment/jspsych/tsconfig.json +23 -0
- bead/deployment/jspsych/tsup.config.ts +30 -0
- bead/deployment/jspsych/ui/__init__.py +1 -0
- bead/deployment/jspsych/ui/components.py +383 -0
- bead/deployment/jspsych/ui/styles.py +411 -0
- bead/dsl/__init__.py +80 -0
- bead/dsl/ast.py +168 -0
- bead/dsl/context.py +178 -0
- bead/dsl/errors.py +71 -0
- bead/dsl/evaluator.py +570 -0
- bead/dsl/grammar.lark +81 -0
- bead/dsl/parser.py +231 -0
- bead/dsl/stdlib.py +929 -0
- bead/evaluation/__init__.py +13 -0
- bead/evaluation/convergence.py +485 -0
- bead/evaluation/interannotator.py +398 -0
- bead/items/__init__.py +40 -0
- bead/items/adapters/__init__.py +70 -0
- bead/items/adapters/anthropic.py +224 -0
- bead/items/adapters/api_utils.py +167 -0
- bead/items/adapters/base.py +216 -0
- bead/items/adapters/google.py +259 -0
- bead/items/adapters/huggingface.py +1074 -0
- bead/items/adapters/openai.py +323 -0
- bead/items/adapters/registry.py +202 -0
- bead/items/adapters/sentence_transformers.py +224 -0
- bead/items/adapters/togetherai.py +309 -0
- bead/items/binary.py +515 -0
- bead/items/cache.py +558 -0
- bead/items/categorical.py +593 -0
- bead/items/cloze.py +757 -0
- bead/items/constructor.py +784 -0
- bead/items/forced_choice.py +413 -0
- bead/items/free_text.py +681 -0
- bead/items/generation.py +432 -0
- bead/items/item.py +396 -0
- bead/items/item_template.py +787 -0
- bead/items/magnitude.py +573 -0
- bead/items/multi_select.py +621 -0
- bead/items/ordinal_scale.py +569 -0
- bead/items/scoring.py +448 -0
- bead/items/validation.py +723 -0
- bead/lists/__init__.py +30 -0
- bead/lists/balancer.py +263 -0
- bead/lists/constraints.py +1067 -0
- bead/lists/experiment_list.py +286 -0
- bead/lists/list_collection.py +378 -0
- bead/lists/partitioner.py +1141 -0
- bead/lists/stratification.py +254 -0
- bead/participants/__init__.py +73 -0
- bead/participants/collection.py +699 -0
- bead/participants/merging.py +312 -0
- bead/participants/metadata_spec.py +491 -0
- bead/participants/models.py +276 -0
- bead/resources/__init__.py +29 -0
- bead/resources/adapters/__init__.py +19 -0
- bead/resources/adapters/base.py +104 -0
- bead/resources/adapters/cache.py +128 -0
- bead/resources/adapters/glazing.py +508 -0
- bead/resources/adapters/registry.py +117 -0
- bead/resources/adapters/unimorph.py +796 -0
- bead/resources/classification.py +856 -0
- bead/resources/constraint_builders.py +329 -0
- bead/resources/constraints.py +165 -0
- bead/resources/lexical_item.py +223 -0
- bead/resources/lexicon.py +744 -0
- bead/resources/loaders.py +209 -0
- bead/resources/template.py +441 -0
- bead/resources/template_collection.py +707 -0
- bead/resources/template_generation.py +349 -0
- bead/simulation/__init__.py +29 -0
- bead/simulation/annotators/__init__.py +15 -0
- bead/simulation/annotators/base.py +175 -0
- bead/simulation/annotators/distance_based.py +135 -0
- bead/simulation/annotators/lm_based.py +114 -0
- bead/simulation/annotators/oracle.py +182 -0
- bead/simulation/annotators/random.py +181 -0
- bead/simulation/dsl_extension/__init__.py +3 -0
- bead/simulation/noise_models/__init__.py +13 -0
- bead/simulation/noise_models/base.py +42 -0
- bead/simulation/noise_models/random_noise.py +82 -0
- bead/simulation/noise_models/systematic.py +132 -0
- bead/simulation/noise_models/temperature.py +86 -0
- bead/simulation/runner.py +144 -0
- bead/simulation/strategies/__init__.py +23 -0
- bead/simulation/strategies/base.py +123 -0
- bead/simulation/strategies/binary.py +103 -0
- bead/simulation/strategies/categorical.py +123 -0
- bead/simulation/strategies/cloze.py +224 -0
- bead/simulation/strategies/forced_choice.py +127 -0
- bead/simulation/strategies/free_text.py +105 -0
- bead/simulation/strategies/magnitude.py +116 -0
- bead/simulation/strategies/multi_select.py +129 -0
- bead/simulation/strategies/ordinal_scale.py +131 -0
- bead/templates/__init__.py +27 -0
- bead/templates/adapters/__init__.py +17 -0
- bead/templates/adapters/base.py +128 -0
- bead/templates/adapters/cache.py +178 -0
- bead/templates/adapters/huggingface.py +312 -0
- bead/templates/combinatorics.py +103 -0
- bead/templates/filler.py +605 -0
- bead/templates/renderers.py +177 -0
- bead/templates/resolver.py +178 -0
- bead/templates/strategies.py +1806 -0
- bead/templates/streaming.py +195 -0
- bead-0.1.0.dist-info/METADATA +212 -0
- bead-0.1.0.dist-info/RECORD +231 -0
- bead-0.1.0.dist-info/WHEEL +4 -0
- bead-0.1.0.dist-info/entry_points.txt +2 -0
- bead-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""LM score-based annotator."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
from bead.simulation.annotators.base import SimulatedAnnotator
|
|
8
|
+
from bead.simulation.noise_models.temperature import TemperatureNoiseModel
|
|
9
|
+
from bead.simulation.strategies.binary import BinaryStrategy
|
|
10
|
+
from bead.simulation.strategies.categorical import CategoricalStrategy
|
|
11
|
+
from bead.simulation.strategies.cloze import ClozeStrategy
|
|
12
|
+
from bead.simulation.strategies.forced_choice import ForcedChoiceStrategy
|
|
13
|
+
from bead.simulation.strategies.free_text import FreeTextStrategy
|
|
14
|
+
from bead.simulation.strategies.magnitude import MagnitudeStrategy
|
|
15
|
+
from bead.simulation.strategies.multi_select import MultiSelectStrategy
|
|
16
|
+
from bead.simulation.strategies.ordinal_scale import OrdinalScaleStrategy
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from bead.config.simulation import SimulatedAnnotatorConfig
|
|
20
|
+
from bead.items.item import Item
|
|
21
|
+
from bead.items.item_template import ItemTemplate
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class LMBasedAnnotator(SimulatedAnnotator):
    """Simulated annotator whose responses are driven by language-model scores.

    A per-task-type strategy turns the scores stored on an item (looked up
    via ``config.model_output_key``) into a response; an optional noise model
    then perturbs that response to add variability.

    All eight task types are covered through pluggable strategies:
    forced_choice, binary, ordinal_scale, categorical, magnitude,
    multi_select, free_text and cloze.

    Parameters
    ----------
    config
        Configuration for annotator.

    Examples
    --------
    >>> from bead.config.simulation import SimulatedAnnotatorConfig, NoiseModelConfig
    >>> config = SimulatedAnnotatorConfig(
    ...     strategy="lm_score",
    ...     model_output_key="lm_score",
    ...     noise_model=NoiseModelConfig(noise_type="temperature", temperature=1.5)
    ... )
    >>> annotator = LMBasedAnnotator(config)
    >>> # judgment = annotator.annotate(item, template)
    """

    def __init__(self, config: SimulatedAnnotatorConfig) -> None:
        super().__init__(config)

        # one strategy instance per task type, keyed by the task-type name
        strategy_factories = (
            ("forced_choice", ForcedChoiceStrategy),
            ("binary", BinaryStrategy),
            ("ordinal_scale", OrdinalScaleStrategy),
            ("categorical", CategoricalStrategy),
            ("magnitude", MagnitudeStrategy),
            ("multi_select", MultiSelectStrategy),
            ("free_text", FreeTextStrategy),
            ("cloze", ClozeStrategy),
        )
        self.strategies = {name: factory() for name, factory in strategy_factories}

        # only "temperature" yields a noise model; "none" and any other
        # noise type leave the annotator noise-free
        noise_config = config.noise_model
        self.noise_model = (
            TemperatureNoiseModel(temperature=noise_config.temperature)
            if noise_config.noise_type == "temperature"
            else None
        )

    def annotate(
        self, item: Item, item_template: ItemTemplate
    ) -> str | int | float | list[str]:
        """Produce an LM-score-based annotation for one item.

        The strategy for ``item_template.task_type`` is obtained through
        ``self.get_strategy`` (defined outside this class body), asked to
        validate the item, and then asked to simulate a response. When a
        noise model is configured, the response is passed through it.

        Parameters
        ----------
        item : Item
            Item to annotate.
        item_template : ItemTemplate
            Template defining task.

        Returns
        -------
        str | int | float | list[str]
            Annotation (format depends on task type).
        """
        strategy = self.get_strategy(item_template.task_type)

        # the strategy checks the item before any response is simulated
        strategy.validate_item(item, item_template)

        base_response = strategy.simulate_response(
            item=item,
            item_template=item_template,
            model_output_key=self.config.model_output_key,
            rng=self.rng,
        )

        if self.noise_model is None:
            return base_response

        noise_context = {
            "item": item,
            "template": item_template,
            "strategy": strategy,
        }
        return self.noise_model.apply(
            value=base_response,
            context=noise_context,
            rng=self.rng,
        )
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
"""Oracle (perfect performance) annotator."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
from bead.simulation.annotators.base import SimulatedAnnotator
|
|
8
|
+
from bead.simulation.annotators.random import RandomAnnotator
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from bead.config.simulation import SimulatedAnnotatorConfig
|
|
12
|
+
from bead.items.item import Item
|
|
13
|
+
from bead.items.item_template import ItemTemplate
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class OracleAnnotator(SimulatedAnnotator):
    """Perfect performance annotator using ground truth.

    Returns the value stored under ``item.item_metadata['ground_truth']``
    after validating it against the template's task type. When the item has
    no metadata, or no ``ground_truth`` entry, the annotation is delegated to
    a ``RandomAnnotator`` built from the same config.

    Useful for establishing upper bound on performance.

    Parameters
    ----------
    config
        Configuration for annotator.

    Examples
    --------
    >>> from bead.config.simulation import SimulatedAnnotatorConfig
    >>> config = SimulatedAnnotatorConfig(strategy="oracle", random_state=42)
    >>> annotator = OracleAnnotator(config)
    >>> # judgment = annotator.annotate(item, template)
    """

    def __init__(self, config: SimulatedAnnotatorConfig) -> None:
        super().__init__(config)

        # fallback used whenever an item carries no ground truth
        self.random_annotator = RandomAnnotator(config)

    def annotate(
        self, item: Item, item_template: ItemTemplate
    ) -> str | int | float | bool | list[str] | dict[str, str]:
        """Generate oracle annotation using ground truth.

        Parameters
        ----------
        item : Item
            Item to annotate.
        item_template : ItemTemplate
            Template defining task.

        Returns
        -------
        str | int | float | bool | list[str] | dict[str, str]
            Validated ground truth, or a random annotation when the item has
            no ground truth. Cloze tasks yield a slot-name -> value dict.

        Raises
        ------
        ValueError
            If a ground truth is present but invalid for the task type
            (there is no random fallback in that case).
        """
        metadata = getattr(item, "item_metadata", None)
        if metadata:
            ground_truth = metadata.get("ground_truth")
            if ground_truth is not None:
                return self._validate_ground_truth(ground_truth, item_template)

        # no usable ground truth: fall back to the random baseline
        return self.random_annotator.annotate(item, item_template)

    def _validate_ground_truth(
        self,
        ground_truth: str | int | float | bool | list[str] | dict[str, str],
        template: ItemTemplate,
    ) -> str | int | float | bool | list[str] | dict[str, str]:
        """Validate ground truth against task spec.

        Parameters
        ----------
        ground_truth
            Ground truth value.
        template : ItemTemplate
            Template defining task constraints.

        Returns
        -------
        str | int | float | bool | list[str] | dict[str, str]
            Validated ground truth. Magnitude values are converted to
            ``float``; cloze values are converted to ``str``.

        Raises
        ------
        ValueError
            If ground truth is invalid for task type, or the task type is
            not supported.
        """
        task_type = template.task_type

        # single-choice tasks share the same rule: a str that is a listed option
        if task_type == "forced_choice" or task_type == "categorical":
            if not isinstance(ground_truth, str):
                msg = f"{task_type} ground truth must be str, got {type(ground_truth)}"
                raise ValueError(msg)
            options = template.task_spec.options or []
            if ground_truth not in options:
                msg = f"Ground truth '{ground_truth}' not in options {options}"
                raise ValueError(msg)
            return ground_truth

        if task_type == "binary":
            if not isinstance(ground_truth, bool):
                msg = f"binary ground truth must be bool, got {type(ground_truth)}"
                raise ValueError(msg)
            return ground_truth

        if task_type == "ordinal_scale":
            # bool is a subclass of int; reject it explicitly so that a
            # True/False label is never silently read as rating 1/0
            if isinstance(ground_truth, bool) or not isinstance(ground_truth, int):
                msg = (
                    f"ordinal_scale ground truth must be int, got {type(ground_truth)}"
                )
                raise ValueError(msg)
            scale_bounds = template.task_spec.scale_bounds
            # a missing scale falls back to the 1-7 range
            lower, upper = scale_bounds if scale_bounds is not None else (1, 7)
            if not (lower <= ground_truth <= upper):
                msg = f"Ground truth {ground_truth} not in range [{lower}, {upper}]"
                raise ValueError(msg)
            return ground_truth

        if task_type == "magnitude":
            # same bool exclusion as above: True must not become 1.0
            if isinstance(ground_truth, bool) or not isinstance(
                ground_truth, int | float
            ):
                msg = (
                    f"magnitude ground truth must be numeric, got {type(ground_truth)}"
                )
                raise ValueError(msg)
            return float(ground_truth)

        if task_type == "multi_select":
            if not isinstance(ground_truth, list):
                msg = (
                    f"multi_select ground truth must be list, got {type(ground_truth)}"
                )
                raise ValueError(msg)
            options = template.task_spec.options or []
            for selected in ground_truth:
                if selected not in options:
                    msg = f"Ground truth item '{selected}' not in options {options}"
                    raise ValueError(msg)
            return ground_truth

        if task_type == "free_text":
            if not isinstance(ground_truth, str):
                msg = f"free_text ground truth must be str, got {type(ground_truth)}"
                raise ValueError(msg)
            return ground_truth

        if task_type == "cloze":
            if not isinstance(ground_truth, dict):
                msg = f"cloze ground truth must be dict, got {type(ground_truth)}"
                raise ValueError(msg)
            # NOTE(review): slots are read from the template here - confirm
            # that ItemTemplate actually exposes `unfilled_slots`
            expected_slots = [slot.slot_name for slot in template.unfilled_slots]
            # every expected slot must be present; extra keys are tolerated
            for slot_name in expected_slots:
                if slot_name not in ground_truth:
                    msg = (
                        f"Ground truth missing slot '{slot_name}' "
                        f"(expected slots: {expected_slots})"
                    )
                    raise ValueError(msg)
            # return dict of slot_name -> value, with values coerced to str
            return {name: str(filler) for name, filler in ground_truth.items()}

        raise ValueError(f"Unsupported task type: {task_type}")
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
"""Random baseline annotator."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
from bead.simulation.annotators.base import SimulatedAnnotator
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from bead.config.simulation import SimulatedAnnotatorConfig
|
|
11
|
+
from bead.items.item import Item
|
|
12
|
+
from bead.items.item_template import ItemTemplate
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class RandomAnnotator(SimulatedAnnotator):
    """Uninformed baseline annotator that answers at random.

    Responses are drawn from ``self.rng`` and stay within what the task spec
    allows (listed options, scale bounds), but they never look at the item's
    content or scores.

    Useful for establishing baseline performance.

    Parameters
    ----------
    config
        Configuration for annotator.

    Examples
    --------
    >>> from bead.config.simulation import SimulatedAnnotatorConfig
    >>> config = SimulatedAnnotatorConfig(strategy="random", random_state=42)
    >>> annotator = RandomAnnotator(config)
    >>> # judgment = annotator.annotate(item, template)
    """

    def __init__(self, config: SimulatedAnnotatorConfig) -> None:
        # the random baseline needs neither strategies nor a noise model
        super().__init__(config)

    def annotate(
        self, item: Item, item_template: ItemTemplate
    ) -> str | int | float | bool | list[str]:
        """Draw a random annotation appropriate for the template's task type.

        Parameters
        ----------
        item : Item
            Item to annotate. Only consulted for cloze tasks, where its
            unfilled slots determine the keys of the response.
        item_template : ItemTemplate
            Template defining task constraints.

        Returns
        -------
        str | int | float | bool | list[str]
            Random annotation (format depends on task type).

        Raises
        ------
        ValueError
            If task type is not supported.
        """
        task_type = item_template.task_type

        # task type -> zero-argument generator for that kind of response
        generators = {
            "forced_choice": lambda: self._random_forced_choice(item_template),
            "binary": lambda: self._random_binary(),
            "ordinal_scale": lambda: self._random_ordinal(item_template),
            "categorical": lambda: self._random_categorical(item_template),
            "magnitude": lambda: self._random_magnitude(),
            "multi_select": lambda: self._random_multi_select(item_template),
            "free_text": lambda: self._random_free_text(),
            "cloze": lambda: self._random_cloze(item),
        }

        generate = generators.get(task_type)
        if generate is None:
            raise ValueError(f"Unsupported task type: {task_type}")
        return generate()

    def _random_forced_choice(self, template: ItemTemplate) -> str:
        """Pick one of the template's options at random."""
        choices = template.task_spec.options
        if not choices:
            raise ValueError("forced_choice requires options")
        return str(self.rng.choice(choices))

    def _random_binary(self) -> bool:
        """Pick True or False at random."""
        outcome = self.rng.choice([True, False])
        return bool(outcome)

    def _random_ordinal(self, template: ItemTemplate) -> int:
        """Pick a random point on the template's ordinal scale."""
        bounds = template.task_spec.scale_bounds
        # templates without explicit bounds use a 1-7 scale
        low, high = bounds if bounds is not None else (1, 7)
        # upper bound is passed as high + 1 so that `high` itself can be drawn
        return int(self.rng.randint(low, high + 1))

    def _random_categorical(self, template: ItemTemplate) -> str:
        """Pick one of the template's categories at random."""
        categories = template.task_spec.options
        if not categories:
            raise ValueError("categorical requires options")
        return str(self.rng.choice(categories))

    def _random_magnitude(self) -> float:
        """Draw a positive magnitude from a log-normal distribution."""
        draw = self.rng.lognormal(mean=0, sigma=1)
        return float(draw)

    def _random_multi_select(self, template: ItemTemplate) -> list[str]:
        """Select a random subset of the template's options."""
        candidates = template.task_spec.options
        if not candidates:
            raise ValueError("multi_select requires options")

        # each option is kept independently when its draw is below 0.5;
        # the result may therefore be empty
        return [option for option in candidates if self.rng.random() < 0.5]

    def _random_free_text(self) -> str:
        """Pick one of a few canned free-text responses."""
        canned_responses = [
            "No response",
            "Unclear",
            "Cannot determine",
            "Not applicable",
            "Unknown",
        ]
        return str(self.rng.choice(canned_responses))

    def _random_cloze(self, item: Item) -> dict[str, str]:
        """Fill each unfilled slot of the item with a random common word."""
        # small bank of common words to draw fillers from
        word_bank = (
            "the a is was has can will "
            "thing person place time way "
            "good new old big small "
            "very well just now here "
            "in on at to for"
        ).split()

        # one draw per slot, in slot order
        return {
            slot.slot_name: str(self.rng.choice(word_bank))
            for slot in item.unfilled_slots
        }
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Noise models for simulating human variability."""
|
|
2
|
+
|
|
3
|
+
from bead.simulation.noise_models.base import NoiseModel
|
|
4
|
+
from bead.simulation.noise_models.random_noise import RandomNoiseModel
|
|
5
|
+
from bead.simulation.noise_models.systematic import SystematicNoiseModel
|
|
6
|
+
from bead.simulation.noise_models.temperature import TemperatureNoiseModel
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"NoiseModel",
|
|
10
|
+
"RandomNoiseModel",
|
|
11
|
+
"SystematicNoiseModel",
|
|
12
|
+
"TemperatureNoiseModel",
|
|
13
|
+
]
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Base class for noise models."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class NoiseModel(ABC):
|
|
11
|
+
"""Abstract base for noise models.
|
|
12
|
+
|
|
13
|
+
Noise models add human-like variability to simulated responses.
|
|
14
|
+
They can:
|
|
15
|
+
- Scale probabilities by temperature
|
|
16
|
+
- Add systematic biases (length, frequency, position)
|
|
17
|
+
- Inject random noise
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
@abstractmethod
|
|
21
|
+
def apply(
|
|
22
|
+
self,
|
|
23
|
+
value: str | int | float | list[str],
|
|
24
|
+
context: dict[str, str | int | float | bool | list[str]],
|
|
25
|
+
rng: np.random.RandomState,
|
|
26
|
+
) -> str | int | float | list[str]:
|
|
27
|
+
"""Apply noise to value.
|
|
28
|
+
|
|
29
|
+
Parameters
|
|
30
|
+
----------
|
|
31
|
+
value : str | int | float | list[str]
|
|
32
|
+
Original value (probability, score, choice, etc.).
|
|
33
|
+
context : dict[str, str | int | float | bool | list[str]]
|
|
34
|
+
Additional context (item, template, strategy, etc.).
|
|
35
|
+
rng : np.random.RandomState
|
|
36
|
+
Random number generator.
|
|
37
|
+
|
|
38
|
+
Returns
|
|
39
|
+
-------
|
|
40
|
+
str | int | float | list[str]
|
|
41
|
+
Value with noise applied.
|
|
42
|
+
"""
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""Random noise injection model."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
from bead.simulation.noise_models.base import NoiseModel
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class RandomNoiseModel(NoiseModel):
    """Noise model that adds a random offset to numeric responses.

    - ``"gaussian"``: offset drawn from a normal distribution with mean 0
      and standard deviation ``strength``
    - ``"uniform"``: offset drawn uniformly from ``[-strength, strength)``
    - any other ``noise_type``: no offset is added

    Non-numeric responses (strings, lists, and also ``bool``) are returned
    unchanged.

    Parameters
    ----------
    noise_type
        Type of noise ("gaussian" or "uniform"). Default: "gaussian".
    strength
        Noise strength (stddev for gaussian, range for uniform). Default: 1.0.

    Examples
    --------
    >>> noise_model = RandomNoiseModel(noise_type="gaussian", strength=0.5)
    >>> # Adds gaussian noise with stddev=0.5 to numeric responses
    """

    def __init__(self, noise_type: str = "gaussian", strength: float = 1.0) -> None:
        self.noise_type = noise_type
        self.strength = strength

    def apply(
        self,
        value: str | int | float | bool | list[str],
        context: dict[str, str | int | float | bool | list[str]],
        rng: np.random.RandomState,
    ) -> str | int | float | bool | list[str]:
        """Perturb ``value`` when it is numeric and the strength is non-zero.

        Parameters
        ----------
        value
            Original value.
        context : dict
            Context with item, template, strategy. Not read by this model.
        rng : np.random.RandomState
            Random number generator.

        Returns
        -------
        str | int | float | bool | list[str]
            Value with noise applied.
        """
        # bool is an int subclass but is treated as a non-numeric choice here
        is_numeric = isinstance(value, int | float) and not isinstance(value, bool)
        if self.strength == 0.0 or not is_numeric:
            return value
        return self._add_numeric_noise(value, rng)

    def _add_numeric_noise(
        self, value: int | float, rng: np.random.RandomState
    ) -> int | float:
        """Add the configured offset to ``value`` and keep its numeric kind."""
        perturbed = value
        if self.noise_type == "gaussian":
            perturbed = value + rng.normal(0, self.strength)
        elif self.noise_type == "uniform":
            perturbed = value + rng.uniform(-self.strength, self.strength)

        # integer inputs are rounded back to int; everything else becomes float
        return int(round(perturbed)) if isinstance(value, int) else float(perturbed)
|