bead 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bead/__init__.py +11 -0
- bead/__main__.py +11 -0
- bead/active_learning/__init__.py +15 -0
- bead/active_learning/config.py +231 -0
- bead/active_learning/loop.py +566 -0
- bead/active_learning/models/__init__.py +24 -0
- bead/active_learning/models/base.py +852 -0
- bead/active_learning/models/binary.py +910 -0
- bead/active_learning/models/categorical.py +943 -0
- bead/active_learning/models/cloze.py +862 -0
- bead/active_learning/models/forced_choice.py +956 -0
- bead/active_learning/models/free_text.py +773 -0
- bead/active_learning/models/lora.py +365 -0
- bead/active_learning/models/magnitude.py +835 -0
- bead/active_learning/models/multi_select.py +795 -0
- bead/active_learning/models/ordinal_scale.py +811 -0
- bead/active_learning/models/peft_adapter.py +155 -0
- bead/active_learning/models/random_effects.py +639 -0
- bead/active_learning/selection.py +354 -0
- bead/active_learning/strategies.py +391 -0
- bead/active_learning/trainers/__init__.py +26 -0
- bead/active_learning/trainers/base.py +210 -0
- bead/active_learning/trainers/data_collator.py +172 -0
- bead/active_learning/trainers/dataset_utils.py +261 -0
- bead/active_learning/trainers/huggingface.py +304 -0
- bead/active_learning/trainers/lightning.py +324 -0
- bead/active_learning/trainers/metrics.py +424 -0
- bead/active_learning/trainers/mixed_effects.py +551 -0
- bead/active_learning/trainers/model_wrapper.py +509 -0
- bead/active_learning/trainers/registry.py +104 -0
- bead/adapters/__init__.py +11 -0
- bead/adapters/huggingface.py +61 -0
- bead/behavioral/__init__.py +116 -0
- bead/behavioral/analytics.py +646 -0
- bead/behavioral/extraction.py +343 -0
- bead/behavioral/merging.py +343 -0
- bead/cli/__init__.py +11 -0
- bead/cli/active_learning.py +513 -0
- bead/cli/active_learning_commands.py +779 -0
- bead/cli/completion.py +359 -0
- bead/cli/config.py +624 -0
- bead/cli/constraint_builders.py +286 -0
- bead/cli/deployment.py +859 -0
- bead/cli/deployment_trials.py +493 -0
- bead/cli/deployment_ui.py +332 -0
- bead/cli/display.py +378 -0
- bead/cli/items.py +960 -0
- bead/cli/items_factories.py +776 -0
- bead/cli/list_constraints.py +714 -0
- bead/cli/lists.py +490 -0
- bead/cli/main.py +430 -0
- bead/cli/models.py +877 -0
- bead/cli/resource_loaders.py +621 -0
- bead/cli/resources.py +1036 -0
- bead/cli/shell.py +356 -0
- bead/cli/simulate.py +840 -0
- bead/cli/templates.py +1158 -0
- bead/cli/training.py +1080 -0
- bead/cli/utils.py +614 -0
- bead/cli/workflow.py +1273 -0
- bead/config/__init__.py +68 -0
- bead/config/active_learning.py +1009 -0
- bead/config/config.py +192 -0
- bead/config/defaults.py +118 -0
- bead/config/deployment.py +217 -0
- bead/config/env.py +147 -0
- bead/config/item.py +45 -0
- bead/config/list.py +193 -0
- bead/config/loader.py +149 -0
- bead/config/logging.py +42 -0
- bead/config/model.py +49 -0
- bead/config/paths.py +46 -0
- bead/config/profiles.py +320 -0
- bead/config/resources.py +47 -0
- bead/config/serialization.py +210 -0
- bead/config/simulation.py +206 -0
- bead/config/template.py +238 -0
- bead/config/validation.py +267 -0
- bead/data/__init__.py +65 -0
- bead/data/base.py +87 -0
- bead/data/identifiers.py +97 -0
- bead/data/language_codes.py +61 -0
- bead/data/metadata.py +270 -0
- bead/data/range.py +123 -0
- bead/data/repository.py +358 -0
- bead/data/serialization.py +249 -0
- bead/data/timestamps.py +89 -0
- bead/data/validation.py +349 -0
- bead/data_collection/__init__.py +11 -0
- bead/data_collection/jatos.py +223 -0
- bead/data_collection/merger.py +154 -0
- bead/data_collection/prolific.py +198 -0
- bead/deployment/__init__.py +5 -0
- bead/deployment/distribution.py +402 -0
- bead/deployment/jatos/__init__.py +1 -0
- bead/deployment/jatos/api.py +200 -0
- bead/deployment/jatos/exporter.py +210 -0
- bead/deployment/jspsych/__init__.py +9 -0
- bead/deployment/jspsych/biome.json +44 -0
- bead/deployment/jspsych/config.py +411 -0
- bead/deployment/jspsych/generator.py +598 -0
- bead/deployment/jspsych/package.json +51 -0
- bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
- bead/deployment/jspsych/randomizer.py +299 -0
- bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
- bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
- bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
- bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
- bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
- bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
- bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
- bead/deployment/jspsych/src/plugins/rating.ts +248 -0
- bead/deployment/jspsych/src/slopit/index.ts +9 -0
- bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
- bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
- bead/deployment/jspsych/templates/experiment.css +1 -0
- bead/deployment/jspsych/templates/experiment.js.template +289 -0
- bead/deployment/jspsych/templates/index.html +51 -0
- bead/deployment/jspsych/templates/randomizer.js +241 -0
- bead/deployment/jspsych/templates/randomizer.js.template +313 -0
- bead/deployment/jspsych/trials.py +723 -0
- bead/deployment/jspsych/tsconfig.json +23 -0
- bead/deployment/jspsych/tsup.config.ts +30 -0
- bead/deployment/jspsych/ui/__init__.py +1 -0
- bead/deployment/jspsych/ui/components.py +383 -0
- bead/deployment/jspsych/ui/styles.py +411 -0
- bead/dsl/__init__.py +80 -0
- bead/dsl/ast.py +168 -0
- bead/dsl/context.py +178 -0
- bead/dsl/errors.py +71 -0
- bead/dsl/evaluator.py +570 -0
- bead/dsl/grammar.lark +81 -0
- bead/dsl/parser.py +231 -0
- bead/dsl/stdlib.py +929 -0
- bead/evaluation/__init__.py +13 -0
- bead/evaluation/convergence.py +485 -0
- bead/evaluation/interannotator.py +398 -0
- bead/items/__init__.py +40 -0
- bead/items/adapters/__init__.py +70 -0
- bead/items/adapters/anthropic.py +224 -0
- bead/items/adapters/api_utils.py +167 -0
- bead/items/adapters/base.py +216 -0
- bead/items/adapters/google.py +259 -0
- bead/items/adapters/huggingface.py +1074 -0
- bead/items/adapters/openai.py +323 -0
- bead/items/adapters/registry.py +202 -0
- bead/items/adapters/sentence_transformers.py +224 -0
- bead/items/adapters/togetherai.py +309 -0
- bead/items/binary.py +515 -0
- bead/items/cache.py +558 -0
- bead/items/categorical.py +593 -0
- bead/items/cloze.py +757 -0
- bead/items/constructor.py +784 -0
- bead/items/forced_choice.py +413 -0
- bead/items/free_text.py +681 -0
- bead/items/generation.py +432 -0
- bead/items/item.py +396 -0
- bead/items/item_template.py +787 -0
- bead/items/magnitude.py +573 -0
- bead/items/multi_select.py +621 -0
- bead/items/ordinal_scale.py +569 -0
- bead/items/scoring.py +448 -0
- bead/items/validation.py +723 -0
- bead/lists/__init__.py +30 -0
- bead/lists/balancer.py +263 -0
- bead/lists/constraints.py +1067 -0
- bead/lists/experiment_list.py +286 -0
- bead/lists/list_collection.py +378 -0
- bead/lists/partitioner.py +1141 -0
- bead/lists/stratification.py +254 -0
- bead/participants/__init__.py +73 -0
- bead/participants/collection.py +699 -0
- bead/participants/merging.py +312 -0
- bead/participants/metadata_spec.py +491 -0
- bead/participants/models.py +276 -0
- bead/resources/__init__.py +29 -0
- bead/resources/adapters/__init__.py +19 -0
- bead/resources/adapters/base.py +104 -0
- bead/resources/adapters/cache.py +128 -0
- bead/resources/adapters/glazing.py +508 -0
- bead/resources/adapters/registry.py +117 -0
- bead/resources/adapters/unimorph.py +796 -0
- bead/resources/classification.py +856 -0
- bead/resources/constraint_builders.py +329 -0
- bead/resources/constraints.py +165 -0
- bead/resources/lexical_item.py +223 -0
- bead/resources/lexicon.py +744 -0
- bead/resources/loaders.py +209 -0
- bead/resources/template.py +441 -0
- bead/resources/template_collection.py +707 -0
- bead/resources/template_generation.py +349 -0
- bead/simulation/__init__.py +29 -0
- bead/simulation/annotators/__init__.py +15 -0
- bead/simulation/annotators/base.py +175 -0
- bead/simulation/annotators/distance_based.py +135 -0
- bead/simulation/annotators/lm_based.py +114 -0
- bead/simulation/annotators/oracle.py +182 -0
- bead/simulation/annotators/random.py +181 -0
- bead/simulation/dsl_extension/__init__.py +3 -0
- bead/simulation/noise_models/__init__.py +13 -0
- bead/simulation/noise_models/base.py +42 -0
- bead/simulation/noise_models/random_noise.py +82 -0
- bead/simulation/noise_models/systematic.py +132 -0
- bead/simulation/noise_models/temperature.py +86 -0
- bead/simulation/runner.py +144 -0
- bead/simulation/strategies/__init__.py +23 -0
- bead/simulation/strategies/base.py +123 -0
- bead/simulation/strategies/binary.py +103 -0
- bead/simulation/strategies/categorical.py +123 -0
- bead/simulation/strategies/cloze.py +224 -0
- bead/simulation/strategies/forced_choice.py +127 -0
- bead/simulation/strategies/free_text.py +105 -0
- bead/simulation/strategies/magnitude.py +116 -0
- bead/simulation/strategies/multi_select.py +129 -0
- bead/simulation/strategies/ordinal_scale.py +131 -0
- bead/templates/__init__.py +27 -0
- bead/templates/adapters/__init__.py +17 -0
- bead/templates/adapters/base.py +128 -0
- bead/templates/adapters/cache.py +178 -0
- bead/templates/adapters/huggingface.py +312 -0
- bead/templates/combinatorics.py +103 -0
- bead/templates/filler.py +605 -0
- bead/templates/renderers.py +177 -0
- bead/templates/resolver.py +178 -0
- bead/templates/strategies.py +1806 -0
- bead/templates/streaming.py +195 -0
- bead-0.1.0.dist-info/METADATA +212 -0
- bead-0.1.0.dist-info/RECORD +231 -0
- bead-0.1.0.dist-info/WHEEL +4 -0
- bead-0.1.0.dist-info/entry_points.txt +2 -0
- bead-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"""Systematic bias noise model."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations

from typing import Any

import numpy as np

from bead.simulation.noise_models.base import NoiseModel
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class SystematicNoiseModel(NoiseModel):
    """Systematic bias noise model.

    With probability ``bias_strength`` the incoming response is replaced by a
    biased alternative; otherwise it is passed through unchanged.

    Bias types acted on by :meth:`apply`:

    - position: forced_choice / categorical tasks; picks the first option
    - endpoint: ordinal_scale tasks; picks the scale minimum or maximum
    - midpoint: ordinal_scale tasks; picks the (floored) scale midpoint

    The "length" and "frequency" bias types are accepted by the constructor,
    but no branch of :meth:`apply` handles them, so they currently leave
    responses unchanged.

    Parameters
    ----------
    bias_type
        Type of bias ("length", "frequency", "position", "endpoint", "midpoint").
        Default: "position".
    bias_strength
        Probability (0.0-1.0) of replacing a response with the biased one.
        Default: 0.0 (no bias). The value is not validated here.

    Examples
    --------
    >>> noise_model = SystematicNoiseModel(bias_type="position", bias_strength=0.3)
    >>> # Adds 30% bias toward first option in forced choice
    """

    # scale assumed when the template does not declare ``scale_bounds``
    _DEFAULT_SCALE_BOUNDS: tuple[int, int] = (1, 7)

    def __init__(self, bias_type: str = "position", bias_strength: float = 0.0) -> None:
        self.bias_type = bias_type
        self.bias_strength = bias_strength

    def apply(
        self,
        value: str | int | float | bool | list[str],
        context: dict[str, str | int | float | bool | list[str]],
        rng: np.random.RandomState,
    ) -> str | int | float | bool | list[str]:
        """Apply systematic bias.

        Parameters
        ----------
        value
            Original value.
        context : dict
            Context; the "strategy" and "template" entries are read here.
        rng : np.random.RandomState
            Random number generator.

        Returns
        -------
        str | int | float | bool | list[str]
            Value with bias applied, or ``value`` unchanged when the bias
            strength is zero, the context lacks a strategy or template, or the
            bias type does not apply to the strategy's task type.
        """
        if self.bias_strength == 0.0:
            return value

        strategy = context.get("strategy")
        template = context.get("template")
        if not strategy or not template:
            return value

        # tolerate strategies that do not expose a task type, as
        # TemperatureNoiseModel does, instead of raising AttributeError
        task_type = getattr(strategy, "supported_task_type", None)

        # position bias for choice tasks
        if self.bias_type == "position" and task_type in (
            "forced_choice",
            "categorical",
        ):
            return self._apply_position_bias(value, template, rng)

        # endpoint/midpoint bias for ordinal scales
        if task_type == "ordinal_scale":
            if self.bias_type == "endpoint":
                return self._apply_endpoint_bias(value, template, rng)
            if self.bias_type == "midpoint":
                return self._apply_midpoint_bias(value, template, rng)

        # no bias for other combinations
        return value

    def _resolve_scale_bounds(self, template: Any) -> tuple[int, int]:
        """Return ``(min, max)`` from the template, or the default scale."""
        scale_bounds = template.task_spec.scale_bounds
        if scale_bounds is None:
            return self._DEFAULT_SCALE_BOUNDS
        min_val, max_val = scale_bounds
        return min_val, max_val

    def _apply_position_bias(
        self, value: str, template: Any, rng: np.random.RandomState
    ) -> str:
        """Apply position bias to choice tasks.

        Returns the first option with probability ``bias_strength``. Templates
        with fewer than two options are left alone and consume no random draw.
        """
        options = template.task_spec.options
        if not options or len(options) < 2:
            return value

        # bias toward first option
        if rng.random() < self.bias_strength:
            return options[0]

        return value

    def _apply_endpoint_bias(
        self, value: int, template: Any, rng: np.random.RandomState
    ) -> int:
        """Apply endpoint bias to ordinal scales.

        With probability ``bias_strength`` returns the scale minimum or
        maximum, each with probability 0.5.
        """
        min_val, max_val = self._resolve_scale_bounds(template)

        # bias toward endpoints (min or max); the second draw picks the side
        if rng.random() < self.bias_strength:
            return min_val if rng.random() < 0.5 else max_val

        return value

    def _apply_midpoint_bias(
        self, value: int, template: Any, rng: np.random.RandomState
    ) -> int:
        """Apply midpoint bias to ordinal scales.

        With probability ``bias_strength`` returns ``(min + max) // 2``.
        """
        min_val, max_val = self._resolve_scale_bounds(template)
        midpoint = (min_val + max_val) // 2

        # bias toward midpoint
        if rng.random() < self.bias_strength:
            return midpoint

        return value
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""Temperature-based noise model."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
from bead.simulation.noise_models.base import NoiseModel
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TemperatureNoiseModel(NoiseModel):
    """Temperature-controlled noise for simulated responses.

    What :meth:`apply` does depends on the task type reported by the strategy
    found in the context:

    - forced_choice: the value is returned untouched; per the in-code note the
      temperature is expected to be applied inside the strategy's softmax,
      P_i = exp(score_i / T) / sum(exp(score_j / T))
    - ordinal_scale: numeric values receive additive Gaussian noise with
      standard deviation ``0.5 * temperature`` (so noise is added at T = 1.0
      as well)
    - anything else: the value is returned untouched

    Parameters
    ----------
    temperature
        Temperature scaling factor (> 0). Default: 1.0.

    Raises
    ------
    ValueError
        If temperature <= 0.

    Examples
    --------
    >>> noise_model = TemperatureNoiseModel(temperature=2.0)
    >>> # More random decisions
    """

    def __init__(self, temperature: float = 1.0) -> None:
        if temperature <= 0:
            msg = "Temperature must be positive"
            raise ValueError(msg)
        self.temperature = temperature

    def apply(
        self,
        value: str | int | float | list[str],
        context: dict[str, str | int | float | bool | list[str]],
        rng: np.random.RandomState,
    ) -> str | int | float | list[str]:
        """Apply temperature-dependent noise to a response.

        Parameters
        ----------
        value : str | int | float | list[str]
            Original value (choice, rating, etc.).
        context : dict[str, str | int | float | bool | list[str]]
            Context; only the "strategy" entry is read here.
        rng : np.random.RandomState
            Random number generator.

        Returns
        -------
        str | int | float | list[str]
            ``value`` plus Gaussian noise for numeric ordinal_scale responses;
            otherwise ``value`` unchanged.
        """
        strategy = context.get("strategy")

        # without a strategy that reports a task type there is nothing to do
        if not strategy or not hasattr(strategy, "supported_task_type"):
            return value

        # forced_choice (temperature handled by the strategy's softmax) and
        # every other non-ordinal task type pass through untouched
        if strategy.supported_task_type != "ordinal_scale":
            return value

        # non-numeric ordinal responses are passed through as well
        if not isinstance(value, (int, float)):
            return value

        # temperature-scaled gaussian noise
        return value + rng.normal(0, self.temperature * 0.5)
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
"""Simulation runner for orchestrating multi-annotator simulations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import TYPE_CHECKING
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from bead.config.simulation import SimulationRunnerConfig
|
|
11
|
+
from bead.items.item import Item
|
|
12
|
+
from bead.items.item_template import ItemTemplate
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class SimulationRunner:
    """Orchestrates multi-annotator simulation.

    Can simulate:
    - Multiple independent annotators
    - Correlated annotators (shared noise component)
    - Mixed strategies (some LM-based, some random)

    One annotator is built per entry in ``config.annotator_configs``. When
    ``config.n_annotators`` is larger than that, the first config is copied
    for each missing annotator with its seed offset by the annotator index.

    Parameters
    ----------
    config
        Configuration for simulation.

    Examples
    --------
    >>> from bead.config.simulation import (  # doctest: +SKIP
    ...     SimulationRunnerConfig,
    ...     SimulatedAnnotatorConfig,
    ... )
    >>> config = SimulationRunnerConfig(  # doctest: +SKIP
    ...     annotator_configs=[
    ...         SimulatedAnnotatorConfig(strategy="lm_score", random_state=1),
    ...         SimulatedAnnotatorConfig(strategy="lm_score", random_state=2),
    ...     ],
    ...     n_annotators=2
    ... )
    >>> runner = SimulationRunner(config)
    >>> # results = runner.run(items, templates)
    """

    # values of ``config.output_format`` that ``save_results`` can write
    _SUPPORTED_OUTPUT_FORMATS = ("jsonl", "dict", "dataframe")

    def __init__(self, config: SimulationRunnerConfig) -> None:
        self.config = config

        # create annotators from configs (function-scope import kept from the
        # original; presumably avoids an import cycle -- confirm)
        from bead.simulation.annotators.base import (  # noqa: PLC0415
            SimulatedAnnotator,
        )

        self.annotators = [
            SimulatedAnnotator.from_config(cfg) for cfg in config.annotator_configs
        ]

        # if n_annotators > len(annotator_configs), replicate first config
        if config.n_annotators > len(self.annotators):
            base_config = config.annotator_configs[0]
            for i in range(len(self.annotators), config.n_annotators):
                # create new config with different seed
                new_config = base_config.model_copy()
                new_config.random_state = (base_config.random_state or 0) + i
                self.annotators.append(SimulatedAnnotator.from_config(new_config))

    def run(
        self,
        items: list[Item],
        templates: list[ItemTemplate] | ItemTemplate,
    ) -> dict[str, list[str | int | float | list[str]]]:
        """Run simulation with all annotators.

        Parameters
        ----------
        items : list[Item]
            Items to annotate.
        templates : list[ItemTemplate] | ItemTemplate
            Templates (one per item or shared).

        Returns
        -------
        dict[str, list[str | int | float | list[str]]]
            Results: {
                "item_ids": [...],
                "annotator_0": [...],
                "annotator_1": [...],
                ...
            }
            Every annotator list is ordered like "item_ids".
        """
        item_ids = [str(item.id) for item in items]
        results: dict[str, list[str | int | float | list[str]]] = {
            "item_ids": item_ids
        }

        # collect annotations from each annotator, keyed by stringified item id
        for i, annotator in enumerate(self.annotators):
            annotations = annotator.annotate_batch(items, templates)
            results[f"annotator_{i}"] = [annotations[item_id] for item_id in item_ids]

        # save if configured
        if self.config.save_path:
            self.save_results(results)

        return results

    def save_results(
        self, results: dict[str, list[str | int | float | list[str]]]
    ) -> None:
        """Save results to file.

        The layout depends on ``config.output_format``: "jsonl" writes one
        JSON object per item, "dict" writes the whole mapping as indented
        JSON, and "dataframe" writes CSV through pandas.

        Parameters
        ----------
        results : dict[str, list[str | int | float | list[str]]]
            Simulation results.

        Raises
        ------
        ValueError
            If ``save_path`` is not configured or ``output_format`` is not one
            of the supported formats.
        """
        if self.config.save_path is None:
            msg = "save_path not configured"
            raise ValueError(msg)

        # reject unknown formats up front instead of silently writing nothing
        output_format = self.config.output_format
        if output_format not in self._SUPPORTED_OUTPUT_FORMATS:
            msg = f"Unsupported output_format: {output_format!r}"
            raise ValueError(msg)

        path = Path(self.config.save_path)
        path.parent.mkdir(parents=True, exist_ok=True)

        if output_format == "jsonl":
            # write JSONL format: one row per item with every annotator's answer
            annotator_keys = [key for key in results if key != "item_ids"]
            with open(path, "w", encoding="utf-8") as f:
                for i, item_id in enumerate(results["item_ids"]):
                    row = {
                        "item_id": item_id,
                        **{key: results[key][i] for key in annotator_keys},
                    }
                    f.write(json.dumps(row) + "\n")

        elif output_format == "dict":
            # write JSON format
            with open(path, "w", encoding="utf-8") as f:
                json.dump(results, f, indent=2)

        else:
            # write CSV format (optional dependency)
            import pandas as pd  # noqa: PLC0415

            df = pd.DataFrame(results)
            df.to_csv(path, index=False)
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""Task-specific simulation strategies."""
|
|
2
|
+
|
|
3
|
+
from bead.simulation.strategies.base import SimulationStrategy
|
|
4
|
+
from bead.simulation.strategies.binary import BinaryStrategy
|
|
5
|
+
from bead.simulation.strategies.categorical import CategoricalStrategy
|
|
6
|
+
from bead.simulation.strategies.cloze import ClozeStrategy
|
|
7
|
+
from bead.simulation.strategies.forced_choice import ForcedChoiceStrategy
|
|
8
|
+
from bead.simulation.strategies.free_text import FreeTextStrategy
|
|
9
|
+
from bead.simulation.strategies.magnitude import MagnitudeStrategy
|
|
10
|
+
from bead.simulation.strategies.multi_select import MultiSelectStrategy
|
|
11
|
+
from bead.simulation.strategies.ordinal_scale import OrdinalScaleStrategy
|
|
12
|
+
|
|
13
|
+
# Names exported by the strategies package: the abstract base class followed
# by the task-specific strategy classes imported above.
__all__ = [
    "SimulationStrategy",
    "BinaryStrategy",
    "CategoricalStrategy",
    "ClozeStrategy",
    "ForcedChoiceStrategy",
    "FreeTextStrategy",
    "MagnitudeStrategy",
    "MultiSelectStrategy",
    "OrdinalScaleStrategy",
]
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"""Base class for simulation strategies."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from bead.items.item import Item
|
|
12
|
+
from bead.items.item_template import ItemTemplate
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class SimulationStrategy(ABC):
    """Abstract base for task-specific simulation strategies.

    A concrete strategy covers a single task type (forced_choice,
    ordinal_scale, etc.) and turns model outputs into a response of the
    matching shape.

    Strategies should:
    1. Validate item compatibility with task type
    2. Extract relevant model outputs
    3. Generate response in correct format for task
    4. Handle missing model outputs gracefully
    """

    @property
    @abstractmethod
    def supported_task_type(self) -> str:
        """Return supported task type (e.g., 'forced_choice').

        Returns
        -------
        str
            Task type identifier.
        """

    @abstractmethod
    def validate_item(self, item: Item, item_template: ItemTemplate) -> None:
        """Check that an item can be handled by this strategy.

        Parameters
        ----------
        item : Item
            Item to validate.
        item_template : ItemTemplate
            Template defining task structure.

        Raises
        ------
        ValueError
            If item incompatible with this strategy.
        """

    @abstractmethod
    def simulate_response(
        self,
        item: Item,
        item_template: ItemTemplate,
        model_output_key: str,
        rng: np.random.RandomState,
    ) -> str | int | float | list[str]:
        """Produce a simulated response for an item.

        Parameters
        ----------
        item : Item
            Item to respond to.
        item_template : ItemTemplate
            Template defining task structure.
        model_output_key : str
            Key to extract from model outputs.
        rng : np.random.RandomState
            Random number generator.

        Returns
        -------
        str | int | float | list[str]
            Simulated response (format depends on task type).
        """

    def extract_model_outputs(
        self, item: Item, key: str, required_count: int | None = None
    ) -> list[float] | None:
        """Collect numeric model outputs for ``key`` from an item.

        Outputs in ``item.model_outputs`` whose ``operation`` equals ``key``
        are used first. If none match, ``item.item_metadata`` is searched for
        numbered keys ``f"{key}1"``, ``f"{key}2"``, ... up to
        ``required_count`` (or 10 when no count is given).

        Parameters
        ----------
        item : Item
            Item to extract from.
        key : str
            Key to look for.
        required_count : int | None
            Expected number of outputs.

        Returns
        -------
        list[float] | None
            Extracted values, or None if nothing was found or the number of
            values differs from a (truthy) ``required_count``.
        """
        # primary source: model outputs tagged with the requested operation
        collected: list[float] = [
            float(entry.output)
            for entry in getattr(item, "model_outputs", ())
            if entry.operation == key
        ]

        # fallback: numbered metadata keys like "lm_score1", "lm_score2", ...
        if not collected and hasattr(item, "item_metadata"):
            metadata = item.item_metadata
            highest_suffix = required_count or 10
            numbered_keys = (f"{key}{idx}" for idx in range(1, highest_suffix + 1))
            collected = [
                float(metadata[name]) for name in numbered_keys if name in metadata
            ]

        if not collected:
            return None

        count_mismatch = bool(required_count) and len(collected) != required_count
        return None if count_mismatch else collected
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"""Binary choice simulation strategy."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
|
|
9
|
+
from bead.simulation.strategies.base import SimulationStrategy
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from bead.items.item import Item
|
|
13
|
+
from bead.items.item_template import ItemTemplate
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class BinaryStrategy(SimulationStrategy):
    """Strategy for binary tasks (yes/no, true/false).

    Uses a single model output to compute the probability of a "yes"
    response, then samples from a Bernoulli distribution.

    For binary tasks with LM score:
        P(yes) = sigmoid(score)

    No temperature scaling is applied by this strategy. When no single score
    is available the response is a fair coin flip.

    Examples
    --------
    >>> strategy = BinaryStrategy()
    >>> strategy.supported_task_type
    'binary'
    """

    @property
    def supported_task_type(self) -> str:
        """Return 'binary'.

        Returns
        -------
        str
            Task type identifier.
        """
        return "binary"

    def validate_item(self, item: Item, item_template: ItemTemplate) -> None:
        """Validate item for binary choice.

        Only the template's ``task_type`` is checked; the item itself is not
        inspected because :meth:`simulate_response` falls back to a coin flip
        when model outputs are missing.

        Parameters
        ----------
        item : Item
            Item to validate.
        item_template : ItemTemplate
            Template defining task.

        Raises
        ------
        ValueError
            If the template's task_type is not 'binary'.
        """
        if item_template.task_type != "binary":
            msg = f"Expected task_type 'binary', got '{item_template.task_type}'"
            raise ValueError(msg)

    @staticmethod
    def _sigmoid(score: float) -> float:
        """Return ``1 / (1 + exp(-score))`` without overflow for large ``|score|``."""
        if score >= 0:
            return float(1.0 / (1.0 + np.exp(-score)))
        # for negative scores exp(-score) can overflow; use the equivalent
        # form exp(score) / (1 + exp(score)), whose exponent is never positive
        exp_score = np.exp(score)
        return float(exp_score / (1.0 + exp_score))

    def simulate_response(
        self,
        item: Item,
        item_template: ItemTemplate,
        model_output_key: str,
        rng: np.random.RandomState,
    ) -> bool:
        """Generate binary response.

        Parameters
        ----------
        item : Item
            Item to respond to.
        item_template : ItemTemplate
            Template defining task.
        model_output_key : str
            Key for model outputs (e.g., "lm_score").
        rng : np.random.RandomState
            Random number generator.

        Returns
        -------
        bool
            Binary response (True/False).
        """
        # extract model output (expecting single score)
        scores = self.extract_model_outputs(item, model_output_key, required_count=1)

        if scores is None:
            # fallback to uniform random (50/50)
            return bool(rng.rand() > 0.5)

        # convert score to probability, then sample from Bernoulli
        prob_yes = self._sigmoid(scores[0])
        return bool(rng.rand() < prob_yes)
|