bead 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bead/__init__.py +11 -0
- bead/__main__.py +11 -0
- bead/active_learning/__init__.py +15 -0
- bead/active_learning/config.py +231 -0
- bead/active_learning/loop.py +566 -0
- bead/active_learning/models/__init__.py +24 -0
- bead/active_learning/models/base.py +852 -0
- bead/active_learning/models/binary.py +910 -0
- bead/active_learning/models/categorical.py +943 -0
- bead/active_learning/models/cloze.py +862 -0
- bead/active_learning/models/forced_choice.py +956 -0
- bead/active_learning/models/free_text.py +773 -0
- bead/active_learning/models/lora.py +365 -0
- bead/active_learning/models/magnitude.py +835 -0
- bead/active_learning/models/multi_select.py +795 -0
- bead/active_learning/models/ordinal_scale.py +811 -0
- bead/active_learning/models/peft_adapter.py +155 -0
- bead/active_learning/models/random_effects.py +639 -0
- bead/active_learning/selection.py +354 -0
- bead/active_learning/strategies.py +391 -0
- bead/active_learning/trainers/__init__.py +26 -0
- bead/active_learning/trainers/base.py +210 -0
- bead/active_learning/trainers/data_collator.py +172 -0
- bead/active_learning/trainers/dataset_utils.py +261 -0
- bead/active_learning/trainers/huggingface.py +304 -0
- bead/active_learning/trainers/lightning.py +324 -0
- bead/active_learning/trainers/metrics.py +424 -0
- bead/active_learning/trainers/mixed_effects.py +551 -0
- bead/active_learning/trainers/model_wrapper.py +509 -0
- bead/active_learning/trainers/registry.py +104 -0
- bead/adapters/__init__.py +11 -0
- bead/adapters/huggingface.py +61 -0
- bead/behavioral/__init__.py +116 -0
- bead/behavioral/analytics.py +646 -0
- bead/behavioral/extraction.py +343 -0
- bead/behavioral/merging.py +343 -0
- bead/cli/__init__.py +11 -0
- bead/cli/active_learning.py +513 -0
- bead/cli/active_learning_commands.py +779 -0
- bead/cli/completion.py +359 -0
- bead/cli/config.py +624 -0
- bead/cli/constraint_builders.py +286 -0
- bead/cli/deployment.py +859 -0
- bead/cli/deployment_trials.py +493 -0
- bead/cli/deployment_ui.py +332 -0
- bead/cli/display.py +378 -0
- bead/cli/items.py +960 -0
- bead/cli/items_factories.py +776 -0
- bead/cli/list_constraints.py +714 -0
- bead/cli/lists.py +490 -0
- bead/cli/main.py +430 -0
- bead/cli/models.py +877 -0
- bead/cli/resource_loaders.py +621 -0
- bead/cli/resources.py +1036 -0
- bead/cli/shell.py +356 -0
- bead/cli/simulate.py +840 -0
- bead/cli/templates.py +1158 -0
- bead/cli/training.py +1080 -0
- bead/cli/utils.py +614 -0
- bead/cli/workflow.py +1273 -0
- bead/config/__init__.py +68 -0
- bead/config/active_learning.py +1009 -0
- bead/config/config.py +192 -0
- bead/config/defaults.py +118 -0
- bead/config/deployment.py +217 -0
- bead/config/env.py +147 -0
- bead/config/item.py +45 -0
- bead/config/list.py +193 -0
- bead/config/loader.py +149 -0
- bead/config/logging.py +42 -0
- bead/config/model.py +49 -0
- bead/config/paths.py +46 -0
- bead/config/profiles.py +320 -0
- bead/config/resources.py +47 -0
- bead/config/serialization.py +210 -0
- bead/config/simulation.py +206 -0
- bead/config/template.py +238 -0
- bead/config/validation.py +267 -0
- bead/data/__init__.py +65 -0
- bead/data/base.py +87 -0
- bead/data/identifiers.py +97 -0
- bead/data/language_codes.py +61 -0
- bead/data/metadata.py +270 -0
- bead/data/range.py +123 -0
- bead/data/repository.py +358 -0
- bead/data/serialization.py +249 -0
- bead/data/timestamps.py +89 -0
- bead/data/validation.py +349 -0
- bead/data_collection/__init__.py +11 -0
- bead/data_collection/jatos.py +223 -0
- bead/data_collection/merger.py +154 -0
- bead/data_collection/prolific.py +198 -0
- bead/deployment/__init__.py +5 -0
- bead/deployment/distribution.py +402 -0
- bead/deployment/jatos/__init__.py +1 -0
- bead/deployment/jatos/api.py +200 -0
- bead/deployment/jatos/exporter.py +210 -0
- bead/deployment/jspsych/__init__.py +9 -0
- bead/deployment/jspsych/biome.json +44 -0
- bead/deployment/jspsych/config.py +411 -0
- bead/deployment/jspsych/generator.py +598 -0
- bead/deployment/jspsych/package.json +51 -0
- bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
- bead/deployment/jspsych/randomizer.py +299 -0
- bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
- bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
- bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
- bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
- bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
- bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
- bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
- bead/deployment/jspsych/src/plugins/rating.ts +248 -0
- bead/deployment/jspsych/src/slopit/index.ts +9 -0
- bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
- bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
- bead/deployment/jspsych/templates/experiment.css +1 -0
- bead/deployment/jspsych/templates/experiment.js.template +289 -0
- bead/deployment/jspsych/templates/index.html +51 -0
- bead/deployment/jspsych/templates/randomizer.js +241 -0
- bead/deployment/jspsych/templates/randomizer.js.template +313 -0
- bead/deployment/jspsych/trials.py +723 -0
- bead/deployment/jspsych/tsconfig.json +23 -0
- bead/deployment/jspsych/tsup.config.ts +30 -0
- bead/deployment/jspsych/ui/__init__.py +1 -0
- bead/deployment/jspsych/ui/components.py +383 -0
- bead/deployment/jspsych/ui/styles.py +411 -0
- bead/dsl/__init__.py +80 -0
- bead/dsl/ast.py +168 -0
- bead/dsl/context.py +178 -0
- bead/dsl/errors.py +71 -0
- bead/dsl/evaluator.py +570 -0
- bead/dsl/grammar.lark +81 -0
- bead/dsl/parser.py +231 -0
- bead/dsl/stdlib.py +929 -0
- bead/evaluation/__init__.py +13 -0
- bead/evaluation/convergence.py +485 -0
- bead/evaluation/interannotator.py +398 -0
- bead/items/__init__.py +40 -0
- bead/items/adapters/__init__.py +70 -0
- bead/items/adapters/anthropic.py +224 -0
- bead/items/adapters/api_utils.py +167 -0
- bead/items/adapters/base.py +216 -0
- bead/items/adapters/google.py +259 -0
- bead/items/adapters/huggingface.py +1074 -0
- bead/items/adapters/openai.py +323 -0
- bead/items/adapters/registry.py +202 -0
- bead/items/adapters/sentence_transformers.py +224 -0
- bead/items/adapters/togetherai.py +309 -0
- bead/items/binary.py +515 -0
- bead/items/cache.py +558 -0
- bead/items/categorical.py +593 -0
- bead/items/cloze.py +757 -0
- bead/items/constructor.py +784 -0
- bead/items/forced_choice.py +413 -0
- bead/items/free_text.py +681 -0
- bead/items/generation.py +432 -0
- bead/items/item.py +396 -0
- bead/items/item_template.py +787 -0
- bead/items/magnitude.py +573 -0
- bead/items/multi_select.py +621 -0
- bead/items/ordinal_scale.py +569 -0
- bead/items/scoring.py +448 -0
- bead/items/validation.py +723 -0
- bead/lists/__init__.py +30 -0
- bead/lists/balancer.py +263 -0
- bead/lists/constraints.py +1067 -0
- bead/lists/experiment_list.py +286 -0
- bead/lists/list_collection.py +378 -0
- bead/lists/partitioner.py +1141 -0
- bead/lists/stratification.py +254 -0
- bead/participants/__init__.py +73 -0
- bead/participants/collection.py +699 -0
- bead/participants/merging.py +312 -0
- bead/participants/metadata_spec.py +491 -0
- bead/participants/models.py +276 -0
- bead/resources/__init__.py +29 -0
- bead/resources/adapters/__init__.py +19 -0
- bead/resources/adapters/base.py +104 -0
- bead/resources/adapters/cache.py +128 -0
- bead/resources/adapters/glazing.py +508 -0
- bead/resources/adapters/registry.py +117 -0
- bead/resources/adapters/unimorph.py +796 -0
- bead/resources/classification.py +856 -0
- bead/resources/constraint_builders.py +329 -0
- bead/resources/constraints.py +165 -0
- bead/resources/lexical_item.py +223 -0
- bead/resources/lexicon.py +744 -0
- bead/resources/loaders.py +209 -0
- bead/resources/template.py +441 -0
- bead/resources/template_collection.py +707 -0
- bead/resources/template_generation.py +349 -0
- bead/simulation/__init__.py +29 -0
- bead/simulation/annotators/__init__.py +15 -0
- bead/simulation/annotators/base.py +175 -0
- bead/simulation/annotators/distance_based.py +135 -0
- bead/simulation/annotators/lm_based.py +114 -0
- bead/simulation/annotators/oracle.py +182 -0
- bead/simulation/annotators/random.py +181 -0
- bead/simulation/dsl_extension/__init__.py +3 -0
- bead/simulation/noise_models/__init__.py +13 -0
- bead/simulation/noise_models/base.py +42 -0
- bead/simulation/noise_models/random_noise.py +82 -0
- bead/simulation/noise_models/systematic.py +132 -0
- bead/simulation/noise_models/temperature.py +86 -0
- bead/simulation/runner.py +144 -0
- bead/simulation/strategies/__init__.py +23 -0
- bead/simulation/strategies/base.py +123 -0
- bead/simulation/strategies/binary.py +103 -0
- bead/simulation/strategies/categorical.py +123 -0
- bead/simulation/strategies/cloze.py +224 -0
- bead/simulation/strategies/forced_choice.py +127 -0
- bead/simulation/strategies/free_text.py +105 -0
- bead/simulation/strategies/magnitude.py +116 -0
- bead/simulation/strategies/multi_select.py +129 -0
- bead/simulation/strategies/ordinal_scale.py +131 -0
- bead/templates/__init__.py +27 -0
- bead/templates/adapters/__init__.py +17 -0
- bead/templates/adapters/base.py +128 -0
- bead/templates/adapters/cache.py +178 -0
- bead/templates/adapters/huggingface.py +312 -0
- bead/templates/combinatorics.py +103 -0
- bead/templates/filler.py +605 -0
- bead/templates/renderers.py +177 -0
- bead/templates/resolver.py +178 -0
- bead/templates/strategies.py +1806 -0
- bead/templates/streaming.py +195 -0
- bead-0.1.0.dist-info/METADATA +212 -0
- bead-0.1.0.dist-info/RECORD +231 -0
- bead-0.1.0.dist-info/WHEEL +4 -0
- bead-0.1.0.dist-info/entry_points.txt +2 -0
- bead-0.1.0.dist-info/licenses/LICENSE +21 -0
bead/lists/__init__.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""List construction module for experimental list partitioning.
|
|
2
|
+
|
|
3
|
+
Provides data models for organizing experimental items into balanced lists
|
|
4
|
+
for presentation to participants. Includes ExperimentList, ListCollection,
|
|
5
|
+
and constraint types (uniqueness, balance, quantile, size, diversity, ordering).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from bead.lists.constraints import (
|
|
9
|
+
BalanceConstraint,
|
|
10
|
+
DiversityConstraint,
|
|
11
|
+
ListConstraint,
|
|
12
|
+
OrderingConstraint,
|
|
13
|
+
QuantileConstraint,
|
|
14
|
+
SizeConstraint,
|
|
15
|
+
UniquenessConstraint,
|
|
16
|
+
)
|
|
17
|
+
from bead.lists.experiment_list import ExperimentList
|
|
18
|
+
from bead.lists.list_collection import ListCollection
|
|
19
|
+
|
|
20
|
+
# Names exported by ``from bead.lists import *``.
__all__ = [
    # list data models (bead.lists.experiment_list / bead.lists.list_collection)
    "ExperimentList",
    "ListCollection",
    # constraint types imported from bead.lists.constraints
    "ListConstraint",
    "UniquenessConstraint",
    "BalanceConstraint",
    "QuantileConstraint",
    "DiversityConstraint",
    "SizeConstraint",
    "OrderingConstraint",
]
|
bead/lists/balancer.py
ADDED
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
"""Quantile balancing for experimental list partitioning.
|
|
2
|
+
|
|
3
|
+
This module provides the QuantileBalancer class for ensuring uniform distribution
|
|
4
|
+
of items across quantiles of a numeric property. Uses NumPy for efficient
|
|
5
|
+
quantile computation and maintains stand-off annotation pattern (works with UUIDs).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from collections.abc import Callable
|
|
11
|
+
from typing import Any
|
|
12
|
+
from uuid import UUID
|
|
13
|
+
|
|
14
|
+
import numpy as np
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class QuantileBalancer:
|
|
18
|
+
"""Ensures uniform distribution of items across quantiles.
|
|
19
|
+
|
|
20
|
+
Used by stratified partitioning strategy to create balanced distribution
|
|
21
|
+
of numeric properties (e.g., LM probabilities, word frequencies).
|
|
22
|
+
|
|
23
|
+
Works with UUIDs only (stand-off annotation). Requires value_func callable
|
|
24
|
+
to extract numeric values from items via their UUIDs.
|
|
25
|
+
|
|
26
|
+
Parameters
|
|
27
|
+
----------
|
|
28
|
+
n_quantiles : int, default=5
|
|
29
|
+
Number of quantiles to create (must be >= 2).
|
|
30
|
+
random_seed : int | None, default=None
|
|
31
|
+
Random seed for reproducibility. If None, uses non-deterministic RNG.
|
|
32
|
+
|
|
33
|
+
Attributes
|
|
34
|
+
----------
|
|
35
|
+
n_quantiles : int
|
|
36
|
+
Number of quantiles to create.
|
|
37
|
+
random_seed : int | None
|
|
38
|
+
Random seed for reproducibility.
|
|
39
|
+
|
|
40
|
+
Examples
|
|
41
|
+
--------
|
|
42
|
+
>>> from uuid import uuid4
|
|
43
|
+
>>> import numpy as np
|
|
44
|
+
>>> balancer = QuantileBalancer(n_quantiles=5, random_seed=42)
|
|
45
|
+
>>> # Create items with known values
|
|
46
|
+
>>> items = [uuid4() for _ in range(100)]
|
|
47
|
+
>>> values = {item: float(i) for i, item in enumerate(items)}
|
|
48
|
+
>>> value_func = lambda uid: values[uid]
|
|
49
|
+
>>> # Balance across 4 lists, 5 items per quantile per list
|
|
50
|
+
>>> lists = balancer.balance(items, value_func, n_lists=4,
|
|
51
|
+
... items_per_quantile_per_list=5)
|
|
52
|
+
>>> len(lists)
|
|
53
|
+
4
|
|
54
|
+
"""
|
|
55
|
+
|
|
56
|
+
def __init__(self, n_quantiles: int = 5, random_seed: int | None = None) -> None:
|
|
57
|
+
if n_quantiles < 2:
|
|
58
|
+
raise ValueError(f"n_quantiles must be >= 2, got {n_quantiles}")
|
|
59
|
+
|
|
60
|
+
self.n_quantiles = n_quantiles
|
|
61
|
+
self.random_seed = random_seed
|
|
62
|
+
self._rng = np.random.default_rng(random_seed)
|
|
63
|
+
|
|
64
|
+
def balance(
|
|
65
|
+
self,
|
|
66
|
+
item_ids: list[UUID],
|
|
67
|
+
value_func: Callable[[UUID], float],
|
|
68
|
+
n_lists: int,
|
|
69
|
+
items_per_quantile_per_list: int,
|
|
70
|
+
) -> list[list[UUID]]:
|
|
71
|
+
"""Balance items across lists and quantiles.
|
|
72
|
+
|
|
73
|
+
Distributes items uniformly across quantiles and lists to ensure
|
|
74
|
+
balanced representation of the numeric property across all lists.
|
|
75
|
+
|
|
76
|
+
Parameters
|
|
77
|
+
----------
|
|
78
|
+
item_ids : list[UUID]
|
|
79
|
+
UUIDs of items to balance.
|
|
80
|
+
value_func : Callable[[UUID], float]
|
|
81
|
+
Function to extract numeric value from item UUID.
|
|
82
|
+
n_lists : int
|
|
83
|
+
Number of lists to create.
|
|
84
|
+
items_per_quantile_per_list : int
|
|
85
|
+
Target number of items per quantile per list.
|
|
86
|
+
|
|
87
|
+
Returns
|
|
88
|
+
-------
|
|
89
|
+
list[list[UUID]]
|
|
90
|
+
Balanced lists of item UUIDs.
|
|
91
|
+
|
|
92
|
+
Raises
|
|
93
|
+
------
|
|
94
|
+
ValueError
|
|
95
|
+
If n_lists < 1 or items_per_quantile_per_list < 1.
|
|
96
|
+
|
|
97
|
+
Examples
|
|
98
|
+
--------
|
|
99
|
+
>>> from uuid import uuid4
|
|
100
|
+
>>> balancer = QuantileBalancer(n_quantiles=5, random_seed=42)
|
|
101
|
+
>>> items = [uuid4() for _ in range(100)]
|
|
102
|
+
>>> values = {item: float(i) for i, item in enumerate(items)}
|
|
103
|
+
>>> lists = balancer.balance(items, lambda uid: values[uid], 4, 5)
|
|
104
|
+
>>> all(len(lst) == 25 for lst in lists) # 5 quantiles * 5 items
|
|
105
|
+
True
|
|
106
|
+
|
|
107
|
+
Notes
|
|
108
|
+
-----
|
|
109
|
+
- Items are assigned to quantiles using np.percentile and np.digitize
|
|
110
|
+
- Within each quantile, items are shuffled before distribution
|
|
111
|
+
- If insufficient items exist in a quantile, fewer items are assigned
|
|
112
|
+
"""
|
|
113
|
+
if n_lists < 1:
|
|
114
|
+
raise ValueError(f"n_lists must be >= 1, got {n_lists}")
|
|
115
|
+
if items_per_quantile_per_list < 1:
|
|
116
|
+
raise ValueError(
|
|
117
|
+
f"items_per_quantile_per_list must be >= 1, "
|
|
118
|
+
f"got {items_per_quantile_per_list}"
|
|
119
|
+
)
|
|
120
|
+
|
|
121
|
+
# create quantile-based strata
|
|
122
|
+
strata = self._create_strata(item_ids, value_func)
|
|
123
|
+
|
|
124
|
+
# initialize lists
|
|
125
|
+
lists: list[list[UUID]] = [[] for _ in range(n_lists)]
|
|
126
|
+
|
|
127
|
+
# distribute items from each quantile across lists
|
|
128
|
+
for q in range(self.n_quantiles):
|
|
129
|
+
q_items = strata[q]
|
|
130
|
+
|
|
131
|
+
# shuffle items in this quantile
|
|
132
|
+
q_items_array = np.array(q_items)
|
|
133
|
+
self._rng.shuffle(q_items_array)
|
|
134
|
+
|
|
135
|
+
# distribute to lists
|
|
136
|
+
for list_idx in range(n_lists):
|
|
137
|
+
# take items for this list
|
|
138
|
+
start_idx = list_idx * items_per_quantile_per_list
|
|
139
|
+
end_idx = start_idx + items_per_quantile_per_list
|
|
140
|
+
list_items = q_items_array[start_idx:end_idx].tolist()
|
|
141
|
+
|
|
142
|
+
lists[list_idx].extend(list_items)
|
|
143
|
+
|
|
144
|
+
return lists
|
|
145
|
+
|
|
146
|
+
def compute_balance_score(
|
|
147
|
+
self, item_ids: list[UUID], value_func: Callable[[UUID], float]
|
|
148
|
+
) -> float:
|
|
149
|
+
"""Compute balance score for items.
|
|
150
|
+
|
|
151
|
+
Score is 1.0 for perfect balance (uniform distribution across quantiles),
|
|
152
|
+
lower for imbalanced distributions. Score is based on deviation from
|
|
153
|
+
expected uniform distribution.
|
|
154
|
+
|
|
155
|
+
Parameters
|
|
156
|
+
----------
|
|
157
|
+
item_ids : list[UUID]
|
|
158
|
+
UUIDs of items to score.
|
|
159
|
+
value_func : Callable[[UUID], float]
|
|
160
|
+
Function to extract numeric values.
|
|
161
|
+
|
|
162
|
+
Returns
|
|
163
|
+
-------
|
|
164
|
+
float
|
|
165
|
+
Balance score (0.0-1.0, higher is better).
|
|
166
|
+
|
|
167
|
+
Examples
|
|
168
|
+
--------
|
|
169
|
+
>>> from uuid import uuid4
|
|
170
|
+
>>> balancer = QuantileBalancer(n_quantiles=5)
|
|
171
|
+
>>> # Uniformly distributed values
|
|
172
|
+
>>> items = [uuid4() for _ in range(100)]
|
|
173
|
+
>>> values = {item: float(i) for i, item in enumerate(items)}
|
|
174
|
+
>>> score = balancer.compute_balance_score(items, lambda uid: values[uid])
|
|
175
|
+
>>> score > 0.9 # Should be close to 1.0
|
|
176
|
+
True
|
|
177
|
+
|
|
178
|
+
Notes
|
|
179
|
+
-----
|
|
180
|
+
- Returns 0.0 for empty item lists
|
|
181
|
+
- Uses mean absolute deviation from expected uniform count
|
|
182
|
+
"""
|
|
183
|
+
if not item_ids:
|
|
184
|
+
return 0.0
|
|
185
|
+
|
|
186
|
+
# compute values
|
|
187
|
+
values: np.ndarray[tuple[int, ...], np.dtype[np.floating[Any]]] = np.array(
|
|
188
|
+
[value_func(item_id) for item_id in item_ids]
|
|
189
|
+
)
|
|
190
|
+
|
|
191
|
+
# create expected quantile bins
|
|
192
|
+
expected_quantiles: np.ndarray[tuple[int], np.dtype[np.floating[Any]]] = (
|
|
193
|
+
np.linspace(0, 100, self.n_quantiles + 1)
|
|
194
|
+
)
|
|
195
|
+
# percentile with array input returns array
|
|
196
|
+
expected_bins: np.ndarray[Any, np.dtype[np.floating[Any]]] = np.percentile(
|
|
197
|
+
values, expected_quantiles
|
|
198
|
+
)
|
|
199
|
+
|
|
200
|
+
# count items in each quantile; digitize returns array of integers
|
|
201
|
+
quantile_assignments: np.ndarray[Any, np.dtype[np.intp]] = (
|
|
202
|
+
np.digitize(values, expected_bins) - 1
|
|
203
|
+
)
|
|
204
|
+
quantile_assignments = np.clip(quantile_assignments, 0, self.n_quantiles - 1)
|
|
205
|
+
|
|
206
|
+
quantile_counts = np.bincount(quantile_assignments, minlength=self.n_quantiles)
|
|
207
|
+
|
|
208
|
+
# compute uniformity score
|
|
209
|
+
expected_count = len(item_ids) / self.n_quantiles
|
|
210
|
+
deviations = np.abs(quantile_counts - expected_count)
|
|
211
|
+
score = 1.0 - (np.mean(deviations) / expected_count)
|
|
212
|
+
|
|
213
|
+
return float(max(0.0, score))
|
|
214
|
+
|
|
215
|
+
def _create_strata(
|
|
216
|
+
self, item_ids: list[UUID], value_func: Callable[[UUID], float]
|
|
217
|
+
) -> dict[int, list[UUID]]:
|
|
218
|
+
"""Create quantile-based strata from items.
|
|
219
|
+
|
|
220
|
+
Parameters
|
|
221
|
+
----------
|
|
222
|
+
item_ids : list[UUID]
|
|
223
|
+
UUIDs of items to stratify.
|
|
224
|
+
value_func : Callable[[UUID], float]
|
|
225
|
+
Function to extract numeric values.
|
|
226
|
+
|
|
227
|
+
Returns
|
|
228
|
+
-------
|
|
229
|
+
dict[int, list[UUID]]
|
|
230
|
+
Dictionary mapping quantile index (0 to n_quantiles-1) to list
|
|
231
|
+
of item UUIDs in that quantile.
|
|
232
|
+
|
|
233
|
+
Notes
|
|
234
|
+
-----
|
|
235
|
+
- Uses np.percentile to compute quantile boundaries
|
|
236
|
+
- Uses np.digitize to assign items to quantiles
|
|
237
|
+
- Edge cases are handled by clipping to valid quantile range
|
|
238
|
+
"""
|
|
239
|
+
# extract values
|
|
240
|
+
values: np.ndarray[tuple[int, ...], np.dtype[np.floating[Any]]] = np.array(
|
|
241
|
+
[value_func(item_id) for item_id in item_ids]
|
|
242
|
+
)
|
|
243
|
+
|
|
244
|
+
# compute quantile bins
|
|
245
|
+
quantiles: np.ndarray[tuple[int], np.dtype[np.floating[Any]]] = np.linspace(
|
|
246
|
+
0, 100, self.n_quantiles + 1
|
|
247
|
+
)
|
|
248
|
+
bins: np.ndarray[Any, np.dtype[np.floating[Any]]] = np.percentile(
|
|
249
|
+
values, quantiles
|
|
250
|
+
)
|
|
251
|
+
|
|
252
|
+
# assign items to quantiles
|
|
253
|
+
quantile_assignments: np.ndarray[Any, np.dtype[np.intp]] = (
|
|
254
|
+
np.digitize(values, bins) - 1
|
|
255
|
+
)
|
|
256
|
+
quantile_assignments = np.clip(quantile_assignments, 0, self.n_quantiles - 1)
|
|
257
|
+
|
|
258
|
+
# group items by quantile
|
|
259
|
+
strata: dict[int, list[UUID]] = {q: [] for q in range(self.n_quantiles)}
|
|
260
|
+
for item_id, q in zip(item_ids, quantile_assignments, strict=False):
|
|
261
|
+
strata[q].append(item_id)
|
|
262
|
+
|
|
263
|
+
return strata
|