ai_nk_cce-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_nk_cce-0.1.0.dist-info/METADATA +118 -0
- ai_nk_cce-0.1.0.dist-info/RECORD +46 -0
- ai_nk_cce-0.1.0.dist-info/WHEEL +4 -0
- api/__init__.py +0 -0
- api/mpcdf_vllm.py +94 -0
- evals/nk_model.py +277 -0
- model/README.md +64 -0
- model/config/dataset_conv_v1.yml +9 -0
- model/config/dataset_conv_v2_m2.yml +9 -0
- model/config/dataset_conv_v3_m2_assembl_nearest.yml +9 -0
- model/config/dataset_debug.yml +9 -0
- model/config/dataset_v4_int_format.yml +9 -0
- model/config/dataset_v5.yml +9 -0
- model/config/inference.yml +7 -0
- model/config/train.yml +24 -0
- model/config/train_debug.yml +19 -0
- model/config/train_from_checkpoint.yml +24 -0
- model/config/train_from_checkpoint_debug.yml +19 -0
- model/config/train_grpo.yml +30 -0
- model/config/train_grpo_debug.yml +30 -0
- model/config/train_grpo_debug_vllm.yml +32 -0
- model/config.py +54 -0
- model/dataset.py +324 -0
- model/inference.py +51 -0
- model/nk_assistant.py +207 -0
- model/parser.py +70 -0
- model/run_slurm.py +335 -0
- model/score.ipynb +596 -0
- model/scripts/template.slurm +54 -0
- model/scripts/template_rl.slurm +54 -0
- model/train.py +293 -0
- nk_model/__init__.py +0 -0
- nk_model/assembler.py +112 -0
- nk_model/biased_prediction_agent.py +389 -0
- nk_model/dataset.py +434 -0
- nk_model/enums.py +21 -0
- nk_model/landscape_cache.py +149 -0
- nk_model/models.py +172 -0
- nk_model/nk_landscape.py +498 -0
- simulation/hill_climber_simulation.py +211 -0
- simulation/hill_climber_vs_ai_simulation.py +132 -0
- simulation/landscape_selection.py +179 -0
- utils/__init__.py +0 -0
- utils/binary_conversion.py +128 -0
- utils/logging.py +33 -0
- utils/utils.py +51 -0
nk_model/biased_prediction_agent.py

@@ -0,0 +1,389 @@
from typing import Dict, List, Tuple

import numpy as np
from scipy.optimize import minimize

from src.nk_model.models import Item, NKParams
from src.nk_model.nk_landscape import NKLandscape


class BiasedPredictionAgent:
    """
    Agent that generates biased improvement suggestions on NK landscapes.

    The agent applies a consistent bias G to the base landscape F, creating
    a biased landscape F' = normalize(F * G). It then generates suggestions
    from a maximum entropy distribution that satisfies mean and variance
    constraints relative to F'.
    """

    def __init__(
        self,
        landscape: NKLandscape,
        bias_seed: int | None = None,
        bias_power: float = 1.0,
    ):
        """
        Initialize the biased prediction agent.

        Args:
            landscape: Base NK landscape F
            bias_seed: Random seed for generating bias landscape G
            bias_power: Power applied to bias landscape G. Higher values
                make the bias more pronounced for regions with higher G(x)
        """
        self.landscape = landscape
        self.bias_seed = bias_seed
        self.bias_power = bias_power

        # Generate bias landscape G (N'=N, K'=0)
        self.bias_landscape = self._create_bias()

        # Create biased landscape F' = normalize(F * G^power)
        self.biased_landscape = self._create_biased_landscape()

        # Cache for distributions: key=(x_tuple, radius, mu, sigma2),
        # value=List of (coordinates, probability) tuples
        self._distribution_cache: Dict[
            Tuple[Tuple[int, ...], int, float, float],
            List[Tuple[np.ndarray, float]],
        ] = {}

    def _create_bias(self) -> NKLandscape:
        """
        Create bias landscape G with N'=N, K'=0.

        Returns:
            NKLandscape with K=0 (no interactions)
        """
        if self.bias_seed is not None:
            np.random.seed(self.bias_seed)

        bias_params = NKParams(
            n=self.landscape.N,
            k=0,  # K'=0 as specified
            m=self.landscape.N,  # Default m
            power=self.bias_power,
            max_val=1.0,
            payoff_type="float",
        )
        return NKLandscape(params=bias_params)

    def _create_biased_landscape(self) -> NKLandscape:
        """
        Create biased landscape F' = normalize(F * G^power).

        Returns:
            NKLandscape with payoffs F'(x) = normalize(F(x) * G(x)^power)
        """
        # Multiply payoffs: F'(x) = F(x) * G(x)^power
        combined_items = []
        for item_f, item_g in zip(
            self.landscape.items, self.bias_landscape.items
        ):
            assert np.array_equal(
                item_f.coordinates, item_g.coordinates
            ), "Coordinates must match"
            combined_payoff = item_f.payoff * item_g.payoff
            combined_items.append(
                Item(
                    coordinates=item_f.coordinates.copy(),
                    payoff=combined_payoff,
                )
            )

        # Normalize to [0, 1]
        payoffs = np.array([item.payoff for item in combined_items])
        min_payoff = np.min(payoffs)
        max_payoff = np.max(payoffs)
        if max_payoff > min_payoff:
            normalized_payoffs = (payoffs - min_payoff) / (
                max_payoff - min_payoff
            )
        else:
            normalized_payoffs = np.zeros_like(payoffs)

        # Create new landscape with normalized payoffs
        normalized_items = [
            Item(
                coordinates=item.coordinates.copy(),
                payoff=normalized_payoffs[i],
            )
            for i, item in enumerate(combined_items)
        ]

        # Create a new NKLandscape-like object or store items
        # For now, we'll store items and create lookup
        biased_landscape = NKLandscape.__new__(NKLandscape)
        biased_landscape.params = self.landscape.params
        biased_landscape.N = self.landscape.N
        biased_landscape.K = self.landscape.K
        biased_landscape.M = self.landscape.M
        biased_landscape.items = normalized_items
        biased_landscape._payoff_lookup = {
            tuple(item.coordinates): item.payoff for item in normalized_items
        }
        biased_landscape.uuid = f"{self.landscape.uuid}_biased"

        return biased_landscape

    def get_suggestion(
        self,
        x: np.ndarray,
        radius: int,
        delta_mu: float,
        sigma2: float,
    ) -> Item:
        """
        Generate a biased suggestion from distribution p_x.

        Args:
            x: Current point in landscape
            radius: Locality parameter r (Hamming distance radius)
            delta_mu: Expected improvement delta over F'(x)
            sigma2: Variance of improvement suggestions

        Returns:
            Suggested Item from original (unbiased) landscape
        """
        # Compute absolute mu from delta_mu
        current_fitness = self.landscape.get_payoff(x)
        mu = current_fitness + delta_mu

        # Get or compute distribution over all landscape items
        item_distribution = self.generate_prediction_distribution(
            x, radius, mu, sigma2
        )

        # Extract coordinates and probabilities
        coordinates = [coord for coord, _ in item_distribution]
        probabilities = np.array([prob for _, prob in item_distribution])

        # Sample from distribution
        sampled_idx = np.random.choice(len(coordinates), p=probabilities)
        sampled_coords = coordinates[sampled_idx]

        # Find and return the corresponding Item from original landscape
        for item in self.landscape.items:
            if np.array_equal(item.coordinates, sampled_coords):
                return item

        raise ValueError(
            f"Item with coordinates {sampled_coords} not found"
        )

    def generate_prediction_distribution(
        self,
        x: np.ndarray,
        radius: int,
        mu: float,
        sigma2: float,
    ) -> List[Tuple[np.ndarray, float]]:
        """
        Generate prediction distribution p_x using maximum entropy.

        Returns a distribution over ALL landscape items, where items
        outside B_r(x) have probability 0.

        Args:
            x: Current point
            radius: Locality parameter r (Hamming distance radius)
            mu: Target mean fitness F'(y)
            sigma2: Target variance of fitness F'(y)

        Returns:
            List of (coordinates, probability) tuples for all items
        """
        # Check cache
        x_tuple = tuple(x)
        cache_key = (x_tuple, radius, mu, sigma2)
        if cache_key in self._distribution_cache:
            return self._distribution_cache[cache_key]

        # Get ball items from original landscape
        ball_items = self.closed_ball(x, radius)
        ball_item_coordinates = [item.coordinates.copy() for item in ball_items]

        # Check feasibility
        self.feasible_mean_and_variance(ball_items, mu, sigma2)

        # Initialize distribution with zeros for all items
        item_distribution = [
            (item.coordinates.copy(), 0.0) for item in self.landscape.items
        ]

        # Get probabilities for ball items
        ball_distribution = self.solve_constraint_minimization(
            mu, sigma2, ball_item_coordinates
        )

        # Create lookup for ball items
        ball_coord_to_prob = {
            tuple(coord): prob for coord, prob in ball_distribution
        }

        # Update distribution with ball item probabilities
        item_distribution = [
            (
                coords,
                ball_coord_to_prob.get(tuple(coords), 0.0),
            )
            for coords, _ in item_distribution
        ]

        # Cache result
        self._distribution_cache[cache_key] = item_distribution

        return item_distribution

    def feasible_mean_and_variance(
        self,
        ball_items: list[Item],
        mu: float,
        sigma2: float,
    ):
        """
        Get feasible min and max fitness values and validate mu, sigma2.

        Args:
            ball_items: List of items in B_r(x) from original landscape
            mu: Target mean fitness F'(y)
            sigma2: Target variance of fitness F'(y)

        Returns:
            Tuple of (min_fitness, max_fitness) in B_r(x)

        Raises:
            ValueError: If mu or sigma2 are not feasible
        """
        fitnesses = [
            self.biased_landscape.get_payoff(item.coordinates)
            for item in ball_items
        ]
        min_fitness = float(np.min(fitnesses))
        max_fitness = float(np.max(fitnesses))

        # Check mean feasibility
        if not (min_fitness <= mu <= max_fitness):
            raise ValueError(
                f"mu={mu} not feasible. Must be in "
                f"[{min_fitness}, {max_fitness}]"
            )

        # Check variance feasibility (Bhatia-Davis inequality)
        max_var = (mu - min_fitness) * (max_fitness - mu)
        if sigma2 > max_var:
            raise ValueError(
                f"sigma2={sigma2} not feasible. Must be <= {max_var}"
            )

    def solve_constraint_minimization(
        self,
        mu: float,
        sigma2: float,
        ball_item_coordinates: List[np.ndarray],
    ) -> List[Tuple[np.ndarray, float]]:
        """
        Solve for maximum entropy distribution p_x.

        Uses scipy.optimize.minimize to find distribution that maximizes
        entropy subject to mean and variance constraints.

        Args:
            mu: Target mean
            sigma2: Target variance
            ball_item_coordinates: List of coordinate arrays in B_r(x)

        Returns:
            List of (coordinates, probability) tuples for ball items
        """
        # Get fitnesses from biased landscape
        fitnesses = np.array(
            [
                self.biased_landscape.get_payoff(coordinate)
                for coordinate in ball_item_coordinates
            ]
        )

        n_points = len(ball_item_coordinates)

        # Objective: minimize negative entropy (maximize entropy)
        def objective(p: np.ndarray) -> float:
            return np.sum(p * np.log(p + 1e-10))

        # Constraints
        constraints = [
            {
                "type": "eq",
                "fun": lambda p: np.sum(p * fitnesses) - mu,
            },
            {
                "type": "eq",
                "fun": lambda p: np.sum(p * (fitnesses - mu) ** 2) - sigma2,
            },
            {"type": "eq", "fun": lambda p: np.sum(p) - 1},
        ]

        # Bounds: probabilities must be in [0, 1]
        bounds = [(0, 1) for _ in range(n_points)]

        # Initial guess: uniform distribution
        p0 = np.ones(n_points) / n_points

        # Solve
        result = minimize(
            objective,
            p0,
            bounds=bounds,
            constraints=constraints,
            method="SLSQP",
        )

        if not result.success:
            raise RuntimeError(
                f"Optimization failed: {result.message}"
            )

        # Return list of (coordinates, probability) tuples
        return [
            (ball_item_coordinates[i].copy(), float(result.x[i]))
            for i in range(n_points)
        ]

    def hamming_distance(
        self,
        x: np.ndarray,
        y: np.ndarray,
    ) -> int:
        """Compute Hamming distance between two points."""
        return int(np.sum(x != y))

    def closed_ball(
        self,
        x: np.ndarray,
        r: int,
    ) -> list[Item]:
        """
        Get all points y such that d_H(x, y) <= r.

        Args:
            x: Center point
            r: Radius (Hamming distance)

        Returns:
            List of Item objects in B_r(x)

        Raises:
            ValueError: If no items found in ball B_r(x)
        """
        ball_items = [
            item
            for item in self.landscape.items
            if self.hamming_distance(x, item.coordinates) <= r
        ]
        if len(ball_items) == 0:
            raise ValueError(
                f"No items found in ball B_{r}(x). "
                f"Point x may not be in landscape."
            )
        return ball_items