ai_nk_cce-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. ai_nk_cce-0.1.0.dist-info/METADATA +118 -0
  2. ai_nk_cce-0.1.0.dist-info/RECORD +46 -0
  3. ai_nk_cce-0.1.0.dist-info/WHEEL +4 -0
  4. api/__init__.py +0 -0
  5. api/mpcdf_vllm.py +94 -0
  6. evals/nk_model.py +277 -0
  7. model/README.md +64 -0
  8. model/config/dataset_conv_v1.yml +9 -0
  9. model/config/dataset_conv_v2_m2.yml +9 -0
  10. model/config/dataset_conv_v3_m2_assembl_nearest.yml +9 -0
  11. model/config/dataset_debug.yml +9 -0
  12. model/config/dataset_v4_int_format.yml +9 -0
  13. model/config/dataset_v5.yml +9 -0
  14. model/config/inference.yml +7 -0
  15. model/config/train.yml +24 -0
  16. model/config/train_debug.yml +19 -0
  17. model/config/train_from_checkpoint.yml +24 -0
  18. model/config/train_from_checkpoint_debug.yml +19 -0
  19. model/config/train_grpo.yml +30 -0
  20. model/config/train_grpo_debug.yml +30 -0
  21. model/config/train_grpo_debug_vllm.yml +32 -0
  22. model/config.py +54 -0
  23. model/dataset.py +324 -0
  24. model/inference.py +51 -0
  25. model/nk_assistant.py +207 -0
  26. model/parser.py +70 -0
  27. model/run_slurm.py +335 -0
  28. model/score.ipynb +596 -0
  29. model/scripts/template.slurm +54 -0
  30. model/scripts/template_rl.slurm +54 -0
  31. model/train.py +293 -0
  32. nk_model/__init__.py +0 -0
  33. nk_model/assembler.py +112 -0
  34. nk_model/biased_prediction_agent.py +389 -0
  35. nk_model/dataset.py +434 -0
  36. nk_model/enums.py +21 -0
  37. nk_model/landscape_cache.py +149 -0
  38. nk_model/models.py +172 -0
  39. nk_model/nk_landscape.py +498 -0
  40. simulation/hill_climber_simulation.py +211 -0
  41. simulation/hill_climber_vs_ai_simulation.py +132 -0
  42. simulation/landscape_selection.py +179 -0
  43. utils/__init__.py +0 -0
  44. utils/binary_conversion.py +128 -0
  45. utils/logging.py +33 -0
  46. utils/utils.py +51 -0
nk_model/biased_prediction_agent.py
@@ -0,0 +1,389 @@
+ from typing import Dict, List, Tuple
+
+ import numpy as np
+ from scipy.optimize import minimize
+
+ from src.nk_model.models import Item, NKParams
+ from src.nk_model.nk_landscape import NKLandscape
+
+
+ class BiasedPredictionAgent:
+     """
+     Agent that generates biased improvement suggestions on NK landscapes.
+
+     The agent applies a consistent bias G to the base landscape F, creating
+     a biased landscape F' = normalize(F * G). It then generates suggestions
+     from a maximum entropy distribution that satisfies mean and variance
+     constraints relative to F'.
+     """
+
+     def __init__(
+         self,
+         landscape: NKLandscape,
+         bias_seed: int | None = None,
+         bias_power: float = 1.0,
+     ):
+         """
+         Initialize the biased prediction agent.
+
+         Args:
+             landscape: Base NK landscape F
+             bias_seed: Random seed for generating bias landscape G
+             bias_power: Power applied to bias landscape G. Higher values
+                 make the bias more pronounced for regions with higher G(x)
+         """
+         self.landscape = landscape
+         self.bias_seed = bias_seed
+         self.bias_power = bias_power
+
+         # Generate bias landscape G (N'=N, K'=0)
+         self.bias_landscape = self._create_bias()
+
+         # Create biased landscape F' = normalize(F * G^power)
+         self.biased_landscape = self._create_biased_landscape()
+
+         # Cache for distributions: key=(x_tuple, radius, mu, sigma2),
+         # value=List of (coordinates, probability) tuples
+         self._distribution_cache: Dict[
+             Tuple[Tuple[int, ...], int, float, float],
+             List[Tuple[np.ndarray, float]],
+         ] = {}
+
+     def _create_bias(self) -> NKLandscape:
+         """
+         Create bias landscape G with N'=N, K'=0.
+
+         Returns:
+             NKLandscape with K=0 (no interactions)
+         """
+         if self.bias_seed is not None:
+             np.random.seed(self.bias_seed)
+
+         bias_params = NKParams(
+             n=self.landscape.N,
+             k=0,  # K'=0 as specified
+             m=self.landscape.N,  # Default m
+             power=self.bias_power,
+             max_val=1.0,
+             payoff_type="float",
+         )
+         return NKLandscape(params=bias_params)
+
+     def _create_biased_landscape(self) -> NKLandscape:
+         """
+         Create biased landscape F' = normalize(F * G^power).
+
+         Returns:
+             NKLandscape with payoffs F'(x) = normalize(F(x) * G(x)^power)
+         """
+         # Multiply payoffs: F'(x) = F(x) * G(x)^power
+         combined_items = []
+         for item_f, item_g in zip(
+             self.landscape.items, self.bias_landscape.items
+         ):
+             assert np.array_equal(
+                 item_f.coordinates, item_g.coordinates
+             ), "Coordinates must match"
+             combined_payoff = item_f.payoff * item_g.payoff
+             combined_items.append(
+                 Item(
+                     coordinates=item_f.coordinates.copy(),
+                     payoff=combined_payoff,
+                 )
+             )
+
+         # Normalize to [0, 1]
+         payoffs = np.array([item.payoff for item in combined_items])
+         min_payoff = np.min(payoffs)
+         max_payoff = np.max(payoffs)
+         if max_payoff > min_payoff:
+             normalized_payoffs = (payoffs - min_payoff) / (
+                 max_payoff - min_payoff
+             )
+         else:
+             normalized_payoffs = np.zeros_like(payoffs)
+
+         # Create new landscape with normalized payoffs
+         normalized_items = [
+             Item(
+                 coordinates=item.coordinates.copy(),
+                 payoff=normalized_payoffs[i],
+             )
+             for i, item in enumerate(combined_items)
+         ]
+
+         # Create a new NKLandscape-like object or store items
+         # For now, we'll store items and create lookup
+         biased_landscape = NKLandscape.__new__(NKLandscape)
+         biased_landscape.params = self.landscape.params
+         biased_landscape.N = self.landscape.N
+         biased_landscape.K = self.landscape.K
+         biased_landscape.M = self.landscape.M
+         biased_landscape.items = normalized_items
+         biased_landscape._payoff_lookup = {
+             tuple(item.coordinates): item.payoff for item in normalized_items
+         }
+         biased_landscape.uuid = f"{self.landscape.uuid}_biased"
+
+         return biased_landscape
+
+     def get_suggestion(
+         self,
+         x: np.ndarray,
+         radius: int,
+         delta_mu: float,
+         sigma2: float,
+     ) -> Item:
+         """
+         Generate a biased suggestion from distribution p_x.
+
+         Args:
+             x: Current point in landscape
+             radius: Locality parameter r (Hamming distance radius)
+             delta_mu: Improvement delta added to the base payoff F(x)
+             sigma2: Variance of improvement suggestions
+
+         Returns:
+             Suggested Item from original (unbiased) landscape
+         """
+         # Compute absolute mu from delta_mu
+         current_fitness = self.landscape.get_payoff(x)
+         mu = current_fitness + delta_mu
+
+         # Get or compute distribution over all landscape items
+         item_distribution = self.generate_prediction_distribution(
+             x, radius, mu, sigma2
+         )
+
+         # Extract coordinates and probabilities
+         coordinates = [coord for coord, _ in item_distribution]
+         probabilities = np.array([prob for _, prob in item_distribution])
+
+         # Sample from distribution
+         sampled_idx = np.random.choice(len(coordinates), p=probabilities)
+         sampled_coords = coordinates[sampled_idx]
+
+         # Find and return the corresponding Item from original landscape
+         for item in self.landscape.items:
+             if np.array_equal(item.coordinates, sampled_coords):
+                 return item
+
+         raise ValueError(
+             f"Item with coordinates {sampled_coords} not found"
+         )
+
+     def generate_prediction_distribution(
+         self,
+         x: np.ndarray,
+         radius: int,
+         mu: float,
+         sigma2: float,
+     ) -> List[Tuple[np.ndarray, float]]:
+         """
+         Generate prediction distribution p_x using maximum entropy.
+
+         Returns a distribution over ALL landscape items, where items
+         outside B_r(x) have probability 0.
+
+         Args:
+             x: Current point
+             radius: Locality parameter r (Hamming distance radius)
+             mu: Target mean fitness F'(y)
+             sigma2: Target variance of fitness F'(y)
+
+         Returns:
+             List of (coordinates, probability) tuples for all items
+         """
+         # Check cache
+         x_tuple = tuple(x)
+         cache_key = (x_tuple, radius, mu, sigma2)
+         if cache_key in self._distribution_cache:
+             return self._distribution_cache[cache_key]
+
+         # Get ball items from original landscape
+         ball_items = self.closed_ball(x, radius)
+         ball_item_coordinates = [item.coordinates.copy() for item in ball_items]
+
+         # Check feasibility
+         self.feasible_mean_and_variance(ball_items, mu, sigma2)
+
+         # Initialize distribution with zeros for all items
+         item_distribution = [
+             (item.coordinates.copy(), 0.0) for item in self.landscape.items
+         ]
+
+         # Get probabilities for ball items
+         ball_distribution = self.solve_constraint_minimization(
+             mu, sigma2, ball_item_coordinates
+         )
+
+         # Create lookup for ball items
+         ball_coord_to_prob = {
+             tuple(coord): prob for coord, prob in ball_distribution
+         }
+
+         # Update distribution with ball item probabilities
+         item_distribution = [
+             (
+                 coords,
+                 ball_coord_to_prob.get(tuple(coords), 0.0),
+             )
+             for coords, _ in item_distribution
+         ]
+
+         # Cache result
+         self._distribution_cache[cache_key] = item_distribution
+
+         return item_distribution
+
+     def feasible_mean_and_variance(
+         self,
+         ball_items: list[Item],
+         mu: float,
+         sigma2: float,
+     ):
+         """
+         Validate that mu and sigma2 are feasible for fitnesses in B_r(x).
+
+         Args:
+             ball_items: List of items in B_r(x) from original landscape
+             mu: Target mean fitness F'(y)
+             sigma2: Target variance of fitness F'(y)
+
+         Returns:
+             None; this check only validates and raises on failure.
+
+         Raises:
+             ValueError: If mu or sigma2 are not feasible
+         """
+         fitnesses = [
+             self.biased_landscape.get_payoff(item.coordinates)
+             for item in ball_items
+         ]
+         min_fitness = float(np.min(fitnesses))
+         max_fitness = float(np.max(fitnesses))
+
+         # Check mean feasibility
+         if not (min_fitness <= mu <= max_fitness):
+             raise ValueError(
+                 f"mu={mu} not feasible. Must be in "
+                 f"[{min_fitness}, {max_fitness}]"
+             )
+
+         # Check variance feasibility (Bhatia-Davis inequality)
+         max_var = (mu - min_fitness) * (max_fitness - mu)
+         if sigma2 > max_var:
+             raise ValueError(
+                 f"sigma2={sigma2} not feasible. Must be <= {max_var}"
+             )
+
+     def solve_constraint_minimization(
+         self,
+         mu: float,
+         sigma2: float,
+         ball_item_coordinates: List[np.ndarray],
+     ) -> List[Tuple[np.ndarray, float]]:
+         """
+         Solve for maximum entropy distribution p_x.
+
+         Uses scipy.optimize.minimize to find distribution that maximizes
+         entropy subject to mean and variance constraints.
+
+         Args:
+             mu: Target mean
+             sigma2: Target variance
+             ball_item_coordinates: List of coordinate arrays in B_r(x)
+
+         Returns:
+             List of (coordinates, probability) tuples for ball items
+         """
+         # Get fitnesses from biased landscape
+         fitnesses = np.array(
+             [
+                 self.biased_landscape.get_payoff(coordinate)
+                 for coordinate in ball_item_coordinates
+             ]
+         )
+
+         n_points = len(ball_item_coordinates)
+
+         # Objective: minimize negative entropy (maximize entropy)
+         def objective(p: np.ndarray) -> float:
+             return np.sum(p * np.log(p + 1e-10))
+
+         # Constraints
+         constraints = [
+             {
+                 "type": "eq",
+                 "fun": lambda p: np.sum(p * fitnesses) - mu,
+             },
+             {
+                 "type": "eq",
+                 "fun": lambda p: np.sum(p * (fitnesses - mu) ** 2) - sigma2,
+             },
+             {"type": "eq", "fun": lambda p: np.sum(p) - 1},
+         ]
+
+         # Bounds: probabilities must be in [0, 1]
+         bounds = [(0, 1) for _ in range(n_points)]
+
+         # Initial guess: uniform distribution
+         p0 = np.ones(n_points) / n_points
+
+         # Solve
+         result = minimize(
+             objective,
+             p0,
+             bounds=bounds,
+             constraints=constraints,
+             method="SLSQP",
+         )
+
+         if not result.success:
+             raise RuntimeError(
+                 f"Optimization failed: {result.message}"
+             )
+
+         # Return list of (coordinates, probability) tuples
+         return [
+             (ball_item_coordinates[i].copy(), float(result.x[i]))
+             for i in range(n_points)
+         ]
+
+     def hamming_distance(
+         self,
+         x: np.ndarray,
+         y: np.ndarray,
+     ) -> int:
+         """Compute Hamming distance between two points."""
+         return int(np.sum(x != y))
+
+     def closed_ball(
+         self,
+         x: np.ndarray,
+         r: int,
+     ) -> list[Item]:
+         """
+         Get all points y such that d_H(x, y) <= r.
+
+         Args:
+             x: Center point
+             r: Radius (Hamming distance)
+
+         Returns:
+             List of Item objects in B_r(x)
+
+         Raises:
+             ValueError: If no items found in ball B_r(x)
+         """
+         ball_items = [
+             item
+             for item in self.landscape.items
+             if self.hamming_distance(x, item.coordinates) <= r
+         ]
+         if len(ball_items) == 0:
+             raise ValueError(
+                 f"No items found in ball B_{r}(x). "
+                 f"Point x may not be in landscape."
+             )
+         return ball_items
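
Two reading notes on the module above. First, `_create_biased_landscape` multiplies plain payoffs, F'(x) = normalize(F(x) * G(x)); the exponent is passed as power=self.bias_power into the NKParams for G, so the G^power in the comments is presumably applied inside NKLandscape generation rather than in the multiplication itself. Second, the module imports from src.nk_model.*, while the wheel installs the package as nk_model, so the import paths may only resolve in the source repository layout. With that caveat, a minimal usage sketch (the NKParams constructor arguments mirror those used in _create_bias; all concrete values are illustrative, and infeasible delta_mu/sigma2 choices will raise ValueError):

import numpy as np

from src.nk_model.biased_prediction_agent import BiasedPredictionAgent
from src.nk_model.models import NKParams
from src.nk_model.nk_landscape import NKLandscape

# Illustrative parameters, mirroring the fields used in _create_bias.
params = NKParams(n=8, k=2, m=8, power=1.0, max_val=1.0, payoff_type="float")
landscape = NKLandscape(params=params)

agent = BiasedPredictionAgent(landscape, bias_seed=42, bias_power=2.0)

# Ask for a suggestion within Hamming radius 2 of the current point,
# targeting mean payoff F(x) + 0.05 with variance 0.001 on F'.
x = landscape.items[0].coordinates
suggestion = agent.get_suggestion(x, radius=2, delta_mu=0.05, sigma2=0.001)
print(suggestion.coordinates, suggestion.payoff)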
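
The variance check in feasible_mean_and_variance is the Bhatia-Davis inequality: any distribution supported on [m, M] with mean mu has variance at most (mu - m)(M - mu). A self-contained numeric illustration (the fitness values are made up):

import numpy as np

# Made-up F' values for the points in a ball B_r(x).
fitnesses = np.array([0.2, 0.5, 0.9])
m, M = fitnesses.min(), fitnesses.max()

mu = 0.6                       # feasible: m <= mu <= M
max_var = (mu - m) * (M - mu)  # Bhatia-Davis bound: 0.4 * 0.3 = 0.12

print(max_var)          # ~0.12 (up to floating-point error)
print(0.10 <= max_var)  # True  -> sigma2=0.10 passes the check
print(0.15 <= max_var)  # False -> sigma2=0.15 raises ValueError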
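
solve_constraint_minimization is a standard maximum-entropy program: minimize sum(p * log p) subject to sum(p) = 1, sum(p * f) = mu, and sum(p * (f - mu)^2) = sigma2. The same SLSQP setup runs standalone on synthetic fitnesses; the prints at the end confirm the constraints hold up to solver tolerance:

import numpy as np
from scipy.optimize import minimize

fitnesses = np.array([0.1, 0.3, 0.5, 0.7, 0.9])  # synthetic F' values
mu, sigma2 = 0.5, 0.05

def neg_entropy(p: np.ndarray) -> float:
    # Epsilon keeps the log finite at p_i = 0, as in the agent code.
    return np.sum(p * np.log(p + 1e-10))

constraints = [
    {"type": "eq", "fun": lambda p: np.sum(p) - 1.0},
    {"type": "eq", "fun": lambda p: np.sum(p * fitnesses) - mu},
    {"type": "eq", "fun": lambda p: np.sum(p * (fitnesses - mu) ** 2) - sigma2},
]
p0 = np.ones(len(fitnesses)) / len(fitnesses)

result = minimize(neg_entropy, p0, bounds=[(0, 1)] * len(fitnesses),
                  constraints=constraints, method="SLSQP")
assert result.success, result.message

p = result.x
print(np.round(p, 4))                     # max-entropy distribution
print(np.sum(p * fitnesses))              # ~= 0.5
print(np.sum(p * (fitnesses - mu) ** 2))  # ~= 0.05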
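
Finally, closed_ball finds B_r(x) by scanning every landscape item, so each call is O(2^N); the ball itself contains sum over i = 0..r of C(N, i) points. A quick count for illustrative N and r:

from math import comb

N, r = 8, 2
ball_size = sum(comb(N, i) for i in range(r + 1))
print(ball_size)  # 1 + 8 + 28 = 37 points within Hamming distance 2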