ai-nk-cce 0.1.0__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/PKG-INFO +1 -1
  2. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/pyproject.toml +1 -1
  3. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/nk_model/biased_prediction_agent.py +151 -27
  4. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/README.md +0 -0
  5. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/api/__init__.py +0 -0
  6. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/api/mpcdf_vllm.py +0 -0
  7. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/evals/nk_model.py +0 -0
  8. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/model/README.md +0 -0
  9. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/model/config/dataset_conv_v1.yml +0 -0
  10. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/model/config/dataset_conv_v2_m2.yml +0 -0
  11. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/model/config/dataset_conv_v3_m2_assembl_nearest.yml +0 -0
  12. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/model/config/dataset_debug.yml +0 -0
  13. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/model/config/dataset_v4_int_format.yml +0 -0
  14. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/model/config/dataset_v5.yml +0 -0
  15. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/model/config/inference.yml +0 -0
  16. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/model/config/train.yml +0 -0
  17. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/model/config/train_debug.yml +0 -0
  18. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/model/config/train_from_checkpoint.yml +0 -0
  19. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/model/config/train_from_checkpoint_debug.yml +0 -0
  20. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/model/config/train_grpo.yml +0 -0
  21. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/model/config/train_grpo_debug.yml +0 -0
  22. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/model/config/train_grpo_debug_vllm.yml +0 -0
  23. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/model/config.py +0 -0
  24. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/model/dataset.py +0 -0
  25. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/model/inference.py +0 -0
  26. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/model/nk_assistant.py +0 -0
  27. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/model/parser.py +0 -0
  28. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/model/run_slurm.py +0 -0
  29. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/model/score.ipynb +0 -0
  30. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/model/scripts/template.slurm +0 -0
  31. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/model/scripts/template_rl.slurm +0 -0
  32. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/model/train.py +0 -0
  33. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/nk_model/__init__.py +0 -0
  34. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/nk_model/assembler.py +0 -0
  35. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/nk_model/dataset.py +0 -0
  36. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/nk_model/enums.py +0 -0
  37. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/nk_model/landscape_cache.py +0 -0
  38. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/nk_model/models.py +0 -0
  39. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/nk_model/nk_landscape.py +0 -0
  40. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/simulation/hill_climber_simulation.py +0 -0
  41. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/simulation/hill_climber_vs_ai_simulation.py +0 -0
  42. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/simulation/landscape_selection.py +0 -0
  43. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/utils/__init__.py +0 -0
  44. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/utils/binary_conversion.py +0 -0
  45. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/utils/logging.py +0 -0
  46. {ai_nk_cce-0.1.0 → ai_nk_cce-0.1.2}/src/utils/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ai-nk-cce
3
- Version: 0.1.0
3
+ Version: 0.1.2
4
4
  Summary: This repository is used to train AI agents to predict good strategies in a social learning game based on a NK landscape.
5
5
  Author: Luis Mienhardt
6
6
  Author-email: mienhardt@mpib-berlin.mpg.de
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "ai-nk-cce"
3
- version = "0.1.0"
3
+ version = "0.1.2"
4
4
  description = "This repository is used to train AI agents to predict good strategies in a social learning game based on a NK landscape."
5
5
  authors = [
6
6
  "Luis Mienhardt <mienhardt@mpib-berlin.mpg.de>",
@@ -22,6 +22,8 @@ class BiasedPredictionAgent:
22
22
  landscape: NKLandscape,
23
23
  bias_seed: int | None = None,
24
24
  bias_power: float = 1.0,
25
+ adjust_mu_to_feasible: bool = True,
26
+ adjust_sigma2_to_feasible: bool = True,
25
27
  ):
26
28
  """
27
29
  Initialize the biased prediction agent.
@@ -32,9 +34,12 @@ class BiasedPredictionAgent:
32
34
  bias_power: Power applied to bias landscape G. Higher values
33
35
  make the bias more pronounced for regions with higher G(x)
34
36
  """
35
- self.landscape = landscape
37
+ self.landscape = self._normalize_landscape(landscape)
38
+
36
39
  self.bias_seed = bias_seed
37
40
  self.bias_power = bias_power
41
+ self.adjust_mu_to_feasible = adjust_mu_to_feasible
42
+ self.adjust_sigma2_to_feasible = adjust_sigma2_to_feasible
38
43
 
39
44
  # Generate bias landscape G (N'=N, K'=0)
40
45
  self.bias_landscape = self._create_bias()
@@ -43,12 +48,59 @@ class BiasedPredictionAgent:
43
48
  self.biased_landscape = self._create_biased_landscape()
44
49
 
45
50
  # Cache for distributions: key=(x_tuple, radius, mu, sigma2),
46
- # value=List of (coordinates, probability) tuples
51
+ # value=(item_distribution, actual_mu, actual_sigma2)
47
52
  self._distribution_cache: Dict[
48
53
  Tuple[Tuple[int, ...], int, float, float],
49
- List[Tuple[np.ndarray, float]],
54
+ Tuple[List[Tuple[np.ndarray, float]], float, float],
50
55
  ] = {}
51
56
 
57
+ def _normalize_landscape(self, landscape: NKLandscape) -> NKLandscape:
58
+ """
59
+ Normalize landscape payoffs to [0, 1] range.
60
+
61
+ Args:
62
+ landscape: NKLandscape to normalize
63
+
64
+ Returns:
65
+ New NKLandscape with normalized payoffs
66
+ """
67
+ # Extract payoffs
68
+ payoffs = np.array([item.payoff for item in landscape.items])
69
+ min_payoff = np.min(payoffs)
70
+ max_payoff = np.max(payoffs)
71
+
72
+ # Normalize to [0, 1]
73
+ if max_payoff > min_payoff:
74
+ normalized_payoffs = (payoffs - min_payoff) / (
75
+ max_payoff - min_payoff
76
+ )
77
+ else:
78
+ normalized_payoffs = np.zeros_like(payoffs)
79
+
80
+ # Create new landscape with normalized payoffs
81
+ normalized_items = [
82
+ Item(
83
+ coordinates=item.coordinates.copy(),
84
+ payoff=normalized_payoffs[i],
85
+ )
86
+ for i, item in enumerate(landscape.items)
87
+ ]
88
+
89
+ # Create a new NKLandscape object
90
+ normalized_landscape = NKLandscape.__new__(NKLandscape)
91
+ normalized_landscape.params = landscape.params
92
+ normalized_landscape.N = landscape.N
93
+ normalized_landscape.K = landscape.K
94
+ normalized_landscape.M = landscape.M
95
+ normalized_landscape.items = normalized_items
96
+ normalized_landscape._payoff_lookup = {
97
+ tuple(item.coordinates): item.payoff
98
+ for item in normalized_items
99
+ }
100
+ normalized_landscape.uuid = f"{landscape.uuid}_normalized"
101
+
102
+ return normalized_landscape
103
+
52
104
  def _create_bias(self) -> NKLandscape:
53
105
  """
54
106
  Create bias landscape G with N'=N, K'=0.
@@ -133,7 +185,8 @@ class BiasedPredictionAgent:
133
185
  radius: int,
134
186
  delta_mu: float,
135
187
  sigma2: float,
136
- ) -> Item:
188
+ max_mu_factor: float = 0.5,
189
+ ) -> Tuple[Item, Tuple[float, float]]:
137
190
  """
138
191
  Generate a biased suggestion from distribution p_x.
139
192
 
@@ -142,17 +195,31 @@ class BiasedPredictionAgent:
142
195
  radius: Locality parameter r (Hamming distance radius)
143
196
  delta_mu: Expected improvement delta over F'(x)
144
197
  sigma2: Variance of improvement suggestions
198
+ max_mu_factor: Maximum fraction of distance to extremes that mu
199
+ can move from current_fitness (default: 0.5)
145
200
 
146
201
  Returns:
147
- Suggested Item from original (unbiased) landscape
202
+ Tuple of (suggested_item, (actual_delta_mu, actual_sigma2)) where:
203
+ - suggested_item: Suggested Item from original (unbiased) landscape
204
+ - actual_delta_mu: Actual delta_mu used after feasibility
205
+ adjustments (may differ from input if adjusted)
206
+ - actual_sigma2: Actual sigma2 used after feasibility adjustments
207
+ (may differ from input if adjusted)
148
208
  """
149
209
  # Compute absolute mu from delta_mu
150
210
  current_fitness = self.landscape.get_payoff(x)
151
211
  mu = current_fitness + delta_mu
152
212
 
153
213
  # Get or compute distribution over all landscape items
154
- item_distribution = self.generate_prediction_distribution(
155
- x, radius, mu, sigma2
214
+ item_distribution, actual_mu, actual_sigma2 = (
215
+ self.generate_prediction_distribution(
216
+ x=x,
217
+ radius=radius,
218
+ mu=mu,
219
+ sigma2=sigma2,
220
+ current_fitness=current_fitness,
221
+ max_mu_factor=max_mu_factor,
222
+ )
156
223
  )
157
224
 
158
225
  # Extract coordinates and probabilities
@@ -163,10 +230,12 @@ class BiasedPredictionAgent:
163
230
  sampled_idx = np.random.choice(len(coordinates), p=probabilities)
164
231
  sampled_coords = coordinates[sampled_idx]
165
232
 
166
- # Find and return the corresponding Item from original landscape
233
+ # Find the corresponding Item from original landscape
167
234
  for item in self.landscape.items:
168
235
  if np.array_equal(item.coordinates, sampled_coords):
169
- return item
236
+ # Convert actual_mu back to delta_mu
237
+ actual_delta_mu = actual_mu - current_fitness
238
+ return (item, (actual_delta_mu, actual_sigma2))
170
239
 
171
240
  raise ValueError(
172
241
  f"Item with coordinates {sampled_coords} not found"
@@ -178,7 +247,9 @@ class BiasedPredictionAgent:
178
247
  radius: int,
179
248
  mu: float,
180
249
  sigma2: float,
181
- ) -> List[Tuple[np.ndarray, float]]:
250
+ current_fitness: float,
251
+ max_mu_factor: float = 0.5,
252
+ ) -> Tuple[List[Tuple[np.ndarray, float]], float, float]:
182
253
  """
183
254
  Generate prediction distribution p_x using maximum entropy.
184
255
 
@@ -190,31 +261,48 @@ class BiasedPredictionAgent:
190
261
  radius: Locality parameter r (Hamming distance radius)
191
262
  mu: Target mean fitness F'(y)
192
263
  sigma2: Target variance of fitness F'(y)
264
+ current_fitness: Current fitness value F'(x)
265
+ max_mu_factor: Maximum fraction of distance to extremes that mu
266
+ can move from current_fitness (default: 0.5)
193
267
 
194
268
  Returns:
195
- List of (coordinates, probability) tuples for all items
269
+ Tuple of (item_distribution, actual_mu, actual_sigma2) where:
270
+ - item_distribution: List of (coordinates, probability) tuples
271
+ for all items
272
+ - actual_mu: Actual mu value used after feasibility adjustments
273
+ - actual_sigma2: Actual sigma2 value used after feasibility
274
+ adjustments
196
275
  """
197
276
  # Check cache
198
277
  x_tuple = tuple(x)
199
278
  cache_key = (x_tuple, radius, mu, sigma2)
200
279
  if cache_key in self._distribution_cache:
201
- return self._distribution_cache[cache_key]
280
+ cached_dist, cached_mu, cached_sigma2 = (
281
+ self._distribution_cache[cache_key]
282
+ )
283
+ return cached_dist, cached_mu, cached_sigma2
202
284
 
203
285
  # Get ball items from original landscape
204
286
  ball_items = self.closed_ball(x, radius)
205
287
  ball_item_coordinates = [item.coordinates.copy() for item in ball_items]
206
288
 
207
- # Check feasibility
208
- self.feasible_mean_and_variance(ball_items, mu, sigma2)
289
+ # Check feasibility and get adjusted values
290
+ actual_mu, actual_sigma2 = self.feasible_mean_and_variance(
291
+ ball_items=ball_items,
292
+ mu=mu,
293
+ sigma2=sigma2,
294
+ current_fitness=current_fitness,
295
+ max_mu_factor=max_mu_factor,
296
+ )
209
297
 
210
298
  # Initialize distribution with zeros for all items
211
299
  item_distribution = [
212
300
  (item.coordinates.copy(), 0.0) for item in self.landscape.items
213
301
  ]
214
302
 
215
- # Get probabilities for ball items
303
+ # Get probabilities for ball items using adjusted values
216
304
  ball_distribution = self.solve_constraint_minimization(
217
- mu, sigma2, ball_item_coordinates
305
+ actual_mu, actual_sigma2, ball_item_coordinates
218
306
  )
219
307
 
220
308
  # Create lookup for ball items
@@ -231,30 +319,41 @@ class BiasedPredictionAgent:
231
319
  for coords, _ in item_distribution
232
320
  ]
233
321
 
234
- # Cache result
235
- self._distribution_cache[cache_key] = item_distribution
322
+ # Cache result with actual values
323
+ result = (item_distribution, actual_mu, actual_sigma2)
324
+ self._distribution_cache[cache_key] = result
236
325
 
237
- return item_distribution
326
+ return result
238
327
 
239
328
  def feasible_mean_and_variance(
240
329
  self,
241
330
  ball_items: list[Item],
242
331
  mu: float,
243
332
  sigma2: float,
333
+ current_fitness: float,
334
+ max_mu_factor: float = 0.5,
244
335
  ):
245
336
  """
246
337
  Get feasible min and max fitness values and validate mu, sigma2.
247
338
 
339
+ When adjust_mu_to_feasible is True, mu is capped dynamically based on
340
+ max_mu_factor. The boundaries allow mu to move at most max_mu_factor
341
+ fraction of the distance from current_fitness toward each extreme.
342
+
248
343
  Args:
249
344
  ball_items: List of items in B_r(x) from original landscape
250
345
  mu: Target mean fitness F'(y)
251
346
  sigma2: Target variance of fitness F'(y)
347
+ current_fitness: Current fitness value F'(x)
348
+ max_mu_factor: Maximum fraction of distance to extremes that mu
349
+ can move from current_fitness (default: 0.5)
252
350
 
253
351
  Returns:
254
- Tuple of (min_fitness, max_fitness) in B_r(x)
352
+ Tuple of (mu, sigma2) with adjusted values
255
353
 
256
354
  Raises:
257
- ValueError: If mu or sigma2 are not feasible
355
+ ValueError: If mu or sigma2 are not feasible and adjustment
356
+ is disabled
258
357
  """
259
358
  fitnesses = [
260
359
  self.biased_landscape.get_payoff(item.coordinates)
@@ -263,8 +362,28 @@ class BiasedPredictionAgent:
263
362
  min_fitness = float(np.min(fitnesses))
264
363
  max_fitness = float(np.max(fitnesses))
265
364
 
266
- # Check mean feasibility
267
- if not (min_fitness <= mu <= max_fitness):
365
+ # Apply dynamic mu capping based on max_mu_factor
366
+ if self.adjust_mu_to_feasible:
367
+ # Calculate distances from current_fitness to extremes
368
+ dist_to_min = current_fitness - min_fitness
369
+ dist_to_max = max_fitness - current_fitness
370
+
371
+ # Calculate dynamic boundaries: mu can move at most
372
+ # max_mu_factor fraction of the distance to each extreme
373
+ mu_lower_bound = (
374
+ current_fitness - max_mu_factor * dist_to_min
375
+ )
376
+ mu_upper_bound = (
377
+ current_fitness + max_mu_factor * dist_to_max
378
+ )
379
+
380
+ # Ensure boundaries respect absolute limits
381
+ mu_lower_bound = max(mu_lower_bound, min_fitness)
382
+ mu_upper_bound = min(mu_upper_bound, max_fitness)
383
+
384
+ # Clamp mu to dynamic boundaries
385
+ mu = min(max(mu, mu_lower_bound), mu_upper_bound)
386
+ elif not (min_fitness <= mu <= max_fitness):
268
387
  raise ValueError(
269
388
  f"mu={mu} not feasible. Must be in "
270
389
  f"[{min_fitness}, {max_fitness}]"
@@ -272,10 +391,15 @@ class BiasedPredictionAgent:
272
391
 
273
392
  # Check variance feasibility (Bhatia-Davis inequality)
274
393
  max_var = (mu - min_fitness) * (max_fitness - mu)
275
- if sigma2 > max_var:
276
- raise ValueError(
277
- f"sigma2={sigma2} not feasible. Must be <= {max_var}"
278
- )
394
+ if not (0 <= sigma2 <= max_var):
395
+ if self.adjust_sigma2_to_feasible:
396
+ sigma2 = min(max(sigma2, 0), max_var)
397
+ else:
398
+ raise ValueError(
399
+ f"sigma2={sigma2} not feasible. Must be <= {max_var}"
400
+ )
401
+
402
+ return mu, sigma2
279
403
 
280
404
  def solve_constraint_minimization(
281
405
  self,
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes