ai-nk-cce 0.1.1__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/PKG-INFO +1 -1
  2. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/pyproject.toml +1 -1
  3. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/nk_model/biased_prediction_agent.py +102 -26
  4. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/README.md +0 -0
  5. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/api/__init__.py +0 -0
  6. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/api/mpcdf_vllm.py +0 -0
  7. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/evals/nk_model.py +0 -0
  8. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/README.md +0 -0
  9. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/config/dataset_conv_v1.yml +0 -0
  10. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/config/dataset_conv_v2_m2.yml +0 -0
  11. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/config/dataset_conv_v3_m2_assembl_nearest.yml +0 -0
  12. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/config/dataset_debug.yml +0 -0
  13. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/config/dataset_v4_int_format.yml +0 -0
  14. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/config/dataset_v5.yml +0 -0
  15. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/config/inference.yml +0 -0
  16. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/config/train.yml +0 -0
  17. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/config/train_debug.yml +0 -0
  18. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/config/train_from_checkpoint.yml +0 -0
  19. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/config/train_from_checkpoint_debug.yml +0 -0
  20. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/config/train_grpo.yml +0 -0
  21. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/config/train_grpo_debug.yml +0 -0
  22. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/config/train_grpo_debug_vllm.yml +0 -0
  23. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/config.py +0 -0
  24. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/dataset.py +0 -0
  25. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/inference.py +0 -0
  26. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/nk_assistant.py +0 -0
  27. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/parser.py +0 -0
  28. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/run_slurm.py +0 -0
  29. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/score.ipynb +0 -0
  30. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/scripts/template.slurm +0 -0
  31. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/scripts/template_rl.slurm +0 -0
  32. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/train.py +0 -0
  33. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/nk_model/__init__.py +0 -0
  34. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/nk_model/assembler.py +0 -0
  35. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/nk_model/dataset.py +0 -0
  36. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/nk_model/enums.py +0 -0
  37. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/nk_model/landscape_cache.py +0 -0
  38. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/nk_model/models.py +0 -0
  39. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/nk_model/nk_landscape.py +0 -0
  40. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/simulation/hill_climber_simulation.py +0 -0
  41. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/simulation/hill_climber_vs_ai_simulation.py +0 -0
  42. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/simulation/landscape_selection.py +0 -0
  43. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/utils/__init__.py +0 -0
  44. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/utils/binary_conversion.py +0 -0
  45. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/utils/logging.py +0 -0
  46. {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/utils/utils.py +0 -0
{ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: ai-nk-cce
- Version: 0.1.1
+ Version: 0.1.2
  Summary: This repository is used to train AI agents to predict good strategies in a social learning game based on a NK landscape.
  Author: Luis Mienhardt
  Author-email: mienhardt@mpib-berlin.mpg.de
{ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/pyproject.toml
@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "ai-nk-cce"
- version = "0.1.1"
+ version = "0.1.2"
  description = "This repository is used to train AI agents to predict good strategies in a social learning game based on a NK landscape."
  authors = [
  "Luis Mienhardt <mienhardt@mpib-berlin.mpg.de>",
{ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/nk_model/biased_prediction_agent.py
@@ -22,6 +22,8 @@ class BiasedPredictionAgent:
  landscape: NKLandscape,
  bias_seed: int | None = None,
  bias_power: float = 1.0,
+ adjust_mu_to_feasible: bool = True,
+ adjust_sigma2_to_feasible: bool = True,
  ):
  """
  Initialize the biased prediction agent.
@@ -36,6 +38,8 @@ class BiasedPredictionAgent:

  self.bias_seed = bias_seed
  self.bias_power = bias_power
+ self.adjust_mu_to_feasible = adjust_mu_to_feasible
+ self.adjust_sigma2_to_feasible = adjust_sigma2_to_feasible

  # Generate bias landscape G (N'=N, K'=0)
  self.bias_landscape = self._create_bias()
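These two flags control what the agent does when a caller requests an infeasible (mu, sigma2) target: with the defaults shown, the targets are clamped to the feasible range instead of raising (see the feasible_mean_and_variance hunks further down). A minimal construction sketch, assuming import paths that mirror the source layout and made-up NKLandscape arguments, neither of which comes from this diff:

    from src.nk_model.nk_landscape import NKLandscape            # assumed import path
    from src.nk_model.biased_prediction_agent import BiasedPredictionAgent

    landscape = NKLandscape(n=10, k=2)      # hypothetical constructor arguments
    agent = BiasedPredictionAgent(
        landscape=landscape,
        bias_seed=123,
        bias_power=1.0,
        adjust_mu_to_feasible=True,         # clamp infeasible mu instead of raising
        adjust_sigma2_to_feasible=True,     # clamp infeasible sigma2 instead of raising
    )

Setting either flag to False keeps the 0.1.1 behavior of raising ValueError for the corresponding parameter.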
@@ -44,10 +48,10 @@ class BiasedPredictionAgent:
  self.biased_landscape = self._create_biased_landscape()

  # Cache for distributions: key=(x_tuple, radius, mu, sigma2),
- # value=List of (coordinates, probability) tuples
+ # value=(item_distribution, actual_mu, actual_sigma2)
  self._distribution_cache: Dict[
  Tuple[Tuple[int, ...], int, float, float],
- List[Tuple[np.ndarray, float]],
+ Tuple[List[Tuple[np.ndarray, float]], float, float],
  ] = {}

  def _normalize_landscape(self, landscape: NKLandscape) -> NKLandscape:
@@ -181,7 +185,8 @@ class BiasedPredictionAgent:
  radius: int,
  delta_mu: float,
  sigma2: float,
- ) -> Item:
+ max_mu_factor: float = 0.5,
+ ) -> Tuple[Item, Tuple[float, float]]:
  """
  Generate a biased suggestion from distribution p_x.
@@ -190,17 +195,31 @@ class BiasedPredictionAgent:
  radius: Locality parameter r (Hamming distance radius)
  delta_mu: Expected improvement delta over F'(x)
  sigma2: Variance of improvement suggestions
+ max_mu_factor: Maximum fraction of distance to extremes that mu
+ can move from current_fitness (default: 0.5)

  Returns:
- Suggested Item from original (unbiased) landscape
+ Tuple of (suggested_item, (actual_delta_mu, actual_sigma2)) where:
+ - suggested_item: Suggested Item from original (unbiased) landscape
+ - actual_delta_mu: Actual delta_mu used after feasibility
+ adjustments (may differ from input if adjusted)
+ - actual_sigma2: Actual sigma2 used after feasibility adjustments
+ (may differ from input if adjusted)
  """
  # Compute absolute mu from delta_mu
  current_fitness = self.landscape.get_payoff(x)
  mu = current_fitness + delta_mu

  # Get or compute distribution over all landscape items
- item_distribution = self.generate_prediction_distribution(
- x, radius, mu, sigma2
+ item_distribution, actual_mu, actual_sigma2 = (
+ self.generate_prediction_distribution(
+ x=x,
+ radius=radius,
+ mu=mu,
+ sigma2=sigma2,
+ current_fitness=current_fitness,
+ max_mu_factor=max_mu_factor,
+ )
  )

  # Extract coordinates and probabilities
@@ -211,10 +230,12 @@
  sampled_idx = np.random.choice(len(coordinates), p=probabilities)
  sampled_coords = coordinates[sampled_idx]

- # Find and return the corresponding Item from original landscape
+ # Find the corresponding Item from original landscape
  for item in self.landscape.items:
  if np.array_equal(item.coordinates, sampled_coords):
- return item
+ # Convert actual_mu back to delta_mu
+ actual_delta_mu = actual_mu - current_fitness
+ return (item, (actual_delta_mu, actual_sigma2))

  raise ValueError(
  f"Item with coordinates {sampled_coords} not found"
@@ -226,7 +247,9 @@
  radius: int,
  mu: float,
  sigma2: float,
- ) -> List[Tuple[np.ndarray, float]]:
+ current_fitness: float,
+ max_mu_factor: float = 0.5,
+ ) -> Tuple[List[Tuple[np.ndarray, float]], float, float]:
  """
  Generate prediction distribution p_x using maximum entropy.
@@ -238,31 +261,48 @@
  radius: Locality parameter r (Hamming distance radius)
  mu: Target mean fitness F'(y)
  sigma2: Target variance of fitness F'(y)
+ current_fitness: Current fitness value F'(x)
+ max_mu_factor: Maximum fraction of distance to extremes that mu
+ can move from current_fitness (default: 0.5)

  Returns:
- List of (coordinates, probability) tuples for all items
+ Tuple of (item_distribution, actual_mu, actual_sigma2) where:
+ - item_distribution: List of (coordinates, probability) tuples
+ for all items
+ - actual_mu: Actual mu value used after feasibility adjustments
+ - actual_sigma2: Actual sigma2 value used after feasibility
+ adjustments
  """
  # Check cache
  x_tuple = tuple(x)
  cache_key = (x_tuple, radius, mu, sigma2)
  if cache_key in self._distribution_cache:
- return self._distribution_cache[cache_key]
+ cached_dist, cached_mu, cached_sigma2 = (
+ self._distribution_cache[cache_key]
+ )
+ return cached_dist, cached_mu, cached_sigma2

  # Get ball items from original landscape
  ball_items = self.closed_ball(x, radius)
  ball_item_coordinates = [item.coordinates.copy() for item in ball_items]

- # Check feasibility
- self.feasible_mean_and_variance(ball_items, mu, sigma2)
+ # Check feasibility and get adjusted values
+ actual_mu, actual_sigma2 = self.feasible_mean_and_variance(
+ ball_items=ball_items,
+ mu=mu,
+ sigma2=sigma2,
+ current_fitness=current_fitness,
+ max_mu_factor=max_mu_factor,
+ )

  # Initialize distribution with zeros for all items
  item_distribution = [
  (item.coordinates.copy(), 0.0) for item in self.landscape.items
  ]

- # Get probabilities for ball items
+ # Get probabilities for ball items using adjusted values
  ball_distribution = self.solve_constraint_minimization(
- mu, sigma2, ball_item_coordinates
+ actual_mu, actual_sigma2, ball_item_coordinates
  )

  # Create lookup for ball items
@@ -279,30 +319,41 @@
  for coords, _ in item_distribution
  ]

- # Cache result
- self._distribution_cache[cache_key] = item_distribution
+ # Cache result with actual values
+ result = (item_distribution, actual_mu, actual_sigma2)
+ self._distribution_cache[cache_key] = result

- return item_distribution
+ return result

  def feasible_mean_and_variance(
  self,
  ball_items: list[Item],
  mu: float,
  sigma2: float,
+ current_fitness: float,
+ max_mu_factor: float = 0.5,
  ):
  """
  Get feasible min and max fitness values and validate mu, sigma2.

+ When adjust_mu_to_feasible is True, mu is capped dynamically based on
+ max_mu_factor. The boundaries allow mu to move at most max_mu_factor
+ fraction of the distance from current_fitness toward each extreme.
+
  Args:
  ball_items: List of items in B_r(x) from original landscape
  mu: Target mean fitness F'(y)
  sigma2: Target variance of fitness F'(y)
+ current_fitness: Current fitness value F'(x)
+ max_mu_factor: Maximum fraction of distance to extremes that mu
+ can move from current_fitness (default: 0.5)

  Returns:
- Tuple of (min_fitness, max_fitness) in B_r(x)
+ Tuple of (mu, sigma2) with adjusted values

  Raises:
- ValueError: If mu or sigma2 are not feasible
+ ValueError: If mu or sigma2 are not feasible and adjustment
+ is disabled
  """
  fitnesses = [
  self.biased_landscape.get_payoff(item.coordinates)
@@ -311,8 +362,28 @@
  min_fitness = float(np.min(fitnesses))
  max_fitness = float(np.max(fitnesses))

- # Check mean feasibility
- if not (min_fitness <= mu <= max_fitness):
+ # Apply dynamic mu capping based on max_mu_factor
+ if self.adjust_mu_to_feasible:
+ # Calculate distances from current_fitness to extremes
+ dist_to_min = current_fitness - min_fitness
+ dist_to_max = max_fitness - current_fitness
+
+ # Calculate dynamic boundaries: mu can move at most
+ # max_mu_factor fraction of the distance to each extreme
+ mu_lower_bound = (
+ current_fitness - max_mu_factor * dist_to_min
+ )
+ mu_upper_bound = (
+ current_fitness + max_mu_factor * dist_to_max
+ )
+
+ # Ensure boundaries respect absolute limits
+ mu_lower_bound = max(mu_lower_bound, min_fitness)
+ mu_upper_bound = min(mu_upper_bound, max_fitness)
+
+ # Clamp mu to dynamic boundaries
+ mu = min(max(mu, mu_lower_bound), mu_upper_bound)
+ elif not (min_fitness <= mu <= max_fitness):
  raise ValueError(
  f"mu={mu} not feasible. Must be in "
  f"[{min_fitness}, {max_fitness}]"
@@ -320,10 +391,15 @@

  # Check variance feasibility (Bhatia-Davis inequality)
  max_var = (mu - min_fitness) * (max_fitness - mu)
- if sigma2 > max_var:
- raise ValueError(
- f"sigma2={sigma2} not feasible. Must be <= {max_var}"
- )
+ if not (0 <= sigma2 <= max_var):
+ if self.adjust_sigma2_to_feasible:
+ sigma2 = min(max(sigma2, 0), max_var)
+ else:
+ raise ValueError(
+ f"sigma2={sigma2} not feasible. Must be <= {max_var}"
+ )
+
+ return mu, sigma2

  def solve_constraint_minimization(
  self,
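The variance cap is the Bhatia-Davis inequality: for a distribution supported on [min_fitness, max_fitness] with mean mu, the variance cannot exceed (mu - min_fitness) * (max_fitness - mu). A short sketch of the new clamping path, continuing the assumed numbers from the previous example (note that max_var is computed from the already-clamped mu):

    min_fitness, max_fitness, mu = 0.2, 0.9, 0.7       # assumed values
    max_var = (mu - min_fitness) * (max_fitness - mu)  # 0.5 * 0.2 = 0.10
    requested_sigma2 = 0.25                            # infeasible request
    # With adjust_sigma2_to_feasible=True, 0.1.2 clamps instead of raising:
    sigma2 = min(max(requested_sigma2, 0), max_var)    # -> 0.10
    print(max_var, sigma2)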