ai-nk-cce 0.1.1__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/PKG-INFO +1 -1
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/pyproject.toml +1 -1
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/nk_model/biased_prediction_agent.py +102 -26
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/README.md +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/api/__init__.py +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/api/mpcdf_vllm.py +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/evals/nk_model.py +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/README.md +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/config/dataset_conv_v1.yml +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/config/dataset_conv_v2_m2.yml +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/config/dataset_conv_v3_m2_assembl_nearest.yml +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/config/dataset_debug.yml +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/config/dataset_v4_int_format.yml +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/config/dataset_v5.yml +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/config/inference.yml +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/config/train.yml +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/config/train_debug.yml +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/config/train_from_checkpoint.yml +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/config/train_from_checkpoint_debug.yml +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/config/train_grpo.yml +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/config/train_grpo_debug.yml +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/config/train_grpo_debug_vllm.yml +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/config.py +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/dataset.py +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/inference.py +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/nk_assistant.py +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/parser.py +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/run_slurm.py +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/score.ipynb +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/scripts/template.slurm +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/scripts/template_rl.slurm +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/model/train.py +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/nk_model/__init__.py +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/nk_model/assembler.py +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/nk_model/dataset.py +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/nk_model/enums.py +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/nk_model/landscape_cache.py +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/nk_model/models.py +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/nk_model/nk_landscape.py +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/simulation/hill_climber_simulation.py +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/simulation/hill_climber_vs_ai_simulation.py +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/simulation/landscape_selection.py +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/utils/__init__.py +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/utils/binary_conversion.py +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/utils/logging.py +0 -0
- {ai_nk_cce-0.1.1 → ai_nk_cce-0.1.2}/src/utils/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: ai-nk-cce
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: This repository is used to train AI agents to predict good strategies in a social learning game based on a NK landscape.
|
|
5
5
|
Author: Luis Mienhardt
|
|
6
6
|
Author-email: mienhardt@mpib-berlin.mpg.de
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "ai-nk-cce"
|
|
3
|
-
version = "0.1.1"
|
|
3
|
+
version = "0.1.2"
|
|
4
4
|
description = "This repository is used to train AI agents to predict good strategies in a social learning game based on a NK landscape."
|
|
5
5
|
authors = [
|
|
6
6
|
"Luis Mienhardt <mienhardt@mpib-berlin.mpg.de>",
|
|
@@ -22,6 +22,8 @@ class BiasedPredictionAgent:
|
|
|
22
22
|
landscape: NKLandscape,
|
|
23
23
|
bias_seed: int | None = None,
|
|
24
24
|
bias_power: float = 1.0,
|
|
25
|
+
adjust_mu_to_feasible: bool = True,
|
|
26
|
+
adjust_sigma2_to_feasible: bool = True,
|
|
25
27
|
):
|
|
26
28
|
"""
|
|
27
29
|
Initialize the biased prediction agent.
|
|
@@ -36,6 +38,8 @@ class BiasedPredictionAgent:
|
|
|
36
38
|
|
|
37
39
|
self.bias_seed = bias_seed
|
|
38
40
|
self.bias_power = bias_power
|
|
41
|
+
self.adjust_mu_to_feasible = adjust_mu_to_feasible
|
|
42
|
+
self.adjust_sigma2_to_feasible = adjust_sigma2_to_feasible
|
|
39
43
|
|
|
40
44
|
# Generate bias landscape G (N'=N, K'=0)
|
|
41
45
|
self.bias_landscape = self._create_bias()
|
|
@@ -44,10 +48,10 @@ class BiasedPredictionAgent:
|
|
|
44
48
|
self.biased_landscape = self._create_biased_landscape()
|
|
45
49
|
|
|
46
50
|
# Cache for distributions: key=(x_tuple, radius, mu, sigma2),
|
|
47
|
-
# value=
|
|
51
|
+
# value=(item_distribution, actual_mu, actual_sigma2)
|
|
48
52
|
self._distribution_cache: Dict[
|
|
49
53
|
Tuple[Tuple[int, ...], int, float, float],
|
|
50
|
-
List[Tuple[np.ndarray, float]],
|
|
54
|
+
Tuple[List[Tuple[np.ndarray, float]], float, float],
|
|
51
55
|
] = {}
|
|
52
56
|
|
|
53
57
|
def _normalize_landscape(self, landscape: NKLandscape) -> NKLandscape:
|
|
@@ -181,7 +185,8 @@ class BiasedPredictionAgent:
|
|
|
181
185
|
radius: int,
|
|
182
186
|
delta_mu: float,
|
|
183
187
|
sigma2: float,
|
|
184
|
-
|
|
188
|
+
max_mu_factor: float = 0.5,
|
|
189
|
+
) -> Tuple[Item, Tuple[float, float]]:
|
|
185
190
|
"""
|
|
186
191
|
Generate a biased suggestion from distribution p_x.
|
|
187
192
|
|
|
@@ -190,17 +195,31 @@ class BiasedPredictionAgent:
|
|
|
190
195
|
radius: Locality parameter r (Hamming distance radius)
|
|
191
196
|
delta_mu: Expected improvement delta over F'(x)
|
|
192
197
|
sigma2: Variance of improvement suggestions
|
|
198
|
+
max_mu_factor: Maximum fraction of distance to extremes that mu
|
|
199
|
+
can move from current_fitness (default: 0.5)
|
|
193
200
|
|
|
194
201
|
Returns:
|
|
195
|
-
|
|
202
|
+
Tuple of (suggested_item, (actual_delta_mu, actual_sigma2)) where:
|
|
203
|
+
- suggested_item: Suggested Item from original (unbiased) landscape
|
|
204
|
+
- actual_delta_mu: Actual delta_mu used after feasibility
|
|
205
|
+
adjustments (may differ from input if adjusted)
|
|
206
|
+
- actual_sigma2: Actual sigma2 used after feasibility adjustments
|
|
207
|
+
(may differ from input if adjusted)
|
|
196
208
|
"""
|
|
197
209
|
# Compute absolute mu from delta_mu
|
|
198
210
|
current_fitness = self.landscape.get_payoff(x)
|
|
199
211
|
mu = current_fitness + delta_mu
|
|
200
212
|
|
|
201
213
|
# Get or compute distribution over all landscape items
|
|
202
|
-
item_distribution =
|
|
203
|
-
|
|
214
|
+
item_distribution, actual_mu, actual_sigma2 = (
|
|
215
|
+
self.generate_prediction_distribution(
|
|
216
|
+
x=x,
|
|
217
|
+
radius=radius,
|
|
218
|
+
mu=mu,
|
|
219
|
+
sigma2=sigma2,
|
|
220
|
+
current_fitness=current_fitness,
|
|
221
|
+
max_mu_factor=max_mu_factor,
|
|
222
|
+
)
|
|
204
223
|
)
|
|
205
224
|
|
|
206
225
|
# Extract coordinates and probabilities
|
|
@@ -211,10 +230,12 @@ class BiasedPredictionAgent:
|
|
|
211
230
|
sampled_idx = np.random.choice(len(coordinates), p=probabilities)
|
|
212
231
|
sampled_coords = coordinates[sampled_idx]
|
|
213
232
|
|
|
214
|
-
# Find
|
|
233
|
+
# Find the corresponding Item from original landscape
|
|
215
234
|
for item in self.landscape.items:
|
|
216
235
|
if np.array_equal(item.coordinates, sampled_coords):
|
|
217
|
-
|
|
236
|
+
# Convert actual_mu back to delta_mu
|
|
237
|
+
actual_delta_mu = actual_mu - current_fitness
|
|
238
|
+
return (item, (actual_delta_mu, actual_sigma2))
|
|
218
239
|
|
|
219
240
|
raise ValueError(
|
|
220
241
|
f"Item with coordinates {sampled_coords} not found"
|
|
@@ -226,7 +247,9 @@ class BiasedPredictionAgent:
|
|
|
226
247
|
radius: int,
|
|
227
248
|
mu: float,
|
|
228
249
|
sigma2: float,
|
|
229
|
-
|
|
250
|
+
current_fitness: float,
|
|
251
|
+
max_mu_factor: float = 0.5,
|
|
252
|
+
) -> Tuple[List[Tuple[np.ndarray, float]], float, float]:
|
|
230
253
|
"""
|
|
231
254
|
Generate prediction distribution p_x using maximum entropy.
|
|
232
255
|
|
|
@@ -238,31 +261,48 @@ class BiasedPredictionAgent:
|
|
|
238
261
|
radius: Locality parameter r (Hamming distance radius)
|
|
239
262
|
mu: Target mean fitness F'(y)
|
|
240
263
|
sigma2: Target variance of fitness F'(y)
|
|
264
|
+
current_fitness: Current fitness value F'(x)
|
|
265
|
+
max_mu_factor: Maximum fraction of distance to extremes that mu
|
|
266
|
+
can move from current_fitness (default: 0.5)
|
|
241
267
|
|
|
242
268
|
Returns:
|
|
243
|
-
|
|
269
|
+
Tuple of (item_distribution, actual_mu, actual_sigma2) where:
|
|
270
|
+
- item_distribution: List of (coordinates, probability) tuples
|
|
271
|
+
for all items
|
|
272
|
+
- actual_mu: Actual mu value used after feasibility adjustments
|
|
273
|
+
- actual_sigma2: Actual sigma2 value used after feasibility
|
|
274
|
+
adjustments
|
|
244
275
|
"""
|
|
245
276
|
# Check cache
|
|
246
277
|
x_tuple = tuple(x)
|
|
247
278
|
cache_key = (x_tuple, radius, mu, sigma2)
|
|
248
279
|
if cache_key in self._distribution_cache:
|
|
249
|
-
|
|
280
|
+
cached_dist, cached_mu, cached_sigma2 = (
|
|
281
|
+
self._distribution_cache[cache_key]
|
|
282
|
+
)
|
|
283
|
+
return cached_dist, cached_mu, cached_sigma2
|
|
250
284
|
|
|
251
285
|
# Get ball items from original landscape
|
|
252
286
|
ball_items = self.closed_ball(x, radius)
|
|
253
287
|
ball_item_coordinates = [item.coordinates.copy() for item in ball_items]
|
|
254
288
|
|
|
255
|
-
# Check feasibility
|
|
256
|
-
self.feasible_mean_and_variance(
|
|
289
|
+
# Check feasibility and get adjusted values
|
|
290
|
+
actual_mu, actual_sigma2 = self.feasible_mean_and_variance(
|
|
291
|
+
ball_items=ball_items,
|
|
292
|
+
mu=mu,
|
|
293
|
+
sigma2=sigma2,
|
|
294
|
+
current_fitness=current_fitness,
|
|
295
|
+
max_mu_factor=max_mu_factor,
|
|
296
|
+
)
|
|
257
297
|
|
|
258
298
|
# Initialize distribution with zeros for all items
|
|
259
299
|
item_distribution = [
|
|
260
300
|
(item.coordinates.copy(), 0.0) for item in self.landscape.items
|
|
261
301
|
]
|
|
262
302
|
|
|
263
|
-
# Get probabilities for ball items
|
|
303
|
+
# Get probabilities for ball items using adjusted values
|
|
264
304
|
ball_distribution = self.solve_constraint_minimization(
|
|
265
|
-
|
|
305
|
+
actual_mu, actual_sigma2, ball_item_coordinates
|
|
266
306
|
)
|
|
267
307
|
|
|
268
308
|
# Create lookup for ball items
|
|
@@ -279,30 +319,41 @@ class BiasedPredictionAgent:
|
|
|
279
319
|
for coords, _ in item_distribution
|
|
280
320
|
]
|
|
281
321
|
|
|
282
|
-
# Cache result
|
|
283
|
-
|
|
322
|
+
# Cache result with actual values
|
|
323
|
+
result = (item_distribution, actual_mu, actual_sigma2)
|
|
324
|
+
self._distribution_cache[cache_key] = result
|
|
284
325
|
|
|
285
|
-
return
|
|
326
|
+
return result
|
|
286
327
|
|
|
287
328
|
def feasible_mean_and_variance(
|
|
288
329
|
self,
|
|
289
330
|
ball_items: list[Item],
|
|
290
331
|
mu: float,
|
|
291
332
|
sigma2: float,
|
|
333
|
+
current_fitness: float,
|
|
334
|
+
max_mu_factor: float = 0.5,
|
|
292
335
|
):
|
|
293
336
|
"""
|
|
294
337
|
Get feasible min and max fitness values and validate mu, sigma2.
|
|
295
338
|
|
|
339
|
+
When adjust_mu_to_feasible is True, mu is capped dynamically based on
|
|
340
|
+
max_mu_factor. The boundaries allow mu to move at most max_mu_factor
|
|
341
|
+
fraction of the distance from current_fitness toward each extreme.
|
|
342
|
+
|
|
296
343
|
Args:
|
|
297
344
|
ball_items: List of items in B_r(x) from original landscape
|
|
298
345
|
mu: Target mean fitness F'(y)
|
|
299
346
|
sigma2: Target variance of fitness F'(y)
|
|
347
|
+
current_fitness: Current fitness value F'(x)
|
|
348
|
+
max_mu_factor: Maximum fraction of distance to extremes that mu
|
|
349
|
+
can move from current_fitness (default: 0.5)
|
|
300
350
|
|
|
301
351
|
Returns:
|
|
302
|
-
Tuple of (
|
|
352
|
+
Tuple of (mu, sigma2) with adjusted values
|
|
303
353
|
|
|
304
354
|
Raises:
|
|
305
|
-
ValueError: If mu or sigma2 are not feasible
|
|
355
|
+
ValueError: If mu or sigma2 are not feasible and adjustment
|
|
356
|
+
is disabled
|
|
306
357
|
"""
|
|
307
358
|
fitnesses = [
|
|
308
359
|
self.biased_landscape.get_payoff(item.coordinates)
|
|
@@ -311,8 +362,28 @@ class BiasedPredictionAgent:
|
|
|
311
362
|
min_fitness = float(np.min(fitnesses))
|
|
312
363
|
max_fitness = float(np.max(fitnesses))
|
|
313
364
|
|
|
314
|
-
#
|
|
315
|
-
if
|
|
365
|
+
# Apply dynamic mu capping based on max_mu_factor
|
|
366
|
+
if self.adjust_mu_to_feasible:
|
|
367
|
+
# Calculate distances from current_fitness to extremes
|
|
368
|
+
dist_to_min = current_fitness - min_fitness
|
|
369
|
+
dist_to_max = max_fitness - current_fitness
|
|
370
|
+
|
|
371
|
+
# Calculate dynamic boundaries: mu can move at most
|
|
372
|
+
# max_mu_factor fraction of the distance to each extreme
|
|
373
|
+
mu_lower_bound = (
|
|
374
|
+
current_fitness - max_mu_factor * dist_to_min
|
|
375
|
+
)
|
|
376
|
+
mu_upper_bound = (
|
|
377
|
+
current_fitness + max_mu_factor * dist_to_max
|
|
378
|
+
)
|
|
379
|
+
|
|
380
|
+
# Ensure boundaries respect absolute limits
|
|
381
|
+
mu_lower_bound = max(mu_lower_bound, min_fitness)
|
|
382
|
+
mu_upper_bound = min(mu_upper_bound, max_fitness)
|
|
383
|
+
|
|
384
|
+
# Clamp mu to dynamic boundaries
|
|
385
|
+
mu = min(max(mu, mu_lower_bound), mu_upper_bound)
|
|
386
|
+
elif not (min_fitness <= mu <= max_fitness):
|
|
316
387
|
raise ValueError(
|
|
317
388
|
f"mu={mu} not feasible. Must be in "
|
|
318
389
|
f"[{min_fitness}, {max_fitness}]"
|
|
@@ -320,10 +391,15 @@ class BiasedPredictionAgent:
|
|
|
320
391
|
|
|
321
392
|
# Check variance feasibility (Bhatia-Davis inequality)
|
|
322
393
|
max_var = (mu - min_fitness) * (max_fitness - mu)
|
|
323
|
-
if sigma2
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
394
|
+
if not (0 <= sigma2 <= max_var):
|
|
395
|
+
if self.adjust_sigma2_to_feasible:
|
|
396
|
+
sigma2 = min(max(sigma2, 0), max_var)
|
|
397
|
+
else:
|
|
398
|
+
raise ValueError(
|
|
399
|
+
f"sigma2={sigma2} not feasible. Must be <= {max_var}"
|
|
400
|
+
)
|
|
401
|
+
|
|
402
|
+
return mu, sigma2
|
|
327
403
|
|
|
328
404
|
def solve_constraint_minimization(
|
|
329
405
|
self,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|