ai-nk-cce 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. ai_nk_cce-0.1.0.dist-info/METADATA +118 -0
  2. ai_nk_cce-0.1.0.dist-info/RECORD +46 -0
  3. ai_nk_cce-0.1.0.dist-info/WHEEL +4 -0
  4. api/__init__.py +0 -0
  5. api/mpcdf_vllm.py +94 -0
  6. evals/nk_model.py +277 -0
  7. model/README.md +64 -0
  8. model/config/dataset_conv_v1.yml +9 -0
  9. model/config/dataset_conv_v2_m2.yml +9 -0
  10. model/config/dataset_conv_v3_m2_assembl_nearest.yml +9 -0
  11. model/config/dataset_debug.yml +9 -0
  12. model/config/dataset_v4_int_format.yml +9 -0
  13. model/config/dataset_v5.yml +9 -0
  14. model/config/inference.yml +7 -0
  15. model/config/train.yml +24 -0
  16. model/config/train_debug.yml +19 -0
  17. model/config/train_from_checkpoint.yml +24 -0
  18. model/config/train_from_checkpoint_debug.yml +19 -0
  19. model/config/train_grpo.yml +30 -0
  20. model/config/train_grpo_debug.yml +30 -0
  21. model/config/train_grpo_debug_vllm.yml +32 -0
  22. model/config.py +54 -0
  23. model/dataset.py +324 -0
  24. model/inference.py +51 -0
  25. model/nk_assistant.py +207 -0
  26. model/parser.py +70 -0
  27. model/run_slurm.py +335 -0
  28. model/score.ipynb +596 -0
  29. model/scripts/template.slurm +54 -0
  30. model/scripts/template_rl.slurm +54 -0
  31. model/train.py +293 -0
  32. nk_model/__init__.py +0 -0
  33. nk_model/assembler.py +112 -0
  34. nk_model/biased_prediction_agent.py +389 -0
  35. nk_model/dataset.py +434 -0
  36. nk_model/enums.py +21 -0
  37. nk_model/landscape_cache.py +149 -0
  38. nk_model/models.py +172 -0
  39. nk_model/nk_landscape.py +498 -0
  40. simulation/hill_climber_simulation.py +211 -0
  41. simulation/hill_climber_vs_ai_simulation.py +132 -0
  42. simulation/landscape_selection.py +179 -0
  43. utils/__init__.py +0 -0
  44. utils/binary_conversion.py +128 -0
  45. utils/logging.py +33 -0
  46. utils/utils.py +51 -0
ai_nk_cce-0.1.0.dist-info/METADATA ADDED
@@ -0,0 +1,118 @@
+ Metadata-Version: 2.3
+ Name: ai-nk-cce
+ Version: 0.1.0
+ Summary: This repository is used to train AI agents to predict good strategies in a social learning game based on an NK landscape.
+ Author: Luis Mienhardt
+ Author-email: mienhardt@mpib-berlin.mpg.de
+ Requires-Python: >=3.10,<4.0
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Provides-Extra: model
+ Requires-Dist: dataframe-image (>=0.2.7,<0.3.0)
+ Requires-Dist: evaluate (>=0.4.3,<0.5.0)
+ Requires-Dist: matplotlib (>=3.9.2,<4.0.0)
+ Requires-Dist: networkx (>=3.4.2,<4.0.0)
+ Requires-Dist: numpy (>=1.26.0,<2.0.0)
+ Requires-Dist: openai (>=1.52.0,<2.0.0)
+ Requires-Dist: pandas (>=2.2.3,<3.0.0)
+ Requires-Dist: pydantic (>=2.0.0,<3.0.0)
+ Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
+ Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
+ Requires-Dist: scikit-learn (>=1.5.2,<2.0.0)
+ Requires-Dist: tiktoken (>=0.8.0,<0.9.0)
+ Requires-Dist: tqdm (>=4.66.0,<5.0.0)
+ Description-Content-Type: text/markdown
+
+ # human-ai-social-learning
+
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![PEP8](https://img.shields.io/badge/code%20style-pep8-orange.svg)](https://www.python.org/dev/peps/pep-0008/)
+
+ ## Description
+
+ ## Installation
+
+ This project uses Poetry for package management. If you haven't installed Poetry yet, please follow the instructions on the [official Poetry website](https://python-poetry.org/docs/#installation).
+
+ To install the project:
+
+ 1. Clone the repository:
+
+ ```bash
+ git clone https://github.com/your-username/human-ai-social-learning.git
+ cd human-ai-social-learning
+ ```
+
+ 2. Install dependencies with Poetry:
+
+ ```bash
+ poetry install
+ ```
+
+ 3. Activate the virtual environment:
+
+ ```bash
+ poetry shell
+ ```
+
+ 4. (Optional) Set up a Jupyter kernel for notebooks:
+
+ ```bash
+ poetry run python -m ipykernel install --user --name nk-cce-kernel
+ ```
+
+ Now your development environment is set up and ready to use.
+
+ ## Contributing
+
+ We welcome contributions! Please note:
+
+ 1. Create a descriptively named branch for each contribution (naming convention *feature_type/feature_name*).
+ 2. Follow the project style (PEP 8 for Python).
+ 3. Add tests (at least one per function or method) and run the full test suite before committing.
+ 4. Write meaningful commit messages.
+ 5. Make sure pre-commit linting passes.
+ 6. Submit a pull request to merge your code into main.
+
+ ### Pre-commit Hooks
+
+ We use pre-commit hooks. To install and run them:
+
+ ```bash
+ poetry add pre-commit
+ pre-commit install
+ pre-commit run --all-files
+ ```
+
+ ### Running Tests
+
+ Tests must be run inside the project's virtual environment. You can use Poetry or the included Visual Studio Code settings to do this automatically.
+
+ To run all tests with Poetry:
+
+ ```bash
+ poetry run pytest
+ ```
+
+ The repository includes Visual Studio Code settings that you can use
+ to run the tests from within the editor:
+
+ ```text
+ .vscode/
+ ├── settings.json
+ └── launch.json
+ ```
+
+ ## Documentation
+
+ For an overview of the repository structure, module organization, and
+ dependencies, see the [Repository Structure Guide](doc/repository_structure.md).
+
+ Additional documentation is available in the `doc/` directory:
+
+ - [How to compare hill climber vs AI](doc/how_to_compare_hill_climber_vs_ai.md)
+ - [How to find average hill climber landscape](doc/how_to_find_average_hill_climber_landscape.md)
+ - [Using MPCDF LLM inference](doc/use_mpcdf_llm_inference.md)
+
ai_nk_cce-0.1.0.dist-info/RECORD ADDED
@@ -0,0 +1,46 @@
+ api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ api/mpcdf_vllm.py,sha256=PlAUrwyiTrROVhRufxqjPGCYVh0GDPFyJlqSnJD6tZo,2821
+ evals/nk_model.py,sha256=hJhtkdIclL-s2dN2HHpJSpZuTVIPe6SLM0EP9jzs0JA,9122
+ model/README.md,sha256=c_qO2gyGjYmGfGcpnaxvQ7MWFg-WlAUtK8IMCfzHIPc,2226
+ model/config/dataset_conv_v1.yml,sha256=cVT4Hgo5lB9w1Xl3EaUO_TtrHdMWQSe8U022JkKQ5SE,409
+ model/config/dataset_conv_v2_m2.yml,sha256=Xz75wZs7tOpqVfiTSsgpepvOzlp_scKwoqyLLug-KGM,412
+ model/config/dataset_conv_v3_m2_assembl_nearest.yml,sha256=8-H1TWK7w44eg1lx8YO9FY6tO_Q0Jw49pFOfYbm_rl4,444
+ model/config/dataset_debug.yml,sha256=7oBNvLZ9vDh8jrgnf4Rc49Q3hE8wz1agG1SuK2pycsA,332
+ model/config/dataset_v4_int_format.yml,sha256=A7oYBvzmX7YMq97sYhB-87xe5fUpApfR-zHydMm8VsQ,330
+ model/config/dataset_v5.yml,sha256=SkgLZTB8yQE2lRg0N3Os360zKykGKHQHNajEN7MDj3k,404
+ model/config/inference.yml,sha256=Gh9-QkZsSH9qdMFY5zYoFyYTPvOO2Ar-up9hnXpR9IU,276
+ model/config/train.yml,sha256=6vbF0-EDuuGj94IltFY1z8IbMlP1xSu4sVVr_m7ZK1E,650
+ model/config/train_debug.yml,sha256=IvUBTyZVWLQf7rdPaD_pSekGz1d1-EQ9T9hdzUiMKSg,517
+ model/config/train_from_checkpoint.yml,sha256=GQ-DyemDxBiWG2MH8jGOVc8bZSPYejktVgf15ST5gxk,686
+ model/config/train_from_checkpoint_debug.yml,sha256=T04e0t94G9s7JKZEAEm_CYvbgLvW9b1F0DISTHCaGak,553
+ model/config/train_grpo.yml,sha256=jVZbSzx9OiFrNltgQhkbUQBkJkPRS-xaiYnCRExgPTw,861
+ model/config/train_grpo_debug.yml,sha256=5LF1Y2pvVYOv8OujxQ3gLuPcJuEEpC8BVPrtQ4yP5TY,946
+ model/config/train_grpo_debug_vllm.yml,sha256=rAB6O2GGoC7oHZ6V3zglx16U2IByoG6uZ9T4zwE0Nu8,999
+ model/config.py,sha256=NGT1zWblFh0yfaHppjpJV-1CHb3GxBBVj3LFmkz9Bkg,1628
+ model/dataset.py,sha256=cesCqK9vR8vgv7nIfVVjm7mob-lZa2eniekzofvzpEU,10433
+ model/inference.py,sha256=eN4RwBQO1b35NL2tzAA2KkDAP5g1-t9KPF5uiPT2z2w,1585
+ model/nk_assistant.py,sha256=TjHrYGCGvxLE2bZkWTKx4PoMw5t3mbYDxrXe1S55UWU,6696
+ model/parser.py,sha256=Fg0yPmnK2DDiEgFRfh-WhIOZ3itbdf04hzOKd-bEwZM,1738
+ model/run_slurm.py,sha256=CDx9qF_cz6jqgfRkAgrr--tN2dIoY2I7Foszu7GcxBE,9602
+ model/score.ipynb,sha256=3iGnpQJJGhC5eH0YRTHAYWAaWZq8ZDzSwvX4rJMAEOk,21993
+ model/scripts/template.slurm,sha256=4TPHAS5tuPmoFlgZ6r5WsGQwVyOtnIknuR5LnEELiys,1541
+ model/scripts/template_rl.slurm,sha256=Un8VTcS8n-W4l4nkz6xER9R6NpeKX4vQp2Go2Hv6C7g,1565
+ model/train.py,sha256=fhNvzUEgEqREsDZPjRLyNPVU4zjBUkF1lbUKr93d4b4,9619
+ nk_model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ nk_model/assembler.py,sha256=ea9YDlYBiWp927jf2oxl9NaeMzYhXeaKBF5cOKSF1C4,3192
+ nk_model/biased_prediction_agent.py,sha256=hXkgNi3yTlv3xwCrwPyAo6cBJ_G2CHXvgZaHn0T5hJI,12302
+ nk_model/dataset.py,sha256=fwaCvSd75Rz4afQ5VmbMjLoBSi9lCNWARI4GpyJ5ENQ,14471
+ nk_model/enums.py,sha256=rUzY6ky4tISpH9IVHc2Zxy7Uqzn0wTkDmlg6kDTxXnY,403
+ nk_model/landscape_cache.py,sha256=7V2w9MErkoAMfTK_1MLf1NDPzIJ9PeuyBV8bEuWszO0,4727
+ nk_model/models.py,sha256=dUBU1ONRUbwvfpdRADYoXKL4Q-q02lpsTp4_QNIjtEE,5269
+ nk_model/nk_landscape.py,sha256=OW4hPQ7edlFT-KH5btpCMoZIVwbucN3mpHpwaBTzRi8,17096
+ simulation/hill_climber_simulation.py,sha256=SgkhuEG0elzBpNwsaIPLiY2taj-ByCbmcM5TcpoAh6I,7162
+ simulation/hill_climber_vs_ai_simulation.py,sha256=fn5kG2JsCKmpcZQkHnKtNtlbfYLpb-WYaIli3W0Mdxk,4083
+ simulation/landscape_selection.py,sha256=Co0O2Te0kQYX1e3lj27B7Z_OLGtJyvQuPEECFmN9mvY,5274
+ utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ utils/binary_conversion.py,sha256=9YTEbxSUNuGnVWAIU5fG1U1bjdqSbhC1C3NGUmqv-C8,3705
+ utils/logging.py,sha256=SQlSxGt4ntkW3kyyy8cj_Moldg10szIzP1Cu1wMJVVo,895
+ utils/utils.py,sha256=_GypnP34bOrr3OD8PB-8QmJmMcpM6hTJm5G67PXhAME,1455
+ ai_nk_cce-0.1.0.dist-info/METADATA,sha256=QrOK2mNNijYE-3d7W4um5DwdbXdPtV-sIsa_3QwvIAs,3604
+ ai_nk_cce-0.1.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+ ai_nk_cce-0.1.0.dist-info/RECORD,,
ai_nk_cce-0.1.0.dist-info/WHEEL ADDED
@@ -0,0 +1,4 @@
+ Wheel-Version: 1.0
+ Generator: poetry-core 2.1.3
+ Root-Is-Purelib: true
+ Tag: py3-none-any
api/__init__.py ADDED
File without changes
api/mpcdf_vllm.py ADDED
@@ -0,0 +1,94 @@
+ import os
+ import time
+ from typing import Any, Optional
+
+ import httpx
+ from dotenv import load_dotenv
+
+ from src.utils.logging import log_dec
+
+ load_dotenv()
+
+ BASE_URL = os.getenv("MPCDF_BASE_URL")
+ API_KEY = os.getenv("MPCDF_API_KEY")
+ MODEL_NAME = "center-for-humans-and-machines/assembl_nk_1"
+
+
+ @log_dec
+ def mpcdf_vllm_request(
+     prompt: str,
+     model: Optional[str] = None,
+     max_tokens: int = 150,
+     temperature: float = 0.7,
+     timeout: float = 30.0,
+     max_retries: int = 3,
+     initial_backoff: float = 1.0,
+     **kwargs: Any,
+ ) -> str:
+     """Make a text-completion request to a vLLM model.
+
+     Uses the /v1/completions endpoint and sends the prompt as plain
+     text. Failed requests are retried with exponential backoff before
+     the last error is re-raised.
+
+     Args:
+         prompt: Text prompt string to send to the model.
+         model: Model name (defaults to MODEL_NAME).
+         max_tokens: Maximum tokens to generate.
+         temperature: Sampling temperature.
+         timeout: Request timeout in seconds.
+         max_retries: Maximum number of retry attempts (default: 3).
+         initial_backoff: Initial backoff delay in seconds (default: 1.0).
+         **kwargs: Additional parameters for the completion request.
+
+     Returns:
+         Generated text content.
+     """
+     # Build request URL - ensure /v1 prefix if not present
+     base = BASE_URL.rstrip("/")
+     if not base.endswith("/v1"):
+         base = f"{base}/v1"
+     url = f"{base}/completions"
+
+     # Prepare payload for the completions endpoint
+     payload = {
+         "model": model or MODEL_NAME,
+         "prompt": prompt,
+         "max_tokens": max_tokens,
+         "temperature": temperature,
+         **kwargs,
+     }
+
+     # Make HTTP request
+     headers = {
+         "Authorization": f"Bearer {API_KEY}",
+         "Content-Type": "application/json",
+     }
+
+     last_exception = None
+     for attempt in range(max_retries + 1):
+         try:
+             with httpx.Client(timeout=timeout) as http_client:
+                 response = http_client.post(url, json=payload, headers=headers)
+                 response.raise_for_status()
+                 result = response.json()
+
+                 # Completions endpoint returns text directly
+                 return result["choices"][0]["text"]
+         except (
+             httpx.HTTPStatusError,
+             httpx.RequestError,
+             httpx.TimeoutException,
+             httpx.NetworkError,
+         ) as e:
+             last_exception = e
+             if attempt < max_retries:
+                 backoff = initial_backoff * (2**attempt)
+                 time.sleep(backoff)
+                 continue
+             raise
+
+     # This should never be reached, but type checker needs it
+     if last_exception:
+         raise last_exception
+     raise RuntimeError("Request failed unexpectedly")
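
A minimal usage sketch of the helper above (an editor's illustration, not part of the wheel): the import path is assumed from the package layout, and `MPCDF_BASE_URL` / `MPCDF_API_KEY` must be set in the environment or a `.env` file.

```python
# Editor's sketch, not shipped in the package: call the retrying completion
# helper defined above. The import path is an assumption based on the wheel's
# file layout; the module itself reads MPCDF_BASE_URL and MPCDF_API_KEY.
from api.mpcdf_vllm import mpcdf_vllm_request

completion = mpcdf_vllm_request(
    prompt="Example prompt describing the current NK-landscape state.",
    max_tokens=50,     # cap on generated tokens
    temperature=0.2,   # lower temperature for more deterministic output
    max_retries=3,     # HTTP/network errors retried with exponential backoff
)
print(completion)
```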
evals/nk_model.py ADDED
@@ -0,0 +1,277 @@
+ """
+ NK Model evaluation functions for analyzing fitness landscapes.
+
+ This module provides functions to evaluate statistical properties of NK
+ fitness landscapes, including payoff distributions and peak prominence
+ analysis.
+ """
+
+ from collections import Counter
+ from typing import Dict, Tuple
+
+ import numpy as np
+ import pandas as pd
+
+ from src.nk_model.nk_landscape import NKLandscape
+
+
+ def calculate_payoff_distribution(
+     landscape: NKLandscape, bins: int = 50
+ ) -> pd.DataFrame:
+     """
+     Calculate the payoff distribution for an NK fitness landscape.
+
+     Args:
+         landscape: NKLandscape instance to analyze
+         bins: Number of bins for the histogram (default: 50)
+
+     Returns:
+         pd.DataFrame with columns ['bin_center', 'frequency', 'bin_edges_left',
+         'bin_edges_right'] suitable for plotting
+     """
+     # Extract payoffs from all items in the landscape
+     payoffs = [item.payoff for item in landscape.items]
+
+     # Calculate histogram
+     hist, bin_edges = np.histogram(payoffs, bins=bins, range=(0, 1))
+
+     # Calculate bin centers for plotting
+     bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
+
+     # Create DataFrame for easy plotting
+     return pd.DataFrame(
+         {
+             "bin_center": bin_centers,
+             "frequency": hist,
+             "bin_edges_left": bin_edges[:-1],
+             "bin_edges_right": bin_edges[1:],
+         }
+     )
+
+
+ def calculate_peaks_prominence(landscape: NKLandscape) -> pd.DataFrame:
+     """
+     Calculate peaks and their prominence distribution in an NK landscape.
+
+     Prominence is defined as the minimum Hamming distance to reach a
+     higher payoff. A prominence of n+1 indicates a global peak.
+
+     Hamming distances are computed with vectorized NumPy operations for
+     better performance.
+
+     Args:
+         landscape: NKLandscape instance to analyze
+
+     Returns:
+         pd.DataFrame with columns ['prominence', 'count', 'proportion',
+         'coordinates'] where prominence ranges from 1 to n+1, and coordinates
+         contains lists of coordinate tuples for points at each prominence level
+     """
+     n = landscape.params.n
+     items = landscape.items
+     total_items = len(items)
+
+     # Convert all coordinates and payoffs to numpy arrays for vectorization
+     coords = np.array(
+         [item.coordinates for item in items]
+     )  # Shape: (total_items, n)
+     payoffs = np.array(
+         [item.payoff for item in items]
+     )  # Shape: (total_items,)
+
+     # Calculate prominence for each point using vectorized operations
+     prominence_values = np.full(
+         total_items, n + 1, dtype=int
+     )  # Start with global peak assumption
+
+     # For each Hamming distance from 1 to n
+     for distance in range(1, n + 1):
+         # Find items that haven't found a higher payoff yet
+         unresolved_mask = prominence_values == n + 1
+         unresolved_indices = np.where(unresolved_mask)[0]
+
+         if len(unresolved_indices) == 0:
+             break  # All items have found their prominence
+
+         # For unresolved items, check if any other item is at this distance
+         # with higher payoff
+         for i in unresolved_indices:
+             current_coords = coords[i]
+             current_payoff = payoffs[i]
+
+             # Calculate Hamming distances to all other points
+             hamming_distances = np.sum(coords != current_coords, axis=1)
+
+             # Find items at exactly this distance with higher payoff
+             at_distance_mask = hamming_distances == distance
+             higher_payoff_mask = payoffs > current_payoff
+             found_higher = np.any(at_distance_mask & higher_payoff_mask)
+
+             if found_higher:
+                 prominence_values[i] = distance
+
+     # Create DataFrame with all prominence levels (1 to n+1) and collect
+     # coordinates
+     prominence_range = np.arange(1, n + 2)
+     results = []
+
+     for prom_val in prominence_range:
+         mask = prominence_values == prom_val
+         count = np.sum(mask)
+         proportion = count / total_items
+
+         # Collect coordinates for this prominence level
+         coords_at_prominence = [tuple(coords[i]) for i in np.where(mask)[0]]
+
+         results.append(
+             {
+                 "prominence": prom_val,
+                 "count": count,
+                 "proportion": proportion,
+                 "coordinates": coords_at_prominence,
+             }
+         )
+
+     df = pd.DataFrame(results)
+
+     # Add summary statistics as metadata
+     peak_mask = df["prominence"] >= 2  # Prominence >= 2 indicates peaks
+     df.attrs["peak_count"] = df.loc[peak_mask, "count"].sum()
+     df.attrs["peak_proportion"] = df.loc[peak_mask, "proportion"].sum()
+     df.attrs["global_peaks"] = df.loc[df["prominence"] == n + 1, "count"].iloc[
+         0
+     ]
+
+     return df
+
+
+ def _hill_climb_to_maximum(
+     start_coords: np.ndarray, landscape: NKLandscape
+ ) -> Tuple[np.ndarray, float]:
+     """
+     Perform hill-climbing from a starting point to find the local maximum
+     it reaches.
+
+     Args:
+         start_coords: Starting coordinates
+         landscape: NKLandscape instance
+
+     Returns:
+         Tuple of (final_coordinates, final_payoff) of the local maximum reached
+     """
+     current_coords = start_coords.copy()
+     current_payoff = landscape.get_payoff(current_coords)
+
+     improved = True
+     while improved:
+         improved = False
+         best_coords = current_coords.copy()
+         best_payoff = current_payoff
+
+         # Check all neighbors at Hamming distance 1
+         for i in range(len(current_coords)):
+             neighbor_coords = current_coords.copy()
+             neighbor_coords[i] = 1 - neighbor_coords[i]  # Flip bit
+             neighbor_payoff = landscape.get_payoff(neighbor_coords)
+
+             if neighbor_payoff > best_payoff:
+                 best_coords = neighbor_coords.copy()
+                 best_payoff = neighbor_payoff
+                 improved = True
+
+         if improved:
+             current_coords = best_coords
+             current_payoff = best_payoff
+
+     return current_coords, current_payoff
+
+
+ def calculate_basin_analysis(landscape: NKLandscape) -> pd.DataFrame:
+     """
+     Analyze basins of attraction for all local maxima in the NK landscape.
+
+     This implements a simplified basin analysis: for each point in the
+     landscape, hill-climb to find which local maximum it reaches, then count
+     unique destinations.
+
+     Args:
+         landscape: NKLandscape instance to analyze
+
+     Returns:
+         pd.DataFrame with columns:
+         - 'coordinates': tuple of coordinates for each local maximum
+         - 'payoff': payoff value of the local maximum
+         - 'basin_size': number of points in the basin of attraction
+         - 'is_global_max': boolean indicating if this is a global maximum
+     """
+     # Collect all destinations from hill-climbing
+     destinations = []
+     for item in landscape.items:
+         final_coords, final_payoff = _hill_climb_to_maximum(
+             item.coordinates, landscape
+         )
+         destinations.append((tuple(final_coords), final_payoff))
+
+     # Count unique destinations (basin sizes)
+     basin_counts = Counter(dest[0] for dest in destinations)
+
+     # Get payoffs for each unique destination
+     payoff_map = {dest[0]: dest[1] for dest in destinations}
+
+     # Find global maximum payoff
+     global_max_payoff = max(payoff_map.values())
+
+     # Create results
+     results = []
+     for coords_tuple, basin_size in basin_counts.items():
+         results.append(
+             {
+                 "coordinates": coords_tuple,
+                 "payoff": payoff_map[coords_tuple],
+                 "basin_size": basin_size,
+                 "is_global_max": payoff_map[coords_tuple] == global_max_payoff,
+             }
+         )
+
+     df = pd.DataFrame(results)
+
+     # Sort by payoff (descending) for better readability
+     df = df.sort_values("payoff", ascending=False).reset_index(drop=True)
+
+     # Add summary statistics as metadata
+     df.attrs["total_local_maxima"] = len(df)
+     df.attrs["total_points"] = len(landscape.items)
+     df.attrs["global_maxima_count"] = df["is_global_max"].sum()
+     df.attrs["average_basin_size"] = df["basin_size"].mean()
+
+     return df
+
+
+ def evaluate_nk_landscape(
+     landscape: NKLandscape, bins: int = 50, include_basins: bool = False
+ ) -> Dict[str, pd.DataFrame]:
+     """
+     Comprehensive evaluation of an NK fitness landscape.
+
+     Args:
+         landscape: NKLandscape instance to analyze
+         bins: Number of bins for payoff distribution histogram
+         include_basins: If True, include basin analysis (computationally
+             expensive)
+
+     Returns:
+         Dict containing:
+         - 'payoff_distribution': DataFrame with payoff distribution
+         - 'peaks_prominence': DataFrame with peaks and prominence analysis
+         - 'basin_analysis': DataFrame with basin analysis
+           (if include_basins=True)
+     """
+     results = {
+         "payoff_distribution": calculate_payoff_distribution(landscape, bins),
+         "peaks_prominence": calculate_peaks_prominence(landscape),
+     }
+
+     if include_basins:
+         results["basin_analysis"] = calculate_basin_analysis(landscape)
+
+     return results
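
A minimal usage sketch of the evaluation helpers above (an editor's illustration, not part of the wheel): it assumes an `NKLandscape` instance is already available, since the constructor is not shown in this diff, and the import path is likewise an assumption.

```python
# Editor's sketch, not shipped in the package. The import path and the way an
# NKLandscape instance is obtained are assumptions; only the function names,
# DataFrame columns, and attrs keys come from the code above.
from evals.nk_model import evaluate_nk_landscape


def summarize_landscape(landscape) -> None:
    """Print a short report for one NKLandscape instance."""
    results = evaluate_nk_landscape(landscape, bins=50, include_basins=True)

    # Histogram of payoffs over [0, 1]
    print(results["payoff_distribution"][["bin_center", "frequency"]].head())

    # Prominence = minimum Hamming distance to a higher payoff;
    # prominence == n + 1 marks global peaks, >= 2 counts as a local peak.
    peaks = results["peaks_prominence"]
    print(peaks[["prominence", "count", "proportion"]])
    print("local peaks:", peaks.attrs["peak_count"])

    # One row per local maximum reached by hill climbing, with basin sizes.
    print(results["basin_analysis"].head())
```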
model/README.md ADDED
@@ -0,0 +1,64 @@
+
+ # Debug
+
+ ## Create Dataset
+
+ ```
+ python src/model/dataset.py --config src/model/config/dataset_debug.yml
+ ```
+
+ ## Train Model
+
+ ```
+ python src/model/train.py --config src/model/config/train_debug.yml
+ ```
+
+ ## Train Model on Cluster
+
+ ```
+ python src/model/run_slurm.py --config_file src/model/config/train_debug.yml --group_name debug --job_name debug --n_gpu 1 --time 00:10:00 --ds debug_1000
+ ```
+
+ ## Evaluate Model
+
+ ```
+ python src/model/run_slurm.py --script src/model/inference.py --config_file src/model/config/inference.yml --group_name debug --job_name debug --ds debug_1000 --n_gpu 1 --model gpt2_debug_1000
+ ```
+
+ ## Train RL Model on Cluster
+
+ ```
+ python src/model/run_slurm.py --template src/model/scripts/template_rl.slurm --config_file src/model/config/train_grpo_debug.yml --image /u/lumi/projects/llm-strategic-tuning/images/ai_nk_rl.sif --group_name debug --job_name rl_debug --n_gpu 1 --lr 1e-5 --time 00:10:00 --ds v5_xxl_3.0
+ ```
+
+ ## Train RL Model with vLLM on Cluster
+
+ ```
+ python src/model/run_slurm.py --template src/model/scripts/template_rl.slurm --config_file src/model/config/train_grpo_debug_vllm.yml --image /u/lumi/projects/llm-strategic-tuning/images/ai_nk_trl_vllm.sif --group_name debug --job_name rl_debug --n_gpu 1 --lr 1e-5 --time 00:10:00 --ds v5_xxl_3.0
+ ```
+
+ # Actual Run
+
+ ## Create Dataset
+
+ ```
+ python src/model/dataset.py --config src/model/config/dataset.yml
+ ```
+
+ ## Train Model
+
+ ```
+ python src/model/run_slurm.py --config_file src/model/config/train.yml --group_name gpt2_v4 --job_name 5e-5 --n_gpu 4 --lr 1e-5 --ds v4 --time 04:00:00
+ ```
+
+ ## Evaluate Model
+
+ ```
+ python src/model/run_slurm.py --script src/model/inference.py --config_file src/model/config/inference.yml --n_gpu 1 --group_name gpt2_v4 --job_name 5e-5 --ds v4 --train_job_id 2025_04_08__13_11_40 --time 01:30:00
+ ```
+
+ ## Train RL Model on Cluster
+
+ ```
+ python src/model/run_slurm.py --template src/model/scripts/template_rl.slurm --config_file src/model/config/train_grpo.yml --image /u/lumi/projects/llm-strategic-tuning/images/ai_nk_rl.sif --group_name gpt2_v6 --job_name rl --n_gpu 4 --lr 1e-5 --time 24:00:00 --ds v5_xxl_3.0
+ ```
+
+ ## Push Model
+
+ ```
+ huggingface-cli upload center-for-humans-and-machines/nk-1 models/gpt2_v4/5e-5/2025_03_25__10_09_45
+ ```
model/config/dataset_conv_v1.yml ADDED
@@ -0,0 +1,9 @@
+ input_file: "data/landscapes/landscapes_n8_k0-1-2-3-4-5-6-7_m1_p1_f1000._size100000_20250925_125128.parquet"
+ output_file: "data/samples/conv_xxl_v1"
+ samples_per_landscapes: 20
+ constraints_range: [1, 9] # as the constraints are [low, high) this means 1, 2, 3, 4, 5, 6, 7, 8
+ n_samples: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
+ include_payoff: true
+ test_ratio: 0.2
+ exp_ratio: 0.2
+ debug_size: null
model/config/dataset_conv_v2_m2.yml ADDED
@@ -0,0 +1,9 @@
+ input_file: "data/landscapes/landscapes_n8_k0-1-2-3-4-5-6-7_m2_p1_f1000._size100000_20251007_144922.parquet"
+ output_file: "data/samples/conv_xxl_v2_m2"
+ samples_per_landscapes: 20
+ constraints_range: [1, 9] # as the constraints are [low, high) this means 1, 2, 3, 4, 5, 6, 7, 8
+ n_samples: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
+ include_payoff: true
+ test_ratio: 0.2
+ exp_ratio: 0.2
+ debug_size: null
model/config/dataset_conv_v3_m2_assembl_nearest.yml ADDED
@@ -0,0 +1,9 @@
+ input_file: "data/landscapes/landscapes_n8_k0-1-2-3-4-5-6-7_m2_p1_f1000._nm-nearest_cm-symmetric_size100000_20251106_164449.parquet"
+ output_file: "data/samples/assembl_conv_xxl_v3_m2"
+ samples_per_landscapes: 20
+ constraints_range: [1, 9] # as the constraints are [low, high) this means 1, 2, 3, 4, 5, 6, 7, 8
+ n_samples: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
+ include_payoff: true
+ test_ratio: 0.2
+ exp_ratio: 0.2
+ debug_size: null
model/config/dataset_debug.yml ADDED
@@ -0,0 +1,9 @@
+ input_file: "data/landscapes/landscapes_n8_k0-1-2-3-4-5-6-7_p1_f1000_size100000_20250519_185138.parquet"
+ output_file: "data/samples/debug_1000"
+ samples_per_landscapes: 1
+ constraints_range: [1, 9]
+ n_samples: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
+ include_payoff: true
+ test_ratio: 0.2
+ exp_ratio: 0.2
+ debug_size: 1000
model/config/dataset_v4_int_format.yml ADDED
@@ -0,0 +1,9 @@
+ input_file: "data/landscapes/landscapes_n8_k0-1-2-3-4-5-6-7_p1_f1000._size100000_20250425_151338.parquet"
+ output_file: "data/samples/v4_xxl"
+ samples_per_landscapes: 10
+ constraints_range: [0, 8]
+ n_samples: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
+ include_payoff: true
+ test_ratio: 0.2
+ exp_ratio: 0.2
+ debug_size: null
model/config/dataset_v5.yml ADDED
@@ -0,0 +1,9 @@
+ input_file: "data/landscapes/landscapes_n8_k0-1-2-3-4-5-6-7_p1_f1000_size100000_20250519_185138.parquet"
+ output_file: "data/samples/v5_xxl_2.0"
+ samples_per_landscapes: 20
+ constraints_range: [1, 9] # as the constraints are [low, high) this means 1, 2, 3, 4, 5, 6, 7, 8
+ n_samples: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
+ include_payoff: true
+ test_ratio: 0.2
+ exp_ratio: 0.2
+ debug_size: null
model/config/inference.yml ADDED
@@ -0,0 +1,7 @@
+ dataset_file: data/samples/<<ds>>
+ model_path: models/<<group_name>>/<<job_name>>/<<train_job_id>>
+ output_dataset_file: models/<<group_name>>/<<job_name>>/<<train_job_id>>/inference_results_xxl
+ splits: ["test"]
+ max_test_samples: 80000
+ # generation_params:
+ #   temperature: 0.7