kaggle-environments 1.22.6__py3-none-any.whl → 1.24.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kaggle-environments might be problematic. Click here for more details.
- kaggle_environments/envs/connectx/visualizer/default/index.html +13 -0
- kaggle_environments/envs/connectx/visualizer/default/package.json +22 -0
- kaggle_environments/envs/connectx/visualizer/default/replays/test-replay.json +1129 -0
- kaggle_environments/envs/connectx/visualizer/default/src/main.ts +12 -0
- kaggle_environments/envs/connectx/visualizer/default/src/renderer.ts +396 -0
- kaggle_environments/envs/connectx/visualizer/default/src/style.css +38 -0
- kaggle_environments/envs/connectx/visualizer/default/tsconfig.json +4 -0
- kaggle_environments/envs/connectx/visualizer/default/vite.config.ts +7 -0
- kaggle_environments/envs/open_spiel_env/games/repeated_poker/repeated_poker.js +163 -88
- kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/index.html +13 -0
- kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/package.json +23 -0
- kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/replays/test-replay.json +1 -0
- kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/scripts/print_first_steps.mjs +202 -0
- kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/scripts/print_replay.mjs +215 -0
- kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/scripts/print_steps_with_end_states.mjs +234 -0
- kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/components/getRepeatedPokerStateForStep.js +260 -0
- kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/components/utils.ts +61 -0
- kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/debug_repeated_poker_renderer.ts +49 -0
- kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/images/poker_chip_1.svg +22 -0
- kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/images/poker_chip_10.svg +22 -0
- kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/images/poker_chip_100.svg +48 -0
- kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/images/poker_chip_25.svg +22 -0
- kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/images/poker_chip_5.svg +22 -0
- kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/main.ts +36 -0
- kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/repeated_poker_renderer.ts +573 -0
- kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/style.css +594 -0
- kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/tsconfig.json +7 -0
- kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/vite.config.ts +6 -0
- kaggle_environments/envs/werewolf/README.md +190 -0
- kaggle_environments/envs/werewolf/harness/__init__.py +0 -0
- kaggle_environments/envs/werewolf/harness/base.py +773 -0
- kaggle_environments/envs/werewolf/harness/litellm_models.yaml +51 -0
- kaggle_environments/envs/werewolf/harness/main.py +54 -0
- kaggle_environments/envs/werewolf/harness/test_base.py +35 -0
- kaggle_environments/envs/werewolf/runner.py +146 -0
- kaggle_environments/envs/werewolf/scripts/__init__.py +0 -0
- kaggle_environments/envs/werewolf/scripts/add_audio.py +425 -0
- kaggle_environments/envs/werewolf/scripts/configs/audio/standard.yaml +24 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/block_basic.yaml +102 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/comprehensive.yaml +100 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/roundrobin_discussion_DisableDoctorSelfSave_DisableDoctorConsecutiveSave_large.yaml +104 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/roundrobin_discussion_large.yaml +103 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/roundrobin_discussion_small.yaml +103 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard.yaml +103 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_DisableDoctorConsecutiveSave.yaml +104 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_SeerRevealTeam.yaml +105 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_SeerRevealTeam_NightEliminationNoReveal_DayExileNoReveal.yaml +105 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_SeerRevealTeam_NightEliminationRevealTeam_DayExileRevealTeam.yaml +105 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_disable_doctor_self_save.yaml +103 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_parallel_voting.yaml +103 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_parallel_voting_no_tie_exile.yaml +103 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_parallel_voting_roundbiddiscussion.yaml +105 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/run_config.yaml +58 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/vertex_api_example_config.yaml +115 -0
- kaggle_environments/envs/werewolf/scripts/measure_cost.py +251 -0
- kaggle_environments/envs/werewolf/scripts/plot_existing_trajectories.py +135 -0
- kaggle_environments/envs/werewolf/scripts/rerender_html.py +87 -0
- kaggle_environments/envs/werewolf/scripts/run.py +93 -0
- kaggle_environments/envs/werewolf/scripts/run_block.py +237 -0
- kaggle_environments/envs/werewolf/scripts/run_pairwise_matrix.py +222 -0
- kaggle_environments/envs/werewolf/scripts/self_play.py +196 -0
- kaggle_environments/envs/werewolf/scripts/utils.py +47 -0
- kaggle_environments/envs/werewolf/werewolf.json +1 -1
- {kaggle_environments-1.22.6.dist-info → kaggle_environments-1.24.3.dist-info}/METADATA +1 -1
- {kaggle_environments-1.22.6.dist-info → kaggle_environments-1.24.3.dist-info}/RECORD +68 -7
- {kaggle_environments-1.22.6.dist-info → kaggle_environments-1.24.3.dist-info}/WHEEL +0 -0
- {kaggle_environments-1.22.6.dist-info → kaggle_environments-1.24.3.dist-info}/entry_points.txt +0 -0
- {kaggle_environments-1.22.6.dist-info → kaggle_environments-1.24.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import collections
|
|
3
|
+
import logging
|
|
4
|
+
import math
|
|
5
|
+
import multiprocessing
|
|
6
|
+
import os
|
|
7
|
+
import random
|
|
8
|
+
from itertools import permutations
|
|
9
|
+
from typing import Any, Dict, List
|
|
10
|
+
|
|
11
|
+
import tenacity
|
|
12
|
+
import yaml
|
|
13
|
+
from tqdm import tqdm
|
|
14
|
+
|
|
15
|
+
from kaggle_environments.envs.werewolf.runner import LogExecutionTime, append_timestamp_to_dir, setup_logger
|
|
16
|
+
from kaggle_environments.envs.werewolf.scripts.utils import run_single_game_cli
|
|
17
|
+
|
|
18
|
+
# Initialize a placeholder logger
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def load_config(config_path):
    """Load the experiment configuration from a YAML file.

    Args:
        config_path: Path to the YAML configuration file.

    Returns:
        The parsed document, exactly as produced by ``yaml.safe_load``.
    """
    # Pin the encoding so parsing does not depend on the platform's default locale.
    with open(config_path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def get_all_unique_role_configs(role_configs: List[Dict[str, Any]]) -> List[List[Dict[str, Any]]]:
    """
    Generates all unique permutations of role configurations.

    A role configuration is a dict with 'role' and an optional 'role_params'.
    Configurations that compare equal (same role, same params) are treated as
    interchangeable, so arrangements that differ only by swapping them are
    returned once.

    Args:
        role_configs: The role configurations to arrange. The values inside
            'role_params' must be hashable.

    Returns:
        A list of arrangements; each arrangement is a list of fresh
        ``{"role": ..., "role_params": {...}}`` dicts. The order is
        deterministic: arrangements appear in the order
        ``itertools.permutations`` first produces them.
    """

    def make_hashable(config):
        # `or {}` treats a missing, empty or explicit-null role_params alike.
        params = config.get("role_params") or {}
        return config["role"], frozenset(params.items())

    def make_unhashable(hashable_config):
        role, params_frozenset = hashable_config
        return {"role": role, "role_params": dict(params_frozenset)}

    hashable_configs = [make_hashable(c) for c in role_configs]
    # dict.fromkeys de-duplicates while keeping first-seen order. A plain set()
    # would yield a hash-dependent order that can differ between runs.
    unique_perms = dict.fromkeys(permutations(hashable_configs))
    return [[make_unhashable(c) for c in perm] for perm in unique_perms]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# Retrying variant of run_single_game_cli: at most 3 attempts, with a randomised
# exponential wait (bounds min=2, max=10) between attempts; each retry is logged at INFO.
run_single_game_with_retry = tenacity.retry(
    stop=tenacity.stop_after_attempt(3),
    wait=tenacity.wait_random_exponential(multiplier=1, min=2, max=10),
    before_sleep=tenacity.before_sleep_log(logger, logging.INFO),
)(run_single_game_cli)


def game_runner_wrapper(args):
    """Run one game from a packed task tuple (single-argument form for pool workers).

    Args:
        args: A 6-tuple ``(game_dir, game_config, use_random_agents, debug,
            block_index, game_in_block)``. The last two entries are not used here.
    """
    game_dir, game_config, use_random_agents, debug, _block_index, _game_in_block = args
    run_single_game_with_retry(game_dir, game_config, use_random_agents, debug)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def generate_game_tasks(output_dir, num_blocks, config, use_random_agents, debug, shuffle_player_ids):
    """
    Generates all game configurations for the entire experiment.

    Each block picks one arrangement of roles and then plays one game per
    player, rotating the players by one seat between games so that, within a
    block, every player occupies every seat of that arrangement once.

    Args:
        output_dir: Root directory; ``block_<b>/game_<g>`` directories are created under it.
        num_blocks: Number of blocks to generate.
        config: Parsed run configuration; ``config["game_config"]["agents"]`` lists the players.
        use_random_agents: Passed through unchanged in each task tuple.
        debug: Passed through unchanged in each task tuple.
        shuffle_player_ids: If true, the "id" values are randomly reassigned among
            the agents of each game.

    Yields:
        Tuples ``(game_dir, game_config, use_random_agents, debug, block_index, game_in_block)``.
    """
    base_game_config = config["game_config"]
    # Copy the list: the per-block shuffle below must not reorder the caller's config in place.
    players_data = list(base_game_config["agents"])
    base_role_configs = [{"role": agent["role"], "role_params": agent.get("role_params", {})} for agent in players_data]

    logger.info("Generating all unique role configurations...")
    all_role_configs = get_all_unique_role_configs(base_role_configs)
    logger.info(f"Found {len(all_role_configs)} unique arrangements.")

    # Arrangements not yet used; refilled and reshuffled whenever it runs empty.
    available_role_configs = []

    for block_index in range(num_blocks):
        block_dir = os.path.join(output_dir, f"block_{block_index}")
        os.makedirs(block_dir, exist_ok=True)

        if not available_role_configs:
            if num_blocks > len(all_role_configs):
                logger.warning("Sampling with replacement as num_blocks > unique configurations.")
            available_role_configs = list(all_role_configs)
            random.shuffle(available_role_configs)

        block_role_config = available_role_configs.pop()
        random.shuffle(players_data)
        current_players_deque = collections.deque(players_data)

        for game_in_block in range(len(players_data)):
            game_dir = os.path.join(block_dir, f"game_{game_in_block}")
            os.makedirs(game_dir, exist_ok=True)

            current_players = list(current_players_deque)
            # Seat i takes the i-th role of the block's arrangement; the role keys
            # override whatever role the player carried in the base config.
            game_agents_config = [
                {**player_config, **block_role_config[i]} for i, player_config in enumerate(current_players)
            ]

            if shuffle_player_ids:
                player_ids = [agent["id"] for agent in game_agents_config]
                random.shuffle(player_ids)
                for i, agent in enumerate(game_agents_config):
                    agent["id"] = player_ids[i]

            game_config = {**base_game_config, "agents": game_agents_config}
            yield (game_dir, game_config, use_random_agents, debug, block_index, game_in_block)
            # Move every player one seat along for the next game of the block.
            current_players_deque.rotate(1)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def run_experiment(
    output_dir, num_blocks, config, use_random_agents, debug, parallel, num_processes, shuffle_player_ids
):
    """
    Generate every game task of the block experiment and execute it.

    With ``parallel`` set, tasks are dispatched to a ``multiprocessing.Pool`` of
    ``num_processes`` workers and complete in arbitrary order; otherwise they
    run one after another in this process. A progress bar tracks completed games.

    Note: the ``debug`` flag only triggers a warning here and is forwarded to
    the tasks; this function does not itself override ``parallel``.
    """
    if debug:
        logger.warning("Debug mode is enabled. Forcing sequential execution.")

    # One game per player per block.
    total_games = num_blocks * len(config["game_config"]["agents"])

    if parallel:
        logger.info(f"Running games in parallel with up to {num_processes} processes.")

    # Lazy generator: tasks (and their directories) are produced as they are consumed.
    task_stream = generate_game_tasks(output_dir, num_blocks, config, use_random_agents, debug, shuffle_player_ids)

    with tqdm(total=total_games, desc="Processing Games") as progress:
        if not parallel:
            for task in task_stream:
                game_runner_wrapper(task)
                progress.update(1)
        else:
            with multiprocessing.Pool(processes=num_processes) as worker_pool:
                for _ in worker_pool.imap_unordered(game_runner_wrapper, task_stream):
                    progress.update(1)

    logger.info("All game tasks have been processed.")
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def main():
    """Command-line entry point for the block-design Werewolf experiment.

    Parses the options, prepares the output directory and logger, loads the
    YAML config, picks a worker count and runs the experiment.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    default_config_path = os.path.join(script_dir, "configs", "run", "run_config.yaml")

    parser = argparse.ArgumentParser(
        description="Run a block-design experiment for the Werewolf game, "
        "where each block is a complete role rotation amongst the players."
    )
    parser.add_argument(
        "-o",
        "--output_dir",
        type=str,
        help="Output directory for game replays and logs.",
        default="werewolf_block_experiment",
    )
    parser.add_argument(
        "-c", "--config", type=str, default=default_config_path, help="Path to the base configuration YAML file."
    )
    parser.add_argument(
        "-b",
        "--num_blocks",
        type=int,
        default=10,
        help="Number of blocks to run. Each block is a complete role rotation.",
    )
    parser.add_argument(
        "-r", "--use_random_agents", action="store_true", help="Use random agents for all players for fast testing."
    )
    parser.add_argument(
        "-d",
        "--debug",
        action="store_true",
        help="Enable debug mode for the game environment. "
        "Note that you can use debug mode to enable intra game sequential execution.",
    )
    parser.add_argument("-p", "--parallel", action="store_true", help="Run games in parallel using multiple processes.")
    parser.add_argument(
        "-n", "--num_processes", type=int, default=None, help="Number of processes for parallel execution."
    )
    parser.add_argument(
        "-a", "--append_timestamp_to_dir", action="store_true", help="Append a timestamp to the output directory."
    )
    parser.add_argument(
        "-s",
        "--shuffle_player_ids",
        action="store_true",
        help="Shuffle player ids for each game to account for name bias.",
    )

    args = parser.parse_args()

    output_dir = append_timestamp_to_dir(args.output_dir, append=args.append_timestamp_to_dir)

    os.makedirs(output_dir, exist_ok=True)

    setup_logger(output_dir, "run_block")

    config = load_config(args.config)

    num_players = len(config.get("game_config", {}).get("agents", []))
    if args.num_processes is None:
        # Default budget: 90% of the CPUs.
        num_processes = multiprocessing.cpu_count() * 0.9
        if not args.debug:
            # Outside debug mode the budget is split across the players of a game
            # (presumably because a game then runs its players concurrently - confirm).
            # max(1, ...) avoids a ZeroDivisionError when the config lists no agents.
            num_processes /= max(1, num_players)
        num_processes = max(1, math.floor(num_processes))
    else:
        num_processes = args.num_processes

    logger.info("Starting experiment with the following settings:")
    logger.info(f"Output Directory: {output_dir}")
    logger.info(f"Number of Blocks: {args.num_blocks}")
    logger.info(f"Parallel Execution: {args.parallel}")
    if args.parallel:
        logger.info(f"Number of Processes: {num_processes}")
    logger.info(f"Debug Mode: {args.debug}")
    logger.info(f"Use Random Agents: {args.use_random_agents}")
    logger.info(f"Shuffle Player IDs: {args.shuffle_player_ids}")

    with LogExecutionTime(logger_obj=logger, task_str="block experiment"):
        run_experiment(
            output_dir=output_dir,
            num_blocks=args.num_blocks,
            config=config,
            use_random_agents=args.use_random_agents,
            debug=args.debug,
            parallel=args.parallel,
            num_processes=num_processes,
            shuffle_player_ids=args.shuffle_player_ids,
        )
    logger.info("Experiment finished successfully.")


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
"""Run a pairwise zero-sum setting where one player plays the entire team of Werewolf and another player plays
|
|
2
|
+
the team of Villager. Given a config, we play all possible pairwise combinations N times.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import logging
|
|
7
|
+
import math
|
|
8
|
+
import multiprocessing
|
|
9
|
+
import os
|
|
10
|
+
import random
|
|
11
|
+
from copy import deepcopy
|
|
12
|
+
from typing import List, Tuple
|
|
13
|
+
|
|
14
|
+
import tenacity
|
|
15
|
+
import yaml
|
|
16
|
+
from tqdm import tqdm
|
|
17
|
+
|
|
18
|
+
from kaggle_environments.envs.werewolf.game.consts import RoleConst
|
|
19
|
+
from kaggle_environments.envs.werewolf.runner import LogExecutionTime, append_timestamp_to_dir, setup_logger
|
|
20
|
+
from kaggle_environments.envs.werewolf.scripts.utils import run_single_game_cli
|
|
21
|
+
|
|
22
|
+
# Initialize a placeholder logger
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def load_config(config_path):
    """Load the tournament configuration from a YAML file.

    Args:
        config_path: Path to the YAML configuration file.

    Returns:
        The parsed document, exactly as produced by ``yaml.safe_load``.
    """
    # Pin the encoding so parsing does not depend on the platform's default locale.
    with open(config_path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def get_team_roles(base_roles: List[str]) -> Tuple[List[str], List[str]]:
    """Partition role names into the villager team and the werewolf team.

    Every role that is not ``RoleConst.WEREWOLF`` is placed on the villager
    team. The input order is preserved within each team.

    Args:
        base_roles: Role names; each must be a valid ``RoleConst`` value.

    Returns:
        ``(villager_roles, werewolf_roles)``.
    """
    villager_roles = []
    werewolf_roles = []
    for role_name in base_roles:
        # Converting through RoleConst validates the name before it is compared.
        role = RoleConst(role_name)
        if role == RoleConst.WEREWOLF:
            werewolf_roles.append(role_name)
        else:
            villager_roles.append(role_name)
    return villager_roles, werewolf_roles
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# Retrying variant of run_single_game_cli: at most 3 attempts, each retry logged at INFO.
# The wait is exponential *with random jitter* (bounds min=2, max=10), matching the other
# experiment scripts, so parallel workers that fail together do not retry in lockstep.
run_single_game_with_retry = tenacity.retry(
    wait=tenacity.wait_random_exponential(multiplier=1, min=2, max=10),
    stop=tenacity.stop_after_attempt(3),
    before_sleep=tenacity.before_sleep_log(logger, logging.INFO),
)(run_single_game_cli)


def game_runner_wrapper(args):
    """Wrapper to unpack arguments for the multiprocessing pool.

    Args:
        args: A 6-tuple ``(game_dir, game_config, use_random_agents, debug,
            tourney_idx, matchup_label)``. The last two entries are not used here.
    """
    game_dir, game_config, use_random_agents, debug, _, _ = args
    run_single_game_with_retry(game_dir, game_config, use_random_agents, debug)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def assign_roles_dup_agents(roles, agent_config, player_ids):
    """Clone one agent config once per role and stamp each clone with a role and an id.

    Args:
        roles: Role names, one per clone to create.
        agent_config: Template agent dict; it is deep-copied and never modified.
        player_ids: Ids paired positionally with ``roles``.

    Returns:
        A list of ``len(roles)`` independent agent dicts.
    """
    clones = []
    for _ in range(len(roles)):
        clones.append(deepcopy(agent_config))

    # Pair role, clone and id by position.
    for role, clone, player_id in zip(roles, clones, player_ids):
        clone.update(role=role, id=player_id)
    return clones
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def prepare_pairwise_agents(villager_roles, werewolf_roles, player_a_config, player_b_config, player_ids):
    """Build the agent list for one pairwise game.

    Player A's config is cloned for every villager-team role and player B's
    config for every werewolf-team role. The first ``len(villager_roles)`` ids
    go to the villager clones and the remaining ids to the werewolf clones.

    Returns:
        Villager-team agents followed by werewolf-team agents.
    """
    split_at = len(villager_roles)
    villager_agents = assign_roles_dup_agents(villager_roles, player_a_config, player_ids[:split_at])
    werewolf_agents = assign_roles_dup_agents(werewolf_roles, player_b_config, player_ids[split_at:])
    return villager_agents + werewolf_agents
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def generate_game_tasks(output_dir, num_tournaments, config, use_random_agents, debug):
    """
    Generates game configurations for a pairwise matrix tournament.

    For every ordered pair ``(i, j)`` of configured players (including
    ``i == j``), player ``i`` is cloned into every villager-team role and
    player ``j`` into every werewolf-team role. Role parameters follow the
    role, not the player: the n-th agent assigned a given role receives the
    ``role_params`` of the n-th agent holding that role in the base config.

    Args:
        output_dir: Root directory; ``tourney_<t>/game_<i>_vs_<j>`` directories are created under it.
        num_tournaments: How many times the full matrix is generated.
        config: Parsed run configuration; ``config["game_config"]["agents"]`` lists the players.
        use_random_agents: Passed through unchanged in each task tuple.
        debug: Passed through unchanged in each task tuple.

    Yields:
        Tuples ``(game_dir, game_config, use_random_agents, debug, tourney_idx, "<i>_vs_<j>")``.

    Raises:
        ValueError: If the config lacks a werewolf role or lacks a villager-team role.
    """
    base_game_config = config["game_config"]
    all_players = base_game_config["agents"]
    num_players = len(all_players)
    base_roles = [agent["role"] for agent in all_players]
    player_ids = [agent["id"] for agent in all_players]

    # role name -> role_params of the agents holding that role, in config order.
    # Without this, a clone would keep the role_params of the source player's
    # original role, which may not match the role it is assigned here.
    role_params_by_role = {}
    for agent in all_players:
        role_params_by_role.setdefault(agent["role"], []).append(agent.get("role_params") or {})

    villager_roles, werewolf_roles = get_team_roles(base_roles)

    if not werewolf_roles:
        raise ValueError("Configuration must include at least one werewolf role.")
    if not villager_roles:
        raise ValueError("Configuration must include at least one villager role.")

    for tourney_idx in range(num_tournaments):
        for i in range(num_players):
            for j in range(num_players):
                game_dir = os.path.join(output_dir, f"tourney_{tourney_idx}", f"game_{i}_vs_{j}")
                os.makedirs(game_dir, exist_ok=True)

                player_a_config = all_players[i]
                player_b_config = all_players[j]

                # Ids must be unique and all come from the config, so they are reshuffled
                # for every game and handed out positionally to the cloned agents.
                random.shuffle(player_ids)
                game_agents_config = prepare_pairwise_agents(
                    villager_roles, werewolf_roles, player_a_config, player_b_config, player_ids
                )

                # Attach the role_params that belong to each assigned role.
                role_occurrence = {}
                for agent in game_agents_config:
                    role = agent["role"]
                    nth = role_occurrence.get(role, 0)
                    role_occurrence[role] = nth + 1
                    agent["role_params"] = deepcopy(role_params_by_role[role][nth])

                random.shuffle(game_agents_config)

                game_config = {**base_game_config, "agents": game_agents_config}
                yield game_dir, game_config, use_random_agents, debug, tourney_idx, f"{i}_vs_{j}"
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def run_tournament(output_dir, num_tournaments, config, use_random_agents, debug, parallel, num_processes):
    """
    Build every game task of the pairwise tournament and execute them in random order.

    With ``parallel`` set, tasks are dispatched to a ``multiprocessing.Pool`` of
    ``num_processes`` workers; otherwise they run one after another in this
    process. A progress bar tracks completed games.
    """
    agent_count = len(config["game_config"]["agents"])
    # Full N x N matrix per tournament.
    total_games = num_tournaments * agent_count**2

    if parallel:
        logger.info(f"Running games in parallel with up to {num_processes} processes.")

    # Materialise the generator so the tasks can be shuffled; the shuffle is
    # meant to reduce the load on any particular LLM API.
    task_list = list(generate_game_tasks(output_dir, num_tournaments, config, use_random_agents, debug))
    random.shuffle(task_list)

    with tqdm(total=total_games, desc="Processing Games") as progress:
        if not parallel:
            for task in task_list:
                game_runner_wrapper(task)
                progress.update(1)
        else:
            with multiprocessing.Pool(processes=num_processes) as worker_pool:
                for _ in worker_pool.imap_unordered(game_runner_wrapper, task_list):
                    progress.update(1)

    logger.info("All game tasks have been processed.")
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def main():
    """Command-line entry point for the pairwise matrix Werewolf tournament.

    Parses the options, prepares the output directory and logger, loads the
    YAML config, picks a worker count and runs the tournament.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    default_config_path = os.path.join(here, "configs", "run", "run_config.yaml")

    cli = argparse.ArgumentParser(description="Run a pairwise matrix tournament for the Werewolf game.")
    cli.add_argument(
        "-o",
        "--output_dir",
        type=str,
        help="Output directory for game replays and logs.",
        default="werewolf_pairwise_matrix",
    )
    cli.add_argument(
        "-c",
        "--config",
        type=str,
        default=default_config_path,
        help="Path to the base configuration YAML file.",
    )
    cli.add_argument(
        "-t",
        "--num_tournaments",
        type=int,
        default=1,
        help="Number of tournaments to run. Each tournament is a full N*N matrix of games.",
    )
    cli.add_argument(
        "-r",
        "--use_random_agents",
        action="store_true",
        help="Use random agents for all players for fast testing.",
    )
    cli.add_argument(
        "-d",
        "--debug",
        action="store_true",
        help="Enable debug mode for the game environment. Forces sequential execution.",
    )
    cli.add_argument(
        "-p",
        "--parallel",
        action="store_true",
        help="Run games in parallel using multiple processes.",
    )
    cli.add_argument(
        "-n",
        "--num_processes",
        type=int,
        default=None,
        help="Number of processes for parallel execution.",
    )
    cli.add_argument(
        "-a",
        "--append_timestamp_to_dir",
        action="store_true",
        help="Append a timestamp to the output directory.",
    )

    args = cli.parse_args()

    output_dir = append_timestamp_to_dir(args.output_dir, append=args.append_timestamp_to_dir)
    os.makedirs(output_dir, exist_ok=True)
    setup_logger(output_dir, "run_pairwise_matrix")

    config = load_config(args.config)

    # An explicit -n wins; otherwise use 80% of the CPUs, but never fewer than one worker.
    if args.num_processes is not None:
        num_processes = args.num_processes
    else:
        num_processes = max(1, math.floor(multiprocessing.cpu_count() * 0.8))

    logger.info("Starting tournament with the following settings:")
    logger.info(f"Output Directory: {output_dir}")
    logger.info(f"Number of Tournaments: {args.num_tournaments}")
    logger.info(f"Parallel Execution: {args.parallel}")
    if args.parallel:
        logger.info(f"Number of Processes: {num_processes}")
    logger.info(f"Debug Mode: {args.debug}")
    logger.info(f"Use Random Agents: {args.use_random_agents}")

    with LogExecutionTime(logger_obj=logger, task_str="pairwise matrix tournament"):
        run_tournament(
            output_dir=output_dir,
            num_tournaments=args.num_tournaments,
            config=config,
            use_random_agents=args.use_random_agents,
            debug=args.debug,
            parallel=args.parallel,
            num_processes=num_processes,
        )
    logger.info("Tournament finished successfully.")


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
"""Run the settings in a given config with every agent replaced by an LLM agent backed by a single model.
|
|
2
|
+
This is useful for example to evaluate the game rule balance.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import copy
|
|
7
|
+
import logging
|
|
8
|
+
import multiprocessing
|
|
9
|
+
import os
|
|
10
|
+
import random
|
|
11
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
12
|
+
|
|
13
|
+
import tenacity
|
|
14
|
+
import yaml
|
|
15
|
+
from tqdm import tqdm
|
|
16
|
+
|
|
17
|
+
from kaggle_environments.envs.werewolf.runner import LogExecutionTime, append_timestamp_to_dir, setup_logger
|
|
18
|
+
from kaggle_environments.envs.werewolf.scripts.utils import run_single_game_cli
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# Retrying variant of run_single_game_cli: at most 3 attempts, with a randomised
# exponential wait (bounds min=2, max=10) between attempts; each retry is logged at INFO.
run_single_game_with_retry = tenacity.retry(
    stop=tenacity.stop_after_attempt(3),
    wait=tenacity.wait_random_exponential(multiplier=1, min=2, max=10),
    before_sleep=tenacity.before_sleep_log(logger, logging.INFO),
)(run_single_game_cli)


def game_runner_wrapper(args):
    """Run one game from a packed task tuple (single-argument form for executor workers).

    Args:
        args: A 4-tuple ``(game_dir, game_config, use_random_agents, debug)``.
    """
    output_dir, game_cfg, random_agents, debug_flag = args
    run_single_game_with_retry(output_dir, game_cfg, random_agents, debug_flag)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def shuffle_field(agents, field_name):
    """Randomly redistribute the values of one field among the given agents, in place.

    Args:
        agents: Agent dicts, each containing ``field_name``; they are mutated.
        field_name: Key whose values are permuted across the agents.
    """
    permuted = []
    for agent in agents:
        permuted.append(agent[field_name])
    random.shuffle(permuted)

    # Write the permuted values back position by position.
    for position, agent in enumerate(agents):
        agent[field_name] = permuted[position]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def run_self_play_games(
    model_name,
    thumbnail,
    output_dir,
    num_games,
    config,
    use_random_agents,
    debug,
    parallel,
    num_processes,
    shuffle_roles,
):
    """
    Generates and runs game tasks for the self-play experiment.

    Every agent in ``config["game_config"]["agents"]`` is rewritten in place to
    use ``model_name`` (agent id, display name, thumbnail and first LLM entry).
    Each game then gets its own deep copy of the game config, with player ids
    always shuffled and roles (with their ``role_params``) shuffled when
    ``shuffle_roles`` is set.

    With ``parallel`` set, games run on a thread pool of ``num_processes``
    workers. A game that still fails after its retries is logged as an error
    and the remaining games continue. In sequential mode a failure propagates
    to the caller.

    Args:
        model_name: Model name substituted into every agent.
        thumbnail: Thumbnail URL substituted into every agent.
        output_dir: Root directory; ``game_<i>`` directories are created under it.
        num_games: Number of games to run.
        config: Parsed run configuration (its agents are modified in place).
        use_random_agents: Passed through to each game task.
        debug: Passed through to each game task; only a warning is logged here.
        parallel: Whether to run the games on a thread pool.
        num_processes: Maximum number of worker threads when ``parallel`` is set.
        shuffle_roles: Whether to shuffle role assignments for each game.
    """
    if debug:
        logger.warning("Debug mode is enabled. Forcing sequential execution.")

    game_tasks = []
    base_game_config = config["game_config"]

    # modify the config to use a single model
    agents = base_game_config["agents"]
    for agent in agents:
        agent["thumbnail"] = thumbnail
        agent["agent_id"] = f"llm/{model_name}"
        agent["display_name"] = os.path.basename(model_name)
        agent["llms"][0]["model_name"] = model_name

    for i in range(num_games):
        game_output_dir = os.path.join(output_dir, f"game_{i}")
        os.makedirs(game_output_dir, exist_ok=True)

        # Independent copy so per-game shuffles never leak into other games.
        game_config = copy.deepcopy(base_game_config)

        if shuffle_roles:
            logger.info(f"Shuffling roles for game {i}")
            role_configs = [
                {"role": agent["role"], "role_params": agent.get("role_params", {})} for agent in game_config["agents"]
            ]
            random.shuffle(role_configs)
            for agent, role_config in zip(game_config["agents"], role_configs):
                agent["role"] = role_config["role"]
                agent["role_params"] = role_config["role_params"]

        # shuffle player ids
        logger.info(f"Shuffling player ids for game {i}")
        shuffle_field(game_config["agents"], "id")

        task = (game_output_dir, game_config, use_random_agents, debug)
        game_tasks.append(task)

    failed_game_dirs = []
    with tqdm(total=num_games, desc="Running Self-Play Games") as pbar:
        if parallel:
            with ThreadPoolExecutor(max_workers=num_processes) as executor:
                future_to_dir = {executor.submit(game_runner_wrapper, task): task[0] for task in game_tasks}
                for future in as_completed(future_to_dir):
                    # A worker's exception stays inside its future; surface it
                    # instead of silently counting the game as done.
                    error = future.exception()
                    if error is not None:
                        game_dir = future_to_dir[future]
                        failed_game_dirs.append(game_dir)
                        logger.error(f"Game in {game_dir} failed: {error!r}")
                    pbar.update(1)
        else:
            for task in game_tasks:
                game_runner_wrapper(task)
                pbar.update(1)

    if failed_game_dirs:
        logger.error(f"{len(failed_game_dirs)} of {num_games} self-play games failed.")
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def main():
    """Command-line entry point for single-model self-play Werewolf games.

    Parses the options, prepares the output directory and logger, loads the
    YAML config, picks a worker count when running in parallel and runs the games.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    default_config_path = os.path.join(script_dir, "configs", "run", "roundrobin_discussion_small.yaml")

    parser = argparse.ArgumentParser(description="Run N self-play Werewolf games based on a configuration file.")
    parser.add_argument(
        "-c", "--config_path", type=str, default=default_config_path, help="Path to the YAML configuration file."
    )
    parser.add_argument(
        "-o",
        "--output_dir",
        type=str,
        default="werewolf_self_play",
        help="Output directory for the log and replay files.",
    )
    parser.add_argument(
        "-m",
        "--model_name",
        type=str,
        default="gemini/gemini-2.5-flash",
        help="The model name by litellm for self play.",
    )
    parser.add_argument(
        "-t",
        "--thumbnail",
        type=str,
        default="https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png",
        help="The thumbnail image url.",
    )
    parser.add_argument("-n", "--num_games", type=int, default=1, help="Number of self-play games to run.")
    parser.add_argument("-d", "--debug", action="store_true", help="Enable debug mode.")
    parser.add_argument(
        "-r", "--random_agents", action="store_true", help="Use random agents for all players for fast testing."
    )
    parser.add_argument(
        "-a", "--append_timestamp_to_dir", action="store_true", help="Append a timestamp to the output directory."
    )
    parser.add_argument(
        "-s", "--shuffle_roles", action="store_true", help="If provided, shuffle the roles for each game."
    )
    parser.add_argument("-p", "--parallel", action="store_true", help="Run games in parallel using multiple processes.")
    parser.add_argument("--num_processes", type=int, default=None, help="Number of processes for parallel execution.")

    args = parser.parse_args()

    run_output_dir = append_timestamp_to_dir(args.output_dir, append=args.append_timestamp_to_dir)
    os.makedirs(run_output_dir, exist_ok=True)
    setup_logger(output_dir=run_output_dir, base_name="self_play")

    # Pin the encoding so parsing does not depend on the platform's default locale.
    with open(args.config_path, "r", encoding="utf-8") as f:
        config = yaml.safe_load(f)

    num_processes = args.num_processes
    if args.parallel and num_processes is None:
        # Default to 4x the number of CPUs for I/O bound tasks
        num_processes = multiprocessing.cpu_count() * 4

    logger.info("Starting self-play with the following settings:")
    logger.info(f"Model Name: {args.model_name}")
    logger.info(f"Thumbnail: {args.thumbnail}")
    logger.info(f"Output Directory: {run_output_dir}")
    logger.info(f"Number of Games: {args.num_games}")
    logger.info(f"Config Path: {args.config_path}")
    logger.info(f"Parallel Execution: {args.parallel}")
    if args.parallel:
        logger.info(f"Number of Processes: {num_processes}")
    logger.info(f"Debug Mode: {args.debug}")
    logger.info(f"Use Random Agents: {args.random_agents}")
    logger.info(f"Shuffle Roles: {args.shuffle_roles}")

    with LogExecutionTime(logger_obj=logger, task_str=f"{args.num_games} self-play games"):
        run_self_play_games(
            model_name=args.model_name,
            thumbnail=args.thumbnail,
            output_dir=run_output_dir,
            num_games=args.num_games,
            config=config,
            use_random_agents=args.random_agents,
            debug=args.debug,
            parallel=args.parallel,
            num_processes=num_processes,
            shuffle_roles=args.shuffle_roles,
        )

    logger.info("Self-play run finished successfully.")


if __name__ == "__main__":
    main()
|