kaggle-environments 1.22.6__py3-none-any.whl → 1.24.3__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

This version of kaggle-environments has been flagged as potentially problematic.

Files changed (68)
  1. kaggle_environments/envs/connectx/visualizer/default/index.html +13 -0
  2. kaggle_environments/envs/connectx/visualizer/default/package.json +22 -0
  3. kaggle_environments/envs/connectx/visualizer/default/replays/test-replay.json +1129 -0
  4. kaggle_environments/envs/connectx/visualizer/default/src/main.ts +12 -0
  5. kaggle_environments/envs/connectx/visualizer/default/src/renderer.ts +396 -0
  6. kaggle_environments/envs/connectx/visualizer/default/src/style.css +38 -0
  7. kaggle_environments/envs/connectx/visualizer/default/tsconfig.json +4 -0
  8. kaggle_environments/envs/connectx/visualizer/default/vite.config.ts +7 -0
  9. kaggle_environments/envs/open_spiel_env/games/repeated_poker/repeated_poker.js +163 -88
  10. kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/index.html +13 -0
  11. kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/package.json +23 -0
  12. kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/replays/test-replay.json +1 -0
  13. kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/scripts/print_first_steps.mjs +202 -0
  14. kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/scripts/print_replay.mjs +215 -0
  15. kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/scripts/print_steps_with_end_states.mjs +234 -0
  16. kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/components/getRepeatedPokerStateForStep.js +260 -0
  17. kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/components/utils.ts +61 -0
  18. kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/debug_repeated_poker_renderer.ts +49 -0
  19. kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/images/poker_chip_1.svg +22 -0
  20. kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/images/poker_chip_10.svg +22 -0
  21. kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/images/poker_chip_100.svg +48 -0
  22. kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/images/poker_chip_25.svg +22 -0
  23. kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/images/poker_chip_5.svg +22 -0
  24. kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/main.ts +36 -0
  25. kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/repeated_poker_renderer.ts +573 -0
  26. kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/style.css +594 -0
  27. kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/tsconfig.json +7 -0
  28. kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/vite.config.ts +6 -0
  29. kaggle_environments/envs/werewolf/README.md +190 -0
  30. kaggle_environments/envs/werewolf/harness/__init__.py +0 -0
  31. kaggle_environments/envs/werewolf/harness/base.py +773 -0
  32. kaggle_environments/envs/werewolf/harness/litellm_models.yaml +51 -0
  33. kaggle_environments/envs/werewolf/harness/main.py +54 -0
  34. kaggle_environments/envs/werewolf/harness/test_base.py +35 -0
  35. kaggle_environments/envs/werewolf/runner.py +146 -0
  36. kaggle_environments/envs/werewolf/scripts/__init__.py +0 -0
  37. kaggle_environments/envs/werewolf/scripts/add_audio.py +425 -0
  38. kaggle_environments/envs/werewolf/scripts/configs/audio/standard.yaml +24 -0
  39. kaggle_environments/envs/werewolf/scripts/configs/run/block_basic.yaml +102 -0
  40. kaggle_environments/envs/werewolf/scripts/configs/run/comprehensive.yaml +100 -0
  41. kaggle_environments/envs/werewolf/scripts/configs/run/roundrobin_discussion_DisableDoctorSelfSave_DisableDoctorConsecutiveSave_large.yaml +104 -0
  42. kaggle_environments/envs/werewolf/scripts/configs/run/roundrobin_discussion_large.yaml +103 -0
  43. kaggle_environments/envs/werewolf/scripts/configs/run/roundrobin_discussion_small.yaml +103 -0
  44. kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard.yaml +103 -0
  45. kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_DisableDoctorConsecutiveSave.yaml +104 -0
  46. kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_SeerRevealTeam.yaml +105 -0
  47. kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_SeerRevealTeam_NightEliminationNoReveal_DayExileNoReveal.yaml +105 -0
  48. kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_SeerRevealTeam_NightEliminationRevealTeam_DayExileRevealTeam.yaml +105 -0
  49. kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_disable_doctor_self_save.yaml +103 -0
  50. kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_parallel_voting.yaml +103 -0
  51. kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_parallel_voting_no_tie_exile.yaml +103 -0
  52. kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_parallel_voting_roundbiddiscussion.yaml +105 -0
  53. kaggle_environments/envs/werewolf/scripts/configs/run/run_config.yaml +58 -0
  54. kaggle_environments/envs/werewolf/scripts/configs/run/vertex_api_example_config.yaml +115 -0
  55. kaggle_environments/envs/werewolf/scripts/measure_cost.py +251 -0
  56. kaggle_environments/envs/werewolf/scripts/plot_existing_trajectories.py +135 -0
  57. kaggle_environments/envs/werewolf/scripts/rerender_html.py +87 -0
  58. kaggle_environments/envs/werewolf/scripts/run.py +93 -0
  59. kaggle_environments/envs/werewolf/scripts/run_block.py +237 -0
  60. kaggle_environments/envs/werewolf/scripts/run_pairwise_matrix.py +222 -0
  61. kaggle_environments/envs/werewolf/scripts/self_play.py +196 -0
  62. kaggle_environments/envs/werewolf/scripts/utils.py +47 -0
  63. kaggle_environments/envs/werewolf/werewolf.json +1 -1
  64. {kaggle_environments-1.22.6.dist-info → kaggle_environments-1.24.3.dist-info}/METADATA +1 -1
  65. {kaggle_environments-1.22.6.dist-info → kaggle_environments-1.24.3.dist-info}/RECORD +68 -7
  66. {kaggle_environments-1.22.6.dist-info → kaggle_environments-1.24.3.dist-info}/WHEEL +0 -0
  67. {kaggle_environments-1.22.6.dist-info → kaggle_environments-1.24.3.dist-info}/entry_points.txt +0 -0
  68. {kaggle_environments-1.22.6.dist-info → kaggle_environments-1.24.3.dist-info}/licenses/LICENSE +0 -0
kaggle_environments/envs/werewolf/scripts/configs/run/vertex_api_example_config.yaml
@@ -0,0 +1,115 @@
+ # Settings for the add_audio.py script
+ script_settings:
+   server:
+     port: 7999
+   paths:
+     audio_dir_name: "audio"
+     debug_audio_dir_name: "debug_audio"
+     output_html_filename: "replay.html"
+   voices:
+     moderator: "enceladus"
+     players:
+       "gemini-2.0-flash-lite-001_1": 'Kore'
+       "gemini-2.0-flash-lite-001_2": 'Charon'
+       "gemini-2.5-flash_3": 'Leda'
+       "gemini-2.5-flash_4": 'Despina'
+       "gemini-2.5-flash_5": 'Erinome'
+       "gemini-2.5-flash_6": 'Gacrux'
+       "gemini-2.5-flash_7": 'Achird'
+       "gemini-2.5-flash_8": 'Puck'
+   audio:
+     static_moderator_messages:
+       night_begins: "(rate=\"fast\", volume=\"soft\", voice=\"mysterious\")[As darkness descends, the village falls silent.](rate=\"medium\", pitch=\"-2st\")[Everyone, close your eyes.]"
+       day_begins: "(rate=\"fast\", volume=\"loud\")[Wake up, villagers!] (rate=\"medium\", voice=\"neutral\")[The sun rises on a new day.] (break=\"50ms\") (rate=\"medium\", voice=\"somber\")[Let's see who survived the night.]"
+       discussion_begins: "(voice=\"authoritative\")[The town meeting now begins.] (voice=\"neutral\")[You have a few minutes to discuss and find the werewolves among you.] (voice=\"authoritative\")[Begin.]"
+       voting_begins: "(rate=\"slow\", voice=\"serious\")[The time for talk is over.] (break=\"50ms\") (rate=\"medium\", volume=\"loud\", voice=\"dramatic\")[Now, you must cast your votes!]"
+
+ # Configuration for the Werewolf game environment
+ game_config:
+   seed: 123
+   actTimeout: 300
+   runTimeout: 3600
+   discussion_protocol:
+     name: "RoundRobinDiscussion"
+     params:
+       max_rounds: 2
+   day_voting_protocol:
+     name: "SequentialVoting"
+   werewolf_night_vote_protocol:
+     name: "SequentialVoting"
+   # reveal_night_elimination_role: false
+   # reveal_day_exile_role: false
+   allow_doctor_self_save: true
+   agents:
+     - role: "Werewolf"
+       id: "Alex"
+       thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
+       agent_id: "llm/vertex_ai/gemini-2.0-flash-lite-001"
+       display_name: "vertex_ai/gemini-2.0-flash-lite-001"
+       agent_harness_name: "llm_harness"
+       chat_mode: "text"
+       llms:
+         - model_name: "llm/vertex_ai/gemini-2.0-flash-lite-001"
+     - role: "Werewolf"
+       id: "Jordan"
+       thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
+       agent_id: "llm/vertex_ai/gemini-2.0-flash-lite-001"
+       display_name: "vertex_ai/gemini-2.0-flash-lite-001"
+       agent_harness_name: "llm_harness"
+       chat_mode: "text"
+       llms:
+         - model_name: "llm/vertex_ai/gemini-2.0-flash-lite-001"
+     - role: "Doctor"
+       id: "Taylor"
+       thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
+       agent_id: "llm/vertex_ai/gemini-2.5-flash"
+       display_name: "llm/vertex_ai/gemini-2.5-flash"
+       agent_harness_name: "llm_harness"
+       chat_mode: "text"
+       llms:
+         - model_name: "llm/vertex_ai/gemini-2.5-flash"
+     - role: "Seer"
+       id: "Casey"
+       thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
+       agent_id: "llm/vertex_ai/gemini-2.5-flash"
+       display_name: "llm/vertex_ai/gemini-2.5-flash"
+       agent_harness_name: "llm_harness"
+       chat_mode: "text"
+       llms:
+         - model_name: "llm/vertex_ai/gemini-2.5-flash"
+     - role: "Villager"
+       id: "Riley"
+       thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
+       agent_id: "llm/vertex_ai/gemini-2.5-flash"
+       display_name: "llm/vertex_ai/gemini-2.5-flash"
+       agent_harness_name: "llm_harness"
+       chat_mode: "text"
+       llms:
+         - model_name: "llm/vertex_ai/gemini-2.5-flash"
+     - role: "Villager"
+       id: "Jamie"
+       thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
+       agent_id: "llm/vertex_ai/gemini-2.5-flash"
+       display_name: "llm/vertex_ai/gemini-2.5-flash"
+       agent_harness_name: "llm_harness"
+       chat_mode: "text"
+       llms:
+         - model_name: "llm/vertex_ai/gemini-2.5-flash"
+     - role: "Villager"
+       id: "Morgan"
+       thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
+       agent_id: "llm/vertex_ai/gemini-2.5-flash"
+       display_name: "llm/vertex_ai/gemini-2.5-flash"
+       agent_harness_name: "llm_harness"
+       chat_mode: "text"
+       llms:
+         - model_name: "llm/vertex_ai/gemini-2.5-flash"
+     - role: "Villager"
+       id: "Skyler"
+       thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
+       agent_id: "llm/vertex_ai/gemini-2.5-flash"
+       display_name: "llm/vertex_ai/gemini-2.5-flash"
+       agent_harness_name: "llm_harness"
+       chat_mode: "text"
+       llms:
+         - model_name: "llm/vertex_ai/gemini-2.5-flash"
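
The script_settings block above is consumed by the audio post-processing script, and the players map is keyed by "<model>_<seat>" speaker identifiers. As a rough illustration, a consumer could resolve a voice per speaker from this file as sketched below; the loader, the path, and the fallback choice are assumptions made for this example, not the actual add_audio.py interface.

import yaml

# Hypothetical path; adjust to wherever the config lives in your checkout.
with open("kaggle_environments/envs/werewolf/scripts/configs/run/vertex_api_example_config.yaml") as f:
    cfg = yaml.safe_load(f)

voices = cfg["script_settings"]["voices"]

def voice_for(speaker: str) -> str:
    # The moderator has a fixed voice; players are looked up by their
    # "<model>_<seat>" key. Falling back to the moderator voice for
    # unknown speakers is an illustrative choice, not the script's behavior.
    if speaker == "moderator":
        return voices["moderator"]
    return voices["players"].get(speaker, voices["moderator"])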
kaggle_environments/envs/werewolf/scripts/measure_cost.py
@@ -0,0 +1,251 @@
+ import argparse
+ import json
+ import logging
+ import os
+ import random
+ from datetime import datetime
+
+ import matplotlib.pyplot as plt
+ import numpy as np
+ import yaml
+
+ from kaggle_environments.envs.werewolf.runner import run_werewolf, setup_logger
+ from kaggle_environments.envs.werewolf.werewolf import LLM_MODEL_NAMES, CostSummary
+
+ logger = logging.getLogger(__name__)
+
+ AGENT_NAMES = ["Alex", "Jordan", "Taylor", "Casey", "Riley", "Jamie", "Morgan", "Skyler"]
+ DEFAULT_MODEL = "gemini/gemini-2.5-flash"
+
+
+ def setup_game_config(max_turns: int, base_config: dict, model_name: str):
+     """
+     Sets up the game configuration for a single run.
+     """
+     config = base_config.copy()
+
+     # Define roles and shuffle them
+     roles = ["Werewolf", "Werewolf", "Doctor", "Seer", "Villager", "Villager", "Villager", "Villager"]
+     random.shuffle(roles)
+     random.shuffle(AGENT_NAMES)
+
+     # Create agent configurations
+     agents_config = []
+     for i, role in enumerate(roles):
+         player_name = AGENT_NAMES[i]
+         agents_config.append(
+             {
+                 "role": role,
+                 "id": player_name,
+                 "agent_id": f"llm/{model_name}",
+                 "display_name": f"{model_name}/{player_name}",
+                 "agent_harness_name": "llm_harness",
+                 "chat_mode": "text",
+                 "llms": [{"model_name": model_name}],
+             }
+         )
+
+     config["agents"] = agents_config
+
+     # Update discussion protocol with the specified max_turns
+     if "discussion_protocol" in config and config["discussion_protocol"]["name"] == "TurnByTurnBiddingDiscussion":
+         config["discussion_protocol"]["params"]["max_turns"] = max_turns
+     else:
+         logger.warning("Could not find 'TurnByTurnBiddingDiscussion' protocol to set max_turns.")
+
+     # Set a new random seed for each game to ensure role/name shuffling is different
+     config["seed"] = random.randint(0, 2**32 - 1)
+
+     agent_harnesses = [f"llm/{model_name}"] * len(roles)
+
+     return config, agent_harnesses
+
+
+ def plot_results(summary_data, output_dir):
+     """
+     Plots the results and saves them to files.
+     """
+     max_turns = sorted([int(k) for k in summary_data.keys()])
+     metrics = ["total_cost", "total_tokens", "total_prompt_tokens", "total_completion_tokens"]
+
+     for metric in metrics:
+         means = [summary_data[str(t)][metric]["mean"] for t in max_turns]
+         stds = [summary_data[str(t)][metric]["std"] for t in max_turns]
+
+         plt.figure(figsize=(10, 6))
+         plt.errorbar(max_turns, means, yerr=stds, fmt="-o", capsize=5, ecolor="red", markeredgecolor="black")
+         plt.xlabel("Maximum Turns in Discussion")
+         plt.ylabel(metric.replace("_", " ").title())
+         plt.title(f"{metric.replace('_', ' ').title()} vs. Maximum Turns")
+         plt.grid(True, which="both", linestyle="--", linewidth=0.5)
+         plt.xticks(max_turns)
+
+         plot_filename = os.path.join(output_dir, f"{metric}_vs_max_turns.png")
+         plt.savefig(plot_filename)
+         plt.close()
+         logger.info(f"Saved plot: {plot_filename}")
+
+
+ def plot_token_trajectories(trajectories_data, output_dir):
+     """
+     Plots token usage trajectories, grouped by max_turns, and saves them to files.
+     """
+     for metric, trajectories_by_turns in trajectories_data.items():
+         if not trajectories_by_turns:
+             continue
+
+         plt.figure(figsize=(12, 8))
+
+         # Create a color map for the different turn settings
+         turn_keys = sorted(trajectories_by_turns.keys(), key=int)
+         colors = plt.cm.viridis(np.linspace(0, 1, len(turn_keys)))
+         color_map = {turns: color for turns, color in zip(turn_keys, colors)}
+
+         for turns, trajectories in sorted(trajectories_by_turns.items(), key=lambda item: int(item[0])):
+             for i, traj in enumerate(trajectories):
+                 # Only add a label to the first trajectory of each group for a clean legend
+                 label = f"Max Turns: {turns}" if i == 0 else None
+                 plt.plot(np.arange(len(traj)), traj, linestyle="-", alpha=0.4, color=color_map[turns], label=label)
+
+         plt.title(f"{metric.replace('_', ' ').title()} per Query Step Trajectories")
+         plt.xlabel("Query Step")
+         plt.ylabel(f"{metric.replace('_', ' ').title()} per Query Step")
+         plt.grid(True, which="both", linestyle="--", linewidth=0.5)
+         plt.legend()
+
+         plot_filename = os.path.join(output_dir, f"{metric}_trajectories.png")
+         plt.savefig(plot_filename)
+         plt.close()
+         logger.info(f"Saved trajectory plot: {plot_filename}")
+
+
+ def main():
+     parser = argparse.ArgumentParser(description="Measure LLM cost for the Werewolf game.")
+     parser.add_argument(
+         "-c",
+         "--config_path",
+         type=str,
+         default=os.path.join(os.path.dirname(__file__), "configs/run/comprehensive.yaml"),
+         help="Path to the base YAML configuration file.",
+     )
+     parser.add_argument(
+         "-o",
+         "--output_dir",
+         type=str,
+         default="cost_measurement",
+         help="Output directory for logs, replays, and results.",
+     )
+     parser.add_argument(
+         "-m",
+         "--model_name",
+         type=str,
+         default=DEFAULT_MODEL,
+         choices=LLM_MODEL_NAMES,
+         help="LiteLLM model name to use for all agents.",
+     )
+     parser.add_argument("-d", "--disable_debug_mode", action="store_true", help="Disable debug mode.")
+
+     args = parser.parse_args()
+
+     # Create a unique subdirectory for this run
+     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+     run_output_dir = os.path.join(args.output_dir, f"run_{timestamp}")
+     os.makedirs(run_output_dir, exist_ok=True)
+
+     log_filename = f"measure_cost_{timestamp}"
+     setup_logger(output_dir=run_output_dir, base_name=log_filename)
+     logger.info(f"Starting cost measurement script. Results will be saved in: {run_output_dir}")
+
+     # Load base game configuration
+     with open(args.config_path, "r") as f:
+         base_config = yaml.safe_load(f).get("game_config", {})
+
+     max_turns_to_test = [8, 12, 16, 20, 24]
+     runs_per_setting = 3
+     results = {
+         str(t): {"total_cost": [], "total_tokens": [], "total_prompt_tokens": [], "total_completion_tokens": []}
+         for t in max_turns_to_test
+     }
+     all_trajectories = {
+         "total_tokens": {str(t): [] for t in max_turns_to_test},
+         "reasoning_tokens": {str(t): [] for t in max_turns_to_test},
+         "text_tokens": {str(t): [] for t in max_turns_to_test},
+     }
+
+     for turns in max_turns_to_test:
+         logger.info(f"--- Starting runs for max_turns = {turns} ---")
+         for run in range(runs_per_setting):
+             base_name = f"game_turns_{turns}_run_{run + 1}"
+             logger.info(f"Starting {base_name}...")
+
+             game_config, agent_harnesses = setup_game_config(turns, base_config, args.model_name)
+
+             try:
+                 final_env = run_werewolf(
+                     output_dir=run_output_dir,
+                     base_name=base_name,
+                     config=game_config,
+                     agents=agent_harnesses,
+                     debug=not args.disable_debug_mode,
+                 )
+
+                 # Extract cost summary
+                 cost_summary_dict = final_env.info.get("GAME_END", {}).get("cost_summary", {})
+                 if cost_summary_dict:
+                     cost_summary = CostSummary(**cost_summary_dict)
+                     results[str(turns)]["total_cost"].append(cost_summary.total_cost)
+                     results[str(turns)]["total_tokens"].append(cost_summary.total_tokens)
+                     results[str(turns)]["total_prompt_tokens"].append(cost_summary.total_prompt_tokens)
+                     results[str(turns)]["total_completion_tokens"].append(cost_summary.total_completion_tokens)
+                     logger.info(f"Finished {base_name}. Total Cost: ${cost_summary.total_cost:.4f}")
+
+                     for agent_summary in cost_summary.cost_per_agent:
+                         if agent_summary.data and agent_summary.data.usage_history:
+                             usage_history_dicts = [usage.model_dump() for usage in agent_summary.data.usage_history]
+
+                             total_tokens_traj = [usage.get("total_tokens", 0) or 0 for usage in usage_history_dicts]
+                             all_trajectories["total_tokens"][str(turns)].append(total_tokens_traj)
+
+                             # completion_tokens_details may be None in a dumped usage record,
+                             # so guard before calling .get on it
+                             reasoning_tokens_traj = [
+                                 (usage.get("completion_tokens_details") or {}).get("reasoning_tokens", 0) or 0
+                                 for usage in usage_history_dicts
+                             ]
+                             all_trajectories["reasoning_tokens"][str(turns)].append(reasoning_tokens_traj)
+
+                             text_tokens_traj = [
+                                 (u.get("completion_tokens", 0) or 0)
+                                 - ((u.get("completion_tokens_details") or {}).get("reasoning_tokens", 0) or 0)
+                                 for u in usage_history_dicts
+                             ]
+                             all_trajectories["text_tokens"][str(turns)].append(text_tokens_traj)
+                 else:
+                     logger.error(f"Could not find cost summary for {base_name}.")
+
+             except Exception as e:
+                 logger.error(f"An error occurred during {base_name}: {e}", exc_info=True)
+
+     # Calculate mean and standard deviation
+     summary_data = {}
+     for turns, metrics in results.items():
+         summary_data[turns] = {}
+         for metric, values in metrics.items():
+             if values:
+                 summary_data[turns][metric] = {"mean": np.mean(values), "std": np.std(values), "raw_values": values}
+             else:
+                 summary_data[turns][metric] = {"mean": 0, "std": 0, "raw_values": []}
+
+     # Save summary to JSON
+     summary_filename = os.path.join(run_output_dir, "cost_analysis_summary.json")
+     with open(summary_filename, "w") as f:
+         json.dump(summary_data, f, indent=4)
+     logger.info(f"Saved summary results to {summary_filename}")
+
+     # Plot results
+     plot_results(summary_data, run_output_dir)
+     plot_token_trajectories(all_trajectories, run_output_dir)
+
+     logger.info("--- Cost measurement script finished ---")
+
+
+ if __name__ == "__main__":
+     main()
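
measure_cost.py splits each completion into reasoning tokens and plain text tokens: text tokens are completion_tokens minus reasoning_tokens, with None values coerced to zero. A small worked example of that arithmetic on a made-up usage record (the numbers are illustrative):

# A made-up usage record in the shape produced by usage.model_dump().
usage = {
    "prompt_tokens": 1200,
    "completion_tokens": 450,
    "total_tokens": 1650,
    "completion_tokens_details": {"reasoning_tokens": 300},
}

details = usage.get("completion_tokens_details") or {}  # details may be None in real records
reasoning = details.get("reasoning_tokens", 0) or 0     # 300
text = (usage.get("completion_tokens", 0) or 0) - reasoning
print(reasoning, text)  # 300 150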
kaggle_environments/envs/werewolf/scripts/plot_existing_trajectories.py
@@ -0,0 +1,135 @@
+ import argparse
+ import glob
+ import json
+ import logging
+ import os
+ import re
+ import sys
+
+ import matplotlib.pyplot as plt
+ import numpy as np
+
+ # Add the project root to the Python path before importing from kaggle_environments
+ project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
+ if project_root not in sys.path:
+     sys.path.insert(0, project_root)
+
+ from kaggle_environments.envs.werewolf.werewolf import CostSummary
+
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+ logger = logging.getLogger(__name__)
+
+
+ def plot_token_trajectories(trajectories_data, output_dir):
+     """
+     Plots token usage trajectories, grouped by max_turns, and saves them to files.
+     """
+     for metric, trajectories_by_turns in trajectories_data.items():
+         if not trajectories_by_turns:
+             logger.warning(f"No data found for metric '{metric}'. Skipping plot.")
+             continue
+
+         plt.figure(figsize=(12, 8))
+
+         # Create a color map for the different turn settings
+         turn_keys = sorted(trajectories_by_turns.keys(), key=int)
+         colors = plt.cm.viridis(np.linspace(0, 1, len(turn_keys)))
+         color_map = {turns: color for turns, color in zip(turn_keys, colors)}
+
+         for turns, trajectories in sorted(trajectories_by_turns.items(), key=lambda item: int(item[0])):
+             for i, traj in enumerate(trajectories):
+                 if not all(isinstance(x, (int, float)) for x in traj):
+                     logger.error(
+                         f"Trajectory for metric '{metric}' (turns={turns}) contains non-numeric data. Skipping."
+                     )
+                     continue
+                 # Only add a label to the first trajectory of each group for a clean legend
+                 label = f"Max Turns: {turns}" if i == 0 else None
+                 plt.plot(np.arange(len(traj)), traj, linestyle="-", alpha=0.4, color=color_map[turns], label=label)
+
+         plt.title(f"{metric.replace('_', ' ').title()} per Query Step Trajectories")
+         plt.xlabel("Query Step")
+         plt.ylabel(f"{metric.replace('_', ' ').title()} per Query Step")
+         plt.grid(True, which="both", linestyle="--", linewidth=0.5)
+         plt.legend()
+
+         plot_filename = os.path.join(output_dir, f"{metric}_trajectories.png")
+         plt.savefig(plot_filename)
+         plt.close()
+         logger.info(f"Saved trajectory plot: {plot_filename}")
+
+
+ def main():
+     parser = argparse.ArgumentParser(
+         description="Load data from a measure_cost.py output directory and generate token trajectory plots."
+     )
+     parser.add_argument(
+         "-i",
+         "--input_dir",
+         type=str,
+         required=True,
+         help="Path to the output directory of a previous measure_cost.py run.",
+     )
+     args = parser.parse_args()
+
+     if not os.path.isdir(args.input_dir):
+         logger.error(f"Input directory not found: {args.input_dir}")
+         return
+
+     logger.info(f"Loading data from: {args.input_dir}")
+
+     all_trajectories = {"total_tokens": {}, "reasoning_tokens": {}, "text_tokens": {}}
+
+     # Find all game replay JSON files
+     game_files = glob.glob(os.path.join(args.input_dir, "game_*_run_*.json"))
+     if not game_files:
+         logger.error(f"No game replay files (game_*_run_*.json) found in {args.input_dir}.")
+         return
+
+     logger.info(f"Found {len(game_files)} game replay files to process.")
+
+     for game_file in game_files:
+         # Extract max_turns from filename
+         match = re.search(r"game_turns_(\d+)_run_", os.path.basename(game_file))
+         if not match:
+             logger.warning(f"Could not parse max_turns from filename: {game_file}. Skipping.")
+             continue
+         turns = match.group(1)
+
+         with open(game_file, "r") as f:
+             game_data = json.load(f)
+
+         cost_summary_dict = game_data.get("info", {}).get("GAME_END", {}).get("cost_summary")
+         if not cost_summary_dict:
+             logger.warning(f"No cost_summary found in {game_file}. Skipping.")
+             continue
+
+         cost_summary = CostSummary(**cost_summary_dict)
+
+         for agent_summary in cost_summary.cost_per_agent:
+             if agent_summary.data and agent_summary.data.usage_history:
+                 usage_history_dicts = [usage.model_dump() for usage in agent_summary.data.usage_history]
+
+                 total_tokens_traj = [usage.get("total_tokens", 0) or 0 for usage in usage_history_dicts]
+                 all_trajectories["total_tokens"].setdefault(turns, []).append(total_tokens_traj)
+
+                 # completion_tokens_details may be None in a dumped usage record,
+                 # so guard before calling .get on it
+                 reasoning_tokens_traj = [
+                     (usage.get("completion_tokens_details") or {}).get("reasoning_tokens", 0) or 0
+                     for usage in usage_history_dicts
+                 ]
+                 all_trajectories["reasoning_tokens"].setdefault(turns, []).append(reasoning_tokens_traj)
+
+                 text_tokens_traj = [
+                     (u.get("completion_tokens", 0) or 0)
+                     - ((u.get("completion_tokens_details") or {}).get("reasoning_tokens", 0) or 0)
+                     for u in usage_history_dicts
+                 ]
+                 all_trajectories["text_tokens"].setdefault(turns, []).append(text_tokens_traj)
+
+     logger.info("Finished processing all files. Generating plots...")
+     plot_token_trajectories(all_trajectories, args.input_dir)
+     logger.info(f"--- Script finished. Plots saved in {args.input_dir} ---")
+
+
+ if __name__ == "__main__":
+     main()
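
The grouping in this script only works for replays that follow the filename convention measure_cost.py writes (game_turns_<T>_run_<N>.json). A quick sanity check of the regex it relies on, with illustrative filenames:

import re

for name in ["game_turns_12_run_1.json", "werewolf_game.json"]:
    match = re.search(r"game_turns_(\d+)_run_", name)
    print(name, "->", match.group(1) if match else "skipped")
# game_turns_12_run_1.json -> 12
# werewolf_game.json -> skipped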
kaggle_environments/envs/werewolf/scripts/rerender_html.py
@@ -0,0 +1,87 @@
+ import argparse
+ import json
+ import logging
+ import os
+
+ from kaggle_environments import make
+
+ # Configure logging
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+
+
+ def main():
+     """
+     Rerenders a Werewolf game replay HTML file from an existing game record JSON.
+     This is useful for updating the replay viewer to the latest version without
+     rerunning the entire game simulation.
+     """
+     parser = argparse.ArgumentParser(
+         description="Rerender a Werewolf game HTML replay from a JSON game record.",
+         formatter_class=argparse.RawTextHelpFormatter,
+     )
+     parser.add_argument(
+         "-i",
+         "--input_json",
+         type=str,
+         required=True,
+         help="Path to the input game record JSON file (e.g., werewolf_game.json).",
+     )
+     parser.add_argument(
+         "-o", "--output_html", type=str, required=True, help="Path to write the newly rendered HTML output file."
+     )
+     args = parser.parse_args()
+
+     logging.info(f"Loading game record from: {args.input_json}")
+     if not os.path.exists(args.input_json):
+         logging.error(f"Error: Input file not found at {args.input_json}")
+         return
+
+     try:
+         with open(args.input_json, "r", encoding="utf-8") as f:
+             replay_data = json.load(f)
+     except json.JSONDecodeError:
+         logging.error(f"Error: Failed to decode JSON from {args.input_json}. The file might be corrupted.")
+         return
+     except Exception as e:
+         logging.error(f"An unexpected error occurred while reading the file: {e}")
+         return
+
+     logging.info("Successfully loaded game data. Initializing Kaggle environment...")
+
+     # The environment name should be stored in the replay, but we default to 'werewolf'
+     env_name = replay_data.get("name", "werewolf")
+     if env_name != "werewolf":
+         logging.warning(f"Game record is for '{env_name}', but we are rendering with the 'werewolf' environment.")
+
+     try:
+         # Recreate the environment state from the replay file
+         env = make(
+             "werewolf",
+             configuration=replay_data.get("configuration"),
+             steps=replay_data.get("steps", []),
+             info=replay_data.get("info", {}),
+         )
+         logging.info("Environment initialized. Rendering new HTML...")
+
+         # Render the HTML. This will use the werewolf.js file included in the
+         # installed kaggle_environments package.
+         html_content = env.render(mode="html")
+
+         output_dir = os.path.dirname(args.output_html)
+         if output_dir:
+             os.makedirs(output_dir, exist_ok=True)
+
+         with open(args.output_html, "w", encoding="utf-8") as f:
+             f.write(html_content)
+
+         logging.info(f"Successfully rerendered HTML to: {args.output_html}")
+
+     except Exception as e:
+         logging.error(f"An error occurred during environment creation or rendering: {e}")
+         logging.error(
+             "Please ensure the 'kaggle_environments' package is correctly installed and the JSON file is valid."
+         )
+
+
+ if __name__ == "__main__":
+     main()
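
Stripped of argument parsing and error handling, the script boils down to the make() and render() calls it already contains. A minimal programmatic sketch of the same flow, with placeholder file paths:

import json
from kaggle_environments import make

# Placeholder paths; substitute a real replay JSON and output location.
with open("werewolf_game.json", encoding="utf-8") as f:
    replay = json.load(f)

# Rebuild the environment state from the saved replay, then render fresh HTML
# using the visualizer bundled with the installed package.
env = make(
    "werewolf",
    configuration=replay.get("configuration"),
    steps=replay.get("steps", []),
    info=replay.get("info", {}),
)
with open("replay.html", "w", encoding="utf-8") as f:
    f.write(env.render(mode="html"))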
kaggle_environments/envs/werewolf/scripts/run.py
@@ -0,0 +1,93 @@
+ import argparse
+ import logging
+ import os
+ import random
+
+ import yaml
+
+ from kaggle_environments.envs.werewolf.harness.base import LLMWerewolfAgent
+ from kaggle_environments.envs.werewolf.runner import (
+     LogExecutionTime,
+     append_timestamp_to_dir,
+     log_git_hash,
+     run_werewolf,
+     setup_logger,
+ )
+ from kaggle_environments.envs.werewolf.werewolf import LLM_SYSTEM_PROMPT, AgentFactoryWrapper, register_agents
+
+ logger = logging.getLogger(__name__)
+
+
+ def main():
+     parser = argparse.ArgumentParser(description="Run a single Werewolf game.")
+     parser.add_argument(
+         "-c",
+         "--config_path",
+         type=str,
+         default=os.path.join(os.path.dirname(__file__), "configs/run/run_config.yaml"),
+         help="Path to the YAML configuration file.",
+     )
+     parser.add_argument(
+         "-o", "--output_dir", type=str, default="werewolf_run", help="Output directory for the log and replay file."
+     )
+     parser.add_argument("-d", "--debug", action="store_true", help="Enable debug mode.")
+     parser.add_argument(
+         "-r", "--random_agents", action="store_true", help="Use random agents for all players for fast testing."
+     )
+     parser.add_argument(
+         "-a", "--append_timestamp_to_dir", action="store_true", help="Append a timestamp to the output directory."
+     )
+     parser.add_argument(
+         "-s", "--shuffle_roles", action="store_true", help="If provided, shuffle the roles provided in the config."
+     )
+
+     args = parser.parse_args()
+
+     # Create a unique subdirectory for this run
+     run_output_dir = append_timestamp_to_dir(args.output_dir, append=args.append_timestamp_to_dir)
+
+     os.makedirs(run_output_dir, exist_ok=True)
+
+     base_name = "werewolf_game"
+     setup_logger(output_dir=run_output_dir, base_name=base_name)
+
+     log_git_hash()
+
+     # Load game configuration
+     with open(args.config_path, "r") as f:
+         config = yaml.safe_load(f)
+     game_config = config.get("game_config", {})
+
+     # Shuffle roles
+     if args.shuffle_roles:
+         role_and_params = [(agent["role"], agent.get("role_params", {})) for agent in game_config["agents"]]
+         random.shuffle(role_and_params)
+         for agent, (new_role, new_role_params) in zip(game_config["agents"], role_and_params):
+             agent["role"] = new_role
+             agent["role_params"] = new_role_params
+
+     # Extract agent harnesses from the config and register the agents
+     agents_ = [agent.get("agent_id", "random") for agent in game_config.get("agents", [])]
+     agent_dict = {}
+     for agent_name in agents_:
+         if agent_name.startswith("llm/"):
+             model_name = agent_name.removeprefix("llm/")  # removeprefix, not lstrip: lstrip strips a character set
+             agent_dict[agent_name] = AgentFactoryWrapper(
+                 LLMWerewolfAgent, model_name=model_name, system_prompt=LLM_SYSTEM_PROMPT
+             )
+     register_agents(agent_dict)
+
+     if args.random_agents:
+         logger.info("Using random agents for all players.")
+         agents_ = ["random"] * len(agents_)
+
+     logger.info(f"Starting Werewolf game run. Output will be saved to: {run_output_dir}")
+     with LogExecutionTime(logger_obj=logger, task_str="single game"):
+         run_werewolf(
+             output_dir=run_output_dir, base_name=base_name, config=game_config, agents=agents_, debug=args.debug
+         )
+     logger.info(f"Game finished. Replay and log saved in: {run_output_dir}")
+
+
+ if __name__ == "__main__":
+     main()
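
Note the prefix handling in main(): str.removeprefix (Python 3.9+) removes the literal "llm/" prefix, whereas str.lstrip, an easy mistake here, strips a character set and silently mangles model names that begin with 'l', 'm', or '/'. A quick demonstration with an illustrative model id:

agent_id = "llm/llama-3.1-70b"  # illustrative model id
print(agent_id.lstrip("llm/"))        # "ama-3.1-70b"  -- leading l/m// characters stripped, wrong
print(agent_id.removeprefix("llm/"))  # "llama-3.1-70b" -- prefix removed, intended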