kaggle-environments 1.23.3__py3-none-any.whl → 1.23.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kaggle-environments might be problematic. Click here for more details.
- kaggle_environments/envs/open_spiel_env/games/repeated_poker/repeated_poker.js +2 -2
- kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/components/getRepeatedPokerStateForStep.js +41 -67
- kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/images/poker_chip_1.svg +22 -0
- kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/images/poker_chip_10.svg +22 -0
- kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/images/poker_chip_100.svg +48 -0
- kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/images/poker_chip_25.svg +22 -0
- kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/images/poker_chip_5.svg +22 -0
- kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/repeated_poker_renderer.js +557 -332
- kaggle_environments/envs/werewolf/README.md +190 -0
- kaggle_environments/envs/werewolf/harness/__init__.py +0 -0
- kaggle_environments/envs/werewolf/harness/base.py +767 -0
- kaggle_environments/envs/werewolf/harness/litellm_models.yaml +51 -0
- kaggle_environments/envs/werewolf/harness/test_base.py +35 -0
- kaggle_environments/envs/werewolf/runner.py +146 -0
- kaggle_environments/envs/werewolf/scripts/__init__.py +0 -0
- kaggle_environments/envs/werewolf/scripts/add_audio.py +425 -0
- kaggle_environments/envs/werewolf/scripts/configs/audio/standard.yaml +24 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/block_basic.yaml +102 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/comprehensive.yaml +100 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/roundrobin_discussion_DisableDoctorSelfSave_DisableDoctorConsecutiveSave_large.yaml +104 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/roundrobin_discussion_large.yaml +103 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/roundrobin_discussion_small.yaml +103 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard.yaml +103 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_DisableDoctorConsecutiveSave.yaml +104 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_SeerRevealTeam.yaml +105 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_SeerRevealTeam_NightEliminationNoReveal_DayExileNoReveal.yaml +105 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_SeerRevealTeam_NightEliminationRevealTeam_DayExileRevealTeam.yaml +105 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_disable_doctor_self_save.yaml +103 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_parallel_voting.yaml +103 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_parallel_voting_no_tie_exile.yaml +103 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_parallel_voting_roundbiddiscussion.yaml +105 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/run_config.yaml +58 -0
- kaggle_environments/envs/werewolf/scripts/configs/run/vertex_api_example_config.yaml +115 -0
- kaggle_environments/envs/werewolf/scripts/measure_cost.py +251 -0
- kaggle_environments/envs/werewolf/scripts/plot_existing_trajectories.py +135 -0
- kaggle_environments/envs/werewolf/scripts/rerender_html.py +87 -0
- kaggle_environments/envs/werewolf/scripts/run.py +93 -0
- kaggle_environments/envs/werewolf/scripts/run_block.py +237 -0
- kaggle_environments/envs/werewolf/scripts/run_pairwise_matrix.py +222 -0
- kaggle_environments/envs/werewolf/scripts/self_play.py +196 -0
- kaggle_environments/envs/werewolf/scripts/utils.py +47 -0
- {kaggle_environments-1.23.3.dist-info → kaggle_environments-1.23.5.dist-info}/METADATA +1 -1
- {kaggle_environments-1.23.3.dist-info → kaggle_environments-1.23.5.dist-info}/RECORD +46 -8
- {kaggle_environments-1.23.3.dist-info → kaggle_environments-1.23.5.dist-info}/WHEEL +0 -0
- {kaggle_environments-1.23.3.dist-info → kaggle_environments-1.23.5.dist-info}/entry_points.txt +0 -0
- {kaggle_environments-1.23.3.dist-info → kaggle_environments-1.23.5.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# Configuration for the Werewolf game environment
|
|
2
|
+
game_config:
|
|
3
|
+
seed: 123
|
|
4
|
+
actTimeout: 900
|
|
5
|
+
runTimeout: 7200
|
|
6
|
+
discussion_protocol:
|
|
7
|
+
name: "RoundRobinDiscussion"
|
|
8
|
+
params:
|
|
9
|
+
max_rounds: 2
|
|
10
|
+
assign_random_first_speaker: true
|
|
11
|
+
day_voting_protocol:
|
|
12
|
+
name: "SimultaneousMajority"
|
|
13
|
+
params:
|
|
14
|
+
tie_break: 'no_elected'
|
|
15
|
+
werewolf_night_vote_protocol:
|
|
16
|
+
name: "SequentialVoting"
|
|
17
|
+
params:
|
|
18
|
+
assign_random_first_voter: true
|
|
19
|
+
night_elimination_reveal_level: role
|
|
20
|
+
day_exile_reveal_level: role
|
|
21
|
+
agents:
|
|
22
|
+
- role: "Werewolf"
|
|
23
|
+
id: "Kai"
|
|
24
|
+
thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
|
|
25
|
+
agent_id: "llm/gemini/gemini-2.5-flash"
|
|
26
|
+
display_name: "gemini-2.5-flash"
|
|
27
|
+
agent_harness_name: "llm_harness"
|
|
28
|
+
chat_mode: "text"
|
|
29
|
+
enable_bid_reasoning: false
|
|
30
|
+
llms:
|
|
31
|
+
- model_name: "gemini/gemini-2.5-flash"
|
|
32
|
+
- role: "Werewolf"
|
|
33
|
+
id: "Jordan"
|
|
34
|
+
thumbnail: "https://images.seeklogo.com/logo-png/61/1/deepseek-ai-icon-logo-png_seeklogo-611473.png"
|
|
35
|
+
agent_id: "llm/openrouter/deepseek/deepseek-chat-v3.1"
|
|
36
|
+
display_name: "deepseek-chat-v3.1"
|
|
37
|
+
agent_harness_name: "llm_harness"
|
|
38
|
+
chat_mode: "text"
|
|
39
|
+
enable_bid_reasoning: false
|
|
40
|
+
llms:
|
|
41
|
+
- model_name: "openrouter/deepseek/deepseek-chat-v3.1"
|
|
42
|
+
- role: "Doctor"
|
|
43
|
+
role_params:
|
|
44
|
+
allow_self_save: true
|
|
45
|
+
id: "Charlie"
|
|
46
|
+
thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png"
|
|
47
|
+
agent_id: "llm/openrouter/openai/gpt-4o-mini"
|
|
48
|
+
display_name: "gpt-4o-mini"
|
|
49
|
+
agent_harness_name: "llm_harness"
|
|
50
|
+
chat_mode: "text"
|
|
51
|
+
enable_bid_reasoning: false
|
|
52
|
+
llms:
|
|
53
|
+
- model_name: "openrouter/openai/gpt-4o-mini"
|
|
54
|
+
- role: "Seer"
|
|
55
|
+
id: "Taylor"
|
|
56
|
+
thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png"
|
|
57
|
+
agent_id: "llm/openrouter/qwen/qwen3-235b-a22b-2507"
|
|
58
|
+
display_name: "qwen3-235b"
|
|
59
|
+
agent_harness_name: "llm_harness"
|
|
60
|
+
chat_mode: "text"
|
|
61
|
+
enable_bid_reasoning: false
|
|
62
|
+
llms:
|
|
63
|
+
- model_name: "openrouter/qwen/qwen3-235b-a22b-2507"
|
|
64
|
+
- role: "Villager"
|
|
65
|
+
id: "Alex"
|
|
66
|
+
thumbnail: "https://z-cdn.chatglm.cn/z-ai/static/logo.svg"
|
|
67
|
+
agent_id: "llm/openrouter/z-ai/glm-4.5"
|
|
68
|
+
display_name: "glm-4.5"
|
|
69
|
+
agent_harness_name: "llm_harness"
|
|
70
|
+
chat_mode: "text"
|
|
71
|
+
enable_bid_reasoning: false
|
|
72
|
+
llms:
|
|
73
|
+
- model_name: "openrouter/z-ai/glm-4.5"
|
|
74
|
+
- role: "Villager"
|
|
75
|
+
id: "Jamie"
|
|
76
|
+
thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png"
|
|
77
|
+
agent_id: "llm/openrouter/openai/gpt-oss-120b"
|
|
78
|
+
display_name: "gpt-oss-120b"
|
|
79
|
+
agent_harness_name: "llm_harness"
|
|
80
|
+
chat_mode: "text"
|
|
81
|
+
enable_bid_reasoning: false
|
|
82
|
+
llms:
|
|
83
|
+
- model_name: "openrouter/openai/gpt-oss-120b"
|
|
84
|
+
- role: "Villager"
|
|
85
|
+
id: "Quinn"
|
|
86
|
+
thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
|
|
87
|
+
agent_id: "llm/gemini/gemini-2.5-pro"
|
|
88
|
+
display_name: "gemini-2.5-pro"
|
|
89
|
+
agent_harness_name: "llm_harness"
|
|
90
|
+
chat_mode: "text"
|
|
91
|
+
enable_bid_reasoning: false
|
|
92
|
+
llms:
|
|
93
|
+
- model_name: "gemini/gemini-2.5-pro"
|
|
94
|
+
- role: "Villager"
|
|
95
|
+
id: "Casey"
|
|
96
|
+
thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png"
|
|
97
|
+
agent_id: "llm/openrouter/qwen/qwen3-30b-a3b"
|
|
98
|
+
display_name: "qwen3-30b"
|
|
99
|
+
agent_harness_name: "llm_harness"
|
|
100
|
+
chat_mode: "text"
|
|
101
|
+
enable_bid_reasoning: false
|
|
102
|
+
llms:
|
|
103
|
+
- model_name: "openrouter/qwen/qwen3-30b-a3b"
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# Configuration for the Werewolf game environment
|
|
2
|
+
game_config:
|
|
3
|
+
seed: 123
|
|
4
|
+
actTimeout: 900
|
|
5
|
+
runTimeout: 7200
|
|
6
|
+
discussion_protocol:
|
|
7
|
+
name: "RoundByRoundBiddingDiscussion"
|
|
8
|
+
params:
|
|
9
|
+
bidding:
|
|
10
|
+
name: "SimpleBiddingProtocol"
|
|
11
|
+
max_rounds: 2
|
|
12
|
+
bid_result_public: true
|
|
13
|
+
day_voting_protocol:
|
|
14
|
+
name: "SimultaneousMajority"
|
|
15
|
+
params:
|
|
16
|
+
tie_break: 'random'
|
|
17
|
+
werewolf_night_vote_protocol:
|
|
18
|
+
name: "SequentialVoting"
|
|
19
|
+
params:
|
|
20
|
+
assign_random_first_voter: true
|
|
21
|
+
night_elimination_reveal_level: role
|
|
22
|
+
day_exile_reveal_level: role
|
|
23
|
+
agents:
|
|
24
|
+
- role: "Werewolf"
|
|
25
|
+
id: "Kai"
|
|
26
|
+
thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
|
|
27
|
+
agent_id: "llm/gemini/gemini-2.5-flash"
|
|
28
|
+
display_name: "gemini-2.5-flash"
|
|
29
|
+
agent_harness_name: "llm_harness"
|
|
30
|
+
chat_mode: "text"
|
|
31
|
+
enable_bid_reasoning: false
|
|
32
|
+
llms:
|
|
33
|
+
- model_name: "gemini/gemini-2.5-flash"
|
|
34
|
+
- role: "Werewolf"
|
|
35
|
+
id: "Jordan"
|
|
36
|
+
thumbnail: "https://images.seeklogo.com/logo-png/61/1/deepseek-ai-icon-logo-png_seeklogo-611473.png"
|
|
37
|
+
agent_id: "llm/openrouter/deepseek/deepseek-chat-v3.1"
|
|
38
|
+
display_name: "deepseek-chat-v3.1"
|
|
39
|
+
agent_harness_name: "llm_harness"
|
|
40
|
+
chat_mode: "text"
|
|
41
|
+
enable_bid_reasoning: false
|
|
42
|
+
llms:
|
|
43
|
+
- model_name: "openrouter/deepseek/deepseek-chat-v3.1"
|
|
44
|
+
- role: "Doctor"
|
|
45
|
+
role_params:
|
|
46
|
+
allow_self_save: true
|
|
47
|
+
id: "Charlie"
|
|
48
|
+
thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png"
|
|
49
|
+
agent_id: "llm/openrouter/openai/gpt-4o-mini"
|
|
50
|
+
display_name: "gpt-4o-mini"
|
|
51
|
+
agent_harness_name: "llm_harness"
|
|
52
|
+
chat_mode: "text"
|
|
53
|
+
enable_bid_reasoning: false
|
|
54
|
+
llms:
|
|
55
|
+
- model_name: "openrouter/openai/gpt-4o-mini"
|
|
56
|
+
- role: "Seer"
|
|
57
|
+
id: "Taylor"
|
|
58
|
+
thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png"
|
|
59
|
+
agent_id: "llm/openrouter/qwen/qwen3-235b-a22b-2507"
|
|
60
|
+
display_name: "qwen3-235b"
|
|
61
|
+
agent_harness_name: "llm_harness"
|
|
62
|
+
chat_mode: "text"
|
|
63
|
+
enable_bid_reasoning: false
|
|
64
|
+
llms:
|
|
65
|
+
- model_name: "openrouter/qwen/qwen3-235b-a22b-2507"
|
|
66
|
+
- role: "Villager"
|
|
67
|
+
id: "Alex"
|
|
68
|
+
thumbnail: "https://z-cdn.chatglm.cn/z-ai/static/logo.svg"
|
|
69
|
+
agent_id: "llm/openrouter/z-ai/glm-4.5"
|
|
70
|
+
display_name: "glm-4.5"
|
|
71
|
+
agent_harness_name: "llm_harness"
|
|
72
|
+
chat_mode: "text"
|
|
73
|
+
enable_bid_reasoning: false
|
|
74
|
+
llms:
|
|
75
|
+
- model_name: "openrouter/z-ai/glm-4.5"
|
|
76
|
+
- role: "Villager"
|
|
77
|
+
id: "Jamie"
|
|
78
|
+
thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png"
|
|
79
|
+
agent_id: "llm/openrouter/openai/gpt-oss-120b"
|
|
80
|
+
display_name: "gpt-oss-120b"
|
|
81
|
+
agent_harness_name: "llm_harness"
|
|
82
|
+
chat_mode: "text"
|
|
83
|
+
enable_bid_reasoning: false
|
|
84
|
+
llms:
|
|
85
|
+
- model_name: "openrouter/openai/gpt-oss-120b"
|
|
86
|
+
- role: "Villager"
|
|
87
|
+
id: "Quinn"
|
|
88
|
+
thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
|
|
89
|
+
agent_id: "llm/gemini/gemini-2.5-pro"
|
|
90
|
+
display_name: "gemini-2.5-pro"
|
|
91
|
+
agent_harness_name: "llm_harness"
|
|
92
|
+
chat_mode: "text"
|
|
93
|
+
enable_bid_reasoning: false
|
|
94
|
+
llms:
|
|
95
|
+
- model_name: "gemini/gemini-2.5-pro"
|
|
96
|
+
- role: "Villager"
|
|
97
|
+
id: "Casey"
|
|
98
|
+
thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png"
|
|
99
|
+
agent_id: "llm/openrouter/qwen/qwen3-30b-a3b"
|
|
100
|
+
display_name: "qwen3-30b"
|
|
101
|
+
agent_harness_name: "llm_harness"
|
|
102
|
+
chat_mode: "text"
|
|
103
|
+
enable_bid_reasoning: false
|
|
104
|
+
llms:
|
|
105
|
+
- model_name: "openrouter/qwen/qwen3-30b-a3b"
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# Configuration for the Werewolf game environment
|
|
2
|
+
game_config:
|
|
3
|
+
seed: 42
|
|
4
|
+
actTimeout: 300
|
|
5
|
+
runTimeout: 3600
|
|
6
|
+
discussion_protocol:
|
|
7
|
+
name: "TurnByTurnBiddingDiscussion"
|
|
8
|
+
params:
|
|
9
|
+
max_turns: 16
|
|
10
|
+
day_voting_protocol:
|
|
11
|
+
name: "SequentialVoting"
|
|
12
|
+
werewolf_night_vote_protocol:
|
|
13
|
+
name: "SequentialVoting"
|
|
14
|
+
night_elimination_reveal_level: no_reveal
|
|
15
|
+
day_exile_reveal_level: no_reveal
|
|
16
|
+
agents:
|
|
17
|
+
- role: "Werewolf"
|
|
18
|
+
id: "Player1"
|
|
19
|
+
agent_id: "llm/gemini/gemini-2.5-flash"
|
|
20
|
+
display_name: "Player1 (Flash)"
|
|
21
|
+
agent_harness_name: "llm_harness"
|
|
22
|
+
chat_mode: "text"
|
|
23
|
+
llms:
|
|
24
|
+
- model_name: "gemini/gemini-2.5-flash"
|
|
25
|
+
- role: "Werewolf"
|
|
26
|
+
id: "Player2"
|
|
27
|
+
agent_id: "llm/gemini/gemini-2.5-pro"
|
|
28
|
+
display_name: "Player2 (Pro)"
|
|
29
|
+
agent_harness_name: "llm_harness"
|
|
30
|
+
chat_mode: "text"
|
|
31
|
+
llms:
|
|
32
|
+
- model_name: "gemini/gemini-2.5-pro"
|
|
33
|
+
- role: "Doctor"
|
|
34
|
+
role_params:
|
|
35
|
+
allow_self_save: true
|
|
36
|
+
id: "Player3"
|
|
37
|
+
agent_id: "random"
|
|
38
|
+
display_name: "Player3 (Random)"
|
|
39
|
+
- role: "Seer"
|
|
40
|
+
id: "Player4"
|
|
41
|
+
agent_id: "random"
|
|
42
|
+
display_name: "Player4 (Random)"
|
|
43
|
+
- role: "Villager"
|
|
44
|
+
id: "Player5"
|
|
45
|
+
agent_id: "random"
|
|
46
|
+
display_name: "Player5 (Random)"
|
|
47
|
+
- role: "Villager"
|
|
48
|
+
id: "Player6"
|
|
49
|
+
agent_id: "random"
|
|
50
|
+
display_name: "Player6 (Random)"
|
|
51
|
+
- role: "Villager"
|
|
52
|
+
id: "Player7"
|
|
53
|
+
agent_id: "random"
|
|
54
|
+
display_name: "Player7 (Random)"
|
|
55
|
+
- role: "Villager"
|
|
56
|
+
id: "Player8"
|
|
57
|
+
agent_id: "random"
|
|
58
|
+
display_name: "Player8 (Random)"
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# Settings for the dump_audio.py script
|
|
2
|
+
script_settings:
|
|
3
|
+
server:
|
|
4
|
+
port: 7999
|
|
5
|
+
paths:
|
|
6
|
+
audio_dir_name: "audio"
|
|
7
|
+
debug_audio_dir_name: "debug_audio"
|
|
8
|
+
output_html_filename: "replay.html"
|
|
9
|
+
voices:
|
|
10
|
+
moderator: "enceladus"
|
|
11
|
+
players:
|
|
12
|
+
"gemini-2.0-flash-lite-001_1": 'Kore'
|
|
13
|
+
"gemini-2.0-flash-lite-001_2": 'Charon'
|
|
14
|
+
"gemini-2.5-flash_3": 'Leda'
|
|
15
|
+
"gemini-2.5-flash_4": 'Despina'
|
|
16
|
+
"gemini-2.5-flash_5": 'Erinome'
|
|
17
|
+
"gemini-2.5-flash_6": 'Gacrux'
|
|
18
|
+
"gemini-2.5-flash_7": 'Achird'
|
|
19
|
+
"gemini-2.5-flash_8": 'Puck'
|
|
20
|
+
audio:
|
|
21
|
+
static_moderator_messages:
|
|
22
|
+
night_begins: "(rate=\"fast\", volume=\"soft\", voice=\"mysterious\")[As darkness descends, the village falls silent.](rate=\"medium\", pitch=\"-2st\")[Everyone, close your eyes.]"
|
|
23
|
+
day_begins: "(rate=\"fast\", volume=\"loud\")[Wake up, villagers!] (rate=\"medium\", voice=\"neutral\")[The sun rises on a new day.] (break=\"50ms\") (rate=\"medium\", voice=\"somber\")[Let's see who survived the night.]"
|
|
24
|
+
discussion_begins: "(voice=\"authoritative\")[The town meeting now begins.] (voice=\"neutral\")[You have a few minutes to discuss and find the werewolves among you.] (voice=\"authoritative\")[Begin.]"
|
|
25
|
+
voting_begins: "(rate=\"slow\", voice=\"serious\")[The time for talk is over.] (break=\"50ms\") (rate=\"medium\", volume=\"loud\", voice=\"dramatic\")[Now, you must cast your votes!]"
|
|
26
|
+
|
|
27
|
+
# Configuration for the Werewolf game environment
|
|
28
|
+
game_config:
|
|
29
|
+
seed: 123
|
|
30
|
+
actTimeout: 300
|
|
31
|
+
runTimeout: 3600
|
|
32
|
+
discussion_protocol:
|
|
33
|
+
name: "RoundRobinDiscussion"
|
|
34
|
+
params:
|
|
35
|
+
max_rounds: 2
|
|
36
|
+
day_voting_protocol:
|
|
37
|
+
name: "SequentialVoting"
|
|
38
|
+
werewolf_night_vote_protocol:
|
|
39
|
+
name: "SequentialVoting"
|
|
40
|
+
# reveal_night_elimination_role: false
|
|
41
|
+
# reveal_day_exile_role: false
|
|
42
|
+
allow_doctor_self_save: true
|
|
43
|
+
agents:
|
|
44
|
+
- role: "Werewolf"
|
|
45
|
+
id: "Alex"
|
|
46
|
+
thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
|
|
47
|
+
agent_id: "llm/vertex_ai/gemini-2.0-flash-lite-001"
|
|
48
|
+
display_name: "vertex_ai/gemini-2.0-flash-lite-001"
|
|
49
|
+
agent_harness_name: "llm_harness"
|
|
50
|
+
chat_mode: "text"
|
|
51
|
+
llms:
|
|
52
|
+
- model_name: "llm/vertex_ai/gemini-2.0-flash-lite-001"
|
|
53
|
+
- role: "Werewolf"
|
|
54
|
+
id: "Jordan"
|
|
55
|
+
thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
|
|
56
|
+
agent_id: "llm/vertex_ai/gemini-2.0-flash-lite-001"
|
|
57
|
+
display_name: "vertex_ai/gemini-2.0-flash-lite-001"
|
|
58
|
+
agent_harness_name: "llm_harness"
|
|
59
|
+
chat_mode: "text"
|
|
60
|
+
llms:
|
|
61
|
+
- model_name: "llm/vertex_ai/gemini-2.0-flash-lite-001"
|
|
62
|
+
- role: "Doctor"
|
|
63
|
+
id: "Taylor"
|
|
64
|
+
thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
|
|
65
|
+
agent_id: "llm/vertex_ai/gemini-2.5-flash"
|
|
66
|
+
display_name: "llm/vertex_ai/gemini-2.5-flash"
|
|
67
|
+
agent_harness_name: "llm_harness"
|
|
68
|
+
chat_mode: "text"
|
|
69
|
+
llms:
|
|
70
|
+
- model_name: "llm/vertex_ai/gemini-2.5-flash"
|
|
71
|
+
- role: "Seer"
|
|
72
|
+
id: "Casey"
|
|
73
|
+
thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
|
|
74
|
+
agent_id: "llm/vertex_ai/gemini-2.5-flash"
|
|
75
|
+
display_name: "llm/vertex_ai/gemini-2.5-flash"
|
|
76
|
+
agent_harness_name: "llm_harness"
|
|
77
|
+
chat_mode: "text"
|
|
78
|
+
llms:
|
|
79
|
+
- model_name: "llm/vertex_ai/gemini-2.5-flash"
|
|
80
|
+
- role: "Villager"
|
|
81
|
+
id: "Riley"
|
|
82
|
+
thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
|
|
83
|
+
agent_id: "llm/vertex_ai/gemini-2.5-flash"
|
|
84
|
+
display_name: "llm/vertex_ai/gemini-2.5-flash"
|
|
85
|
+
agent_harness_name: "llm_harness"
|
|
86
|
+
chat_mode: "text"
|
|
87
|
+
llms:
|
|
88
|
+
- model_name: "llm/vertex_ai/gemini-2.5-flash"
|
|
89
|
+
- role: "Villager"
|
|
90
|
+
id: "Jamie"
|
|
91
|
+
thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
|
|
92
|
+
agent_id: "llm/vertex_ai/gemini-2.5-flash"
|
|
93
|
+
display_name: "llm/vertex_ai/gemini-2.5-flash"
|
|
94
|
+
agent_harness_name: "llm_harness"
|
|
95
|
+
chat_mode: "text"
|
|
96
|
+
llms:
|
|
97
|
+
- model_name: "llm/vertex_ai/gemini-2.5-flash"
|
|
98
|
+
- role: "Villager"
|
|
99
|
+
id: "Morgan"
|
|
100
|
+
thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
|
|
101
|
+
agent_id: "llm/vertex_ai/gemini-2.5-flash"
|
|
102
|
+
display_name: "llm/vertex_ai/gemini-2.5-flash"
|
|
103
|
+
agent_harness_name: "llm_harness"
|
|
104
|
+
chat_mode: "text"
|
|
105
|
+
llms:
|
|
106
|
+
- model_name: "llm/vertex_ai/gemini-2.5-flash"
|
|
107
|
+
- role: "Villager"
|
|
108
|
+
id: "Skyler"
|
|
109
|
+
thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
|
|
110
|
+
agent_id: "llm/vertex_ai/gemini-2.5-flash"
|
|
111
|
+
display_name: "llm/vertex_ai/gemini-2.5-flash"
|
|
112
|
+
agent_harness_name: "llm_harness"
|
|
113
|
+
chat_mode: "text"
|
|
114
|
+
llms:
|
|
115
|
+
- model_name: "llm/vertex_ai/gemini-2.5-flash"
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import json
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import random
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
|
|
8
|
+
import matplotlib.pyplot as plt
|
|
9
|
+
import numpy as np
|
|
10
|
+
import yaml
|
|
11
|
+
|
|
12
|
+
from kaggle_environments.envs.werewolf.runner import run_werewolf, setup_logger
|
|
13
|
+
from kaggle_environments.envs.werewolf.werewolf import LLM_MODEL_NAMES, CostSummary
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
AGENT_NAMES = ["Alex", "Jordan", "Taylor", "Casey", "Riley", "Jamie", "Morgan", "Skyler"]
|
|
18
|
+
DEFAULT_MODEL = "gemini/gemini-2.5-flash"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def setup_game_config(max_turns: int, base_config: dict, model_name: str):
    """Build the game configuration for a single run.

    Args:
        max_turns: Maximum number of discussion turns, applied to the
            "TurnByTurnBiddingDiscussion" protocol if present.
        base_config: Base game configuration. Left unmodified: a deep copy
            is taken before any mutation (the previous shallow ``.copy()``
            leaked ``max_turns`` writes into the caller's nested dicts).
        model_name: LiteLLM model name used for every agent.

    Returns:
        Tuple ``(config, agent_harnesses)`` where ``config`` is the per-run
        game config dict and ``agent_harnesses`` is one harness id per role.
    """
    import copy  # local import: only needed by this helper

    # Deep copy so mutations of nested dicts (e.g. discussion_protocol.params)
    # cannot bleed into base_config between runs.
    config = copy.deepcopy(base_config)

    # Define roles and shuffle them
    roles = ["Werewolf", "Werewolf", "Doctor", "Seer", "Villager", "Villager", "Villager", "Villager"]
    random.shuffle(roles)
    # Draw a shuffled copy of the names instead of shuffling the shared
    # module-level AGENT_NAMES list in place.
    player_names = random.sample(AGENT_NAMES, len(AGENT_NAMES))

    # Create agent configurations
    agents_config = []
    for i, role in enumerate(roles):
        player_name = player_names[i]
        agents_config.append(
            {
                "role": role,
                "id": player_name,
                "agent_id": f"llm/{model_name}",
                "display_name": f"{model_name}/{player_name}",
                "agent_harness_name": "llm_harness",
                "chat_mode": "text",
                "llms": [{"model_name": model_name}],
            }
        )

    config["agents"] = agents_config

    # Update discussion protocol with the specified max_turns
    if "discussion_protocol" in config and config["discussion_protocol"]["name"] == "TurnByTurnBiddingDiscussion":
        config["discussion_protocol"]["params"]["max_turns"] = max_turns
    else:
        logger.warning("Could not find 'TurnByTurnBiddingDiscussion' protocol to set max_turns.")

    # Set a new random seed for each game to ensure role/name shuffling is different
    config["seed"] = random.randint(0, 2**32 - 1)

    agent_harnesses = [f"llm/{model_name}"] * len(roles)

    return config, agent_harnesses
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def plot_results(summary_data, output_dir):
    """Render one mean/std error-bar plot per cost metric and save as PNG.

    Args:
        summary_data: Mapping of ``str(max_turns)`` to per-metric dicts of
            the form ``{"mean": ..., "std": ..., "raw_values": [...]}``.
        output_dir: Directory where the PNG files are written.
    """
    turn_values = sorted(int(key) for key in summary_data.keys())

    for metric in ("total_cost", "total_tokens", "total_prompt_tokens", "total_completion_tokens"):
        # Gather mean/std series for this metric across every turn setting.
        mean_series = []
        std_series = []
        for t in turn_values:
            entry = summary_data[str(t)][metric]
            mean_series.append(entry["mean"])
            std_series.append(entry["std"])

        pretty_name = metric.replace("_", " ").title()

        plt.figure(figsize=(10, 6))
        plt.errorbar(turn_values, mean_series, yerr=std_series, fmt="-o", capsize=5, ecolor="red", markeredgecolor="black")
        plt.xlabel("Maximum Turns in Discussion")
        plt.ylabel(pretty_name)
        plt.title(f"{pretty_name} vs. Maximum Turns")
        plt.grid(True, which="both", linestyle="--", linewidth=0.5)
        plt.xticks(turn_values)

        plot_filename = os.path.join(output_dir, f"{metric}_vs_max_turns.png")
        plt.savefig(plot_filename)
        plt.close()
        logger.info(f"Saved plot: {plot_filename}")
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def plot_token_trajectories(trajectories_data, output_dir):
    """Plot per-query-step token trajectories, one figure per metric.

    Trajectories are colour-coded by the ``max_turns`` setting they were
    recorded under; each figure is saved as a PNG in *output_dir*.
    """
    for metric, grouped in trajectories_data.items():
        # Nothing collected for this metric — skip it entirely.
        if not grouped:
            continue

        plt.figure(figsize=(12, 8))

        # One viridis colour per max_turns setting, ordered numerically.
        ordered_keys = sorted(grouped.keys(), key=int)
        palette = plt.cm.viridis(np.linspace(0, 1, len(ordered_keys)))

        for key, color in zip(ordered_keys, palette):
            for idx, series in enumerate(grouped[key]):
                # Label only the first line of each group so the legend stays compact.
                plt.plot(
                    np.arange(len(series)),
                    series,
                    linestyle="-",
                    alpha=0.4,
                    color=color,
                    label=f"Max Turns: {key}" if idx == 0 else None,
                )

        pretty_name = metric.replace("_", " ").title()
        plt.title(f"{pretty_name} per Query Step Trajectories")
        plt.xlabel("Query Step")
        plt.ylabel(f"{pretty_name} per Query Step")
        plt.grid(True, which="both", linestyle="--", linewidth=0.5)
        plt.legend()

        plot_filename = os.path.join(output_dir, f"{metric}_trajectories.png")
        plt.savefig(plot_filename)
        plt.close()
        logger.info(f"Saved trajectory plot: {plot_filename}")
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def main():
    """Entry point: sweep max_turns settings, run games, and report LLM cost.

    For each max_turns value in the sweep the game is run several times;
    per-run cost summaries are accumulated, reduced to mean/std, dumped to
    JSON, and plotted.  Decomposed into ``_parse_args`` / ``_record_run`` /
    ``_summarize`` helpers; CLI and behavior are unchanged.
    """
    args = _parse_args()

    # Create a unique subdirectory for this run
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_output_dir = os.path.join(args.output_dir, f"run_{timestamp}")
    os.makedirs(run_output_dir, exist_ok=True)

    log_filename = f"measure_cost_{timestamp}"
    setup_logger(output_dir=run_output_dir, base_name=log_filename)
    logger.info(f"Starting cost measurement script. Results will be saved in: {run_output_dir}")

    # Load base game configuration
    with open(args.config_path, "r") as f:
        base_config = yaml.safe_load(f).get("game_config", {})

    max_turns_to_test = [8, 12, 16, 20, 24]
    runs_per_setting = 3
    results = {
        str(t): {"total_cost": [], "total_tokens": [], "total_prompt_tokens": [], "total_completion_tokens": []}
        for t in max_turns_to_test
    }
    all_trajectories = {
        "total_tokens": {str(t): [] for t in max_turns_to_test},
        "reasoning_tokens": {str(t): [] for t in max_turns_to_test},
        "text_tokens": {str(t): [] for t in max_turns_to_test},
    }

    for turns in max_turns_to_test:
        logger.info(f"--- Starting runs for max_turns = {turns} ---")
        for run in range(runs_per_setting):
            base_name = f"game_turns_{turns}_run_{run + 1}"
            logger.info(f"Starting {base_name}...")

            game_config, agent_harnesses = setup_game_config(turns, base_config, args.model_name)

            try:
                final_env = run_werewolf(
                    output_dir=run_output_dir,
                    base_name=base_name,
                    config=game_config,
                    agents=agent_harnesses,
                    debug=not args.disable_debug_mode,
                )
                _record_run(final_env, turns, base_name, results, all_trajectories)
            except Exception as e:
                logger.error(f"An error occurred during {base_name}: {e}", exc_info=True)

    summary_data = _summarize(results)

    # Save summary to JSON
    summary_filename = os.path.join(run_output_dir, "cost_analysis_summary.json")
    with open(summary_filename, "w") as f:
        json.dump(summary_data, f, indent=4)
    logger.info(f"Saved summary results to {summary_filename}")

    # Plot results
    plot_results(summary_data, run_output_dir)
    plot_token_trajectories(all_trajectories, run_output_dir)

    logger.info("--- Cost measurement script finished ---")


def _parse_args():
    """Build and evaluate the command-line interface for this script."""
    parser = argparse.ArgumentParser(description="Measure LLM cost for the Werewolf game.")
    parser.add_argument(
        "-c",
        "--config_path",
        type=str,
        default=os.path.join(os.path.dirname(__file__), "configs/run/comprehensive.yaml"),
        help="Path to the base YAML configuration file.",
    )
    parser.add_argument(
        "-o",
        "--output_dir",
        type=str,
        default="cost_measurement",
        help="Output directory for logs, replays, and results.",
    )
    parser.add_argument(
        "-m",
        "--model_name",
        type=str,
        default=DEFAULT_MODEL,
        choices=LLM_MODEL_NAMES,
        help="LiteLLM model name to use for all agents.",
    )
    parser.add_argument("-d", "--disable_debug_mode", action="store_true", help="Disable debug mode.")
    return parser.parse_args()


def _record_run(final_env, turns, base_name, results, all_trajectories):
    """Fold one finished game's cost summary into the accumulators.

    Appends aggregate cost numbers to ``results`` and per-query-step token
    trajectories to ``all_trajectories``.  Logs an error and returns early
    when the environment produced no cost summary.
    """
    # Extract cost summary
    cost_summary_dict = final_env.info.get("GAME_END", {}).get("cost_summary", {})
    if not cost_summary_dict:
        logger.error(f"Could not find cost summary for {base_name}.")
        return

    cost_summary = CostSummary(**cost_summary_dict)
    bucket = results[str(turns)]
    bucket["total_cost"].append(cost_summary.total_cost)
    bucket["total_tokens"].append(cost_summary.total_tokens)
    bucket["total_prompt_tokens"].append(cost_summary.total_prompt_tokens)
    bucket["total_completion_tokens"].append(cost_summary.total_completion_tokens)
    logger.info(f"Finished {base_name}. Total Cost: ${cost_summary.total_cost:.4f}")

    for agent_summary in cost_summary.cost_per_agent:
        if not (agent_summary.data and agent_summary.data.usage_history):
            continue
        usage_history_dicts = [usage.model_dump() for usage in agent_summary.data.usage_history]

        total_tokens_traj = [usage.get("total_tokens", 0) or 0 for usage in usage_history_dicts]
        all_trajectories["total_tokens"][str(turns)].append(total_tokens_traj)

        reasoning_tokens_traj = [
            usage.get("completion_tokens_details", {}).get("reasoning_tokens", 0) or 0
            for usage in usage_history_dicts
        ]
        all_trajectories["reasoning_tokens"][str(turns)].append(reasoning_tokens_traj)

        # Text tokens = completion tokens minus the reasoning share.
        text_tokens_traj = [
            (u.get("completion_tokens", 0) or 0)
            - (u.get("completion_tokens_details", {}).get("reasoning_tokens", 0) or 0)
            for u in usage_history_dicts
        ]
        all_trajectories["text_tokens"][str(turns)].append(text_tokens_traj)


def _summarize(results):
    """Reduce raw per-run metric lists to {mean, std, raw_values} records."""
    summary_data = {}
    for turns, metrics in results.items():
        summary_data[turns] = {}
        for metric, values in metrics.items():
            if values:
                summary_data[turns][metric] = {"mean": np.mean(values), "std": np.std(values), "raw_values": values}
            else:
                summary_data[turns][metric] = {"mean": 0, "std": 0, "raw_values": []}
    return summary_data


if __name__ == "__main__":
    main()
|