kaggle-environments 1.23.3__py3-none-any.whl → 1.23.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kaggle-environments might be problematic. Click here for more details.

Files changed (46) hide show
  1. kaggle_environments/envs/open_spiel_env/games/repeated_poker/repeated_poker.js +2 -2
  2. kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/components/getRepeatedPokerStateForStep.js +6 -6
  3. kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/images/poker_chip_1.svg +22 -0
  4. kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/images/poker_chip_10.svg +22 -0
  5. kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/images/poker_chip_100.svg +48 -0
  6. kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/images/poker_chip_25.svg +22 -0
  7. kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/images/poker_chip_5.svg +22 -0
  8. kaggle_environments/envs/open_spiel_env/games/repeated_poker/visualizer/default/src/repeated_poker_renderer.js +550 -331
  9. kaggle_environments/envs/werewolf/README.md +190 -0
  10. kaggle_environments/envs/werewolf/harness/__init__.py +0 -0
  11. kaggle_environments/envs/werewolf/harness/base.py +767 -0
  12. kaggle_environments/envs/werewolf/harness/litellm_models.yaml +51 -0
  13. kaggle_environments/envs/werewolf/harness/test_base.py +35 -0
  14. kaggle_environments/envs/werewolf/runner.py +146 -0
  15. kaggle_environments/envs/werewolf/scripts/__init__.py +0 -0
  16. kaggle_environments/envs/werewolf/scripts/add_audio.py +425 -0
  17. kaggle_environments/envs/werewolf/scripts/configs/audio/standard.yaml +24 -0
  18. kaggle_environments/envs/werewolf/scripts/configs/run/block_basic.yaml +102 -0
  19. kaggle_environments/envs/werewolf/scripts/configs/run/comprehensive.yaml +100 -0
  20. kaggle_environments/envs/werewolf/scripts/configs/run/roundrobin_discussion_DisableDoctorSelfSave_DisableDoctorConsecutiveSave_large.yaml +104 -0
  21. kaggle_environments/envs/werewolf/scripts/configs/run/roundrobin_discussion_large.yaml +103 -0
  22. kaggle_environments/envs/werewolf/scripts/configs/run/roundrobin_discussion_small.yaml +103 -0
  23. kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard.yaml +103 -0
  24. kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_DisableDoctorConsecutiveSave.yaml +104 -0
  25. kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_SeerRevealTeam.yaml +105 -0
  26. kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_SeerRevealTeam_NightEliminationNoReveal_DayExileNoReveal.yaml +105 -0
  27. kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_SeerRevealTeam_NightEliminationRevealTeam_DayExileRevealTeam.yaml +105 -0
  28. kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_disable_doctor_self_save.yaml +103 -0
  29. kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_parallel_voting.yaml +103 -0
  30. kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_parallel_voting_no_tie_exile.yaml +103 -0
  31. kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_parallel_voting_roundbiddiscussion.yaml +105 -0
  32. kaggle_environments/envs/werewolf/scripts/configs/run/run_config.yaml +58 -0
  33. kaggle_environments/envs/werewolf/scripts/configs/run/vertex_api_example_config.yaml +115 -0
  34. kaggle_environments/envs/werewolf/scripts/measure_cost.py +251 -0
  35. kaggle_environments/envs/werewolf/scripts/plot_existing_trajectories.py +135 -0
  36. kaggle_environments/envs/werewolf/scripts/rerender_html.py +87 -0
  37. kaggle_environments/envs/werewolf/scripts/run.py +93 -0
  38. kaggle_environments/envs/werewolf/scripts/run_block.py +237 -0
  39. kaggle_environments/envs/werewolf/scripts/run_pairwise_matrix.py +222 -0
  40. kaggle_environments/envs/werewolf/scripts/self_play.py +196 -0
  41. kaggle_environments/envs/werewolf/scripts/utils.py +47 -0
  42. {kaggle_environments-1.23.3.dist-info → kaggle_environments-1.23.4.dist-info}/METADATA +1 -1
  43. {kaggle_environments-1.23.3.dist-info → kaggle_environments-1.23.4.dist-info}/RECORD +46 -8
  44. {kaggle_environments-1.23.3.dist-info → kaggle_environments-1.23.4.dist-info}/WHEEL +0 -0
  45. {kaggle_environments-1.23.3.dist-info → kaggle_environments-1.23.4.dist-info}/entry_points.txt +0 -0
  46. {kaggle_environments-1.23.3.dist-info → kaggle_environments-1.23.4.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,103 @@
1
+ # Configuration for the Werewolf game environment
2
+ game_config:
3
+ seed: 123
4
+ actTimeout: 900
5
+ runTimeout: 7200
6
+ discussion_protocol:
7
+ name: "RoundRobinDiscussion"
8
+ params:
9
+ max_rounds: 2
10
+ assign_random_first_speaker: true
11
+ day_voting_protocol:
12
+ name: "SimultaneousMajority"
13
+ params:
14
+ tie_break: 'no_elected'
15
+ werewolf_night_vote_protocol:
16
+ name: "SequentialVoting"
17
+ params:
18
+ assign_random_first_voter: true
19
+ night_elimination_reveal_level: role
20
+ day_exile_reveal_level: role
21
+ agents:
22
+ - role: "Werewolf"
23
+ id: "Kai"
24
+ thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
25
+ agent_id: "llm/gemini/gemini-2.5-flash"
26
+ display_name: "gemini-2.5-flash"
27
+ agent_harness_name: "llm_harness"
28
+ chat_mode: "text"
29
+ enable_bid_reasoning: false
30
+ llms:
31
+ - model_name: "gemini/gemini-2.5-flash"
32
+ - role: "Werewolf"
33
+ id: "Jordan"
34
+ thumbnail: "https://images.seeklogo.com/logo-png/61/1/deepseek-ai-icon-logo-png_seeklogo-611473.png"
35
+ agent_id: "llm/openrouter/deepseek/deepseek-chat-v3.1"
36
+ display_name: "deepseek-chat-v3.1"
37
+ agent_harness_name: "llm_harness"
38
+ chat_mode: "text"
39
+ enable_bid_reasoning: false
40
+ llms:
41
+ - model_name: "openrouter/deepseek/deepseek-chat-v3.1"
42
+ - role: "Doctor"
43
+ role_params:
44
+ allow_self_save: true
45
+ id: "Charlie"
46
+ thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png"
47
+ agent_id: "llm/openrouter/openai/gpt-4o-mini"
48
+ display_name: "gpt-4o-mini"
49
+ agent_harness_name: "llm_harness"
50
+ chat_mode: "text"
51
+ enable_bid_reasoning: false
52
+ llms:
53
+ - model_name: "openrouter/openai/gpt-4o-mini"
54
+ - role: "Seer"
55
+ id: "Taylor"
56
+ thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png"
57
+ agent_id: "llm/openrouter/qwen/qwen3-235b-a22b-2507"
58
+ display_name: "qwen3-235b"
59
+ agent_harness_name: "llm_harness"
60
+ chat_mode: "text"
61
+ enable_bid_reasoning: false
62
+ llms:
63
+ - model_name: "openrouter/qwen/qwen3-235b-a22b-2507"
64
+ - role: "Villager"
65
+ id: "Alex"
66
+ thumbnail: "https://z-cdn.chatglm.cn/z-ai/static/logo.svg"
67
+ agent_id: "llm/openrouter/z-ai/glm-4.5"
68
+ display_name: "glm-4.5"
69
+ agent_harness_name: "llm_harness"
70
+ chat_mode: "text"
71
+ enable_bid_reasoning: false
72
+ llms:
73
+ - model_name: "openrouter/z-ai/glm-4.5"
74
+ - role: "Villager"
75
+ id: "Jamie"
76
+ thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png"
77
+ agent_id: "llm/openrouter/openai/gpt-oss-120b"
78
+ display_name: "gpt-oss-120b"
79
+ agent_harness_name: "llm_harness"
80
+ chat_mode: "text"
81
+ enable_bid_reasoning: false
82
+ llms:
83
+ - model_name: "openrouter/openai/gpt-oss-120b"
84
+ - role: "Villager"
85
+ id: "Quinn"
86
+ thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
87
+ agent_id: "llm/gemini/gemini-2.5-pro"
88
+ display_name: "gemini-2.5-pro"
89
+ agent_harness_name: "llm_harness"
90
+ chat_mode: "text"
91
+ enable_bid_reasoning: false
92
+ llms:
93
+ - model_name: "gemini/gemini-2.5-pro"
94
+ - role: "Villager"
95
+ id: "Casey"
96
+ thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png"
97
+ agent_id: "llm/openrouter/qwen/qwen3-30b-a3b"
98
+ display_name: "qwen3-30b"
99
+ agent_harness_name: "llm_harness"
100
+ chat_mode: "text"
101
+ enable_bid_reasoning: false
102
+ llms:
103
+ - model_name: "openrouter/qwen/qwen3-30b-a3b"
@@ -0,0 +1,105 @@
1
+ # Configuration for the Werewolf game environment
2
+ game_config:
3
+ seed: 123
4
+ actTimeout: 900
5
+ runTimeout: 7200
6
+ discussion_protocol:
7
+ name: "RoundByRoundBiddingDiscussion"
8
+ params:
9
+ bidding:
10
+ name: "SimpleBiddingProtocol"
11
+ max_rounds: 2
12
+ bid_result_public: true
13
+ day_voting_protocol:
14
+ name: "SimultaneousMajority"
15
+ params:
16
+ tie_break: 'random'
17
+ werewolf_night_vote_protocol:
18
+ name: "SequentialVoting"
19
+ params:
20
+ assign_random_first_voter: true
21
+ night_elimination_reveal_level: role
22
+ day_exile_reveal_level: role
23
+ agents:
24
+ - role: "Werewolf"
25
+ id: "Kai"
26
+ thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
27
+ agent_id: "llm/gemini/gemini-2.5-flash"
28
+ display_name: "gemini-2.5-flash"
29
+ agent_harness_name: "llm_harness"
30
+ chat_mode: "text"
31
+ enable_bid_reasoning: false
32
+ llms:
33
+ - model_name: "gemini/gemini-2.5-flash"
34
+ - role: "Werewolf"
35
+ id: "Jordan"
36
+ thumbnail: "https://images.seeklogo.com/logo-png/61/1/deepseek-ai-icon-logo-png_seeklogo-611473.png"
37
+ agent_id: "llm/openrouter/deepseek/deepseek-chat-v3.1"
38
+ display_name: "deepseek-chat-v3.1"
39
+ agent_harness_name: "llm_harness"
40
+ chat_mode: "text"
41
+ enable_bid_reasoning: false
42
+ llms:
43
+ - model_name: "openrouter/deepseek/deepseek-chat-v3.1"
44
+ - role: "Doctor"
45
+ role_params:
46
+ allow_self_save: true
47
+ id: "Charlie"
48
+ thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png"
49
+ agent_id: "llm/openrouter/openai/gpt-4o-mini"
50
+ display_name: "gpt-4o-mini"
51
+ agent_harness_name: "llm_harness"
52
+ chat_mode: "text"
53
+ enable_bid_reasoning: false
54
+ llms:
55
+ - model_name: "openrouter/openai/gpt-4o-mini"
56
+ - role: "Seer"
57
+ id: "Taylor"
58
+ thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png"
59
+ agent_id: "llm/openrouter/qwen/qwen3-235b-a22b-2507"
60
+ display_name: "qwen3-235b"
61
+ agent_harness_name: "llm_harness"
62
+ chat_mode: "text"
63
+ enable_bid_reasoning: false
64
+ llms:
65
+ - model_name: "openrouter/qwen/qwen3-235b-a22b-2507"
66
+ - role: "Villager"
67
+ id: "Alex"
68
+ thumbnail: "https://z-cdn.chatglm.cn/z-ai/static/logo.svg"
69
+ agent_id: "llm/openrouter/z-ai/glm-4.5"
70
+ display_name: "glm-4.5"
71
+ agent_harness_name: "llm_harness"
72
+ chat_mode: "text"
73
+ enable_bid_reasoning: false
74
+ llms:
75
+ - model_name: "openrouter/z-ai/glm-4.5"
76
+ - role: "Villager"
77
+ id: "Jamie"
78
+ thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png"
79
+ agent_id: "llm/openrouter/openai/gpt-oss-120b"
80
+ display_name: "gpt-oss-120b"
81
+ agent_harness_name: "llm_harness"
82
+ chat_mode: "text"
83
+ enable_bid_reasoning: false
84
+ llms:
85
+ - model_name: "openrouter/openai/gpt-oss-120b"
86
+ - role: "Villager"
87
+ id: "Quinn"
88
+ thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
89
+ agent_id: "llm/gemini/gemini-2.5-pro"
90
+ display_name: "gemini-2.5-pro"
91
+ agent_harness_name: "llm_harness"
92
+ chat_mode: "text"
93
+ enable_bid_reasoning: false
94
+ llms:
95
+ - model_name: "gemini/gemini-2.5-pro"
96
+ - role: "Villager"
97
+ id: "Casey"
98
+ thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png"
99
+ agent_id: "llm/openrouter/qwen/qwen3-30b-a3b"
100
+ display_name: "qwen3-30b"
101
+ agent_harness_name: "llm_harness"
102
+ chat_mode: "text"
103
+ enable_bid_reasoning: false
104
+ llms:
105
+ - model_name: "openrouter/qwen/qwen3-30b-a3b"
@@ -0,0 +1,58 @@
1
+ # Configuration for the Werewolf game environment
2
+ game_config:
3
+ seed: 42
4
+ actTimeout: 300
5
+ runTimeout: 3600
6
+ discussion_protocol:
7
+ name: "TurnByTurnBiddingDiscussion"
8
+ params:
9
+ max_turns: 16
10
+ day_voting_protocol:
11
+ name: "SequentialVoting"
12
+ werewolf_night_vote_protocol:
13
+ name: "SequentialVoting"
14
+ night_elimination_reveal_level: no_reveal
15
+ day_exile_reveal_level: no_reveal
16
+ agents:
17
+ - role: "Werewolf"
18
+ id: "Player1"
19
+ agent_id: "llm/gemini/gemini-2.5-flash"
20
+ display_name: "Player1 (Flash)"
21
+ agent_harness_name: "llm_harness"
22
+ chat_mode: "text"
23
+ llms:
24
+ - model_name: "gemini/gemini-2.5-flash"
25
+ - role: "Werewolf"
26
+ id: "Player2"
27
+ agent_id: "llm/gemini/gemini-2.5-pro"
28
+ display_name: "Player2 (Pro)"
29
+ agent_harness_name: "llm_harness"
30
+ chat_mode: "text"
31
+ llms:
32
+ - model_name: "gemini/gemini-2.5-pro"
33
+ - role: "Doctor"
34
+ role_params:
35
+ allow_self_save: true
36
+ id: "Player3"
37
+ agent_id: "random"
38
+ display_name: "Player3 (Random)"
39
+ - role: "Seer"
40
+ id: "Player4"
41
+ agent_id: "random"
42
+ display_name: "Player4 (Random)"
43
+ - role: "Villager"
44
+ id: "Player5"
45
+ agent_id: "random"
46
+ display_name: "Player5 (Random)"
47
+ - role: "Villager"
48
+ id: "Player6"
49
+ agent_id: "random"
50
+ display_name: "Player6 (Random)"
51
+ - role: "Villager"
52
+ id: "Player7"
53
+ agent_id: "random"
54
+ display_name: "Player7 (Random)"
55
+ - role: "Villager"
56
+ id: "Player8"
57
+ agent_id: "random"
58
+ display_name: "Player8 (Random)"
@@ -0,0 +1,115 @@
1
+ # Settings for the dump_audio.py script
2
+ script_settings:
3
+ server:
4
+ port: 7999
5
+ paths:
6
+ audio_dir_name: "audio"
7
+ debug_audio_dir_name: "debug_audio"
8
+ output_html_filename: "replay.html"
9
+ voices:
10
+ moderator: "enceladus"
11
+ players:
12
+ "gemini-2.0-flash-lite-001_1": 'Kore'
13
+ "gemini-2.0-flash-lite-001_2": 'Charon'
14
+ "gemini-2.5-flash_3": 'Leda'
15
+ "gemini-2.5-flash_4": 'Despina'
16
+ "gemini-2.5-flash_5": 'Erinome'
17
+ "gemini-2.5-flash_6": 'Gacrux'
18
+ "gemini-2.5-flash_7": 'Achird'
19
+ "gemini-2.5-flash_8": 'Puck'
20
+ audio:
21
+ static_moderator_messages:
22
+ night_begins: "(rate=\"fast\", volume=\"soft\", voice=\"mysterious\")[As darkness descends, the village falls silent.](rate=\"medium\", pitch=\"-2st\")[Everyone, close your eyes.]"
23
+ day_begins: "(rate=\"fast\", volume=\"loud\")[Wake up, villagers!] (rate=\"medium\", voice=\"neutral\")[The sun rises on a new day.] (break=\"50ms\") (rate=\"medium\", voice=\"somber\")[Let's see who survived the night.]"
24
+ discussion_begins: "(voice=\"authoritative\")[The town meeting now begins.] (voice=\"neutral\")[You have a few minutes to discuss and find the werewolves among you.] (voice=\"authoritative\")[Begin.]"
25
+ voting_begins: "(rate=\"slow\", voice=\"serious\")[The time for talk is over.] (break=\"50ms\") (rate=\"medium\", volume=\"loud\", voice=\"dramatic\")[Now, you must cast your votes!]"
26
+
27
+ # Configuration for the Werewolf game environment
28
+ game_config:
29
+ seed: 123
30
+ actTimeout: 300
31
+ runTimeout: 3600
32
+ discussion_protocol:
33
+ name: "RoundRobinDiscussion"
34
+ params:
35
+ max_rounds: 2
36
+ day_voting_protocol:
37
+ name: "SequentialVoting"
38
+ werewolf_night_vote_protocol:
39
+ name: "SequentialVoting"
40
+ # reveal_night_elimination_role: false
41
+ # reveal_day_exile_role: false
42
+ allow_doctor_self_save: true
43
+ agents:
44
+ - role: "Werewolf"
45
+ id: "Alex"
46
+ thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
47
+ agent_id: "llm/vertex_ai/gemini-2.0-flash-lite-001"
48
+ display_name: "vertex_ai/gemini-2.0-flash-lite-001"
49
+ agent_harness_name: "llm_harness"
50
+ chat_mode: "text"
51
+ llms:
52
+ - model_name: "llm/vertex_ai/gemini-2.0-flash-lite-001"
53
+ - role: "Werewolf"
54
+ id: "Jordan"
55
+ thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
56
+ agent_id: "llm/vertex_ai/gemini-2.0-flash-lite-001"
57
+ display_name: "vertex_ai/gemini-2.0-flash-lite-001"
58
+ agent_harness_name: "llm_harness"
59
+ chat_mode: "text"
60
+ llms:
61
+ - model_name: "llm/vertex_ai/gemini-2.0-flash-lite-001"
62
+ - role: "Doctor"
63
+ id: "Taylor"
64
+ thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
65
+ agent_id: "llm/vertex_ai/gemini-2.5-flash"
66
+ display_name: "llm/vertex_ai/gemini-2.5-flash"
67
+ agent_harness_name: "llm_harness"
68
+ chat_mode: "text"
69
+ llms:
70
+ - model_name: "llm/vertex_ai/gemini-2.5-flash"
71
+ - role: "Seer"
72
+ id: "Casey"
73
+ thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
74
+ agent_id: "llm/vertex_ai/gemini-2.5-flash"
75
+ display_name: "llm/vertex_ai/gemini-2.5-flash"
76
+ agent_harness_name: "llm_harness"
77
+ chat_mode: "text"
78
+ llms:
79
+ - model_name: "llm/vertex_ai/gemini-2.5-flash"
80
+ - role: "Villager"
81
+ id: "Riley"
82
+ thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
83
+ agent_id: "llm/vertex_ai/gemini-2.5-flash"
84
+ display_name: "llm/vertex_ai/gemini-2.5-flash"
85
+ agent_harness_name: "llm_harness"
86
+ chat_mode: "text"
87
+ llms:
88
+ - model_name: "llm/vertex_ai/gemini-2.5-flash"
89
+ - role: "Villager"
90
+ id: "Jamie"
91
+ thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
92
+ agent_id: "llm/vertex_ai/gemini-2.5-flash"
93
+ display_name: "llm/vertex_ai/gemini-2.5-flash"
94
+ agent_harness_name: "llm_harness"
95
+ chat_mode: "text"
96
+ llms:
97
+ - model_name: "llm/vertex_ai/gemini-2.5-flash"
98
+ - role: "Villager"
99
+ id: "Morgan"
100
+ thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
101
+ agent_id: "llm/vertex_ai/gemini-2.5-flash"
102
+ display_name: "llm/vertex_ai/gemini-2.5-flash"
103
+ agent_harness_name: "llm_harness"
104
+ chat_mode: "text"
105
+ llms:
106
+ - model_name: "llm/vertex_ai/gemini-2.5-flash"
107
+ - role: "Villager"
108
+ id: "Skyler"
109
+ thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png"
110
+ agent_id: "llm/vertex_ai/gemini-2.5-flash"
111
+ display_name: "llm/vertex_ai/gemini-2.5-flash"
112
+ agent_harness_name: "llm_harness"
113
+ chat_mode: "text"
114
+ llms:
115
+ - model_name: "llm/vertex_ai/gemini-2.5-flash"
@@ -0,0 +1,251 @@
1
+ import argparse
2
+ import json
3
+ import logging
4
+ import os
5
+ import random
6
+ from datetime import datetime
7
+
8
+ import matplotlib.pyplot as plt
9
+ import numpy as np
10
+ import yaml
11
+
12
+ from kaggle_environments.envs.werewolf.runner import run_werewolf, setup_logger
13
+ from kaggle_environments.envs.werewolf.werewolf import LLM_MODEL_NAMES, CostSummary
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ AGENT_NAMES = ["Alex", "Jordan", "Taylor", "Casey", "Riley", "Jamie", "Morgan", "Skyler"]
18
+ DEFAULT_MODEL = "gemini/gemini-2.5-flash"
19
+
20
+
21
def setup_game_config(max_turns: int, base_config: dict, model_name: str):
    """Build the game configuration and agent harness list for one run.

    Args:
        max_turns: Maximum discussion turns to inject into the
            "TurnByTurnBiddingDiscussion" protocol params.
        base_config: Base game configuration loaded from YAML. Not mutated:
            a deep copy is taken so repeated calls start from a clean state.
        model_name: LiteLLM model name used for every agent.

    Returns:
        Tuple of (config dict ready for run_werewolf, list of agent
        harness identifiers, one per role).
    """
    import copy

    # Deep copy: the previous shallow .copy() shared nested dicts with
    # base_config, so setting max_turns below mutated the caller's config
    # and leaked state into subsequent runs.
    config = copy.deepcopy(base_config)

    # Define roles and shuffle them
    roles = ["Werewolf", "Werewolf", "Doctor", "Seer", "Villager", "Villager", "Villager", "Villager"]
    random.shuffle(roles)

    # Shuffle a copy of the names: shuffling AGENT_NAMES in place mutated a
    # module-level constant shared by every caller.
    player_names = AGENT_NAMES[:]
    random.shuffle(player_names)

    # Create agent configurations
    agents_config = []
    for i, role in enumerate(roles):
        player_name = player_names[i]
        agents_config.append(
            {
                "role": role,
                "id": player_name,
                "agent_id": f"llm/{model_name}",
                "display_name": f"{model_name}/{player_name}",
                "agent_harness_name": "llm_harness",
                "chat_mode": "text",
                "llms": [{"model_name": model_name}],
            }
        )

    config["agents"] = agents_config

    # Update discussion protocol with the specified max_turns
    if "discussion_protocol" in config and config["discussion_protocol"]["name"] == "TurnByTurnBiddingDiscussion":
        # setdefault guards against configs that declare the protocol but
        # omit the optional "params" mapping (previously a KeyError).
        config["discussion_protocol"].setdefault("params", {})["max_turns"] = max_turns
    else:
        logger.warning("Could not find 'TurnByTurnBiddingDiscussion' protocol to set max_turns.")

    # Set a new random seed for each game to ensure role/name shuffling is different
    config["seed"] = random.randint(0, 2**32 - 1)

    agent_harnesses = [f"llm/{model_name}"] * len(roles)

    return config, agent_harnesses
62
+
63
+
64
def plot_results(summary_data, output_dir):
    """Render mean +/- std error-bar plots of each cost metric vs. max turns.

    One PNG per metric is written into *output_dir*.
    """
    turn_settings = sorted(int(key) for key in summary_data.keys())

    for metric in ("total_cost", "total_tokens", "total_prompt_tokens", "total_completion_tokens"):
        mean_values = []
        std_values = []
        for turns in turn_settings:
            stats = summary_data[str(turns)][metric]
            mean_values.append(stats["mean"])
            std_values.append(stats["std"])

        pretty_name = metric.replace("_", " ").title()

        plt.figure(figsize=(10, 6))
        plt.errorbar(
            turn_settings,
            mean_values,
            yerr=std_values,
            fmt="-o",
            capsize=5,
            ecolor="red",
            markeredgecolor="black",
        )
        plt.xlabel("Maximum Turns in Discussion")
        plt.ylabel(pretty_name)
        plt.title(f"{pretty_name} vs. Maximum Turns")
        plt.grid(True, which="both", linestyle="--", linewidth=0.5)
        plt.xticks(turn_settings)

        plot_filename = os.path.join(output_dir, f"{metric}_vs_max_turns.png")
        plt.savefig(plot_filename)
        plt.close()
        logger.info(f"Saved plot: {plot_filename}")
87
+
88
+
89
def plot_token_trajectories(trajectories_data, output_dir):
    """Plot per-query-step token usage trajectories, colour-coded by max_turns.

    One PNG per metric is written into *output_dir*; metrics with no
    trajectories are skipped.
    """
    for metric, grouped in trajectories_data.items():
        if not grouped:
            continue

        plt.figure(figsize=(12, 8))

        # Assign one viridis colour per max_turns setting.
        ordered_keys = sorted(grouped.keys(), key=int)
        palette = plt.cm.viridis(np.linspace(0, 1, len(ordered_keys)))
        color_of = dict(zip(ordered_keys, palette))

        for turns, runs in sorted(grouped.items(), key=lambda item: int(item[0])):
            for run_index, trajectory in enumerate(runs):
                # Label only the first line of each group so the legend stays clean.
                label = f"Max Turns: {turns}" if run_index == 0 else None
                plt.plot(
                    np.arange(len(trajectory)),
                    trajectory,
                    linestyle="-",
                    alpha=0.4,
                    color=color_of[turns],
                    label=label,
                )

        pretty_name = metric.replace("_", " ").title()
        plt.title(f"{pretty_name} per Query Step Trajectories")
        plt.xlabel("Query Step")
        plt.ylabel(f"{pretty_name} per Query Step")
        plt.grid(True, which="both", linestyle="--", linewidth=0.5)
        plt.legend()

        plot_filename = os.path.join(output_dir, f"{metric}_trajectories.png")
        plt.savefig(plot_filename)
        plt.close()
        logger.info(f"Saved trajectory plot: {plot_filename}")
120
+
121
+
122
def main():
    """Run a sweep of Werewolf games over several max_turns settings and
    record LLM cost/token statistics.

    For each max_turns value, several games are run; per-game cost summaries
    are aggregated into mean/std statistics, dumped to JSON, and plotted.
    """
    parser = argparse.ArgumentParser(description="Measure LLM cost for the Werewolf game.")
    parser.add_argument(
        "-c",
        "--config_path",
        type=str,
        default=os.path.join(os.path.dirname(__file__), "configs/run/comprehensive.yaml"),
        help="Path to the base YAML configuration file.",
    )
    parser.add_argument(
        "-o",
        "--output_dir",
        type=str,
        default="cost_measurement",
        help="Output directory for logs, replays, and results.",
    )
    parser.add_argument(
        "-m",
        "--model_name",
        type=str,
        default=DEFAULT_MODEL,
        choices=LLM_MODEL_NAMES,
        help="LiteLLM model name to use for all agents.",
    )
    parser.add_argument("-d", "--disable_debug_mode", action="store_true", help="Disable debug mode.")

    args = parser.parse_args()

    # Create a unique subdirectory for this run
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_output_dir = os.path.join(args.output_dir, f"run_{timestamp}")
    os.makedirs(run_output_dir, exist_ok=True)

    log_filename = f"measure_cost_{timestamp}"
    setup_logger(output_dir=run_output_dir, base_name=log_filename)
    logger.info(f"Starting cost measurement script. Results will be saved in: {run_output_dir}")

    # Load base game configuration
    with open(args.config_path, "r") as f:
        base_config = yaml.safe_load(f).get("game_config", {})

    max_turns_to_test = [8, 12, 16, 20, 24]
    runs_per_setting = 3
    # Keys are str(turns) so the structure round-trips through JSON cleanly.
    results = {
        str(t): {"total_cost": [], "total_tokens": [], "total_prompt_tokens": [], "total_completion_tokens": []}
        for t in max_turns_to_test
    }
    all_trajectories = {
        "total_tokens": {str(t): [] for t in max_turns_to_test},
        "reasoning_tokens": {str(t): [] for t in max_turns_to_test},
        "text_tokens": {str(t): [] for t in max_turns_to_test},
    }

    for turns in max_turns_to_test:
        logger.info(f"--- Starting runs for max_turns = {turns} ---")
        for run in range(runs_per_setting):
            base_name = f"game_turns_{turns}_run_{run + 1}"
            logger.info(f"Starting {base_name}...")

            game_config, agent_harnesses = setup_game_config(turns, base_config, args.model_name)

            try:
                final_env = run_werewolf(
                    output_dir=run_output_dir,
                    base_name=base_name,
                    config=game_config,
                    agents=agent_harnesses,
                    debug=not args.disable_debug_mode,
                )

                # Extract cost summary
                cost_summary_dict = final_env.info.get("GAME_END", {}).get("cost_summary", {})
                if cost_summary_dict:
                    cost_summary = CostSummary(**cost_summary_dict)
                    results[str(turns)]["total_cost"].append(cost_summary.total_cost)
                    results[str(turns)]["total_tokens"].append(cost_summary.total_tokens)
                    results[str(turns)]["total_prompt_tokens"].append(cost_summary.total_prompt_tokens)
                    results[str(turns)]["total_completion_tokens"].append(cost_summary.total_completion_tokens)
                    logger.info(f"Finished {base_name}. Total Cost: ${cost_summary.total_cost:.4f}")

                    # Collect per-agent, per-query token trajectories.
                    for agent_summary in cost_summary.cost_per_agent:
                        if agent_summary.data and agent_summary.data.usage_history:
                            usage_history_dicts = [usage.model_dump() for usage in agent_summary.data.usage_history]

                            # "or 0" guards against explicit None values in usage records.
                            total_tokens_traj = [usage.get("total_tokens", 0) or 0 for usage in usage_history_dicts]
                            all_trajectories["total_tokens"][str(turns)].append(total_tokens_traj)

                            reasoning_tokens_traj = [
                                usage.get("completion_tokens_details", {}).get("reasoning_tokens", 0) or 0
                                for usage in usage_history_dicts
                            ]
                            all_trajectories["reasoning_tokens"][str(turns)].append(reasoning_tokens_traj)

                            # Text tokens = completion tokens minus reasoning tokens.
                            text_tokens_traj = [
                                (u.get("completion_tokens", 0) or 0)
                                - (u.get("completion_tokens_details", {}).get("reasoning_tokens", 0) or 0)
                                for u in usage_history_dicts
                            ]
                            all_trajectories["text_tokens"][str(turns)].append(text_tokens_traj)
                else:
                    logger.error(f"Could not find cost summary for {base_name}.")

            except Exception as e:
                # Best-effort sweep: log the failure and continue with the
                # remaining runs rather than aborting the whole measurement.
                logger.error(f"An error occurred during {base_name}: {e}", exc_info=True)

    # Calculate mean and standard deviation
    summary_data = {}
    for turns, metrics in results.items():
        summary_data[turns] = {}
        for metric, values in metrics.items():
            if values:
                # float(): np.mean/np.std return np.float64, which the json
                # module cannot serialize (json.dump below raised TypeError).
                summary_data[turns][metric] = {
                    "mean": float(np.mean(values)),
                    "std": float(np.std(values)),
                    "raw_values": values,
                }
            else:
                summary_data[turns][metric] = {"mean": 0, "std": 0, "raw_values": []}

    # Save summary to JSON
    summary_filename = os.path.join(run_output_dir, "cost_analysis_summary.json")
    with open(summary_filename, "w") as f:
        json.dump(summary_data, f, indent=4)
    logger.info(f"Saved summary results to {summary_filename}")

    # Plot results
    plot_results(summary_data, run_output_dir)
    plot_token_trajectories(all_trajectories, run_output_dir)

    logger.info("--- Cost measurement script finished ---")
248
+
249
+
250
# Script entry point: run the full cost-measurement sweep.
if __name__ == "__main__":
    main()