kaggle-environments 1.20.0__py3-none-any.whl → 1.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kaggle-environments might be problematic. Click here for more details.
- kaggle_environments/__init__.py +2 -2
- kaggle_environments/envs/cabt/cabt.js +8 -8
- kaggle_environments/envs/cabt/cg/cg.dll +0 -0
- kaggle_environments/envs/cabt/cg/libcg.so +0 -0
- kaggle_environments/envs/{open_spiel → open_spiel_env}/games/universal_poker/universal_poker.js +52 -28
- kaggle_environments/envs/{open_spiel/open_spiel.py → open_spiel_env/open_spiel_env.py} +37 -1
- kaggle_environments/envs/{open_spiel/test_open_spiel.py → open_spiel_env/test_open_spiel_env.py} +65 -1
- kaggle_environments/envs/werewolf/GAME_RULE.md +75 -0
- kaggle_environments/envs/werewolf/__init__.py +0 -0
- kaggle_environments/envs/werewolf/game/__init__.py +0 -0
- kaggle_environments/envs/werewolf/game/actions.py +268 -0
- kaggle_environments/envs/werewolf/game/base.py +115 -0
- kaggle_environments/envs/werewolf/game/consts.py +156 -0
- kaggle_environments/envs/werewolf/game/engine.py +580 -0
- kaggle_environments/envs/werewolf/game/night_elimination_manager.py +101 -0
- kaggle_environments/envs/werewolf/game/protocols/__init__.py +4 -0
- kaggle_environments/envs/werewolf/game/protocols/base.py +242 -0
- kaggle_environments/envs/werewolf/game/protocols/bid.py +248 -0
- kaggle_environments/envs/werewolf/game/protocols/chat.py +467 -0
- kaggle_environments/envs/werewolf/game/protocols/factory.py +59 -0
- kaggle_environments/envs/werewolf/game/protocols/vote.py +471 -0
- kaggle_environments/envs/werewolf/game/records.py +334 -0
- kaggle_environments/envs/werewolf/game/roles.py +326 -0
- kaggle_environments/envs/werewolf/game/states.py +214 -0
- kaggle_environments/envs/werewolf/game/test_actions.py +45 -0
- kaggle_environments/envs/werewolf/test_werewolf.py +161 -0
- kaggle_environments/envs/werewolf/test_werewolf_deterministic.py +211 -0
- kaggle_environments/envs/werewolf/werewolf.js +4377 -0
- kaggle_environments/envs/werewolf/werewolf.json +286 -0
- kaggle_environments/envs/werewolf/werewolf.py +602 -0
- kaggle_environments/static/player.html +19 -1
- {kaggle_environments-1.20.0.dist-info → kaggle_environments-1.21.0.dist-info}/METADATA +9 -4
- {kaggle_environments-1.20.0.dist-info → kaggle_environments-1.21.0.dist-info}/RECORD +55 -36
- kaggle_environments/envs/chess/chess.js +0 -4289
- kaggle_environments/envs/chess/chess.json +0 -60
- kaggle_environments/envs/chess/chess.py +0 -4241
- kaggle_environments/envs/chess/test_chess.py +0 -60
- /kaggle_environments/envs/{open_spiel → open_spiel_env}/__init__.py +0 -0
- /kaggle_environments/envs/{open_spiel → open_spiel_env}/games/__init__.py +0 -0
- /kaggle_environments/envs/{open_spiel → open_spiel_env}/games/chess/chess.js +0 -0
- /kaggle_environments/envs/{open_spiel → open_spiel_env}/games/chess/image_config.jsonl +0 -0
- /kaggle_environments/envs/{open_spiel → open_spiel_env}/games/chess/openings.jsonl +0 -0
- /kaggle_environments/envs/{open_spiel → open_spiel_env}/games/connect_four/__init__.py +0 -0
- /kaggle_environments/envs/{open_spiel → open_spiel_env}/games/connect_four/connect_four.js +0 -0
- /kaggle_environments/envs/{open_spiel → open_spiel_env}/games/connect_four/connect_four_proxy.py +0 -0
- /kaggle_environments/envs/{open_spiel → open_spiel_env}/games/go/__init__.py +0 -0
- /kaggle_environments/envs/{open_spiel → open_spiel_env}/games/go/go.js +0 -0
- /kaggle_environments/envs/{open_spiel → open_spiel_env}/games/go/go_proxy.py +0 -0
- /kaggle_environments/envs/{open_spiel → open_spiel_env}/games/tic_tac_toe/__init__.py +0 -0
- /kaggle_environments/envs/{open_spiel → open_spiel_env}/games/tic_tac_toe/tic_tac_toe.js +0 -0
- /kaggle_environments/envs/{open_spiel → open_spiel_env}/games/tic_tac_toe/tic_tac_toe_proxy.py +0 -0
- /kaggle_environments/envs/{open_spiel → open_spiel_env}/games/universal_poker/__init__.py +0 -0
- /kaggle_environments/envs/{open_spiel → open_spiel_env}/games/universal_poker/universal_poker_proxy.py +0 -0
- /kaggle_environments/envs/{open_spiel → open_spiel_env}/html_playthrough_generator.py +0 -0
- /kaggle_environments/envs/{open_spiel → open_spiel_env}/observation.py +0 -0
- /kaggle_environments/envs/{open_spiel → open_spiel_env}/proxy.py +0 -0
- {kaggle_environments-1.20.0.dist-info → kaggle_environments-1.21.0.dist-info}/WHEEL +0 -0
- {kaggle_environments-1.20.0.dist-info → kaggle_environments-1.21.0.dist-info}/entry_points.txt +0 -0
- {kaggle_environments-1.20.0.dist-info → kaggle_environments-1.21.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,602 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
import random
|
|
4
|
+
from os import getenv, path
|
|
5
|
+
from typing import Callable, Dict, List, Optional
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel, Field
|
|
8
|
+
|
|
9
|
+
from kaggle_environments.envs.werewolf.game.consts import DetailedPhase, EnvInfoKeys, PerceivedThreatLevel
|
|
10
|
+
from .game.actions import (
|
|
11
|
+
Action,
|
|
12
|
+
BidAction,
|
|
13
|
+
ChatAction,
|
|
14
|
+
HealAction,
|
|
15
|
+
InspectAction,
|
|
16
|
+
NoOpAction,
|
|
17
|
+
VoteAction,
|
|
18
|
+
create_action,
|
|
19
|
+
)
|
|
20
|
+
from .game.base import PlayerID
|
|
21
|
+
from .game.consts import RoleConst
|
|
22
|
+
from .game.engine import Moderator
|
|
23
|
+
from .game.protocols.factory import create_protocol
|
|
24
|
+
from .game.records import WerewolfObservationModel, get_raw_observation, set_raw_observation
|
|
25
|
+
from .game.roles import create_players_from_agents_config
|
|
26
|
+
from .game.states import EventName, GameState, get_last_action_request
|
|
27
|
+
# from .harness.base import LLMCostTracker, LLMWerewolfAgent
|
|
28
|
+
|
|
29
|
+
logger = logging.getLogger(__name__)
|
|
30
|
+
|
|
31
|
+
# --- Protocol Factory ---
|
|
32
|
+
DEFAULT_DISCUSSION_PROTOCOL_NAME = "RoundRobinDiscussion"
|
|
33
|
+
DEFAULT_VOTING_PROTOCOL_NAME = "SimultaneousMajority"
|
|
34
|
+
DEFAULT_BIDDING_PROTOCOL_NAME = "UrgencyBiddingProtocol"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# class AgentCost(BaseModel):
|
|
38
|
+
# total_cost: float = 0.0
|
|
39
|
+
# prompt_tokens: int = 0
|
|
40
|
+
# completion_tokens: int = 0
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# class AgentCostSummary(BaseModel):
|
|
44
|
+
# agent_config: Dict
|
|
45
|
+
# costs: AgentCost = Field(default_factory=AgentCost)
|
|
46
|
+
# data: Optional[LLMCostTracker] = None
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# class CostSummary(BaseModel):
|
|
50
|
+
# cost_per_agent: List[AgentCostSummary] = Field(default_factory=list)
|
|
51
|
+
# total_cost: float = 0.0
|
|
52
|
+
# total_prompt_tokens: int = 0
|
|
53
|
+
# total_completion_tokens: int = 0
|
|
54
|
+
# total_tokens: int = 0
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
_PERCEIVED_THREAT_LEVELS = [item.value for item in PerceivedThreatLevel]
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def random_agent(obs):
    """Baseline agent that answers the current phase with a random choice.

    The raw Werewolf observation is unpacked from ``obs`` and the detailed
    phase decides what is produced:

    * night: a Werewolf, Doctor or Seer looks up its most recent vote / heal /
      inspect request among the new event views and picks one of the targets
      listed in that request;
    * day bidding: a living player bids a random amount between 1 and 4;
    * day chat: a living player says one of five canned lines;
    * day voting: a living player votes for a random living player other than
      itself.

    In every other situation (other phase or role, no request found, no target
    available, player not alive) the default ``NoOpAction`` is kept.

    Args:
        obs: Observation handed over by the environment; it is read through
            ``get_raw_observation``.

    Returns:
        The result of ``serialize()`` on the selected action.
    """
    view = get_raw_observation(obs)

    events = view.new_player_event_views
    stage = DetailedPhase(view.detailed_phase)
    role = view.role
    roster = view.all_player_ids
    me = view.player_id
    living = view.alive_players
    base_fields = {"day": view.day, "phase": view.game_state_phase, "actor_id": me}

    # Fallback that survives whenever no branch below builds something else.
    chosen = NoOpAction(**base_fields, reasoning="There's nothing to be done.")
    # Drawn unconditionally and before any target, so the order of RNG calls is fixed.
    threat = random.choice(_PERCEIVED_THREAT_LEVELS)

    if stage == DetailedPhase.NIGHT_AWAIT_ACTIONS:
        if role == RoleConst.WEREWOLF:
            request = get_last_action_request(events, EventName.VOTE_REQUEST)
            # Vote requests are read with .get(): a missing key just means "no action".
            candidates = request.data.get("valid_targets") if request else None
            if candidates:
                chosen = VoteAction(
                    **base_fields,
                    target_id=random.choice(candidates),
                    reasoning="I randomly chose one.",
                    perceived_threat_level=threat,
                )
        elif role == RoleConst.DOCTOR:
            request = get_last_action_request(events, EventName.HEAL_REQUEST)
            # Heal requests are indexed directly, so the key must be present.
            candidates = request.data["valid_candidates"] if request else None
            if candidates:
                chosen = HealAction(
                    **base_fields,
                    target_id=random.choice(candidates),
                    reasoning="I randomly chose one to heal.",
                    perceived_threat_level=threat,
                )
        elif role == RoleConst.SEER:
            request = get_last_action_request(events, EventName.INSPECT_REQUEST)
            candidates = request.data["valid_candidates"] if request else None
            if candidates:
                chosen = InspectAction(
                    **base_fields,
                    target_id=random.choice(candidates),
                    reasoning="I randomly chose one to inspect.",
                    perceived_threat_level=threat,
                )

    elif stage == DetailedPhase.DAY_BIDDING_AWAIT:
        if me in living:
            chosen = BidAction(
                **base_fields,
                amount=random.randint(1, 4),
                reasoning="I am bidding randomly.",
                perceived_threat_level=threat,
            )

    elif stage == DetailedPhase.DAY_CHAT_AWAIT:
        if me in living:
            # The suspect is drawn first, then the line itself (two RNG calls).
            suspect = random.choice(roster)
            canned_lines = [
                "Hello everyone!",
                f"I suspect {suspect}.",
                "Any information to share?",
                "I am a simple Villager just trying to survive.",
                "Let's think carefully before voting.",
            ]
            chosen = ChatAction(
                **base_fields,
                message=random.choice(canned_lines),
                reasoning="I randomly chose one message.",
                perceived_threat_level=threat,
            )

    elif stage == DetailedPhase.DAY_VOTING_AWAIT:
        if me in living:
            # A real agent would parse the prompt for valid targets
            rivals = [pid for pid in living if pid != me]
            if rivals:
                chosen = VoteAction(
                    **base_fields,
                    target_id=random.choice(rivals),
                    reasoning="I randomly chose one.",
                    perceived_threat_level=threat,
                )

    return chosen.serialize()
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
FIXED_MESSAGE = "I am a simple villager."
|
|
158
|
+
FIXED_REASONING = "I am going to do one fixed thing."
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def deterministic_agent(obs):
    """Scripted agent that always takes the first available option.

    Mirrors ``random_agent`` branch for branch, but every choice is fixed:
    night roles target the first entry of the request's target list, bids are
    always 4, chat is always ``FIXED_MESSAGE``, day votes go to the first
    living player other than the agent itself, and the reasoning text is
    always ``FIXED_REASONING``.

    Args:
        obs: Observation handed over by the environment; it is read through
            ``get_raw_observation``.

    Returns:
        The result of ``serialize()`` on the selected action.
    """
    view = get_raw_observation(obs)

    events = view.new_player_event_views
    stage = DetailedPhase(view.detailed_phase)
    role = view.role
    me = view.player_id
    living = view.alive_players
    base_fields = {"day": view.day, "phase": view.game_state_phase, "actor_id": me}

    # Fallback that survives whenever no branch below builds something else.
    chosen = NoOpAction(**base_fields, reasoning="There's nothing to be done.")
    # NOTE(review): the perceived threat level is still drawn with random.choice, so the
    # serialized action is only reproducible when the global RNG is seeded - confirm intended.
    threat = random.choice(_PERCEIVED_THREAT_LEVELS)

    if stage == DetailedPhase.NIGHT_AWAIT_ACTIONS:
        if role == RoleConst.WEREWOLF:
            request = get_last_action_request(events, EventName.VOTE_REQUEST)
            candidates = request.data.get("valid_targets") if request else None
            if candidates:
                # always select first valid
                chosen = VoteAction(
                    **base_fields,
                    target_id=candidates[0],
                    reasoning=FIXED_REASONING,
                    perceived_threat_level=threat,
                )
        elif role == RoleConst.DOCTOR:
            request = get_last_action_request(events, EventName.HEAL_REQUEST)
            candidates = request.data["valid_candidates"] if request else None
            if candidates:
                chosen = HealAction(
                    **base_fields,
                    target_id=candidates[0],
                    reasoning=FIXED_REASONING,
                    perceived_threat_level=threat,
                )
        elif role == RoleConst.SEER:
            request = get_last_action_request(events, EventName.INSPECT_REQUEST)
            candidates = request.data["valid_candidates"] if request else None
            if candidates:
                chosen = InspectAction(
                    **base_fields,
                    target_id=candidates[0],
                    reasoning=FIXED_REASONING,
                    perceived_threat_level=threat,
                )

    elif stage == DetailedPhase.DAY_BIDDING_AWAIT:
        if me in living:
            chosen = BidAction(
                **base_fields,
                amount=4,
                reasoning=FIXED_REASONING,
                perceived_threat_level=threat,
            )

    elif stage == DetailedPhase.DAY_CHAT_AWAIT:
        if me in living:
            chosen = ChatAction(
                **base_fields,
                message=FIXED_MESSAGE,
                reasoning=FIXED_REASONING,
                perceived_threat_level=threat,
            )

    elif stage == DetailedPhase.DAY_VOTING_AWAIT:
        if me in living:
            # A real agent would parse the prompt for valid targets
            rivals = [pid for pid in living if pid != me]
            if rivals:
                chosen = VoteAction(
                    **base_fields,
                    target_id=rivals[0],
                    reasoning=FIXED_REASONING,
                    perceived_threat_level=threat,
                )

    return chosen.serialize()
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
class AgentFactoryWrapper:
    """
    A wrapper that creates and manages separate agent instances for each player.
    This is necessary for stateful agents to be used in the agent registry,
    preventing them from sharing state (like memory or history) across different players.
    """

    def __init__(self, agent_class, **kwargs):
        """
        Args:
            agent_class: Callable used to build one agent per player. It is called with
                the shared keyword arguments plus an ``agent_config`` keyword.
            **kwargs: Keyword arguments passed unchanged to every instance.
        """
        self._agent_class = agent_class
        self._shared_kwargs = kwargs
        self._kwargs = {}  # store configs of individual agents
        self._instances = {}
        self._agent_configs = None  # player id -> agent config, filled on first call

    @property
    def agent_class(self):
        """The class (or factory) used to build per-player agents."""
        return self._agent_class

    def get_instance(self, player_id: PlayerID):
        """Return the agent instance created for ``player_id``, or ``None`` if there is none."""
        return self._instances.get(player_id)

    def __call__(self, obs, config):
        """
        The main callable method for the agent. It routes the call to the correct
        player-specific agent instance.

        Args:
            obs: Observation for the acting player; read through ``get_raw_observation``.
            config: Environment configuration; ``config.agents`` is indexed by each
                entry's ``id`` to find the per-player agent config.

        Returns:
            Whatever the player-specific agent returns for ``obs``, or a serialized
            ``NoOpAction`` when the observation carries no player id.
        """
        raw_obs = get_raw_observation(obs)
        player_id = raw_obs.player_id  # get the current active player id

        if not player_id:
            # This could happen on initial steps or for an inactive agent.
            # Returning a NO_OP action is a safe fallback.
            return NoOpAction(
                day=raw_obs.day,
                phase=raw_obs.game_state_phase,
                actor_id="unknown_fallback",
                reasoning="AgentFactoryWrapper: No player_id found in observation.",
            ).serialize()

        if not self._agent_configs:
            self._agent_configs = {agent_config.id: agent_config for agent_config in config.agents}

        if player_id not in self._instances:
            # Create a new agent instance for this player
            self._kwargs[player_id] = {"agent_config": self._agent_configs.get(player_id)}
            self._instances[player_id] = self._agent_class(**self._shared_kwargs, **self._kwargs[player_id])
        return self._instances[player_id](obs)

    def reset(self):
        """Forget every per-player instance and all cached per-episode configuration.

        The cached agent configs and per-player kwargs are dropped together with the
        instances. Otherwise instances rebuilt after a reset would be constructed from
        the previous episode's ``config.agents``.
        """
        self._instances.clear()
        self._kwargs.clear()
        self._agent_configs = None
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
# --- Agent Registry ---
|
|
303
|
+
LLM_SYSTEM_PROMPT = "You are a master strategist playing the game of Werewolf. Your goal is to win. You win as a team and not as individuals."
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
# *Package variable required by Kaggle Environments framework*
|
|
307
|
+
# These are base agents that the calling framework can choose from
|
|
308
|
+
# Provides a random_agent for testing and a convenient default 'llm' agent.
|
|
309
|
+
|
|
310
|
+
agents = {
|
|
311
|
+
"random": random_agent,
|
|
312
|
+
"deterministic": deterministic_agent,
|
|
313
|
+
# "llm": AgentFactoryWrapper(
|
|
314
|
+
# LLMWerewolfAgent,
|
|
315
|
+
# model_name=getenv("WEREWOLF_LLM_MODEL", "gemini/gemini-2.5-pro"),
|
|
316
|
+
# system_prompt=LLM_SYSTEM_PROMPT,
|
|
317
|
+
# ),
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
def register_agents(agent_dict: Dict[str, Callable]):
    """Add extra agents to the module-level ``agents`` registry.

    Args:
        agent_dict: Mapping of agent name to agent callable. Names that are
            already registered are overwritten.
    """
    # The registry is updated in place, so existing references to ``agents`` see the additions.
    agents.update(agent_dict)
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
def log_error(status_code, state, env):
    """Log every agent whose status equals ``status_code``.

    Args:
        status_code: Status string to look for (for example ``"TIMEOUT"``).
        state: Per-agent state entries; each is indexed with ``["status"]``.
        env: Environment whose ``configuration["agents"]`` list is index-aligned
            with ``state`` and supplies each agent's ``id`` for the log line.

    Returns:
        ``True`` if at least one agent has that status, otherwise ``False``.
    """
    offenders = [index for index, entry in enumerate(state) if entry["status"] == status_code]
    if not offenders:
        return False

    logger.error(f"{status_code} DETECTED")
    for index in offenders:
        offender_config = env.configuration["agents"][index]
        logger.error(f"agent_id={offender_config['id']} returns action with status code {status_code}.")
    return True
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
def interpreter(state, env):
    """
    * Required interface function for the kaggle environments package *

    This is the primary interface through which the kaggle environment (kEnv) steps the
    game forward. In brief, the flow is:
        Initialization - kEnv creates the werewolf object and chooses players. The schema
            for this is defined in werewolf.json.
        1) kEnv calls interpreter() with the current game state recorded in env.game_state.
        2) interpreter() reads the game state and any new player actions, then updates
            env.game_state according to those actions and the flow of the game.
        3) interpreter() writes events to the history and records the resulting state
            changes in env.game_state, then returns to kEnv.
        4) kEnv extracts the relevant game events via the agent logic in harness/base.py,
            constructs the final prompt, performs the external API calls for the models and
            records the answers back to env.game_state.
        Then go back to 1 and continue.

    Example - discussion and voting by the villagers. interpreter() updates the phase and
    writes a history entry that solicits discussion. kEnv calls the agents to get their
    messages and writes them to the history / game state. kEnv then calls interpreter()
    again, which updates the phase and writes a history entry soliciting votes for exile.
    kEnv calls the agents (and their models) to get the votes and writes the responses to
    the game state. Finally kEnv calls interpreter() once more: the moderator collects the
    votes, determines who is exiled, performs the exile and advances the phase and state.
    And so on...

    Note - The UI is also updated after each call to interpreter(), as that is the tick
    unit of the game.

    Note - The env framework assumes that a player always has an action to take, but in
    werewolf the moderator is sometimes the one acting (e.g. counting votes and performing
    the exile), so some game 'ticks' are larger than others.

    state: list of dictionaries, one for each agent.
        Each dict has: {observation, action, reward, status, info}
    env: the kaggle_environments.Environment object itself, including env.game_state
    """
    # Every status code is checked (no short-circuit) so that each kind of failure is logged.
    failure_flags = [log_error(code, state, env) for code in ("TIMEOUT", "ERROR", "INVALID")]
    agent_error = any(failure_flags)

    # --- Initialize Moderator and GameState if it's the start of an episode ---
    episode_start = not hasattr(env, "moderator") or env.done  # env.done is true after reset by Kaggle core
    if episode_start:
        initialize_moderator(state, env)

    moderator = env.moderator
    game_state = env.game_state

    # 1. Collect and parse actions from Kaggle agents
    submitted_actions = parse_player_actions(state, moderator, game_state)

    # 2. Advance the Moderator
    moderator.advance(submitted_actions)

    # 3. Update Kaggle state (observations, rewards, statuses)
    is_game_done = moderator.is_game_over() or agent_error
    current_info = {}
    if is_game_done:
        record_game_end(state, env, game_state, current_info, agent_error)

    # 4. Moderator interprets player actions, updates game phase, and advance game player actions
    awaited_ids = set(moderator.get_active_player_ids())

    # 4.1. Accumulate God mode observations from env for rendering
    moderator_view = [record.serialize() for record in env.game_state.consume_messages()]
    env.info[EnvInfoKeys.MODERATOR_OBS].append(moderator_view)

    # 4.2. Update observations for individual agents
    update_agent_messages(
        state, env, moderator, game_state, is_game_done, current_info, awaited_ids, agent_error
    )
    return state
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
# Only applicable once the werewolf/harness/ folder is merged
|
|
414
|
+
# def collect_cost_summary(env) -> CostSummary:
|
|
415
|
+
# cost_summary = CostSummary()
|
|
416
|
+
|
|
417
|
+
# for agent_config in env.configuration.agents:
|
|
418
|
+
# player_id = agent_config["id"]
|
|
419
|
+
# agent_id = agent_config["agent_id"]
|
|
420
|
+
|
|
421
|
+
# agent_cost_summary = AgentCostSummary(agent_config=agent_config)
|
|
422
|
+
|
|
423
|
+
# if isinstance(agents.get(agent_id), AgentFactoryWrapper) and issubclass(
|
|
424
|
+
# agents[agent_id].agent_class, LLMWerewolfAgent
|
|
425
|
+
# ):
|
|
426
|
+
# agent_instance = agents[agent_id].get_instance(player_id)
|
|
427
|
+
# if agent_instance:
|
|
428
|
+
# cost_tracker = agent_instance.cost_tracker
|
|
429
|
+
# agent_cost = AgentCost(
|
|
430
|
+
# total_cost=cost_tracker.query_token_cost.total_costs_usd,
|
|
431
|
+
# prompt_tokens=cost_tracker.prompt_token_cost.total_tokens,
|
|
432
|
+
# completion_tokens=cost_tracker.completion_token_cost.total_tokens,
|
|
433
|
+
# )
|
|
434
|
+
# agent_cost_summary.costs = agent_cost
|
|
435
|
+
# agent_cost_summary.data = cost_tracker
|
|
436
|
+
|
|
437
|
+
# cost_summary.total_cost += agent_cost.total_cost
|
|
438
|
+
# cost_summary.total_prompt_tokens += agent_cost.prompt_tokens
|
|
439
|
+
# cost_summary.total_completion_tokens += agent_cost.completion_tokens
|
|
440
|
+
|
|
441
|
+
# cost_summary.cost_per_agent.append(agent_cost_summary)
|
|
442
|
+
|
|
443
|
+
# cost_summary.total_tokens = cost_summary.total_prompt_tokens + cost_summary.total_completion_tokens
|
|
444
|
+
# return cost_summary
|
|
445
|
+
|
|
446
|
+
|
|
447
|
+
def record_game_end(state, env, game_state, current_info, agent_error):
    """Store the end-of-game summary in ``env.info`` and hand out the final rewards.

    Args:
        state: Per-agent state entries; ``reward`` is written on each of them when
            scores are available.
        env: Environment; ``env.info[EnvInfoKeys.GAME_END]`` is set, and
            ``env.player_id_str_list`` gives the player id for each position in ``state``.
        game_state: Game state queried for the ``GAME_END`` event.
        current_info: Dict updated in place with the dumped game-end data and the
            ``terminated_with_agent_error`` flag.
        agent_error: Whether the episode is ending because an agent failed.
    """
    # log game end to env.info using GameEndResultsDataEntry
    game_end_entry = next(iter(game_state.get_event_by_name(EventName.GAME_END)), None)
    # One guard for both uses below: an entry without data carries neither info nor scores.
    results = game_end_entry.data if game_end_entry else None
    if results:
        current_info.update(results.model_dump())
    # Record if terminated with agent error. If so, the game record is invalid.
    current_info["terminated_with_agent_error"] = agent_error

    # Record cost from endpoints if any.
    # current_info["cost_summary"] = collect_cost_summary(env).model_dump()

    env.info[EnvInfoKeys.GAME_END] = current_info
    # Determine winner based on game_state.history's GAME_END entry
    if results:
        scores = results.scores
        # zip() pairs each state slot with its player id and stops at the shorter of the
        # two, matching how parse_player_actions walks state and players.
        for player_state, player_id in zip(state, env.player_id_str_list):
            player_state.reward = scores[player_id]
|
|
464
|
+
|
|
465
|
+
|
|
466
|
+
def update_agent_messages(
    state, env, moderator, game_state, is_game_done, current_info, active_player_ids_after_advance, agent_error
):
    """Refresh observation, status and info for every agent after the moderator advanced.

    Players the moderator is not waiting for are marked ``INACTIVE`` and otherwise left
    untouched while the game is running. All other players (and every player once the
    game is done) receive a fresh ``WerewolfObservationModel`` built from their newly
    consumed messages.

    Args:
        state: Per-agent state entries, index-aligned with ``env.player_ids_map``.
        env: Environment providing ``player_ids_map`` and ``player_thumbnails``.
        moderator: Moderator whose ``detailed_phase`` is reported in the observation.
        game_state: Game state used to look up players and public information.
        is_game_done: Whether the episode is over.
        current_info: Info dict assigned to every updated agent (the same object is shared).
        active_player_ids_after_advance: Ids of the players the moderator awaits next.
        agent_error: Whether an agent failed during this step.
    """
    for player_index, player_state in enumerate(state):
        player_id_str = env.player_ids_map[player_index]

        # skip if player not active and game is not done
        if player_id_str not in active_player_ids_after_advance and not is_game_done:
            player_state.status = "INACTIVE"
            continue

        # set the status of active player to ACTIVE
        player_state.status = "ACTIVE"
        player_obj = game_state.get_player_by_id(player_id_str)

        # Observation processing
        new_history_entries = player_obj.consume_messages()

        obs = WerewolfObservationModel(
            player_id=player_obj.id,
            role=player_obj.role.name,
            team=player_obj.role.team.value,
            is_alive=player_obj.alive,
            day=game_state.day_count,
            detailed_phase=moderator.detailed_phase.value,
            all_player_ids=game_state.all_player_ids,
            player_thumbnails=env.player_thumbnails,
            alive_players=[p.id for p in game_state.alive_players()],
            revealed_players=game_state.revealed_players(),
            new_visible_announcements=[entry.description for entry in new_history_entries],
            new_player_event_views=new_history_entries,
            game_state_phase=game_state.phase.value,
        )

        set_raw_observation(player_state, raw_obs=obs)

        # Status: anyone reaching this point is either awaited by the moderator or the game
        # is done, so the only possible change from the ACTIVE set above is to DONE.
        if is_game_done or agent_error:
            player_state.status = "DONE"

        # Info
        player_state.info = current_info
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
def parse_player_actions(state, moderator, game_state):
    """Turn the serialized actions submitted by awaited agents into action objects.

    ``state`` and ``game_state.players`` are walked in lockstep. An action is parsed only
    when the moderator is waiting for that player, the agent's status is ``"ACTIVE"``,
    and the agent actually submitted something truthy.

    Args:
        state: Per-agent state entries exposing ``status`` and ``action``.
        moderator: Moderator queried for the ids of the players it is waiting for.
        game_state: Game state whose ``players`` are index-aligned with ``state``.

    Returns:
        Dict mapping player id to the object returned by ``create_action``.
    """
    actions_by_player = {}
    awaited_ids = moderator.get_active_player_ids()

    for agent_state, player in zip(state, game_state.players):
        pid = player.id
        if pid not in awaited_ids or agent_state.status != "ACTIVE":
            continue
        payload = agent_state.action
        if not payload:
            continue
        actions_by_player[pid] = create_action(payload)
    return actions_by_player
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
def initialize_moderator(state, env):
    """Create the game state, protocols and moderator for a new episode and attach them to ``env``.

    Attributes set on ``env``: ``game_state``, ``player_ids_map``, ``player_id_str_list``,
    ``player_thumbnails``, ``moderator``, ``player_full_visible_history_cache``, ``info``
    (replaced by a fresh dict holding an empty moderator-observation list) and ``agents``.

    Args:
        state: Per-agent state entries; only their count is used.
        env: Environment whose ``configuration`` supplies the agent configs, the reveal
            levels and the optional protocol settings.

    Raises:
        ValueError: If the configuration lists fewer agents than there are entries in ``state``.
    """
    seat_count = len(state)
    settings = env.configuration
    configured_agents = settings.agents

    # below checks for configuration consistency with agent count. If inconsistent, it will cause down stream subtle error.
    if len(configured_agents) < seat_count:
        raise ValueError(
            f"Configuration has {len(configured_agents)} agents, but {seat_count} kaggle agents are present."
        )

    roster = create_players_from_agents_config(configured_agents)

    env.game_state = GameState(
        players=roster,
        history={},
        night_elimination_reveal_level=settings.night_elimination_reveal_level,
        day_exile_reveal_level=settings.day_exile_reveal_level,
    )

    # Index -> id and plain id list, both in roster order.
    env.player_ids_map = dict(enumerate(member.id for member in roster))
    env.player_id_str_list = [member.id for member in roster]

    env.player_thumbnails = {member.id: member.agent.thumbnail for member in roster}

    # Initialize protocols from configuration or defaults
    protocol_specs = (
        ("discussion_protocol", DEFAULT_DISCUSSION_PROTOCOL_NAME),
        ("day_voting_protocol", DEFAULT_VOTING_PROTOCOL_NAME),
        ("werewolf_night_vote_protocol", DEFAULT_VOTING_PROTOCOL_NAME),
    )
    discussion_protocol, day_voting_protocol, night_voting_protocol = [
        create_protocol(settings.get(config_key, {}), default_name=fallback_name)
        for config_key, fallback_name in protocol_specs
    ]

    logger.info(
        f"Interpreter: Using Discussion: {type(discussion_protocol).__name__}, "
        f"Day Voting: {type(day_voting_protocol).__name__}, "
        f"Night WW Voting: {type(night_voting_protocol).__name__}"
    )

    env.moderator = Moderator(
        state=env.game_state,
        discussion=discussion_protocol,
        day_voting=day_voting_protocol,
        night_voting=night_voting_protocol,
        night_elimination_reveal_level=settings.night_elimination_reveal_level,
        day_exile_reveal_level=settings.day_exile_reveal_level,
    )

    # Each player gets its own empty list (a comprehension, so no list is shared).
    env.player_full_visible_history_cache = {pid: [] for pid in env.player_id_str_list}
    env.info = {EnvInfoKeys.MODERATOR_OBS: []}
    env.agents = agents
|
|
580
|
+
|
|
581
|
+
|
|
582
|
+
def renderer(state, env):
    """Render the pending global messages of the game as plain text.

    Args:
        state: Unused; present to satisfy the renderer signature.
        env: Environment expected to carry ``moderator`` and ``game_state`` once the
            interpreter has run.

    Returns:
        A placeholder sentence when the game has not been initialized yet, otherwise
        the descriptions of the messages returned by ``game_state.consume_messages()``
        joined by blank lines.
    """
    initialized = hasattr(env, "moderator") and hasattr(env, "game_state")
    if not initialized:
        return "Game not initialized by interpreter yet."

    descriptions = [message.description for message in env.game_state.consume_messages()]
    return "\n\n".join(descriptions)
|
|
592
|
+
|
|
593
|
+
|
|
594
|
+
def html_renderer():
    """Return the text of ``werewolf.js``, the file that sits next to this module."""
    module_dir = path.dirname(__file__)
    js_path = path.abspath(path.join(module_dir, "werewolf.js"))
    with open(js_path, encoding="utf-8") as js_file:
        js_source = js_file.read()
    return js_source
|
|
598
|
+
|
|
599
|
+
|
|
600
|
+
# Load the environment specification from werewolf.json, located next to this module.
jsonpath = path.abspath(path.join(path.dirname(__file__), "werewolf.json"))
# Read explicitly as UTF-8 (as html_renderer does for werewolf.js) instead of relying on
# the platform's default encoding.
with open(jsonpath, encoding="utf-8") as handle:
    specification = json.load(handle)
|
|
@@ -147,6 +147,7 @@
|
|
|
147
147
|
parent: ref.current,
|
|
148
148
|
preact,
|
|
149
149
|
styled,
|
|
150
|
+
__mainContext: context,
|
|
150
151
|
});
|
|
151
152
|
} catch (error) {
|
|
152
153
|
if (debug) console.error("Static render failed:", error);
|
|
@@ -180,6 +181,7 @@
|
|
|
180
181
|
parent: ref.current,
|
|
181
182
|
preact,
|
|
182
183
|
styled,
|
|
184
|
+
__mainContext: context,
|
|
183
185
|
});
|
|
184
186
|
} catch (error) {
|
|
185
187
|
if (debug) console.error("Animation render failed:", error);
|
|
@@ -570,6 +572,13 @@
|
|
|
570
572
|
setTimeout(playNext, context.speed);
|
|
571
573
|
});
|
|
572
574
|
|
|
575
|
+
// Use useState to hold the functions that components will use.
|
|
576
|
+
// We initialize them with the original functions.
|
|
577
|
+
const [playFn, setPlayFn] = useState(() => contextRef.current.play);
|
|
578
|
+
const [pauseFn, setPauseFn] = useState(() => contextRef.current.pause);
|
|
579
|
+
const [stepFn, setStepFn] = useState(() => contextRef.current.setStep);
|
|
580
|
+
const [playingFn, setPlayingFn] = useState(() => contextRef.current.setPlaying);
|
|
581
|
+
|
|
573
582
|
const updateContext = (o) => {
|
|
574
583
|
const context = contextRef.current;
|
|
575
584
|
Object.assign(context, o, {
|
|
@@ -578,6 +587,11 @@
|
|
|
578
587
|
rerender();
|
|
579
588
|
};
|
|
580
589
|
|
|
590
|
+
contextRef.current.setPlay = setPlayFn;
|
|
591
|
+
contextRef.current.setPause = setPauseFn;
|
|
592
|
+
contextRef.current.setSetStep = setStepFn;
|
|
593
|
+
contextRef.current.setSetPlaying = setPlayingFn;
|
|
594
|
+
|
|
581
595
|
// First time setup.
|
|
582
596
|
useEffect(() => {
|
|
583
597
|
// Timeout is used to ensure useEffect renders once.
|
|
@@ -767,8 +781,12 @@
|
|
|
767
781
|
);
|
|
768
782
|
};
|
|
769
783
|
|
|
784
|
+
// Create a new value object on every render.
|
|
785
|
+
// This object includes the *current* play/pause functions from useState.
|
|
786
|
+
const providerValue = { ...contextRef.current, play: playFn, pause: pauseFn, setStep: stepFn, setPlaying: playingFn };
|
|
787
|
+
|
|
770
788
|
return h`
|
|
771
|
-
<${Context.Provider} value=${
|
|
789
|
+
<${Context.Provider} value=${providerValue}>
|
|
772
790
|
<${Player} className="${contextRef.current.environment.viewer ? "no-border" : ""}" />
|
|
773
791
|
<//>`;
|
|
774
792
|
};
|
|
@@ -1,13 +1,12 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: kaggle-environments
|
|
3
|
-
Version: 1.20.0
|
|
3
|
+
Version: 1.21.0
|
|
4
4
|
Summary: Kaggle Environments
|
|
5
5
|
Author-email: Kaggle <support@kaggle.com>
|
|
6
6
|
Requires-Python: >=3.10
|
|
7
7
|
License-File: LICENSE
|
|
8
8
|
Requires-Dist: accelerate
|
|
9
9
|
Requires-Dist: bitsandbytes
|
|
10
|
-
Requires-Dist: Chessnut >= 0.4.1
|
|
11
10
|
Requires-Dist: Flask >= 1.1.2
|
|
12
11
|
Requires-Dist: gymnasium == 1.2.0
|
|
13
12
|
Requires-Dist: gymnax==0.0.8
|
|
@@ -15,11 +14,17 @@ Requires-Dist: jax
|
|
|
15
14
|
Requires-Dist: jsonschema >= 3.0.1
|
|
16
15
|
Requires-Dist: litellm
|
|
17
16
|
Requires-Dist: numpy >= 2.2.6
|
|
18
|
-
Requires-Dist: open_spiel >= 1.6.
|
|
17
|
+
Requires-Dist: open_spiel >= 1.6.8
|
|
19
18
|
Requires-Dist: pettingzoo == 1.24.0
|
|
19
|
+
Requires-Dist: pokerkit==0.6.3
|
|
20
|
+
Requires-Dist: pydantic >= 2.11.4
|
|
21
|
+
Requires-Dist: pygame
|
|
22
|
+
Requires-Dist: pyjson5
|
|
23
|
+
Requires-Dist: termcolor
|
|
20
24
|
Requires-Dist: requests >= 2.25.1
|
|
21
|
-
Requires-Dist: scipy >= 1.15.3
|
|
22
25
|
Requires-Dist: shimmy >= 1.2.1
|
|
23
26
|
Requires-Dist: stable-baselines3 == 2.7.0
|
|
24
27
|
Requires-Dist: transformers >= 4.33.1
|
|
28
|
+
Requires-Dist: tenacity
|
|
29
|
+
Requires-Dist: google-auth >= 2.35.0
|
|
25
30
|
Project-URL: Homepage, https://github.com/Kaggle/kaggle-environments
|