PyPI - kaggle-environments - Versions diffs - 0.2.1__py3-none-any.whl → 1.20.1__py3-none-any.whl - Mend

kaggle-environments 0.2.1__py3-none-any.whl → 1.20.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of kaggle-environments might be problematic. Click here for more details.

Files changed (214) hide show

kaggle_environments/envs/rps/rps.py ADDED Viewed

@@ -0,0 +1,90 @@
+import json
+from os import path
+from .agents import agents as all_agents
+from .utils import get_score
+def interpreter(state, env):
+    """Advance the Rock-Paper-Scissors episode by one step.
+
+    Validates both agents' actions, scores the round with ``get_score``,
+    updates rewards and observations in place, and marks both agents DONE
+    when the last step is reached.
+
+    Args:
+        state: Sequence whose first two items are the per-agent states
+            (player 1 at index 0, player 2 at index 1).
+        env: Environment exposing ``done`` and a ``configuration`` with
+            ``signs``, ``episodeSteps`` and ``tieRewardThreshold``.
+
+    Returns:
+        The same ``state`` object, mutated in place.
+    """
+    first, second = state[0], state[1]
+
+    # Specification can fully handle the reset.
+    if env.done:
+        return state
+
+    def in_range(agent, limit):
+        # isinstance() already rejects None, so no separate None check is needed.
+        move = agent.action
+        return isinstance(move, int) and 0 <= move < limit
+
+    # Check for validity of actions
+    first_ok = in_range(first, env.configuration.signs)
+    second_ok = in_range(second, env.configuration.signs)
+
+    if not (first_ok and second_ok):
+        # An invalid action ends the step early: each offender is marked
+        # INVALID with reward 0, and a valid opponent is marked DONE with reward 1.
+        if not second_ok:
+            second.status = "INVALID"
+            second.reward = 0
+        if not first_ok:
+            first.status = "INVALID"
+            first.reward = 0
+        if first_ok:
+            first.status = "DONE"
+            first.reward = 1
+        elif second_ok:
+            second.status = "DONE"
+            second.reward = 1
+        return state
+
+    # The round score is added to player 1's reward and subtracted from player 2's.
+    outcome = get_score(first.action, second.action)
+    first.observation.lastOpponentAction = second.action
+    first.reward += outcome
+    second.observation.lastOpponentAction = first.action
+    second.reward -= outcome
+    first.observation.reward = int(first.reward)
+    second.observation.reward = int(second.reward)
+
+    steps_left = env.configuration.episodeSteps - first.observation.step - 1
+    if steps_left > 1:
+        return state
+
+    # This is the last step
+    first.status = "DONE"
+    second.status = "DONE"
+    # Player performance too similar, consider the match a tie.
+    if abs(first.reward) < env.configuration.tieRewardThreshold:
+        first.reward = 0
+        second.reward = 0
+    return state
+def renderer(state, env):
+    """Render the episode as plain text: one line per round plus a summary.
+
+    Args:
+        state: Final per-agent states; ``state[0].reward`` and
+            ``state[1].reward`` are printed as the final score.
+        env: Environment whose ``steps`` list holds one entry per step. The
+            entry at index 0 is skipped; each later entry is rendered as a round.
+
+    Returns:
+        A newline-terminated string with a "Round N" line for every round
+        and a closing "Game ended" line.
+    """
+    sign_names = ["Rock", "Paper", "Scissors", "Spock", "Lizard"]
+    rounds_played = len(env.steps)
+    lines = []
+    # This loop prints results each round, good for debugging
+    for i in range(1, rounds_played):
+        step = env.steps[i]
+        # Each agent's observation stores the *opponent's* move, so agent 0's
+        # observation yields the right-hand move and agent 1's the left-hand move.
+        right_move = step[0].observation.lastOpponentAction
+        left_move = step[1].observation.lastOpponentAction
+        lines.append(
+            f"Round {i}: {sign_names[left_move]} vs {sign_names[right_move]}, Score: {step[0].reward} to {step[1].reward}"
+        )
+    # Report both players' rewards (previously player 1's reward was printed twice).
+    lines.append(
+        f"Game ended on round {rounds_played - 1}, final score: {state[0].reward} to {state[1].reward}"
+    )
+    return "\n".join(lines) + "\n"
+# Directory containing this module; rps.json and rps.js are resolved relative to it.
+dir_path = path.dirname(__file__)
+json_path = path.abspath(path.join(dir_path, "rps.json"))
+# Read explicitly as UTF-8 (as html_renderer does for rps.js) so loading the
+# specification does not depend on the platform's default text encoding.
+with open(json_path, encoding="utf-8") as json_file:
+    specification = json.load(json_file)
+def html_renderer():
+    """Return the text of ``rps.js``, located next to this module."""
+    script_location = path.abspath(path.join(dir_path, "rps.js"))
+    with open(script_location, encoding="utf-8") as handle:
+        script_source = handle.read()
+    return script_source
+# Re-export the agents imported from .agents under the module-level name `agents`.
+agents = all_agents

kaggle_environments/envs/rps/test_rps.py ADDED Viewed

@@ -0,0 +1,110 @@
+from kaggle_environments import make
+from .agents import agents, paper, rock
+def negative_move_agent(observation, configuration):
+    """Test agent that always returns -1, a sign index below the valid range."""
+    return -1
+def too_big_sign_agent(observation, configuration):
+    """Test agent that always returns 1000000; test_too_big_move expects it to be marked INVALID."""
+    return 1000000
+def non_integer_agent(observation, configuration):
+    """Test agent that always returns the float 0.3 instead of an integer sign."""
+    return 0.3
+def none_agent(observation, configuration):
+    """Test agent that always returns None instead of a sign."""
+    return None
+def test_rps_completes():
+    """A rock-vs-rock episode is named "rps" and ends with both agents DONE."""
+    settings = {"episodeSteps": 10, "tieRewardThreshold": 1}
+    env = make("rps", configuration=settings)
+    env.run([rock, rock])
+    episode = env.toJSON()
+    assert episode["name"] == "rps"
+    assert episode["statuses"] == ["DONE", "DONE"]
+def test_all_agents():
+    """Every entry in ``agents`` finishes a self-play episode with both sides DONE."""
+    settings = {"episodeSteps": 3, "tieRewardThreshold": 1}
+    env = make("rps", configuration=settings)
+    for candidate in agents:
+        env.run([candidate, candidate])
+        episode = env.toJSON()
+        assert episode["statuses"] == ["DONE", "DONE"]
+def test_tie():
+    """Rock vs rock renders two 0-0 rounds and ends with zero rewards."""
+    settings = {"episodeSteps": 3, "tieRewardThreshold": 1}
+    env = make("rps", configuration=settings)
+    env.run([rock, rock])
+    expected_board = (
+        "Round 1: Rock vs Rock, Score: 0 to 0\n"
+        "Round 2: Rock vs Rock, Score: 0 to 0\n"
+        "Game ended on round 2, final score: 0 to 0\n"
+    )
+    assert env.render(mode="ansi") == expected_board
+    episode = env.toJSON()
+    assert episode["rewards"] == [0, 0]
+    assert episode["statuses"] == ["DONE", "DONE"]
+def test_threshold_tie():
+    """With tieRewardThreshold 4, a rock-vs-paper episode is reported as a 0-0 tie."""
+    settings = {"episodeSteps": 3, "tieRewardThreshold": 4}
+    env = make("rps", configuration=settings)
+    env.run([rock, paper])
+    expected_board = (
+        "Round 1: Rock vs Paper, Score: -1.0 to 1.0\n"
+        "Round 2: Rock vs Paper, Score: 0 to 0\n"
+        "Game ended on round 2, final score: 0 to 0\n"
+    )
+    assert env.render(mode="ansi") == expected_board
+    episode = env.toJSON()
+    assert episode["rewards"] == [0, 0]
+    assert episode["statuses"] == ["DONE", "DONE"]
+def test_win():
+    """Paper vs rock gives rewards [1, -1] with both agents DONE."""
+    settings = {"episodeSteps": 2, "tieRewardThreshold": 1}
+    env = make("rps", configuration=settings)
+    env.run([paper, rock])
+    episode = env.toJSON()
+    print(episode)
+    assert episode["rewards"] == [1, -1]
+    assert episode["statuses"] == ["DONE", "DONE"]
+def test_loss():
+    """Rock vs paper gives rewards [-1, 1] with both agents DONE."""
+    settings = {"episodeSteps": 2, "tieRewardThreshold": 1}
+    env = make("rps", configuration=settings)
+    env.run([rock, paper])
+    episode = env.toJSON()
+    assert episode["rewards"] == [-1, 1]
+    assert episode["statuses"] == ["DONE", "DONE"]
+def test_negative_move():
+    """An agent returning -1 is INVALID with reward None; its opponent is DONE with reward 1."""
+    settings = {"episodeSteps": 10, "tieRewardThreshold": 1}
+    env = make("rps", configuration=settings)
+    env.run([negative_move_agent, rock])
+    episode = env.toJSON()
+    assert episode["rewards"] == [None, 1]
+    assert episode["statuses"] == ["INVALID", "DONE"]
+def test_non_integer_move():
+    """An agent returning 0.3 is INVALID with reward None; its opponent is DONE with reward 1."""
+    settings = {"episodeSteps": 10, "tieRewardThreshold": 1}
+    env = make("rps", configuration=settings)
+    env.run([non_integer_agent, rock])
+    episode = env.toJSON()
+    assert episode["rewards"] == [None, 1]
+    assert episode["statuses"] == ["INVALID", "DONE"]
+def test_too_big_move():
+    """An agent returning 1000000 is INVALID with reward None; its opponent is DONE with reward 1."""
+    settings = {"episodeSteps": 10, "tieRewardThreshold": 1}
+    env = make("rps", configuration=settings)
+    env.run([paper, too_big_sign_agent])
+    episode = env.toJSON()
+    assert episode["rewards"] == [1, None]
+    assert episode["statuses"] == ["DONE", "INVALID"]
+def test_agent_reward():
+    """In the last step, agent 0's observed reward equals its step and negates agent 1's."""
+    settings = {"episodeSteps": 2, "tieRewardThreshold": 1}
+    env = make("rps", configuration=settings)
+    env.run([paper, rock])
+    episode = env.toJSON()
+    final_step = episode["steps"][-1]
+    first_obs = final_step[0]["observation"]
+    second_obs = final_step[1]["observation"]
+    assert first_obs["step"] == first_obs["reward"]
+    assert first_obs["reward"] == -second_obs["reward"]
+    assert episode["statuses"] == ["DONE", "DONE"]

kaggle_environments/envs/rps/utils.py ADDED Viewed

@@ -0,0 +1,7 @@
+import math
+def get_score(left_move, right_move):
+    """Score one round from the left player's point of view.
+
+    Returns the integer 0 when both moves are equal; otherwise the float
+    1.0 or -1.0 produced by ``math.copysign``.
+    """
+    # This method exists in this file so it can be consumed from rps.py and agents.py without a circular dependency
+    same_parity = (left_move + right_move) % 2 == 0
+    if same_parity:
+        delta = right_move - left_move
+    else:
+        delta = left_move - right_move
+    if delta == 0:
+        return 0
+    return math.copysign(1, delta)

kaggle_environments/envs/tictactoe/test_tictactoe.py CHANGED Viewed

@@ -13,7 +13,8 @@
 # limitations under the License.
 import time
-from kaggle_environments import make, evaluate, utils
+from kaggle_environments import errors, evaluate, make, utils
 env = None
@@ -30,7 +31,7 @@ def custom2(obs):
 def custom3(obs):
     step = sum(1 for mark in obs.board if mark == obs.mark)
-    time.sleep(2)
+    time.sleep(4)
     return [1, 3, 5, 7][step]
@@ -42,10 +43,16 @@ def custom5():
     return -1
+def custom6(obs):
+    step = sum(1 for mark in obs.board if mark == obs.mark)
+    time.sleep(2)
+    return [1, 3, 5, 7][step]
 def before_each(state=None):
     global env
     steps = [] if state == None else [state]
-    env = make("tictactoe", steps=steps, debug=True)
+    env = make("tictactoe", steps=steps, debug=False)
 def test_to_json():
@@ -64,14 +71,14 @@ def test_can_reset():
             "action": 0,
             "status": "ACTIVE",
             "info": {},
-            "observation": {"mark": 1, "board": [0, 0, 0, 0, 0, 0, 0, 0, 0]},
+            "observation": {"remainingOverageTime": 2, "mark": 1, "board": [0, 0, 0, 0, 0, 0, 0, 0, 0], "step": 0},
             "reward": 0,
         },
         {
             "action": 0,
             "status": "INACTIVE",
             "info": {},
-            "observation": {"mark": 2},
+            "observation": {"remainingOverageTime": 2, "mark": 2},
             "reward": 0,
         },
     ]
@@ -85,14 +92,14 @@ def test_can_place_valid_mark():
             "action": 4,
             "status": "INACTIVE",
             "info": {},
-            "observation": {"mark": 1, "board": [0, 0, 0, 0, 1, 0, 0, 0, 0]},
+            "observation": {"remainingOverageTime": 2, "mark": 1, "board": [0, 0, 0, 0, 1, 0, 0, 0, 0], "step": 1},
             "reward": 0,
         },
         {
             "action": 0,  # None caused the default action to be applied.
             "status": "ACTIVE",
             "info": {},
-            "observation": {"mark": 2},
+            "observation": {"remainingOverageTime": 2, "mark": 2},
             "reward": 0,
         },
     ]
@@ -108,14 +115,14 @@ def test_can_place_invalid_mark():
             "action": 0,
             "status": "DONE",
             "info": {},
-            "observation": {"mark": 1, "board": [0, 0, 0, 0, 1, 0, 0, 0, 0]},
+            "observation": {"remainingOverageTime": 2, "mark": 1, "board": [0, 0, 0, 0, 1, 0, 0, 0, 0], "step": 2},
             "reward": 0,
         },
         {
             "action": 4,
             "status": "INVALID",
             "info": {},
-            "observation": {"mark": 2},
+            "observation": {"remainingOverageTime": 2, "mark": 2},
             "reward": None,
         },
     ]
@@ -131,14 +138,14 @@ def test_can_place_winning_mark():
             "action": 7,
             "status": "DONE",
             "info": {},
-            "observation": {"mark": 1, "board": [2, 1, 0, 1, 1, 0, 2, 1, 2]},
+            "observation": {"remainingOverageTime": 2, "mark": 1, "board": [2, 1, 0, 1, 1, 0, 2, 1, 2], "step": 1},
             "reward": 1,
         },
         {
             "action": 0,
             "status": "DONE",
             "info": {},
-            "observation": {"mark": 2},
+            "observation": {"remainingOverageTime": 2, "mark": 2},
             "reward": -1,
         },
     ]
@@ -155,8 +162,7 @@ def test_can_step_through_agents():
     before_each()
     while not env.done:
         action1 = env.agents.random(env.state[0].observation)
-        action2 = env.agents.reaction(
-            utils.structify({"board": env.state[0].observation.board, "mark": 2}))
+        action2 = env.agents.reaction(utils.structify({"board": env.state[0].observation.board, "mark": 2}))
         env.step([action1, action2])
     assert env.state[0].reward + env.state[1].reward == 0
@@ -169,8 +175,7 @@ def test_can_run_agents():
 def test_can_evaluate():
     rewards = evaluate("tictactoe", ["random", "reaction"], num_episodes=2)
-    assert (rewards[0][0] + rewards[0][1] ==
-            0) and rewards[1][0] + rewards[1][1] == 0
+    assert (rewards[0][0] + rewards[0][1] == 0) and rewards[1][0] + rewards[1][1] == 0
 def test_can_run_custom_agents():
@@ -181,83 +186,44 @@ def test_can_run_custom_agents():
             "action": 6,
             "reward": 1,
             "info": {},
-            "observation": {"board": [1, 2, 1, 2, 1, 2, 1, 0, 0], "mark": 1},
+            "observation": {"remainingOverageTime": 2, "board": [1, 2, 1, 2, 1, 2, 1, 0, 0], "mark": 1, "step": 7},
             "status": "DONE",
         },
         {
             "action": 0,
             "reward": -1,
             "info": {},
-            "observation": {"mark": 2},
+            "observation": {"remainingOverageTime": 2, "mark": 2},
             "status": "DONE",
         },
     ]
 def test_agents_can_timeout_on_init():
-    env = make("tictactoe", debug=True, configuration={
-               "agentTimeout": 1, "actTimeout": 1})
+    env = make("tictactoe", debug=False)
     state = env.run([custom1, custom3])[-1]
-    assert state == [
-        {
-            "action": 0,
-            "reward": 0,
-            "info": {},
-            "observation": {"board": [1, 0, 0, 0, 0, 0, 0, 0, 0], "mark": 1},
-            "status": "DONE",
-        },
-        {
-            "action": None,
-            "reward": None,
-            "info": {},
-            "observation": {"mark": 2},
-            "status": "TIMEOUT",
-        },
-    ]
+    assert state[1]["status"] == "TIMEOUT"
+    assert state[1]["observation"]["remainingOverageTime"] < 0
 def test_agents_can_timeout_on_act():
-    env = make("tictactoe", debug=True, configuration={
-               "agentTimeout": 5, "actTimeout": 1})
-    state = env.run([custom1, custom3])[-1]
-    assert state == [
-        {
-            "action": 0,
-            "reward": 0,
-            "info": {},
-            "observation": {"board": [1, 2, 1, 0, 0, 0, 0, 0, 0], "mark": 1},
-            "status": "DONE",
-        },
-        {
-            "action": None,
-            "reward": None,
-            "info": {},
-            "observation": {"mark": 2},
-            "status": "TIMEOUT",
-        },
-    ]
+    env = make("tictactoe", debug=False)
+    state = env.run([custom1, custom6])[-1]
+    print(state)
+    assert state[1]["status"] == "TIMEOUT"
+    assert state[1]["observation"]["remainingOverageTime"] < 0
 def test_run_timeout():
-    env = make("tictactoe", debug=True, configuration={
-               "agentTimeout": 10, "actTimeout": 10, "runTimeout": 6})
-    state = env.run([custom1, custom3])[-1]
-    assert state == [
-        {
-            "action": 0,
-            "reward": 0,
-            "info": {},
-            "observation": {"board": [1, 2, 1, 2, 1, 2, 0, 0, 0], "mark": 1},
-            "status": "ACTIVE",
-        },
-        {
-            "action": 5,
-            "reward": 0,
-            "info": {},
-            "observation": {"mark": 2},
-            "status": "INACTIVE",
-        },
-    ]
+    env = make("tictactoe", debug=False, configuration={"actTimeout": 10, "runTimeout": 1})
+    try:
+        state = env.run([custom1, custom3])[-1]
+    except errors.DeadlineExceeded:
+        pass
+    except:
+        assert False, "should fail with deadline exceeded"
+    else:
+        assert False, "Should fail when runtimeout is reached"
 def test_agents_can_error():
@@ -268,14 +234,14 @@ def test_agents_can_error():
             "action": 0,
             "reward": 0,
             "info": {},
-            "observation": {"board": [1, 0, 0, 0, 0, 0, 0, 0, 0], "mark": 1},
+            "observation": {"remainingOverageTime": 2, "board": [1, 0, 0, 0, 0, 0, 0, 0, 0], "mark": 1, "step": 2},
             "status": "DONE",
         },
         {
             "action": None,
             "reward": None,
             "info": {},
-            "observation": {"mark": 2},
+            "observation": {"remainingOverageTime": 2, "mark": 2},
             "status": "ERROR",
         },
     ]
@@ -289,14 +255,14 @@ def test_agents_can_have_invalid_actions():
             "action": 0,
             "reward": 0,
             "info": {},
-            "observation": {"board": [1, 0, 0, 0, 0, 0, 0, 0, 0], "mark": 1},
+            "observation": {"remainingOverageTime": 2, "board": [1, 0, 0, 0, 0, 0, 0, 0, 0], "mark": 1, "step": 2},
             "status": "DONE",
         },
         {
             "action": None,
             "reward": None,
             "info": {},
-            "observation": {"mark": 2},
+            "observation": {"remainingOverageTime": 2, "mark": 2},
             "status": "INVALID",
         },
     ]

kaggle-environments 0.2.1__py3-none-any.whl → 1.20.1__py3-none-any.whl

Potentially problematic release.

kaggle-environments 0.2.1__py3-none-any.whl → 1.20.1__py3-none-any.whl