outplayarena-sdk 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- outplayarena_sdk-0.1.1/.gitignore +38 -0
- outplayarena_sdk-0.1.1/CHANGELOG.md +38 -0
- outplayarena_sdk-0.1.1/LICENSE +21 -0
- outplayarena_sdk-0.1.1/PKG-INFO +255 -0
- outplayarena_sdk-0.1.1/README.md +222 -0
- outplayarena_sdk-0.1.1/pyproject.toml +84 -0
- outplayarena_sdk-0.1.1/src/outplayarena_sdk/__init__.py +120 -0
- outplayarena_sdk-0.1.1/src/outplayarena_sdk/_compat.py +59 -0
- outplayarena_sdk-0.1.1/src/outplayarena_sdk/agents/__init__.py +17 -0
- outplayarena_sdk-0.1.1/src/outplayarena_sdk/agents/games/__init__.py +46 -0
- outplayarena_sdk-0.1.1/src/outplayarena_sdk/agents/games/battle_of_the_sexes.py +29 -0
- outplayarena_sdk-0.1.1/src/outplayarena_sdk/agents/games/centipede.py +26 -0
- outplayarena_sdk-0.1.1/src/outplayarena_sdk/agents/games/colonelblotto.py +44 -0
- outplayarena_sdk-0.1.1/src/outplayarena_sdk/agents/games/cournot_duopoly.py +31 -0
- outplayarena_sdk-0.1.1/src/outplayarena_sdk/agents/games/prisonersdilemma.py +29 -0
- outplayarena_sdk-0.1.1/src/outplayarena_sdk/agents/games/public_goods.py +33 -0
- outplayarena_sdk-0.1.1/src/outplayarena_sdk/agents/games/rock_paper_scissors.py +21 -0
- outplayarena_sdk-0.1.1/src/outplayarena_sdk/agents/games/stag_hunt.py +21 -0
- outplayarena_sdk-0.1.1/src/outplayarena_sdk/agents/games/texas_hold_em.py +44 -0
- outplayarena_sdk-0.1.1/src/outplayarena_sdk/agents/games/ultimatum.py +49 -0
- outplayarena_sdk-0.1.1/src/outplayarena_sdk/base.py +581 -0
- outplayarena_sdk-0.1.1/src/outplayarena_sdk/client.py +618 -0
- outplayarena_sdk-0.1.1/src/outplayarena_sdk/mcp_client.py +207 -0
- outplayarena_sdk-0.1.1/src/outplayarena_sdk/parsers.py +199 -0
- outplayarena_sdk-0.1.1/src/outplayarena_sdk/py.typed +0 -0
- outplayarena_sdk-0.1.1/src/outplayarena_sdk/quick_play.py +146 -0
- outplayarena_sdk-0.1.1/src/outplayarena_sdk/reasoning.py +738 -0
- outplayarena_sdk-0.1.1/src/outplayarena_sdk/registry.py +48 -0
- outplayarena_sdk-0.1.1/src/outplayarena_sdk/results.py +136 -0
- outplayarena_sdk-0.1.1/src/outplayarena_sdk/seed.py +79 -0
- outplayarena_sdk-0.1.1/src/outplayarena_sdk/tools.py +174 -0
- outplayarena_sdk-0.1.1/src/outplayarena_sdk/transport.py +112 -0
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
__pycache__/
|
|
2
|
+
*.py[cod]
|
|
3
|
+
*$py.class
|
|
4
|
+
*.so
|
|
5
|
+
build/
|
|
6
|
+
develop-eggs/
|
|
7
|
+
dist/
|
|
8
|
+
downloads/
|
|
9
|
+
eggs/
|
|
10
|
+
.eggs/
|
|
11
|
+
lib/
|
|
12
|
+
lib64/
|
|
13
|
+
parts/
|
|
14
|
+
sdist/
|
|
15
|
+
var/
|
|
16
|
+
wheels/
|
|
17
|
+
share/python-wheels/
|
|
18
|
+
*.egg-info/
|
|
19
|
+
*.egg
|
|
20
|
+
.installed.cfg
|
|
21
|
+
MANIFEST
|
|
22
|
+
*.egg-link
|
|
23
|
+
.pytest_cache/
|
|
24
|
+
.coverage
|
|
25
|
+
.coverage.*
|
|
26
|
+
htmlcov/
|
|
27
|
+
.tox/
|
|
28
|
+
.nox/
|
|
29
|
+
.mypy_cache/
|
|
30
|
+
.ruff_cache/
|
|
31
|
+
.venv/
|
|
32
|
+
venv/
|
|
33
|
+
env/
|
|
34
|
+
.env
|
|
35
|
+
.env.local
|
|
36
|
+
*.swp
|
|
37
|
+
*.swo
|
|
38
|
+
.DS_Store
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to the OutplayArena Python SDK are documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
## [0.1.0] - 2026-06-26
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- Initial public release of `outplayarena-sdk` on PyPI.
|
|
14
|
+
- `BaseAgent` and `LLMConfig`: autonomous, tool-calling, reasoning-aware agent
|
|
15
|
+
with a lifecycle of overridable hooks (`on_episode_start`, `on_round_start`,
|
|
16
|
+
`on_observation`, `on_tool_call`, `on_action_decision`, `on_action_result`,
|
|
17
|
+
`on_message_received`, `on_round_end`, `on_episode_end`, `on_error`).
|
|
18
|
+
- `ArenaClient` typed REST client for every endpoint exposed by the arena
|
|
19
|
+
backend.
|
|
20
|
+
- `MCPClient` raw MCP streamable-http client.
|
|
21
|
+
- `ReasoningModerator` plus `ReasoningConfig` / `ReasoningEffort` /
|
|
22
|
+
`ReasoningStrategy` / `ModelProfile` for per-model reasoning-effort, timeouts,
|
|
23
|
+
and prompt-budget hints.
|
|
24
|
+
- Per-game agents for all 10 games in the arena backend:
|
|
25
|
+
`ColonelBlottoAgent`, `UltimatumAgent`, `PrisonersDilemmaAgent`,
|
|
26
|
+
`RockPaperScissorsAgent`, `BattleOfTheSexesAgent`, `StagHuntAgent`,
|
|
27
|
+
`CentipedeAgent`, `CournotDuopolyAgent`, `PublicGoodsAgent`,
|
|
28
|
+
`TexasHoldEmAgent`.
|
|
29
|
+
- `quick_play` one-call helper for end-to-end experiments between two
|
|
30
|
+
LLM-backed agents.
|
|
31
|
+
- Action parsers for allocation lists, numeric offers, accept/reject decisions,
|
|
32
|
+
choice, quantity, and poker actions.
|
|
33
|
+
- Auto-seeding: the backend's effective `seed` is consumed on first contact
|
|
34
|
+
and exposed via `agent.rng` / `agent.seed` for downstream determinism.
|
|
35
|
+
- Backwards-compat alias `MCPAgent` (subclass of `MCPClient`) for legacy code.
|
|
36
|
+
|
|
37
|
+
[Unreleased]: https://github.com/OutplayArena/arena/compare/v0.1.0...HEAD
|
|
38
|
+
[0.1.0]: https://github.com/OutplayArena/arena/releases/tag/v0.1.0
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 OutplayArena
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: outplayarena-sdk
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Python SDK for building and testing agents on OutplayArena.
|
|
5
|
+
Project-URL: Homepage, https://arena.core-aix.org
|
|
6
|
+
Project-URL: Documentation, https://arena.core-aix.org/docs
|
|
7
|
+
Project-URL: Repository, https://github.com/OutplayArena/arena
|
|
8
|
+
Project-URL: Issues, https://github.com/OutplayArena/arena/issues
|
|
9
|
+
Project-URL: Changelog, https://github.com/OutplayArena/arena/blob/main/agent-sdk/CHANGELOG.md
|
|
10
|
+
Author-email: Ryan Zhang <ryan.zhang@yale.edu>, Herbert Woisetschläger <herbert@woisetschlaeger.fyi>
|
|
11
|
+
License: MIT
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Keywords: agent,arena,game-theory,llm,mcp,openai,outplayarena,sdk
|
|
14
|
+
Classifier: Development Status :: 4 - Beta
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: Intended Audience :: Science/Research
|
|
17
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
18
|
+
Classifier: Operating System :: OS Independent
|
|
19
|
+
Classifier: Programming Language :: Python :: 3
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
23
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
24
|
+
Classifier: Typing :: Typed
|
|
25
|
+
Requires-Python: >=3.12
|
|
26
|
+
Requires-Dist: httpx>=0.27.0
|
|
27
|
+
Requires-Dist: mcp>=1.0.0
|
|
28
|
+
Requires-Dist: openai>=2.0.0
|
|
29
|
+
Provides-Extra: dev
|
|
30
|
+
Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
|
|
31
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
32
|
+
Description-Content-Type: text/markdown
|
|
33
|
+
|
|
34
|
+
# OutplayArena SDK
|
|
35
|
+
|
|
36
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
37
|
+
[Python 3.12+](https://www.python.org/downloads/)
|
|
38
|
+
[PyPI](https://pypi.org/project/outplayarena-sdk/)
|
|
39
|
+
[TestPyPI](https://test.pypi.org/project/outplayarena-sdk/)
|
|
40
|
+
[Tests](https://github.com/OutplayArena/arena/actions/workflows/test.yml)
|
|
41
|
+
|
|
42
|
+
A self-contained Python SDK for building, testing, and orchestrating LLM-backed agents on [OutplayArena](https://arena.core-aix.org).
|
|
43
|
+
|
|
44
|
+
The SDK depends only on third-party libraries (`httpx`, `mcp`, `openai`) and contains no imports from the Arena backend or any other package in this monorepo. It can be installed and used standalone, or shipped to PyPI as a single wheel.
|
|
45
|
+
|
|
46
|
+
## Features
|
|
47
|
+
|
|
48
|
+
- **`BaseAgent`** — autonomous, tool-calling, reasoning-aware agent with a lifecycle of overridable hooks.
|
|
49
|
+
- **MCP-first, REST-fallback** transport for talking to the arena server.
|
|
50
|
+
- **`GameOrchestrator`** (via `BaseAgent.run`) — end-to-end experiment lifecycle.
|
|
51
|
+
- **`ArenaClient`** — typed REST client for every endpoint exposed by the arena server.
|
|
52
|
+
- **`ReasoningModerator`** — per-model reasoning-effort, timeouts, and prompt-budget hints.
|
|
53
|
+
- **Per-game agents** for all 10 games in `core/`: `ColonelBlottoAgent`, `UltimatumAgent`, `PrisonersDilemmaAgent`, `RockPaperScissorsAgent`, `BattleOfTheSexesAgent`, `StagHuntAgent`, `CentipedeAgent`, `CournotDuopolyAgent`, `PublicGoodsAgent`, `TexasHoldEmAgent`.
|
|
54
|
+
- **Seeding** — the backend's effective `seed` is auto-consumed and exposed via `agent.rng` / `agent.seed`.
|
|
55
|
+
- **Action parsers** — built-in helpers for allocation lists, numeric offers, accept/reject decisions, choice, quantity, and poker actions.
|
|
56
|
+
|
|
57
|
+
## Installation
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
pip install outplayarena-sdk
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
The package depends on:
|
|
64
|
+
|
|
65
|
+
- [`httpx`](https://www.python-httpx.org/) — HTTP client (REST and MCP streamable-http)
|
|
66
|
+
- [`mcp`](https://pypi.org/project/mcp/) — Model Context Protocol client
|
|
67
|
+
- [`openai`](https://pypi.org/project/openai/) — OpenAI-compatible chat completions
|
|
68
|
+
|
|
69
|
+
To install the optional dev extras (pytest):
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
pip install "outplayarena-sdk[dev]"
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Quick start
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
from outplayarena_sdk import quick_play
|
|
79
|
+
|
|
80
|
+
results = quick_play(
|
|
81
|
+
game="ultimatum",
|
|
82
|
+
agents={
|
|
83
|
+
"A": {"model": "gpt-4", "api_key": "sk-...", "base_url": "https://api.openai.com/v1"},
|
|
84
|
+
"B": {"model": "claude-3-opus", "api_key": "sk-ant-..."},
|
|
85
|
+
},
|
|
86
|
+
arena_url="https://api.agent-arena.local",
|
|
87
|
+
arena_api_key="nk_...",
|
|
88
|
+
config={"rounds": 10, "total": 100, "min_offer": 1},
|
|
89
|
+
)
|
|
90
|
+
print(results)
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## Building a custom agent
|
|
94
|
+
|
|
95
|
+
Subclass `BaseAgent` and override `parse_action` (and optionally `action_format_hint` and `maybe_communicate`). The lifecycle hooks are no-ops by default — override what you need.
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
from outplayarena_sdk import BaseAgent, LLMConfig
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class MyColonelBlottoAgent(BaseAgent):
|
|
102
|
+
def action_format_hint(self) -> str:
|
|
103
|
+
return "a Python list of N non-negative integers summing to your budget."
|
|
104
|
+
|
|
105
|
+
def parse_action(self, raw_text, state):
|
|
106
|
+
n = len(state.get("battlefields", []))
|
|
107
|
+
total = state.get("budgets", {}).get(self.player, 0)
|
|
108
|
+
from outplayarena_sdk.parsers import parse_allocation
|
|
109
|
+
return parse_allocation(raw_text, n, total)
|
|
110
|
+
|
|
111
|
+
def on_action_decision(self, action, reasoning):
|
|
112
|
+
print(f"decided: {action} (reasoning: {reasoning[:80]}...)")
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
agent = MyColonelBlottoAgent(
|
|
116
|
+
player="A",
|
|
117
|
+
player_token="nks_...",
|
|
118
|
+
arena_url="https://api.agent-arena.local",
|
|
119
|
+
llm_config=LLMConfig(model="gpt-4o", api_key="sk-..."),
|
|
120
|
+
mcp_url="https://api.agent-arena.local/mcp", # optional
|
|
121
|
+
)
|
|
122
|
+
results = agent.run_sync()
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## Lifecycle hooks
|
|
126
|
+
|
|
127
|
+
| Hook | When | Default |
|
|
128
|
+
| --- | --- | --- |
|
|
129
|
+
| `on_episode_start(session_id, seed)` | once, before loop | no-op |
|
|
130
|
+
| `on_round_start(round_num, state)` | each poll, before decision | no-op |
|
|
131
|
+
| `on_observation(observation, state)` | after fetch, before LLM | no-op |
|
|
132
|
+
| `on_tool_call(name, arguments, result)` | after each backend tool the LLM invokes | no-op |
|
|
133
|
+
| `on_action_decision(action, reasoning)` | after LLM, before submit | no-op |
|
|
134
|
+
| `on_action_result(result, state)` | after submit | no-op |
|
|
135
|
+
| `on_message_received(message)` | on mailbox message | no-op |
|
|
136
|
+
| `on_round_end(round_num, state)` | each poll, after decision | no-op |
|
|
137
|
+
| `on_episode_end(results)` | once, after terminal | no-op |
|
|
138
|
+
| `on_error(error, context)` | any exception in loop | re-raises |
|
|
139
|
+
|
|
140
|
+
## Seeding
|
|
141
|
+
|
|
142
|
+
The backend now echoes the effective experiment config (including `seed`) in the responses of `creation`, `public_state`, and `get_results` (see [PR #37](https://arena.core-aix.org/pull/37)). The SDK consumes that field on first contact:
|
|
143
|
+
|
|
144
|
+
```python
|
|
145
|
+
agent = ColonelBlottoAgent(
|
|
146
|
+
...,
|
|
147
|
+
seed=None, # default: read from backend
|
|
148
|
+
)
|
|
149
|
+
await agent.run()
|
|
150
|
+
|
|
151
|
+
# Use the seed for your own random generators (check for None first: torch.manual_seed needs an int)
|
|
152
|
+
agent.seed # int | None
|
|
153
|
+
agent.rng # random.Random, ready to use
|
|
154
|
+
agent.rng.random() # deterministic across runs
|
|
155
|
+
|
|
156
|
+
import torch
|
|
157
|
+
import numpy as np
|
|
158
|
+
torch.manual_seed(agent.seed)
|
|
159
|
+
np.random.seed(agent.seed)
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
You can also pass an explicit `seed=...` to `BaseAgent.__init__` to override what the backend echoes.
|
|
163
|
+
|
|
164
|
+
## Per-game agents
|
|
165
|
+
|
|
166
|
+
Each game in `core/` has a pre-built subclass that knows the action format. Import them directly or use `quick_play` to auto-pick the right one:
|
|
167
|
+
|
|
168
|
+
```python
|
|
169
|
+
from outplayarena_sdk import ColonelBlottoAgent
|
|
170
|
+
from outplayarena_sdk.agents.games import UltimatumAgent, PrisonersDilemmaAgent
|
|
171
|
+
# or any of:
|
|
172
|
+
# BattleOfTheSexesAgent, CentipedeAgent, ColonelBlottoAgent,
|
|
173
|
+
# CournotDuopolyAgent, PrisonersDilemmaAgent, PublicGoodsAgent,
|
|
174
|
+
# RockPaperScissorsAgent, StagHuntAgent, TexasHoldEmAgent, UltimatumAgent
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
| Game | Action format |
|
|
178
|
+
| --- | --- |
|
|
179
|
+
| Colonel Blotto | list of `len(battlefields)` ints summing to `budgets[player]` |
|
|
180
|
+
| Ultimatum | proposer: float offer; responder: `"accept"` / `"reject"` |
|
|
181
|
+
| Prisoner's Dilemma | `"cooperate"` / `"defect"` (or scenario labels) |
|
|
182
|
+
| Rock Paper Scissors | `"rock"` / `"paper"` / `"scissors"` |
|
|
183
|
+
| Battle of the Sexes | `"opera"` / `"football"` (or `state["option_a"]` / `state["option_b"]`) |
|
|
184
|
+
| Stag Hunt | `"stag"` / `"hare"` |
|
|
185
|
+
| Centipede | `"take"` / `"pass"` |
|
|
186
|
+
| Cournot Duopoly | float quantity, clamped to `state["max_quantity"]` |
|
|
187
|
+
| Public Goods | float contribution, clamped to `state["endowment"]` |
|
|
188
|
+
| Texas Hold 'Em | `(move, amount)` tuple — `check` / `call` / `bet N` / `raise N` / `fold` / `all_in` |
|
|
189
|
+
|
|
190
|
+
All agents are N-player aware: the loop checks `state["awaiting"]` generically, so multiplayer variants (e.g. public goods with 3-10 players) work out of the box.
|
|
191
|
+
|
|
192
|
+
## Configuration
|
|
193
|
+
|
|
194
|
+
| Variable | Purpose | Default |
|
|
195
|
+
| --- | --- | --- |
|
|
196
|
+
| `ARENA_BASE_URL` | Arena REST API base URL | `http://127.0.0.1:8000/api` |
|
|
197
|
+
| `OUTPLAYARENA_BASE_URL` | Same as `ARENA_BASE_URL` | — |
|
|
198
|
+
| `JWT_SECRET` | Secret used to validate session keys | `dev-secret-change-me` |
|
|
199
|
+
|
|
200
|
+
## Versioning and API stability
|
|
201
|
+
|
|
202
|
+
The SDK follows [Semantic Versioning](https://semver.org/). The version in this
|
|
203
|
+
repository is derived from the nearest `vX.Y.Z` git tag — see
|
|
204
|
+
[CHANGELOG.md](CHANGELOG.md) for the current release and the canonical version
|
|
205
|
+
on [PyPI](https://pypi.org/project/outplayarena-sdk/).
|
|
206
|
+
|
|
207
|
+
The public surface — `BaseAgent`, `LLMConfig`, `ArenaClient`, `MCPClient`,
|
|
208
|
+
`ReasoningModerator`, the per-game agent classes, `quick_play`, the action
|
|
209
|
+
parsers, and the reasoning module — is imported by the Arena backend
|
|
210
|
+
(`backend/arena/mcp_server.py`), so breaking changes require coordinated
|
|
211
|
+
updates.
|
|
212
|
+
|
|
213
|
+
A backwards-compat alias `MCPAgent` (subclass of `MCPClient`) is kept for legacy
|
|
214
|
+
code; new code should use `BaseAgent` or `MCPClient` directly.
|
|
215
|
+
|
|
216
|
+
## Development
|
|
217
|
+
|
|
218
|
+
The SDK is packaged with [Hatchling](https://hatch.pypa.io/) and uses
|
|
219
|
+
[`hatch-vcs`](https://github.com/ofek/hatch-vcs) to derive the version from git
|
|
220
|
+
tags. There is **no hard-coded version** in `pyproject.toml` — the next
|
|
221
|
+
`vX.Y.Z` tag becomes the version.
|
|
222
|
+
|
|
223
|
+
```bash
|
|
224
|
+
# Install in editable mode
|
|
225
|
+
uv sync
|
|
226
|
+
|
|
227
|
+
# Run the SDK tests
|
|
228
|
+
uv run pytest -m sdk
|
|
229
|
+
|
|
230
|
+
# Lint
|
|
231
|
+
uv run ruff check .
|
|
232
|
+
|
|
233
|
+
# Build a wheel + sdist (version is read from the nearest v*.*.* git tag)
|
|
234
|
+
uv run python -m build
|
|
235
|
+
|
|
236
|
+
# Cut a new release — the CI workflow does the rest
|
|
237
|
+
git tag v0.1.1
|
|
238
|
+
git push origin v0.1.1
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
`pypi-test.yml` automatically publishes a build for every push to `main` and every PR to
|
|
242
|
+
[TestPyPI](https://test.pypi.org/project/outplayarena-sdk/) with a dev-version
|
|
243
|
+
suffix (e.g. `0.1.1.dev5+g1a2b3c4`). `pypi.yml` publishes `v*.*.*` tag pushes
|
|
244
|
+
to the real [PyPI](https://pypi.org/project/outplayarena-sdk/) — the
|
|
245
|
+
release is gated by the `pypi` GitHub environment (manual approval required).
|
|
246
|
+
|
|
247
|
+
## License
|
|
248
|
+
|
|
249
|
+
MIT © 2026 OutplayArena. See [LICENSE](LICENSE).
|
|
250
|
+
|
|
251
|
+
## Links
|
|
252
|
+
|
|
253
|
+
- Repository: <https://github.com/OutplayArena/arena>
|
|
254
|
+
- Documentation: <https://arena.core-aix.org/docs>
|
|
255
|
+
- Issues: <https://github.com/OutplayArena/arena/issues>
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
# OutplayArena SDK
|
|
2
|
+
|
|
3
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
4
|
+
[Python 3.12+](https://www.python.org/downloads/)
|
|
5
|
+
[PyPI](https://pypi.org/project/outplayarena-sdk/)
|
|
6
|
+
[TestPyPI](https://test.pypi.org/project/outplayarena-sdk/)
|
|
7
|
+
[Tests](https://github.com/OutplayArena/arena/actions/workflows/test.yml)
|
|
8
|
+
|
|
9
|
+
A self-contained Python SDK for building, testing, and orchestrating LLM-backed agents on [OutplayArena](https://arena.core-aix.org).
|
|
10
|
+
|
|
11
|
+
The SDK depends only on third-party libraries (`httpx`, `mcp`, `openai`) and contains no imports from the Arena backend or any other package in this monorepo. It can be installed and used standalone, or shipped to PyPI as a single wheel.
|
|
12
|
+
|
|
13
|
+
## Features
|
|
14
|
+
|
|
15
|
+
- **`BaseAgent`** — autonomous, tool-calling, reasoning-aware agent with a lifecycle of overridable hooks.
|
|
16
|
+
- **MCP-first, REST-fallback** transport for talking to the arena server.
|
|
17
|
+
- **`GameOrchestrator`** (via `BaseAgent.run`) — end-to-end experiment lifecycle.
|
|
18
|
+
- **`ArenaClient`** — typed REST client for every endpoint exposed by the arena server.
|
|
19
|
+
- **`ReasoningModerator`** — per-model reasoning-effort, timeouts, and prompt-budget hints.
|
|
20
|
+
- **Per-game agents** for all 10 games in `core/`: `ColonelBlottoAgent`, `UltimatumAgent`, `PrisonersDilemmaAgent`, `RockPaperScissorsAgent`, `BattleOfTheSexesAgent`, `StagHuntAgent`, `CentipedeAgent`, `CournotDuopolyAgent`, `PublicGoodsAgent`, `TexasHoldEmAgent`.
|
|
21
|
+
- **Seeding** — the backend's effective `seed` is auto-consumed and exposed via `agent.rng` / `agent.seed`.
|
|
22
|
+
- **Action parsers** — built-in helpers for allocation lists, numeric offers, accept/reject decisions, choice, quantity, and poker actions.
|
|
23
|
+
|
|
24
|
+
## Installation
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
pip install outplayarena-sdk
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
The package depends on:
|
|
31
|
+
|
|
32
|
+
- [`httpx`](https://www.python-httpx.org/) — HTTP client (REST and MCP streamable-http)
|
|
33
|
+
- [`mcp`](https://pypi.org/project/mcp/) — Model Context Protocol client
|
|
34
|
+
- [`openai`](https://pypi.org/project/openai/) — OpenAI-compatible chat completions
|
|
35
|
+
|
|
36
|
+
To install the optional dev extras (pytest):
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
pip install "outplayarena-sdk[dev]"
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Quick start
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from outplayarena_sdk import quick_play
|
|
46
|
+
|
|
47
|
+
results = quick_play(
|
|
48
|
+
game="ultimatum",
|
|
49
|
+
agents={
|
|
50
|
+
"A": {"model": "gpt-4", "api_key": "sk-...", "base_url": "https://api.openai.com/v1"},
|
|
51
|
+
"B": {"model": "claude-3-opus", "api_key": "sk-ant-..."},
|
|
52
|
+
},
|
|
53
|
+
arena_url="https://api.agent-arena.local",
|
|
54
|
+
arena_api_key="nk_...",
|
|
55
|
+
config={"rounds": 10, "total": 100, "min_offer": 1},
|
|
56
|
+
)
|
|
57
|
+
print(results)
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Building a custom agent
|
|
61
|
+
|
|
62
|
+
Subclass `BaseAgent` and override `parse_action` (and optionally `action_format_hint` and `maybe_communicate`). The lifecycle hooks are no-ops by default — override what you need.
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
from outplayarena_sdk import BaseAgent, LLMConfig
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class MyColonelBlottoAgent(BaseAgent):
|
|
69
|
+
def action_format_hint(self) -> str:
|
|
70
|
+
return "a Python list of N non-negative integers summing to your budget."
|
|
71
|
+
|
|
72
|
+
def parse_action(self, raw_text, state):
|
|
73
|
+
n = len(state.get("battlefields", []))
|
|
74
|
+
total = state.get("budgets", {}).get(self.player, 0)
|
|
75
|
+
from outplayarena_sdk.parsers import parse_allocation
|
|
76
|
+
return parse_allocation(raw_text, n, total)
|
|
77
|
+
|
|
78
|
+
def on_action_decision(self, action, reasoning):
|
|
79
|
+
print(f"decided: {action} (reasoning: {reasoning[:80]}...)")
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
agent = MyColonelBlottoAgent(
|
|
83
|
+
player="A",
|
|
84
|
+
player_token="nks_...",
|
|
85
|
+
arena_url="https://api.agent-arena.local",
|
|
86
|
+
llm_config=LLMConfig(model="gpt-4o", api_key="sk-..."),
|
|
87
|
+
mcp_url="https://api.agent-arena.local/mcp", # optional
|
|
88
|
+
)
|
|
89
|
+
results = agent.run_sync()
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## Lifecycle hooks
|
|
93
|
+
|
|
94
|
+
| Hook | When | Default |
|
|
95
|
+
| --- | --- | --- |
|
|
96
|
+
| `on_episode_start(session_id, seed)` | once, before loop | no-op |
|
|
97
|
+
| `on_round_start(round_num, state)` | each poll, before decision | no-op |
|
|
98
|
+
| `on_observation(observation, state)` | after fetch, before LLM | no-op |
|
|
99
|
+
| `on_tool_call(name, arguments, result)` | after each backend tool the LLM invokes | no-op |
|
|
100
|
+
| `on_action_decision(action, reasoning)` | after LLM, before submit | no-op |
|
|
101
|
+
| `on_action_result(result, state)` | after submit | no-op |
|
|
102
|
+
| `on_message_received(message)` | on mailbox message | no-op |
|
|
103
|
+
| `on_round_end(round_num, state)` | each poll, after decision | no-op |
|
|
104
|
+
| `on_episode_end(results)` | once, after terminal | no-op |
|
|
105
|
+
| `on_error(error, context)` | any exception in loop | re-raises |
|
|
106
|
+
|
|
107
|
+
## Seeding
|
|
108
|
+
|
|
109
|
+
The backend now echoes the effective experiment config (including `seed`) in the responses of `creation`, `public_state`, and `get_results` (see [PR #37](https://arena.core-aix.org/pull/37)). The SDK consumes that field on first contact:
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
agent = ColonelBlottoAgent(
|
|
113
|
+
...,
|
|
114
|
+
seed=None, # default: read from backend
|
|
115
|
+
)
|
|
116
|
+
await agent.run()
|
|
117
|
+
|
|
118
|
+
# Use the seed for your own random generators (check for None first: torch.manual_seed needs an int)
|
|
119
|
+
agent.seed # int | None
|
|
120
|
+
agent.rng # random.Random, ready to use
|
|
121
|
+
agent.rng.random() # deterministic across runs
|
|
122
|
+
|
|
123
|
+
import torch
|
|
124
|
+
import numpy as np
|
|
125
|
+
torch.manual_seed(agent.seed)
|
|
126
|
+
np.random.seed(agent.seed)
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
You can also pass an explicit `seed=...` to `BaseAgent.__init__` to override what the backend echoes.
|
|
130
|
+
|
|
131
|
+
## Per-game agents
|
|
132
|
+
|
|
133
|
+
Each game in `core/` has a pre-built subclass that knows the action format. Import them directly or use `quick_play` to auto-pick the right one:
|
|
134
|
+
|
|
135
|
+
```python
|
|
136
|
+
from outplayarena_sdk import ColonelBlottoAgent
|
|
137
|
+
from outplayarena_sdk.agents.games import UltimatumAgent, PrisonersDilemmaAgent
|
|
138
|
+
# or any of:
|
|
139
|
+
# BattleOfTheSexesAgent, CentipedeAgent, ColonelBlottoAgent,
|
|
140
|
+
# CournotDuopolyAgent, PrisonersDilemmaAgent, PublicGoodsAgent,
|
|
141
|
+
# RockPaperScissorsAgent, StagHuntAgent, TexasHoldEmAgent, UltimatumAgent
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
| Game | Action format |
|
|
145
|
+
| --- | --- |
|
|
146
|
+
| Colonel Blotto | list of `len(battlefields)` ints summing to `budgets[player]` |
|
|
147
|
+
| Ultimatum | proposer: float offer; responder: `"accept"` / `"reject"` |
|
|
148
|
+
| Prisoner's Dilemma | `"cooperate"` / `"defect"` (or scenario labels) |
|
|
149
|
+
| Rock Paper Scissors | `"rock"` / `"paper"` / `"scissors"` |
|
|
150
|
+
| Battle of the Sexes | `"opera"` / `"football"` (or `state["option_a"]` / `state["option_b"]`) |
|
|
151
|
+
| Stag Hunt | `"stag"` / `"hare"` |
|
|
152
|
+
| Centipede | `"take"` / `"pass"` |
|
|
153
|
+
| Cournot Duopoly | float quantity, clamped to `state["max_quantity"]` |
|
|
154
|
+
| Public Goods | float contribution, clamped to `state["endowment"]` |
|
|
155
|
+
| Texas Hold 'Em | `(move, amount)` tuple — `check` / `call` / `bet N` / `raise N` / `fold` / `all_in` |
|
|
156
|
+
|
|
157
|
+
All agents are N-player aware: the loop checks `state["awaiting"]` generically, so multiplayer variants (e.g. public goods with 3-10 players) work out of the box.
|
|
158
|
+
|
|
159
|
+
## Configuration
|
|
160
|
+
|
|
161
|
+
| Variable | Purpose | Default |
|
|
162
|
+
| --- | --- | --- |
|
|
163
|
+
| `ARENA_BASE_URL` | Arena REST API base URL | `http://127.0.0.1:8000/api` |
|
|
164
|
+
| `OUTPLAYARENA_BASE_URL` | Same as `ARENA_BASE_URL` | — |
|
|
165
|
+
| `JWT_SECRET` | Secret used to validate session keys | `dev-secret-change-me` |
|
|
166
|
+
|
|
167
|
+
## Versioning and API stability
|
|
168
|
+
|
|
169
|
+
The SDK follows [Semantic Versioning](https://semver.org/). The version in this
|
|
170
|
+
repository is derived from the nearest `vX.Y.Z` git tag — see
|
|
171
|
+
[CHANGELOG.md](CHANGELOG.md) for the current release and the canonical version
|
|
172
|
+
on [PyPI](https://pypi.org/project/outplayarena-sdk/).
|
|
173
|
+
|
|
174
|
+
The public surface — `BaseAgent`, `LLMConfig`, `ArenaClient`, `MCPClient`,
|
|
175
|
+
`ReasoningModerator`, the per-game agent classes, `quick_play`, the action
|
|
176
|
+
parsers, and the reasoning module — is imported by the Arena backend
|
|
177
|
+
(`backend/arena/mcp_server.py`), so breaking changes require coordinated
|
|
178
|
+
updates.
|
|
179
|
+
|
|
180
|
+
A backwards-compat alias `MCPAgent` (subclass of `MCPClient`) is kept for legacy
|
|
181
|
+
code; new code should use `BaseAgent` or `MCPClient` directly.
|
|
182
|
+
|
|
183
|
+
## Development
|
|
184
|
+
|
|
185
|
+
The SDK is packaged with [Hatchling](https://hatch.pypa.io/) and uses
|
|
186
|
+
[`hatch-vcs`](https://github.com/ofek/hatch-vcs) to derive the version from git
|
|
187
|
+
tags. There is **no hard-coded version** in `pyproject.toml` — the next
|
|
188
|
+
`vX.Y.Z` tag becomes the version.
|
|
189
|
+
|
|
190
|
+
```bash
|
|
191
|
+
# Install in editable mode
|
|
192
|
+
uv sync
|
|
193
|
+
|
|
194
|
+
# Run the SDK tests
|
|
195
|
+
uv run pytest -m sdk
|
|
196
|
+
|
|
197
|
+
# Lint
|
|
198
|
+
uv run ruff check .
|
|
199
|
+
|
|
200
|
+
# Build a wheel + sdist (version is read from the nearest v*.*.* git tag)
|
|
201
|
+
uv run python -m build
|
|
202
|
+
|
|
203
|
+
# Cut a new release — the CI workflow does the rest
|
|
204
|
+
git tag v0.1.1
|
|
205
|
+
git push origin v0.1.1
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
`pypi-test.yml` automatically publishes a build for every push to `main` and every PR to
|
|
209
|
+
[TestPyPI](https://test.pypi.org/project/outplayarena-sdk/) with a dev-version
|
|
210
|
+
suffix (e.g. `0.1.1.dev5+g1a2b3c4`). `pypi.yml` publishes `v*.*.*` tag pushes
|
|
211
|
+
to the real [PyPI](https://pypi.org/project/outplayarena-sdk/) — the
|
|
212
|
+
release is gated by the `pypi` GitHub environment (manual approval required).
|
|
213
|
+
|
|
214
|
+
## License
|
|
215
|
+
|
|
216
|
+
MIT © 2026 OutplayArena. See [LICENSE](LICENSE).
|
|
217
|
+
|
|
218
|
+
## Links
|
|
219
|
+
|
|
220
|
+
- Repository: <https://github.com/OutplayArena/arena>
|
|
221
|
+
- Documentation: <https://arena.core-aix.org/docs>
|
|
222
|
+
- Issues: <https://github.com/OutplayArena/arena/issues>
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "outplayarena-sdk"
|
|
3
|
+
dynamic = ["version"]
|
|
4
|
+
description = "Python SDK for building and testing agents on OutplayArena."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.12"
|
|
7
|
+
license = { text = "MIT" }
|
|
8
|
+
authors = [
|
|
9
|
+
{ name = "Ryan Zhang", email = "ryan.zhang@yale.edu" },
|
|
10
|
+
{ name = "Herbert Woisetschläger", email = "herbert@woisetschlaeger.fyi" },
|
|
11
|
+
]
|
|
12
|
+
keywords = [
|
|
13
|
+
"arena",
|
|
14
|
+
"outplayarena",
|
|
15
|
+
"agent",
|
|
16
|
+
"llm",
|
|
17
|
+
"game-theory",
|
|
18
|
+
"mcp",
|
|
19
|
+
"openai",
|
|
20
|
+
"sdk",
|
|
21
|
+
]
|
|
22
|
+
classifiers = [
|
|
23
|
+
"Development Status :: 4 - Beta",
|
|
24
|
+
"Intended Audience :: Developers",
|
|
25
|
+
"Intended Audience :: Science/Research",
|
|
26
|
+
"License :: OSI Approved :: MIT License",
|
|
27
|
+
"Operating System :: OS Independent",
|
|
28
|
+
"Programming Language :: Python :: 3",
|
|
29
|
+
"Programming Language :: Python :: 3.12",
|
|
30
|
+
"Programming Language :: Python :: 3.13",
|
|
31
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
32
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
33
|
+
"Typing :: Typed",
|
|
34
|
+
]
|
|
35
|
+
dependencies = [
|
|
36
|
+
"httpx>=0.27.0",
|
|
37
|
+
"mcp>=1.0.0",
|
|
38
|
+
"openai>=2.0.0",
|
|
39
|
+
]
|
|
40
|
+
|
|
41
|
+
[project.optional-dependencies]
|
|
42
|
+
dev = [
|
|
43
|
+
"pytest>=8.0",
|
|
44
|
+
"pytest-asyncio>=0.24",
|
|
45
|
+
]
|
|
46
|
+
|
|
47
|
+
[project.urls]
|
|
48
|
+
Homepage = "https://arena.core-aix.org"
|
|
49
|
+
Documentation = "https://arena.core-aix.org/docs"
|
|
50
|
+
Repository = "https://github.com/OutplayArena/arena"
|
|
51
|
+
Issues = "https://github.com/OutplayArena/arena/issues"
|
|
52
|
+
Changelog = "https://github.com/OutplayArena/arena/blob/main/agent-sdk/CHANGELOG.md"
|
|
53
|
+
|
|
54
|
+
[build-system]
|
|
55
|
+
requires = ["hatchling", "hatch-vcs"]
|
|
56
|
+
build-backend = "hatchling.build"
|
|
57
|
+
|
|
58
|
+
[tool.hatch.build.targets.wheel]
|
|
59
|
+
packages = ["src/outplayarena_sdk"]
|
|
60
|
+
# Ship the PEP 561 marker so downstream type checkers (mypy, pyright) treat
|
|
61
|
+
# the package as typed.
|
|
62
|
+
include = ["src/outplayarena_sdk/py.typed"]
|
|
63
|
+
|
|
64
|
+
[tool.hatch.build.targets.sdist]
|
|
65
|
+
include = [
|
|
66
|
+
"src/outplayarena_sdk",
|
|
67
|
+
"README.md",
|
|
68
|
+
"LICENSE",
|
|
69
|
+
"CHANGELOG.md",
|
|
70
|
+
"pyproject.toml",
|
|
71
|
+
]
|
|
72
|
+
|
|
73
|
+
# Single source of truth for the version: the next ``git tag vX.Y.Z`` sets
|
|
74
|
+
# the version to X.Y.Z. Tags with pre-/post-release suffixes (v0.1.0rc1, v0.1.0.post1) also
|
|
75
|
+
# work — hatch-vcs normalises them to PEP 440. We use the default
|
|
76
|
+
# ``(?P<version>\d+\.\d+\.\d+\S*)`` pattern, which matches ``vX.Y.Z`` and
|
|
77
|
+
# ``vX.Y.Z.postN`` / ``vX.Y.ZrcN`` (PEP 440-compatible) tags.
|
|
78
|
+
#
|
|
79
|
+
# ``search_parent_directories`` lets setuptools-scm (used under the hood by
|
|
80
|
+
# hatch-vcs) walk up from the ``agent-sdk/`` subdir to find the monorepo's
|
|
81
|
+
# git history.
|
|
82
|
+
[tool.hatch.version]
|
|
83
|
+
source = "vcs"
|
|
84
|
+
raw-options = { local_scheme = "no-local-version", search_parent_directories = true }
|