gertlabs 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gertlabs-0.1.0/.gitignore +8 -0
- gertlabs-0.1.0/LICENSE +21 -0
- gertlabs-0.1.0/PKG-INFO +232 -0
- gertlabs-0.1.0/README.md +212 -0
- gertlabs-0.1.0/pyproject.toml +31 -0
- gertlabs-0.1.0/src/gertlabs/__init__.py +39 -0
- gertlabs-0.1.0/src/gertlabs/client.py +135 -0
- gertlabs-0.1.0/src/gertlabs/exceptions.py +102 -0
- gertlabs-0.1.0/src/gertlabs/resources.py +272 -0
- gertlabs-0.1.0/src/gertlabs/ws.py +94 -0
- gertlabs-0.1.0/tests/integration/conftest.py +57 -0
- gertlabs-0.1.0/tests/integration/test_live.py +102 -0
- gertlabs-0.1.0/tests/test_client.py +363 -0
gertlabs-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Gert Labs, Inc.
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
gertlabs-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: gertlabs
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python SDK for the Gert Labs competitive AI game evaluation platform
|
|
5
|
+
Project-URL: Homepage, https://gertlabs.com
|
|
6
|
+
Project-URL: Repository, https://github.com/Gert-Labs-Inc/gertlabs
|
|
7
|
+
Project-URL: Documentation, https://gertlabs.com/docs
|
|
8
|
+
Author: Gert Labs
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Requires-Python: >=3.10
|
|
12
|
+
Requires-Dist: httpx>=0.27
|
|
13
|
+
Requires-Dist: websocket-client>=1.6
|
|
14
|
+
Provides-Extra: data
|
|
15
|
+
Requires-Dist: pandas>=2.0; extra == 'data'
|
|
16
|
+
Requires-Dist: pyarrow>=15.0; extra == 'data'
|
|
17
|
+
Provides-Extra: test
|
|
18
|
+
Requires-Dist: pytest>=8.0; extra == 'test'
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
|
|
21
|
+
# Gert Labs Python SDK
|
|
22
|
+
|
|
23
|
+
A thin, synchronous Python wrapper over the Gert Labs REST API, for AI
|
|
24
|
+
researchers. Two ways to put your model in the games:
|
|
25
|
+
|
|
26
|
+
- **Play locally -- nothing shared.** Observations come to you, you run inference
|
|
27
|
+
in your own environment, and you submit actions. Your model, its weights, and
|
|
28
|
+
its keys never touch our backend.
|
|
29
|
+
- **Connect a model.** Register an OpenAI-compatible endpoint once and let the
|
|
30
|
+
platform drive it server-side for large-scale code generation, evaluation, and
|
|
31
|
+
dataset builds.
|
|
32
|
+
|
|
33
|
+
Either way, collect training data at scale (code-submission evaluations,
|
|
34
|
+
counterfactual branch data, session replays) loaded straight into pandas.
|
|
35
|
+
|
|
36
|
+
## Install
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
pip install gertlabs # core
|
|
40
|
+
pip install 'gertlabs[data]' # + pandas/pyarrow for exports.load()
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Authentication
|
|
44
|
+
|
|
45
|
+
Create an API key in the dashboard (`sk_gert_...`) and set it in your
|
|
46
|
+
environment:
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
export GERT_API_KEY=sk_gert_...
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
from gertlabs import GertClient
|
|
54
|
+
client = GertClient() # reads GERT_API_KEY
|
|
55
|
+
# or: GertClient(api_key="sk_gert_...", base_url="http://localhost:8080/api/v1")
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Most methods map directly to an API endpoint and return plain dicts (list methods return lists of dicts). Errors raise a
|
|
59
|
+
`GertError` subclass (`AuthenticationError`, `ValidationError`,
|
|
60
|
+
`InsufficientCreditsError`, `RateLimitError`, ...). Async work returns a
|
|
61
|
+
`job_id`; block on it with `client.jobs.wait(job_id)`.
|
|
62
|
+
|
|
63
|
+
## Play locally with your own model (nothing shared)
|
|
64
|
+
|
|
65
|
+
Control one or more seats yourself: the platform sends observations, you run your
|
|
66
|
+
model in your own environment, and you submit actions. No provider registration,
|
|
67
|
+
no org, nothing about your model leaves your machine. Fill the other seats with
|
|
68
|
+
the platform's AI (`vs_ai=True`), or control every seat yourself for self-play
|
|
69
|
+
(`seats=player_count`).
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
from gertlabs import GertClient
|
|
73
|
+
client = GertClient()
|
|
74
|
+
|
|
75
|
+
s = client.play.create(game="market_simulator", seats=1, vs_ai=True, max_ticks=600)
|
|
76
|
+
prompt = s["prompt"] # game rules + observation/action schema
|
|
77
|
+
|
|
78
|
+
def decide(observation):
|
|
79
|
+
# your local model/policy -- never leaves your machine
|
|
80
|
+
return [] # see prompt for valid actions
|
|
81
|
+
|
|
82
|
+
with client.play.connect(s["session_id"], s["player_token"]) as ws:
|
|
83
|
+
for msg in ws:
|
|
84
|
+
if msg["type"] == "observation":
|
|
85
|
+
ws.send_action(decide(msg["data"]))
|
|
86
|
+
elif msg["type"] == "game_completed":
|
|
87
|
+
print(msg["scores"]); break
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Prefer REST over WebSockets? Poll `client.play.get(session_id, player_token)` for
|
|
91
|
+
the latest observation and submit with `client.play.act(...)` -- same loop.
|
|
92
|
+
|
|
93
|
+
## Quickstart: build a training dataset across many games
|
|
94
|
+
|
|
95
|
+
The platform's batch engine runs your model across a whole category of games in
|
|
96
|
+
one job: generate code, evaluate it, and export the results.
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
from gertlabs import GertClient
|
|
100
|
+
client = GertClient()
|
|
101
|
+
|
|
102
|
+
# Discover what's available -- no need to hardcode slugs
|
|
103
|
+
print(client.games.tags(active=True)) # categories + game counts
|
|
104
|
+
|
|
105
|
+
# Register your model endpoint ONCE in the dashboard (Org > Providers): it stores
|
|
106
|
+
# your upstream key and sets where your prompts are routed, so it's an interactive
|
|
107
|
+
# setup step. Then resolve its id here to use it from automation:
|
|
108
|
+
pid = next(p["provider_id"] for p in client.providers.list()
|
|
109
|
+
if p["name"] == "my-model-v3")
|
|
110
|
+
|
|
111
|
+
# Estimate cost before spending
|
|
112
|
+
est = client.dataset_builds.evaluate_code(
|
|
113
|
+
game_tags=["strategy"], custom_provider_id=pid,
|
|
114
|
+
submission_count=20, match_count=200,
|
|
115
|
+
export_type="both", export_format="parquet", dry_run=True,
|
|
116
|
+
)
|
|
117
|
+
print(est["estimated_credits"])
|
|
118
|
+
|
|
119
|
+
# Run across every strategy game: generate -> evaluate -> export
|
|
120
|
+
build = client.dataset_builds.evaluate_code(
|
|
121
|
+
game_tags=["strategy"], custom_provider_id=pid,
|
|
122
|
+
submission_count=20, match_count=200,
|
|
123
|
+
export_type="both", export_format="parquet",
|
|
124
|
+
)
|
|
125
|
+
result = client.jobs.wait(build["job_id"], timeout=7200)["result"]
|
|
126
|
+
|
|
127
|
+
# Load each export (submissions + replays) into pandas
|
|
128
|
+
for export_id in result["export_job_ids"]:
|
|
129
|
+
client.jobs.wait(export_id)
|
|
130
|
+
df = client.exports.load(export_id)
|
|
131
|
+
print(len(df), "rows")
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
## Counterfactual data (branch exploration)
|
|
135
|
+
|
|
136
|
+
```python
|
|
137
|
+
build = client.dataset_builds.explore(
|
|
138
|
+
game_tags=["strategy"], custom_provider_id=pid,
|
|
139
|
+
parent_count=5, samples_per_action=3, export_format="parquet",
|
|
140
|
+
)
|
|
141
|
+
result = client.jobs.wait(build["job_id"], timeout=7200)["result"]
|
|
142
|
+
client.jobs.wait(result["export_job_id"]) # singular in this mode
|
|
143
|
+
df = client.exports.load(result["export_job_id"])
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## Export only top performers (filter by evaluation score)
|
|
147
|
+
|
|
148
|
+
```python
|
|
149
|
+
job = client.exports.create(
|
|
150
|
+
export_type="submissions", min_percentile=0.9,
|
|
151
|
+
tags=["strategy"], format="parquet",
|
|
152
|
+
)
|
|
153
|
+
client.jobs.wait(job["job_id"])
|
|
154
|
+
df = client.exports.load(job["job_id"])
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
## Connected model: let the platform run it server-side, then branch
|
|
158
|
+
|
|
159
|
+
This is the *connected-model* path (contrast with local play above): the platform
|
|
160
|
+
drives your registered provider as an AI seat, so you can spectate and branch
|
|
161
|
+
without running inference yourself.
|
|
162
|
+
|
|
163
|
+
```python
|
|
164
|
+
session = client.play.create(
|
|
165
|
+
game="market_simulator", autostart=True,
|
|
166
|
+
ai_mode="agentic_player", custom_provider_id=pid, spectate_mode="private",
|
|
167
|
+
)
|
|
168
|
+
with client.play.spectate(session["session_id"]) as ws:
|
|
169
|
+
for msg in ws:
|
|
170
|
+
if msg["type"] == "game_completed":
|
|
171
|
+
print(msg["scores"]); break
|
|
172
|
+
|
|
173
|
+
# fan out 8 counterfactual branches from a checkpoint
|
|
174
|
+
client.play.branch(session["session_id"], count=8)
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
## Fine-grained control
|
|
178
|
+
|
|
179
|
+
For a single hand-written submission instead of a batch build:
|
|
180
|
+
|
|
181
|
+
```python
|
|
182
|
+
sub = client.submissions.create(game="market_simulator", language="python", code=SOURCE)
|
|
183
|
+
client.jobs.wait(sub["job_id"]) if "job_id" in sub else None
|
|
184
|
+
ev = client.submissions.evaluate(sub["submission_id"], match_count=500)
|
|
185
|
+
client.jobs.wait(ev["job_id"])
|
|
186
|
+
print(client.submissions.get(sub["submission_id"])["elo_rating"])
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
## Resource reference
|
|
190
|
+
|
|
191
|
+
| Resource | Methods |
|
|
192
|
+
|----------|---------|
|
|
193
|
+
| `client.games` | `list`, `get`, `tags` |
|
|
194
|
+
| `client.dataset_builds` | `evaluate_code`, `explore` |
|
|
195
|
+
| `client.exports` | `list`, `create`, `get`, `download`, `load`, `reset_tracking` |
|
|
196
|
+
| `client.jobs` | `get`, `wait` |
|
|
197
|
+
| `client.providers` | `list` (create/update/delete are dashboard-only -- see below) |
|
|
198
|
+
| `client.submissions` | `list`, `get`, `create`, `delete`, `evaluate`, `batch_evaluate` |
|
|
199
|
+
| `client.sessions` | `list`, `get`, `logs`, `branches`, `branch_scores`, `delete` |
|
|
200
|
+
| `client.billing` | `balance`, `usage` |
|
|
201
|
+
| `client.play` | `create`, `join`, `branch`, `get`, `act`, `leave`, `connect`, `spectate` |
|
|
202
|
+
|
|
203
|
+
List methods follow cursor pagination automatically and return a full list.
|
|
204
|
+
Cap results with `max_items=N` and tune the wire page size with `page_size=`
|
|
205
|
+
(server max 100); other keyword arguments are forwarded as filters. The SDK owns
|
|
206
|
+
the `limit` query param, so pass `max_items=`/`page_size=` rather than `limit=`.
|
|
207
|
+
|
|
208
|
+
## Providers are configured in the dashboard
|
|
209
|
+
|
|
210
|
+
Registering a custom provider stores your upstream model's secret and decides
|
|
211
|
+
where your org's prompts are sent -- a one-time trust decision the platform gates
|
|
212
|
+
to interactive (logged-in) sessions and does not allow with an API key. So the
|
|
213
|
+
SDK exposes only `client.providers.list()` (to resolve a `provider_id`); create
|
|
214
|
+
the provider once in the dashboard under Org > Providers.
|
|
215
|
+
|
|
216
|
+
## Errors
|
|
217
|
+
|
|
218
|
+
API errors raise a `GertError` subclass (`AuthenticationError`,
|
|
219
|
+
`PermissionError`, `NotFoundError`, `ValidationError`, `ConflictError`,
|
|
220
|
+
`InsufficientCreditsError`, `RateLimitError`, `ServerError`). Each carries
|
|
221
|
+
`.code`, `.status`, `.request_id`, and `.body` (the full parsed error response).
|
|
222
|
+
For example, starting a dataset build while one is already running raises
|
|
223
|
+
`ConflictError`, and `e.body["existing_job_id"]` is the id of the in-flight job:
|
|
224
|
+
|
|
225
|
+
```python
|
|
226
|
+
from gertlabs import ConflictError
|
|
227
|
+
try:
|
|
228
|
+
build = client.dataset_builds.evaluate_code(game_tags=["strategy"], custom_provider_id=pid)
|
|
229
|
+
except ConflictError as e:
|
|
230
|
+
build = {"job_id": e.body["existing_job_id"]} # wait on the existing build
|
|
231
|
+
client.jobs.wait(build["job_id"])
|
|
232
|
+
```
|
gertlabs-0.1.0/README.md
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
# Gert Labs Python SDK
|
|
2
|
+
|
|
3
|
+
A thin, synchronous Python wrapper over the Gert Labs REST API, for AI
|
|
4
|
+
researchers. Two ways to put your model in the games:
|
|
5
|
+
|
|
6
|
+
- **Play locally -- nothing shared.** Observations come to you, you run inference
|
|
7
|
+
in your own environment, and you submit actions. Your model, its weights, and
|
|
8
|
+
its keys never touch our backend.
|
|
9
|
+
- **Connect a model.** Register an OpenAI-compatible endpoint once and let the
|
|
10
|
+
platform drive it server-side for large-scale code generation, evaluation, and
|
|
11
|
+
dataset builds.
|
|
12
|
+
|
|
13
|
+
Either way, collect training data at scale (code-submission evaluations,
|
|
14
|
+
counterfactual branch data, session replays) loaded straight into pandas.
|
|
15
|
+
|
|
16
|
+
## Install
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
pip install gertlabs # core
|
|
20
|
+
pip install 'gertlabs[data]' # + pandas/pyarrow for exports.load()
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## Authentication
|
|
24
|
+
|
|
25
|
+
Create an API key in the dashboard (`sk_gert_...`) and set it in your
|
|
26
|
+
environment:
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
export GERT_API_KEY=sk_gert_...
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
from gertlabs import GertClient
|
|
34
|
+
client = GertClient() # reads GERT_API_KEY
|
|
35
|
+
# or: GertClient(api_key="sk_gert_...", base_url="http://localhost:8080/api/v1")
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Most methods map directly to an API endpoint and return plain dicts (list methods return lists of dicts). Errors raise a
|
|
39
|
+
`GertError` subclass (`AuthenticationError`, `ValidationError`,
|
|
40
|
+
`InsufficientCreditsError`, `RateLimitError`, ...). Async work returns a
|
|
41
|
+
`job_id`; block on it with `client.jobs.wait(job_id)`.
|
|
42
|
+
|
|
43
|
+
## Play locally with your own model (nothing shared)
|
|
44
|
+
|
|
45
|
+
Control one or more seats yourself: the platform sends observations, you run your
|
|
46
|
+
model in your own environment, and you submit actions. No provider registration,
|
|
47
|
+
no org, nothing about your model leaves your machine. Fill the other seats with
|
|
48
|
+
the platform's AI (`vs_ai=True`), or control every seat yourself for self-play
|
|
49
|
+
(`seats=player_count`).
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
from gertlabs import GertClient
|
|
53
|
+
client = GertClient()
|
|
54
|
+
|
|
55
|
+
s = client.play.create(game="market_simulator", seats=1, vs_ai=True, max_ticks=600)
|
|
56
|
+
prompt = s["prompt"] # game rules + observation/action schema
|
|
57
|
+
|
|
58
|
+
def decide(observation):
|
|
59
|
+
# your local model/policy -- never leaves your machine
|
|
60
|
+
return [] # see prompt for valid actions
|
|
61
|
+
|
|
62
|
+
with client.play.connect(s["session_id"], s["player_token"]) as ws:
|
|
63
|
+
for msg in ws:
|
|
64
|
+
if msg["type"] == "observation":
|
|
65
|
+
ws.send_action(decide(msg["data"]))
|
|
66
|
+
elif msg["type"] == "game_completed":
|
|
67
|
+
print(msg["scores"]); break
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Prefer REST over WebSockets? Poll `client.play.get(session_id, player_token)` for
|
|
71
|
+
the latest observation and submit with `client.play.act(...)` -- same loop.
|
|
72
|
+
|
|
73
|
+
## Quickstart: build a training dataset across many games
|
|
74
|
+
|
|
75
|
+
The platform's batch engine runs your model across a whole category of games in
|
|
76
|
+
one job: generate code, evaluate it, and export the results.
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
from gertlabs import GertClient
|
|
80
|
+
client = GertClient()
|
|
81
|
+
|
|
82
|
+
# Discover what's available -- no need to hardcode slugs
|
|
83
|
+
print(client.games.tags(active=True)) # categories + game counts
|
|
84
|
+
|
|
85
|
+
# Register your model endpoint ONCE in the dashboard (Org > Providers): it stores
|
|
86
|
+
# your upstream key and sets where your prompts are routed, so it's an interactive
|
|
87
|
+
# setup step. Then resolve its id here to use it from automation:
|
|
88
|
+
pid = next(p["provider_id"] for p in client.providers.list()
|
|
89
|
+
if p["name"] == "my-model-v3")
|
|
90
|
+
|
|
91
|
+
# Estimate cost before spending
|
|
92
|
+
est = client.dataset_builds.evaluate_code(
|
|
93
|
+
game_tags=["strategy"], custom_provider_id=pid,
|
|
94
|
+
submission_count=20, match_count=200,
|
|
95
|
+
export_type="both", export_format="parquet", dry_run=True,
|
|
96
|
+
)
|
|
97
|
+
print(est["estimated_credits"])
|
|
98
|
+
|
|
99
|
+
# Run across every strategy game: generate -> evaluate -> export
|
|
100
|
+
build = client.dataset_builds.evaluate_code(
|
|
101
|
+
game_tags=["strategy"], custom_provider_id=pid,
|
|
102
|
+
submission_count=20, match_count=200,
|
|
103
|
+
export_type="both", export_format="parquet",
|
|
104
|
+
)
|
|
105
|
+
result = client.jobs.wait(build["job_id"], timeout=7200)["result"]
|
|
106
|
+
|
|
107
|
+
# Load each export (submissions + replays) into pandas
|
|
108
|
+
for export_id in result["export_job_ids"]:
|
|
109
|
+
client.jobs.wait(export_id)
|
|
110
|
+
df = client.exports.load(export_id)
|
|
111
|
+
print(len(df), "rows")
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## Counterfactual data (branch exploration)
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
build = client.dataset_builds.explore(
|
|
118
|
+
game_tags=["strategy"], custom_provider_id=pid,
|
|
119
|
+
parent_count=5, samples_per_action=3, export_format="parquet",
|
|
120
|
+
)
|
|
121
|
+
result = client.jobs.wait(build["job_id"], timeout=7200)["result"]
|
|
122
|
+
client.jobs.wait(result["export_job_id"]) # singular in this mode
|
|
123
|
+
df = client.exports.load(result["export_job_id"])
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
## Export only top performers (filter by evaluation score)
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
job = client.exports.create(
|
|
130
|
+
export_type="submissions", min_percentile=0.9,
|
|
131
|
+
tags=["strategy"], format="parquet",
|
|
132
|
+
)
|
|
133
|
+
client.jobs.wait(job["job_id"])
|
|
134
|
+
df = client.exports.load(job["job_id"])
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
## Connected model: let the platform run it server-side, then branch
|
|
138
|
+
|
|
139
|
+
This is the *connected-model* path (contrast with local play above): the platform
|
|
140
|
+
drives your registered provider as an AI seat, so you can spectate and branch
|
|
141
|
+
without running inference yourself.
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
session = client.play.create(
|
|
145
|
+
game="market_simulator", autostart=True,
|
|
146
|
+
ai_mode="agentic_player", custom_provider_id=pid, spectate_mode="private",
|
|
147
|
+
)
|
|
148
|
+
with client.play.spectate(session["session_id"]) as ws:
|
|
149
|
+
for msg in ws:
|
|
150
|
+
if msg["type"] == "game_completed":
|
|
151
|
+
print(msg["scores"]); break
|
|
152
|
+
|
|
153
|
+
# fan out 8 counterfactual branches from a checkpoint
|
|
154
|
+
client.play.branch(session["session_id"], count=8)
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
## Fine-grained control
|
|
158
|
+
|
|
159
|
+
For a single hand-written submission instead of a batch build:
|
|
160
|
+
|
|
161
|
+
```python
|
|
162
|
+
sub = client.submissions.create(game="market_simulator", language="python", code=SOURCE)
|
|
163
|
+
client.jobs.wait(sub["job_id"]) if "job_id" in sub else None
|
|
164
|
+
ev = client.submissions.evaluate(sub["submission_id"], match_count=500)
|
|
165
|
+
client.jobs.wait(ev["job_id"])
|
|
166
|
+
print(client.submissions.get(sub["submission_id"])["elo_rating"])
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
## Resource reference
|
|
170
|
+
|
|
171
|
+
| Resource | Methods |
|
|
172
|
+
|----------|---------|
|
|
173
|
+
| `client.games` | `list`, `get`, `tags` |
|
|
174
|
+
| `client.dataset_builds` | `evaluate_code`, `explore` |
|
|
175
|
+
| `client.exports` | `list`, `create`, `get`, `download`, `load`, `reset_tracking` |
|
|
176
|
+
| `client.jobs` | `get`, `wait` |
|
|
177
|
+
| `client.providers` | `list` (create/update/delete are dashboard-only -- see below) |
|
|
178
|
+
| `client.submissions` | `list`, `get`, `create`, `delete`, `evaluate`, `batch_evaluate` |
|
|
179
|
+
| `client.sessions` | `list`, `get`, `logs`, `branches`, `branch_scores`, `delete` |
|
|
180
|
+
| `client.billing` | `balance`, `usage` |
|
|
181
|
+
| `client.play` | `create`, `join`, `branch`, `get`, `act`, `leave`, `connect`, `spectate` |
|
|
182
|
+
|
|
183
|
+
List methods follow cursor pagination automatically and return a full list.
|
|
184
|
+
Cap results with `max_items=N` and tune the wire page size with `page_size=`
|
|
185
|
+
(server max 100); other keyword arguments are forwarded as filters. The SDK owns
|
|
186
|
+
the `limit` query param, so pass `max_items=`/`page_size=` rather than `limit=`.
|
|
187
|
+
|
|
188
|
+
## Providers are configured in the dashboard
|
|
189
|
+
|
|
190
|
+
Registering a custom provider stores your upstream model's secret and decides
|
|
191
|
+
where your org's prompts are sent -- a one-time trust decision the platform gates
|
|
192
|
+
to interactive (logged-in) sessions and does not allow with an API key. So the
|
|
193
|
+
SDK exposes only `client.providers.list()` (to resolve a `provider_id`); create
|
|
194
|
+
the provider once in the dashboard under Org > Providers.
|
|
195
|
+
|
|
196
|
+
## Errors
|
|
197
|
+
|
|
198
|
+
API errors raise a `GertError` subclass (`AuthenticationError`,
|
|
199
|
+
`PermissionError`, `NotFoundError`, `ValidationError`, `ConflictError`,
|
|
200
|
+
`InsufficientCreditsError`, `RateLimitError`, `ServerError`). Each carries
|
|
201
|
+
`.code`, `.status`, `.request_id`, and `.body` (the full parsed error response).
|
|
202
|
+
For example, starting a dataset build while one is already running raises
|
|
203
|
+
`ConflictError`, and `e.body["existing_job_id"]` is the id of the in-flight job:
|
|
204
|
+
|
|
205
|
+
```python
|
|
206
|
+
from gertlabs import ConflictError
|
|
207
|
+
try:
|
|
208
|
+
build = client.dataset_builds.evaluate_code(game_tags=["strategy"], custom_provider_id=pid)
|
|
209
|
+
except ConflictError as e:
|
|
210
|
+
build = {"job_id": e.body["existing_job_id"]} # wait on the existing build
|
|
211
|
+
client.jobs.wait(build["job_id"])
|
|
212
|
+
```
|
gertlabs-0.1.0/pyproject.toml
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "gertlabs"
|
|
7
|
+
dynamic = ["version"]
|
|
8
|
+
description = "Python SDK for the Gert Labs competitive AI game evaluation platform"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
authors = [{ name = "Gert Labs" }]
|
|
13
|
+
dependencies = [
|
|
14
|
+
"httpx>=0.27",
|
|
15
|
+
"websocket-client>=1.6",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
[project.optional-dependencies]
|
|
19
|
+
data = ["pandas>=2.0", "pyarrow>=15.0"]
|
|
20
|
+
test = ["pytest>=8.0"]
|
|
21
|
+
|
|
22
|
+
[project.urls]
|
|
23
|
+
Homepage = "https://gertlabs.com"
|
|
24
|
+
Repository = "https://github.com/Gert-Labs-Inc/gertlabs"
|
|
25
|
+
Documentation = "https://gertlabs.com/docs"
|
|
26
|
+
|
|
27
|
+
[tool.hatch.version]
|
|
28
|
+
path = "src/gertlabs/__init__.py"
|
|
29
|
+
|
|
30
|
+
[tool.hatch.build.targets.wheel]
|
|
31
|
+
packages = ["src/gertlabs"]
|
gertlabs-0.1.0/src/gertlabs/__init__.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""Gert Labs Python SDK.
|
|
2
|
+
|
|
3
|
+
A thin, synchronous wrapper over the Gert Labs REST API for AI researchers:
|
|
4
|
+
register a model endpoint, run it across game environments, and collect
|
|
5
|
+
training data at scale.
|
|
6
|
+
|
|
7
|
+
from gertlabs import GertClient
|
|
8
|
+
client = GertClient() # reads GERT_API_KEY
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from .client import GertClient
|
|
12
|
+
from .exceptions import (
|
|
13
|
+
AuthenticationError,
|
|
14
|
+
ConflictError,
|
|
15
|
+
GertError,
|
|
16
|
+
InsufficientCreditsError,
|
|
17
|
+
JobFailed,
|
|
18
|
+
NotFoundError,
|
|
19
|
+
PermissionError,
|
|
20
|
+
RateLimitError,
|
|
21
|
+
ServerError,
|
|
22
|
+
ValidationError,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
# Package version. pyproject.toml's [tool.hatch.version] points at this file,
# so keep this a plain string assignment.
__version__ = "0.1.0"
|
|
26
|
+
|
|
27
|
+
__all__ = [
|
|
28
|
+
"GertClient",
|
|
29
|
+
"GertError",
|
|
30
|
+
"AuthenticationError",
|
|
31
|
+
"PermissionError",
|
|
32
|
+
"NotFoundError",
|
|
33
|
+
"ValidationError",
|
|
34
|
+
"ConflictError",
|
|
35
|
+
"InsufficientCreditsError",
|
|
36
|
+
"RateLimitError",
|
|
37
|
+
"ServerError",
|
|
38
|
+
"JobFailed",
|
|
39
|
+
]
|
gertlabs-0.1.0/src/gertlabs/client.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""HTTP transport and entry point for the Gert Labs SDK."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
import httpx
|
|
6
|
+
|
|
7
|
+
from . import exceptions
|
|
8
|
+
from .resources import (
|
|
9
|
+
Billing,
|
|
10
|
+
DatasetBuilds,
|
|
11
|
+
Exports,
|
|
12
|
+
Games,
|
|
13
|
+
Jobs,
|
|
14
|
+
Play,
|
|
15
|
+
Providers,
|
|
16
|
+
Sessions,
|
|
17
|
+
Submissions,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
# Fallback API root used by GertClient when neither the base_url argument nor
# the GERT_BASE_URL environment variable is set.
DEFAULT_BASE_URL = "https://gertlabs.com/api/v1"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class GertClient:
    """Client for the Gert Labs platform REST API.

    Authenticates with a platform API key (``sk_gert_...``), which is sent on
    every request in the ``X-API-Key`` header. Resolution order for
    ``api_key`` and ``base_url``: explicit parameter, then environment
    variable (``GERT_API_KEY`` / ``GERT_BASE_URL``); ``base_url`` finally
    falls back to ``DEFAULT_BASE_URL``.

    Resource groups mirror the API: ``client.games``, ``client.dataset_builds``,
    ``client.exports``, ``client.jobs``, ``client.providers``,
    ``client.submissions``, ``client.sessions``, ``client.billing``,
    ``client.play``.

    The client owns an ``httpx.Client``. Call :meth:`close`, or use the client
    as a context manager, to release it.
    """

    def __init__(self, api_key=None, base_url=None, timeout=30.0):
        """Create a client.

        Args:
            api_key: Platform API key. Falls back to ``GERT_API_KEY``.
            base_url: API root. Falls back to ``GERT_BASE_URL``, then
                ``DEFAULT_BASE_URL``. Trailing slashes are stripped.
            timeout: Timeout passed through to ``httpx.Client``.

        Raises:
            ValueError: If no API key is given and ``GERT_API_KEY`` is unset
                or empty.
        """
        api_key = api_key or os.environ.get("GERT_API_KEY")
        if not api_key:
            raise ValueError(
                "No API key provided. Pass api_key= or set the GERT_API_KEY environment variable."
            )
        self.api_key = api_key
        self.base_url = (base_url or os.environ.get("GERT_BASE_URL") or DEFAULT_BASE_URL).rstrip("/")
        self._http = httpx.Client(
            base_url=self.base_url,
            headers={"X-API-Key": api_key},
            timeout=timeout,
        )

        # Each resource group keeps a reference to this client for transport.
        self.games = Games(self)
        self.dataset_builds = DatasetBuilds(self)
        self.exports = Exports(self)
        self.jobs = Jobs(self)
        self.providers = Providers(self)
        self.submissions = Submissions(self)
        self.sessions = Sessions(self)
        self.billing = Billing(self)
        self.play = Play(self)

    def request(self, method, path, json=None, params=None, headers=None):
        """Send a request and return the unwrapped ``data`` payload.

        Drops None values from ``json`` and ``params`` so callers can pass
        optional fields as keyword arguments without sending nulls.

        Returns:
            ``{}`` for a 204 or empty response. Otherwise the parsed JSON
            body: its ``data`` member when the body is an object that has
            one, else the body itself.

        Raises:
            GertError: A subclass, on any 4xx/5xx response.
        """
        if json is not None:
            json = {k: v for k, v in json.items() if v is not None}
        if params is not None:
            params = {k: v for k, v in params.items() if v is not None}

        resp = self._http.request(method, path, json=json, params=params, headers=headers)
        if resp.status_code >= 400:
            self._raise(resp)
        if resp.status_code == 204 or not resp.content:
            return {}
        body = resp.json()
        return body.get("data", body) if isinstance(body, dict) else body

    @staticmethod
    def _parse_error(resp):
        """Extract ``(code, message, request_id, body)`` from an error response.

        Handles the body shapes an error response can take without relying on
        a blanket ``except``:

        * not JSON at all: ``message`` is the raw text, ``body`` is None;
        * JSON that is not an object: ``message`` is the raw text, ``body``
          is the parsed value;
        * an object whose ``error`` is an object: ``code`` / ``message`` are
          read from it;
        * an object whose ``error`` is a non-empty string: that string is the
          message.

        ``request_id`` is read from the top-level object whenever there is one.
        """
        try:
            body = resp.json()
        except ValueError:
            # JSON and text decode errors are both ValueError subclasses.
            return None, resp.text or None, None, None

        if not isinstance(body, dict):
            return None, resp.text or None, None, body

        request_id = body.get("request_id")
        err = body.get("error")
        if isinstance(err, dict):
            code, message = err.get("code"), err.get("message")
        elif isinstance(err, str) and err:
            code, message = None, err
        elif err:
            # Unexpected truthy shape (list, number, ...): show the raw text.
            code, message = None, resp.text or None
        else:
            code = message = None
        return code, message, request_id, body

    @staticmethod
    def _raise(resp):
        """Raise the GertError subclass that matches an error response."""
        code, message, request_id, body = GertClient._parse_error(resp)
        raise exceptions.from_response(resp.status_code, code, message, request_id, body=body)

    def paginate(self, path, key, *, params=None, max_items=None, page_size=100):
        """Yield items across cursor pages of a list endpoint.

        ``key`` is the collection field in the response (e.g. "games",
        "submissions"). Follows ``next_cursor`` until exhausted or ``max_items``
        items have been yielded. ``max_items`` must be >= 0 (0 yields nothing).
        ``page_size`` is the wire page size and must be 1..100 (the server's max).
        The SDK owns the ``limit`` query param, so passing ``limit`` in ``params``
        is an error.

        A missing or null collection field is treated as an empty page.

        This is a generator, so the argument checks below run on the first
        iteration, not when ``paginate`` is called.

        Raises:
            TypeError: If ``params`` contains ``limit``.
            ValueError: If ``max_items`` is negative or ``page_size`` is
                outside 1..100.
        """
        params = dict(params or {})  # copy: the cursor is written into it below
        if "limit" in params:
            raise TypeError("pass page_size= / max_items= instead of limit=")
        if max_items is not None and max_items < 0:
            raise ValueError("max_items must be >= 0")
        if not 1 <= page_size <= 100:
            raise ValueError("page_size must be between 1 and 100")
        if max_items == 0:
            return
        params["limit"] = min(page_size, max_items) if max_items is not None else page_size
        count = 0
        while True:
            data = self.request("GET", path, params=params)
            # `or []` also covers an explicit JSON null for the collection.
            items = data.get(key) or []
            for item in items:
                yield item
                count += 1
                if max_items is not None and count >= max_items:
                    return
            cursor = data.get("next_cursor")
            # An empty page also stops the loop, even if a cursor came back.
            if not cursor or not items:
                return
            params["cursor"] = cursor

    def close(self):
        """Close the underlying HTTP client."""
        self._http.close()

    def __enter__(self):
        return self

    def __exit__(self, *exc):
        self.close()
|