ptuner 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ptuner-0.1.0/.gitignore +41 -0
- ptuner-0.1.0/PKG-INFO +264 -0
- ptuner-0.1.0/README.md +238 -0
- ptuner-0.1.0/__init__.py +0 -0
- ptuner-0.1.0/examples/benchmark_sentiment.py +549 -0
- ptuner-0.1.0/pyproject.toml +37 -0
- ptuner-0.1.0/src/__init__.py +0 -0
- ptuner-0.1.0/src/ptuner/__init__.py +3 -0
- ptuner-0.1.0/src/ptuner/client.py +235 -0
- ptuner-0.1.0/tests/test_client.py +318 -0
ptuner-0.1.0/.gitignore
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# OS
|
|
2
|
+
.DS_Store
|
|
3
|
+
Thumbs.db
|
|
4
|
+
|
|
5
|
+
# IDEs
|
|
6
|
+
.vscode/
|
|
7
|
+
.idea/
|
|
8
|
+
*.swp
|
|
9
|
+
*.swo
|
|
10
|
+
*~
|
|
11
|
+
|
|
12
|
+
# Go
|
|
13
|
+
app/tmp/
|
|
14
|
+
|
|
15
|
+
# Node / Frontend
|
|
16
|
+
frontend/node_modules/
|
|
17
|
+
frontend/dist/
|
|
18
|
+
frontend/.env
|
|
19
|
+
frontend/.env.local
|
|
20
|
+
frontend/.env.*.local
|
|
21
|
+
|
|
22
|
+
# Python
|
|
23
|
+
__pycache__/
|
|
24
|
+
*.pyc
|
|
25
|
+
.venv/
|
|
26
|
+
client/.venv/
|
|
27
|
+
client/*.egg-info/
|
|
28
|
+
client/dist/
|
|
29
|
+
client/.pytest_cache/
|
|
30
|
+
*.egg-info/
|
|
31
|
+
|
|
32
|
+
# Deploy secrets (real values — only _example committed)
|
|
33
|
+
deploy/gcp/scripts/01_create_secrets.sh
|
|
34
|
+
|
|
35
|
+
# Firebase
|
|
36
|
+
.firebase/
|
|
37
|
+
.firebaserc
|
|
38
|
+
|
|
39
|
+
# Misc
|
|
40
|
+
*.log
|
|
41
|
+
*.pid
|
ptuner-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ptuner
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python client library for the ptuner prompt-tuning API
|
|
5
|
+
Project-URL: Homepage, https://prompts.church
|
|
6
|
+
Project-URL: Repository, https://github.com/ptuner/ptuner
|
|
7
|
+
Project-URL: Documentation, https://github.com/ptuner/ptuner/tree/main/client#readme
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
18
|
+
Classifier: Typing :: Typed
|
|
19
|
+
Requires-Python: >=3.10
|
|
20
|
+
Requires-Dist: httpx<1,>=0.27
|
|
21
|
+
Provides-Extra: dev
|
|
22
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
23
|
+
Requires-Dist: pytest>=8; extra == 'dev'
|
|
24
|
+
Requires-Dist: respx>=0.21; extra == 'dev'
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
|
|
27
|
+
# ptuner
|
|
28
|
+
|
|
29
|
+
Python client for the **ptuner** prompt-tuning API.
|
|
30
|
+
|
|
31
|
+
Evaluate, compare and iterate on LLM prompts with dataset-driven benchmarks,
|
|
32
|
+
exact-match scoring, and LLM-as-judge evaluation.
|
|
33
|
+
|
|
34
|
+
**Hosted at [prompts.church](https://prompts.church)**
|
|
35
|
+
|
|
36
|
+
## Installation
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
pip install ptuner
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Quick Start
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from ptuner import PtunerClient
|
|
46
|
+
|
|
47
|
+
client = PtunerClient(
|
|
48
|
+
base_url="https://api.prompts.church",
|
|
49
|
+
api_key="sk_...",
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
# 1. Create a project
|
|
53
|
+
project = client.create_project(
|
|
54
|
+
name="Sentiment Analysis",
|
|
55
|
+
description="Classify customer feedback",
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
# 2. Create a prompt with a version
|
|
59
|
+
prompt = client.create_prompt(
|
|
60
|
+
project["id"],
|
|
61
|
+
name="Sentiment Classifier",
|
|
62
|
+
slug="sentiment-v1",
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
version = client.create_version(
|
|
66
|
+
prompt["id"],
|
|
67
|
+
system_template=(
|
|
68
|
+
"You are a sentiment classifier. "
|
|
69
|
+
"Respond with exactly one word: positive, negative, or neutral."
|
|
70
|
+
),
|
|
71
|
+
message_template="Text: {{ text }}\n\nSentiment:",
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
# 3. Create a dataset
|
|
75
|
+
dataset = client.create_dataset(project["id"], name="Customer Reviews")
|
|
76
|
+
|
|
77
|
+
reviews = [
|
|
78
|
+
{"text": "This product is amazing!", "label": "positive"},
|
|
79
|
+
{"text": "Terrible quality, broke after one day.", "label": "negative"},
|
|
80
|
+
{"text": "The package arrived on time.", "label": "neutral"},
|
|
81
|
+
]
|
|
82
|
+
|
|
83
|
+
for r in reviews:
|
|
84
|
+
client.create_datapoint(
|
|
85
|
+
dataset["id"],
|
|
86
|
+
message_params=[{"role": "user", "params": {"text": r["text"]}}],
|
|
87
|
+
exact_match_label=r["label"],
|
|
88
|
+
)
|
|
89
|
+
|
|
90
|
+
# 4. Store your LLM API key (one-time)
|
|
91
|
+
client.create_credential(
|
|
92
|
+
provider="openai",
|
|
93
|
+
api_key="sk-your-openai-key",
|
|
94
|
+
display_label="My Key",
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
# 5. Run evaluation
|
|
98
|
+
run = client.create_eval_run(
|
|
99
|
+
project_id=project["id"],
|
|
100
|
+
prompt_version_id=version["id"],
|
|
101
|
+
dataset_id=dataset["id"],
|
|
102
|
+
model_config={"model": "gpt-5-nano", "provider": "openai", "temperature": 0.0},
|
|
103
|
+
judge_config={"judge_model": "gpt-5-mini"},
|
|
104
|
+
iterations=3,
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
# 6. Wait and check results
|
|
108
|
+
import time
|
|
109
|
+
for _ in range(30):
|
|
110
|
+
status = client.get_eval_run(run["id"])
|
|
111
|
+
if status["status"] in ("completed", "failed"):
|
|
112
|
+
break
|
|
113
|
+
time.sleep(2)
|
|
114
|
+
|
|
115
|
+
results = client.list_eval_results(run["id"])
|
|
116
|
+
exact = [r["exact_match_score"] for r in results if r.get("exact_match_score") is not None]
|
|
117
|
+
judge = [r["judge_score"] for r in results if r.get("judge_score") is not None]
|
|
118
|
+
|
|
119
|
+
if exact:
|
|
120
|
+
print(f"Exact match accuracy: {sum(exact)/len(exact):.1%}")
|
|
121
|
+
if judge:
|
|
122
|
+
print(f"Judge avg score: {sum(judge)/len(judge):.2f}")
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## Authentication
|
|
126
|
+
|
|
127
|
+
Pass either an API key or a Firebase JWT token:
|
|
128
|
+
|
|
129
|
+
```python
|
|
130
|
+
# API key (recommended)
|
|
131
|
+
client = PtunerClient(base_url="https://api.prompts.church", api_key="sk_...")
|
|
132
|
+
|
|
133
|
+
# Firebase JWT
|
|
134
|
+
client = PtunerClient(base_url="https://api.prompts.church", token="eyJ...")
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
Generate an API key in the UI at **Settings → Generate API Key**.
|
|
138
|
+
|
|
139
|
+
## Structured JSON Output
|
|
140
|
+
|
|
141
|
+
Force models to return structured JSON by adding `json_schema` when creating
|
|
142
|
+
a prompt version:
|
|
143
|
+
|
|
144
|
+
```python
|
|
145
|
+
version = client.create_version(
|
|
146
|
+
prompt["id"],
|
|
147
|
+
system_template="You are a sentiment expert. Return JSON with sentiment and confidence.",
|
|
148
|
+
message_template="Text: {{ text }}",
|
|
149
|
+
json_schema={
|
|
150
|
+
"type": "object",
|
|
151
|
+
"properties": {
|
|
152
|
+
"sentiment": {"type": "string", "enum": ["positive", "negative", "neutral"]},
|
|
153
|
+
"confidence": {"type": "number"},
|
|
154
|
+
},
|
|
155
|
+
"required": ["sentiment", "confidence"],
|
|
156
|
+
"additionalProperties": False,
|
|
157
|
+
},
|
|
158
|
+
)
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
This works across all providers (OpenAI, Anthropic, Google) — ptuner
|
|
162
|
+
translates the schema to each provider's structured output format automatically.
|
|
163
|
+
|
|
164
|
+
Omit `json_schema` (or set it to `None`) for plain text mode.
|
|
165
|
+
|
|
166
|
+
## Comparing Prompt Versions
|
|
167
|
+
|
|
168
|
+
A common workflow: iterate on a prompt and compare versions against the same dataset.
|
|
169
|
+
|
|
170
|
+
```python
|
|
171
|
+
v2 = client.create_version(
|
|
172
|
+
prompt["id"],
|
|
173
|
+
system_template="You are a sentiment analysis expert. Respond: positive, negative, or neutral.",
|
|
174
|
+
message_template="Text: {{ text }}\n\nSentiment:",
|
|
175
|
+
)
|
|
176
|
+
|
|
177
|
+
run_v2 = client.create_eval_run(
|
|
178
|
+
project_id=project["id"],
|
|
179
|
+
prompt_version_id=v2["id"],
|
|
180
|
+
dataset_id=dataset["id"],
|
|
181
|
+
model_config={"model": "gpt-5-nano", "provider": "openai", "temperature": 0.0},
|
|
182
|
+
iterations=3,
|
|
183
|
+
)
|
|
184
|
+
# Compare results between v1 and v2 in the UI or via the API
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
## API Reference
|
|
188
|
+
|
|
189
|
+
### Client
|
|
190
|
+
|
|
191
|
+
| Method | Description |
|
|
192
|
+
|---|---|
|
|
193
|
+
| `PtunerClient(base_url, api_key=, token=, timeout=)` | Create a client |
|
|
194
|
+
| `client.close()` | Close the HTTP connection |
|
|
195
|
+
|
|
196
|
+
Supports context manager: `with PtunerClient(...) as client:`
|
|
197
|
+
|
|
198
|
+
### User
|
|
199
|
+
|
|
200
|
+
| Method | Description |
|
|
201
|
+
|---|---|
|
|
202
|
+
| `get_me()` | Get current user info |
|
|
203
|
+
| `generate_api_key()` | Generate a new API key |
|
|
204
|
+
|
|
205
|
+
### Projects
|
|
206
|
+
|
|
207
|
+
| Method | Description |
|
|
208
|
+
|---|---|
|
|
209
|
+
| `list_projects()` | List all projects |
|
|
210
|
+
| `create_project(name, description="")` | Create a project |
|
|
211
|
+
| `get_project(project_id)` | Get project details |
|
|
212
|
+
| `list_members(project_id)` | List project members |
|
|
213
|
+
| `add_member(project_id, email, role="editor")` | Add a member |
|
|
214
|
+
|
|
215
|
+
### Prompts & Versions
|
|
216
|
+
|
|
217
|
+
| Method | Description |
|
|
218
|
+
|---|---|
|
|
219
|
+
| `list_prompts(project_id)` | List prompts in a project |
|
|
220
|
+
| `create_prompt(project_id, name, slug)` | Create a prompt |
|
|
221
|
+
| `list_versions(prompt_id)` | List versions of a prompt |
|
|
222
|
+
| `create_version(prompt_id, system_template=, message_template=, json_schema=)` | Create a version |
|
|
223
|
+
|
|
224
|
+
### Datasets & Datapoints
|
|
225
|
+
|
|
226
|
+
| Method | Description |
|
|
227
|
+
|---|---|
|
|
228
|
+
| `list_datasets(project_id)` | List datasets |
|
|
229
|
+
| `create_dataset(project_id, name)` | Create a dataset |
|
|
230
|
+
| `list_datapoints(dataset_id)` | List datapoints |
|
|
231
|
+
| `create_datapoint(dataset_id, system_params=, message_params=, exact_match_label=, acceptance_criteria=, labels=)` | Add a datapoint |
|
|
232
|
+
| `update_datapoint(datapoint_id, **fields)` | Update a datapoint |
|
|
233
|
+
| `delete_datapoint(datapoint_id)` | Delete a datapoint |
|
|
234
|
+
|
|
235
|
+
### LLM Credentials
|
|
236
|
+
|
|
237
|
+
| Method | Description |
|
|
238
|
+
|---|---|
|
|
239
|
+
| `list_credentials()` | List stored credentials |
|
|
240
|
+
| `create_credential(provider, api_key, project_id=, display_label=)` | Store a credential |
|
|
241
|
+
| `update_credential(credential_id, **fields)` | Update a credential |
|
|
242
|
+
| `delete_credential(credential_id)` | Delete a credential |
|
|
243
|
+
| `resolve_credential(project_id, provider)` | Resolve which credential will be used |
|
|
244
|
+
|
|
245
|
+
### Eval Runs
|
|
246
|
+
|
|
247
|
+
| Method | Description |
|
|
248
|
+
|---|---|
|
|
249
|
+
| `create_eval_run(project_id, prompt_version_id, dataset_id, model_config=, judge_config=, iterations=1)` | Start an eval run |
|
|
250
|
+
| `get_eval_run(run_id)` | Get run status |
|
|
251
|
+
| `list_eval_results(run_id)` | Get run results |
|
|
252
|
+
| `list_project_runs(project_id)` | List all runs in a project |
|
|
253
|
+
|
|
254
|
+
## Examples
|
|
255
|
+
|
|
256
|
+
See [examples/benchmark_sentiment.py](examples/benchmark_sentiment.py) for a
|
|
257
|
+
full end-to-end benchmark that compares multiple models with both plain text
|
|
258
|
+
and structured JSON output.
|
|
259
|
+
|
|
260
|
+
## License
|
|
261
|
+
|
|
262
|
+
MIT
|
|
263
|
+
|
ptuner-0.1.0/README.md
ADDED
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
# ptuner
|
|
2
|
+
|
|
3
|
+
Python client for the **ptuner** prompt-tuning API.
|
|
4
|
+
|
|
5
|
+
Evaluate, compare and iterate on LLM prompts with dataset-driven benchmarks,
|
|
6
|
+
exact-match scoring, and LLM-as-judge evaluation.
|
|
7
|
+
|
|
8
|
+
**Hosted at [prompts.church](https://prompts.church)**
|
|
9
|
+
|
|
10
|
+
## Installation
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
pip install ptuner
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## Quick Start
|
|
17
|
+
|
|
18
|
+
```python
|
|
19
|
+
from ptuner import PtunerClient
|
|
20
|
+
|
|
21
|
+
client = PtunerClient(
|
|
22
|
+
base_url="https://api.prompts.church",
|
|
23
|
+
api_key="sk_...",
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
# 1. Create a project
|
|
27
|
+
project = client.create_project(
|
|
28
|
+
name="Sentiment Analysis",
|
|
29
|
+
description="Classify customer feedback",
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
# 2. Create a prompt with a version
|
|
33
|
+
prompt = client.create_prompt(
|
|
34
|
+
project["id"],
|
|
35
|
+
name="Sentiment Classifier",
|
|
36
|
+
slug="sentiment-v1",
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
version = client.create_version(
|
|
40
|
+
prompt["id"],
|
|
41
|
+
system_template=(
|
|
42
|
+
"You are a sentiment classifier. "
|
|
43
|
+
"Respond with exactly one word: positive, negative, or neutral."
|
|
44
|
+
),
|
|
45
|
+
message_template="Text: {{ text }}\n\nSentiment:",
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
# 3. Create a dataset
|
|
49
|
+
dataset = client.create_dataset(project["id"], name="Customer Reviews")
|
|
50
|
+
|
|
51
|
+
reviews = [
|
|
52
|
+
{"text": "This product is amazing!", "label": "positive"},
|
|
53
|
+
{"text": "Terrible quality, broke after one day.", "label": "negative"},
|
|
54
|
+
{"text": "The package arrived on time.", "label": "neutral"},
|
|
55
|
+
]
|
|
56
|
+
|
|
57
|
+
for r in reviews:
|
|
58
|
+
client.create_datapoint(
|
|
59
|
+
dataset["id"],
|
|
60
|
+
message_params=[{"role": "user", "params": {"text": r["text"]}}],
|
|
61
|
+
exact_match_label=r["label"],
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
# 4. Store your LLM API key (one-time)
|
|
65
|
+
client.create_credential(
|
|
66
|
+
provider="openai",
|
|
67
|
+
api_key="sk-your-openai-key",
|
|
68
|
+
display_label="My Key",
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
# 5. Run evaluation
|
|
72
|
+
run = client.create_eval_run(
|
|
73
|
+
project_id=project["id"],
|
|
74
|
+
prompt_version_id=version["id"],
|
|
75
|
+
dataset_id=dataset["id"],
|
|
76
|
+
model_config={"model": "gpt-5-nano", "provider": "openai", "temperature": 0.0},
|
|
77
|
+
judge_config={"judge_model": "gpt-5-mini"},
|
|
78
|
+
iterations=3,
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
# 6. Wait and check results
|
|
82
|
+
import time
|
|
83
|
+
for _ in range(30):
|
|
84
|
+
status = client.get_eval_run(run["id"])
|
|
85
|
+
if status["status"] in ("completed", "failed"):
|
|
86
|
+
break
|
|
87
|
+
time.sleep(2)
|
|
88
|
+
|
|
89
|
+
results = client.list_eval_results(run["id"])
|
|
90
|
+
exact = [r["exact_match_score"] for r in results if r.get("exact_match_score") is not None]
|
|
91
|
+
judge = [r["judge_score"] for r in results if r.get("judge_score") is not None]
|
|
92
|
+
|
|
93
|
+
if exact:
|
|
94
|
+
print(f"Exact match accuracy: {sum(exact)/len(exact):.1%}")
|
|
95
|
+
if judge:
|
|
96
|
+
print(f"Judge avg score: {sum(judge)/len(judge):.2f}")
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## Authentication
|
|
100
|
+
|
|
101
|
+
Pass either an API key or a Firebase JWT token:
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
# API key (recommended)
|
|
105
|
+
client = PtunerClient(base_url="https://api.prompts.church", api_key="sk_...")
|
|
106
|
+
|
|
107
|
+
# Firebase JWT
|
|
108
|
+
client = PtunerClient(base_url="https://api.prompts.church", token="eyJ...")
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
Generate an API key in the UI at **Settings → Generate API Key**.
|
|
112
|
+
|
|
113
|
+
## Structured JSON Output
|
|
114
|
+
|
|
115
|
+
Force models to return structured JSON by adding `json_schema` when creating
|
|
116
|
+
a prompt version:
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
version = client.create_version(
|
|
120
|
+
prompt["id"],
|
|
121
|
+
system_template="You are a sentiment expert. Return JSON with sentiment and confidence.",
|
|
122
|
+
message_template="Text: {{ text }}",
|
|
123
|
+
json_schema={
|
|
124
|
+
"type": "object",
|
|
125
|
+
"properties": {
|
|
126
|
+
"sentiment": {"type": "string", "enum": ["positive", "negative", "neutral"]},
|
|
127
|
+
"confidence": {"type": "number"},
|
|
128
|
+
},
|
|
129
|
+
"required": ["sentiment", "confidence"],
|
|
130
|
+
"additionalProperties": False,
|
|
131
|
+
},
|
|
132
|
+
)
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
This works across all providers (OpenAI, Anthropic, Google) — ptuner
|
|
136
|
+
translates the schema to each provider's structured output format automatically.
|
|
137
|
+
|
|
138
|
+
Omit `json_schema` (or set it to `None`) for plain text mode.
|
|
139
|
+
|
|
140
|
+
## Comparing Prompt Versions
|
|
141
|
+
|
|
142
|
+
A common workflow: iterate on a prompt and compare versions against the same dataset.
|
|
143
|
+
|
|
144
|
+
```python
|
|
145
|
+
v2 = client.create_version(
|
|
146
|
+
prompt["id"],
|
|
147
|
+
system_template="You are a sentiment analysis expert. Respond: positive, negative, or neutral.",
|
|
148
|
+
message_template="Text: {{ text }}\n\nSentiment:",
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
run_v2 = client.create_eval_run(
|
|
152
|
+
project_id=project["id"],
|
|
153
|
+
prompt_version_id=v2["id"],
|
|
154
|
+
dataset_id=dataset["id"],
|
|
155
|
+
model_config={"model": "gpt-5-nano", "provider": "openai", "temperature": 0.0},
|
|
156
|
+
iterations=3,
|
|
157
|
+
)
|
|
158
|
+
# Compare results between v1 and v2 in the UI or via the API
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
## API Reference
|
|
162
|
+
|
|
163
|
+
### Client
|
|
164
|
+
|
|
165
|
+
| Method | Description |
|
|
166
|
+
|---|---|
|
|
167
|
+
| `PtunerClient(base_url, api_key=, token=, timeout=)` | Create a client |
|
|
168
|
+
| `client.close()` | Close the HTTP connection |
|
|
169
|
+
|
|
170
|
+
Supports context manager: `with PtunerClient(...) as client:`
|
|
171
|
+
|
|
172
|
+
### User
|
|
173
|
+
|
|
174
|
+
| Method | Description |
|
|
175
|
+
|---|---|
|
|
176
|
+
| `get_me()` | Get current user info |
|
|
177
|
+
| `generate_api_key()` | Generate a new API key |
|
|
178
|
+
|
|
179
|
+
### Projects
|
|
180
|
+
|
|
181
|
+
| Method | Description |
|
|
182
|
+
|---|---|
|
|
183
|
+
| `list_projects()` | List all projects |
|
|
184
|
+
| `create_project(name, description="")` | Create a project |
|
|
185
|
+
| `get_project(project_id)` | Get project details |
|
|
186
|
+
| `list_members(project_id)` | List project members |
|
|
187
|
+
| `add_member(project_id, email, role="editor")` | Add a member |
|
|
188
|
+
|
|
189
|
+
### Prompts & Versions
|
|
190
|
+
|
|
191
|
+
| Method | Description |
|
|
192
|
+
|---|---|
|
|
193
|
+
| `list_prompts(project_id)` | List prompts in a project |
|
|
194
|
+
| `create_prompt(project_id, name, slug)` | Create a prompt |
|
|
195
|
+
| `list_versions(prompt_id)` | List versions of a prompt |
|
|
196
|
+
| `create_version(prompt_id, system_template=, message_template=, json_schema=)` | Create a version |
|
|
197
|
+
|
|
198
|
+
### Datasets & Datapoints
|
|
199
|
+
|
|
200
|
+
| Method | Description |
|
|
201
|
+
|---|---|
|
|
202
|
+
| `list_datasets(project_id)` | List datasets |
|
|
203
|
+
| `create_dataset(project_id, name)` | Create a dataset |
|
|
204
|
+
| `list_datapoints(dataset_id)` | List datapoints |
|
|
205
|
+
| `create_datapoint(dataset_id, system_params=, message_params=, exact_match_label=, acceptance_criteria=, labels=)` | Add a datapoint |
|
|
206
|
+
| `update_datapoint(datapoint_id, **fields)` | Update a datapoint |
|
|
207
|
+
| `delete_datapoint(datapoint_id)` | Delete a datapoint |
|
|
208
|
+
|
|
209
|
+
### LLM Credentials
|
|
210
|
+
|
|
211
|
+
| Method | Description |
|
|
212
|
+
|---|---|
|
|
213
|
+
| `list_credentials()` | List stored credentials |
|
|
214
|
+
| `create_credential(provider, api_key, project_id=, display_label=)` | Store a credential |
|
|
215
|
+
| `update_credential(credential_id, **fields)` | Update a credential |
|
|
216
|
+
| `delete_credential(credential_id)` | Delete a credential |
|
|
217
|
+
| `resolve_credential(project_id, provider)` | Resolve which credential will be used |
|
|
218
|
+
|
|
219
|
+
### Eval Runs
|
|
220
|
+
|
|
221
|
+
| Method | Description |
|
|
222
|
+
|---|---|
|
|
223
|
+
| `create_eval_run(project_id, prompt_version_id, dataset_id, model_config=, judge_config=, iterations=1)` | Start an eval run |
|
|
224
|
+
| `get_eval_run(run_id)` | Get run status |
|
|
225
|
+
| `list_eval_results(run_id)` | Get run results |
|
|
226
|
+
| `list_project_runs(project_id)` | List all runs in a project |
|
|
227
|
+
|
|
228
|
+
## Examples
|
|
229
|
+
|
|
230
|
+
See [examples/benchmark_sentiment.py](examples/benchmark_sentiment.py) for a
|
|
231
|
+
full end-to-end benchmark that compares multiple models with both plain text
|
|
232
|
+
and structured JSON output.
|
|
233
|
+
|
|
234
|
+
## License
|
|
235
|
+
|
|
236
|
+
MIT
|
|
237
|
+
|
ptuner-0.1.0/__init__.py
ADDED
|
File without changes
|