ptuner 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,41 @@
1
+ # OS
2
+ .DS_Store
3
+ Thumbs.db
4
+
5
+ # IDEs
6
+ .vscode/
7
+ .idea/
8
+ *.swp
9
+ *.swo
10
+ *~
11
+
12
+ # Go
13
+ app/tmp/
14
+
15
+ # Node / Frontend
16
+ frontend/node_modules/
17
+ frontend/dist/
18
+ frontend/.env
19
+ frontend/.env.local
20
+ frontend/.env.*.local
21
+
22
+ # Python
23
+ __pycache__/
24
+ *.pyc
25
+ .venv/
26
+ client/.venv/
27
+ client/*.egg-info/
28
+ client/dist/
29
+ client/.pytest_cache/
30
+ *.egg-info/
31
+
32
+ # Deploy secrets (real values — only _example committed)
33
+ deploy/gcp/scripts/01_create_secrets.sh
34
+
35
+ # Firebase
36
+ .firebase/
37
+ .firebaserc
38
+
39
+ # Misc
40
+ *.log
41
+ *.pid
ptuner-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,264 @@
1
+ Metadata-Version: 2.4
2
+ Name: ptuner
3
+ Version: 0.1.0
4
+ Summary: Python client library for the ptuner prompt-tuning API
5
+ Project-URL: Homepage, https://prompts.church
6
+ Project-URL: Repository, https://github.com/ptuner/ptuner
7
+ Project-URL: Documentation, https://github.com/ptuner/ptuner/tree/main/client#readme
8
+ License-Expression: MIT
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Topic :: Software Development :: Libraries
18
+ Classifier: Typing :: Typed
19
+ Requires-Python: >=3.10
20
+ Requires-Dist: httpx<1,>=0.27
21
+ Provides-Extra: dev
22
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
23
+ Requires-Dist: pytest>=8; extra == 'dev'
24
+ Requires-Dist: respx>=0.21; extra == 'dev'
25
+ Description-Content-Type: text/markdown
26
+
27
+ # ptuner
28
+
29
+ Python client for the **ptuner** prompt-tuning API.
30
+
31
+ Evaluate, compare and iterate on LLM prompts with dataset-driven benchmarks,
32
+ exact-match scoring, and LLM-as-judge evaluation.
33
+
34
+ **Hosted at [prompts.church](https://prompts.church)**
35
+
36
+ ## Installation
37
+
38
+ ```bash
39
+ pip install ptuner
40
+ ```
41
+
42
+ ## Quick Start
43
+
44
+ ```python
45
+ from ptuner import PtunerClient
46
+
47
+ client = PtunerClient(
48
+ base_url="https://api.prompts.church",
49
+ api_key="sk_...",
50
+ )
51
+
52
+ # 1. Create a project
53
+ project = client.create_project(
54
+ name="Sentiment Analysis",
55
+ description="Classify customer feedback",
56
+ )
57
+
58
+ # 2. Create a prompt with a version
59
+ prompt = client.create_prompt(
60
+ project["id"],
61
+ name="Sentiment Classifier",
62
+ slug="sentiment-v1",
63
+ )
64
+
65
+ version = client.create_version(
66
+ prompt["id"],
67
+ system_template=(
68
+ "You are a sentiment classifier. "
69
+ "Respond with exactly one word: positive, negative, or neutral."
70
+ ),
71
+ message_template="Text: {{ text }}\n\nSentiment:",
72
+ )
73
+
74
+ # 3. Create a dataset
75
+ dataset = client.create_dataset(project["id"], name="Customer Reviews")
76
+
77
+ reviews = [
78
+ {"text": "This product is amazing!", "label": "positive"},
79
+ {"text": "Terrible quality, broke after one day.", "label": "negative"},
80
+ {"text": "The package arrived on time.", "label": "neutral"},
81
+ ]
82
+
83
+ for r in reviews:
84
+ client.create_datapoint(
85
+ dataset["id"],
86
+ message_params=[{"role": "user", "params": {"text": r["text"]}}],
87
+ exact_match_label=r["label"],
88
+ )
89
+
90
+ # 4. Store your LLM API key (one-time)
91
+ client.create_credential(
92
+ provider="openai",
93
+ api_key="sk-your-openai-key",
94
+ display_label="My Key",
95
+ )
96
+
97
+ # 5. Run evaluation
98
+ run = client.create_eval_run(
99
+ project_id=project["id"],
100
+ prompt_version_id=version["id"],
101
+ dataset_id=dataset["id"],
102
+ model_config={"model": "gpt-5-nano", "provider": "openai", "temperature": 0.0},
103
+ judge_config={"judge_model": "gpt-5-mini"},
104
+ iterations=3,
105
+ )
106
+
107
+ # 6. Wait and check results
108
+ import time
109
+ for _ in range(30):
110
+ status = client.get_eval_run(run["id"])
111
+ if status["status"] in ("completed", "failed"):
112
+ break
113
+ time.sleep(2)
114
+
115
+ results = client.list_eval_results(run["id"])
116
+ exact = [r["exact_match_score"] for r in results if r.get("exact_match_score") is not None]
117
+ judge = [r["judge_score"] for r in results if r.get("judge_score") is not None]
118
+
119
+ if exact:
120
+ print(f"Exact match accuracy: {sum(exact)/len(exact):.1%}")
121
+ if judge:
122
+ print(f"Judge avg score: {sum(judge)/len(judge):.2f}")
123
+ ```
124
+
125
+ ## Authentication
126
+
127
+ Pass either an API key or a Firebase JWT token:
128
+
129
+ ```python
130
+ # API key (recommended)
131
+ client = PtunerClient(base_url="https://api.prompts.church", api_key="sk_...")
132
+
133
+ # Firebase JWT
134
+ client = PtunerClient(base_url="https://api.prompts.church", token="eyJ...")
135
+ ```
136
+
137
+ Generate an API key in the UI at **Settings → Generate API Key**.
138
+
139
+ ## Structured JSON Output
140
+
141
+ Force models to return structured JSON by adding `json_schema` when creating
142
+ a prompt version:
143
+
144
+ ```python
145
+ version = client.create_version(
146
+ prompt["id"],
147
+ system_template="You are a sentiment expert. Return JSON with sentiment and confidence.",
148
+ message_template="Text: {{ text }}",
149
+ json_schema={
150
+ "type": "object",
151
+ "properties": {
152
+ "sentiment": {"type": "string", "enum": ["positive", "negative", "neutral"]},
153
+ "confidence": {"type": "number"},
154
+ },
155
+ "required": ["sentiment", "confidence"],
156
+ "additionalProperties": False,
157
+ },
158
+ )
159
+ ```
160
+
161
+ This works across all providers (OpenAI, Anthropic, Google) — ptuner
162
+ translates the schema to each provider's structured output format automatically.
163
+
164
+ Omit `json_schema` (or set it to `None`) for plain text mode.
165
+
166
+ ## Comparing Prompt Versions
167
+
168
+ A common workflow: iterate on a prompt and compare versions against the same dataset.
169
+
170
+ ```python
171
+ v2 = client.create_version(
172
+ prompt["id"],
173
+ system_template="You are a sentiment analysis expert. Respond: positive, negative, or neutral.",
174
+ message_template="Text: {{ text }}\n\nSentiment:",
175
+ )
176
+
177
+ run_v2 = client.create_eval_run(
178
+ project_id=project["id"],
179
+ prompt_version_id=v2["id"],
180
+ dataset_id=dataset["id"],
181
+ model_config={"model": "gpt-5-nano", "provider": "openai", "temperature": 0.0},
182
+ iterations=3,
183
+ )
184
+ # Compare results between v1 and v2 in the UI or via the API
185
+ ```
186
+
187
+ ## API Reference
188
+
189
+ ### Client
190
+
191
+ | Method | Description |
192
+ |---|---|
193
+ | `PtunerClient(base_url, api_key=, token=, timeout=)` | Create a client |
194
+ | `client.close()` | Close the HTTP connection |
195
+
196
+ Supports context manager: `with PtunerClient(...) as client:`
197
+
198
+ ### User
199
+
200
+ | Method | Description |
201
+ |---|---|
202
+ | `get_me()` | Get current user info |
203
+ | `generate_api_key()` | Generate a new API key |
204
+
205
+ ### Projects
206
+
207
+ | Method | Description |
208
+ |---|---|
209
+ | `list_projects()` | List all projects |
210
+ | `create_project(name, description="")` | Create a project |
211
+ | `get_project(project_id)` | Get project details |
212
+ | `list_members(project_id)` | List project members |
213
+ | `add_member(project_id, email, role="editor")` | Add a member |
214
+
215
+ ### Prompts & Versions
216
+
217
+ | Method | Description |
218
+ |---|---|
219
+ | `list_prompts(project_id)` | List prompts in a project |
220
+ | `create_prompt(project_id, name, slug)` | Create a prompt |
221
+ | `list_versions(prompt_id)` | List versions of a prompt |
222
+ | `create_version(prompt_id, system_template=, message_template=, json_schema=)` | Create a version |
223
+
224
+ ### Datasets & Datapoints
225
+
226
+ | Method | Description |
227
+ |---|---|
228
+ | `list_datasets(project_id)` | List datasets |
229
+ | `create_dataset(project_id, name)` | Create a dataset |
230
+ | `list_datapoints(dataset_id)` | List datapoints |
231
+ | `create_datapoint(dataset_id, system_params=, message_params=, exact_match_label=, acceptance_criteria=, labels=)` | Add a datapoint |
232
+ | `update_datapoint(datapoint_id, **fields)` | Update a datapoint |
233
+ | `delete_datapoint(datapoint_id)` | Delete a datapoint |
234
+
235
+ ### LLM Credentials
236
+
237
+ | Method | Description |
238
+ |---|---|
239
+ | `list_credentials()` | List stored credentials |
240
+ | `create_credential(provider, api_key, project_id=, display_label=)` | Store a credential |
241
+ | `update_credential(credential_id, **fields)` | Update a credential |
242
+ | `delete_credential(credential_id)` | Delete a credential |
243
+ | `resolve_credential(project_id, provider)` | Resolve which credential will be used |
244
+
245
+ ### Eval Runs
246
+
247
+ | Method | Description |
248
+ |---|---|
249
+ | `create_eval_run(project_id, prompt_version_id, dataset_id, model_config=, judge_config=, iterations=1)` | Start an eval run |
250
+ | `get_eval_run(run_id)` | Get run status |
251
+ | `list_eval_results(run_id)` | Get run results |
252
+ | `list_project_runs(project_id)` | List all runs in a project |
253
+
254
+ ## Examples
255
+
256
+ See [examples/benchmark_sentiment.py](examples/benchmark_sentiment.py) for a
257
+ full end-to-end benchmark that compares multiple models with both plain text
258
+ and structured JSON output.
259
+
260
+ ## License
261
+
262
+ MIT
263
+
264
+
ptuner-0.1.0/README.md ADDED
@@ -0,0 +1,238 @@
1
+ # ptuner
2
+
3
+ Python client for the **ptuner** prompt-tuning API.
4
+
5
+ Evaluate, compare and iterate on LLM prompts with dataset-driven benchmarks,
6
+ exact-match scoring, and LLM-as-judge evaluation.
7
+
8
+ **Hosted at [prompts.church](https://prompts.church)**
9
+
10
+ ## Installation
11
+
12
+ ```bash
13
+ pip install ptuner
14
+ ```
15
+
16
+ ## Quick Start
17
+
18
+ ```python
19
+ from ptuner import PtunerClient
20
+
21
+ client = PtunerClient(
22
+ base_url="https://api.prompts.church",
23
+ api_key="sk_...",
24
+ )
25
+
26
+ # 1. Create a project
27
+ project = client.create_project(
28
+ name="Sentiment Analysis",
29
+ description="Classify customer feedback",
30
+ )
31
+
32
+ # 2. Create a prompt with a version
33
+ prompt = client.create_prompt(
34
+ project["id"],
35
+ name="Sentiment Classifier",
36
+ slug="sentiment-v1",
37
+ )
38
+
39
+ version = client.create_version(
40
+ prompt["id"],
41
+ system_template=(
42
+ "You are a sentiment classifier. "
43
+ "Respond with exactly one word: positive, negative, or neutral."
44
+ ),
45
+ message_template="Text: {{ text }}\n\nSentiment:",
46
+ )
47
+
48
+ # 3. Create a dataset
49
+ dataset = client.create_dataset(project["id"], name="Customer Reviews")
50
+
51
+ reviews = [
52
+ {"text": "This product is amazing!", "label": "positive"},
53
+ {"text": "Terrible quality, broke after one day.", "label": "negative"},
54
+ {"text": "The package arrived on time.", "label": "neutral"},
55
+ ]
56
+
57
+ for r in reviews:
58
+ client.create_datapoint(
59
+ dataset["id"],
60
+ message_params=[{"role": "user", "params": {"text": r["text"]}}],
61
+ exact_match_label=r["label"],
62
+ )
63
+
64
+ # 4. Store your LLM API key (one-time)
65
+ client.create_credential(
66
+ provider="openai",
67
+ api_key="sk-your-openai-key",
68
+ display_label="My Key",
69
+ )
70
+
71
+ # 5. Run evaluation
72
+ run = client.create_eval_run(
73
+ project_id=project["id"],
74
+ prompt_version_id=version["id"],
75
+ dataset_id=dataset["id"],
76
+ model_config={"model": "gpt-5-nano", "provider": "openai", "temperature": 0.0},
77
+ judge_config={"judge_model": "gpt-5-mini"},
78
+ iterations=3,
79
+ )
80
+
81
+ # 6. Wait and check results
82
+ import time
83
+ for _ in range(30):
84
+ status = client.get_eval_run(run["id"])
85
+ if status["status"] in ("completed", "failed"):
86
+ break
87
+ time.sleep(2)
88
+
89
+ results = client.list_eval_results(run["id"])
90
+ exact = [r["exact_match_score"] for r in results if r.get("exact_match_score") is not None]
91
+ judge = [r["judge_score"] for r in results if r.get("judge_score") is not None]
92
+
93
+ if exact:
94
+ print(f"Exact match accuracy: {sum(exact)/len(exact):.1%}")
95
+ if judge:
96
+ print(f"Judge avg score: {sum(judge)/len(judge):.2f}")
97
+ ```
98
+
99
+ ## Authentication
100
+
101
+ Pass either an API key or a Firebase JWT token:
102
+
103
+ ```python
104
+ # API key (recommended)
105
+ client = PtunerClient(base_url="https://api.prompts.church", api_key="sk_...")
106
+
107
+ # Firebase JWT
108
+ client = PtunerClient(base_url="https://api.prompts.church", token="eyJ...")
109
+ ```
110
+
111
+ Generate an API key in the UI at **Settings → Generate API Key**.
112
+
113
+ ## Structured JSON Output
114
+
115
+ Force models to return structured JSON by adding `json_schema` when creating
116
+ a prompt version:
117
+
118
+ ```python
119
+ version = client.create_version(
120
+ prompt["id"],
121
+ system_template="You are a sentiment expert. Return JSON with sentiment and confidence.",
122
+ message_template="Text: {{ text }}",
123
+ json_schema={
124
+ "type": "object",
125
+ "properties": {
126
+ "sentiment": {"type": "string", "enum": ["positive", "negative", "neutral"]},
127
+ "confidence": {"type": "number"},
128
+ },
129
+ "required": ["sentiment", "confidence"],
130
+ "additionalProperties": False,
131
+ },
132
+ )
133
+ ```
134
+
135
+ This works across all providers (OpenAI, Anthropic, Google) — ptuner
136
+ translates the schema to each provider's structured output format automatically.
137
+
138
+ Omit `json_schema` (or set it to `None`) for plain text mode.
139
+
140
+ ## Comparing Prompt Versions
141
+
142
+ A common workflow: iterate on a prompt and compare versions against the same dataset.
143
+
144
+ ```python
145
+ v2 = client.create_version(
146
+ prompt["id"],
147
+ system_template="You are a sentiment analysis expert. Respond: positive, negative, or neutral.",
148
+ message_template="Text: {{ text }}\n\nSentiment:",
149
+ )
150
+
151
+ run_v2 = client.create_eval_run(
152
+ project_id=project["id"],
153
+ prompt_version_id=v2["id"],
154
+ dataset_id=dataset["id"],
155
+ model_config={"model": "gpt-5-nano", "provider": "openai", "temperature": 0.0},
156
+ iterations=3,
157
+ )
158
+ # Compare results between v1 and v2 in the UI or via the API
159
+ ```
160
+
161
+ ## API Reference
162
+
163
+ ### Client
164
+
165
+ | Method | Description |
166
+ |---|---|
167
+ | `PtunerClient(base_url, api_key=, token=, timeout=)` | Create a client |
168
+ | `client.close()` | Close the HTTP connection |
169
+
170
+ Supports context manager: `with PtunerClient(...) as client:`
171
+
172
+ ### User
173
+
174
+ | Method | Description |
175
+ |---|---|
176
+ | `get_me()` | Get current user info |
177
+ | `generate_api_key()` | Generate a new API key |
178
+
179
+ ### Projects
180
+
181
+ | Method | Description |
182
+ |---|---|
183
+ | `list_projects()` | List all projects |
184
+ | `create_project(name, description="")` | Create a project |
185
+ | `get_project(project_id)` | Get project details |
186
+ | `list_members(project_id)` | List project members |
187
+ | `add_member(project_id, email, role="editor")` | Add a member |
188
+
189
+ ### Prompts & Versions
190
+
191
+ | Method | Description |
192
+ |---|---|
193
+ | `list_prompts(project_id)` | List prompts in a project |
194
+ | `create_prompt(project_id, name, slug)` | Create a prompt |
195
+ | `list_versions(prompt_id)` | List versions of a prompt |
196
+ | `create_version(prompt_id, system_template=, message_template=, json_schema=)` | Create a version |
197
+
198
+ ### Datasets & Datapoints
199
+
200
+ | Method | Description |
201
+ |---|---|
202
+ | `list_datasets(project_id)` | List datasets |
203
+ | `create_dataset(project_id, name)` | Create a dataset |
204
+ | `list_datapoints(dataset_id)` | List datapoints |
205
+ | `create_datapoint(dataset_id, system_params=, message_params=, exact_match_label=, acceptance_criteria=, labels=)` | Add a datapoint |
206
+ | `update_datapoint(datapoint_id, **fields)` | Update a datapoint |
207
+ | `delete_datapoint(datapoint_id)` | Delete a datapoint |
208
+
209
+ ### LLM Credentials
210
+
211
+ | Method | Description |
212
+ |---|---|
213
+ | `list_credentials()` | List stored credentials |
214
+ | `create_credential(provider, api_key, project_id=, display_label=)` | Store a credential |
215
+ | `update_credential(credential_id, **fields)` | Update a credential |
216
+ | `delete_credential(credential_id)` | Delete a credential |
217
+ | `resolve_credential(project_id, provider)` | Resolve which credential will be used |
218
+
219
+ ### Eval Runs
220
+
221
+ | Method | Description |
222
+ |---|---|
223
+ | `create_eval_run(project_id, prompt_version_id, dataset_id, model_config=, judge_config=, iterations=1)` | Start an eval run |
224
+ | `get_eval_run(run_id)` | Get run status |
225
+ | `list_eval_results(run_id)` | Get run results |
226
+ | `list_project_runs(project_id)` | List all runs in a project |
227
+
228
+ ## Examples
229
+
230
+ See [examples/benchmark_sentiment.py](examples/benchmark_sentiment.py) for a
231
+ full end-to-end benchmark that compares multiple models with both plain text
232
+ and structured JSON output.
233
+
234
+ ## License
235
+
236
+ MIT
237
+
238
+
File without changes