ptm-client 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ptm_client-0.0.1/PKG-INFO +268 -0
- ptm_client-0.0.1/README.md +242 -0
- ptm_client-0.0.1/pyproject.toml +55 -0
- ptm_client-0.0.1/setup.cfg +4 -0
- ptm_client-0.0.1/src/ptm_client/__init__.py +7 -0
- ptm_client-0.0.1/src/ptm_client/client.py +209 -0
- ptm_client-0.0.1/src/ptm_client/errors.py +15 -0
- ptm_client-0.0.1/src/ptm_client.egg-info/PKG-INFO +268 -0
- ptm_client-0.0.1/src/ptm_client.egg-info/SOURCES.txt +11 -0
- ptm_client-0.0.1/src/ptm_client.egg-info/dependency_links.txt +1 -0
- ptm_client-0.0.1/src/ptm_client.egg-info/requires.txt +6 -0
- ptm_client-0.0.1/src/ptm_client.egg-info/top_level.txt +1 -0
- ptm_client-0.0.1/tests/test_client.py +373 -0
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ptm-client
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Lightweight PTM API client for integration with external Python services
|
|
5
|
+
Author: 15Five Engineering
|
|
6
|
+
License: Proprietary
|
|
7
|
+
Project-URL: Repository, https://github.com/15five/prompt-test-manager
|
|
8
|
+
Project-URL: Documentation, https://github.com/15five/prompt-test-manager/tree/main/packages/ptm-client
|
|
9
|
+
Project-URL: Changelog, https://github.com/15five/prompt-test-manager/blob/main/docs/ptm-client-packaging-roadmap.md#changelog
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: License :: Other/Proprietary License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
16
|
+
Classifier: Topic :: Software Development :: Testing
|
|
17
|
+
Classifier: Intended Audience :: Developers
|
|
18
|
+
Classifier: Typing :: Typed
|
|
19
|
+
Requires-Python: >=3.12
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
Requires-Dist: requests<3.0,>=2.32
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: pytest<9.0,>=8.2; extra == "dev"
|
|
24
|
+
Requires-Dist: responses<1.0,>=0.25; extra == "dev"
|
|
25
|
+
Requires-Dist: ruff<1.0,>=0.6; extra == "dev"
|
|
26
|
+
|
|
27
|
+
# ptm-client
|
|
28
|
+
|
|
29
|
+
Lightweight Python client for the [Prompt Test Manager (PTM)](https://github.com/15five/prompt-test-manager) API. Zero dependencies beyond `requests`.
|
|
30
|
+
|
|
31
|
+
## Install
|
|
32
|
+
|
|
33
|
+
### From PyPI (when published)
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
pip install ptm-client
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### From source (development)
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
pip install -e packages/ptm-client
|
|
43
|
+
# or with dev/test dependencies:
|
|
44
|
+
pip install -e "packages/ptm-client[dev]"
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### Docker mount (no install needed)
|
|
48
|
+
|
|
49
|
+
```yaml
|
|
50
|
+
# docker-compose.override.yml
|
|
51
|
+
services:
|
|
52
|
+
app:
|
|
53
|
+
volumes:
|
|
54
|
+
- /path/to/prompt-test-manager/packages/ptm-client/src:/opt/ptm-client-src:ro
|
|
55
|
+
environment:
|
|
56
|
+
PYTHONPATH: /opt/ptm-client-src:/app
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Quick Start
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
from ptm_client import PTMClient
|
|
63
|
+
|
|
64
|
+
client = PTMClient(base_url="http://localhost:8010", token="your-api-token")
|
|
65
|
+
|
|
66
|
+
# List prompts
|
|
67
|
+
prompts = client.list_prompts(tag="my_team")
|
|
68
|
+
|
|
69
|
+
# Get prompt detail
|
|
70
|
+
detail = client.get_prompt("my_team.summarizer")
|
|
71
|
+
|
|
72
|
+
# Get prompt test cases
|
|
73
|
+
tests = client.get_prompt_tests("my_team.summarizer")
|
|
74
|
+
|
|
75
|
+
# Run a repository evaluation
|
|
76
|
+
run = client.run_eval(
|
|
77
|
+
prompt_ids=["my_team.summarizer"],
|
|
78
|
+
provider_ids=["openai_gpt41_mini"],
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
# Run a manual evaluation
|
|
82
|
+
run = client.run_manual_eval({
|
|
83
|
+
"prompt_text": "...",
|
|
84
|
+
"tests": [{"description": "test", "vars": {"name": "World"}}],
|
|
85
|
+
"provider_profiles": ["openai_gpt41_mini"],
|
|
86
|
+
"visibility_scope": "org_visible",
|
|
87
|
+
})
|
|
88
|
+
|
|
89
|
+
# Wait for completion
|
|
90
|
+
result = client.wait_for_run(run["run_key"], timeout=120)
|
|
91
|
+
|
|
92
|
+
# Get HTML report
|
|
93
|
+
html = client.run_report(run["run_key"])
|
|
94
|
+
|
|
95
|
+
# Get JSON report
|
|
96
|
+
json_report = client.run_report(run["run_key"], format="json")
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## API Reference
|
|
100
|
+
|
|
101
|
+
### `PTMClient(base_url, token, timeout=30)`
|
|
102
|
+
|
|
103
|
+
Create a client. `token` is a PTM personal access token (`ptm_u_...`) or service account token (`ptm_sa_...`). `timeout` is the HTTP request timeout in seconds.
|
|
104
|
+
|
|
105
|
+
### Prompts
|
|
106
|
+
|
|
107
|
+
- **`list_prompts(tag=None)`** — list all prompts, optionally filtered by tag
|
|
108
|
+
- **`get_prompt(prompt_id)`** — get full prompt detail (prompt_text, tags, metadata)
|
|
109
|
+
- **`get_prompt_tests(prompt_id)`** — get test cases, deepeval metrics, KPIs
|
|
110
|
+
|
|
111
|
+
### Providers
|
|
112
|
+
|
|
113
|
+
- **`list_providers()`** — list available LLM provider profiles
|
|
114
|
+
|
|
115
|
+
### Evaluations
|
|
116
|
+
|
|
117
|
+
- **`run_eval(prompt_ids, provider_ids, **kwargs)`** — submit repository evaluation
|
|
118
|
+
- **`run_manual_eval(payload)`** — submit manual evaluation with custom prompt + tests
|
|
119
|
+
- **`run_prompt_eval(prompt_id, provider_ids, *, inject_vars=None, extra_tests=None, visibility_scope="org_visible", label=None)`** — fetch a prompt from PTM, merge runtime vars/tests, and submit (recommended for service integrations)
|
|
120
|
+
|
|
121
|
+
### Runs
|
|
122
|
+
|
|
123
|
+
- **`get_run(run_key)`** — get run status (includes score, passed_tests, total_tests)
|
|
124
|
+
- **`wait_for_run(run_key, timeout=300, poll_interval=5)`** — block until terminal state
|
|
125
|
+
- **`run_report(run_key, format="html")`** — get report (html, json, markdown, csv)
|
|
126
|
+
|
|
127
|
+
## Test Cases and Scoring
|
|
128
|
+
|
|
129
|
+
PTM evaluates with up to three scoring layers. Use any combination.
|
|
130
|
+
|
|
131
|
+
### Promptfoo assertions — deterministic pass/fail checks
|
|
132
|
+
|
|
133
|
+
Promptfoo assertions go in the `assert` array inside each test case:
|
|
134
|
+
|
|
135
|
+
```python
|
|
136
|
+
{
|
|
137
|
+
"description": "test case with assertions",
|
|
138
|
+
"vars": {"transcript": "..."},
|
|
139
|
+
"assert": [
|
|
140
|
+
{"type": "javascript", "value": "/meeting purpose/i.test(output)", "description": "has_purpose"},
|
|
141
|
+
{"type": "icontains", "value": "API migration", "description": "mentions_topic"},
|
|
142
|
+
{"type": "javascript", "value": "output.length >= 100", "description": "min_length"},
|
|
143
|
+
],
|
|
144
|
+
}
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
### DeepEval metrics — semantic quality scoring via judge LLM
|
|
148
|
+
|
|
149
|
+
DeepEval metrics go in `additional_metrics` at the payload root:
|
|
150
|
+
|
|
151
|
+
```python
|
|
152
|
+
{
|
|
153
|
+
"additional_metrics": [
|
|
154
|
+
{"name": "relevance", "criteria": "Output addresses the input topic with specific details.", "threshold": 0.7},
|
|
155
|
+
{"name": "structure", "criteria": "Output has clear sections and logical flow.", "threshold": 0.7},
|
|
156
|
+
],
|
|
157
|
+
"judge_profile": "openai_gpt41_mini",
|
|
158
|
+
}
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
### KPI configs — custom weighted expressions
|
|
162
|
+
|
|
163
|
+
KPI configs go in `additional_kpis` at the payload root:
|
|
164
|
+
|
|
165
|
+
```python
|
|
166
|
+
{
|
|
167
|
+
"additional_kpis": [
|
|
168
|
+
{"name": "cost_ok", "description": "Under $0.05", "expression": "1 if cost < 0.05 else 0", "weight": 1.0},
|
|
169
|
+
{"name": "fast", "description": "Under 10s", "expression": "1 if latency_ms < 10000 else 0", "weight": 1.0},
|
|
170
|
+
],
|
|
171
|
+
}
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
### Common patterns
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
# Promptfoo only (no judge LLM needed)
|
|
178
|
+
client.run_manual_eval({"tests": [{"vars": {...}, "assert": [...]}], ...})
|
|
179
|
+
|
|
180
|
+
# DeepEval only (semantic scoring, no deterministic checks)
|
|
181
|
+
client.run_manual_eval({"tests": [{"vars": {...}}], "additional_metrics": [...], ...})
|
|
182
|
+
|
|
183
|
+
# All three layers
|
|
184
|
+
client.run_manual_eval({"tests": [{"vars": {...}, "assert": [...]}], "additional_metrics": [...], "additional_kpis": [...], ...})
|
|
185
|
+
|
|
186
|
+
# No scoring (just run prompt, capture output)
|
|
187
|
+
client.run_manual_eval({"tests": [{"vars": {...}}], ...})
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
See `docs/ptm-client-integration.md` for the full test case reference with all assertion types, metric fields, and KPI variables.
|
|
191
|
+
|
|
192
|
+
## Inline Test Cases
|
|
193
|
+
|
|
194
|
+
### `run_manual_eval` — full control
|
|
195
|
+
|
|
196
|
+
```python
|
|
197
|
+
run = client.run_manual_eval({
|
|
198
|
+
"label": "my_custom_eval",
|
|
199
|
+
"prompt_text": '[{"role": "system", "content": "Summarize."}, {"role": "user", "content": "{{text}}"}]',
|
|
200
|
+
"tests": [
|
|
201
|
+
{"description": "short text", "vars": {"text": "The quick brown fox."}},
|
|
202
|
+
],
|
|
203
|
+
"provider_profiles": ["openai_gpt41_mini"],
|
|
204
|
+
"visibility_scope": "org_visible",
|
|
205
|
+
"cost_threshold": 1.0,
|
|
206
|
+
"latency_threshold_ms": 30000,
|
|
207
|
+
})
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
### `run_prompt_eval` — fetch prompt from PTM + inject live data
|
|
211
|
+
|
|
212
|
+
Recommended for service integrations:
|
|
213
|
+
|
|
214
|
+
```python
|
|
215
|
+
run = client.run_prompt_eval(
|
|
216
|
+
prompt_id="my_team.summarizer",
|
|
217
|
+
provider_ids=["openai_gpt41_mini"],
|
|
218
|
+
inject_vars={"transcript": real_transcript, "meeting_title": "Weekly 1:1"},
|
|
219
|
+
)
|
|
220
|
+
result = client.wait_for_run(run["run_key"], timeout=120)
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
With extra test cases:
|
|
224
|
+
|
|
225
|
+
```python
|
|
226
|
+
run = client.run_prompt_eval(
|
|
227
|
+
prompt_id="my_team.summarizer",
|
|
228
|
+
provider_ids=["openai_gpt41_mini"],
|
|
229
|
+
extra_tests=[
|
|
230
|
+
{"description": "edge case", "vars": {"transcript": edge_case_text}},
|
|
231
|
+
],
|
|
232
|
+
visibility_scope="private_only",
|
|
233
|
+
label="meeting_recap_edge_cases",
|
|
234
|
+
)
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
## Error Handling
|
|
238
|
+
|
|
239
|
+
```python
|
|
240
|
+
from ptm_client import PTMClient, PTMError, PTMTimeoutError
|
|
241
|
+
|
|
242
|
+
try:
|
|
243
|
+
result = client.wait_for_run(run_key, timeout=60)
|
|
244
|
+
except PTMTimeoutError:
|
|
245
|
+
print("Run did not complete in time")
|
|
246
|
+
except PTMError as e:
|
|
247
|
+
print(f"PTM API error ({e.status_code}): {e}")
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
## More
|
|
251
|
+
|
|
252
|
+
- **[Integration guide](https://github.com/15five/prompt-test-manager/blob/main/docs/ptm-client-integration.md)** — install methods, test case types, scoring layers, Django/FastAPI examples, chained evals
|
|
253
|
+
- **[Packaging roadmap](https://github.com/15five/prompt-test-manager/blob/main/docs/ptm-client-packaging-roadmap.md)** — PyPI/GitHub Packages publishing plan
|
|
254
|
+
- **[Examples](https://github.com/15five/prompt-test-manager/tree/main/docs/examples)** — runnable Python scripts for every use case
|
|
255
|
+
|
|
256
|
+
## Dependencies
|
|
257
|
+
|
|
258
|
+
`requests` only. No FastAPI, SQLAlchemy, Streamlit, or other PTM server deps.
|
|
259
|
+
|
|
260
|
+
## Development
|
|
261
|
+
|
|
262
|
+
```bash
|
|
263
|
+
pip install -e "packages/ptm-client[dev]"
|
|
264
|
+
cd packages/ptm-client
|
|
265
|
+
pytest tests/ -v
|
|
266
|
+
ruff check src/ tests/
|
|
267
|
+
ruff format src/ tests/
|
|
268
|
+
```
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
# ptm-client
|
|
2
|
+
|
|
3
|
+
Lightweight Python client for the [Prompt Test Manager (PTM)](https://github.com/15five/prompt-test-manager) API. Zero dependencies beyond `requests`.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
### From PyPI (when published)
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install ptm-client
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
### From source (development)
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
pip install -e packages/ptm-client
|
|
17
|
+
# or with dev/test dependencies:
|
|
18
|
+
pip install -e "packages/ptm-client[dev]"
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
### Docker mount (no install needed)
|
|
22
|
+
|
|
23
|
+
```yaml
|
|
24
|
+
# docker-compose.override.yml
|
|
25
|
+
services:
|
|
26
|
+
app:
|
|
27
|
+
volumes:
|
|
28
|
+
- /path/to/prompt-test-manager/packages/ptm-client/src:/opt/ptm-client-src:ro
|
|
29
|
+
environment:
|
|
30
|
+
PYTHONPATH: /opt/ptm-client-src:/app
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Quick Start
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
from ptm_client import PTMClient
|
|
37
|
+
|
|
38
|
+
client = PTMClient(base_url="http://localhost:8010", token="your-api-token")
|
|
39
|
+
|
|
40
|
+
# List prompts
|
|
41
|
+
prompts = client.list_prompts(tag="my_team")
|
|
42
|
+
|
|
43
|
+
# Get prompt detail
|
|
44
|
+
detail = client.get_prompt("my_team.summarizer")
|
|
45
|
+
|
|
46
|
+
# Get prompt test cases
|
|
47
|
+
tests = client.get_prompt_tests("my_team.summarizer")
|
|
48
|
+
|
|
49
|
+
# Run a repository evaluation
|
|
50
|
+
run = client.run_eval(
|
|
51
|
+
prompt_ids=["my_team.summarizer"],
|
|
52
|
+
provider_ids=["openai_gpt41_mini"],
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
# Run a manual evaluation
|
|
56
|
+
run = client.run_manual_eval({
|
|
57
|
+
"prompt_text": "...",
|
|
58
|
+
"tests": [{"description": "test", "vars": {"name": "World"}}],
|
|
59
|
+
"provider_profiles": ["openai_gpt41_mini"],
|
|
60
|
+
"visibility_scope": "org_visible",
|
|
61
|
+
})
|
|
62
|
+
|
|
63
|
+
# Wait for completion
|
|
64
|
+
result = client.wait_for_run(run["run_key"], timeout=120)
|
|
65
|
+
|
|
66
|
+
# Get HTML report
|
|
67
|
+
html = client.run_report(run["run_key"])
|
|
68
|
+
|
|
69
|
+
# Get JSON report
|
|
70
|
+
json_report = client.run_report(run["run_key"], format="json")
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## API Reference
|
|
74
|
+
|
|
75
|
+
### `PTMClient(base_url, token, timeout=30)`
|
|
76
|
+
|
|
77
|
+
Create a client. `token` is a PTM personal access token (`ptm_u_...`) or service account token (`ptm_sa_...`). `timeout` is the HTTP request timeout in seconds.
|
|
78
|
+
|
|
79
|
+
### Prompts
|
|
80
|
+
|
|
81
|
+
- **`list_prompts(tag=None)`** — list all prompts, optionally filtered by tag
|
|
82
|
+
- **`get_prompt(prompt_id)`** — get full prompt detail (prompt_text, tags, metadata)
|
|
83
|
+
- **`get_prompt_tests(prompt_id)`** — get test cases, deepeval metrics, KPIs
|
|
84
|
+
|
|
85
|
+
### Providers
|
|
86
|
+
|
|
87
|
+
- **`list_providers()`** — list available LLM provider profiles
|
|
88
|
+
|
|
89
|
+
### Evaluations
|
|
90
|
+
|
|
91
|
+
- **`run_eval(prompt_ids, provider_ids, **kwargs)`** — submit repository evaluation
|
|
92
|
+
- **`run_manual_eval(payload)`** — submit manual evaluation with custom prompt + tests
|
|
93
|
+
- **`run_prompt_eval(prompt_id, provider_ids, *, inject_vars=None, extra_tests=None, visibility_scope="org_visible", label=None)`** — fetch a prompt from PTM, merge runtime vars/tests, and submit (recommended for service integrations)
|
|
94
|
+
|
|
95
|
+
### Runs
|
|
96
|
+
|
|
97
|
+
- **`get_run(run_key)`** — get run status (includes score, passed_tests, total_tests)
|
|
98
|
+
- **`wait_for_run(run_key, timeout=300, poll_interval=5)`** — block until terminal state
|
|
99
|
+
- **`run_report(run_key, format="html")`** — get report (html, json, markdown, csv)
|
|
100
|
+
|
|
101
|
+
## Test Cases and Scoring
|
|
102
|
+
|
|
103
|
+
PTM evaluates with up to three scoring layers. Use any combination.
|
|
104
|
+
|
|
105
|
+
### Promptfoo assertions — deterministic pass/fail checks
|
|
106
|
+
|
|
107
|
+
Promptfoo assertions go in the `assert` array inside each test case:
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
{
|
|
111
|
+
"description": "test case with assertions",
|
|
112
|
+
"vars": {"transcript": "..."},
|
|
113
|
+
"assert": [
|
|
114
|
+
{"type": "javascript", "value": "/meeting purpose/i.test(output)", "description": "has_purpose"},
|
|
115
|
+
{"type": "icontains", "value": "API migration", "description": "mentions_topic"},
|
|
116
|
+
{"type": "javascript", "value": "output.length >= 100", "description": "min_length"},
|
|
117
|
+
],
|
|
118
|
+
}
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### DeepEval metrics — semantic quality scoring via judge LLM
|
|
122
|
+
|
|
123
|
+
DeepEval metrics go in `additional_metrics` at the payload root:
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
{
|
|
127
|
+
"additional_metrics": [
|
|
128
|
+
{"name": "relevance", "criteria": "Output addresses the input topic with specific details.", "threshold": 0.7},
|
|
129
|
+
{"name": "structure", "criteria": "Output has clear sections and logical flow.", "threshold": 0.7},
|
|
130
|
+
],
|
|
131
|
+
"judge_profile": "openai_gpt41_mini",
|
|
132
|
+
}
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### KPI configs — custom weighted expressions
|
|
136
|
+
|
|
137
|
+
KPI configs go in `additional_kpis` at the payload root:
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
{
|
|
141
|
+
"additional_kpis": [
|
|
142
|
+
{"name": "cost_ok", "description": "Under $0.05", "expression": "1 if cost < 0.05 else 0", "weight": 1.0},
|
|
143
|
+
{"name": "fast", "description": "Under 10s", "expression": "1 if latency_ms < 10000 else 0", "weight": 1.0},
|
|
144
|
+
],
|
|
145
|
+
}
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
### Common patterns
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
# Promptfoo only (no judge LLM needed)
|
|
152
|
+
client.run_manual_eval({"tests": [{"vars": {...}, "assert": [...]}], ...})
|
|
153
|
+
|
|
154
|
+
# DeepEval only (semantic scoring, no deterministic checks)
|
|
155
|
+
client.run_manual_eval({"tests": [{"vars": {...}}], "additional_metrics": [...], ...})
|
|
156
|
+
|
|
157
|
+
# All three layers
|
|
158
|
+
client.run_manual_eval({"tests": [{"vars": {...}, "assert": [...]}], "additional_metrics": [...], "additional_kpis": [...], ...})
|
|
159
|
+
|
|
160
|
+
# No scoring (just run prompt, capture output)
|
|
161
|
+
client.run_manual_eval({"tests": [{"vars": {...}}], ...})
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
See `docs/ptm-client-integration.md` for the full test case reference with all assertion types, metric fields, and KPI variables.
|
|
165
|
+
|
|
166
|
+
## Inline Test Cases
|
|
167
|
+
|
|
168
|
+
### `run_manual_eval` — full control
|
|
169
|
+
|
|
170
|
+
```python
|
|
171
|
+
run = client.run_manual_eval({
|
|
172
|
+
"label": "my_custom_eval",
|
|
173
|
+
"prompt_text": '[{"role": "system", "content": "Summarize."}, {"role": "user", "content": "{{text}}"}]',
|
|
174
|
+
"tests": [
|
|
175
|
+
{"description": "short text", "vars": {"text": "The quick brown fox."}},
|
|
176
|
+
],
|
|
177
|
+
"provider_profiles": ["openai_gpt41_mini"],
|
|
178
|
+
"visibility_scope": "org_visible",
|
|
179
|
+
"cost_threshold": 1.0,
|
|
180
|
+
"latency_threshold_ms": 30000,
|
|
181
|
+
})
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
### `run_prompt_eval` — fetch prompt from PTM + inject live data
|
|
185
|
+
|
|
186
|
+
Recommended for service integrations:
|
|
187
|
+
|
|
188
|
+
```python
|
|
189
|
+
run = client.run_prompt_eval(
|
|
190
|
+
prompt_id="my_team.summarizer",
|
|
191
|
+
provider_ids=["openai_gpt41_mini"],
|
|
192
|
+
inject_vars={"transcript": real_transcript, "meeting_title": "Weekly 1:1"},
|
|
193
|
+
)
|
|
194
|
+
result = client.wait_for_run(run["run_key"], timeout=120)
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
With extra test cases:
|
|
198
|
+
|
|
199
|
+
```python
|
|
200
|
+
run = client.run_prompt_eval(
|
|
201
|
+
prompt_id="my_team.summarizer",
|
|
202
|
+
provider_ids=["openai_gpt41_mini"],
|
|
203
|
+
extra_tests=[
|
|
204
|
+
{"description": "edge case", "vars": {"transcript": edge_case_text}},
|
|
205
|
+
],
|
|
206
|
+
visibility_scope="private_only",
|
|
207
|
+
label="meeting_recap_edge_cases",
|
|
208
|
+
)
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
## Error Handling
|
|
212
|
+
|
|
213
|
+
```python
|
|
214
|
+
from ptm_client import PTMClient, PTMError, PTMTimeoutError
|
|
215
|
+
|
|
216
|
+
try:
|
|
217
|
+
result = client.wait_for_run(run_key, timeout=60)
|
|
218
|
+
except PTMTimeoutError:
|
|
219
|
+
print("Run did not complete in time")
|
|
220
|
+
except PTMError as e:
|
|
221
|
+
print(f"PTM API error ({e.status_code}): {e}")
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
## More
|
|
225
|
+
|
|
226
|
+
- **[Integration guide](https://github.com/15five/prompt-test-manager/blob/main/docs/ptm-client-integration.md)** — install methods, test case types, scoring layers, Django/FastAPI examples, chained evals
|
|
227
|
+
- **[Packaging roadmap](https://github.com/15five/prompt-test-manager/blob/main/docs/ptm-client-packaging-roadmap.md)** — PyPI/GitHub Packages publishing plan
|
|
228
|
+
- **[Examples](https://github.com/15five/prompt-test-manager/tree/main/docs/examples)** — runnable Python scripts for every use case
|
|
229
|
+
|
|
230
|
+
## Dependencies
|
|
231
|
+
|
|
232
|
+
`requests` only. No FastAPI, SQLAlchemy, Streamlit, or other PTM server deps.
|
|
233
|
+
|
|
234
|
+
## Development
|
|
235
|
+
|
|
236
|
+
```bash
|
|
237
|
+
pip install -e "packages/ptm-client[dev]"
|
|
238
|
+
cd packages/ptm-client
|
|
239
|
+
pytest tests/ -v
|
|
240
|
+
ruff check src/ tests/
|
|
241
|
+
ruff format src/ tests/
|
|
242
|
+
```
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=69", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "ptm-client"
|
|
7
|
+
version = "0.0.1"
|
|
8
|
+
description = "Lightweight PTM API client for integration with external Python services"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.12"
|
|
11
|
+
license = {text = "Proprietary"}
|
|
12
|
+
authors = [{name = "15Five Engineering"}]
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Development Status :: 4 - Beta",
|
|
15
|
+
"License :: Other/Proprietary License",
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"Programming Language :: Python :: 3.12",
|
|
18
|
+
"Programming Language :: Python :: 3.13",
|
|
19
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
20
|
+
"Topic :: Software Development :: Testing",
|
|
21
|
+
"Intended Audience :: Developers",
|
|
22
|
+
"Typing :: Typed",
|
|
23
|
+
]
|
|
24
|
+
dependencies = [
|
|
25
|
+
"requests>=2.32,<3.0",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
[project.urls]
|
|
29
|
+
Repository = "https://github.com/15five/prompt-test-manager"
|
|
30
|
+
Documentation = "https://github.com/15five/prompt-test-manager/tree/main/packages/ptm-client"
|
|
31
|
+
Changelog = "https://github.com/15five/prompt-test-manager/blob/main/docs/ptm-client-packaging-roadmap.md#changelog"
|
|
32
|
+
|
|
33
|
+
[project.optional-dependencies]
|
|
34
|
+
dev = [
|
|
35
|
+
"pytest>=8.2,<9.0",
|
|
36
|
+
"responses>=0.25,<1.0",
|
|
37
|
+
"ruff>=0.6,<1.0",
|
|
38
|
+
]
|
|
39
|
+
|
|
40
|
+
[tool.setuptools.packages.find]
|
|
41
|
+
where = ["src"]
|
|
42
|
+
|
|
43
|
+
[tool.ruff]
|
|
44
|
+
line-length = 100
|
|
45
|
+
target-version = "py312"
|
|
46
|
+
|
|
47
|
+
[tool.ruff.lint]
|
|
48
|
+
select = ["E", "F", "I", "B", "UP"]
|
|
49
|
+
|
|
50
|
+
[tool.ruff.format]
|
|
51
|
+
quote-style = "double"
|
|
52
|
+
indent-style = "space"
|
|
53
|
+
|
|
54
|
+
[tool.pytest.ini_options]
|
|
55
|
+
testpaths = ["tests"]
|