certaintylabs 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,34 @@
1
+ # Env and secrets (use .env.example as template)
2
+ .env
3
+ .env.local
4
+ *.env
5
+
6
+ # Python
7
+ __pycache__/
8
+ *.py[cod]
9
+ .eggs/
10
+ *.egg-info/
11
+ .venv/
12
+ venv/
13
+ .kaggle-venv/
14
+
15
+ # IDE
16
+ .idea/
17
+ .vscode/
18
+ *.swp
19
+ *.swo
20
+
21
+ # Build and cache
22
+ .pytest_cache/
23
+ .coverage
24
+ htmlcov/
25
+ dist/
26
+ build/
27
+ .next/
28
+
29
+ # Project-specific
30
+ certainty_workspace/
31
+ *.pt
32
+ *.log
33
+
34
+ node_modules/
@@ -0,0 +1,271 @@
1
+ Metadata-Version: 2.4
2
+ Name: certaintylabs
3
+ Version: 0.1.0
4
+ Summary: Python SDK for the Certainty Labs API — constraint enforcement for production LLMs
5
+ Project-URL: Homepage, https://certaintylabs.ai
6
+ Project-URL: Documentation, https://certaintylabs.ai/platform/docs
7
+ Project-URL: Repository, https://github.com/certainty-labs/certainty-sdk
8
+ Author: Certainty Labs
9
+ License-Expression: MIT
10
+ Keywords: certainty,constraints,energy-based-model,llm,reranking
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
+ Classifier: Typing :: Typed
22
+ Requires-Python: >=3.9
23
+ Requires-Dist: httpx>=0.27.0
24
+ Description-Content-Type: text/markdown
25
+
26
+ # Certainty Python SDK
27
+
28
+ Python client for the [Certainty Labs](https://certaintylabs.ai) API — constraint enforcement for production LLMs.
29
+
30
+ Train TransEBM energy models and rerank LLM outputs in a few lines of code.
31
+
32
+ The SDK supports **bring your own data** (in-memory or local JSONL), **tune training** (epochs, batch size, model size, learning rate, etc.), and **use your own LLM** in rerank to generate candidates (openai_api_key + openai_base_url). You provide EORM-format training data; generate it externally if needed (see below).
33
+
34
+ ## Install
35
+
36
+ ```bash
37
+ pip install certaintylabs
38
+ ```
39
+
40
+ ## Quick Start
41
+
42
+ ```python
43
+ from certaintylabs import Certainty
44
+
45
+ client = Certainty() # Uses fixed API URL; set CERTAINTY_API_KEY for auth
46
+
47
+ # Check the server is running
48
+ health = client.health()
49
+ print(health.version) # "0.1.0"
50
+
51
+ # Train on the built-in GSM8K math reasoning dataset
52
+ result = client.train(epochs=10, d_model=768)
53
+ print(f"Accuracy: {result.best_val_acc:.1%} in {result.elapsed_seconds:.0f}s")
54
+
55
+ # Rerank LLM candidate answers
56
+ best = client.rerank(
57
+ candidates=[
58
+ "Janet sells 16 - 3 - 4 = 9 eggs. 9 * 2 = $18. The answer is 18.",
59
+ "Janet has 16 eggs, sells all. 16 * 2 = $32.",
60
+ "Janet sells 16 - 3 - 4 = 9 duck eggs. 9 * $2 = $18. The answer is $18.",
61
+ ],
62
+ prompt="Janet's ducks lay 16 eggs per day. She eats three and bakes muffins with four. She sells the rest at $2 each. How much does she make?",
63
+ )
64
+ print(best.best_candidate)
65
+ ```
66
+
67
+ ## Async Support
68
+
69
+ ```python
70
+ from certaintylabs import AsyncCertainty
71
+
72
+ async def main():
73
+ async with AsyncCertainty() as client:
74
+ result = await client.train(epochs=5)
75
+ best = await client.rerank(["A", "B", "C"], prompt="...")
76
+ ```
77
+
78
+ ## Production: API key
79
+
80
+ The API base URL is fixed; you don't configure it. Set your API key via environment variable:
81
+
82
+ ```bash
83
+ export CERTAINTY_API_KEY="ck_your_key_here"
84
+ ```
85
+
86
+ ```python
87
+ from certaintylabs import Certainty
88
+
89
+ # Reads CERTAINTY_API_KEY from env
90
+ client = Certainty()
91
+ client.health()
92
+ ```
93
+
94
+ You can also pass `api_key` explicitly to override the environment.
95
+
96
+ ## Data options
97
+
98
+ | Option | SDK / API |
99
+ |--------|-----------|
100
+ | **Built-in dataset** | `train(epochs=10)` with no data → uses GSM8K |
101
+ | **Your data** | `train_with_data(samples)` or `train_from_file("path.jsonl")` or `train(data=...)` |
102
+ | **Rerank** | `rerank(candidates, prompt=...)` or have the API generate candidates: `rerank(prompt=..., openai_api_key=..., n_candidates=5)` |
103
+
104
+ ### Generating your own data externally
105
+
106
+ Training data must be **EORM format**: one JSON object per line with `question`, `label` (0 or 1), and `gen_text`. Create this data with your own pipeline (e.g. your LLM + your labeling rules or model-as-judge). Save as `.jsonl` and use `train_from_file(path)` or send the list to `train(data=...)`.
107
+
108
+ ## API Reference
109
+
110
+ ### `Certainty(api_key=None, timeout=300.0)`
111
+
112
+ | Parameter | Type | Default |
113
+ |------------|-----------------|--------------------------|
114
+ | `api_key` | `str` or `None` | `None` → env `CERTAINTY_API_KEY` or no auth |
115
+ | `timeout` | `float` | `300.0` |
116
+
117
+ The API base URL is fixed and not configurable.
118
+
119
+ ### Methods
120
+
121
+ #### `client.health() -> HealthResponse`
122
+
123
+ Returns API status and version.
124
+
125
+ #### Using your own data
126
+
127
+ You can train on in-memory data or a local JSONL file instead of server-side data.
128
+
129
+ **In-memory:** each record is a dict with `question`, `label`, and `gen_text` (EORM format).
130
+
131
+ ```python
132
+ samples = [
133
+ {"question": "What is 2+2?", "label": 1, "gen_text": "The answer is 4."},
134
+ {"question": "What is 3*3?", "label": 1, "gen_text": "The answer is 9."},
135
+ ]
136
+ result = client.train_with_data(samples, epochs=10)
137
+ ```
138
+
139
+ **Local file:** one JSON object per line (same keys).
140
+
141
+ ```python
142
+ result = client.train_from_file("my_data.jsonl", epochs=15, lr=1e-4)
143
+ ```
144
+
145
+ **Low-level:** pass `data=...` or `data_path=...` into `client.train()` for full control.
146
+
147
+ #### Tuning training parameters
148
+
149
+ Override defaults via keyword arguments or a `TrainingParams` object (omit fields to keep API defaults):
150
+
151
+ ```python
152
+ from certaintylabs import Certainty, TrainingParams
153
+
154
+ client = Certainty()
155
+
156
+ # Via kwargs
157
+ result = client.train(epochs=15, batch_size=2, lr=1e-4, max_length=1024)
158
+
159
+ # Via TrainingParams (good for reusing a config)
160
+ params = TrainingParams(epochs=15, batch_size=2, lr=1e-4, validate_every=2)
161
+ result = client.train(training_params=params)
162
+ # Or with your own data
163
+ result = client.train_with_data(samples, training_params=params)
164
+ ```
165
+
166
+ `TrainingParams` supports: `epochs`, `batch_size`, `d_model`, `n_heads`, `n_layers`, `lr`, `max_length`, `validate_every`, `val_holdout`.
167
+
168
+ #### Rerank with your own model to generate candidates
169
+
170
+ You can either pass pre-generated candidates or have the API **generate candidates with your LLM** and then rerank them in one call. Use your own base model API (OpenAI, Claude, Llama, etc.) for generation:
171
+
172
+ ```python
173
+ # Option A: You provide candidates (e.g. from your own LLM elsewhere)
174
+ best = client.rerank(
175
+ candidates=["answer A", "answer B", "answer C"],
176
+ prompt="What is 2+2?",
177
+ )
178
+
179
+ # Option B: API generates n_candidates with your LLM, then reranks
180
+ best = client.rerank(
181
+ prompt="What is 2+2?",
182
+ openai_api_key="sk-...",
183
+ openai_model="gpt-4o-mini",
184
+ openai_base_url="https://api.openai.com/v1",
185
+ n_candidates=5,
186
+ )
187
+ print(best.best_candidate) # best of the 5 generated answers
188
+ ```
189
+
190
+ #### `client.train(**kwargs) -> TrainResponse`
191
+
192
+ Train a TransEBM. Data source: `data` (list of records), `data_path` (server path), or neither (built-in GSM8K). Key parameters:
193
+
194
+ | Parameter | Type | Default |
195
+ |-------------------|-----------------|-----------|
196
+ | `yaml_content` | `str` or `None` | `None` |
197
+ | `data_path` | `str` or `None` | `None` |
198
+ | `data` | list of dicts | `None` |
199
+ | `epochs` | `int` | `20` |
200
+ | `d_model` | `int` | `768` |
201
+ | `n_heads` | `int` | `4` |
202
+ | `n_layers` | `int` | `2` |
203
+ | `lr` | `float` | `5e-5` |
204
+ | `max_length` | `int` | `2048` |
205
+ | `training_params` | `TrainingParams` or `None` | `None` |
206
+
207
+ ```python
208
+ result = client.train(data_path="path/to/gsm8k.jsonl", epochs=10)
209
+ print(result.model_path) # "./certainty_workspace/model/..."
210
+ print(result.best_val_acc) # 0.85
211
+ ```
212
+
213
+ #### `client.rerank(...) -> RerankResponse`
214
+
215
+ Rerank LLM outputs using a trained TransEBM. Either pass **candidates** you already have, or omit candidates and set **openai_api_key** (and optionally **openai_model**, **openai_base_url**) so the API generates **n_candidates** with your LLM and then reranks them.
216
+
217
+ | Parameter | Type | Default |
218
+ |--------------------|-----------------|---------|
219
+ | `candidates` | `List[str]` or `None` | `None` (use with `openai_api_key` to generate) |
220
+ | `prompt` | `str` | `""` |
221
+ | `model_path` | `str` | `"./certainty_workspace/model/ebm_certainty_model.pt"` |
222
+ | `tokenizer_path` | `str` or `None` | `None` |
223
+ | `openai_api_key` | `str` or `None` | `None` |
224
+ | `openai_model` | `str` or `None` | `None` |
225
+ | `openai_base_url` | `str` or `None` | `None` |
226
+ | `n_candidates` | `int` | `5` (only used when generating via your API) |
227
+
228
+ ```python
229
+ best = client.rerank(
230
+ candidates=["answer A", "answer B", "answer C"],
231
+ prompt="What is 2+2?",
232
+ )
233
+ print(best.best_candidate) # the highest-scored candidate
234
+ print(best.all_energies) # energy scores for each candidate
235
+ ```
236
+
237
+ #### `client.pipeline(**kwargs) -> PipelineResponse`
238
+
239
+ Run train (on your data or built-in) then optionally rerank. Pass `data` or `data_path` to use your data; omit for built-in. Pass `candidates` to rerank after training.
240
+
241
+ ```python
242
+ result = client.pipeline(epochs=10, candidates=["answer A", "answer B"])
243
+ print(result.train.best_val_acc)
244
+ if result.rerank:
245
+ print(result.rerank.best_candidate)
246
+ ```
247
+
248
+ ## Error Handling
249
+
250
+ ```python
251
+ from certaintylabs import Certainty, APIError, ConnectionError
252
+
253
+ client = Certainty()
254
+
255
+ try:
256
+ client.train(yaml_content="invalid yaml: [[[")
257
+ except APIError as e:
258
+ print(e.status_code) # 400
259
+ print(e.detail) # error message from the server
260
+
261
+ try:
262
+ # ConnectionError when server unreachable (base URL is fixed)
263
+ client = Certainty(timeout=2.0)
264
+ client.health()
265
+ except ConnectionError as e:
266
+ print(e) # "Could not connect to <api-url>: ..."
267
+ ```
268
+
269
+ ## License
270
+
271
+ MIT
@@ -0,0 +1,246 @@
1
+ # Certainty Python SDK
2
+
3
+ Python client for the [Certainty Labs](https://certaintylabs.ai) API — constraint enforcement for production LLMs.
4
+
5
+ Train TransEBM energy models and rerank LLM outputs in a few lines of code.
6
+
7
+ The SDK supports **bring your own data** (in-memory or local JSONL), **tune training** (epochs, batch size, model size, learning rate, etc.), and **use your own LLM** in rerank to generate candidates (openai_api_key + openai_base_url). You provide EORM-format training data; generate it externally if needed (see below).
8
+
9
+ ## Install
10
+
11
+ ```bash
12
+ pip install certaintylabs
13
+ ```
14
+
15
+ ## Quick Start
16
+
17
+ ```python
18
+ from certaintylabs import Certainty
19
+
20
+ client = Certainty() # Uses fixed API URL; set CERTAINTY_API_KEY for auth
21
+
22
+ # Check the server is running
23
+ health = client.health()
24
+ print(health.version) # "0.1.0"
25
+
26
+ # Train on the built-in GSM8K math reasoning dataset
27
+ result = client.train(epochs=10, d_model=768)
28
+ print(f"Accuracy: {result.best_val_acc:.1%} in {result.elapsed_seconds:.0f}s")
29
+
30
+ # Rerank LLM candidate answers
31
+ best = client.rerank(
32
+ candidates=[
33
+ "Janet sells 16 - 3 - 4 = 9 eggs. 9 * 2 = $18. The answer is 18.",
34
+ "Janet has 16 eggs, sells all. 16 * 2 = $32.",
35
+ "Janet sells 16 - 3 - 4 = 9 duck eggs. 9 * $2 = $18. The answer is $18.",
36
+ ],
37
+ prompt="Janet's ducks lay 16 eggs per day. She eats three and bakes muffins with four. She sells the rest at $2 each. How much does she make?",
38
+ )
39
+ print(best.best_candidate)
40
+ ```
41
+
42
+ ## Async Support
43
+
44
+ ```python
45
+ from certaintylabs import AsyncCertainty
46
+
47
+ async def main():
48
+ async with AsyncCertainty() as client:
49
+ result = await client.train(epochs=5)
50
+ best = await client.rerank(["A", "B", "C"], prompt="...")
51
+ ```
52
+
53
+ ## Production: API key
54
+
55
+ The API base URL is fixed; you don't configure it. Set your API key via environment variable:
56
+
57
+ ```bash
58
+ export CERTAINTY_API_KEY="ck_your_key_here"
59
+ ```
60
+
61
+ ```python
62
+ from certaintylabs import Certainty
63
+
64
+ # Reads CERTAINTY_API_KEY from env
65
+ client = Certainty()
66
+ client.health()
67
+ ```
68
+
69
+ You can also pass `api_key` explicitly to override the environment.
70
+
71
+ ## Data options
72
+
73
+ | Option | SDK / API |
74
+ |--------|-----------|
75
+ | **Built-in dataset** | `train(epochs=10)` with no data → uses GSM8K |
76
+ | **Your data** | `train_with_data(samples)` or `train_from_file("path.jsonl")` or `train(data=...)` |
77
+ | **Rerank** | `rerank(candidates, prompt=...)` or have the API generate candidates: `rerank(prompt=..., openai_api_key=..., n_candidates=5)` |
78
+
79
+ ### Generating your own data externally
80
+
81
+ Training data must be **EORM format**: one JSON object per line with `question`, `label` (0 or 1), and `gen_text`. Create this data with your own pipeline (e.g. your LLM + your labeling rules or model-as-judge). Save as `.jsonl` and use `train_from_file(path)` or send the list to `train(data=...)`.
82
+
83
+ ## API Reference
84
+
85
+ ### `Certainty(api_key=None, timeout=300.0)`
86
+
87
+ | Parameter | Type | Default |
88
+ |------------|-----------------|--------------------------|
89
+ | `api_key` | `str` or `None` | `None` → env `CERTAINTY_API_KEY` or no auth |
90
+ | `timeout` | `float` | `300.0` |
91
+
92
+ The API base URL is fixed and not configurable.
93
+
94
+ ### Methods
95
+
96
+ #### `client.health() -> HealthResponse`
97
+
98
+ Returns API status and version.
99
+
100
+ #### Using your own data
101
+
102
+ You can train on in-memory data or a local JSONL file instead of server-side data.
103
+
104
+ **In-memory:** each record is a dict with `question`, `label`, and `gen_text` (EORM format).
105
+
106
+ ```python
107
+ samples = [
108
+ {"question": "What is 2+2?", "label": 1, "gen_text": "The answer is 4."},
109
+ {"question": "What is 3*3?", "label": 1, "gen_text": "The answer is 9."},
110
+ ]
111
+ result = client.train_with_data(samples, epochs=10)
112
+ ```
113
+
114
+ **Local file:** one JSON object per line (same keys).
115
+
116
+ ```python
117
+ result = client.train_from_file("my_data.jsonl", epochs=15, lr=1e-4)
118
+ ```
119
+
120
+ **Low-level:** pass `data=...` or `data_path=...` into `client.train()` for full control.
121
+
122
+ #### Tuning training parameters
123
+
124
+ Override defaults via keyword arguments or a `TrainingParams` object (omit fields to keep API defaults):
125
+
126
+ ```python
127
+ from certaintylabs import Certainty, TrainingParams
128
+
129
+ client = Certainty()
130
+
131
+ # Via kwargs
132
+ result = client.train(epochs=15, batch_size=2, lr=1e-4, max_length=1024)
133
+
134
+ # Via TrainingParams (good for reusing a config)
135
+ params = TrainingParams(epochs=15, batch_size=2, lr=1e-4, validate_every=2)
136
+ result = client.train(training_params=params)
137
+ # Or with your own data
138
+ result = client.train_with_data(samples, training_params=params)
139
+ ```
140
+
141
+ `TrainingParams` supports: `epochs`, `batch_size`, `d_model`, `n_heads`, `n_layers`, `lr`, `max_length`, `validate_every`, `val_holdout`.
142
+
143
+ #### Rerank with your own model to generate candidates
144
+
145
+ You can either pass pre-generated candidates or have the API **generate candidates with your LLM** and then rerank them in one call. Use your own base model API (OpenAI, Claude, Llama, etc.) for generation:
146
+
147
+ ```python
148
+ # Option A: You provide candidates (e.g. from your own LLM elsewhere)
149
+ best = client.rerank(
150
+ candidates=["answer A", "answer B", "answer C"],
151
+ prompt="What is 2+2?",
152
+ )
153
+
154
+ # Option B: API generates n_candidates with your LLM, then reranks
155
+ best = client.rerank(
156
+ prompt="What is 2+2?",
157
+ openai_api_key="sk-...",
158
+ openai_model="gpt-4o-mini",
159
+ openai_base_url="https://api.openai.com/v1",
160
+ n_candidates=5,
161
+ )
162
+ print(best.best_candidate) # best of the 5 generated answers
163
+ ```
164
+
165
+ #### `client.train(**kwargs) -> TrainResponse`
166
+
167
+ Train a TransEBM. Data source: `data` (list of records), `data_path` (server path), or neither (built-in GSM8K). Key parameters:
168
+
169
+ | Parameter | Type | Default |
170
+ |-------------------|-----------------|-----------|
171
+ | `yaml_content` | `str` or `None` | `None` |
172
+ | `data_path` | `str` or `None` | `None` |
173
+ | `data` | list of dicts | `None` |
174
+ | `epochs` | `int` | `20` |
175
+ | `d_model` | `int` | `768` |
176
+ | `n_heads` | `int` | `4` |
177
+ | `n_layers` | `int` | `2` |
178
+ | `lr` | `float` | `5e-5` |
179
+ | `max_length` | `int` | `2048` |
180
+ | `training_params` | `TrainingParams` or `None` | `None` |
181
+
182
+ ```python
183
+ result = client.train(data_path="path/to/gsm8k.jsonl", epochs=10)
184
+ print(result.model_path) # "./certainty_workspace/model/..."
185
+ print(result.best_val_acc) # 0.85
186
+ ```
187
+
188
+ #### `client.rerank(...) -> RerankResponse`
189
+
190
+ Rerank LLM outputs using a trained TransEBM. Either pass **candidates** you already have, or omit candidates and set **openai_api_key** (and optionally **openai_model**, **openai_base_url**) so the API generates **n_candidates** with your LLM and then reranks them.
191
+
192
+ | Parameter | Type | Default |
193
+ |--------------------|-----------------|---------|
194
+ | `candidates` | `List[str]` or `None` | `None` (use with `openai_api_key` to generate) |
195
+ | `prompt` | `str` | `""` |
196
+ | `model_path` | `str` | `"./certainty_workspace/model/ebm_certainty_model.pt"` |
197
+ | `tokenizer_path` | `str` or `None` | `None` |
198
+ | `openai_api_key` | `str` or `None` | `None` |
199
+ | `openai_model` | `str` or `None` | `None` |
200
+ | `openai_base_url` | `str` or `None` | `None` |
201
+ | `n_candidates` | `int` | `5` (only used when generating via your API) |
202
+
203
+ ```python
204
+ best = client.rerank(
205
+ candidates=["answer A", "answer B", "answer C"],
206
+ prompt="What is 2+2?",
207
+ )
208
+ print(best.best_candidate) # the highest-scored candidate
209
+ print(best.all_energies) # energy scores for each candidate
210
+ ```
211
+
212
+ #### `client.pipeline(**kwargs) -> PipelineResponse`
213
+
214
+ Run train (on your data or built-in) then optionally rerank. Pass `data` or `data_path` to use your data; omit for built-in. Pass `candidates` to rerank after training.
215
+
216
+ ```python
217
+ result = client.pipeline(epochs=10, candidates=["answer A", "answer B"])
218
+ print(result.train.best_val_acc)
219
+ if result.rerank:
220
+ print(result.rerank.best_candidate)
221
+ ```
222
+
223
+ ## Error Handling
224
+
225
+ ```python
226
+ from certaintylabs import Certainty, APIError, ConnectionError
227
+
228
+ client = Certainty()
229
+
230
+ try:
231
+ client.compile("invalid yaml: [[[")
232
+ except APIError as e:
233
+ print(e.status_code) # 400
234
+ print(e.detail) # error message from the server
235
+
236
+ try:
237
+ # ConnectionError when server unreachable (base URL is fixed)
238
+ client = Certainty(timeout=2.0)
239
+ client.health()
240
+ except ConnectionError as e:
241
+ print(e) # "Could not connect to <api-url>: ..."
242
+ ```
243
+
244
+ ## License
245
+
246
+ MIT
@@ -0,0 +1,30 @@
"""Certainty Labs Python SDK."""

from certaintylabs.async_client import AsyncCertainty
from certaintylabs.client import Certainty
from certaintylabs.exceptions import (
    APIError,
    CertaintyError,
    ConnectionError,
    TimeoutError,
)
from certaintylabs.types import (
    HealthResponse,
    PipelineResponse,
    RerankResponse,
    ScoreResponse,
    TrainResponse,
    TrainingParams,
)

# Public surface of the package: the two clients, the exception
# hierarchy, and the typed response/config objects.
__all__ = [
    "Certainty",
    "AsyncCertainty",
    "CertaintyError",
    "APIError",
    "ConnectionError",
    "TimeoutError",
    "HealthResponse",
    "TrainResponse",
    "TrainingParams",
    "RerankResponse",
    "ScoreResponse",
    "PipelineResponse",
]

__version__ = "0.1.0"
@@ -0,0 +1,308 @@
1
+ """Asynchronous client for the Certainty Labs API."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ from typing import Any, Dict, List, Optional
8
+
9
+ import httpx
10
+
11
+ from certaintylabs.exceptions import APIError, ConnectionError, TimeoutError
12
+ from certaintylabs.types import (
13
+ HealthResponse,
14
+ PipelineResponse,
15
+ RerankResponse,
16
+ ScoreResponse,
17
+ TrainResponse,
18
+ TrainingParams,
19
+ )
20
+
21
+ # Fixed API base URL — users do not configure this.
22
+ _BASE_URL = "https://sandboxtesting101--certainty-labs-api.modal.run"
23
+ _DEFAULT_TIMEOUT = 300.0
24
+
25
+ _ENV_API_KEY = "CERTAINTY_API_KEY"
26
+
27
+
28
class AsyncCertainty:
    """Asynchronous Python client for the Certainty Labs API.

    Set your API key via environment variable: CERTAINTY_API_KEY.

    The API base URL is fixed and not configurable; only the API key and
    the per-request timeout can be set by the caller.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        timeout: float = _DEFAULT_TIMEOUT,
    ):
        """Create a client.

        Args:
            api_key: Bearer token for the API. When ``None``, falls back to
                the ``CERTAINTY_API_KEY`` environment variable; when that is
                also unset, requests are sent unauthenticated.
            timeout: Per-request timeout in seconds (default 300).
        """
        self.base_url = _BASE_URL.rstrip("/")
        self.api_key = api_key if api_key is not None else os.environ.get(_ENV_API_KEY)
        self.timeout = timeout

        headers: Dict[str, str] = {"Content-Type": "application/json"}
        if self.api_key:
            headers["Authorization"] = f"Bearer {self.api_key}"

        self._client = httpx.AsyncClient(
            base_url=self.base_url,
            headers=headers,
            timeout=timeout,
        )

    async def _request(self, method: str, path: str, **kwargs: Any) -> dict:
        """Issue one HTTP request and return the decoded JSON body.

        Raises:
            ConnectionError: when the server is unreachable.
            TimeoutError: when the request exceeds the configured timeout.
            APIError: for any HTTP status >= 400.
        """
        try:
            resp = await self._client.request(method, path, **kwargs)
        except httpx.ConnectError as e:
            raise ConnectionError(self.base_url, e) from e
        except httpx.TimeoutException as e:
            raise TimeoutError(self.timeout, path) from e

        if resp.status_code >= 400:
            # Defensive parsing of the error body: a >=400 response may
            # advertise JSON but carry invalid JSON, or a JSON value that is
            # not a dict.  Previously an unguarded resp.json() here could
            # raise a decode error and mask the real server failure; fall
            # back to the raw text instead.
            detail: Any = resp.text
            error_type = None
            if resp.headers.get("content-type", "").startswith("application/json"):
                try:
                    body = resp.json()
                except ValueError:
                    body = None
                if isinstance(body, dict):
                    detail = body.get("detail", resp.text)
                    error_type = body.get("error_type")
            raise APIError(
                status_code=resp.status_code,
                detail=detail,
                error_type=error_type,
            )
        return resp.json()

    @staticmethod
    def _training_payload(
        *,
        epochs: int,
        batch_size: int,
        d_model: int,
        n_heads: int,
        n_layers: int,
        lr: float,
        max_length: int,
        validate_every: int,
        val_holdout: float,
        tokenizer_name: Optional[str] = None,
        data_path: Optional[str] = None,
        data: Optional[List[Dict[str, Any]]] = None,
    ) -> Dict[str, Any]:
        """Build the JSON payload shared by ``train`` and ``pipeline``.

        Optional keys (``tokenizer_name``, ``data_path``, ``data``) are only
        included when explicitly provided, so the API keeps its own defaults.
        """
        payload: Dict[str, Any] = {
            "epochs": epochs,
            "batch_size": batch_size,
            "d_model": d_model,
            "n_heads": n_heads,
            "n_layers": n_layers,
            "lr": lr,
            "max_length": max_length,
            "validate_every": validate_every,
            "val_holdout": val_holdout,
        }
        if tokenizer_name is not None:
            payload["tokenizer_name"] = tokenizer_name
        if data_path is not None:
            payload["data_path"] = data_path
        if data is not None:
            payload["data"] = data
        return payload

    # ── Endpoints ─────────────────────────────────────────────────────

    async def health(self) -> HealthResponse:
        """Check API health and version."""
        data = await self._request("GET", "/health")
        return HealthResponse(status=data["status"], version=data["version"])

    async def train(
        self,
        *,
        data_path: Optional[str] = None,
        data: Optional[List[Dict[str, Any]]] = None,
        tokenizer_name: Optional[str] = None,
        epochs: int = 20,
        batch_size: int = 1,
        d_model: int = 768,
        n_heads: int = 4,
        n_layers: int = 2,
        lr: float = 5e-5,
        max_length: int = 2048,
        validate_every: int = 1,
        val_holdout: float = 0.2,
        training_params: Optional[TrainingParams] = None,
    ) -> TrainResponse:
        """Train a TransEBM.

        Use ``data`` (in-memory EORM records) or ``data_path`` (server-side
        path), or neither for the built-in dataset.  Use ``tokenizer_name``
        for Qwen/Llama (e.g. qwen2.5-7b, llama-3.1-8b).  Non-``None`` fields
        of ``training_params`` override the corresponding keyword arguments.

        Returns:
            TrainResponse with model path, best validation accuracy, epochs
            trained and wall-clock time.
        """
        payload = self._training_payload(
            epochs=epochs,
            batch_size=batch_size,
            d_model=d_model,
            n_heads=n_heads,
            n_layers=n_layers,
            lr=lr,
            max_length=max_length,
            validate_every=validate_every,
            val_holdout=val_holdout,
            tokenizer_name=tokenizer_name,
            data_path=data_path,
            data=data,
        )
        if training_params:
            # Explicit TrainingParams fields win over the kwargs above.
            payload.update(
                {k: v for k, v in vars(training_params).items() if v is not None}
            )

        data_resp = await self._request("POST", "/train", json=payload)
        return TrainResponse(
            model_path=data_resp["model_path"],
            best_val_acc=data_resp["best_val_acc"],
            epochs_trained=data_resp["epochs_trained"],
            elapsed_seconds=data_resp["elapsed_seconds"],
        )

    async def train_with_data(
        self,
        samples: List[Dict[str, Any]],
        *,
        tokenizer_name: Optional[str] = None,
        epochs: int = 20,
        batch_size: int = 1,
        d_model: int = 768,
        n_heads: int = 4,
        n_layers: int = 2,
        lr: float = 5e-5,
        max_length: int = 2048,
        validate_every: int = 1,
        val_holdout: float = 0.2,
        training_params: Optional[TrainingParams] = None,
    ) -> TrainResponse:
        """Train on in-memory data.

        Each item in ``samples`` should have keys: question, label, gen_text
        (EORM format).  All other arguments are forwarded to :meth:`train`.
        """
        return await self.train(
            data=samples,
            tokenizer_name=tokenizer_name,
            epochs=epochs,
            batch_size=batch_size,
            d_model=d_model,
            n_heads=n_heads,
            n_layers=n_layers,
            lr=lr,
            max_length=max_length,
            validate_every=validate_every,
            val_holdout=val_holdout,
            training_params=training_params,
        )

    async def train_from_file(
        self,
        path: str,
        *,
        tokenizer_name: Optional[str] = None,
        epochs: int = 20,
        batch_size: int = 1,
        d_model: int = 768,
        n_heads: int = 4,
        n_layers: int = 2,
        lr: float = 5e-5,
        max_length: int = 2048,
        validate_every: int = 1,
        val_holdout: float = 0.2,
        training_params: Optional[TrainingParams] = None,
    ) -> TrainResponse:
        """Train on a local EORM JSONL file.

        Reads one JSON object per non-blank line and sends the records to the
        API via :meth:`train_with_data`.

        Raises:
            OSError: if the file cannot be read.
            json.JSONDecodeError: if a line is not valid JSON.
        """
        records: List[Dict[str, Any]] = []
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue  # skip blank lines rather than failing
                records.append(json.loads(line))
        return await self.train_with_data(
            records,
            tokenizer_name=tokenizer_name,
            epochs=epochs,
            batch_size=batch_size,
            d_model=d_model,
            n_heads=n_heads,
            n_layers=n_layers,
            lr=lr,
            max_length=max_length,
            validate_every=validate_every,
            val_holdout=val_holdout,
            training_params=training_params,
        )

    async def rerank(
        self,
        candidates: Optional[List[str]] = None,
        prompt: str = "",
        model_path: str = "./certainty_workspace/model/ebm_certainty_model.pt",
        tokenizer_path: Optional[str] = None,
        openai_api_key: Optional[str] = None,
        openai_model: Optional[str] = None,
        openai_base_url: Optional[str] = None,
        hf_model: Optional[str] = None,
        hf_token: Optional[str] = None,
        n_candidates: int = 5,
    ) -> RerankResponse:
        """Rerank LLM candidate outputs using a trained TransEBM model.

        Pass pre-generated ``candidates``, or leave empty and set
        ``openai_api_key`` or ``hf_model`` + ``hf_token`` so the API
        generates ``n_candidates``, then reranks.
        """
        payload: Dict[str, Any] = {
            "candidates": candidates if candidates is not None else [],
            "prompt": prompt,
            "model_path": model_path,
        }
        if tokenizer_path is not None:
            payload["tokenizer_path"] = tokenizer_path
        if openai_api_key is not None:
            payload["openai_api_key"] = openai_api_key
        if openai_model is not None:
            payload["openai_model"] = openai_model
        if openai_base_url is not None:
            payload["openai_base_url"] = openai_base_url
        if hf_model is not None:
            payload["hf_model"] = hf_model
        if hf_token is not None:
            payload["hf_token"] = hf_token
        # n_candidates only applies when the server generates candidates:
        # no candidates supplied AND a generation backend is configured.
        if (candidates is None or len(candidates) == 0) and (
            openai_api_key is not None or (hf_model and hf_token)
        ):
            payload["n_candidates"] = n_candidates

        data = await self._request("POST", "/rerank", json=payload)
        return RerankResponse(
            best_candidate=data["best_candidate"],
            best_index=data["best_index"],
            all_energies=data["all_energies"],
        )

    async def score(
        self,
        texts: List[str],
        prompt: str = "",
        model_path: str = "./certainty_workspace/model/ebm_certainty_model.pt",
        tokenizer_path: Optional[str] = None,
    ) -> ScoreResponse:
        """Get EBM energy scores for one or more outputs
        (verifiable/interpretable AI: logging, audit, confidence)."""
        payload: Dict[str, Any] = {
            "texts": texts,
            "prompt": prompt,
            "model_path": model_path,
        }
        if tokenizer_path is not None:
            payload["tokenizer_path"] = tokenizer_path
        data = await self._request("POST", "/score", json=payload)
        return ScoreResponse(energies=data["energies"])

    async def pipeline(
        self,
        *,
        data_path: Optional[str] = None,
        data: Optional[List[Dict[str, Any]]] = None,
        tokenizer_name: Optional[str] = None,
        epochs: int = 10,
        batch_size: int = 1,
        d_model: int = 768,
        n_heads: int = 4,
        n_layers: int = 2,
        lr: float = 5e-5,
        max_length: int = 2048,
        validate_every: int = 1,
        val_holdout: float = 0.2,
        candidates: Optional[List[str]] = None,
    ) -> PipelineResponse:
        """Run train (on your data or built-in) then optionally rerank
        ``candidates``."""
        payload = self._training_payload(
            epochs=epochs,
            batch_size=batch_size,
            d_model=d_model,
            n_heads=n_heads,
            n_layers=n_layers,
            lr=lr,
            max_length=max_length,
            validate_every=validate_every,
            val_holdout=val_holdout,
            tokenizer_name=tokenizer_name,
            data_path=data_path,
            data=data,
        )
        if candidates is not None:
            payload["candidates"] = candidates

        data_resp = await self._request("POST", "/pipeline", json=payload)
        return PipelineResponse._from_dict(data_resp)

    async def close(self) -> None:
        """Close the underlying HTTP connection pool."""
        await self._client.aclose()

    async def __aenter__(self) -> "AsyncCertainty":
        return self

    async def __aexit__(self, *args: Any) -> None:
        await self.close()
1
+ """Synchronous client for the Certainty Labs API."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ from typing import Any, Dict, List, Optional
8
+
9
+ import httpx
10
+
11
+ from certaintylabs.exceptions import APIError, ConnectionError, TimeoutError
12
+ from certaintylabs.types import (
13
+ HealthResponse,
14
+ PipelineResponse,
15
+ RerankResponse,
16
+ ScoreResponse,
17
+ TrainResponse,
18
+ TrainingParams,
19
+ )
20
+
21
+ # Fixed API base URL — users do not configure this.
22
+ _BASE_URL = "https://sandboxtesting101--certainty-labs-api.modal.run"
23
+ _DEFAULT_TIMEOUT = 300.0
24
+
25
+ _ENV_API_KEY = "CERTAINTY_API_KEY"
26
+
27
+
28
class Certainty:
    """Synchronous Python client for the Certainty Labs API.

    Set your API key via environment variable::

        export CERTAINTY_API_KEY="ck_..."

    Then in code::

        from certaintylabs import Certainty

        client = Certainty()  # reads api_key from env
        result = client.train(epochs=10)
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        timeout: float = _DEFAULT_TIMEOUT,
    ):
        """Create a client.

        Args:
            api_key: API key; falls back to the ``CERTAINTY_API_KEY`` env var
                when not given. Requests are sent unauthenticated if neither
                is set.
            timeout: Per-request timeout in seconds (training can be slow).
        """
        self.base_url = _BASE_URL.rstrip("/")
        # Explicit argument wins; otherwise read the key from the environment.
        self.api_key = api_key if api_key is not None else os.environ.get(_ENV_API_KEY)
        self.timeout = timeout

        headers: Dict[str, str] = {"Content-Type": "application/json"}
        if self.api_key:
            headers["Authorization"] = f"Bearer {self.api_key}"

        # One pooled HTTP client reused across all requests on this instance.
        self._client = httpx.Client(
            base_url=self.base_url,
            headers=headers,
            timeout=timeout,
        )

    def _request(self, method: str, path: str, **kwargs: Any) -> dict:
        """Send a request and return the decoded JSON response body.

        Maps transport failures onto the SDK exception hierarchy.

        Raises:
            ConnectionError: the server could not be reached.
            TimeoutError: the request exceeded the configured timeout.
            APIError: the server answered with a 4xx/5xx status.
        """
        try:
            resp = self._client.request(method, path, **kwargs)
        except httpx.ConnectError as e:
            raise ConnectionError(self.base_url, e) from e
        except httpx.TimeoutException as e:
            raise TimeoutError(self.timeout, path) from e

        if resp.status_code >= 400:
            # Only parse the error body as JSON when the server says it is JSON;
            # otherwise fall back to the raw text as the error detail.
            body = resp.json() if resp.headers.get("content-type", "").startswith("application/json") else {}
            raise APIError(
                status_code=resp.status_code,
                detail=body.get("detail", resp.text),
                error_type=body.get("error_type"),
            )
        return resp.json()

    # ── Endpoints ─────────────────────────────────────────────────────

    def health(self) -> HealthResponse:
        """Check API health and version."""
        data = self._request("GET", "/health")
        return HealthResponse(status=data["status"], version=data["version"])

    def train(
        self,
        *,
        data_path: Optional[str] = None,
        data: Optional[List[Dict[str, Any]]] = None,
        tokenizer_name: Optional[str] = None,
        epochs: int = 20,
        batch_size: int = 1,
        d_model: int = 768,
        n_heads: int = 4,
        n_layers: int = 2,
        lr: float = 5e-5,
        max_length: int = 2048,
        validate_every: int = 1,
        val_holdout: float = 0.2,
        training_params: Optional[TrainingParams] = None,
    ) -> TrainResponse:
        """Train a TransEBM energy model.

        Data source (one of):
        - ``data``: in-memory list of {question, label, gen_text} dicts
        - ``data_path``: server path to EORM JSONL (or use ``train_from_file`` for local path)
        If neither is given, the server uses its built-in GSM8K dataset.

        Use ``tokenizer_name`` for Qwen/Llama compatibility (e.g. ``qwen2.5-7b``, ``llama-3.1-8b`` or full HF ID).
        Use ``training_params`` to pass a TrainingParams object; any field set
        (non-None) on it takes precedence over the corresponding keyword
        argument, since it is applied after the keyword values.
        """
        payload: Dict[str, Any] = {
            "epochs": epochs,
            "batch_size": batch_size,
            "d_model": d_model,
            "n_heads": n_heads,
            "n_layers": n_layers,
            "lr": lr,
            "max_length": max_length,
            "validate_every": validate_every,
            "val_holdout": val_holdout,
        }
        if tokenizer_name is not None:
            payload["tokenizer_name"] = tokenizer_name
        # NOTE(review): non-None TrainingParams fields overwrite the kwargs
        # already placed in the payload above — training_params wins.
        if training_params:
            for k, v in vars(training_params).items():
                if v is not None:
                    payload[k] = v
        if data_path is not None:
            payload["data_path"] = data_path
        if data is not None:
            payload["data"] = data

        data_resp = self._request("POST", "/train", json=payload)
        return TrainResponse(
            model_path=data_resp["model_path"],
            best_val_acc=data_resp["best_val_acc"],
            epochs_trained=data_resp["epochs_trained"],
            elapsed_seconds=data_resp["elapsed_seconds"],
        )

    def train_with_data(
        self,
        samples: List[Dict[str, Any]],
        *,
        tokenizer_name: Optional[str] = None,
        epochs: int = 20,
        batch_size: int = 1,
        d_model: int = 768,
        n_heads: int = 4,
        n_layers: int = 2,
        lr: float = 5e-5,
        max_length: int = 2048,
        validate_every: int = 1,
        val_holdout: float = 0.2,
        training_params: Optional[TrainingParams] = None,
    ) -> TrainResponse:
        """Train on in-memory data. Each item in ``samples`` should have keys: question, label, gen_text.

        Thin convenience wrapper: forwards everything to ``train`` with
        ``data=samples``.
        """
        return self.train(
            data=samples,
            tokenizer_name=tokenizer_name,
            epochs=epochs,
            batch_size=batch_size,
            d_model=d_model,
            n_heads=n_heads,
            n_layers=n_layers,
            lr=lr,
            max_length=max_length,
            validate_every=validate_every,
            val_holdout=val_holdout,
            training_params=training_params,
        )

    def train_from_file(
        self,
        path: str,
        *,
        tokenizer_name: Optional[str] = None,
        epochs: int = 20,
        batch_size: int = 1,
        d_model: int = 768,
        n_heads: int = 4,
        n_layers: int = 2,
        lr: float = 5e-5,
        max_length: int = 2048,
        validate_every: int = 1,
        val_holdout: float = 0.2,
        training_params: Optional[TrainingParams] = None,
    ) -> TrainResponse:
        """Train on a local EORM JSONL file. Reads the file and sends records to the API.

        Raises:
            OSError: the file cannot be opened.
            json.JSONDecodeError: a non-blank line is not valid JSON.
        """
        records: List[Dict[str, Any]] = []
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                # Skip blank lines so trailing newlines / spacing don't break parsing.
                if not line:
                    continue
                records.append(json.loads(line))
        return self.train_with_data(
            records,
            tokenizer_name=tokenizer_name,
            epochs=epochs,
            batch_size=batch_size,
            d_model=d_model,
            n_heads=n_heads,
            n_layers=n_layers,
            lr=lr,
            max_length=max_length,
            validate_every=validate_every,
            val_holdout=val_holdout,
            training_params=training_params,
        )

    def rerank(
        self,
        candidates: Optional[List[str]] = None,
        prompt: str = "",
        model_path: str = "./certainty_workspace/model/ebm_certainty_model.pt",
        tokenizer_path: Optional[str] = None,
        openai_api_key: Optional[str] = None,
        openai_model: Optional[str] = None,
        openai_base_url: Optional[str] = None,
        hf_model: Optional[str] = None,
        hf_token: Optional[str] = None,
        n_candidates: int = 5,
    ) -> RerankResponse:
        """Rerank LLM candidate outputs using a trained TransEBM model.

        Pass pre-generated ``candidates``, or leave candidates empty and set either
        ``openai_api_key`` (and optionally ``openai_model``, ``openai_base_url``) or
        ``hf_model`` + ``hf_token`` (Hugging Face Inference for Qwen/Llama) so the API
        generates ``n_candidates``, then reranks them.
        """
        payload: Dict[str, Any] = {
            # The server expects a list even when generation is requested.
            "candidates": candidates if candidates is not None else [],
            "prompt": prompt,
            "model_path": model_path,
        }
        if tokenizer_path is not None:
            payload["tokenizer_path"] = tokenizer_path
        if openai_api_key is not None:
            payload["openai_api_key"] = openai_api_key
        if openai_model is not None:
            payload["openai_model"] = openai_model
        if openai_base_url is not None:
            payload["openai_base_url"] = openai_base_url
        if hf_model is not None:
            payload["hf_model"] = hf_model
        if hf_token is not None:
            payload["hf_token"] = hf_token
        # Only ask the server to generate candidates when none were supplied
        # AND a generation backend (OpenAI key, or HF model + token) is configured.
        if (candidates is None or len(candidates) == 0) and (openai_api_key is not None or (hf_model and hf_token)):
            payload["n_candidates"] = n_candidates

        data = self._request("POST", "/rerank", json=payload)
        return RerankResponse(
            best_candidate=data["best_candidate"],
            best_index=data["best_index"],
            all_energies=data["all_energies"],
        )

    def score(
        self,
        texts: List[str],
        prompt: str = "",
        model_path: str = "./certainty_workspace/model/ebm_certainty_model.pt",
        tokenizer_path: Optional[str] = None,
    ) -> ScoreResponse:
        """Get EBM energy scores for one or more outputs (no reranking).

        Use for verifiable/interpretable AI: log confidence, audit reliability, track scores over time.
        Lower energy = higher confidence / more constraint-satisfying.
        """
        payload: Dict[str, Any] = {
            "texts": texts,
            "prompt": prompt,
            "model_path": model_path,
        }
        if tokenizer_path is not None:
            payload["tokenizer_path"] = tokenizer_path
        data = self._request("POST", "/score", json=payload)
        return ScoreResponse(energies=data["energies"])

    def pipeline(
        self,
        *,
        data_path: Optional[str] = None,
        data: Optional[List[Dict[str, Any]]] = None,
        tokenizer_name: Optional[str] = None,
        epochs: int = 10,
        batch_size: int = 1,
        d_model: int = 768,
        n_heads: int = 4,
        n_layers: int = 2,
        lr: float = 5e-5,
        max_length: int = 2048,
        validate_every: int = 1,
        val_holdout: float = 0.2,
        candidates: Optional[List[str]] = None,
    ) -> PipelineResponse:
        """Run train (on your data or built-in) then optionally rerank candidates."""
        payload: Dict[str, Any] = {
            "epochs": epochs,
            "batch_size": batch_size,
            "d_model": d_model,
            "n_heads": n_heads,
            "n_layers": n_layers,
            "lr": lr,
            "max_length": max_length,
            "validate_every": validate_every,
            "val_holdout": val_holdout,
        }
        if tokenizer_name is not None:
            payload["tokenizer_name"] = tokenizer_name
        if data_path is not None:
            payload["data_path"] = data_path
        if data is not None:
            payload["data"] = data
        if candidates is not None:
            payload["candidates"] = candidates

        data_resp = self._request("POST", "/pipeline", json=payload)
        return PipelineResponse._from_dict(data_resp)

    def close(self) -> None:
        """Close the underlying HTTP connection pool."""
        self._client.close()

    def __enter__(self) -> "Certainty":
        # Enables ``with Certainty() as client:`` usage.
        return self

    def __exit__(self, *args: Any) -> None:
        # Release the connection pool when the ``with`` block exits.
        self.close()
@@ -0,0 +1,48 @@
1
+ """Exception types for the Certainty SDK."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Optional
6
+
7
+
8
class CertaintyError(Exception):
    """Base exception for all Certainty SDK errors.

    Catch this to handle every error the SDK can raise in one place.
    """


class APIError(CertaintyError):
    """The API returned a non-2xx response."""

    def __init__(
        self,
        status_code: int,
        detail: str,
        error_type: Optional[str] = None,
    ):
        self.status_code = status_code
        self.detail = detail
        self.error_type = error_type
        # Include the server-reported error type in the message when present.
        if error_type:
            message = f"[{status_code}] {error_type}: {detail}"
        else:
            message = f"[{status_code}] {detail}"
        super().__init__(message)


# NOTE: these names intentionally shadow the builtins within this module's
# namespace; callers import them from certaintylabs.exceptions explicitly.
class ConnectionError(CertaintyError):
    """Could not connect to the Certainty API server."""

    def __init__(self, base_url: str, cause: Optional[Exception] = None):
        self.base_url = base_url
        self.cause = cause
        super().__init__(f"Could not connect to {base_url}: {cause}")


class TimeoutError(CertaintyError):
    """The request timed out."""

    def __init__(self, timeout: float, endpoint: str):
        self.timeout = timeout
        self.endpoint = endpoint
        super().__init__(
            f"Request to {endpoint} timed out after {timeout}s. "
            f"Training can be slow — try increasing timeout."
        )
@@ -0,0 +1,76 @@
1
+ """Typed response objects for the Certainty Labs API."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Any, Dict, List, Optional
7
+
8
+
9
# Optional: pass to train() for clearer training config
@dataclass
class TrainingParams:
    """Training hyperparameters. Omit fields to use API defaults."""

    tokenizer_name: Optional[str] = None  # HuggingFace ID or alias, e.g. qwen2.5-7b, llama-3.1-8b
    epochs: Optional[int] = None
    batch_size: Optional[int] = None
    d_model: Optional[int] = None
    n_heads: Optional[int] = None
    n_layers: Optional[int] = None
    lr: Optional[float] = None
    max_length: Optional[int] = None
    validate_every: Optional[int] = None
    val_holdout: Optional[float] = None


@dataclass(frozen=True)
class HealthResponse:
    # Result of GET /health: service status string and deployed API version.
    status: str
    version: str


@dataclass(frozen=True)
class TrainResponse:
    # Result of POST /train.
    model_path: str
    best_val_acc: float
    epochs_trained: int
    elapsed_seconds: float


@dataclass(frozen=True)
class RerankResponse:
    # Result of POST /rerank: winning candidate plus per-candidate energies.
    best_candidate: str
    best_index: int
    all_energies: List[float]


@dataclass(frozen=True)
class ScoreResponse:
    """Energy scores for one or more outputs (verifiable/interpretable AI: logging, audit, confidence)."""

    energies: List[float]  # Lower = higher confidence / more constraint-satisfying


@dataclass(frozen=True)
class PipelineResponse:
    # Result of POST /pipeline: a train step, optionally followed by a rerank step.
    train: TrainResponse
    rerank: Optional[RerankResponse]

    @classmethod
    def _from_dict(cls, data: dict) -> "PipelineResponse":
        """Build a PipelineResponse from the raw /pipeline JSON payload."""
        t = data["train"]
        trained = TrainResponse(
            model_path=t["model_path"],
            best_val_acc=t["best_val_acc"],
            epochs_trained=t["epochs_trained"],
            elapsed_seconds=t["elapsed_seconds"],
        )
        r = data.get("rerank")
        # A missing, None, or empty "rerank" entry all mean "no rerank step ran".
        reranked = (
            RerankResponse(
                best_candidate=r["best_candidate"],
                best_index=r["best_index"],
                all_energies=r["all_energies"],
            )
            if r
            else None
        )
        return cls(train=trained, rerank=reranked)
@@ -0,0 +1,35 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "certaintylabs"
7
+ version = "0.1.0"
8
+ description = "Python SDK for the Certainty Labs API — constraint enforcement for production LLMs"
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.9"
12
+ dependencies = ["httpx>=0.27.0"]
13
+ authors = [{ name = "Certainty Labs" }]
14
+ keywords = ["llm", "constraints", "energy-based-model", "reranking", "certainty"]
15
+ classifiers = [
16
+ "Development Status :: 3 - Alpha",
17
+ "Intended Audience :: Developers",
18
+ "License :: OSI Approved :: MIT License",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.9",
21
+ "Programming Language :: Python :: 3.10",
22
+ "Programming Language :: Python :: 3.11",
23
+ "Programming Language :: Python :: 3.12",
24
+ "Programming Language :: Python :: 3.13",
25
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
26
+ "Typing :: Typed",
27
+ ]
28
+
29
+ [project.urls]
30
+ Homepage = "https://certaintylabs.ai"
31
+ Documentation = "https://certaintylabs.ai/platform/docs"
32
+ Repository = "https://github.com/certainty-labs/certainty-sdk"
33
+
34
+ [tool.hatch.build.targets.wheel]
35
+ packages = ["certaintylabs"]