certaintylabs 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,34 @@
1
+ # Env and secrets (use .env.example as template)
2
+ .env
3
+ .env.local
4
+ *.env
5
+
6
+ # Python
7
+ __pycache__/
8
+ *.py[cod]
9
+ .eggs/
10
+ *.egg-info/
11
+ .venv/
12
+ venv/
13
+ .kaggle-venv/
14
+
15
+ # IDE
16
+ .idea/
17
+ .vscode/
18
+ *.swp
19
+ *.swo
20
+
21
+ # Build and cache
22
+ .pytest_cache/
23
+ .coverage
24
+ htmlcov/
25
+ dist/
26
+ build/
27
+ .next/
28
+
29
+ # Project-specific
30
+ certainty_workspace/
31
+ *.pt
32
+ *.log
33
+
34
+ node_modules/
@@ -0,0 +1,271 @@
1
+ Metadata-Version: 2.4
2
+ Name: certaintylabs
3
+ Version: 0.1.0
4
+ Summary: Python SDK for the Certainty Labs API — constraint enforcement for production LLMs
5
+ Project-URL: Homepage, https://certaintylabs.ai
6
+ Project-URL: Documentation, https://certaintylabs.ai/platform/docs
7
+ Project-URL: Repository, https://github.com/certainty-labs/certainty-sdk
8
+ Author: Certainty Labs
9
+ License-Expression: MIT
10
+ Keywords: certainty,constraints,energy-based-model,llm,reranking
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
+ Classifier: Typing :: Typed
22
+ Requires-Python: >=3.9
23
+ Requires-Dist: httpx>=0.27.0
24
+ Description-Content-Type: text/markdown
25
+
26
+ # Certainty Python SDK
27
+
28
+ Python client for the [Certainty Labs](https://certaintylabs.ai) API — constraint enforcement for production LLMs.
29
+
30
+ Train TransEBM energy models and rerank LLM outputs in a few lines of code.
31
+
32
+ The SDK supports **bring your own data** (in-memory or local JSONL), **tune training** (epochs, batch size, model size, learning rate, etc.), and **use your own LLM** in rerank to generate candidates (openai_api_key + openai_base_url). You provide EORM-format training data; generate it externally if needed (see below).
33
+
34
+ ## Install
35
+
36
+ ```bash
37
+ pip install certaintylabs
38
+ ```
39
+
40
+ ## Quick Start
41
+
42
+ ```python
43
+ from certaintylabs import Certainty
44
+
45
+ client = Certainty() # Uses fixed API URL; set CERTAINTY_API_KEY for auth
46
+
47
+ # Check the server is running
48
+ health = client.health()
49
+ print(health.version) # "0.1.0"
50
+
51
+ # Train on the built-in GSM8K math reasoning dataset
52
+ result = client.train(epochs=10, d_model=768)
53
+ print(f"Accuracy: {result.best_val_acc:.1%} in {result.elapsed_seconds:.0f}s")
54
+
55
+ # Rerank LLM candidate answers
56
+ best = client.rerank(
57
+ candidates=[
58
+ "Janet sells 16 - 3 - 4 = 9 eggs. 9 * 2 = $18. The answer is 18.",
59
+ "Janet has 16 eggs, sells all. 16 * 2 = $32.",
60
+ "Janet sells 16 - 3 - 4 = 9 duck eggs. 9 * $2 = $18. The answer is $18.",
61
+ ],
62
+ prompt="Janet's ducks lay 16 eggs per day. She eats three and bakes muffins with four. She sells the rest at $2 each. How much does she make?",
63
+ )
64
+ print(best.best_candidate)
65
+ ```
66
+
67
+ ## Async Support
68
+
69
+ ```python
70
+ from certaintylabs import AsyncCertainty
71
+
72
+ async def main():
73
+ async with AsyncCertainty() as client:
74
+ result = await client.train(epochs=5)
75
+ best = await client.rerank(["A", "B", "C"], prompt="...")
76
+ ```
77
+
78
+ ## Production: API key
79
+
80
+ The API base URL is fixed; you don't configure it. Set your API key via environment variable:
81
+
82
+ ```bash
83
+ export CERTAINTY_API_KEY="ck_your_key_here"
84
+ ```
85
+
86
+ ```python
87
+ from certaintylabs import Certainty
88
+
89
+ # Reads CERTAINTY_API_KEY from env
90
+ client = Certainty()
91
+ client.health()
92
+ ```
93
+
94
+ You can also pass `api_key` explicitly to override the environment.
95
+
96
+ ## Data options
97
+
98
+ | Option | SDK / API |
99
+ |--------|-----------|
100
+ | **Built-in dataset** | `train(epochs=10)` with no data → uses GSM8K |
101
+ | **Your data** | `train_with_data(samples)` or `train_from_file("path.jsonl")` or `train(data=...)` |
102
+ | **Rerank** | `rerank(candidates, prompt=...)` or have the API generate candidates: `rerank(prompt=..., openai_api_key=..., n_candidates=5)` |
103
+
104
+ ### Generating your own data externally
105
+
106
+ Training data must be **EORM format**: one JSON object per line with `question`, `label` (0 or 1), and `gen_text`. Create this data with your own pipeline (e.g. your LLM + your labeling rules or model-as-judge). Save as `.jsonl` and use `train_from_file(path)` or send the list to `train(data=...)`.
107
+
108
+ ## API Reference
109
+
110
+ ### `Certainty(api_key=None, timeout=300.0)`
111
+
112
+ | Parameter | Type | Default |
113
+ |------------|-----------------|--------------------------|
114
+ | `api_key` | `str` or `None` | `None` → env `CERTAINTY_API_KEY` or no auth |
115
+ | `timeout` | `float` | `300.0` |
116
+
117
+ The API base URL is fixed and not configurable.
118
+
119
+ ### Methods
120
+
121
+ #### `client.health() -> HealthResponse`
122
+
123
+ Returns API status and version.
124
+
125
+ #### Using your own data
126
+
127
+ You can train on in-memory data or a local JSONL file instead of server-side data.
128
+
129
+ **In-memory:** each record is a dict with `question`, `label`, and `gen_text` (EORM format).
130
+
131
+ ```python
132
+ samples = [
133
+ {"question": "What is 2+2?", "label": 1, "gen_text": "The answer is 4."},
134
+ {"question": "What is 3*3?", "label": 1, "gen_text": "The answer is 9."},
135
+ ]
136
+ result = client.train_with_data(samples, epochs=10)
137
+ ```
138
+
139
+ **Local file:** one JSON object per line (same keys).
140
+
141
+ ```python
142
+ result = client.train_from_file("my_data.jsonl", epochs=15, lr=1e-4)
143
+ ```
144
+
145
+ **Low-level:** pass `data=...` or `data_path=...` into `client.train()` for full control.
146
+
147
+ #### Tuning training parameters
148
+
149
+ Override defaults via keyword arguments or a `TrainingParams` object (omit fields to keep API defaults):
150
+
151
+ ```python
152
+ from certaintylabs import Certainty, TrainingParams
153
+
154
+ client = Certainty()
155
+
156
+ # Via kwargs
157
+ result = client.train(epochs=15, batch_size=2, lr=1e-4, max_length=1024)
158
+
159
+ # Via TrainingParams (good for reusing a config)
160
+ params = TrainingParams(epochs=15, batch_size=2, lr=1e-4, validate_every=2)
161
+ result = client.train(training_params=params)
162
+ # Or with your own data
163
+ result = client.train_with_data(samples, training_params=params)
164
+ ```
165
+
166
+ `TrainingParams` supports: `epochs`, `batch_size`, `d_model`, `n_heads`, `n_layers`, `lr`, `max_length`, `validate_every`, `val_holdout`.
167
+
168
+ #### Rerank with your own model to generate candidates
169
+
170
+ You can either pass pre-generated candidates or have the API **generate candidates with your LLM** and then rerank them in one call. Use your own base model API (OpenAI, Claude, Llama, etc.) for generation:
171
+
172
+ ```python
173
+ # Option A: You provide candidates (e.g. from your own LLM elsewhere)
174
+ best = client.rerank(
175
+ candidates=["answer A", "answer B", "answer C"],
176
+ prompt="What is 2+2?",
177
+ )
178
+
179
+ # Option B: API generates n_candidates with your LLM, then reranks
180
+ best = client.rerank(
181
+ prompt="What is 2+2?",
182
+ openai_api_key="sk-...",
183
+ openai_model="gpt-4o-mini",
184
+ openai_base_url="https://api.openai.com/v1",
185
+ n_candidates=5,
186
+ )
187
+ print(best.best_candidate) # best of the 5 generated answers
188
+ ```
189
+
190
+ #### `client.train(**kwargs) -> TrainResponse`
191
+
192
+ Train a TransEBM. Data source: `data` (list of records), `data_path` (server path), or neither (built-in GSM8K). Key parameters:
193
+
194
+ | Parameter | Type | Default |
195
+ |-------------------|-----------------|-----------|
196
+ | `yaml_content` | `str` or `None` | `None` |
197
+ | `data_path` | `str` or `None` | `None` |
198
+ | `data` | list of dicts | `None` |
199
+ | `epochs` | `int` | `20` |
200
+ | `d_model` | `int` | `768` |
201
+ | `n_heads` | `int` | `4` |
202
+ | `n_layers` | `int` | `2` |
203
+ | `lr` | `float` | `5e-5` |
204
+ | `max_length` | `int` | `2048` |
205
+ | `training_params` | `TrainingParams` or `None` | `None` |
206
+
207
+ ```python
208
+ result = client.train(data_path="path/to/gsm8k.jsonl", epochs=10)
209
+ print(result.model_path) # "./certainty_workspace/model/..."
210
+ print(result.best_val_acc) # 0.85
211
+ ```
212
+
213
+ #### `client.rerank(...) -> RerankResponse`
214
+
215
+ Rerank LLM outputs using a trained TransEBM. Either pass **candidates** you already have, or omit candidates and set **openai_api_key** (and optionally **openai_model**, **openai_base_url**) so the API generates **n_candidates** with your LLM and then reranks them.
216
+
217
+ | Parameter | Type | Default |
218
+ |--------------------|-----------------|---------|
219
+ | `candidates` | `List[str]` or `None` | `None` (use with `openai_api_key` to generate) |
220
+ | `prompt` | `str` | `""` |
221
+ | `model_path` | `str` | `"./certainty_workspace/model/ebm_certainty_model.pt"` |
222
+ | `tokenizer_path` | `str` or `None` | `None` |
223
+ | `openai_api_key` | `str` or `None` | `None` |
224
+ | `openai_model` | `str` or `None` | `None` |
225
+ | `openai_base_url` | `str` or `None` | `None` |
226
+ | `n_candidates` | `int` | `5` (only used when generating via your API) |
227
+
228
+ ```python
229
+ best = client.rerank(
230
+ candidates=["answer A", "answer B", "answer C"],
231
+ prompt="What is 2+2?",
232
+ )
233
+ print(best.best_candidate) # the highest-scored candidate
234
+ print(best.all_energies) # energy scores for each candidate
235
+ ```
236
+
237
+ #### `client.pipeline(**kwargs) -> PipelineResponse`
238
+
239
+ Run train (on your data or built-in) then optionally rerank. Pass `data` or `data_path` to use your data; omit for built-in. Pass `candidates` to rerank after training.
240
+
241
+ ```python
242
+ result = client.pipeline(epochs=10, candidates=["answer A", "answer B"])
243
+ print(result.train.best_val_acc)
244
+ if result.rerank:
245
+ print(result.rerank.best_candidate)
246
+ ```
247
+
248
+ ## Error Handling
249
+
250
+ ```python
251
+ from certaintylabs import Certainty, APIError, ConnectionError
252
+
253
+ client = Certainty()
254
+
255
+ try:
256
+ client.train(yaml_content="invalid yaml: [[[")
257
+ except APIError as e:
258
+ print(e.status_code) # 400
259
+ print(e.detail) # error message from the server
260
+
261
+ try:
262
+ # ConnectionError when server unreachable (base URL is fixed)
263
+ client = Certainty(timeout=2.0)
264
+ client.health()
265
+ except ConnectionError as e:
266
+ print(e) # "Could not connect to <api-url>: ..."
267
+ ```
268
+
269
+ ## License
270
+
271
+ MIT
@@ -0,0 +1,246 @@
1
+ # Certainty Python SDK
2
+
3
+ Python client for the [Certainty Labs](https://certaintylabs.ai) API — constraint enforcement for production LLMs.
4
+
5
+ Train TransEBM energy models and rerank LLM outputs in a few lines of code.
6
+
7
+ The SDK supports **bring your own data** (in-memory or local JSONL), **tune training** (epochs, batch size, model size, learning rate, etc.), and **use your own LLM** in rerank to generate candidates (openai_api_key + openai_base_url). You provide EORM-format training data; generate it externally if needed (see below).
8
+
9
+ ## Install
10
+
11
+ ```bash
12
+ pip install certaintylabs
13
+ ```
14
+
15
+ ## Quick Start
16
+
17
+ ```python
18
+ from certaintylabs import Certainty
19
+
20
+ client = Certainty() # Uses fixed API URL; set CERTAINTY_API_KEY for auth
21
+
22
+ # Check the server is running
23
+ health = client.health()
24
+ print(health.version) # "0.1.0"
25
+
26
+ # Train on the built-in GSM8K math reasoning dataset
27
+ result = client.train(epochs=10, d_model=768)
28
+ print(f"Accuracy: {result.best_val_acc:.1%} in {result.elapsed_seconds:.0f}s")
29
+
30
+ # Rerank LLM candidate answers
31
+ best = client.rerank(
32
+ candidates=[
33
+ "Janet sells 16 - 3 - 4 = 9 eggs. 9 * 2 = $18. The answer is 18.",
34
+ "Janet has 16 eggs, sells all. 16 * 2 = $32.",
35
+ "Janet sells 16 - 3 - 4 = 9 duck eggs. 9 * $2 = $18. The answer is $18.",
36
+ ],
37
+ prompt="Janet's ducks lay 16 eggs per day. She eats three and bakes muffins with four. She sells the rest at $2 each. How much does she make?",
38
+ )
39
+ print(best.best_candidate)
40
+ ```
41
+
42
+ ## Async Support
43
+
44
+ ```python
45
+ from certaintylabs import AsyncCertainty
46
+
47
+ async def main():
48
+ async with AsyncCertainty() as client:
49
+ result = await client.train(epochs=5)
50
+ best = await client.rerank(["A", "B", "C"], prompt="...")
51
+ ```
52
+
53
+ ## Production: API key
54
+
55
+ The API base URL is fixed; you don't configure it. Set your API key via environment variable:
56
+
57
+ ```bash
58
+ export CERTAINTY_API_KEY="ck_your_key_here"
59
+ ```
60
+
61
+ ```python
62
+ from certaintylabs import Certainty
63
+
64
+ # Reads CERTAINTY_API_KEY from env
65
+ client = Certainty()
66
+ client.health()
67
+ ```
68
+
69
+ You can also pass `api_key` explicitly to override the environment.
70
+
71
+ ## Data options
72
+
73
+ | Option | SDK / API |
74
+ |--------|-----------|
75
+ | **Built-in dataset** | `train(epochs=10)` with no data → uses GSM8K |
76
+ | **Your data** | `train_with_data(samples)` or `train_from_file("path.jsonl")` or `train(data=...)` |
77
+ | **Rerank** | `rerank(candidates, prompt=...)` or have the API generate candidates: `rerank(prompt=..., openai_api_key=..., n_candidates=5)` |
78
+
79
+ ### Generating your own data externally
80
+
81
+ Training data must be **EORM format**: one JSON object per line with `question`, `label` (0 or 1), and `gen_text`. Create this data with your own pipeline (e.g. your LLM + your labeling rules or model-as-judge). Save as `.jsonl` and use `train_from_file(path)` or send the list to `train(data=...)`.
82
+
83
+ ## API Reference
84
+
85
+ ### `Certainty(api_key=None, timeout=300.0)`
86
+
87
+ | Parameter | Type | Default |
88
+ |------------|-----------------|--------------------------|
89
+ | `api_key` | `str` or `None` | `None` → env `CERTAINTY_API_KEY` or no auth |
90
+ | `timeout` | `float` | `300.0` |
91
+
92
+ The API base URL is fixed and not configurable.
93
+
94
+ ### Methods
95
+
96
+ #### `client.health() -> HealthResponse`
97
+
98
+ Returns API status and version.
99
+
100
+ #### Using your own data
101
+
102
+ You can train on in-memory data or a local JSONL file instead of server-side data.
103
+
104
+ **In-memory:** each record is a dict with `question`, `label`, and `gen_text` (EORM format).
105
+
106
+ ```python
107
+ samples = [
108
+ {"question": "What is 2+2?", "label": 1, "gen_text": "The answer is 4."},
109
+ {"question": "What is 3*3?", "label": 1, "gen_text": "The answer is 9."},
110
+ ]
111
+ result = client.train_with_data(samples, epochs=10)
112
+ ```
113
+
114
+ **Local file:** one JSON object per line (same keys).
115
+
116
+ ```python
117
+ result = client.train_from_file("my_data.jsonl", epochs=15, lr=1e-4)
118
+ ```
119
+
120
+ **Low-level:** pass `data=...` or `data_path=...` into `client.train()` for full control.
121
+
122
+ #### Tuning training parameters
123
+
124
+ Override defaults via keyword arguments or a `TrainingParams` object (omit fields to keep API defaults):
125
+
126
+ ```python
127
+ from certaintylabs import Certainty, TrainingParams
128
+
129
+ client = Certainty()
130
+
131
+ # Via kwargs
132
+ result = client.train(epochs=15, batch_size=2, lr=1e-4, max_length=1024)
133
+
134
+ # Via TrainingParams (good for reusing a config)
135
+ params = TrainingParams(epochs=15, batch_size=2, lr=1e-4, validate_every=2)
136
+ result = client.train(training_params=params)
137
+ # Or with your own data
138
+ result = client.train_with_data(samples, training_params=params)
139
+ ```
140
+
141
+ `TrainingParams` supports: `epochs`, `batch_size`, `d_model`, `n_heads`, `n_layers`, `lr`, `max_length`, `validate_every`, `val_holdout`.
142
+
143
+ #### Rerank with your own model to generate candidates
144
+
145
+ You can either pass pre-generated candidates or have the API **generate candidates with your LLM** and then rerank them in one call. Use your own base model API (OpenAI, Claude, Llama, etc.) for generation:
146
+
147
+ ```python
148
+ # Option A: You provide candidates (e.g. from your own LLM elsewhere)
149
+ best = client.rerank(
150
+ candidates=["answer A", "answer B", "answer C"],
151
+ prompt="What is 2+2?",
152
+ )
153
+
154
+ # Option B: API generates n_candidates with your LLM, then reranks
155
+ best = client.rerank(
156
+ prompt="What is 2+2?",
157
+ openai_api_key="sk-...",
158
+ openai_model="gpt-4o-mini",
159
+ openai_base_url="https://api.openai.com/v1",
160
+ n_candidates=5,
161
+ )
162
+ print(best.best_candidate) # best of the 5 generated answers
163
+ ```
164
+
165
+ #### `client.train(**kwargs) -> TrainResponse`
166
+
167
+ Train a TransEBM. Data source: `data` (list of records), `data_path` (server path), or neither (built-in GSM8K). Key parameters:
168
+
169
+ | Parameter | Type | Default |
170
+ |-------------------|-----------------|-----------|
171
+ | `yaml_content` | `str` or `None` | `None` |
172
+ | `data_path` | `str` or `None` | `None` |
173
+ | `data` | list of dicts | `None` |
174
+ | `epochs` | `int` | `20` |
175
+ | `d_model` | `int` | `768` |
176
+ | `n_heads` | `int` | `4` |
177
+ | `n_layers` | `int` | `2` |
178
+ | `lr` | `float` | `5e-5` |
179
+ | `max_length` | `int` | `2048` |
180
+ | `training_params` | `TrainingParams` or `None` | `None` |
181
+
182
+ ```python
183
+ result = client.train(data_path="path/to/gsm8k.jsonl", epochs=10)
184
+ print(result.model_path) # "./certainty_workspace/model/..."
185
+ print(result.best_val_acc) # 0.85
186
+ ```
187
+
188
+ #### `client.rerank(...) -> RerankResponse`
189
+
190
+ Rerank LLM outputs using a trained TransEBM. Either pass **candidates** you already have, or omit candidates and set **openai_api_key** (and optionally **openai_model**, **openai_base_url**) so the API generates **n_candidates** with your LLM and then reranks them.
191
+
192
+ | Parameter | Type | Default |
193
+ |--------------------|-----------------|---------|
194
+ | `candidates` | `List[str]` or `None` | `None` (use with `openai_api_key` to generate) |
195
+ | `prompt` | `str` | `""` |
196
+ | `model_path` | `str` | `"./certainty_workspace/model/ebm_certainty_model.pt"` |
197
+ | `tokenizer_path` | `str` or `None` | `None` |
198
+ | `openai_api_key` | `str` or `None` | `None` |
199
+ | `openai_model` | `str` or `None` | `None` |
200
+ | `openai_base_url` | `str` or `None` | `None` |
201
+ | `n_candidates` | `int` | `5` (only used when generating via your API) |
202
+
203
+ ```python
204
+ best = client.rerank(
205
+ candidates=["answer A", "answer B", "answer C"],
206
+ prompt="What is 2+2?",
207
+ )
208
+ print(best.best_candidate) # the highest-scored candidate
209
+ print(best.all_energies) # energy scores for each candidate
210
+ ```
211
+
212
+ #### `client.pipeline(**kwargs) -> PipelineResponse`
213
+
214
+ Run train (on your data or built-in) then optionally rerank. Pass `data` or `data_path` to use your data; omit for built-in. Pass `candidates` to rerank after training.
215
+
216
+ ```python
217
+ result = client.pipeline(epochs=10, candidates=["answer A", "answer B"])
218
+ print(result.train.best_val_acc)
219
+ if result.rerank:
220
+ print(result.rerank.best_candidate)
221
+ ```
222
+
223
+ ## Error Handling
224
+
225
+ ```python
226
+ from certaintylabs import Certainty, APIError, ConnectionError
227
+
228
+ client = Certainty()
229
+
230
+ try:
231
+ client.compile("invalid yaml: [[[")
232
+ except APIError as e:
233
+ print(e.status_code) # 400
234
+ print(e.detail) # error message from the server
235
+
236
+ try:
237
+ # ConnectionError when server unreachable (base URL is fixed)
238
+ client = Certainty(timeout=2.0)
239
+ client.health()
240
+ except ConnectionError as e:
241
+ print(e) # "Could not connect to <api-url>: ..."
242
+ ```
243
+
244
+ ## License
245
+
246
+ MIT
@@ -0,0 +1,30 @@
"""Certainty Labs Python SDK."""

from certaintylabs.async_client import AsyncCertainty
from certaintylabs.client import Certainty
from certaintylabs.exceptions import (
    APIError,
    CertaintyError,
    ConnectionError,
    TimeoutError,
)
from certaintylabs.types import (
    HealthResponse,
    PipelineResponse,
    RerankResponse,
    ScoreResponse,
    TrainResponse,
    TrainingParams,
)

# Public surface of the package: the two clients, the exception
# hierarchy, and the typed response/config objects.
__all__ = [
    "Certainty",
    "AsyncCertainty",
    "CertaintyError",
    "APIError",
    "ConnectionError",
    "TimeoutError",
    "HealthResponse",
    "TrainResponse",
    "TrainingParams",
    "RerankResponse",
    "ScoreResponse",
    "PipelineResponse",
]

__version__ = "0.1.0"
@@ -0,0 +1,308 @@
1
+ """Asynchronous client for the Certainty Labs API."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ from typing import Any, Dict, List, Optional
8
+
9
+ import httpx
10
+
11
+ from certaintylabs.exceptions import APIError, ConnectionError, TimeoutError
12
+ from certaintylabs.types import (
13
+ HealthResponse,
14
+ PipelineResponse,
15
+ RerankResponse,
16
+ ScoreResponse,
17
+ TrainResponse,
18
+ TrainingParams,
19
+ )
20
+
21
+ # Fixed API base URL — users do not configure this.
22
+ _BASE_URL = "https://sandboxtesting101--certainty-labs-api.modal.run"
23
+ _DEFAULT_TIMEOUT = 300.0
24
+
25
+ _ENV_API_KEY = "CERTAINTY_API_KEY"
26
+
27
+
28
class AsyncCertainty:
    """Asynchronous Python client for the Certainty Labs API.

    Set your API key via environment variable: CERTAINTY_API_KEY.

    The API base URL is fixed and not configurable; only the API key and
    the per-request timeout can be set by the caller.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        timeout: float = _DEFAULT_TIMEOUT,
    ):
        """Create a client.

        Args:
            api_key: Bearer token for the API. When ``None``, falls back to
                the ``CERTAINTY_API_KEY`` environment variable; when that is
                also unset, requests are sent unauthenticated.
            timeout: Per-request timeout in seconds (default 300).
        """
        self.base_url = _BASE_URL.rstrip("/")
        self.api_key = api_key if api_key is not None else os.environ.get(_ENV_API_KEY)
        self.timeout = timeout

        headers: Dict[str, str] = {"Content-Type": "application/json"}
        if self.api_key:
            headers["Authorization"] = f"Bearer {self.api_key}"

        self._client = httpx.AsyncClient(
            base_url=self.base_url,
            headers=headers,
            timeout=timeout,
        )

    async def _request(self, method: str, path: str, **kwargs: Any) -> dict:
        """Issue one HTTP request and return the decoded JSON body.

        Raises:
            ConnectionError: when the server is unreachable.
            TimeoutError: when the request exceeds the configured timeout.
            APIError: for any HTTP status >= 400.
        """
        try:
            resp = await self._client.request(method, path, **kwargs)
        except httpx.ConnectError as e:
            raise ConnectionError(self.base_url, e) from e
        except httpx.TimeoutException as e:
            raise TimeoutError(self.timeout, path) from e

        if resp.status_code >= 400:
            # Defensive parsing of the error body: a >=400 response may
            # advertise JSON but carry invalid JSON, or a JSON value that is
            # not a dict.  Previously an unguarded resp.json() here could
            # raise a decode error and mask the real server failure; fall
            # back to the raw text instead.
            detail: Any = resp.text
            error_type = None
            if resp.headers.get("content-type", "").startswith("application/json"):
                try:
                    body = resp.json()
                except ValueError:
                    body = None
                if isinstance(body, dict):
                    detail = body.get("detail", resp.text)
                    error_type = body.get("error_type")
            raise APIError(
                status_code=resp.status_code,
                detail=detail,
                error_type=error_type,
            )
        return resp.json()

    @staticmethod
    def _training_payload(
        *,
        epochs: int,
        batch_size: int,
        d_model: int,
        n_heads: int,
        n_layers: int,
        lr: float,
        max_length: int,
        validate_every: int,
        val_holdout: float,
        tokenizer_name: Optional[str] = None,
        data_path: Optional[str] = None,
        data: Optional[List[Dict[str, Any]]] = None,
    ) -> Dict[str, Any]:
        """Build the JSON payload shared by ``train`` and ``pipeline``.

        Optional keys (``tokenizer_name``, ``data_path``, ``data``) are only
        included when explicitly provided, so the API keeps its own defaults.
        """
        payload: Dict[str, Any] = {
            "epochs": epochs,
            "batch_size": batch_size,
            "d_model": d_model,
            "n_heads": n_heads,
            "n_layers": n_layers,
            "lr": lr,
            "max_length": max_length,
            "validate_every": validate_every,
            "val_holdout": val_holdout,
        }
        if tokenizer_name is not None:
            payload["tokenizer_name"] = tokenizer_name
        if data_path is not None:
            payload["data_path"] = data_path
        if data is not None:
            payload["data"] = data
        return payload

    # ── Endpoints ─────────────────────────────────────────────────────

    async def health(self) -> HealthResponse:
        """Check API health and version."""
        data = await self._request("GET", "/health")
        return HealthResponse(status=data["status"], version=data["version"])

    async def train(
        self,
        *,
        data_path: Optional[str] = None,
        data: Optional[List[Dict[str, Any]]] = None,
        tokenizer_name: Optional[str] = None,
        epochs: int = 20,
        batch_size: int = 1,
        d_model: int = 768,
        n_heads: int = 4,
        n_layers: int = 2,
        lr: float = 5e-5,
        max_length: int = 2048,
        validate_every: int = 1,
        val_holdout: float = 0.2,
        training_params: Optional[TrainingParams] = None,
    ) -> TrainResponse:
        """Train a TransEBM.

        Use ``data`` (in-memory EORM records) or ``data_path`` (server-side
        path), or neither for the built-in dataset.  Use ``tokenizer_name``
        for Qwen/Llama (e.g. qwen2.5-7b, llama-3.1-8b).  Non-``None`` fields
        of ``training_params`` override the corresponding keyword arguments.

        Returns:
            TrainResponse with model path, best validation accuracy, epochs
            trained and wall-clock time.
        """
        payload = self._training_payload(
            epochs=epochs,
            batch_size=batch_size,
            d_model=d_model,
            n_heads=n_heads,
            n_layers=n_layers,
            lr=lr,
            max_length=max_length,
            validate_every=validate_every,
            val_holdout=val_holdout,
            tokenizer_name=tokenizer_name,
            data_path=data_path,
            data=data,
        )
        if training_params:
            # Explicit TrainingParams fields win over the kwargs above.
            payload.update(
                {k: v for k, v in vars(training_params).items() if v is not None}
            )

        data_resp = await self._request("POST", "/train", json=payload)
        return TrainResponse(
            model_path=data_resp["model_path"],
            best_val_acc=data_resp["best_val_acc"],
            epochs_trained=data_resp["epochs_trained"],
            elapsed_seconds=data_resp["elapsed_seconds"],
        )

    async def train_with_data(
        self,
        samples: List[Dict[str, Any]],
        *,
        tokenizer_name: Optional[str] = None,
        epochs: int = 20,
        batch_size: int = 1,
        d_model: int = 768,
        n_heads: int = 4,
        n_layers: int = 2,
        lr: float = 5e-5,
        max_length: int = 2048,
        validate_every: int = 1,
        val_holdout: float = 0.2,
        training_params: Optional[TrainingParams] = None,
    ) -> TrainResponse:
        """Train on in-memory data.

        Each item in ``samples`` should have keys: question, label, gen_text
        (EORM format).  All other arguments are forwarded to :meth:`train`.
        """
        return await self.train(
            data=samples,
            tokenizer_name=tokenizer_name,
            epochs=epochs,
            batch_size=batch_size,
            d_model=d_model,
            n_heads=n_heads,
            n_layers=n_layers,
            lr=lr,
            max_length=max_length,
            validate_every=validate_every,
            val_holdout=val_holdout,
            training_params=training_params,
        )

    async def train_from_file(
        self,
        path: str,
        *,
        tokenizer_name: Optional[str] = None,
        epochs: int = 20,
        batch_size: int = 1,
        d_model: int = 768,
        n_heads: int = 4,
        n_layers: int = 2,
        lr: float = 5e-5,
        max_length: int = 2048,
        validate_every: int = 1,
        val_holdout: float = 0.2,
        training_params: Optional[TrainingParams] = None,
    ) -> TrainResponse:
        """Train on a local EORM JSONL file.

        Reads one JSON object per non-blank line and sends the records to the
        API via :meth:`train_with_data`.

        Raises:
            OSError: if the file cannot be read.
            json.JSONDecodeError: if a line is not valid JSON.
        """
        records: List[Dict[str, Any]] = []
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue  # skip blank lines rather than failing
                records.append(json.loads(line))
        return await self.train_with_data(
            records,
            tokenizer_name=tokenizer_name,
            epochs=epochs,
            batch_size=batch_size,
            d_model=d_model,
            n_heads=n_heads,
            n_layers=n_layers,
            lr=lr,
            max_length=max_length,
            validate_every=validate_every,
            val_holdout=val_holdout,
            training_params=training_params,
        )

    async def rerank(
        self,
        candidates: Optional[List[str]] = None,
        prompt: str = "",
        model_path: str = "./certainty_workspace/model/ebm_certainty_model.pt",
        tokenizer_path: Optional[str] = None,
        openai_api_key: Optional[str] = None,
        openai_model: Optional[str] = None,
        openai_base_url: Optional[str] = None,
        hf_model: Optional[str] = None,
        hf_token: Optional[str] = None,
        n_candidates: int = 5,
    ) -> RerankResponse:
        """Rerank LLM candidate outputs using a trained TransEBM model.

        Pass pre-generated ``candidates``, or leave empty and set
        ``openai_api_key`` or ``hf_model`` + ``hf_token`` so the API
        generates ``n_candidates``, then reranks.
        """
        payload: Dict[str, Any] = {
            "candidates": candidates if candidates is not None else [],
            "prompt": prompt,
            "model_path": model_path,
        }
        if tokenizer_path is not None:
            payload["tokenizer_path"] = tokenizer_path
        if openai_api_key is not None:
            payload["openai_api_key"] = openai_api_key
        if openai_model is not None:
            payload["openai_model"] = openai_model
        if openai_base_url is not None:
            payload["openai_base_url"] = openai_base_url
        if hf_model is not None:
            payload["hf_model"] = hf_model
        if hf_token is not None:
            payload["hf_token"] = hf_token
        # n_candidates only applies when the server generates candidates:
        # no candidates supplied AND a generation backend is configured.
        if (candidates is None or len(candidates) == 0) and (
            openai_api_key is not None or (hf_model and hf_token)
        ):
            payload["n_candidates"] = n_candidates

        data = await self._request("POST", "/rerank", json=payload)
        return RerankResponse(
            best_candidate=data["best_candidate"],
            best_index=data["best_index"],
            all_energies=data["all_energies"],
        )

    async def score(
        self,
        texts: List[str],
        prompt: str = "",
        model_path: str = "./certainty_workspace/model/ebm_certainty_model.pt",
        tokenizer_path: Optional[str] = None,
    ) -> ScoreResponse:
        """Get EBM energy scores for one or more outputs
        (verifiable/interpretable AI: logging, audit, confidence)."""
        payload: Dict[str, Any] = {
            "texts": texts,
            "prompt": prompt,
            "model_path": model_path,
        }
        if tokenizer_path is not None:
            payload["tokenizer_path"] = tokenizer_path
        data = await self._request("POST", "/score", json=payload)
        return ScoreResponse(energies=data["energies"])

    async def pipeline(
        self,
        *,
        data_path: Optional[str] = None,
        data: Optional[List[Dict[str, Any]]] = None,
        tokenizer_name: Optional[str] = None,
        epochs: int = 10,
        batch_size: int = 1,
        d_model: int = 768,
        n_heads: int = 4,
        n_layers: int = 2,
        lr: float = 5e-5,
        max_length: int = 2048,
        validate_every: int = 1,
        val_holdout: float = 0.2,
        candidates: Optional[List[str]] = None,
    ) -> PipelineResponse:
        """Run train (on your data or built-in) then optionally rerank
        ``candidates``."""
        payload = self._training_payload(
            epochs=epochs,
            batch_size=batch_size,
            d_model=d_model,
            n_heads=n_heads,
            n_layers=n_layers,
            lr=lr,
            max_length=max_length,
            validate_every=validate_every,
            val_holdout=val_holdout,
            tokenizer_name=tokenizer_name,
            data_path=data_path,
            data=data,
        )
        if candidates is not None:
            payload["candidates"] = candidates

        data_resp = await self._request("POST", "/pipeline", json=payload)
        return PipelineResponse._from_dict(data_resp)

    async def close(self) -> None:
        """Close the underlying HTTP connection pool."""
        await self._client.aclose()

    async def __aenter__(self) -> "AsyncCertainty":
        return self

    async def __aexit__(self, *args: Any) -> None:
        await self.close()
1
+ """Synchronous client for the Certainty Labs API."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ from typing import Any, Dict, List, Optional
8
+
9
+ import httpx
10
+
11
+ from certaintylabs.exceptions import APIError, ConnectionError, TimeoutError
12
+ from certaintylabs.types import (
13
+ HealthResponse,
14
+ PipelineResponse,
15
+ RerankResponse,
16
+ ScoreResponse,
17
+ TrainResponse,
18
+ TrainingParams,
19
+ )
20
+
21
+ # Fixed API base URL — users do not configure this.
22
+ _BASE_URL = "https://sandboxtesting101--certainty-labs-api.modal.run"
23
+ _DEFAULT_TIMEOUT = 300.0
24
+
25
+ _ENV_API_KEY = "CERTAINTY_API_KEY"
26
+
27
+
28
class Certainty:
    """Synchronous Python client for the Certainty Labs API.

    Set your API key via environment variable::

        export CERTAINTY_API_KEY="ck_..."

    Then in code::

        from certaintylabs import Certainty

        client = Certainty()  # reads api_key from env
        result = client.train(epochs=10)
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        timeout: float = _DEFAULT_TIMEOUT,
    ):
        """Create a client.

        Args:
            api_key: API key; falls back to the ``CERTAINTY_API_KEY`` env var
                when not given. Requests are sent unauthenticated if neither
                is set.
            timeout: Per-request timeout in seconds (training can be slow).
        """
        self.base_url = _BASE_URL.rstrip("/")
        # Explicit argument wins; otherwise read the key from the environment.
        self.api_key = api_key if api_key is not None else os.environ.get(_ENV_API_KEY)
        self.timeout = timeout

        headers: Dict[str, str] = {"Content-Type": "application/json"}
        if self.api_key:
            headers["Authorization"] = f"Bearer {self.api_key}"

        # One pooled HTTP client reused across all requests on this instance.
        self._client = httpx.Client(
            base_url=self.base_url,
            headers=headers,
            timeout=timeout,
        )

    def _request(self, method: str, path: str, **kwargs: Any) -> dict:
        """Send a request and return the decoded JSON response body.

        Maps transport failures onto the SDK exception hierarchy.

        Raises:
            ConnectionError: the server could not be reached.
            TimeoutError: the request exceeded the configured timeout.
            APIError: the server answered with a 4xx/5xx status.
        """
        try:
            resp = self._client.request(method, path, **kwargs)
        except httpx.ConnectError as e:
            raise ConnectionError(self.base_url, e) from e
        except httpx.TimeoutException as e:
            raise TimeoutError(self.timeout, path) from e

        if resp.status_code >= 400:
            # Only parse the error body as JSON when the server says it is JSON;
            # otherwise fall back to the raw text as the error detail.
            body = resp.json() if resp.headers.get("content-type", "").startswith("application/json") else {}
            raise APIError(
                status_code=resp.status_code,
                detail=body.get("detail", resp.text),
                error_type=body.get("error_type"),
            )
        return resp.json()

    # ── Endpoints ─────────────────────────────────────────────────────

    def health(self) -> HealthResponse:
        """Check API health and version."""
        data = self._request("GET", "/health")
        return HealthResponse(status=data["status"], version=data["version"])

    def train(
        self,
        *,
        data_path: Optional[str] = None,
        data: Optional[List[Dict[str, Any]]] = None,
        tokenizer_name: Optional[str] = None,
        epochs: int = 20,
        batch_size: int = 1,
        d_model: int = 768,
        n_heads: int = 4,
        n_layers: int = 2,
        lr: float = 5e-5,
        max_length: int = 2048,
        validate_every: int = 1,
        val_holdout: float = 0.2,
        training_params: Optional[TrainingParams] = None,
    ) -> TrainResponse:
        """Train a TransEBM energy model.

        Data source (one of):
        - ``data``: in-memory list of {question, label, gen_text} dicts
        - ``data_path``: server path to EORM JSONL (or use ``train_from_file`` for local path)
        If neither is given, the server uses its built-in GSM8K dataset.

        Use ``tokenizer_name`` for Qwen/Llama compatibility (e.g. ``qwen2.5-7b``, ``llama-3.1-8b`` or full HF ID).
        Use ``training_params`` to pass a TrainingParams object; any field set
        (non-None) on it takes precedence over the corresponding keyword
        argument, since it is applied after the keyword values.
        """
        payload: Dict[str, Any] = {
            "epochs": epochs,
            "batch_size": batch_size,
            "d_model": d_model,
            "n_heads": n_heads,
            "n_layers": n_layers,
            "lr": lr,
            "max_length": max_length,
            "validate_every": validate_every,
            "val_holdout": val_holdout,
        }
        if tokenizer_name is not None:
            payload["tokenizer_name"] = tokenizer_name
        # NOTE(review): non-None TrainingParams fields overwrite the kwargs
        # already placed in the payload above — training_params wins.
        if training_params:
            for k, v in vars(training_params).items():
                if v is not None:
                    payload[k] = v
        if data_path is not None:
            payload["data_path"] = data_path
        if data is not None:
            payload["data"] = data

        data_resp = self._request("POST", "/train", json=payload)
        return TrainResponse(
            model_path=data_resp["model_path"],
            best_val_acc=data_resp["best_val_acc"],
            epochs_trained=data_resp["epochs_trained"],
            elapsed_seconds=data_resp["elapsed_seconds"],
        )

    def train_with_data(
        self,
        samples: List[Dict[str, Any]],
        *,
        tokenizer_name: Optional[str] = None,
        epochs: int = 20,
        batch_size: int = 1,
        d_model: int = 768,
        n_heads: int = 4,
        n_layers: int = 2,
        lr: float = 5e-5,
        max_length: int = 2048,
        validate_every: int = 1,
        val_holdout: float = 0.2,
        training_params: Optional[TrainingParams] = None,
    ) -> TrainResponse:
        """Train on in-memory data. Each item in ``samples`` should have keys: question, label, gen_text.

        Thin convenience wrapper: forwards everything to ``train`` with
        ``data=samples``.
        """
        return self.train(
            data=samples,
            tokenizer_name=tokenizer_name,
            epochs=epochs,
            batch_size=batch_size,
            d_model=d_model,
            n_heads=n_heads,
            n_layers=n_layers,
            lr=lr,
            max_length=max_length,
            validate_every=validate_every,
            val_holdout=val_holdout,
            training_params=training_params,
        )

    def train_from_file(
        self,
        path: str,
        *,
        tokenizer_name: Optional[str] = None,
        epochs: int = 20,
        batch_size: int = 1,
        d_model: int = 768,
        n_heads: int = 4,
        n_layers: int = 2,
        lr: float = 5e-5,
        max_length: int = 2048,
        validate_every: int = 1,
        val_holdout: float = 0.2,
        training_params: Optional[TrainingParams] = None,
    ) -> TrainResponse:
        """Train on a local EORM JSONL file. Reads the file and sends records to the API.

        Raises:
            OSError: the file cannot be opened.
            json.JSONDecodeError: a non-blank line is not valid JSON.
        """
        records: List[Dict[str, Any]] = []
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                # Skip blank lines so trailing newlines / spacing don't break parsing.
                if not line:
                    continue
                records.append(json.loads(line))
        return self.train_with_data(
            records,
            tokenizer_name=tokenizer_name,
            epochs=epochs,
            batch_size=batch_size,
            d_model=d_model,
            n_heads=n_heads,
            n_layers=n_layers,
            lr=lr,
            max_length=max_length,
            validate_every=validate_every,
            val_holdout=val_holdout,
            training_params=training_params,
        )

    def rerank(
        self,
        candidates: Optional[List[str]] = None,
        prompt: str = "",
        model_path: str = "./certainty_workspace/model/ebm_certainty_model.pt",
        tokenizer_path: Optional[str] = None,
        openai_api_key: Optional[str] = None,
        openai_model: Optional[str] = None,
        openai_base_url: Optional[str] = None,
        hf_model: Optional[str] = None,
        hf_token: Optional[str] = None,
        n_candidates: int = 5,
    ) -> RerankResponse:
        """Rerank LLM candidate outputs using a trained TransEBM model.

        Pass pre-generated ``candidates``, or leave candidates empty and set either
        ``openai_api_key`` (and optionally ``openai_model``, ``openai_base_url``) or
        ``hf_model`` + ``hf_token`` (Hugging Face Inference for Qwen/Llama) so the API
        generates ``n_candidates``, then reranks them.
        """
        payload: Dict[str, Any] = {
            # The server expects a list even when generation is requested.
            "candidates": candidates if candidates is not None else [],
            "prompt": prompt,
            "model_path": model_path,
        }
        if tokenizer_path is not None:
            payload["tokenizer_path"] = tokenizer_path
        if openai_api_key is not None:
            payload["openai_api_key"] = openai_api_key
        if openai_model is not None:
            payload["openai_model"] = openai_model
        if openai_base_url is not None:
            payload["openai_base_url"] = openai_base_url
        if hf_model is not None:
            payload["hf_model"] = hf_model
        if hf_token is not None:
            payload["hf_token"] = hf_token
        # Only ask the server to generate candidates when none were supplied
        # AND a generation backend (OpenAI key, or HF model + token) is configured.
        if (candidates is None or len(candidates) == 0) and (openai_api_key is not None or (hf_model and hf_token)):
            payload["n_candidates"] = n_candidates

        data = self._request("POST", "/rerank", json=payload)
        return RerankResponse(
            best_candidate=data["best_candidate"],
            best_index=data["best_index"],
            all_energies=data["all_energies"],
        )

    def score(
        self,
        texts: List[str],
        prompt: str = "",
        model_path: str = "./certainty_workspace/model/ebm_certainty_model.pt",
        tokenizer_path: Optional[str] = None,
    ) -> ScoreResponse:
        """Get EBM energy scores for one or more outputs (no reranking).

        Use for verifiable/interpretable AI: log confidence, audit reliability, track scores over time.
        Lower energy = higher confidence / more constraint-satisfying.
        """
        payload: Dict[str, Any] = {
            "texts": texts,
            "prompt": prompt,
            "model_path": model_path,
        }
        if tokenizer_path is not None:
            payload["tokenizer_path"] = tokenizer_path
        data = self._request("POST", "/score", json=payload)
        return ScoreResponse(energies=data["energies"])

    def pipeline(
        self,
        *,
        data_path: Optional[str] = None,
        data: Optional[List[Dict[str, Any]]] = None,
        tokenizer_name: Optional[str] = None,
        epochs: int = 10,
        batch_size: int = 1,
        d_model: int = 768,
        n_heads: int = 4,
        n_layers: int = 2,
        lr: float = 5e-5,
        max_length: int = 2048,
        validate_every: int = 1,
        val_holdout: float = 0.2,
        candidates: Optional[List[str]] = None,
    ) -> PipelineResponse:
        """Run train (on your data or built-in) then optionally rerank candidates."""
        payload: Dict[str, Any] = {
            "epochs": epochs,
            "batch_size": batch_size,
            "d_model": d_model,
            "n_heads": n_heads,
            "n_layers": n_layers,
            "lr": lr,
            "max_length": max_length,
            "validate_every": validate_every,
            "val_holdout": val_holdout,
        }
        if tokenizer_name is not None:
            payload["tokenizer_name"] = tokenizer_name
        if data_path is not None:
            payload["data_path"] = data_path
        if data is not None:
            payload["data"] = data
        if candidates is not None:
            payload["candidates"] = candidates

        data_resp = self._request("POST", "/pipeline", json=payload)
        return PipelineResponse._from_dict(data_resp)

    def close(self) -> None:
        """Close the underlying HTTP connection pool."""
        self._client.close()

    def __enter__(self) -> "Certainty":
        # Enables ``with Certainty() as client:`` usage.
        return self

    def __exit__(self, *args: Any) -> None:
        # Release the connection pool when the ``with`` block exits.
        self.close()
@@ -0,0 +1,48 @@
1
+ """Exception types for the Certainty SDK."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Optional
6
+
7
+
8
class CertaintyError(Exception):
    """Base exception for all Certainty SDK errors.

    Catch this to handle every error the SDK can raise in one place.
    """


class APIError(CertaintyError):
    """The API returned a non-2xx response."""

    def __init__(
        self,
        status_code: int,
        detail: str,
        error_type: Optional[str] = None,
    ):
        self.status_code = status_code
        self.detail = detail
        self.error_type = error_type
        # Include the server-reported error type in the message when present.
        if error_type:
            message = f"[{status_code}] {error_type}: {detail}"
        else:
            message = f"[{status_code}] {detail}"
        super().__init__(message)


# NOTE: these names intentionally shadow the builtins within this module's
# namespace; callers import them from certaintylabs.exceptions explicitly.
class ConnectionError(CertaintyError):
    """Could not connect to the Certainty API server."""

    def __init__(self, base_url: str, cause: Optional[Exception] = None):
        self.base_url = base_url
        self.cause = cause
        super().__init__(f"Could not connect to {base_url}: {cause}")


class TimeoutError(CertaintyError):
    """The request timed out."""

    def __init__(self, timeout: float, endpoint: str):
        self.timeout = timeout
        self.endpoint = endpoint
        super().__init__(
            f"Request to {endpoint} timed out after {timeout}s. "
            f"Training can be slow — try increasing timeout."
        )
@@ -0,0 +1,76 @@
1
+ """Typed response objects for the Certainty Labs API."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Any, Dict, List, Optional
7
+
8
+
9
# Optional: pass to train() for clearer training config
@dataclass
class TrainingParams:
    """Training hyperparameters. Omit fields to use API defaults."""

    tokenizer_name: Optional[str] = None  # HuggingFace ID or alias, e.g. qwen2.5-7b, llama-3.1-8b
    epochs: Optional[int] = None
    batch_size: Optional[int] = None
    d_model: Optional[int] = None
    n_heads: Optional[int] = None
    n_layers: Optional[int] = None
    lr: Optional[float] = None
    max_length: Optional[int] = None
    validate_every: Optional[int] = None
    val_holdout: Optional[float] = None


@dataclass(frozen=True)
class HealthResponse:
    # Result of GET /health: service status string and deployed API version.
    status: str
    version: str


@dataclass(frozen=True)
class TrainResponse:
    # Result of POST /train.
    model_path: str
    best_val_acc: float
    epochs_trained: int
    elapsed_seconds: float


@dataclass(frozen=True)
class RerankResponse:
    # Result of POST /rerank: winning candidate plus per-candidate energies.
    best_candidate: str
    best_index: int
    all_energies: List[float]


@dataclass(frozen=True)
class ScoreResponse:
    """Energy scores for one or more outputs (verifiable/interpretable AI: logging, audit, confidence)."""

    energies: List[float]  # Lower = higher confidence / more constraint-satisfying


@dataclass(frozen=True)
class PipelineResponse:
    # Result of POST /pipeline: a train step, optionally followed by a rerank step.
    train: TrainResponse
    rerank: Optional[RerankResponse]

    @classmethod
    def _from_dict(cls, data: dict) -> "PipelineResponse":
        """Build a PipelineResponse from the raw /pipeline JSON payload."""
        t = data["train"]
        trained = TrainResponse(
            model_path=t["model_path"],
            best_val_acc=t["best_val_acc"],
            epochs_trained=t["epochs_trained"],
            elapsed_seconds=t["elapsed_seconds"],
        )
        r = data.get("rerank")
        # A missing, None, or empty "rerank" entry all mean "no rerank step ran".
        reranked = (
            RerankResponse(
                best_candidate=r["best_candidate"],
                best_index=r["best_index"],
                all_energies=r["all_energies"],
            )
            if r
            else None
        )
        return cls(train=trained, rerank=reranked)
@@ -0,0 +1,35 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "certaintylabs"
7
+ version = "0.1.0"
8
+ description = "Python SDK for the Certainty Labs API — constraint enforcement for production LLMs"
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.9"
12
+ dependencies = ["httpx>=0.27.0"]
13
+ authors = [{ name = "Certainty Labs" }]
14
+ keywords = ["llm", "constraints", "energy-based-model", "reranking", "certainty"]
15
+ classifiers = [
16
+ "Development Status :: 3 - Alpha",
17
+ "Intended Audience :: Developers",
18
+ "License :: OSI Approved :: MIT License",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.9",
21
+ "Programming Language :: Python :: 3.10",
22
+ "Programming Language :: Python :: 3.11",
23
+ "Programming Language :: Python :: 3.12",
24
+ "Programming Language :: Python :: 3.13",
25
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
26
+ "Typing :: Typed",
27
+ ]
28
+
29
+ [project.urls]
30
+ Homepage = "https://certaintylabs.ai"
31
+ Documentation = "https://certaintylabs.ai/platform/docs"
32
+ Repository = "https://github.com/certainty-labs/certainty-sdk"
33
+
34
+ [tool.hatch.build.targets.wheel]
35
+ packages = ["certaintylabs"]