PyPI - cassette-sdk - Versions diffs - 0.1.0__tar.gz - Mend

cassette-sdk 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

cassette_sdk-0.1.0/.gitignore +9 -0
cassette_sdk-0.1.0/PKG-INFO +57 -0
cassette_sdk-0.1.0/README.md +39 -0
cassette_sdk-0.1.0/cassette/__init__.py +51 -0
cassette_sdk-0.1.0/cassette/drift.py +174 -0
cassette_sdk-0.1.0/cassette/recorder.py +115 -0
cassette_sdk-0.1.0/cassette/registry.py +47 -0
cassette_sdk-0.1.0/pyproject.toml +28 -0

cassette_sdk-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,9 @@
+node_modules/
+dist/
+.wrangler/
+*.log
+.dev.vars
+.env
+__pycache__/
+*.egg-info/
+.venv/

cassette_sdk-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,57 @@
+Metadata-Version: 2.4
+Name: cassette-sdk
+Version: 0.1.0
+Summary: One-call shim to route LLM SDK traffic through the Cassette record/replay gateway
+Project-URL: Homepage, https://github.com/NOVUS-STUDIOS-DEV/cassette
+Project-URL: Source, https://github.com/NOVUS-STUDIOS-DEV/cassette
+Author: Cassette
+License: MIT
+Keywords: agent,anthropic,ci,llm,openai,record,replay,testing,vcr
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Topic :: Software Development :: Testing
+Requires-Python: >=3.9
+Requires-Dist: httpx>=0.24
+Description-Content-Type: text/markdown
+# cassette-sdk
+Record/replay LLM & agent API calls so your tests run **fast, free, and deterministic**.
+The first time your tests run, Cassette records each LLM response to a local file. After that it
+replays the saved response — no network, no API key, no token cost, and no random failures from the
+model wording things differently.
+## Install
+```bash
+pip install cassette-sdk
+```
+## Use (in-process, no gateway)
+```python
+from cassette.recorder import http_client
+from openai import OpenAI
+client = OpenAI(http_client=http_client(project="demo"))  # records → replays locally
+```
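+Select the mode with the `CASSETTE_MODE` environment variable: `record` | `replay` | `auto` (the
+default: replay when a cassette exists, record otherwise). Cassettes are plain JSON files under
+`./.cassettes/<project>/` and diff cleanly in PRs.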
+## Detect real regressions
+```python
+from cassette.drift import compare_cassette_files
+result = compare_cassette_files("baseline.json", "new.json")
+print(result.verdict)  # identical | benign | regression
+```
+It ignores harmless rewording but flags changed tool calls, structured-output shape changes, and
+truncation as regressions.
+Free and open source (MIT). Team features (shared registry + GitHub PR merge-gate) at
+[cassette.dev](https://cassette.dev).

cassette_sdk-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,39 @@
+# cassette-sdk
+Record/replay LLM & agent API calls so your tests run **fast, free, and deterministic**.
+The first time your tests run, Cassette records each LLM response to a local file. After that it
+replays the saved response — no network, no API key, no token cost, and no random failures from the
+model wording things differently.
+## Install
+```bash
+pip install cassette-sdk
+```
+## Use (in-process, no gateway)
+```python
+from cassette.recorder import http_client
+from openai import OpenAI
+client = OpenAI(http_client=http_client(project="demo"))  # records → replays locally
+```
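+Select the mode with the `CASSETTE_MODE` environment variable: `record` | `replay` | `auto` (the
+default: replay when a cassette exists, record otherwise). Cassettes are plain JSON files under
+`./.cassettes/<project>/` and diff cleanly in PRs.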
+## Detect real regressions
+```python
+from cassette.drift import compare_cassette_files
+result = compare_cassette_files("baseline.json", "new.json")
+print(result.verdict)  # identical | benign | regression
+```
+It ignores harmless rewording but flags changed tool calls, structured-output shape changes, and
+truncation as regressions.
+Free and open source (MIT). Team features (shared registry + GitHub PR merge-gate) at
+[cassette.dev](https://cassette.dev).

cassette_sdk-0.1.0/cassette/__init__.py ADDED Viewed

@@ -0,0 +1,51 @@
+"""Cassette — optional one-call Python shim.
+You don't strictly need this: setting OPENAI_BASE_URL / ANTHROPIC_BASE_URL to the gateway URL is
+enough. This wrapper just composes those URLs from CASSETTE_* env vars and covers every provider at
+once, so a single `cassette.use()` call wires the whole test process.
+    import cassette
+    cassette.use()            # reads CASSETTE_GATEWAY / CASSETTE_PROJECT / CASSETTE_MODE
+    # ...all OpenAI/Anthropic/Google SDK calls now route through the gateway
+Env vars:
+    CASSETTE_GATEWAY   default http://localhost:8787
+    CASSETTE_PROJECT   default "default"
+    CASSETTE_MODE      record | replay | auto   (default "auto")
+"""
+from __future__ import annotations
+import os
+# Names exported by `from cassette import *`.
+__all__ = ["use", "base_url"]
+# Path suffix appended after "<gateway>/<project>/<mode>" for each provider base_url() knows.
+_PROVIDER_SUFFIX = {
+    "openai": "/openai/v1",
+    "anthropic": "/anthropic",
+    "google": "/google",
+}
+def base_url(provider: str, *, gateway: str | None = None, project: str | None = None,
+             mode: str | None = None) -> str:
+    """Compose the Cassette gateway base URL for one provider.
+    Explicit arguments win; anything left as None (or empty) falls back to CASSETTE_GATEWAY,
+    CASSETTE_PROJECT and CASSETTE_MODE, then to the built-in defaults. The result has the form
+    ``<gateway>/<project>/<mode><provider suffix>`` with trailing slashes stripped from the gateway.
+    Raises ValueError for a provider that has no known suffix.
+    """
+    env = os.environ
+    root = gateway if gateway else env.get("CASSETTE_GATEWAY", "http://localhost:8787")
+    root = root.rstrip("/")
+    project_name = project if project else env.get("CASSETTE_PROJECT", "default")
+    mode_name = mode if mode else env.get("CASSETTE_MODE", "auto")
+    suffix = _PROVIDER_SUFFIX.get(provider)
+    if suffix is None:
+        raise ValueError(f"unknown provider {provider!r}; known: {list(_PROVIDER_SUFFIX)}")
+    return f"{root}/{project_name}/{mode_name}{suffix}"
+def use(*, gateway: str | None = None, project: str | None = None, mode: str | None = None) -> dict:
+    """Export the gateway URLs through the base-URL env vars of the provider SDKs.
+    Sets OPENAI_BASE_URL, ANTHROPIC_BASE_URL and GOOGLE_GEMINI_BASE_URL in os.environ and returns
+    the same name -> URL mapping so callers can log or inspect it. Calling it again with the same
+    settings writes the same values.
+    """
+    targets = (
+        ("OPENAI_BASE_URL", "openai"),
+        ("ANTHROPIC_BASE_URL", "anthropic"),
+        ("GOOGLE_GEMINI_BASE_URL", "google"),
+    )
+    env_map = {}
+    for var_name, provider in targets:
+        env_map[var_name] = base_url(provider, gateway=gateway, project=project, mode=mode)
+    os.environ.update(env_map)
+    return env_map

cassette_sdk-0.1.0/cassette/drift.py ADDED Viewed

@@ -0,0 +1,174 @@
+"""Drift detection — the "did it REALLY break?" engine. This is the moat.
+A byte-diff is useless for LLM output: the model rewording an answer is NOT a regression, but a
+changed tool call, a changed JSON shape, or a truncated response IS. This module classifies the
+difference between two recorded responses into:
+    IDENTICAL    — same bytes
+    BENIGN       — only free-text wording changed; structure & behavior identical (non-determinism)
+    REGRESSION   — behavior changed: tool calls, structured output shape, or stop reason differ
+It is provider-aware (OpenAI + Anthropic chat shapes) and falls back to a generic JSON/text diff.
+An optional `semantic_judge` hook can UPGRADE a benign text change to a regression when the meaning
+diverges (embedding distance or an LLM judge) — that hook, trained on the cross-org corpus, is the
+part competitors can't cheaply copy. Default behavior is conservative and deterministic.
+Design rule (fail safe): when unsure whether a text-only change matters, default to BENIGN so the
+gate doesn't cry wolf — but ALWAYS surface the diff so a human can bless or reject it. The gate
+informs; it never silently decides.
+"""
+from __future__ import annotations
+import json
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Callable, Optional
+class Verdict(str, Enum):
+    """Classification of the drift between two recorded responses."""
+    IDENTICAL = "identical"  # the two bodies are equal
+    BENIGN = "benign"        # wording-only drift; behavior unchanged
+    REGRESSION = "regression"  # behavior changed — block the merge pending review
+@dataclass
+class DriftResult:
+    """Result of one comparison: the overall verdict plus the individual signals behind it."""
+    verdict: Verdict
+    # human-readable explanations for the verdict
+    reasons: list[str] = field(default_factory=list)
+    # structured behavioral signals, useful for the PR-check diff UI
+    tool_calls_changed: bool = False
+    structure_changed: bool = False
+    stop_reason_changed: bool = False
+    text_changed: bool = False
+    @property
+    def is_regression(self) -> bool:
+        """True when the verdict is REGRESSION."""
+        return self.verdict == Verdict.REGRESSION
+# A semantic judge takes (old_text, new_text) and returns True if the MEANING changed materially.
+# compare() consults it only when the text differs and no other signal already marked a regression.
+SemanticJudge = Callable[[str, str], bool]
+# --- provider-aware extraction of the BEHAVIORAL signal from a response body ---
+@dataclass
+class Behavior:
+    """Behavior-relevant fields pulled out of one response body by extract_behavior()."""
+    text: str  # assistant free text, or the raw body when no provider shape matched
+    tool_calls: list  # normalized [{name, arguments}]
+    stop_reason: Optional[str]  # OpenAI finish_reason / Anthropic stop_reason, when present
+    # parsed JSON if the text itself is JSON (structured output); the whole parsed payload in the
+    # generic fallback
+    structured: Optional[object]
+def _normalize_tool_call(name: str, arguments) -> dict:
+    """Return a ``{"name", "arguments"}`` dict, JSON-decoding string arguments when possible.
+    OpenAI delivers arguments as a JSON string while Anthropic delivers a dict; decoding the string
+    form lets both compare equal. A string that is not valid JSON is kept verbatim.
+    """
+    decoded = arguments
+    if isinstance(arguments, str):
+        try:
+            decoded = json.loads(arguments)
+        except (ValueError, TypeError):
+            decoded = arguments
+    return {"name": name, "arguments": decoded}
+def extract_behavior(body: str, provider: str = "") -> Behavior:
+    """Pull the behavior-relevant parts (text, tool calls, stop reason) out of a response body.
+    The provider shape is detected from the body itself: a dict with "choices" is read as an OpenAI
+    chat completion (first choice only), a dict whose "content" is a list is read as an Anthropic
+    message, and any other JSON is returned whole as the structured payload. A body that is not
+    JSON is returned as plain text. `provider` is accepted for API symmetry but not consulted here.
+    Malformed pieces inside a recognized shape (null message, non-dict choice / tool call / content
+    block) are treated as empty instead of raising, so a damaged recording still yields a
+    comparable Behavior.
+    """
+    try:
+        data = json.loads(body)
+    except (ValueError, TypeError):
+        # not JSON (or None): compare it as plain text
+        return Behavior(text=body or "", tool_calls=[], stop_reason=None, structured=None)
+    text, tool_calls, stop = "", [], None
+    # OpenAI chat completions
+    if isinstance(data, dict) and "choices" in data:
+        choices = data.get("choices")
+        choice = choices[0] if isinstance(choices, list) and choices else {}
+        if not isinstance(choice, dict):
+            choice = {}
+        msg = choice.get("message")
+        if not isinstance(msg, dict):
+            msg = {}  # covers both a missing message and an explicit null
+        text = msg.get("content") or ""
+        calls = msg.get("tool_calls")
+        if not isinstance(calls, list):
+            calls = []
+        for tc in calls:
+            fn = tc.get("function") if isinstance(tc, dict) else None
+            if not isinstance(fn, dict):
+                fn = {}  # keep a placeholder so the NUMBER of tool calls still compares
+            tool_calls.append(_normalize_tool_call(fn.get("name", ""), fn.get("arguments")))
+        stop = choice.get("finish_reason")
+    # Anthropic messages
+    elif isinstance(data, dict) and "content" in data and isinstance(data["content"], list):
+        parts = []
+        for block in data["content"]:
+            if not isinstance(block, dict):
+                continue
+            kind = block.get("type")
+            if kind == "text":
+                piece = block.get("text")
+                if isinstance(piece, str):
+                    parts.append(piece)
+            elif kind == "tool_use":
+                tool_calls.append(_normalize_tool_call(block.get("name", ""), block.get("input")))
+        text = "".join(parts)
+        stop = data.get("stop_reason")
+    # generic fallback: treat the whole JSON as the structured payload
+    else:
+        return Behavior(text=body, tool_calls=[], stop_reason=None, structured=data)
+    structured = None
+    if text:
+        try:
+            structured = json.loads(text)  # the model was asked for JSON output
+        except (ValueError, TypeError):
+            pass  # ordinary prose, not structured output
+    return Behavior(text=text, tool_calls=tool_calls, stop_reason=stop, structured=structured)
+def _shape(value: object) -> object:
+    """Reduce a JSON value to its structural skeleton, discarding leaf values.
+    A dict maps each key (in sorted order) to the skeleton of its value, a list is represented by
+    the skeleton of its first element only (or a "<list>" marker when empty), and any other value
+    becomes the name of its Python type.
+    """
+    if isinstance(value, list):
+        if value:
+            return [_shape(value[0])]
+        return ["<list>"]
+    if isinstance(value, dict):
+        skeleton = {}
+        for key in sorted(value):
+            skeleton[key] = _shape(value[key])
+        return skeleton
+    return type(value).__name__
+def compare(old_body: str, new_body: str, *, provider: str = "",
+            semantic_judge: Optional[SemanticJudge] = None) -> DriftResult:
+    """Classify the drift between two recorded response bodies.
+    Equal bodies are IDENTICAL. Otherwise the result starts as BENIGN and becomes REGRESSION when
+    tool calls, structured-output shape, or stop reason differ. A text-only difference stays BENIGN
+    unless `semantic_judge` is given and returns a truthy value for (old_text, new_text).
+    """
+    if old_body == new_body:
+        return DriftResult(Verdict.IDENTICAL, ["byte-identical"])
+    before = extract_behavior(old_body, provider)
+    after = extract_behavior(new_body, provider)
+    result = DriftResult(Verdict.BENIGN)
+    def flag(reason: str) -> None:
+        # mark the result as a regression and record why
+        result.verdict = Verdict.REGRESSION
+        result.reasons.append(reason)
+    # tool calls: the strongest behavioral signal
+    if before.tool_calls != after.tool_calls:
+        result.tool_calls_changed = True
+        flag(f"tool calls changed: {before.tool_calls!r} -> {after.tool_calls!r}")
+    # structured-output shape, checked only when at least one side produced JSON
+    has_structured = not (before.structured is None and after.structured is None)
+    if has_structured and _shape(before.structured) != _shape(after.structured):
+        result.structure_changed = True
+        flag("structured-output shape changed")
+    # stop / finish reason (e.g. 'stop' -> 'length' means truncation)
+    if before.stop_reason != after.stop_reason:
+        result.stop_reason_changed = True
+        flag(f"stop reason changed: {before.stop_reason} -> {after.stop_reason}")
+    # free text: benign unless the judge objects; skipped once a regression is already established
+    if before.text != after.text:
+        result.text_changed = True
+        if not result.is_regression:
+            if semantic_judge is None or not semantic_judge(before.text, after.text):
+                result.reasons.append("free-text wording changed (treated as benign non-determinism)")
+            else:
+                flag("semantic judge: answer meaning changed materially")
+    if not result.reasons:
+        result.reasons.append("non-behavioral difference only")
+    return result
+def compare_cassette_files(path_a: str, path_b: str,
+                           semantic_judge: Optional[SemanticJudge] = None) -> DriftResult:
+    """Compare two cassette JSON files (the blessed baseline vs a PR's recording).
+    Loads both files, takes the provider hint from the baseline's ``request.provider`` field when
+    present, and passes the two ``response.body`` strings to compare(). Raises OSError when a file
+    cannot be opened, ValueError when one is not valid UTF-8 JSON, and KeyError when
+    ``response.body`` is missing.
+    """
+    # JSON is UTF-8 by specification; do not depend on the platform's locale encoding
+    with open(path_a, encoding="utf-8") as fa, open(path_b, encoding="utf-8") as fb:
+        ca, cb = json.load(fa), json.load(fb)
+    provider = ca.get("request", {}).get("provider", "")
+    return compare(ca["response"]["body"], cb["response"]["body"],
+                   provider=provider, semantic_judge=semantic_judge)

cassette_sdk-0.1.0/cassette/recorder.py ADDED Viewed

@@ -0,0 +1,115 @@
+"""In-process recorder — the recording surface Cassette OWNS (no gateway required).
+This is the strategic core after the moat inversion: recording must NOT depend on routing traffic
+through a gateway, because a gateway VCR is a commodity an incumbent can bundle and a customer can
+self-host. Here we record at the HTTP-client layer inside the test process itself.
+    from cassette.recorder import http_client
+    from openai import OpenAI
+    client = OpenAI(http_client=http_client(project="demo"))   # records/replays locally
+    # ...calls now hit local cassettes; no gateway, no extra infra in CI
+Modes (env CASSETTE_MODE): record | replay | auto (default auto).
+Local cassette dir (env CASSETTE_DIR): default ./.cassettes
+The local cassette files conform to SPEC.md (the portable ".har of agent test traffic"). The hosted
+backend (shared registry, RBAC, GitHub Checks merge-gate, semantic-drift matcher) is a CLIENT-SERVER
+layer ON TOP of this format — never a precondition for recording. That boundary is the whole moat.
+"""
+from __future__ import annotations
+import hashlib
+import json
+import os
+from pathlib import Path
+from typing import Optional
+try:
+    import httpx
+except ImportError:  # pragma: no cover
+    httpx = None  # type: ignore
+# Request-body keys left out of the fingerprint (dropped by _canonical wherever they appear).
+_VOLATILE = {"stream_options", "user", "metadata"}
+# Response headers describing wire encoding/framing; removed when a response is rebuilt from a
+# stored body.
+_DROP_RESP_HEADERS = {"content-length", "content-encoding", "transfer-encoding", "connection"}
+class CassetteMiss(RuntimeError):
+    """Raised in replay mode when no cassette file exists for a request's fingerprint.
+    Replay mode does not fall back to the network, so a missing recording surfaces as this error.
+    """
+def _canonical(value: object) -> str:
+    """Serialize a parsed JSON value to a compact, key-order-independent string.
+    Dict keys are sorted and any key listed in _VOLATILE is dropped at every nesting level, so two
+    request bodies that differ only in key order or volatile fields serialize identically.
+    """
+    if isinstance(value, dict):
+        kept = [key for key in value if key not in _VOLATILE]
+        kept.sort()
+        members = []
+        for key in kept:
+            members.append(json.dumps(key) + ":" + _canonical(value[key]))
+        return "{" + ",".join(members) + "}"
+    if isinstance(value, list):
+        return "[" + ",".join(map(_canonical, value)) + "]"
+    return json.dumps(value)
+def fingerprint(method: str, url: str, body: str) -> str:
+    """Return the SHA-256 hex digest that identifies a request.
+    The digest covers the upper-cased method, the URL and the body, joined by newlines. A non-empty
+    body that parses as JSON is canonicalized by _canonical first; any other body is hashed as given.
+    """
+    normalized = body
+    if body:
+        try:
+            normalized = _canonical(json.loads(body))
+        except (ValueError, TypeError):
+            normalized = body
+    material = "\n".join((method.upper(), url, normalized))
+    digest = hashlib.sha256(material.encode())
+    return digest.hexdigest()
+if httpx is not None:
+    class CassetteTransport(httpx.BaseTransport):
+        """An httpx transport that records to / replays from local cassette files.
+        Each request is fingerprinted and mapped to ``<cassette_dir>/<fingerprint>.json``. In
+        "replay" and "auto" mode an existing cassette is served without touching the network; in
+        "replay" mode a missing cassette raises CassetteMiss. Otherwise the request is forwarded
+        to the wrapped transport and, for status codes below 400, the response is saved.
+        """
+        def __init__(self, inner: httpx.BaseTransport, cassette_dir: Path, mode: str):
+            """Wrap `inner`, creating `cassette_dir` (and any missing parents) if needed."""
+            self._inner = inner
+            self._dir = cassette_dir
+            self._mode = mode
+            self._dir.mkdir(parents=True, exist_ok=True)
+        def handle_request(self, request: "httpx.Request") -> "httpx.Response":
+            """Serve the request from its cassette when allowed, otherwise forward and record it."""
+            # undecodable bytes are dropped, so non-UTF-8 bodies are fingerprinted/stored lossily
+            body = request.content.decode("utf-8", "ignore")
+            fp = fingerprint(request.method, str(request.url), body)
+            path = self._dir / f"{fp}.json"
+            if self._mode in ("replay", "auto") and path.exists():
+                rec = json.loads(path.read_text())
+                headers = {k: v for k, v in rec["response"]["headers"].items()
+                           if k.lower() not in _DROP_RESP_HEADERS}
+                headers["x-cassette"] = "replay"
+                return httpx.Response(
+                    rec["response"]["status"], headers=headers,
+                    content=rec["response"]["body"].encode(), request=request,
+                )
+            if self._mode == "replay":
+                raise CassetteMiss(f"no cassette for {request.method} {request.url} (fp={fp[:12]})")
+            # record mode, or auto mode without a cassette: call the real API and save the result
+            resp = self._inner.handle_request(request)
+            content = resp.read()
+            if resp.status_code < 400:  # error responses are passed through but never recorded
+                path.write_text(json.dumps({
+                    "v": 1,
+                    "fingerprint": fp,
+                    "request": {"method": request.method, "url": str(request.url), "body": body},
+                    "response": {
+                        "status": resp.status_code,
+                        "headers": dict(resp.headers),
+                        "body": content.decode("utf-8", "ignore"),
+                    },
+                }, indent=2))
+            # `content` is the body as returned by resp.read(), i.e. already content-decoded.
+            # Handing it back together with the upstream content-encoding / content-length headers
+            # would make httpx try to decode it a second time, so strip the same framing headers
+            # the replay path strips. multi_items() keeps repeated headers as separate entries.
+            live_headers = [(k, v) for k, v in resp.headers.multi_items()
+                            if k.lower() not in _DROP_RESP_HEADERS]
+            return httpx.Response(resp.status_code, headers=live_headers, content=content,
+                                  request=request)
+        def close(self) -> None:
+            """Close the wrapped transport so its connections are released with the client."""
+            self._inner.close()
+def http_client(*, project: Optional[str] = None, mode: Optional[str] = None,
+                cassette_dir: Optional[str] = None) -> "httpx.Client":
+    """Return an httpx.Client whose transport records to / replays from local cassettes.
+    Arguments left as None (or empty) fall back to CASSETTE_PROJECT, CASSETTE_MODE and
+    CASSETTE_DIR, then to "default", "auto" and ".cassettes". Cassettes are kept in
+    ``<cassette_dir>/<project>``. Pass the client to e.g. OpenAI(http_client=...). Raises
+    RuntimeError when httpx is not installed.
+    """
+    if httpx is None:
+        raise RuntimeError("cassette.recorder requires httpx (pip install httpx)")
+    env = os.environ
+    project_name = project if project else env.get("CASSETTE_PROJECT", "default")
+    chosen_mode = mode if mode else env.get("CASSETTE_MODE", "auto")
+    root = cassette_dir if cassette_dir else env.get("CASSETTE_DIR", ".cassettes")
+    recorder = CassetteTransport(httpx.HTTPTransport(), Path(root) / project_name, chosen_mode)
+    return httpx.Client(transport=recorder)

cassette_sdk-0.1.0/cassette/registry.py ADDED Viewed

@@ -0,0 +1,47 @@
+"""Registry client — sync local cassettes to/from the hosted team registry (the paid layer).
+The OSS recorder works fully offline; this is the opt-in bridge that pushes a CI run's cassettes to
+the shared registry so the GitHub merge-gate can compare them. Requires a seat token.
+    from cassette.registry import push_dir
+    push_dir(".cassettes/demo", project="acme/app", ref="pr-42", token="...")
+"""
+from __future__ import annotations
+import json
+import os
+from pathlib import Path
+from typing import Optional
+try:
+    import httpx
+except ImportError:  # pragma: no cover
+    httpx = None  # type: ignore
+def _base() -> str:
+    """Return the registry backend URL (CASSETTE_BACKEND or the hosted default), sans trailing '/'."""
+    backend = os.environ.get("CASSETTE_BACKEND", "https://api.cassette.dev")
+    return backend.rstrip("/")
+def push_dir(cassette_dir: str, *, project: str, ref: str, token: Optional[str] = None) -> int:
+    """Upload every ``*.json`` cassette in `cassette_dir` to the registry under <project>/<ref>.
+    The token comes from `token` or, failing that, CASSETTE_TOKEN. All cassettes go out in one POST
+    to ``<backend>/v1/<project>/cassettes?ref=<ref>``; the return value is the "pushed" count from
+    the response (0 when the directory holds no cassettes, in which case nothing is sent). Raises
+    RuntimeError when httpx or the token is missing, and whatever raise_for_status() raises on an
+    error response.
+    """
+    if httpx is None:
+        raise RuntimeError("cassette.registry requires httpx")
+    seat_token = token if token else os.environ.get("CASSETTE_TOKEN")
+    if not seat_token:
+        raise RuntimeError("no seat token (set CASSETTE_TOKEN)")
+    # lazily pair each file's stem with its parsed content; the stem is the fallback fingerprint
+    records = ((p.stem, json.loads(p.read_text())) for p in Path(cassette_dir).glob("*.json"))
+    payload = [{"fingerprint": rec.get("fingerprint", stem), "body": json.dumps(rec)}
+               for stem, rec in records]
+    if not payload:
+        return 0
+    endpoint = f"{_base()}/v1/{project}/cassettes"
+    auth = {"authorization": f"Bearer {seat_token}"}
+    resp = httpx.post(endpoint, params={"ref": ref}, headers=auth, json=payload, timeout=30)
+    resp.raise_for_status()
+    pushed = resp.json().get("pushed", 0)
+    return int(pushed)

cassette_sdk-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,28 @@
+[project]
+name = "cassette-sdk"
+version = "0.1.0"
+description = "One-call shim to route LLM SDK traffic through the Cassette record/replay gateway"
+readme = "README.md"
+requires-python = ">=3.9"
+license = { text = "MIT" }
+authors = [{ name = "Cassette" }]
+keywords = ["llm", "agent", "testing", "record", "replay", "vcr", "openai", "anthropic", "ci"]
+dependencies = ["httpx>=0.24"]
+classifiers = [
+  "Development Status :: 3 - Alpha",
+  "Intended Audience :: Developers",
+  "License :: OSI Approved :: MIT License",
+  "Programming Language :: Python :: 3",
+  "Topic :: Software Development :: Testing",
+]
+[project.urls]
+Homepage = "https://github.com/NOVUS-STUDIOS-DEV/cassette"
+Source = "https://github.com/NOVUS-STUDIOS-DEV/cassette"
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+[tool.hatch.build.targets.wheel]
+packages = ["cassette"]