promptecho 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
promptecho/__init__.py ADDED
@@ -0,0 +1,81 @@
1
+ """promptecho — record & replay for LLM API calls.
2
+
3
+ Public API:
4
+ promptecho.use_cassette(path, mode="once", match_on=None) # decorator + context manager
5
+ promptecho.Mode # record modes
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import functools
11
+ from contextlib import contextmanager
12
+
13
+ from .cassette import Cassette
14
+ from .transport import Mode
15
+
16
# Names exported as the package's public API.
__all__ = ["use_cassette", "Mode", "Cassette"]
# Version string of this package release.
__version__ = "0.1.0"
18
+
19
+
20
@contextmanager
def _activate(cassette: Cassette, mode: Mode):
    """Route httpx traffic through ``cassette`` while the ``with`` body runs.

    On entry the httpx transport hooks are replaced with record/replay versions
    (see patch.py). On exit, whether normal or via an exception, the previous
    hooks are restored first and ``cassette.save()`` is then called.

    Yields:
        The active ``cassette``.
    """
    from .patch import install, uninstall

    rerecord_everything = mode is Mode.ALL
    if rerecord_everything:
        # Start from an empty cassette and flag it as changed so that save()
        # rewrites the file on exit.
        cassette.interactions.clear()
        cassette._dirty = True

    restore_token = install(cassette, mode)
    try:
        yield cassette
    finally:
        uninstall(restore_token)
        cassette.save()
39
+
40
+
41
class _UseCassette:
    """Cassette activator usable as a ``with`` block or as a function decorator.

    The cassette file is (re)loaded each time the block is entered or the
    decorated function is called.
    """

    def __init__(self, path: str, mode: str | Mode = Mode.ONCE, match_on=None):
        self.path = path
        # Accepts either a Mode member or its string value.
        self.mode = Mode(mode)
        self.match_on = match_on

    def _load(self) -> Cassette:
        """Load the cassette for ``self.path`` with the configured match fields."""
        cassette = Cassette.load(self.path, match_on=self.match_on)
        return cassette

    def __enter__(self):
        # Keep the underlying context manager so __exit__ can delegate to it.
        self._cm = _activate(self._load(), self.mode)
        active_cassette = self._cm.__enter__()
        return active_cassette

    def __exit__(self, *exc):
        return self._cm.__exit__(*exc)

    def __call__(self, func):
        """Wrap ``func`` so every call runs with a freshly loaded cassette active."""

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            activation = _activate(self._load(), self.mode)
            with activation:
                result = func(*args, **kwargs)
            return result

        return wrapper
66
+
67
+
68
def use_cassette(path: str, mode: str | Mode = Mode.ONCE, match_on=None) -> _UseCassette:
    """Build a cassette activator for ``path``.

    The returned object can decorate a function::

        @promptecho.use_cassette("cassettes/foo.yaml")
        def test_foo(): ...

    or guard a block::

        with promptecho.use_cassette("cassettes/foo.yaml", mode="none"):
            client.messages.create(...)

    Args:
        path: Location of the cassette file.
        mode: Record mode, as a ``Mode`` member or its string value.
        match_on: Optional list of request fields to fingerprint on.
    """
    activator = _UseCassette(path, mode=mode, match_on=match_on)
    return activator
promptecho/cassette.py ADDED
@@ -0,0 +1,138 @@
1
+ """Cassette: the on-disk record of interactions. Human-readable YAML.
2
+
3
+ A cassette is a list of (request, response) interactions keyed by request
4
+ fingerprint. It is designed to diff cleanly in PRs and to be safe to commit
5
+ (secrets are redacted before anything reaches disk).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import dataclasses
11
+ import os
12
+ from dataclasses import dataclass, field
13
+
14
+ import yaml
15
+
16
+ from .matcher import DEFAULT_MATCH_ON, fingerprint
17
+
18
# Placeholder stored in place of a redacted header value.
REDACTED = "REDACTED"
# Header names (compared lowercase) whose values are replaced by REDACTED
# when a response is recorded.
REDACT_HEADERS = {"authorization", "x-api-key", "openai-organization"}
20
+
21
+
22
@dataclass
class Response:
    """A recorded HTTP response as held in a cassette."""

    status: int  # HTTP status code
    headers: dict  # response headers as stored in the cassette
    streaming: bool = False  # True when the response is stored as SSE events
    body: object | None = None  # non-streaming body: JSON, str, or base64 str when binary
    events: list[str] = field(default_factory=list)  # ordered raw SSE events
    binary: bool = False  # if True, body is base64-encoded raw bytes
30
+
31
+
32
@dataclass
class Interaction:
    """One recorded request together with the response it produced."""

    method: str  # HTTP method of the request
    url: str  # request URL
    match_key: str  # fingerprint used to look this interaction up on replay
    matched_on: list[str]  # field names the fingerprint was computed from
    body: dict  # request body as passed to Cassette.record
    response: Response  # the recorded response
40
+
41
+
42
@dataclass
class Cassette:
    """In-memory view of one cassette file: match fields plus recorded interactions.

    Attributes are written to disk by :meth:`save` only when something changed
    since the cassette was loaded (tracked by ``_dirty``).
    """

    path: str
    match_on: list[str] = field(default_factory=lambda: list(DEFAULT_MATCH_ON))
    interactions: list[Interaction] = field(default_factory=list)
    _dirty: bool = False  # True when there are changes save() has not written yet

    # --- lookup -----------------------------------------------------------
    def find(self, key: str) -> Interaction | None:
        """Return the first interaction whose ``match_key`` equals ``key``, else None."""
        for ix in self.interactions:
            if ix.match_key == key:
                return ix
        return None

    def record(self, method: str, url: str, body: dict, response: Response) -> None:
        """Append a new interaction and mark the cassette as needing a save.

        The response's sensitive headers are redacted before it is stored.
        """
        key = fingerprint(body, self.match_on)
        self.interactions.append(
            Interaction(
                method=method,
                url=url,
                match_key=key,
                matched_on=list(self.match_on),
                body=body,
                response=_redact_response(response),
            )
        )
        self._dirty = True

    # --- persistence ------------------------------------------------------
    @classmethod
    def load(cls, path: str, match_on: list[str] | None = None) -> "Cassette":
        """Load the cassette at ``path``, or return an empty one if the file is absent.

        A ``match_on`` list stored in the file takes precedence over the
        ``match_on`` argument; the argument (or the default list) is used when
        the file has none.
        """
        mo = match_on or list(DEFAULT_MATCH_ON)
        if not os.path.exists(path):
            return cls(path=path, match_on=mo)
        # save() writes UTF-8 (allow_unicode=True), so read it back as UTF-8
        # instead of relying on the platform's default encoding.
        with open(path, encoding="utf-8") as f:
            raw = yaml.safe_load(f) or {}
        # ``or []`` also covers a key that is present but null (hand-edited files).
        interactions = [_interaction_from_dict(d) for d in raw.get("interactions") or []]
        stored_match_on = raw.get("match_on", mo)
        if stored_match_on is None:
            stored_match_on = mo
        return cls(path=path, match_on=stored_match_on, interactions=interactions)

    def save(self) -> None:
        """Write the cassette to ``self.path`` as YAML if it has unsaved changes."""
        if not self._dirty:
            return
        os.makedirs(os.path.dirname(self.path) or ".", exist_ok=True)
        doc = {
            "version": 1,
            "match_on": self.match_on,
            "interactions": [_interaction_to_dict(ix) for ix in self.interactions],
        }
        # allow_unicode=True emits non-ASCII characters verbatim, so the file
        # must be opened as UTF-8 or writing fails on non-UTF-8 locales.
        with open(self.path, "w", encoding="utf-8") as f:
            yaml.safe_dump(doc, f, sort_keys=False, allow_unicode=True, width=100)
        self._dirty = False
93
+
94
+
95
+ # --- (de)serialization helpers -------------------------------------------
96
def _redact_response(resp: Response) -> Response:
    """Return a copy of ``resp`` with sensitive header values masked.

    Header names are compared case-insensitively against ``REDACT_HEADERS``;
    the input object is left untouched.
    """
    scrubbed = {}
    for name, value in resp.headers.items():
        scrubbed[name] = REDACTED if name.lower() in REDACT_HEADERS else value
    return dataclasses.replace(resp, headers=scrubbed)
99
+
100
+
101
def _interaction_to_dict(ix: Interaction) -> dict:
    """Convert an interaction into the plain-dict layout written to the cassette."""
    recorded = ix.response
    response_doc = {
        "status": recorded.status,
        "headers": recorded.headers,
        "streaming": recorded.streaming,
    }
    # ``binary`` is only written when set.
    if recorded.binary:
        response_doc["binary"] = True
    # Streaming responses carry their event list; all others carry a body.
    if recorded.streaming:
        response_doc["events"] = recorded.events
    else:
        response_doc["body"] = recorded.body

    request_doc = {
        "method": ix.method,
        "url": ix.url,
        "match_key": ix.match_key,
        "matched_on": ix.matched_on,
        "body": ix.body,
    }
    return {"request": request_doc, "response": response_doc}
120
+
121
+
122
def _interaction_from_dict(d: dict) -> Interaction:
    """Rebuild an ``Interaction`` from its cassette dict form.

    ``method``, ``url``, ``match_key`` and the response ``status`` are required;
    every other field falls back to a default when missing.
    """
    request_doc, response_doc = d["request"], d["response"]
    response = Response(
        status=response_doc["status"],
        headers=response_doc.get("headers", {}),
        streaming=response_doc.get("streaming", False),
        body=response_doc.get("body"),
        events=response_doc.get("events", []),
        binary=response_doc.get("binary", False),
    )
    return Interaction(
        method=request_doc["method"],
        url=request_doc["url"],
        match_key=request_doc["match_key"],
        matched_on=request_doc.get("matched_on", list(DEFAULT_MATCH_ON)),
        body=request_doc.get("body", {}),
        response=response,
    )
promptecho/matcher.py ADDED
@@ -0,0 +1,115 @@
1
+ """Request fingerprinting — the deterministic core of replay matching.
2
+
3
+ We match on a *normalized fingerprint* of the request fields that actually
4
+ determine the response, never on raw bytes (see DESIGN.md §2).
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import hashlib
10
+ import json
11
+
12
# Request-body fields that feed the fingerprint when no explicit ``match_on``
# list is given.
DEFAULT_MATCH_ON = [
    "model", "messages", "system", "tools", "tool_choice",
    # Reasoning-model knobs that change the response without changing the prompt:
    # OpenAI o-series, Anthropic extended thinking, OpenRouter unified field.
    # If these aren't matched, "reasoning_effort=high" and "low" tests collide.
    "reasoning_effort", "reasoning", "thinking",
]
19
+
20
+
21
def canonical_json(obj: object) -> str:
    """Serialize ``obj`` to a stable JSON string.

    Keys are sorted, separators carry no whitespace, and non-ASCII characters
    are emitted as-is, so equal objects always give the same text.
    """
    dump_options = {
        "sort_keys": True,
        "separators": (",", ":"),
        "ensure_ascii": False,
    }
    return json.dumps(obj, **dump_options)
27
+
28
+
29
def pick(body: dict, match_on: list[str]) -> dict:
    """Return the subset of ``body`` named in ``match_on``, in ``match_on`` order.

    Names that are absent from ``body`` are skipped.
    """
    selected = {}
    for name in match_on:
        if name in body:
            selected[name] = body[name]
    return selected
32
+
33
+
34
def fingerprint(body: dict, match_on: list[str] | None = None) -> str:
    """Compute the cassette key for a request body.

    Only the fields named in ``match_on`` (``DEFAULT_MATCH_ON`` when it is
    empty or None) contribute. The key is the first 16 hex characters of the
    SHA-256 of their canonical JSON.
    """
    names = match_on or DEFAULT_MATCH_ON
    canonical = canonical_json(pick(body, names))
    hasher = hashlib.sha256(canonical.encode("utf-8"))
    return hasher.hexdigest()[:16]
43
+
44
+
45
def diff_fields(incoming: dict, recorded: dict, match_on: list[str]) -> list[str]:
    """List the ``match_on`` names whose top-level values differ between the two bodies.

    A name missing on a side is read as ``None`` there. For a leaf-level,
    human-readable report see :func:`diff_request`.
    """
    changed = []
    for name in match_on:
        if incoming.get(name) != recorded.get(name):
            changed.append(name)
    return changed
49
+
50
+
51
+ _MISSING = object() # sentinel for "field/element not present on this side"
52
+
53
+
54
+ def _walk_diff(incoming, recorded, path: str):
55
+ """Yield (path, recorded_value, incoming_value) for each leaf-level difference.
56
+
57
+ Walks both structures in parallel. dicts are compared by key (sorted for stable
58
+ output); lists by index, with extras flagged. Anything else is a leaf — emit and
59
+ stop. Recursion depth is bounded by request shape (chat messages rarely nest deep).
60
+ """
61
+ if incoming == recorded:
62
+ return
63
+ if type(incoming) is not type(recorded):
64
+ yield path, recorded, incoming
65
+ return
66
+ if isinstance(incoming, dict):
67
+ for k in sorted(set(incoming) | set(recorded)):
68
+ sub = f"{path}.{k}" if path else k
69
+ i_val = incoming.get(k, _MISSING)
70
+ r_val = recorded.get(k, _MISSING)
71
+ if i_val is _MISSING or r_val is _MISSING:
72
+ yield sub, r_val, i_val
73
+ else:
74
+ yield from _walk_diff(i_val, r_val, sub)
75
+ return
76
+ if isinstance(incoming, list):
77
+ for i in range(max(len(incoming), len(recorded))):
78
+ sub = f"{path}[{i}]"
79
+ if i >= len(incoming):
80
+ yield sub, recorded[i], _MISSING
81
+ elif i >= len(recorded):
82
+ yield sub, _MISSING, incoming[i]
83
+ else:
84
+ yield from _walk_diff(incoming[i], recorded[i], sub)
85
+ return
86
+ yield path, recorded, incoming
87
+
88
+
89
def _truncate(v, limit: int = 80) -> str:
    """Render ``v`` for a diff line, shortened to at most ``limit`` characters.

    The ``_MISSING`` sentinel renders as ``<not present>``; strings are used
    as-is and everything else goes through ``canonical_json``. Over-long text
    is cut and ends in ``...``.
    """
    if v is _MISSING:
        return "<not present>"
    text = v if isinstance(v, str) else canonical_json(v)
    if len(text) > limit:
        return text[: limit - 3] + "..."
    return text
94
+
95
+
96
def diff_request(incoming: dict, recorded: dict, match_on: list[str]) -> str:
    """Build a multi-line, leaf-level diff of two request bodies.

    Only fields named in ``match_on`` are inspected. Returns an empty string
    when none of them differ.
    """
    report = []
    for name in match_on:
        new_value = incoming.get(name, _MISSING)
        old_value = recorded.get(name, _MISSING)
        # Equal covers both "same value" and "absent on both sides" (the
        # sentinel compares equal to itself).
        if new_value == old_value:
            continue
        for where, old_leaf, new_leaf in _walk_diff(new_value, old_value, name):
            report.extend(
                (
                    f" {where}:",
                    f" recorded: {_truncate(old_leaf)}",
                    f" incoming: {_truncate(new_leaf)}",
                )
            )
    return "\n".join(report)
@@ -0,0 +1,160 @@
1
+ """Per-provider request normalization.
2
+
3
+ Different providers (and even one SDK across versions) express the *same logical
4
+ prompt* in different shapes:
5
+
6
+ - Anthropic puts the system prompt in a top-level ``system`` param; OpenAI puts
7
+ it in a ``system``/``developer`` role message.
8
+ - Message content may be a bare string or a list of typed content blocks.
9
+ - Tool defs differ: Anthropic ``{name, description, input_schema}`` vs OpenAI
10
+ ``{type: function, function: {name, description, parameters}}``.
11
+
12
+ ``normalize()`` maps a raw request body into one canonical shape, so logically
13
+ identical calls produce the same fingerprint. This is the thing a raw-bytes HTTP
14
+ VCR fundamentally cannot do.
15
+
16
+ The canonical body is also what gets written to the cassette — a provider-agnostic
17
+ view of the call, which is arguably more readable than the raw provider JSON.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ from urllib.parse import urlsplit
23
+
24
CANONICAL_ROLES_AS_SYSTEM = ("system", "developer")


# --- detection ------------------------------------------------------------
def detect(url: str, body: dict) -> str:
    """Guess the provider for a request: ``anthropic``, ``openai`` or ``generic``.

    The URL host is checked first. If it is inconclusive (localhost, proxies,
    gateways) the body shape decides, in this order: a top-level ``system``
    key, a system/developer role message, then the shape of the tool entries.
    """
    host = (urlsplit(url).hostname or "").lower()
    if "anthropic" in host:
        return "anthropic"
    if "openai" in host or "azure" in host:
        return "openai"

    # Shape fallback.
    if "system" in body:
        return "anthropic"

    for message in body.get("messages") or []:
        if isinstance(message, dict) and message.get("role") in CANONICAL_ROLES_AS_SYSTEM:
            return "openai"

    tool_dicts = [t for t in (body.get("tools") or []) if isinstance(t, dict)]
    if any("input_schema" in t for t in tool_dicts):
        return "anthropic"
    if any("function" in t for t in tool_dicts):
        return "openai"
    return "generic"
48
+
49
+
50
+ # --- shared canonicalizers ------------------------------------------------
51
def _canon_content(content):
    """Canonicalize message content.

    A list has each block canonicalized; a list holding exactly one text block
    collapses to that block's bare string. Strings and any other value are
    returned unchanged.
    """
    if not isinstance(content, list):
        return content
    canon_blocks = list(map(_canon_block, content))
    if len(canon_blocks) == 1:
        only = canon_blocks[0]
        if isinstance(only, dict) and only.get("type") == "text":
            return only["text"]
    return canon_blocks
61
+
62
+
63
+ def _canon_block(block):
64
+ if isinstance(block, dict) and block.get("type") == "text" and "text" in block:
65
+ return {"type": "text", "text": block["text"]}
66
+ return block
67
+
68
+
69
def _canon_message(message):
    """Return a shallow copy of a message dict with its ``content`` canonicalized.

    Non-dict values are returned unchanged.
    """
    if not isinstance(message, dict):
        return message
    if "content" not in message:
        return dict(message)
    # Overwriting an existing key keeps its position in the copy.
    return {**message, "content": _canon_content(message["content"])}
76
+
77
+
78
+ def _canon_system(system):
79
+ if isinstance(system, list):
80
+ texts = [b.get("text", "") for b in system
81
+ if isinstance(b, dict) and b.get("type") == "text"]
82
+ if texts:
83
+ return "\n".join(texts)
84
+ return system
85
+
86
+
87
+ def _canon_tool(tool):
88
+ if not isinstance(tool, dict):
89
+ return tool
90
+ if isinstance(tool.get("function"), dict): # OpenAI shape
91
+ fn = tool["function"]
92
+ canon = {"name": fn.get("name"), "description": fn.get("description"),
93
+ "parameters": fn.get("parameters")}
94
+ else: # Anthropic / generic
95
+ canon = {"name": tool.get("name"), "description": tool.get("description"),
96
+ "parameters": tool.get("input_schema", tool.get("parameters"))}
97
+ return {k: v for k, v in canon.items() if v is not None}
98
+
99
+
100
+ def _canon_tool_choice(choice):
101
+ if isinstance(choice, str):
102
+ return {"mode": choice}
103
+ if isinstance(choice, dict):
104
+ if isinstance(choice.get("function"), dict): # OpenAI
105
+ return {"mode": "tool", "name": choice["function"].get("name")}
106
+ if "type" in choice: # Anthropic
107
+ out = {"mode": choice["type"]}
108
+ if choice.get("name"):
109
+ out["name"] = choice["name"]
110
+ return out
111
+ return choice
112
+
113
+
114
def _apply_common(out: dict) -> dict:
    """Canonicalize ``messages``, ``tools`` and ``tool_choice`` in place.

    Only keys that are present are touched. The same dict is returned.
    """
    per_item = (("messages", _canon_message), ("tools", _canon_tool))
    for key, canonicalize in per_item:
        if key in out:
            out[key] = list(map(canonicalize, out[key]))
    if "tool_choice" in out:
        out["tool_choice"] = _canon_tool_choice(out["tool_choice"])
    return out
122
+
123
+
124
+ # --- per-provider ---------------------------------------------------------
125
def _anthropic(body: dict) -> dict:
    """Normalize an Anthropic-shaped body: flatten ``system``, then apply the shared steps.

    Works on a shallow copy; ``body`` itself is not modified.
    """
    canon = {
        key: (_canon_system(value) if key == "system" else value)
        for key, value in body.items()
    }
    return _apply_common(canon)
130
+
131
+
132
def _system_text(content):
    """Flatten system/developer message content to text, or None if it cannot be.

    Strings are returned as-is. A non-empty list made up only of text blocks
    with string text is joined with newlines. Anything else gives None.
    """
    canon = _canon_content(content)
    if isinstance(canon, str):
        return canon
    if isinstance(canon, list) and canon:
        texts = [
            block.get("text")
            for block in canon
            if isinstance(block, dict) and block.get("type") == "text"
        ]
        if len(texts) == len(canon) and all(isinstance(t, str) for t in texts):
            return "\n".join(texts)
    return None


def _openai(body: dict) -> dict:
    """Normalize an OpenAI-shaped body into the canonical form.

    System/developer role messages are lifted into a top-level ``system``
    string (unless the body already has a ``system`` key), and
    ``max_completion_tokens`` is renamed to ``max_tokens`` when the latter is
    absent. Works on a shallow copy; ``body`` itself is not modified.

    A system/developer message whose content cannot be flattened to text is
    kept in ``messages``. Multi-part text content is joined into ``system``.
    Previously such content was dropped from both places, so two requests that
    differed only in that prompt produced the same fingerprint.
    """
    out = dict(body)
    system_parts, rest = [], []
    for m in out.get("messages", []):
        text = None
        if isinstance(m, dict) and m.get("role") in CANONICAL_ROLES_AS_SYSTEM:
            text = _system_text(m.get("content", ""))
        if text is None:
            # Not a system message, or one we cannot safely flatten: keep it so
            # it still contributes to the fingerprint.
            rest.append(m)
        else:
            system_parts.append(text)
    if system_parts and "system" not in out:
        out["system"] = "\n".join(system_parts)
        out["messages"] = rest
    if "max_completion_tokens" in out and "max_tokens" not in out:
        out["max_tokens"] = out.pop("max_completion_tokens")
    return _apply_common(out)
147
+
148
+
149
def _generic(body: dict) -> dict:
    """Normalize a body of unknown provider: shared steps only, on a shallow copy."""
    shallow_copy = dict(body)
    return _apply_common(shallow_copy)
151
+
152
+
153
# Provider name (as returned by detect) -> normalizer for that provider.
_NORMALIZERS = {"anthropic": _anthropic, "openai": _openai, "generic": _generic}


def normalize(url: str, body: dict) -> dict:
    """Convert a raw request body into the canonical shape used for matching.

    The provider is detected from ``url`` and ``body``. A non-dict ``body`` is
    returned unchanged.
    """
    if isinstance(body, dict):
        provider = detect(url, body)
        return _NORMALIZERS[provider](body)
    return body
promptecho/patch.py ADDED
@@ -0,0 +1,134 @@
1
+ """httpx interception — the wiring that makes record/replay real.
2
+
3
+ We monkeypatch ``httpx.HTTPTransport.handle_request`` (and the async twin) so
4
+ every client built on httpx — Anthropic, OpenAI, raw httpx — routes through the
5
+ record/replay decision in :mod:`promptecho.transport`. This is the same approach
6
+ respx and vcrpy's httpx stub use. See DESIGN.md §1.
7
+
8
+ On record we read the full upstream response, capture it, and return a fresh
9
+ buffered response so the SDK can consume it normally. Streaming (SSE) responses
10
+ are captured as their ordered events and re-emitted byte-for-byte on replay.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import base64
16
+ import json
17
+
18
+ import httpx
19
+
20
+ from .cassette import Response as Rec
21
+ from .normalizers import normalize
22
+ from .transport import decide, parse_body
23
+
24
# Hop-by-hop / encoding headers that won't match our re-encoded body on replay.
# Names are lowercase; _clean_headers compares against them case-insensitively.
_DROP_HEADERS = {"content-encoding", "content-length", "transfer-encoding"}
26
+
27
+
28
def _request_body(request: httpx.Request) -> dict:
    """Return the request's body parsed by ``parse_body``.

    If the request content has not been read yet, it is read first.
    """
    try:
        payload = request.content
    except httpx.RequestNotRead:
        payload = request.read()
    return parse_body(payload)
34
+
35
+
36
def _clean_headers(headers: httpx.Headers) -> dict:
    """Copy ``headers`` into a plain dict, leaving out the ``_DROP_HEADERS`` entries."""
    kept = {}
    for name, value in dict(headers).items():
        if name.lower() in _DROP_HEADERS:
            continue
        kept[name] = value
    return kept
38
+
39
+
40
+ def _split_sse(text: str) -> list[str]:
41
+ """Split an SSE body into individual events (kept readable in the cassette)."""
42
+ return [part + "\n\n" for part in text.split("\n\n") if part.strip()]
43
+
44
+
45
+ def _is_binary_content_type(ct: str) -> bool:
46
+ """Content types we must not decode as text — images, audio, video, octet-stream.
47
+
48
+ Anything text/* or application/json|xml|...+json round-trips cleanly through
49
+ YAML; everything else (image/png, audio/wav, application/octet-stream, etc.)
50
+ gets base64-encoded to preserve bytes exactly.
51
+ """
52
+ ct = ct.split(";", 1)[0].strip().lower()
53
+ if not ct:
54
+ return False
55
+ if ct.startswith(("image/", "audio/", "video/")):
56
+ return True
57
+ if ct in {"application/octet-stream", "application/pdf", "application/zip"}:
58
+ return True
59
+ return False
60
+
61
+
62
def _capture(status: int, headers: httpx.Headers, data: bytes) -> Rec:
    """Turn a fully read upstream response into a cassette record.

    The Content-Type decides the storage form: SSE bodies become a list of
    events, binary types are base64-encoded, and everything else is stored as
    parsed JSON when it parses, or as text otherwise. An empty body is stored
    as None.
    """
    kept_headers = _clean_headers(headers)
    media = kept_headers.get("content-type", "")

    if "text/event-stream" in media:
        stream_text = data.decode("utf-8", "replace")
        return Rec(status=status, headers=kept_headers, streaming=True,
                   events=_split_sse(stream_text))

    if _is_binary_content_type(media):
        encoded = base64.b64encode(data).decode("ascii")
        return Rec(status=status, headers=kept_headers, streaming=False,
                   body=encoded, binary=True)

    if not data:
        parsed = None
    else:
        try:
            parsed = json.loads(data)
        except ValueError:
            # Not JSON: keep it as text.
            parsed = data.decode("utf-8", "replace")
    return Rec(status=status, headers=kept_headers, streaming=False, body=parsed)
76
+
77
+
78
def _to_httpx(rec: Rec, request: httpx.Request) -> httpx.Response:
    """Build a buffered ``httpx.Response`` from a cassette record.

    Body bytes are chosen by storage form: concatenated SSE events for
    streaming records, decoded base64 for binary ones, UTF-8 text for strings,
    and JSON for any other non-None value. That last case includes numbers and
    booleans, which ``_capture`` can store when the upstream body is a bare
    JSON scalar; they used to replay as an empty body. A None body replays as
    empty content.
    """
    if rec.streaming:
        content = "".join(rec.events).encode("utf-8")
    elif rec.binary and isinstance(rec.body, str):
        content = base64.b64decode(rec.body)
    elif isinstance(rec.body, str):
        content = rec.body.encode("utf-8")
    elif rec.body is not None:
        content = json.dumps(rec.body).encode("utf-8")
    else:
        content = b""
    return httpx.Response(
        status_code=rec.status,
        headers=httpx.Headers(rec.headers),
        content=content,
        request=request,
    )
95
+
96
+
97
def _make_sync(cassette, mode, real_fn):
    """Build the replacement for ``httpx.HTTPTransport.handle_request``.

    Args:
        cassette: Cassette consulted for replays and appended to on record.
        mode: Record mode passed to ``decide``.
        real_fn: The original ``handle_request`` used for pass-through calls.
    """

    def handle_request(self, request: httpx.Request) -> httpx.Response:
        body = normalize(str(request.url), _request_body(request))
        decision = decide(mode, cassette, body)
        if decision.response is not None:  # REPLAY (no network)
            return _to_httpx(decision.response, request)
        real = real_fn(self, request)  # PASS THROUGH
        try:
            rec = _capture(real.status_code, real.headers, real.read())
        finally:
            # Release the upstream connection even if reading or capturing
            # fails. Closing an already closed response is a no-op.
            real.close()
        cassette.record(request.method, str(request.url), body, rec)  # RECORD
        return _to_httpx(rec, request)

    return handle_request
109
+
110
+
111
def _make_async(cassette, mode, real_fn):
    """Build the replacement for ``httpx.AsyncHTTPTransport.handle_async_request``.

    Args:
        cassette: Cassette consulted for replays and appended to on record.
        mode: Record mode passed to ``decide``.
        real_fn: The original ``handle_async_request`` used for pass-through calls.
    """

    async def handle_async_request(self, request: httpx.Request) -> httpx.Response:
        body = normalize(str(request.url), _request_body(request))
        decision = decide(mode, cassette, body)
        if decision.response is not None:  # REPLAY (no network)
            return _to_httpx(decision.response, request)
        real = await real_fn(self, request)  # PASS THROUGH
        try:
            rec = _capture(real.status_code, real.headers, await real.aread())
        finally:
            # Release the upstream connection even if reading or capturing
            # fails. Closing an already closed response is a no-op.
            await real.aclose()
        cassette.record(request.method, str(request.url), body, rec)  # RECORD
        return _to_httpx(rec, request)

    return handle_async_request
123
+
124
+
125
def install(cassette, mode):
    """Swap httpx's sync and async transport handlers for record/replay versions.

    Returns:
        A ``(sync_handler, async_handler)`` tuple of the handlers that were in
        place before the call. Pass it to :func:`uninstall` to restore them.
    """
    previous_sync = httpx.HTTPTransport.handle_request
    previous_async = httpx.AsyncHTTPTransport.handle_async_request
    httpx.HTTPTransport.handle_request = _make_sync(cassette, mode, previous_sync)
    httpx.AsyncHTTPTransport.handle_async_request = _make_async(cassette, mode, previous_async)
    return (previous_sync, previous_async)
131
+
132
+
133
def uninstall(saved) -> None:
    """Put back the transport handlers captured by :func:`install`."""
    sync_handler, async_handler = saved
    httpx.HTTPTransport.handle_request = sync_handler
    httpx.AsyncHTTPTransport.handle_async_request = async_handler
@@ -0,0 +1,29 @@
1
+ """pytest integration: an auto-named cassette per test.
2
+
3
+ def test_summarize(promptecho_cassette): # -> cassettes/test_summarize.yaml
4
+ client.messages.create(...)
5
+
6
+ Mode defaults to ``once`` locally and ``none`` in CI (when the CI env var is set),
7
+ so a forgotten recording fails the build instead of making a live call.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import os
13
+
14
+ import pytest
15
+
16
+ from . import use_cassette
17
+ from .transport import Mode
18
+
19
+
20
def _default_mode() -> Mode:
    """Pick the fixture's record mode: ``none`` when ``CI`` is set to a non-empty value, else ``once``."""
    if os.environ.get("CI"):
        return Mode.NONE
    return Mode.ONCE
22
+
23
+
24
@pytest.fixture
def promptecho_cassette(request):
    """Activate a cassette named after the requesting test and yield it.

    The cassette lives in a ``cassettes`` directory next to the test module and
    is named ``<test node name>.yaml``. Characters that are path separators or
    invalid in filenames on common platforms (they can appear in parametrized
    test ids) are replaced with ``_``, so the name always maps to a single
    file inside that directory.
    """
    # ``request.path`` (pytest >= 7) supersedes the legacy ``request.fspath``.
    test_file = getattr(request, "path", None) or request.fspath
    cassette_dir = os.path.join(os.path.dirname(test_file), "cassettes")
    unsafe_chars = str.maketrans({ch: "_" for ch in '<>:"/\\|?*'})
    safe_name = request.node.name.translate(unsafe_chars)
    path = os.path.join(cassette_dir, f"{safe_name}.yaml")
    with use_cassette(path, mode=_default_mode()) as cassette:
        yield cassette
@@ -0,0 +1,89 @@
1
+ """The replay/record decision logic, isolated from httpx patching mechanics.
2
+
3
+ This module is pure and unit-testable: given a mode, a cassette, and a parsed
4
+ request, it decides whether to replay a recorded response or pass through to the
5
+ network and record. The actual httpx wiring lives in patch.py.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ from enum import Enum
12
+
13
+ from .cassette import Cassette, Response
14
+ from .matcher import diff_request, fingerprint
15
+
16
+
17
class Mode(str, Enum):
    """Record modes; each member's value is the string accepted for ``mode``."""

    ONCE = "once"  # record if absent, replay if present (default)
    NONE = "none"  # replay only; error on miss (CI-safe)
    NEW_EPISODES = "new_episodes"  # replay existing, record new
    ALL = "all"  # always re-record
22
+
23
+
24
class CassetteMiss(Exception):
    """Raised in mode=none when an incoming request has no recording.

    The message names the cassette and, when the cassette has recordings,
    describes how the request differs from the last one recorded.
    """
26
+
27
+
28
def parse_body(raw: bytes) -> dict:
    """Decode a raw request body into a dict.

    Returns an empty dict when the body is empty, is not valid JSON, or is
    valid JSON that is not an object (for example a bare string, number or
    list). Always returning a dict keeps downstream field lookups from failing
    on non-mapping values.
    """
    if not raw:
        return {}
    try:
        parsed = json.loads(raw)
    except ValueError:  # also covers UnicodeDecodeError, a ValueError subclass
        return {}
    return parsed if isinstance(parsed, dict) else {}
35
+
36
+
37
class Decision:
    """Result of evaluating one request.

    Attributes:
        response: The recorded response to replay, or None.
        record: Whether the request should be recorded.
    """

    def __init__(self, *, response: Response | None = None, record: bool = False):
        self.record = record
        self.response = response
43
+
44
+
45
def decide(mode: Mode, cassette: Cassette, body: dict) -> Decision:
    """Choose between replaying a recording and recording a new call.

    No I/O happens here; the caller performs the network call and persistence.

    - ``all``: always record.
    - ``none``: replay a matching recording, otherwise raise.
    - ``once`` / ``new_episodes``: replay a matching recording, otherwise record.

    Raises:
        CassetteMiss: In mode ``none`` when no recording matches.
    """
    recorded = cassette.find(fingerprint(body, cassette.match_on))

    if mode is Mode.ALL:
        return Decision(record=True)
    # Every remaining mode replays when a recording exists.
    if recorded is not None:
        return Decision(response=recorded.response)
    if mode is Mode.NONE:
        raise CassetteMiss(_miss_message(cassette, body))
    return Decision(record=True)
62
+
63
+
64
def _miss_message(cassette: Cassette, body: dict) -> str:
    """Compose the ``CassetteMiss`` text for a request with no matching recording.

    The incoming body is compared with the most recently appended interaction.
    The text has three variants: empty cassette, a difference the diff could
    not locate, and a field-level diff.
    """
    if not cassette.interactions:
        return (
            f"Cassette miss: {cassette.path!r} has no recordings and mode=none.\n"
            f"Re-record with mode='once' (or delete and re-run the test)."
        )

    latest = cassette.interactions[-1]
    diff = diff_request(body, latest.body, cassette.match_on)
    if diff:
        return (
            f"Cassette miss in {cassette.path!r} (mode=none).\n"
            f"The incoming request differs from the nearest recording on these fields:\n\n"
            f"{diff}\n\n"
            f"If the change is intentional, re-record with mode='once' (or delete the "
            f"cassette and re-run). If not, fix the call so it matches the recorded "
            f"fingerprint."
        )
    return (
        f"Cassette miss in {cassette.path!r} (mode=none): a matched field has a "
        f"non-equal value the diff walker couldn't pinpoint. Re-record to refresh."
    )
85
+
86
+
87
+ # The httpx wiring that turns these decisions into real interception lives in
88
+ # patch.py (sync + async). This module stays pure so the branch logic above is
89
+ # unit-testable without a network stack.
@@ -0,0 +1,232 @@
1
+ Metadata-Version: 2.4
2
+ Name: promptecho
3
+ Version: 0.1.0
4
+ Summary: Record & replay for LLM API calls — like vcrpy/nock, built for LLM traffic.
5
+ License-Expression: MIT
6
+ Keywords: anthropic,llm,mock,openai,pytest,record-replay,testing,vcr
7
+ Requires-Python: >=3.9
8
+ Requires-Dist: httpx>=0.24
9
+ Requires-Dist: pyyaml>=6.0
10
+ Provides-Extra: dev
11
+ Requires-Dist: anthropic; extra == 'dev'
12
+ Requires-Dist: openai; extra == 'dev'
13
+ Requires-Dist: pytest>=7; extra == 'dev'
14
+ Description-Content-Type: text/markdown
15
+
16
+ # promptecho
17
+
18
+ **Record & replay for LLM API calls.** Like [`vcrpy`](https://github.com/kevin1024/vcrpy) / [`nock`](https://github.com/nock/nock), but built for the way LLM traffic actually behaves.
19
+
20
+ Your LLM tests have three problems: they're **flaky** (non-deterministic outputs), **slow** (real network round-trips), and **expensive** (burning tokens in CI on every run). promptecho records each real API call once to a cassette file, then replays it forever — deterministically, instantly, for free.
21
+
22
+ ```python
23
+ import promptecho
24
+ from anthropic import Anthropic
25
+
26
+ @promptecho.use_cassette("cassettes/summarize.yaml")
27
+ def test_summarize():
28
+ client = Anthropic()
29
+ msg = client.messages.create(
30
+ model="claude-opus-4-8",
31
+ max_tokens=100,
32
+ messages=[{"role": "user", "content": "Summarize: the cat sat on the mat."}],
33
+ )
34
+ assert "cat" in msg.content[0].text.lower()
35
+ ```
36
+
37
+ First run: one real call, recorded to `cassettes/summarize.yaml`.
38
+ Every run after: replayed from disk. No network, no tokens, no flake.
39
+
40
+ > **Proof, not marketing.** The end-to-end test that gates every release records against a local server, **shuts the server down**, then replays. Same response, zero network. If the response can come back with the upstream gone, the cassette is genuinely doing the work — not a partial proxy. See [`tests/test_record_replay.py`](tests/test_record_replay.py).
41
+
42
+ ---
43
+
44
+ ## Why not just use vcrpy?
45
+
46
+ You can — at the HTTP layer, vcrpy works on LLM calls today. promptecho exists because LLM traffic breaks vcrpy's assumptions in five specific ways:
47
+
48
+ 1. **Matching.** vcrpy matches on raw request bytes. LLM bodies carry volatile fields (client-injected IDs, reordered tools, whitespace) that change the bytes without changing the *meaning* — so byte-matching misses on replay. promptecho matches on a **normalized fingerprint** of the fields that determine the response, and **canonicalizes across providers**: it knows `content: "hi"` equals `content: [{"type":"text","text":"hi"}]`, an Anthropic top-level `system` equals an OpenAI `system`-role message, and an Anthropic `input_schema` tool def equals an OpenAI `function.parameters`. A raw-bytes VCR can't.
49
+ 2. **Streaming.** Most LLM calls are SSE streams. promptecho records the event stream and faithfully re-emits it on replay, so `stream=True` and token-by-token iteration work identically against a cassette — including reasoning deltas.
50
+ 3. **Binary / multimodal responses.** vcrpy's text-based cassettes silently corrupt raw `image/*` / `audio/*` / `octet-stream` bodies. promptecho detects them by `Content-Type` and base64-encodes them in the cassette, so image-out and audio-out responses round-trip byte-exact.
51
+ 4. **Debuggable CI failures.** When a vcrpy cassette miss happens, you get *"no match"*. promptecho prints the exact path that changed: `messages[1].content: recorded "summarize the cat" / incoming "summarize the dog"`. Test failures are actionable, not detective work.
52
+ 5. **Secrets.** API keys live in headers on every call. promptecho redacts them by default — a cassette is safe to commit.
53
+
54
+ ## What promptecho is *not*
55
+
56
+ - **Not a cache.** Replay matching is exact/normalized and deterministic, on purpose. It does **not** semantically match "different prompt, close enough" — that would put non-determinism back into the harness you're using to remove it. (A separate opt-in fuzzy mode is on the roadmap as a dev-loop convenience; it will never be the default and should never be used in CI.)
57
+ - **Not an eval.** It freezes a response so your *surrounding code* is testable. Judging whether the response is *good* is a different tool (see roadmap: `toMatchLLMSnapshot()`).
58
+
59
+ ---
60
+
61
+ ## What it covers
62
+
63
+ promptecho intercepts at the `httpx` transport layer. **If the SDK uses httpx, promptecho sees the call** — which is almost everything modern.
64
+
65
+ | You're calling | Covered? |
66
+ |---|---|
67
+ | Anthropic, OpenAI, Mistral, Cohere, `google-genai` SDKs | ✅ |
68
+ | **OpenAI SDK with custom `base_url`** → OpenRouter, Together, Fireworks, Cerebras, Groq, DeepInfra, Perplexity | ✅ |
69
+ | **Self-hosted vLLM / TGI / SGLang / LM Studio / Ollama** (OpenAI-compatible mode) | ✅ |
70
+ | Your **own fine-tune** behind any of the above | ✅ |
71
+ | **Reasoning models** — o1/o3, Claude extended thinking, DeepSeek-R1 | ✅ (incl. `reasoning_effort` / `thinking` in default match-on) |
72
+ | **Multimodal** — base64-in-JSON (vision, Claude image-out, GPT-4o) and raw binary (`image/*`, `audio/*`) | ✅ (byte-exact round-trip) |
73
+ | Bedrock via boto3, HF `InferenceClient`, in-process `transformers` | ❌ (see workarounds in [SUPPORT.md](SUPPORT.md)) |
74
+
75
+ Full matrix with caveats and workarounds: [**SUPPORT.md**](SUPPORT.md). For practical recipes by scenario (startup / enterprise / research), see [**TUTORIAL.md**](TUTORIAL.md).
76
+
77
+ ### Hosted open-source via the OpenAI SDK
78
+
79
+ This is the dominant pattern for non-Anthropic/non-OpenAI usage, and it Just Works:
80
+
81
+ ```python
82
+ from openai import OpenAI
83
+ client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key="...")
84
+
85
+ @promptecho.use_cassette("cassettes/openrouter.yaml")
86
+ def test_via_openrouter():
87
+ r = client.chat.completions.create(
88
+ model="meta-llama/llama-3.1-70b-instruct",
89
+ messages=[{"role": "user", "content": "hi"}],
90
+ )
91
+ assert r.choices[0].message.content
92
+ ```
93
+
94
+ Provider detection falls back to the shape of the request body when the host is unknown, so localhost gateways, in-house proxies, and self-hosted vLLM/TGI behave the same way as the brand-name hosts.
95
+
96
+ ---
97
+
98
+ ## Install
99
+
100
+ ```bash
101
+ pip install promptecho # not yet on PyPI — install from source for now
102
+ ```
103
+
104
+ ```bash
105
+ git clone <repo> && cd promptecho
106
+ pip install -e .
107
+ ```
108
+
109
+ Requires Python ≥ 3.9, `httpx ≥ 0.24`, and `pyyaml ≥ 6.0`.
110
+
111
+ ---
112
+
113
+ ## Usage
114
+
115
+ ### Decorator
116
+ ```python
117
+ @promptecho.use_cassette("cassettes/foo.yaml")
118
+ def test_foo(): ...
119
+ ```
120
+
121
+ ### Context manager
122
+ ```python
123
+ with promptecho.use_cassette("cassettes/foo.yaml"):
124
+ client.messages.create(...)
125
+ ```
126
+
127
+ ### pytest fixture (auto-named per test)
128
+ ```python
129
+ def test_bar(promptecho_cassette): # records to cassettes/test_bar.yaml
130
+ client.messages.create(...)
131
+ ```
132
+
133
+ The fixture defaults to `mode="once"` locally and `mode="none"` when `CI=true` — so a forgotten recording fails the build instead of making a live call.
134
+
135
+ ### Record modes
136
+ Borrowed from vcrpy, so the mental model is free:
137
+
138
+ | mode | absent cassette | present cassette | use for |
139
+ |------|-----------------|------------------|---------|
140
+ | `once` *(default)* | record | replay | normal dev |
141
+ | `none` | **error** | replay | **CI** — guarantees no live calls |
142
+ | `new_episodes` | record | replay + record new | evolving tests |
143
+ | `all` | record | re-record everything | refreshing fixtures |
144
+
145
+ ```python
146
+ @promptecho.use_cassette("cassettes/foo.yaml", mode="none")
147
+ ```
148
+
149
+ ### Choosing what to match on
150
+
151
+ Defaults to `["model", "messages", "system", "tools", "tool_choice", "reasoning_effort", "reasoning", "thinking"]` — everything that determines the response for a chat-shaped call, including reasoning-model knobs.
152
+
153
+ ```python
154
+ @promptecho.use_cassette(
155
+ "cassettes/foo.yaml",
156
+ match_on=["model", "messages", "system", "temperature"], # add temperature
157
+ )
158
+ ```
159
+
160
+ For non-chat shapes (raw TGI `/generate`, embeddings) you'll want to override, e.g. `match_on=["model", "input"]` for an embeddings endpoint. See [SUPPORT.md → Request shapes](SUPPORT.md#request-shapes).
161
+
162
+ ### Async
163
+
164
+ Works identically with `httpx.AsyncClient` and the async surfaces of Anthropic / OpenAI / Mistral SDKs — the async transport is patched the same way as sync.
165
+
166
+ ---
167
+
168
+ ## Cassette format
169
+
170
+ Human-readable YAML, designed to diff cleanly in PRs:
171
+
172
+ ```yaml
173
+ version: 1
174
+ match_on: [model, messages, system, tools, tool_choice, reasoning_effort, reasoning, thinking]
175
+ interactions:
176
+ - request:
177
+ method: POST
178
+ url: https://api.anthropic.com/v1/messages
179
+ match_key: ef43f6acaed95b2f # fingerprint of matched fields
180
+ matched_on: [model, messages, system, tools, tool_choice]
181
+ body: # canonical (provider-normalized) body
182
+ model: claude-opus-4-8
183
+ messages:
184
+ - {role: user, content: "Summarize: the cat sat on the mat."}
185
+ response:
186
+ status: 200
187
+ headers: {content-type: application/json}
188
+ streaming: false
189
+ body:
190
+ content: [{type: text, text: "A cat sat on a mat."}]
191
+ usage: {input_tokens: 14, output_tokens: 8}
192
+ ```
193
+
194
+ - **Streamed** responses store the ordered SSE events under `response.events` with `streaming: true`; replay re-emits them in order.
195
+ - **Binary** responses (image/audio/octet-stream) get `binary: true` and the body is base64-encoded; replay decodes and returns the original bytes.
196
+ - **The stored body is the canonical, provider-normalized shape** — not the raw provider JSON. That makes cassettes provider-agnostic and easier to skim in code review.
197
+
198
+ Headers auto-redacted on record: `authorization`, `x-api-key`, `openai-organization`. The list is configurable.
199
+
200
+ See [`examples/cassettes/example.yaml`](examples/cassettes/example.yaml) for a real one.
201
+
202
+ ---
203
+
204
+ ## Status
205
+
206
+ **v0.1.0, working core. 19 tests, all green.** Not yet on PyPI.
207
+
208
+ Records and replays real httpx traffic — sync, async, SSE streaming, binary responses, cross-provider request shapes — verified end-to-end against a local server that gets shut down between record and replay.
209
+
210
+ ### Roadmap (build-in-public)
211
+
212
+ Done:
213
+ - [x] httpx sync + async transport interception
214
+ - [x] SSE streaming record/replay
215
+ - [x] pytest plugin + auto-naming
216
+ - [x] Per-provider request normalizers (Anthropic / OpenAI / generic)
217
+ - [x] Reasoning-model match defaults (`reasoning_effort`, `thinking`, `reasoning`)
218
+ - [x] Binary response round-trip (image/audio/octet-stream — base64 in cassette)
219
+ - [x] Field-level diff on cassette miss (CI `mode=none` errors pinpoint the changed path, not just the field name)
220
+
221
+ Next:
222
+ - [ ] `requests` / `urllib3` interception backend — unlocks boto3-Bedrock and HF `InferenceClient`
223
+ - [ ] `promptecho lint` — find un-recorded calls in a test suite
224
+ - [ ] **`toMatchLLMSnapshot()` sibling** — semantic snapshot assertions on top of recorded calls
225
+
226
+ ## Design
227
+
228
+ For the why-not-the-other-way decisions — fingerprint vs raw bytes, why semantic matching is fenced off, how SSE re-emission works, how cross-provider normalization is structured — see [DESIGN.md](DESIGN.md).
229
+
230
+ ## License
231
+
232
+ MIT
@@ -0,0 +1,11 @@
1
+ promptecho/__init__.py,sha256=6SokaWIYwu6dZm0Vin2psxevz8jcNuLDsDM3h12kMwg,2309
2
+ promptecho/cassette.py,sha256=uzJh084L4gk8zr-H6psWxd9oDMKP0glfNzJlcIEaHCA,4445
3
+ promptecho/matcher.py,sha256=t_5kBPphChTwYe_0WAjkcTPLYDqO7tKaYgGbRTpwnlg,4484
4
+ promptecho/normalizers.py,sha256=0X9oItnIolaS_62-sB3D-h6yYduAAbcsK23tYWI3ZlI,5807
5
+ promptecho/patch.py,sha256=Xc5D37Odj0pElVxfon-7HT-sktBBZn0VeRkOq-3ihKE,5223
6
+ promptecho/pytest_plugin.py,sha256=oVF3vzqVHoG_Vk-WjpVo_Wota-CpgV0YVahReJbwVFM,827
7
+ promptecho/transport.py,sha256=g6bYMwOV5b0LVqGLjwQ1dJyfNKLnE4HiQegzpLjqSSg,3147
8
+ promptecho-0.1.0.dist-info/METADATA,sha256=zh0KRAXXMl-hoWYUJLQU3wx2bjSRkpMeWOt9EulAahU,10858
9
+ promptecho-0.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
10
+ promptecho-0.1.0.dist-info/entry_points.txt,sha256=bw3ZMfD4yiP33qS4WOOV11TerOvgYXOS8FL70k6pYHM,49
11
+ promptecho-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.27.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [pytest11]
2
+ promptecho = promptecho.pytest_plugin