promptecho 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- promptecho/__init__.py +81 -0
- promptecho/cassette.py +138 -0
- promptecho/matcher.py +115 -0
- promptecho/normalizers.py +160 -0
- promptecho/patch.py +134 -0
- promptecho/pytest_plugin.py +29 -0
- promptecho/transport.py +89 -0
- promptecho-0.1.0.dist-info/METADATA +232 -0
- promptecho-0.1.0.dist-info/RECORD +11 -0
- promptecho-0.1.0.dist-info/WHEEL +4 -0
- promptecho-0.1.0.dist-info/entry_points.txt +2 -0
promptecho/__init__.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""promptecho — record & replay for LLM API calls.
|
|
2
|
+
|
|
3
|
+
Public API:
|
|
4
|
+
promptecho.use_cassette(path, mode="once", match_on=None) # decorator + context manager
|
|
5
|
+
promptecho.Mode # record modes
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import functools
|
|
11
|
+
from contextlib import contextmanager
|
|
12
|
+
|
|
13
|
+
from .cassette import Cassette
|
|
14
|
+
from .transport import Mode
|
|
15
|
+
|
|
16
|
+
__all__ = ["use_cassette", "Mode", "Cassette"]
|
|
17
|
+
__version__ = "0.1.0"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@contextmanager
def _activate(cassette: Cassette, mode: Mode):
    """Install the httpx patch around the ``with`` body, then undo it and persist.

    Yields the cassette itself. In ``Mode.ALL`` the existing interactions are
    discarded up front and the cassette is flagged dirty, so ``save()`` rewrites
    the file on exit even if nothing new was recorded.
    """
    from .patch import install, uninstall

    rerecord = mode is Mode.ALL
    if rerecord:
        # Start from an empty tape and force a write on exit.
        cassette.interactions.clear()
        cassette._dirty = True

    token = install(cassette, mode)
    try:
        yield cassette
    finally:
        # Put the original transport methods back before flushing to disk.
        uninstall(token)
        cassette.save()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class _UseCassette:
    """Works as both a decorator and a context manager (like vcrpy.use_cassette).

    The cassette is (re)loaded from ``path`` every time the block or the
    decorated function is entered. When the decorated function is an
    ``async def``, the patch stays installed until the coroutine has finished
    running rather than only while the coroutine object is being created.
    """

    def __init__(self, path: str, mode: str | Mode = Mode.ONCE, match_on=None):
        self.path = path
        self.mode = Mode(mode)  # accepts the enum member or its string value
        self.match_on = match_on
        # Stack of live context managers so the same instance can be entered
        # more than once (nested) and each exit closes the matching entry.
        self._cms: list = []

    def _load(self) -> Cassette:
        """Load the cassette for this path (empty cassette if the file is absent)."""
        return Cassette.load(self.path, match_on=self.match_on)

    def __enter__(self):
        cm = _activate(self._load(), self.mode)
        cassette = cm.__enter__()
        self._cms.append(cm)  # tracked only once activation has succeeded
        return cassette

    def __exit__(self, *exc):
        return self._cms.pop().__exit__(*exc)

    def __call__(self, func):
        """Wrap ``func`` so every call runs inside a freshly activated cassette."""
        import inspect

        if inspect.iscoroutinefunction(func):
            # Await inside the ``with`` so requests made by the coroutine are
            # intercepted; a plain wrapper would exit before any of them run.
            @functools.wraps(func)
            async def async_wrapper(*args, **kwargs):
                with _activate(self._load(), self.mode):
                    return await func(*args, **kwargs)

            return async_wrapper

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            with _activate(self._load(), self.mode):
                return func(*args, **kwargs)

        return wrapper
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def use_cassette(path: str, mode: str | Mode = Mode.ONCE, match_on=None) -> _UseCassette:
    """Record on first run, replay forever after.

    Returns an object usable either as a decorator::

        @promptecho.use_cassette("cassettes/foo.yaml")
        def test_foo(): ...

    or as a context manager::

        with promptecho.use_cassette("cassettes/foo.yaml", mode="none"):
            client.messages.create(...)
    """
    handle = _UseCassette(path, mode=mode, match_on=match_on)
    return handle
|
promptecho/cassette.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
"""Cassette: the on-disk record of interactions. Human-readable YAML.
|
|
2
|
+
|
|
3
|
+
A cassette is a list of (request, response) interactions keyed by request
|
|
4
|
+
fingerprint. It is designed to diff cleanly in PRs and to be safe to commit
|
|
5
|
+
(secrets are redacted before anything reaches disk).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import dataclasses
|
|
11
|
+
import os
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
|
|
14
|
+
import yaml
|
|
15
|
+
|
|
16
|
+
from .matcher import DEFAULT_MATCH_ON, fingerprint
|
|
17
|
+
|
|
18
|
+
REDACTED = "REDACTED"
|
|
19
|
+
REDACT_HEADERS = {"authorization", "x-api-key", "openai-organization"}
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class Response:
    """One recorded HTTP response as it is held in a cassette."""

    status: int
    headers: dict
    # True when the response is stored as a sequence of SSE events.
    streaming: bool = False
    # Non-streaming payload: JSON, str, or a base64 str when ``binary`` is set.
    body: object | None = None
    # Raw SSE events, in the order they were received.
    events: list[str] = field(default_factory=list)
    # When True, ``body`` holds base64-encoded raw bytes.
    binary: bool = False
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class Interaction:
    """A recorded request together with the response it produced."""

    method: str
    url: str
    # Fingerprint of the request body under ``matched_on``.
    match_key: str
    # Field names that were fingerprinted when this interaction was recorded.
    matched_on: list[str]
    # Request body as stored in the cassette.
    body: dict
    response: Response
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclass
class Cassette:
    """In-memory view of one cassette file: its match fields and interactions."""

    path: str
    match_on: list[str] = field(default_factory=lambda: list(DEFAULT_MATCH_ON))
    interactions: list[Interaction] = field(default_factory=list)
    _dirty: bool = False  # True when in-memory state has not been written yet

    # --- lookup -----------------------------------------------------------
    def find(self, key: str) -> Interaction | None:
        """Return the first interaction whose ``match_key`` equals ``key``, else None."""
        for ix in self.interactions:
            if ix.match_key == key:
                return ix
        return None

    def record(self, method: str, url: str, body: dict, response: Response) -> None:
        """Append a new interaction (with redacted response headers) and mark dirty."""
        key = fingerprint(body, self.match_on)
        self.interactions.append(
            Interaction(
                method=method,
                url=url,
                match_key=key,
                matched_on=list(self.match_on),
                body=body,
                response=_redact_response(response),
            )
        )
        self._dirty = True

    # --- persistence ------------------------------------------------------
    @classmethod
    def load(cls, path: str, match_on: list[str] | None = None) -> "Cassette":
        """Read the cassette at ``path``; a missing file yields an empty cassette.

        A ``match_on`` list stored in the file takes precedence over the
        ``match_on`` argument, which in turn falls back to the defaults.
        """
        mo = match_on or list(DEFAULT_MATCH_ON)
        if not os.path.exists(path):
            return cls(path=path, match_on=mo)
        # Explicit UTF-8: save() writes non-ASCII text unescaped, so relying on
        # the locale encoding would make cassettes non-portable across machines.
        with open(path, encoding="utf-8") as f:
            raw = yaml.safe_load(f) or {}
        # ``or []`` tolerates an explicit ``interactions: null`` in the file.
        interactions = [_interaction_from_dict(d) for d in raw.get("interactions") or []]
        return cls(path=path, match_on=raw.get("match_on", mo), interactions=interactions)

    def save(self) -> None:
        """Write the cassette as YAML if anything changed since load/save."""
        if not self._dirty:
            return
        os.makedirs(os.path.dirname(self.path) or ".", exist_ok=True)
        doc = {
            "version": 1,
            "match_on": self.match_on,
            "interactions": [_interaction_to_dict(ix) for ix in self.interactions],
        }
        # allow_unicode=True emits raw non-ASCII characters, so the file must be
        # opened as UTF-8 or the write can fail under a narrower locale encoding.
        with open(self.path, "w", encoding="utf-8") as f:
            yaml.safe_dump(doc, f, sort_keys=False, allow_unicode=True, width=100)
        self._dirty = False
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# --- (de)serialization helpers -------------------------------------------
|
|
96
|
+
def _redact_response(resp: Response) -> Response:
    """Return a copy of ``resp`` with sensitive header values masked.

    Header names are compared case-insensitively against ``REDACT_HEADERS``;
    matching values become ``REDACTED``. ``resp`` itself is left untouched.
    """
    scrubbed = {}
    for name, value in resp.headers.items():
        scrubbed[name] = REDACTED if name.lower() in REDACT_HEADERS else value
    return dataclasses.replace(resp, headers=scrubbed)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _interaction_to_dict(ix: Interaction) -> dict:
|
|
102
|
+
r = ix.response
|
|
103
|
+
response = {"status": r.status, "headers": r.headers, "streaming": r.streaming}
|
|
104
|
+
if r.binary:
|
|
105
|
+
response["binary"] = True
|
|
106
|
+
if r.streaming:
|
|
107
|
+
response["events"] = r.events
|
|
108
|
+
else:
|
|
109
|
+
response["body"] = r.body
|
|
110
|
+
return {
|
|
111
|
+
"request": {
|
|
112
|
+
"method": ix.method,
|
|
113
|
+
"url": ix.url,
|
|
114
|
+
"match_key": ix.match_key,
|
|
115
|
+
"matched_on": ix.matched_on,
|
|
116
|
+
"body": ix.body,
|
|
117
|
+
},
|
|
118
|
+
"response": response,
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _interaction_from_dict(d: dict) -> Interaction:
    """Rebuild an ``Interaction`` from the dict layout stored in a cassette.

    ``method``, ``url``, ``match_key`` and the response ``status`` are
    required; every other field falls back to a default when absent.
    """
    req = d["request"]
    resp = d["response"]

    # Request fields are read first, then the response, in file order.
    method = req["method"]
    url = req["url"]
    match_key = req["match_key"]
    matched_on = req.get("matched_on", list(DEFAULT_MATCH_ON))
    body = req.get("body", {})

    recorded = Response(
        status=resp["status"],
        headers=resp.get("headers", {}),
        streaming=resp.get("streaming", False),
        body=resp.get("body"),
        events=resp.get("events", []),
        binary=resp.get("binary", False),
    )
    return Interaction(
        method=method,
        url=url,
        match_key=match_key,
        matched_on=matched_on,
        body=body,
        response=recorded,
    )
|
promptecho/matcher.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""Request fingerprinting — the deterministic core of replay matching.
|
|
2
|
+
|
|
3
|
+
We match on a *normalized fingerprint* of the request fields that actually
|
|
4
|
+
determine the response, never on raw bytes (see DESIGN.md §2).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import hashlib
|
|
10
|
+
import json
|
|
11
|
+
|
|
12
|
+
DEFAULT_MATCH_ON = [
|
|
13
|
+
"model", "messages", "system", "tools", "tool_choice",
|
|
14
|
+
# Reasoning-model knobs that change the response without changing the prompt:
|
|
15
|
+
# OpenAI o-series, Anthropic extended thinking, OpenRouter unified field.
|
|
16
|
+
# If these aren't matched, "reasoning_effort=high" and "low" tests collide.
|
|
17
|
+
"reasoning_effort", "reasoning", "thinking",
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def canonical_json(obj: object) -> str:
    """Serialize ``obj`` to JSON in a stable form.

    Keys are sorted, separators carry no padding and non-ASCII characters are
    kept as-is, so the same logical value always yields the same text.
    """
    compact = (",", ":")  # item separator, key separator
    return json.dumps(obj, ensure_ascii=False, separators=compact, sort_keys=True)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def pick(body: dict, match_on: list[str]) -> dict:
    """Return the subset of ``body`` named in ``match_on``, in ``match_on`` order.

    Fields that are not present in ``body`` are skipped.
    """
    selected = {}
    for name in match_on:
        if name in body:
            selected[name] = body[name]
    return selected
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def fingerprint(body: dict, match_on: list[str] | None = None) -> str:
    """Compute the cassette key for a request body.

    Only the fields listed in ``match_on`` (``DEFAULT_MATCH_ON`` when it is
    empty or omitted) feed the hash, so other fields cannot change the key.
    The key is the first 16 hex characters of the SHA-256 of the canonical
    JSON of those fields.
    """
    wanted = match_on if match_on else DEFAULT_MATCH_ON
    payload = canonical_json(pick(body, wanted)).encode("utf-8")
    return hashlib.sha256(payload).hexdigest()[:16]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def diff_fields(incoming: dict, recorded: dict, match_on: list[str]) -> list[str]:
    """List the ``match_on`` field names whose top-level values differ.

    A field missing on one side compares as ``None``. This is the cheap
    pointer; :func:`diff_request` produces the leaf-level, readable diff.
    """
    changed = []
    for name in match_on:
        if incoming.get(name) != recorded.get(name):
            changed.append(name)
    return changed
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
_MISSING = object() # sentinel for "field/element not present on this side"
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _walk_diff(incoming, recorded, path: str):
|
|
55
|
+
"""Yield (path, recorded_value, incoming_value) for each leaf-level difference.
|
|
56
|
+
|
|
57
|
+
Walks both structures in parallel. dicts are compared by key (sorted for stable
|
|
58
|
+
output); lists by index, with extras flagged. Anything else is a leaf — emit and
|
|
59
|
+
stop. Recursion depth is bounded by request shape (chat messages rarely nest deep).
|
|
60
|
+
"""
|
|
61
|
+
if incoming == recorded:
|
|
62
|
+
return
|
|
63
|
+
if type(incoming) is not type(recorded):
|
|
64
|
+
yield path, recorded, incoming
|
|
65
|
+
return
|
|
66
|
+
if isinstance(incoming, dict):
|
|
67
|
+
for k in sorted(set(incoming) | set(recorded)):
|
|
68
|
+
sub = f"{path}.{k}" if path else k
|
|
69
|
+
i_val = incoming.get(k, _MISSING)
|
|
70
|
+
r_val = recorded.get(k, _MISSING)
|
|
71
|
+
if i_val is _MISSING or r_val is _MISSING:
|
|
72
|
+
yield sub, r_val, i_val
|
|
73
|
+
else:
|
|
74
|
+
yield from _walk_diff(i_val, r_val, sub)
|
|
75
|
+
return
|
|
76
|
+
if isinstance(incoming, list):
|
|
77
|
+
for i in range(max(len(incoming), len(recorded))):
|
|
78
|
+
sub = f"{path}[{i}]"
|
|
79
|
+
if i >= len(incoming):
|
|
80
|
+
yield sub, recorded[i], _MISSING
|
|
81
|
+
elif i >= len(recorded):
|
|
82
|
+
yield sub, _MISSING, incoming[i]
|
|
83
|
+
else:
|
|
84
|
+
yield from _walk_diff(incoming[i], recorded[i], sub)
|
|
85
|
+
return
|
|
86
|
+
yield path, recorded, incoming
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _truncate(v, limit: int = 80) -> str:
    """Render ``v`` for a diff line, shortened to at most ``limit`` characters.

    The ``_MISSING`` sentinel renders as ``<not present>``; strings are shown
    verbatim and everything else as canonical JSON. Over-long text is cut and
    suffixed with ``...``.
    """
    if v is _MISSING:
        return "<not present>"
    text = v if isinstance(v, str) else canonical_json(v)
    if len(text) > limit:
        return text[: limit - 3] + "..."
    return text
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def diff_request(incoming: dict, recorded: dict, match_on: list[str]) -> str:
    """Build a multi-line, readable diff of two request bodies.

    Only ``match_on`` fields are inspected; fields outside the match set are
    ignored. Each leaf difference contributes three lines (path, recorded
    value, incoming value). The result is the empty string when no matched
    field differs.
    """
    report = []
    for name in match_on:
        new = incoming.get(name, _MISSING)
        old = recorded.get(name, _MISSING)
        if new is _MISSING and old is _MISSING:
            continue  # field absent on both sides
        if new == old:
            continue
        for path, was, now in _walk_diff(new, old, name):
            report += [
                f" {path}:",
                f" recorded: {_truncate(was)}",
                f" incoming: {_truncate(now)}",
            ]
    return "\n".join(report)
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
"""Per-provider request normalization.
|
|
2
|
+
|
|
3
|
+
Different providers (and even one SDK across versions) express the *same logical
|
|
4
|
+
prompt* in different shapes:
|
|
5
|
+
|
|
6
|
+
- Anthropic puts the system prompt in a top-level ``system`` param; OpenAI puts
|
|
7
|
+
it in a ``system``/``developer`` role message.
|
|
8
|
+
- Message content may be a bare string or a list of typed content blocks.
|
|
9
|
+
- Tool defs differ: Anthropic ``{name, description, input_schema}`` vs OpenAI
|
|
10
|
+
``{type: function, function: {name, description, parameters}}``.
|
|
11
|
+
|
|
12
|
+
``normalize()`` maps a raw request body into one canonical shape, so logically
|
|
13
|
+
identical calls produce the same fingerprint. This is the thing a raw-bytes HTTP
|
|
14
|
+
VCR fundamentally cannot do.
|
|
15
|
+
|
|
16
|
+
The canonical body is also what gets written to the cassette — a provider-agnostic
|
|
17
|
+
view of the call, which is arguably more readable than the raw provider JSON.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
from urllib.parse import urlsplit
|
|
23
|
+
|
|
24
|
+
CANONICAL_ROLES_AS_SYSTEM = ("system", "developer")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# --- detection ------------------------------------------------------------
|
|
28
|
+
def detect(url: str, body: dict) -> str:
    """Guess the provider for a request: URL host first, then body shape.

    Returns ``"anthropic"``, ``"openai"`` or ``"generic"``.
    """
    hostname = urlsplit(url).hostname
    host = hostname.lower() if hostname else ""
    # Host substrings, checked in priority order.
    for needle, provider in (("anthropic", "anthropic"), ("openai", "openai"), ("azure", "openai")):
        if needle in host:
            return provider

    # Shape fallback (covers localhost / proxies / gateways).
    if "system" in body:
        return "anthropic"
    for message in body.get("messages") or []:
        if isinstance(message, dict) and message.get("role") in CANONICAL_ROLES_AS_SYSTEM:
            return "openai"
    tools = body.get("tools") or []
    for tool in tools:
        if isinstance(tool, dict) and "input_schema" in tool:
            return "anthropic"
    for tool in tools:
        if isinstance(tool, dict) and "function" in tool:
            return "openai"
    return "generic"
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# --- shared canonicalizers ------------------------------------------------
|
|
51
|
+
def _canon_content(content):
|
|
52
|
+
"""Collapse a single text block to a bare string; canonicalize text blocks."""
|
|
53
|
+
if isinstance(content, str):
|
|
54
|
+
return content
|
|
55
|
+
if isinstance(content, list):
|
|
56
|
+
blocks = [_canon_block(b) for b in content]
|
|
57
|
+
if len(blocks) == 1 and isinstance(blocks[0], dict) and blocks[0].get("type") == "text":
|
|
58
|
+
return blocks[0]["text"]
|
|
59
|
+
return blocks
|
|
60
|
+
return content
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _canon_block(block):
|
|
64
|
+
if isinstance(block, dict) and block.get("type") == "text" and "text" in block:
|
|
65
|
+
return {"type": "text", "text": block["text"]}
|
|
66
|
+
return block
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _canon_message(message):
|
|
70
|
+
if not isinstance(message, dict):
|
|
71
|
+
return message
|
|
72
|
+
out = dict(message)
|
|
73
|
+
if "content" in out:
|
|
74
|
+
out["content"] = _canon_content(out["content"])
|
|
75
|
+
return out
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _canon_system(system):
|
|
79
|
+
if isinstance(system, list):
|
|
80
|
+
texts = [b.get("text", "") for b in system
|
|
81
|
+
if isinstance(b, dict) and b.get("type") == "text"]
|
|
82
|
+
if texts:
|
|
83
|
+
return "\n".join(texts)
|
|
84
|
+
return system
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _canon_tool(tool):
|
|
88
|
+
if not isinstance(tool, dict):
|
|
89
|
+
return tool
|
|
90
|
+
if isinstance(tool.get("function"), dict): # OpenAI shape
|
|
91
|
+
fn = tool["function"]
|
|
92
|
+
canon = {"name": fn.get("name"), "description": fn.get("description"),
|
|
93
|
+
"parameters": fn.get("parameters")}
|
|
94
|
+
else: # Anthropic / generic
|
|
95
|
+
canon = {"name": tool.get("name"), "description": tool.get("description"),
|
|
96
|
+
"parameters": tool.get("input_schema", tool.get("parameters"))}
|
|
97
|
+
return {k: v for k, v in canon.items() if v is not None}
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _canon_tool_choice(choice):
|
|
101
|
+
if isinstance(choice, str):
|
|
102
|
+
return {"mode": choice}
|
|
103
|
+
if isinstance(choice, dict):
|
|
104
|
+
if isinstance(choice.get("function"), dict): # OpenAI
|
|
105
|
+
return {"mode": "tool", "name": choice["function"].get("name")}
|
|
106
|
+
if "type" in choice: # Anthropic
|
|
107
|
+
out = {"mode": choice["type"]}
|
|
108
|
+
if choice.get("name"):
|
|
109
|
+
out["name"] = choice["name"]
|
|
110
|
+
return out
|
|
111
|
+
return choice
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _apply_common(out: dict) -> dict:
|
|
115
|
+
if "messages" in out:
|
|
116
|
+
out["messages"] = [_canon_message(m) for m in out["messages"]]
|
|
117
|
+
if "tools" in out:
|
|
118
|
+
out["tools"] = [_canon_tool(t) for t in out["tools"]]
|
|
119
|
+
if "tool_choice" in out:
|
|
120
|
+
out["tool_choice"] = _canon_tool_choice(out["tool_choice"])
|
|
121
|
+
return out
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
# --- per-provider ---------------------------------------------------------
|
|
125
|
+
def _anthropic(body: dict) -> dict:
    """Canonicalize an Anthropic-style body without mutating the input dict.

    The top-level ``system`` value (when present) is flattened, then the
    shared message/tool canonicalizers are applied to a shallow copy.
    """
    copy = dict(body)
    if "system" in copy:
        copy["system"] = _canon_system(copy["system"])
    return _apply_common(copy)
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _system_text(content):
    """Flatten system-message content to a string; None when it has no pure-text form."""
    canon = _canon_content(content)
    if isinstance(canon, str):
        return canon
    if isinstance(canon, list) and canon:
        texts = [
            b["text"]
            for b in canon
            if isinstance(b, dict) and b.get("type") == "text" and isinstance(b.get("text"), str)
        ]
        # Only flatten when every block is text, so nothing is silently lost.
        if len(texts) == len(canon):
            return "\n".join(texts)
    return None


def _openai(body: dict) -> dict:
    """Canonicalize an OpenAI-style chat body without mutating the input dict.

    ``system``/``developer`` role messages are lifted out of ``messages`` into
    a top-level newline-joined ``system`` string, ``max_completion_tokens`` is
    renamed to ``max_tokens`` when the latter is absent, and the shared
    message/tool canonicalizers are applied.

    A system message is left in ``messages`` when it cannot be lifted: either
    the body already carries a top-level ``system``, or its content is not
    pure text. Dropping it would remove it from the fingerprint and let
    requests with different system prompts replay each other's recordings.
    """
    out = dict(body)
    hoist = "system" not in out  # never overwrite an explicit top-level system
    system_parts, rest = [], []
    for m in out.get("messages", []):
        text = None
        if hoist and isinstance(m, dict) and m.get("role") in CANONICAL_ROLES_AS_SYSTEM:
            text = _system_text(m.get("content", ""))
        if text is None:
            rest.append(m)
        else:
            system_parts.append(text)
    if system_parts:
        out["system"] = "\n".join(system_parts)
    out["messages"] = rest
    if "max_completion_tokens" in out and "max_tokens" not in out:
        out["max_tokens"] = out.pop("max_completion_tokens")
    return _apply_common(out)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _generic(body: dict) -> dict:
    """Apply only the shared canonicalizers, working on a shallow copy of ``body``."""
    copy = dict(body)
    return _apply_common(copy)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
_NORMALIZERS = {"anthropic": _anthropic, "openai": _openai, "generic": _generic}
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def normalize(url: str, body: dict) -> dict:
    """Map a raw provider request body into the canonical shape used for matching.

    The provider is detected from ``url`` and ``body`` and the matching
    normalizer is applied. A non-dict ``body`` is returned unchanged.
    """
    if isinstance(body, dict):
        provider = detect(url, body)
        return _NORMALIZERS[provider](body)
    return body
|
promptecho/patch.py
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
"""httpx interception — the wiring that makes record/replay real.
|
|
2
|
+
|
|
3
|
+
We monkeypatch ``httpx.HTTPTransport.handle_request`` (and the async twin) so
|
|
4
|
+
every client built on httpx — Anthropic, OpenAI, raw httpx — routes through the
|
|
5
|
+
record/replay decision in :mod:`promptecho.transport`. This is the same approach
|
|
6
|
+
respx and vcrpy's httpx stub use. See DESIGN.md §1.
|
|
7
|
+
|
|
8
|
+
On record we read the full upstream response, capture it, and return a fresh
|
|
9
|
+
buffered response so the SDK can consume it normally. Streaming (SSE) responses
|
|
10
|
+
are captured as their ordered events and re-emitted byte-for-byte on replay.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import base64
|
|
16
|
+
import json
|
|
17
|
+
|
|
18
|
+
import httpx
|
|
19
|
+
|
|
20
|
+
from .cassette import Response as Rec
|
|
21
|
+
from .normalizers import normalize
|
|
22
|
+
from .transport import decide, parse_body
|
|
23
|
+
|
|
24
|
+
# Hop-by-hop / encoding headers that won't match our re-encoded body on replay.
|
|
25
|
+
_DROP_HEADERS = {"content-encoding", "content-length", "transfer-encoding"}
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _request_body(request: httpx.Request) -> dict:
    """Return the request payload parsed via ``parse_body``.

    If the content has not been read yet (``httpx.RequestNotRead``), it is
    read first.
    """
    try:
        payload = request.content
    except httpx.RequestNotRead:
        payload = request.read()
    parsed = parse_body(payload)
    return parsed
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _clean_headers(headers: httpx.Headers) -> dict:
    """Return ``headers`` as a plain dict minus the names in ``_DROP_HEADERS``.

    Names are compared case-insensitively.
    """
    kept = {}
    for name, value in dict(headers).items():
        if name.lower() not in _DROP_HEADERS:
            kept[name] = value
    return kept
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _split_sse(text: str) -> list[str]:
|
|
41
|
+
"""Split an SSE body into individual events (kept readable in the cassette)."""
|
|
42
|
+
return [part + "\n\n" for part in text.split("\n\n") if part.strip()]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _is_binary_content_type(ct: str) -> bool:
|
|
46
|
+
"""Content types we must not decode as text — images, audio, video, octet-stream.
|
|
47
|
+
|
|
48
|
+
Anything text/* or application/json|xml|...+json round-trips cleanly through
|
|
49
|
+
YAML; everything else (image/png, audio/wav, application/octet-stream, etc.)
|
|
50
|
+
gets base64-encoded to preserve bytes exactly.
|
|
51
|
+
"""
|
|
52
|
+
ct = ct.split(";", 1)[0].strip().lower()
|
|
53
|
+
if not ct:
|
|
54
|
+
return False
|
|
55
|
+
if ct.startswith(("image/", "audio/", "video/")):
|
|
56
|
+
return True
|
|
57
|
+
if ct in {"application/octet-stream", "application/pdf", "application/zip"}:
|
|
58
|
+
return True
|
|
59
|
+
return False
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _capture(status: int, headers: httpx.Headers, data: bytes) -> Rec:
    """Turn a fully-read upstream response into a cassette record.

    * ``text/event-stream`` responses are stored as their ordered SSE events.
    * Binary content types are stored base64-encoded with ``binary=True``.
    * Otherwise the body is stored as parsed JSON when it is a JSON object or
      array, as decoded text for any other non-empty payload, and as ``None``
      when empty.
    """
    clean = _clean_headers(headers)
    content_type = clean.get("content-type", "")
    if "text/event-stream" in content_type:
        return Rec(status=status, headers=clean, streaming=True,
                   events=_split_sse(data.decode("utf-8", "replace")))
    if _is_binary_content_type(content_type):
        return Rec(status=status, headers=clean, streaming=False,
                   body=base64.b64encode(data).decode("ascii"), binary=True)
    if not data:
        return Rec(status=status, headers=clean, streaming=False, body=None)
    try:
        parsed = json.loads(data)
    except ValueError:
        parsed = None
    if isinstance(parsed, (dict, list)):
        body = parsed
    else:
        # Scalars (``123``, ``true``, ``null``, ``"x"``) and non-JSON text are
        # kept as raw text: storing the parsed scalar would replay as empty or
        # altered bytes.
        body = data.decode("utf-8", "replace")
    return Rec(status=status, headers=clean, streaming=False, body=body)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _to_httpx(rec: Rec, request: httpx.Request) -> httpx.Response:
    """Build a buffered ``httpx.Response`` from a cassette record.

    Streaming records are re-emitted as the concatenation of their events,
    binary records are base64-decoded, JSON containers and JSON scalars
    (numbers, booleans) are re-serialized, strings are UTF-8 encoded, and a
    missing body yields empty content.
    """
    if rec.streaming:
        content = "".join(rec.events).encode("utf-8")
    elif rec.binary and isinstance(rec.body, str):
        content = base64.b64decode(rec.body)
    elif isinstance(rec.body, str):
        content = rec.body.encode("utf-8")
    elif isinstance(rec.body, (dict, list, bool, int, float)):
        # Scalars are included so a recorded body such as ``123`` or ``true``
        # is not silently replayed as an empty response.
        content = json.dumps(rec.body).encode("utf-8")
    else:
        content = b""
    return httpx.Response(
        status_code=rec.status,
        headers=httpx.Headers(rec.headers),
        content=content,
        request=request,
    )
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _make_sync(cassette, mode, real_fn):
    """Build the replacement for ``httpx.HTTPTransport.handle_request``.

    ``real_fn`` is the method being replaced; it is only called when the
    decision is not a replay. The upstream response is read in full, recorded
    into ``cassette`` and handed back as a fresh buffered response.
    """
    def handle_request(self, request: httpx.Request) -> httpx.Response:
        body = normalize(str(request.url), _request_body(request))
        decision = decide(mode, cassette, body)
        if decision.response is not None:                      # REPLAY (no network)
            return _to_httpx(decision.response, request)
        real = real_fn(self, request)                          # PASS THROUGH
        try:
            payload = real.read()
        finally:
            # Release the upstream stream even when reading fails midway.
            real.close()
        rec = _capture(real.status_code, real.headers, payload)
        cassette.record(request.method, str(request.url), body, rec)  # RECORD
        return _to_httpx(rec, request)

    return handle_request
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _make_async(cassette, mode, real_fn):
    """Build the replacement for ``httpx.AsyncHTTPTransport.handle_async_request``.

    Async twin of ``_make_sync``: replay when a recording exists, otherwise
    await ``real_fn``, read the whole response, record it and return a fresh
    buffered response.
    """
    async def handle_async_request(self, request: httpx.Request) -> httpx.Response:
        body = normalize(str(request.url), _request_body(request))
        decision = decide(mode, cassette, body)
        if decision.response is not None:
            return _to_httpx(decision.response, request)
        real = await real_fn(self, request)
        try:
            payload = await real.aread()
        finally:
            # Release the upstream stream even when reading fails midway.
            await real.aclose()
        rec = _capture(real.status_code, real.headers, payload)
        cassette.record(request.method, str(request.url), body, rec)
        return _to_httpx(rec, request)

    return handle_async_request
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def install(cassette, mode):
    """Patch httpx; returns a token to pass back to :func:`uninstall`.

    The token is the pair of transport methods that were in place before the
    patch (sync first, async second). Those same methods are what the
    replacements fall through to for real network calls.
    """
    original_sync = httpx.HTTPTransport.handle_request
    original_async = httpx.AsyncHTTPTransport.handle_async_request
    httpx.HTTPTransport.handle_request = _make_sync(cassette, mode, original_sync)
    httpx.AsyncHTTPTransport.handle_async_request = _make_async(cassette, mode, original_async)
    return (original_sync, original_async)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def uninstall(saved) -> None:
    """Restore the transport methods captured by :func:`install`."""
    sync_fn, async_fn = saved
    httpx.HTTPTransport.handle_request = sync_fn
    httpx.AsyncHTTPTransport.handle_async_request = async_fn
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""pytest integration: an auto-named cassette per test.
|
|
2
|
+
|
|
3
|
+
def test_summarize(promptecho_cassette): # -> cassettes/test_summarize.yaml
|
|
4
|
+
client.messages.create(...)
|
|
5
|
+
|
|
6
|
+
Mode defaults to ``once`` locally and ``none`` in CI (when the CI env var is set),
|
|
7
|
+
so a forgotten recording fails the build instead of making a live call.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import os
|
|
13
|
+
|
|
14
|
+
import pytest
|
|
15
|
+
|
|
16
|
+
from . import use_cassette
|
|
17
|
+
from .transport import Mode
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _default_mode() -> Mode:
    """Pick ``Mode.NONE`` when the ``CI`` env var is set to a non-empty value, else ``Mode.ONCE``."""
    if os.environ.get("CI"):
        return Mode.NONE
    return Mode.ONCE
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@pytest.fixture
def promptecho_cassette(request):
    """Activate a cassette named after the requesting test for its duration.

    The cassette lives at ``<test file dir>/cassettes/<test node name>.yaml``
    and is used with the mode chosen by ``_default_mode()``. Yields the
    active cassette.
    """
    # Prefer the pathlib-based ``request.path``; ``fspath`` is the legacy
    # py.path accessor and is kept only as a fallback for older pytest.
    test_file = getattr(request, "path", None) or request.fspath
    cassette_dir = os.path.join(os.path.dirname(test_file), "cassettes")
    path = os.path.join(cassette_dir, f"{request.node.name}.yaml")
    with use_cassette(path, mode=_default_mode()) as cassette:
        yield cassette
|
promptecho/transport.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""The replay/record decision logic, isolated from httpx patching mechanics.
|
|
2
|
+
|
|
3
|
+
This module is pure and unit-testable: given a mode, a cassette, and a parsed
|
|
4
|
+
request, it decides whether to replay a recorded response or pass through to the
|
|
5
|
+
network and record. The actual httpx wiring lives in patch.py.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
from enum import Enum
|
|
12
|
+
|
|
13
|
+
from .cassette import Cassette, Response
|
|
14
|
+
from .matcher import diff_request, fingerprint
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Mode(str, Enum):
    """Record modes; each member is also usable as its plain string value."""

    # Default: replay when a recording exists, record when it does not.
    ONCE = "once"
    # Replay only; a request without a recording is an error (CI-safe).
    NONE = "none"
    # Replay existing recordings, record new requests.
    NEW_EPISODES = "new_episodes"
    # Always re-record.
    ALL = "all"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class CassetteMiss(Exception):
    """Signals that, in mode=none, an incoming request had no matching recording."""
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def parse_body(raw: bytes) -> dict:
    """Parse a raw request payload as a JSON object.

    Returns ``{}`` for an empty payload, for anything that is not valid JSON,
    and for valid JSON that is not an object (arrays, strings, numbers, ...).
    The result is therefore always a dict, which is what the fingerprinting
    code expects to receive.
    """
    if not raw:
        return {}
    try:
        parsed = json.loads(raw)
    except (ValueError, UnicodeDecodeError):
        return {}
    # Non-object JSON would break key-based field selection downstream.
    return parsed if isinstance(parsed, dict) else {}
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class Decision:
    """What to do with one request: replay ``response``, or go live and ``record``.

    ``response`` is the recorded response to replay (``None`` when the request
    must go to the network); ``record`` flags that the live result should be
    recorded.
    """

    def __init__(self, *, response: Response | None = None, record: bool = False):
        self.response, self.record = response, record
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def decide(mode: Mode, cassette: Cassette, body: dict) -> Decision:
    """Choose between replaying a recording and recording a live call.

    Performs no I/O; the caller makes the network call and persists.

    * ``ALL``: always record.
    * ``NONE``: replay when a recording matches, otherwise raise ``CassetteMiss``.
    * ``ONCE`` / ``NEW_EPISODES``: replay when a recording matches, otherwise record.
    """
    key = fingerprint(body, cassette.match_on)
    existing = cassette.find(key)

    if mode is Mode.ALL:
        return Decision(record=True)
    if existing is not None:
        # Every remaining mode replays a matching recording.
        return Decision(response=existing.response)
    if mode is Mode.NONE:
        raise CassetteMiss(_miss_message(cassette, body))
    return Decision(record=True)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _miss_message(cassette: Cassette, body: dict) -> str:
    """Compose the error text for a cassette miss in mode=none.

    The incoming body is diffed against every recording and the smallest
    non-empty diff is shown, so "nearest recording" refers to the recording
    with the fewest differing leaf values rather than whichever happens to be
    last in the file. When the cassette is empty, or no recording yields a
    diff, a shorter explanatory message is returned instead.
    """
    if not cassette.interactions:
        return (
            f"Cassette miss: {cassette.path!r} has no recordings and mode=none.\n"
            f"Re-record with mode='once' (or delete and re-run the test)."
        )
    diffs = [diff_request(body, ix.body, cassette.match_on) for ix in cassette.interactions]
    informative = [d for d in diffs if d]
    # Each leaf difference adds a fixed number of lines, so the newline count
    # orders diffs by how many values differ. This only runs on the error path.
    diff = min(informative, key=lambda d: d.count("\n")) if informative else ""
    if not diff:
        return (
            f"Cassette miss in {cassette.path!r} (mode=none): a matched field has a "
            f"non-equal value the diff walker couldn't pinpoint. Re-record to refresh."
        )
    return (
        f"Cassette miss in {cassette.path!r} (mode=none).\n"
        f"The incoming request differs from the nearest recording on these fields:\n\n"
        f"{diff}\n\n"
        f"If the change is intentional, re-record with mode='once' (or delete the "
        f"cassette and re-run). If not, fix the call so it matches the recorded "
        f"fingerprint."
    )
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
# The httpx wiring that turns these decisions into real interception lives in
|
|
88
|
+
# patch.py (sync + async). This module stays pure so the branch logic above is
|
|
89
|
+
# unit-testable without a network stack.
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: promptecho
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Record & replay for LLM API calls — like vcrpy/nock, built for LLM traffic.
|
|
5
|
+
License-Expression: MIT
|
|
6
|
+
Keywords: anthropic,llm,mock,openai,pytest,record-replay,testing,vcr
|
|
7
|
+
Requires-Python: >=3.9
|
|
8
|
+
Requires-Dist: httpx>=0.24
|
|
9
|
+
Requires-Dist: pyyaml>=6.0
|
|
10
|
+
Provides-Extra: dev
|
|
11
|
+
Requires-Dist: anthropic; extra == 'dev'
|
|
12
|
+
Requires-Dist: openai; extra == 'dev'
|
|
13
|
+
Requires-Dist: pytest>=7; extra == 'dev'
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
|
|
16
|
+
# promptecho
|
|
17
|
+
|
|
18
|
+
**Record & replay for LLM API calls.** Like [`vcrpy`](https://github.com/kevin1024/vcrpy) / [`nock`](https://github.com/nock/nock), but built for the way LLM traffic actually behaves.
|
|
19
|
+
|
|
20
|
+
Your LLM tests have three problems: they're **flaky** (non-deterministic outputs), **slow** (real network round-trips), and **expensive** (burning tokens in CI on every run). promptecho records each real API call once to a cassette file, then replays it forever — deterministically, instantly, for free.
|
|
21
|
+
|
|
22
|
+
```python
|
|
23
|
+
import promptecho
|
|
24
|
+
from anthropic import Anthropic
|
|
25
|
+
|
|
26
|
+
@promptecho.use_cassette("cassettes/summarize.yaml")
|
|
27
|
+
def test_summarize():
|
|
28
|
+
client = Anthropic()
|
|
29
|
+
msg = client.messages.create(
|
|
30
|
+
model="claude-opus-4-8",
|
|
31
|
+
max_tokens=100,
|
|
32
|
+
messages=[{"role": "user", "content": "Summarize: the cat sat on the mat."}],
|
|
33
|
+
)
|
|
34
|
+
assert "cat" in msg.content[0].text.lower()
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
First run: one real call, recorded to `cassettes/summarize.yaml`.
|
|
38
|
+
Every run after: replayed from disk. No network, no tokens, no flake.
|
|
39
|
+
|
|
40
|
+
> **Proof, not marketing.** The end-to-end test that gates every release records against a local server, **shuts the server down**, then replays. Same response, zero network. If the response can come back with the upstream gone, the cassette is genuinely doing the work — not a partial proxy. See [`tests/test_record_replay.py`](tests/test_record_replay.py).
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## Why not just use vcrpy?
|
|
45
|
+
|
|
46
|
+
You can — at the HTTP layer, vcrpy works on LLM calls today. promptecho exists because LLM traffic breaks vcrpy's assumptions in five specific ways:
|
|
47
|
+
|
|
48
|
+
1. **Matching.** vcrpy matches on raw request bytes. LLM bodies carry volatile fields (client-injected IDs, reordered tools, whitespace) that change the bytes without changing the *meaning* — so byte-matching misses on replay. promptecho matches on a **normalized fingerprint** of the fields that determine the response, and **canonicalizes across providers**: it knows `content: "hi"` equals `content: [{"type":"text","text":"hi"}]`, an Anthropic top-level `system` equals an OpenAI `system`-role message, and an Anthropic `input_schema` tool def equals an OpenAI `function.parameters`. A raw-bytes VCR can't.
|
|
49
|
+
2. **Streaming.** Most LLM calls are SSE streams. promptecho records the event stream and faithfully re-emits it on replay, so `stream=True` and token-by-token iteration work identically against a cassette — including reasoning deltas.
|
|
50
|
+
3. **Binary / multimodal responses.** vcrpy's text-based cassettes silently corrupt raw `image/*` / `audio/*` / `octet-stream` bodies. promptecho detects them by `Content-Type` and base64-encodes them in the cassette, so image-out and audio-out responses round-trip byte-exact.
|
|
51
|
+
4. **Debuggable CI failures.** When a vcrpy cassette miss happens, you get *"no match"*. promptecho prints the exact path that changed: `messages[1].content: recorded "summarize the cat" / incoming "summarize the dog"`. Test failures are actionable, not detective work.
|
|
52
|
+
5. **Secrets.** API keys live in headers on every call. promptecho redacts them by default — a cassette is safe to commit.
|
|
53
|
+
|
|
54
|
+
## What promptecho is *not*
|
|
55
|
+
|
|
56
|
+
- **Not a cache.** Replay matching is exact/normalized and deterministic, on purpose. It does **not** semantically match "different prompt, close enough" — that would put non-determinism back into the harness you're using to remove it. (A separate opt-in fuzzy mode is on the roadmap as a dev-loop convenience; it will never be the default and will never be used in CI.)
|
|
57
|
+
- **Not an eval.** It freezes a response so your *surrounding code* is testable. Judging whether the response is *good* is a different tool (see roadmap: `toMatchLLMSnapshot()`).
|
|
58
|
+
|
|
59
|
+
---
|
|
60
|
+
|
|
61
|
+
## What it covers
|
|
62
|
+
|
|
63
|
+
promptecho intercepts requests at the `httpx` transport layer. **If the SDK uses httpx, promptecho sees the call** — and almost every modern LLM SDK does.
|
|
64
|
+
|
|
65
|
+
| You're calling | Covered? |
|
|
66
|
+
|---|---|
|
|
67
|
+
| Anthropic, OpenAI, Mistral, Cohere, `google-genai` SDKs | ✅ |
|
|
68
|
+
| **OpenAI SDK with custom `base_url`** → OpenRouter, Together, Fireworks, Cerebras, Groq, DeepInfra, Perplexity | ✅ |
|
|
69
|
+
| **Self-hosted vLLM / TGI / SGLang / LM Studio / Ollama** (OpenAI-compatible mode) | ✅ |
|
|
70
|
+
| Your **own fine-tune** behind any of the above | ✅ |
|
|
71
|
+
| **Reasoning models** — o1/o3, Claude extended thinking, DeepSeek-R1 | ✅ (incl. `reasoning_effort` / `thinking` in default match-on) |
|
|
72
|
+
| **Multimodal** — base64-in-JSON (vision, Claude image-out, GPT-4o) and raw binary (`image/*`, `audio/*`) | ✅ (byte-exact round-trip) |
|
|
73
|
+
| Bedrock via boto3, HF `InferenceClient`, in-process `transformers` | ❌ (see workarounds in [SUPPORT.md](SUPPORT.md)) |
|
|
74
|
+
|
|
75
|
+
Full matrix with caveats and workarounds: [**SUPPORT.md**](SUPPORT.md). For practical recipes by scenario (startup / enterprise / research), see [**TUTORIAL.md**](TUTORIAL.md).
|
|
76
|
+
|
|
77
|
+
### Hosted open-source via the OpenAI SDK
|
|
78
|
+
|
|
79
|
+
This is the dominant pattern for non-Anthropic/non-OpenAI usage, and it Just Works:
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
from openai import OpenAI
|
|
83
|
+
client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key="...")
|
|
84
|
+
|
|
85
|
+
@promptecho.use_cassette("cassettes/openrouter.yaml")
|
|
86
|
+
def test_via_openrouter():
|
|
87
|
+
r = client.chat.completions.create(
|
|
88
|
+
model="meta-llama/llama-3.1-70b-instruct",
|
|
89
|
+
messages=[{"role": "user", "content": "hi"}],
|
|
90
|
+
)
|
|
91
|
+
assert r.choices[0].message.content
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
Provider detection falls back to the shape of the request body when the host is unknown, so localhost gateways, in-house proxies, and self-hosted vLLM/TGI behave the same way as the brand-name hosts.
|
|
95
|
+
|
|
96
|
+
---
|
|
97
|
+
|
|
98
|
+
## Install
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
pip install promptecho # not yet on PyPI — install from source for now
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
git clone <repo> && cd promptecho
|
|
106
|
+
pip install -e .
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Requires Python ≥ 3.9 and `httpx ≥ 0.24`.
|
|
110
|
+
|
|
111
|
+
---
|
|
112
|
+
|
|
113
|
+
## Usage
|
|
114
|
+
|
|
115
|
+
### Decorator
|
|
116
|
+
```python
|
|
117
|
+
@promptecho.use_cassette("cassettes/foo.yaml")
|
|
118
|
+
def test_foo(): ...
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### Context manager
|
|
122
|
+
```python
|
|
123
|
+
with promptecho.use_cassette("cassettes/foo.yaml"):
|
|
124
|
+
client.messages.create(...)
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
### pytest fixture (auto-named per test)
|
|
128
|
+
```python
|
|
129
|
+
def test_bar(promptecho_cassette): # records to cassettes/test_bar.yaml
|
|
130
|
+
client.messages.create(...)
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
The fixture defaults to `mode="once"` locally and `mode="none"` when `CI=true` — so a forgotten recording fails the build instead of making a live call.
|
|
134
|
+
|
|
135
|
+
### Record modes
|
|
136
|
+
Borrowed from vcrpy, so the mental model is free:
|
|
137
|
+
|
|
138
|
+
| mode | absent cassette | present cassette | use for |
|
|
139
|
+
|------|-----------------|------------------|---------|
|
|
140
|
+
| `once` *(default)* | record | replay | normal dev |
|
|
141
|
+
| `none` | **error** | replay | **CI** — guarantees no live calls |
|
|
142
|
+
| `new_episodes` | record | replay + record new | evolving tests |
|
|
143
|
+
| `all` | record | re-record everything | refreshing fixtures |
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
@promptecho.use_cassette("cassettes/foo.yaml", mode="none")
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### Choosing what to match on
|
|
150
|
+
|
|
151
|
+
Defaults to `["model", "messages", "system", "tools", "tool_choice", "reasoning_effort", "reasoning", "thinking"]` — everything that determines the response for a chat-shaped call, including reasoning-model knobs.
|
|
152
|
+
|
|
153
|
+
```python
|
|
154
|
+
@promptecho.use_cassette(
|
|
155
|
+
"cassettes/foo.yaml",
|
|
156
|
+
match_on=["model", "messages", "system", "temperature"], # add temperature
|
|
157
|
+
)
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
For non-chat request shapes (raw TGI `/generate`, embeddings) you'll want to override `match_on` — e.g. `match_on=["model", "input"]` for an embeddings endpoint. See [SUPPORT.md → Request shapes](SUPPORT.md#request-shapes).
|
|
161
|
+
|
|
162
|
+
### Async
|
|
163
|
+
|
|
164
|
+
Works identically with `httpx.AsyncClient` and the async surfaces of Anthropic / OpenAI / Mistral SDKs — the async transport is patched the same way as sync.
|
|
165
|
+
|
|
166
|
+
---
|
|
167
|
+
|
|
168
|
+
## Cassette format
|
|
169
|
+
|
|
170
|
+
Human-readable YAML, designed to diff cleanly in PRs:
|
|
171
|
+
|
|
172
|
+
```yaml
|
|
173
|
+
version: 1
|
|
174
|
+
match_on: [model, messages, system, tools, tool_choice, reasoning_effort, reasoning, thinking]
|
|
175
|
+
interactions:
|
|
176
|
+
- request:
|
|
177
|
+
method: POST
|
|
178
|
+
url: https://api.anthropic.com/v1/messages
|
|
179
|
+
match_key: ef43f6acaed95b2f # fingerprint of matched fields
|
|
180
|
+
matched_on: [model, messages, system, tools, tool_choice]
|
|
181
|
+
body: # canonical (provider-normalized) body
|
|
182
|
+
model: claude-opus-4-8
|
|
183
|
+
messages:
|
|
184
|
+
- {role: user, content: "Summarize: the cat sat on the mat."}
|
|
185
|
+
response:
|
|
186
|
+
status: 200
|
|
187
|
+
headers: {content-type: application/json}
|
|
188
|
+
streaming: false
|
|
189
|
+
body:
|
|
190
|
+
content: [{type: text, text: "A cat sat on a mat."}]
|
|
191
|
+
usage: {input_tokens: 14, output_tokens: 8}
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
- **Streamed** responses store the ordered SSE events under `response.events` with `streaming: true`; replay re-emits them in order.
|
|
195
|
+
- **Binary** responses (image/audio/octet-stream) get `binary: true` and the body is base64-encoded; replay decodes and returns the original bytes.
|
|
196
|
+
- **The stored body is the canonical, provider-normalized shape** — not the raw provider JSON. That makes cassettes provider-agnostic and easier to skim in code review.
|
|
197
|
+
|
|
198
|
+
The `authorization`, `x-api-key`, and `openai-organization` headers are redacted automatically at record time. The list is configurable.
|
|
199
|
+
|
|
200
|
+
See [`examples/cassettes/example.yaml`](examples/cassettes/example.yaml) for a real one.
|
|
201
|
+
|
|
202
|
+
---
|
|
203
|
+
|
|
204
|
+
## Status
|
|
205
|
+
|
|
206
|
+
**v0.1.0, working core. 19 tests, all green.** Not yet on PyPI.
|
|
207
|
+
|
|
208
|
+
Records and replays real httpx traffic — sync, async, SSE streaming, binary responses, cross-provider request shapes — verified end-to-end against a local server that gets shut down between record and replay.
|
|
209
|
+
|
|
210
|
+
### Roadmap (build-in-public)
|
|
211
|
+
|
|
212
|
+
Done:
|
|
213
|
+
- [x] httpx sync + async transport interception
|
|
214
|
+
- [x] SSE streaming record/replay
|
|
215
|
+
- [x] pytest plugin + auto-naming
|
|
216
|
+
- [x] Per-provider request normalizers (Anthropic / OpenAI / generic)
|
|
217
|
+
- [x] Reasoning-model match defaults (`reasoning_effort`, `thinking`, `reasoning`)
|
|
218
|
+
- [x] Binary response round-trip (image/audio/octet-stream — base64 in cassette)
|
|
219
|
+
- [x] Field-level diff on cassette miss (CI `mode=none` errors pinpoint the changed path, not just the field name)
|
|
220
|
+
|
|
221
|
+
Next:
|
|
222
|
+
- [ ] `requests` / `urllib3` interception backend — unlocks boto3-Bedrock and HF `InferenceClient`
|
|
223
|
+
- [ ] `promptecho lint` — find un-recorded calls in a test suite
|
|
224
|
+
- [ ] **`toMatchLLMSnapshot()` sibling** — semantic snapshot assertions on top of recorded calls
|
|
225
|
+
|
|
226
|
+
## Design
|
|
227
|
+
|
|
228
|
+
For the why-not-the-other-way decisions — fingerprint vs raw bytes, why semantic matching is fenced off, how SSE re-emission works, how cross-provider normalization is structured — see [DESIGN.md](DESIGN.md).
|
|
229
|
+
|
|
230
|
+
## License
|
|
231
|
+
|
|
232
|
+
MIT
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
promptecho/__init__.py,sha256=6SokaWIYwu6dZm0Vin2psxevz8jcNuLDsDM3h12kMwg,2309
|
|
2
|
+
promptecho/cassette.py,sha256=uzJh084L4gk8zr-H6psWxd9oDMKP0glfNzJlcIEaHCA,4445
|
|
3
|
+
promptecho/matcher.py,sha256=t_5kBPphChTwYe_0WAjkcTPLYDqO7tKaYgGbRTpwnlg,4484
|
|
4
|
+
promptecho/normalizers.py,sha256=0X9oItnIolaS_62-sB3D-h6yYduAAbcsK23tYWI3ZlI,5807
|
|
5
|
+
promptecho/patch.py,sha256=Xc5D37Odj0pElVxfon-7HT-sktBBZn0VeRkOq-3ihKE,5223
|
|
6
|
+
promptecho/pytest_plugin.py,sha256=oVF3vzqVHoG_Vk-WjpVo_Wota-CpgV0YVahReJbwVFM,827
|
|
7
|
+
promptecho/transport.py,sha256=g6bYMwOV5b0LVqGLjwQ1dJyfNKLnE4HiQegzpLjqSSg,3147
|
|
8
|
+
promptecho-0.1.0.dist-info/METADATA,sha256=zh0KRAXXMl-hoWYUJLQU3wx2bjSRkpMeWOt9EulAahU,10858
|
|
9
|
+
promptecho-0.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
10
|
+
promptecho-0.1.0.dist-info/entry_points.txt,sha256=bw3ZMfD4yiP33qS4WOOV11TerOvgYXOS8FL70k6pYHM,49
|
|
11
|
+
promptecho-0.1.0.dist-info/RECORD,,
|