receipts-gate 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. receipts_gate-0.1.0/.github/workflows/ci.yml +19 -0
  2. receipts_gate-0.1.0/.github/workflows/publish.yml +34 -0
  3. receipts_gate-0.1.0/.gitignore +10 -0
  4. receipts_gate-0.1.0/LICENSE +21 -0
  5. receipts_gate-0.1.0/PKG-INFO +220 -0
  6. receipts_gate-0.1.0/README.md +193 -0
  7. receipts_gate-0.1.0/examples/agent_loop.py +103 -0
  8. receipts_gate-0.1.0/examples/ci/receipts-example.yml +33 -0
  9. receipts_gate-0.1.0/examples/research_agent.py +89 -0
  10. receipts_gate-0.1.0/pyproject.toml +45 -0
  11. receipts_gate-0.1.0/src/receipts/__init__.py +70 -0
  12. receipts_gate-0.1.0/src/receipts/adapters/__init__.py +17 -0
  13. receipts_gate-0.1.0/src/receipts/adapters/anthropic_trace.py +84 -0
  14. receipts_gate-0.1.0/src/receipts/adapters/langchain.py +54 -0
  15. receipts_gate-0.1.0/src/receipts/adapters/openai_trace.py +87 -0
  16. receipts_gate-0.1.0/src/receipts/cli.py +100 -0
  17. receipts_gate-0.1.0/src/receipts/errors.py +18 -0
  18. receipts_gate-0.1.0/src/receipts/extract.py +100 -0
  19. receipts_gate-0.1.0/src/receipts/gate.py +196 -0
  20. receipts_gate-0.1.0/src/receipts/instrument.py +65 -0
  21. receipts_gate-0.1.0/src/receipts/ledger.py +65 -0
  22. receipts_gate-0.1.0/src/receipts/llm.py +73 -0
  23. receipts_gate-0.1.0/src/receipts/models.py +164 -0
  24. receipts_gate-0.1.0/src/receipts/py.typed +0 -0
  25. receipts_gate-0.1.0/src/receipts/trace.py +66 -0
  26. receipts_gate-0.1.0/src/receipts/verify.py +108 -0
  27. receipts_gate-0.1.0/tests/test_adapters.py +82 -0
  28. receipts_gate-0.1.0/tests/test_features.py +168 -0
  29. receipts_gate-0.1.0/tests/test_gate.py +200 -0
  30. receipts_gate-0.1.0/tests/test_regressions.py +153 -0
@@ -0,0 +1,19 @@
1
+ name: tests
2
+
3
+ on:
4
+ push:
5
+ pull_request:
6
+
7
+ jobs:
8
+ test:
9
+ runs-on: ubuntu-latest
10
+ strategy:
11
+ matrix:
12
+ python-version: ["3.10", "3.13"]
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+ - uses: actions/setup-python@v5
16
+ with:
17
+ python-version: ${{ matrix.python-version }}
18
+ - run: pip install -e ".[dev]"
19
+ - run: python -m pytest -q
@@ -0,0 +1,34 @@
1
+ name: publish
2
+
3
+ on:
4
+ push:
5
+ tags: ["v*"]
6
+ workflow_dispatch:
7
+
8
+ jobs:
9
+ build:
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - uses: actions/checkout@v4
13
+ - uses: actions/setup-python@v5
14
+ with:
15
+ python-version: "3.12"
16
+ - run: python -m pip install build
17
+ - run: python -m build
18
+ - uses: actions/upload-artifact@v4
19
+ with:
20
+ name: dist
21
+ path: dist/
22
+
23
+ publish:
24
+ needs: build
25
+ runs-on: ubuntu-latest
26
+ environment: pypi
27
+ permissions:
28
+ id-token: write
29
+ steps:
30
+ - uses: actions/download-artifact@v4
31
+ with:
32
+ name: dist
33
+ path: dist/
34
+ - uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,10 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ .pytest_cache/
4
+ dist/
5
+ build/
6
+ *.egg-info/
7
+ .venv/
8
+ venv/
9
+ .env
10
+ launch/
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Adrian Melendez
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,220 @@
1
+ Metadata-Version: 2.4
2
+ Name: receipts-gate
3
+ Version: 0.1.0
4
+ Summary: A hard gate that forces AI agents to back every claim with evidence — or declare it an assumption.
5
+ Project-URL: Homepage, https://github.com/Thepizzapie/receipts
6
+ Project-URL: Repository, https://github.com/Thepizzapie/receipts
7
+ Project-URL: Issues, https://github.com/Thepizzapie/receipts/issues
8
+ Author: Adrian Melendez
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: agents,ai,evaluation,grounding,guardrails,provenance
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Software Development :: Quality Assurance
20
+ Classifier: Typing :: Typed
21
+ Requires-Python: >=3.10
22
+ Provides-Extra: anthropic
23
+ Requires-Dist: anthropic>=0.40; extra == 'anthropic'
24
+ Provides-Extra: dev
25
+ Requires-Dist: pytest>=8; extra == 'dev'
26
+ Description-Content-Type: text/markdown
27
+
28
+ # Receipts
29
+
30
+ **Force AI agents to back every claim with evidence — or declare it an assumption.**
31
+
32
+ Agents lie about the research they did and quietly base conclusions on guesses,
33
+ because there's no cost to either. Receipts adds the cost. Every claim an agent
34
+ makes must point at a tamper-evident log of what it *actually did*. Claims that
35
+ can't be grounded are illegal — they must be surfaced as explicit assumptions, not
36
+ buried in the answer.
37
+
38
+ You can't fix this with prompting ("don't assume, don't lie"). Receipts doesn't
39
+ ask the model to be honest — it *measures* honesty against ground truth, with
40
+ deterministic checks the model can't game.
41
+
42
+ ## Install
43
+
44
+ ```bash
45
+ pip install receipts-gate
46
+ ```
47
+
48
+ Zero dependencies. The distribution is `receipts-gate` (the bare name was taken);
49
+ the import and the CLI are plain `receipts`.
50
+
51
+ ## The three rules
52
+
53
+ Every claim is checked against the `Ledger` (the captured execution log):
54
+
55
+ | Rule | Kills | How |
56
+ |------|-------|-----|
57
+ | **Binding** | guesses stated as fact | A claim must cite real evidence in the ledger, or be demoted to an assumption. |
58
+ | **Effort honesty** | "I reviewed the entire codebase" | Effort claims must cite evidence of a matching *kind*; words like *all / entire / thoroughly* require machine-checkable `coverage` proof. |
59
+ | **Support** | citing a source that doesn't say it | Cited evidence must actually back the claim (deterministic heuristic by default; pluggable LLM/NLI judge for production). |
60
+
61
+ ## Two modes, one engine
62
+
63
+ - **Live gate** (`Gate.finalize`) — runs inside the agent loop and *blocks*
64
+ ungrounded output before the user ever sees it, handing the agent a precise list
65
+ of what to fix.
66
+ - **Post-hoc auditor** (`audit`) — ingest a finished trace and get a `Verdict`.
67
+ Same engine, no runtime coupling. Drop it in CI to fail PRs from ungrounded agent runs.
68
+
69
+ ## Quick start
70
+
71
+ ```python
72
+ from receipts import Ledger, Gate, Answer, Claim, Assumption, ClaimKind
73
+
74
+ # 1. Evidence is captured from real work, not self-reported.
75
+ ledger = Ledger()
76
+ ev = ledger.record("file_read", "config.py", "PORT = 8080")
77
+
78
+ # 2. The agent emits claims that point at evidence.
79
+ gate = Gate(ledger) # hard gate by default
80
+ answer = Answer(
81
+ summary="The service listens on port 8080.",
82
+ claims=[Claim("The port is 8080", evidence_ids=[ev.id], kind=ClaimKind.FACT)],
83
+ assumptions=[
84
+ Assumption("The paid tier is higher", reason="not verified",
85
+ impact="capacity planning wrong if false"),
86
+ ],
87
+ )
88
+
89
+ # 3. The gate renders the answer, or raises UngroundedAnswerError with a fix list.
90
+ print(gate.finalize(answer))
91
+ ```
92
+
93
+ Capture evidence automatically by wrapping tools:
94
+
95
+ ```python
96
+ from receipts import track
97
+
98
+ @track(ledger, kind="web_fetch", source=lambda url: url)
99
+ def fetch(url): ...
100
+ ```
101
+
102
+ Or audit an existing run:
103
+
104
+ ```python
105
+ from receipts import ingest_trace, audit
106
+ ingest_trace(ledger, my_logged_events) # [{"kind","source","content"}, ...]
107
+ verdict = audit(answer, ledger)
108
+ print(verdict.report())
109
+ ```
110
+
111
+ ## Semantic support via an LLM
112
+
113
+ The default support check is token overlap — transparent, but shallow. For real
114
+ grounding (catching a claim that cites evidence which is *related* but doesn't say
115
+ what the claim asserts), swap in the LLM judge. The gate stays deterministic for
116
+ binding and effort; only the support check calls the model, so the thing being
117
+ audited can't game the structural rules.
118
+
119
+ ```python
120
+ from receipts import Gate, AnthropicLLM, LLMSupportVerifier
121
+
122
+ gate = Gate(ledger, verifier=LLMSupportVerifier(AnthropicLLM())) # needs receipts-gate[anthropic]
123
+ ```
124
+
125
+ `LLM` is a one-method protocol — drop in any backend (NLI model, embeddings, a
126
+ different vendor) or `FakeLLM` for tests.
127
+
128
+ ## Free-text extraction (no structured claims required)
129
+
130
+ Agents don't have to emit `Claim` objects by hand. Give Receipts the prose answer
131
+ and the ledger, and it produces a structured `Answer` for the gate to check. The
132
+ extraction is LLM-driven and advisory — every binding it produces is still
133
+ re-verified deterministically, so a hallucinated citation surfaces as a gate
134
+ failure, not a silent pass.
135
+
136
+ ```python
137
+ from receipts import extract_claims, Gate, AnthropicLLM
138
+
139
+ answer = extract_claims(free_text_answer, ledger, AnthropicLLM())
140
+ print(Gate(ledger).finalize(answer)) # checks the extracted claims
141
+ ```
142
+
143
+ ## CLI / CI gate
144
+
145
+ `receipts audit trace.json` exits non-zero when any claim is ungrounded — a
146
+ drop-in CI gate on agent output. Deterministic by default (no API key); `--llm`
147
+ turns on the semantic judge.
148
+
149
+ ```bash
150
+ receipts audit trace.json # exit 1 if blocked
151
+ receipts audit trace.json --json # machine-readable
152
+ ```
153
+
154
+ Trace format and a copy-into-your-repo GitHub Action are in
155
+ [`examples/ci/receipts-example.yml`](examples/ci/receipts-example.yml).
156
+ Claims reference evidence by a local label, list index, or real id — see
157
+ [`trace.py`](src/receipts/trace.py).
158
+
159
+ ## Framework adapters
160
+
161
+ Capture is framework-agnostic; these map specific ecosystems onto the Ledger.
162
+
163
+ **OpenAI / Anthropic / Claude Agent SDK traces** — ingest a finished transcript,
164
+ recording each tool result as evidence (pure Python, no SDK needed):
165
+
166
+ ```python
167
+ from receipts.adapters import from_openai_messages, from_anthropic_messages
168
+
169
+ from_openai_messages(ledger, openai_messages) # OpenAI chat format
170
+ from_anthropic_messages(ledger, anthropic_messages) # Messages API / Claude Agent SDK
171
+ ```
172
+
173
+ **LangChain / LangGraph** — capture tool results *live* via a callback handler,
174
+ no change to your tool code:
175
+
176
+ ```python
177
+ from receipts.adapters import langchain_handler
178
+
179
+ handler = langchain_handler(ledger)
180
+ agent.invoke(inputs, config={"callbacks": [handler]})
181
+ ```
182
+
183
+ ## Layout
184
+
185
+ ```
186
+ src/receipts/
187
+ models.py data model — Evidence, Claim, Assumption, Answer, Verdict
188
+ ledger.py append-only, hash-stamped evidence log (the ground truth)
189
+ instrument.py capture: @track decorator + ingest_trace()
190
+ verify.py pluggable support check — HeuristicVerifier + LLMSupportVerifier
191
+ llm.py LLM protocol + AnthropicLLM / FakeLLM backends
192
+ extract.py free-text answer -> structured Answer
193
+ trace.py load a portable trace file -> (Ledger, Answer)
194
+ cli.py `receipts audit` command
195
+ gate.py the three rules + Gate + audit()
196
+ adapters/ OpenAI trace, Anthropic/Claude-Agent-SDK trace, LangChain callback
197
+ examples/ research_agent.py (gate), agent_loop.py (capture->extract->gate)
198
+ tests/ pytest suite
199
+ ```
200
+
201
+ ## Develop
202
+
203
+ ```bash
204
+ python -m pytest -q
205
+ python examples/research_agent.py
206
+ ```
207
+
208
+ ## Status / roadmap
209
+
210
+ v0.1 — core engine, hard gate, post-hoc auditor, framework-agnostic capture,
211
+ LLM support verifier, free-text extraction, OpenAI adapter, and the
212
+ `receipts audit` CLI plus CI action.
213
+
214
+ Adapters: OpenAI trace, Anthropic / Claude Agent SDK trace, LangChain/LangGraph
215
+ callback.
216
+
217
+ Next candidates:
218
+ - NLI / embeddings `SupportVerifier` backends (no LLM call)
219
+ - Live in-loop gate wrappers for popular agent runtimes
220
+ - Published package on PyPI
@@ -0,0 +1,193 @@
1
+ # Receipts
2
+
3
+ **Force AI agents to back every claim with evidence — or declare it an assumption.**
4
+
5
+ Agents lie about the research they did and quietly base conclusions on guesses,
6
+ because there's no cost to either. Receipts adds the cost. Every claim an agent
7
+ makes must point at a tamper-evident log of what it *actually did*. Claims that
8
+ can't be grounded are illegal — they must be surfaced as explicit assumptions, not
9
+ buried in the answer.
10
+
11
+ You can't fix this with prompting ("don't assume, don't lie"). Receipts doesn't
12
+ ask the model to be honest — it *measures* honesty against ground truth, with
13
+ deterministic checks the model can't game.
14
+
15
+ ## Install
16
+
17
+ ```bash
18
+ pip install receipts-gate
19
+ ```
20
+
21
+ Zero dependencies. The distribution is `receipts-gate` (the bare name was taken);
22
+ the import and the CLI are plain `receipts`.
23
+
24
+ ## The three rules
25
+
26
+ Every claim is checked against the `Ledger` (the captured execution log):
27
+
28
+ | Rule | Kills | How |
29
+ |------|-------|-----|
30
+ | **Binding** | guesses stated as fact | A claim must cite real evidence in the ledger, or be demoted to an assumption. |
31
+ | **Effort honesty** | "I reviewed the entire codebase" | Effort claims must cite evidence of a matching *kind*; words like *all / entire / thoroughly* require machine-checkable `coverage` proof. |
32
+ | **Support** | citing a source that doesn't say it | Cited evidence must actually back the claim (deterministic heuristic by default; pluggable LLM/NLI judge for production). |
33
+
34
+ ## Two modes, one engine
35
+
36
+ - **Live gate** (`Gate.finalize`) — runs inside the agent loop and *blocks*
37
+ ungrounded output before the user ever sees it, handing the agent a precise list
38
+ of what to fix.
39
+ - **Post-hoc auditor** (`audit`) — ingest a finished trace and get a `Verdict`.
40
+ Same engine, no runtime coupling. Drop it in CI to fail PRs from ungrounded agent runs.
41
+
42
+ ## Quick start
43
+
44
+ ```python
45
+ from receipts import Ledger, Gate, Answer, Claim, Assumption, ClaimKind
46
+
47
+ # 1. Evidence is captured from real work, not self-reported.
48
+ ledger = Ledger()
49
+ ev = ledger.record("file_read", "config.py", "PORT = 8080")
50
+
51
+ # 2. The agent emits claims that point at evidence.
52
+ gate = Gate(ledger) # hard gate by default
53
+ answer = Answer(
54
+ summary="The service listens on port 8080.",
55
+ claims=[Claim("The port is 8080", evidence_ids=[ev.id], kind=ClaimKind.FACT)],
56
+ assumptions=[
57
+ Assumption("The paid tier is higher", reason="not verified",
58
+ impact="capacity planning wrong if false"),
59
+ ],
60
+ )
61
+
62
+ # 3. The gate renders the answer, or raises UngroundedAnswerError with a fix list.
63
+ print(gate.finalize(answer))
64
+ ```
65
+
66
+ Capture evidence automatically by wrapping tools:
67
+
68
+ ```python
69
+ from receipts import track
70
+
71
+ @track(ledger, kind="web_fetch", source=lambda url: url)
72
+ def fetch(url): ...
73
+ ```
74
+
75
+ Or audit an existing run:
76
+
77
+ ```python
78
+ from receipts import ingest_trace, audit
79
+ ingest_trace(ledger, my_logged_events) # [{"kind","source","content"}, ...]
80
+ verdict = audit(answer, ledger)
81
+ print(verdict.report())
82
+ ```
83
+
84
+ ## Semantic support via an LLM
85
+
86
+ The default support check is token overlap — transparent, but shallow. For real
87
+ grounding (catching a claim that cites evidence which is *related* but doesn't say
88
+ what the claim asserts), swap in the LLM judge. The gate stays deterministic for
89
+ binding and effort; only the support check calls the model, so the thing being
90
+ audited can't game the structural rules.
91
+
92
+ ```python
93
+ from receipts import Gate, AnthropicLLM, LLMSupportVerifier
94
+
95
+ gate = Gate(ledger, verifier=LLMSupportVerifier(AnthropicLLM())) # needs receipts-gate[anthropic]
96
+ ```
97
+
98
+ `LLM` is a one-method protocol — drop in any backend (NLI model, embeddings, a
99
+ different vendor) or `FakeLLM` for tests.
100
+
101
+ ## Free-text extraction (no structured claims required)
102
+
103
+ Agents don't have to emit `Claim` objects by hand. Give Receipts the prose answer
104
+ and the ledger, and it produces a structured `Answer` for the gate to check. The
105
+ extraction is LLM-driven and advisory — every binding it produces is still
106
+ re-verified deterministically, so a hallucinated citation surfaces as a gate
107
+ failure, not a silent pass.
108
+
109
+ ```python
110
+ from receipts import extract_claims, Gate, AnthropicLLM
111
+
112
+ answer = extract_claims(free_text_answer, ledger, AnthropicLLM())
113
+ print(Gate(ledger).finalize(answer)) # checks the extracted claims
114
+ ```
115
+
116
+ ## CLI / CI gate
117
+
118
+ `receipts audit trace.json` exits non-zero when any claim is ungrounded — a
119
+ drop-in CI gate on agent output. Deterministic by default (no API key); `--llm`
120
+ turns on the semantic judge.
121
+
122
+ ```bash
123
+ receipts audit trace.json # exit 1 if blocked
124
+ receipts audit trace.json --json # machine-readable
125
+ ```
126
+
127
+ Trace format and a copy-into-your-repo GitHub Action are in
128
+ [`examples/ci/receipts-example.yml`](examples/ci/receipts-example.yml).
129
+ Claims reference evidence by a local label, list index, or real id — see
130
+ [`trace.py`](src/receipts/trace.py).
131
+
132
+ ## Framework adapters
133
+
134
+ Capture is framework-agnostic; these map specific ecosystems onto the Ledger.
135
+
136
+ **OpenAI / Anthropic / Claude Agent SDK traces** — ingest a finished transcript,
137
+ recording each tool result as evidence (pure Python, no SDK needed):
138
+
139
+ ```python
140
+ from receipts.adapters import from_openai_messages, from_anthropic_messages
141
+
142
+ from_openai_messages(ledger, openai_messages) # OpenAI chat format
143
+ from_anthropic_messages(ledger, anthropic_messages) # Messages API / Claude Agent SDK
144
+ ```
145
+
146
+ **LangChain / LangGraph** — capture tool results *live* via a callback handler,
147
+ no change to your tool code:
148
+
149
+ ```python
150
+ from receipts.adapters import langchain_handler
151
+
152
+ handler = langchain_handler(ledger)
153
+ agent.invoke(inputs, config={"callbacks": [handler]})
154
+ ```
155
+
156
+ ## Layout
157
+
158
+ ```
159
+ src/receipts/
160
+ models.py data model — Evidence, Claim, Assumption, Answer, Verdict
161
+ ledger.py append-only, hash-stamped evidence log (the ground truth)
162
+ instrument.py capture: @track decorator + ingest_trace()
163
+ verify.py pluggable support check — HeuristicVerifier + LLMSupportVerifier
164
+ llm.py LLM protocol + AnthropicLLM / FakeLLM backends
165
+ extract.py free-text answer -> structured Answer
166
+ trace.py load a portable trace file -> (Ledger, Answer)
167
+ cli.py `receipts audit` command
168
+ gate.py the three rules + Gate + audit()
169
+ adapters/ OpenAI trace, Anthropic/Claude-Agent-SDK trace, LangChain callback
170
+ examples/ research_agent.py (gate), agent_loop.py (capture->extract->gate)
171
+ tests/ pytest suite
172
+ ```
173
+
174
+ ## Develop
175
+
176
+ ```bash
177
+ python -m pytest -q
178
+ python examples/research_agent.py
179
+ ```
180
+
181
+ ## Status / roadmap
182
+
183
+ v0.1 — core engine, hard gate, post-hoc auditor, framework-agnostic capture,
184
+ LLM support verifier, free-text extraction, OpenAI adapter, and the
185
+ `receipts audit` CLI plus CI action.
186
+
187
+ Adapters: OpenAI trace, Anthropic / Claude Agent SDK trace, LangChain/LangGraph
188
+ callback.
189
+
190
+ Next candidates:
191
+ - NLI / embeddings `SupportVerifier` backends (no LLM call)
192
+ - Live in-loop gate wrappers for popular agent runtimes
193
+ - Published package on PyPI
@@ -0,0 +1,103 @@
1
+ """End-to-end: a tracked tool loop -> free-text answer -> extract -> gate.
2
+
3
+ Runs fully offline (FakeLLM stands in for the model), so it's a faithful shape of
4
+ a real integration without needing an API key:
5
+
6
+ 1. Tools are wrapped with @track, so every call is recorded as evidence.
7
+ 2. The "agent" runs the tools and writes a normal prose answer.
8
+ 3. extract_claims turns the prose into structured claims bound to evidence.
9
+ 4. The gate checks those bindings deterministically and blocks if ungrounded.
10
+
11
+ Run: python examples/agent_loop.py
12
+ """
13
+
14
+ import sys
15
+ from pathlib import Path
16
+
17
+ sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "src"))
18
+
19
+ from receipts import ( # noqa: E402
20
+ FakeLLM,
21
+ Gate,
22
+ Ledger,
23
+ UngroundedAnswerError,
24
+ extract_claims,
25
+ track,
26
+ )
27
+
28
+ # --- 1. Tools. Wrapping with @track means the Ledger is the source of truth ---
29
+ ledger = Ledger()
30
+
31
+
32
+ @track(ledger, kind="file_read", source=lambda path: path)
33
+ def read_file(path: str) -> str:
34
+ return {"app/config.py": "PORT = 8080\nDEBUG = False"}[path]
35
+
36
+
37
+ @track(ledger, kind="web_fetch", source=lambda url: url)
38
+ def web_fetch(url: str) -> str:
39
+ return "The free tier allows 100 requests per minute."
40
+
41
+
42
+ # --- 2. The agent does real work; evidence accrues automatically ------------
43
+ read_file("app/config.py")
44
+ web_fetch("https://docs.example.com/limits")
45
+
46
+
47
+ # --- 3. Extraction. A real integration passes AnthropicLLM(); here a FakeLLM --
48
+ # plays the model, binding claims to the evidence actually in the ledger.
49
+ def fake_model(kind: str):
50
+ file_ev, web_ev = ledger.all()[0].id, ledger.all()[1].id
51
+
52
+ def honest(system, user, schema):
53
+ return {
54
+ "claims": [
55
+ {"text": "The service listens on port 8080", "kind": "fact",
56
+ "evidence_ids": [file_ev]},
57
+ {"text": "The free tier allows 100 requests per minute", "kind": "fact",
58
+ "evidence_ids": [web_ev]},
59
+ ],
60
+ "assumptions": [
61
+ {"text": "The paid tier limit is higher", "reason": "not on the fetched page",
62
+ "impact": "capacity planning wrong if false"},
63
+ ],
64
+ }
65
+
66
+ def dishonest(system, user, schema):
67
+ return {
68
+ "claims": [
69
+ # Fabricated number + ungrounded guess stated as fact.
70
+ {"text": "The rate limit is 5000 requests per minute", "kind": "fact",
71
+ "evidence_ids": [web_ev]},
72
+ {"text": "I audited every file in the repository", "kind": "effort",
73
+ "evidence_ids": [file_ev]},
74
+ ],
75
+ "assumptions": [],
76
+ }
77
+
78
+ return FakeLLM(honest if kind == "honest" else dishonest)
79
+
80
+
81
+ def run(label: str, text: str, kind: str) -> None:
82
+ print(f"\n=== {label} ===")
83
+ answer = extract_claims(text, ledger, fake_model(kind))
84
+ try:
85
+ print(Gate(ledger).finalize(answer))
86
+ except UngroundedAnswerError as e:
87
+ print(e.verdict.report())
88
+
89
+
90
+ run(
91
+ "Honest answer (passes)",
92
+ "The service listens on port 8080 and the free tier allows 100 rpm.",
93
+ "honest",
94
+ )
95
+ run(
96
+ "Dishonest answer (blocked)",
97
+ "I audited every file; the rate limit is 5000 rpm.",
98
+ "dishonest",
99
+ )
100
+ # Note: the effort overclaim ("every file") is caught structurally. The fabricated
101
+ # "5000 rpm" cites the limits page and slips past the token-overlap heuristic — swapping
102
+ # in Gate(ledger, verifier=LLMSupportVerifier(AnthropicLLM())) catches the number
103
+ # swap too. Binding + effort are deterministic; support is as strong as its verifier.
@@ -0,0 +1,33 @@
1
+ # Example: gate agent-produced output in CI.
2
+ #
3
+ # Have your agent emit a trace file (evidence + structured answer) as a build
4
+ # artifact, then fail the build if any claim isn't grounded. Copy this into a
5
+ # consuming repo's .github/workflows/ and point it at wherever your agent
6
+ # writes its trace.
7
+ name: receipts-grounding-gate
8
+
9
+ on:
10
+ pull_request:
11
+ workflow_dispatch:
12
+
13
+ jobs:
14
+ audit:
15
+ runs-on: ubuntu-latest
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - uses: actions/setup-python@v5
20
+ with:
21
+ python-version: "3.12"
22
+
23
+ - name: Install Receipts
24
+ run: pip install receipts-gate
25
+
26
+ # Replace with the step that runs your agent and writes trace.json.
27
+ - name: Produce agent trace
28
+ run: echo "your agent run goes here; it must write trace.json"
29
+
30
+ # Deterministic gate — no API key needed. Exits non-zero if any claim
31
+ # is ungrounded, failing the build.
32
+ - name: Audit grounding
33
+ run: receipts audit trace.json