contexttrace 0.5.0__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {contexttrace-0.5.0 → contexttrace-0.6.0}/PKG-INFO +44 -5
- {contexttrace-0.5.0 → contexttrace-0.6.0}/README.md +174 -135
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/__init__.py +44 -36
- contexttrace-0.6.0/contexttrace/_version.py +1 -0
- contexttrace-0.6.0/contexttrace/capture.py +154 -0
- contexttrace-0.6.0/contexttrace/capture_endpoint.py +174 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/cli.py +1223 -831
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/endpoint_eval.py +315 -314
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/verify/__init__.py +38 -30
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/verify/audit.py +688 -449
- contexttrace-0.6.0/contexttrace/verify/audit_benchmark.py +439 -0
- contexttrace-0.6.0/contexttrace/verify/audit_benchmark_cases.json +574 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/verify/audit_report.py +415 -372
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/verify/evidence.py +2 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/verify/facts.py +69 -3
- contexttrace-0.6.0/contexttrace/verify/qa.py +268 -0
- contexttrace-0.6.0/contexttrace/verify/qa_report.py +361 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/verify/runner.py +1 -1
- contexttrace-0.6.0/contexttrace/verify/trace_inspect.py +92 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/verify/verdicts.py +10 -3
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace.egg-info/SOURCES.txt +7 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/pyproject.toml +99 -99
- contexttrace-0.5.0/contexttrace/_version.py +0 -1
- {contexttrace-0.5.0 → contexttrace-0.6.0}/MANIFEST.in +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/client.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/config.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/demo.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/demo_data.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/errors.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/evaluator.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/integrations/__init__.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/integrations/fastapi.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/integrations/langchain.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/integrations/langgraph.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/integrations/llamaindex.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/integrations/opentelemetry.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/local.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/py.typed +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/regression.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/reliability.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/report.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/storage/__init__.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/storage/sqlite_store.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/thresholds.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/transport.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/verify/abstention.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/verify/benchmark.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/verify/citations.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/verify/claims.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/verify/compare.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/verify/compare_report.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/verify/demos.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/verify/external_benchmark_cases.json +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/verify/real_benchmark_cases.json +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/verify/report.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/verify/root_cause.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/verify/schema.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/verify/spans.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/contexttrace/viewer.py +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/setup.cfg +0 -0
- {contexttrace-0.5.0 → contexttrace-0.6.0}/setup.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: contexttrace
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: Local-first SDK and CLI for RAG and agent reliability tracing, citation checks, and failure diagnosis.
|
|
5
5
|
Author: ContextTrace contributors
|
|
6
6
|
License: MIT
|
|
@@ -119,7 +119,31 @@ with ct.trace(query="What is the refund policy?") as trace:
|
|
|
119
119
|
|
|
120
120
|
## BYO RAG Endpoint
|
|
121
121
|
|
|
122
|
-
|
|
122
|
+
Capture and verify one live response from a running local or hosted RAG API without adding SDK code:
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
contexttrace capture endpoint \
|
|
126
|
+
--endpoint http://localhost:8000/query \
|
|
127
|
+
--query "What is the refund policy?" \
|
|
128
|
+
--answer-path $.answer \
|
|
129
|
+
--contexts-path $.contexts \
|
|
130
|
+
--citations-path $.citations \
|
|
131
|
+
--out traces/refund_trace.json \
|
|
132
|
+
--verify \
|
|
133
|
+
--report
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
If you already have a saved endpoint response:
|
|
137
|
+
|
|
138
|
+
```bash
|
|
139
|
+
contexttrace capture response response.json \
|
|
140
|
+
--query "What is the refund policy?" \
|
|
141
|
+
--out traces/refund_trace.json \
|
|
142
|
+
--verify \
|
|
143
|
+
--report
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
Evaluate a dataset through the same endpoint when you are ready to regression test:
|
|
123
147
|
|
|
124
148
|
```bash
|
|
125
149
|
contexttrace eval \
|
|
@@ -139,6 +163,8 @@ Verify a portable RAG trace artifact without a hosted dashboard:
|
|
|
139
163
|
|
|
140
164
|
```bash
|
|
141
165
|
contexttrace verify-demo unsupported_claim --report
|
|
166
|
+
contexttrace inspect trace.json
|
|
167
|
+
contexttrace qa trace.json --corpus docs/ --report
|
|
142
168
|
contexttrace verify trace.json
|
|
143
169
|
contexttrace verify trace.json --json
|
|
144
170
|
contexttrace verify trace.json --report --out reports/example.html
|
|
@@ -153,23 +179,36 @@ contexttrace compare baseline.json current.json --fail-on new_failure
|
|
|
153
179
|
contexttrace audit trace.json --corpus docs/
|
|
154
180
|
contexttrace audit trace.json --corpus docs/ --report
|
|
155
181
|
contexttrace audit trace.json --corpus docs/ --fail-on retrieval_miss
|
|
182
|
+
contexttrace audit-benchmark --case-set real --mode semantic
|
|
183
|
+
contexttrace audit-benchmark --case-set real --mode semantic --report
|
|
156
184
|
```
|
|
157
185
|
|
|
158
186
|
Input requires `query`, `answer`, and `contexts` with `id` and `text`. Optional `citations` are checked to catch cited sources that do not actually support the matched claim.
|
|
159
187
|
|
|
160
188
|
`verify-demo` uses bundled demo traces, so it works immediately after `pip install contexttrace`. Available demos include `unsupported_claim`, `partial_support`, `citation_mismatch`, `should_abstain`, and `supported_answer`.
|
|
161
189
|
|
|
162
|
-
Use `--mode semantic` for local paraphrase-aware matching, and `verify-benchmark` to inspect bundled precision/recall metrics. The default benchmark includes 32
|
|
190
|
+
Use `--mode semantic` for local paraphrase-aware matching, and `verify-benchmark` to inspect bundled precision/recall metrics. The default benchmark includes 32 ContextTrace docs and release-artifact cases. `--case-set external` adds public OSS documentation and GitHub issue cases from Qdrant, Chroma, Haystack, and LangChain, while `--case-set all` runs both packs. `--report` writes an HTML report with misses to inspect.
|
|
163
191
|
|
|
164
192
|
Verification output includes evidence span offsets, stable span hashes, multiple supporting spans, typed matched/missing facts, and claim-level root causes so partial support failures are easier to inspect.
|
|
165
193
|
|
|
166
194
|
ContextTrace verifies whether each generated claim is actually supported by retrieved evidence. Instead of only showing a trace or a score, it tells you where the evidence chain broke: unsupported claim, citation mismatch, retrieval miss, answer overreach, conflicting context, or should-have-abstained.
|
|
167
195
|
|
|
196
|
+
Use the capture helper when you have RAG artifacts in memory:
|
|
197
|
+
|
|
198
|
+
```python
|
|
199
|
+
from contexttrace import capture_rag_trace, write_rag_trace
|
|
200
|
+
|
|
201
|
+
trace = capture_rag_trace(query=question, answer=answer, contexts=retrieved_docs)
|
|
202
|
+
write_rag_trace(trace, "trace.json")
|
|
203
|
+
```
|
|
204
|
+
|
|
168
205
|
Use `contexttrace compare baseline.json current.json` to diff two portable traces or saved `verify --json` outputs. It reports support-rate deltas, new unsupported claims, citation regressions, should-abstain flips, and new root causes, with `--fail-on` gates for CI.
|
|
169
206
|
|
|
170
|
-
Use `contexttrace audit trace.json --corpus docs/` to diagnose whether an unsupported claim failed because retrieval missed evidence, chunking omitted the supporting span, the corpus lacks coverage, or generation overclaimed.
|
|
207
|
+
Use `contexttrace audit trace.json --corpus docs/` to diagnose whether an unsupported claim failed because retrieval missed evidence, reranking buried it, chunking omitted the supporting span, the corpus lacks coverage, or generation overclaimed. Audit output includes failure stages, diagnostic signals, and prioritized next actions.
|
|
208
|
+
|
|
209
|
+
Use `contexttrace audit-benchmark --case-set real --mode semantic` to test retrieval-audit labels against bundled public OSS documentation and GitHub issue snippets from Qdrant, Chroma, Haystack, LangChain, and ContextTrace docs.
|
|
171
210
|
|
|
172
|
-
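The v0.5.0 verifier uses local lexical heuristics by default. Claim extraction is rule-based, contradiction detection is conservative, and semantic or LLM-judge support can be added later.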
|
|
211
|
+
The v0.6.0 verifier uses local lexical heuristics by default. Claim extraction is rule-based, contradiction detection is conservative, and semantic or LLM-judge support can be added later.
|
|
173
212
|
|
|
174
213
|
## What It Catches
|
|
175
214
|
|
|
@@ -1,135 +1,174 @@
|
|
|
1
|
-
# ContextTrace
|
|
2
|
-
|
|
3
|
-
**Debug RAG failures before users find them.**
|
|
4
|
-
|
|
5
|
-
ContextTrace is a local-first Python SDK and CLI for evaluating existing RAG and AI agent systems. It records retrieved chunks, selected context, answer claims, citations, token usage, latency, and agent events, then writes local traces and HTML reports without requiring a hosted dashboard.
|
|
6
|
-
|
|
7
|
-
## Install
|
|
8
|
-
|
|
9
|
-
```bash
|
|
10
|
-
pip install contexttrace
|
|
11
|
-
contexttrace --version
|
|
12
|
-
contexttrace init
|
|
13
|
-
```
|
|
14
|
-
|
|
15
|
-
Optional integrations:
|
|
16
|
-
|
|
17
|
-
```bash
|
|
18
|
-
pip install "contexttrace[langchain]"
|
|
19
|
-
pip install "contexttrace[llamaindex]"
|
|
20
|
-
pip install "contexttrace[fastapi]"
|
|
21
|
-
pip install "contexttrace[langgraph]"
|
|
22
|
-
pip install "contexttrace[otel]"
|
|
23
|
-
pip install "contexttrace[all]"
|
|
24
|
-
```
|
|
25
|
-
|
|
26
|
-
## Quickstart
|
|
27
|
-
|
|
28
|
-
```bash
|
|
29
|
-
contexttrace init
|
|
30
|
-
contexttrace demo --dataset refund_policy
|
|
31
|
-
contexttrace report --last
|
|
32
|
-
contexttrace doctor
|
|
33
|
-
```
|
|
34
|
-
|
|
35
|
-
By default, traces are stored locally in:
|
|
36
|
-
|
|
37
|
-
```text
|
|
38
|
-
.contexttrace/contexttrace.db
|
|
39
|
-
```
|
|
40
|
-
|
|
41
|
-
## SDK Example
|
|
42
|
-
|
|
43
|
-
```python
|
|
44
|
-
from contexttrace import ContextTrace
|
|
45
|
-
|
|
46
|
-
ct = ContextTrace(project="support-rag")
|
|
47
|
-
|
|
48
|
-
with ct.trace(query="What is the refund policy?") as trace:
|
|
49
|
-
chunks = retriever.search("What is the refund policy?")
|
|
50
|
-
trace.log_retrieval(chunks)
|
|
51
|
-
trace.log_context(chunks[:5])
|
|
52
|
-
|
|
53
|
-
answer = llm.generate("What is the refund policy?", chunks[:5])
|
|
54
|
-
trace.log_answer(answer, usage={"total_tokens": 1200})
|
|
55
|
-
trace.log_citations([
|
|
56
|
-
{"claim": "Refunds are available within 30 days.", "source_chunk_id": "chunk_12"}
|
|
57
|
-
])
|
|
58
|
-
|
|
59
|
-
result = trace.evaluate()
|
|
60
|
-
print(result["failure"]["failure_type"])
|
|
61
|
-
```
|
|
62
|
-
|
|
63
|
-
## BYO RAG Endpoint
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
```bash
|
|
68
|
-
contexttrace
|
|
69
|
-
--
|
|
70
|
-
--
|
|
71
|
-
--
|
|
72
|
-
--
|
|
73
|
-
--
|
|
74
|
-
--
|
|
75
|
-
--
|
|
76
|
-
--
|
|
77
|
-
```
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
contexttrace
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
-
|
|
125
|
-
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
-
|
|
134
|
-
|
|
135
|
-
-
|
|
1
|
+
# ContextTrace
|
|
2
|
+
|
|
3
|
+
**Debug RAG failures before users find them.**
|
|
4
|
+
|
|
5
|
+
ContextTrace is a local-first Python SDK and CLI for evaluating existing RAG and AI agent systems. It records retrieved chunks, selected context, answer claims, citations, token usage, latency, and agent events, then writes local traces and HTML reports without requiring a hosted dashboard.
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install contexttrace
|
|
11
|
+
contexttrace --version
|
|
12
|
+
contexttrace init
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
Optional integrations:
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
pip install "contexttrace[langchain]"
|
|
19
|
+
pip install "contexttrace[llamaindex]"
|
|
20
|
+
pip install "contexttrace[fastapi]"
|
|
21
|
+
pip install "contexttrace[langgraph]"
|
|
22
|
+
pip install "contexttrace[otel]"
|
|
23
|
+
pip install "contexttrace[all]"
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Quickstart
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
contexttrace init
|
|
30
|
+
contexttrace demo --dataset refund_policy
|
|
31
|
+
contexttrace report --last
|
|
32
|
+
contexttrace doctor
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
By default, traces are stored locally in:
|
|
36
|
+
|
|
37
|
+
```text
|
|
38
|
+
.contexttrace/contexttrace.db
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## SDK Example
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
from contexttrace import ContextTrace
|
|
45
|
+
|
|
46
|
+
ct = ContextTrace(project="support-rag")
|
|
47
|
+
|
|
48
|
+
with ct.trace(query="What is the refund policy?") as trace:
|
|
49
|
+
chunks = retriever.search("What is the refund policy?")
|
|
50
|
+
trace.log_retrieval(chunks)
|
|
51
|
+
trace.log_context(chunks[:5])
|
|
52
|
+
|
|
53
|
+
answer = llm.generate("What is the refund policy?", chunks[:5])
|
|
54
|
+
trace.log_answer(answer, usage={"total_tokens": 1200})
|
|
55
|
+
trace.log_citations([
|
|
56
|
+
{"claim": "Refunds are available within 30 days.", "source_chunk_id": "chunk_12"}
|
|
57
|
+
])
|
|
58
|
+
|
|
59
|
+
result = trace.evaluate()
|
|
60
|
+
print(result["failure"]["failure_type"])
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## BYO RAG Endpoint
|
|
64
|
+
|
|
65
|
+
Capture and verify one live response from a running local or hosted RAG API without adding SDK code:
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
contexttrace capture endpoint \
|
|
69
|
+
--endpoint http://localhost:8000/query \
|
|
70
|
+
--query "What is the refund policy?" \
|
|
71
|
+
--answer-path $.answer \
|
|
72
|
+
--contexts-path $.contexts \
|
|
73
|
+
--citations-path $.citations \
|
|
74
|
+
--out traces/refund_trace.json \
|
|
75
|
+
--verify \
|
|
76
|
+
--report
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
If you already have a saved endpoint response:
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
contexttrace capture response response.json \
|
|
83
|
+
--query "What is the refund policy?" \
|
|
84
|
+
--out traces/refund_trace.json \
|
|
85
|
+
--verify \
|
|
86
|
+
--report
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Evaluate a dataset through the same endpoint when you are ready to regression test:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
contexttrace eval \
|
|
93
|
+
--dataset evals/questions.json \
|
|
94
|
+
--endpoint http://localhost:8000/query \
|
|
95
|
+
--method POST \
|
|
96
|
+
--input-key question \
|
|
97
|
+
--answer-path $.answer \
|
|
98
|
+
--contexts-path $.contexts \
|
|
99
|
+
--citations-path $.citations \
|
|
100
|
+
--fail-on "failure_rate>0.25"
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## Claim-Level Evidence Verification
|
|
104
|
+
|
|
105
|
+
Verify a portable RAG trace artifact without a hosted dashboard:
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
contexttrace verify-demo unsupported_claim --report
|
|
109
|
+
contexttrace inspect trace.json
|
|
110
|
+
contexttrace qa trace.json --corpus docs/ --report
|
|
111
|
+
contexttrace verify trace.json
|
|
112
|
+
contexttrace verify trace.json --json
|
|
113
|
+
contexttrace verify trace.json --report --out reports/example.html
|
|
114
|
+
contexttrace verify trace.json --mode semantic
|
|
115
|
+
contexttrace verify trace.json --fail-on unsupported --fail-on citation_mismatch
|
|
116
|
+
contexttrace verify-benchmark --mode semantic
|
|
117
|
+
contexttrace verify-benchmark --mode semantic --report
|
|
118
|
+
contexttrace verify-benchmark --case-set external --mode semantic --report
|
|
119
|
+
contexttrace compare baseline.json current.json
|
|
120
|
+
contexttrace compare baseline.json current.json --report
|
|
121
|
+
contexttrace compare baseline.json current.json --fail-on new_failure
|
|
122
|
+
contexttrace audit trace.json --corpus docs/
|
|
123
|
+
contexttrace audit trace.json --corpus docs/ --report
|
|
124
|
+
contexttrace audit trace.json --corpus docs/ --fail-on retrieval_miss
|
|
125
|
+
contexttrace audit-benchmark --case-set real --mode semantic
|
|
126
|
+
contexttrace audit-benchmark --case-set real --mode semantic --report
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Input requires `query`, `answer`, and `contexts` with `id` and `text`. Optional `citations` are checked to catch cited sources that do not actually support the matched claim.
|
|
130
|
+
|
|
131
|
+
`verify-demo` uses bundled demo traces, so it works immediately after `pip install contexttrace`. Available demos include `unsupported_claim`, `partial_support`, `citation_mismatch`, `should_abstain`, and `supported_answer`.
|
|
132
|
+
|
|
133
|
+
Use `--mode semantic` for local paraphrase-aware matching, and `verify-benchmark` to inspect bundled precision/recall metrics. The default benchmark includes 32 ContextTrace docs and release-artifact cases. `--case-set external` adds public OSS documentation and GitHub issue cases from Qdrant, Chroma, Haystack, and LangChain, while `--case-set all` runs both packs. `--report` writes an HTML report with misses to inspect.
|
|
134
|
+
|
|
135
|
+
Verification output includes evidence span offsets, stable span hashes, multiple supporting spans, typed matched/missing facts, and claim-level root causes so partial support failures are easier to inspect.
|
|
136
|
+
|
|
137
|
+
ContextTrace verifies whether each generated claim is actually supported by retrieved evidence. Instead of only showing a trace or a score, it tells you where the evidence chain broke: unsupported claim, citation mismatch, retrieval miss, answer overreach, conflicting context, or should-have-abstained.
|
|
138
|
+
|
|
139
|
+
Use the capture helper when you have RAG artifacts in memory:
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
from contexttrace import capture_rag_trace, write_rag_trace
|
|
143
|
+
|
|
144
|
+
trace = capture_rag_trace(query=question, answer=answer, contexts=retrieved_docs)
|
|
145
|
+
write_rag_trace(trace, "trace.json")
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
Use `contexttrace compare baseline.json current.json` to diff two portable traces or saved `verify --json` outputs. It reports support-rate deltas, new unsupported claims, citation regressions, should-abstain flips, and new root causes, with `--fail-on` gates for CI.
|
|
149
|
+
|
|
150
|
+
Use `contexttrace audit trace.json --corpus docs/` to diagnose whether an unsupported claim failed because retrieval missed evidence, reranking buried it, chunking omitted the supporting span, the corpus lacks coverage, or generation overclaimed. Audit output includes failure stages, diagnostic signals, and prioritized next actions.
|
|
151
|
+
|
|
152
|
+
Use `contexttrace audit-benchmark --case-set real --mode semantic` to test retrieval-audit labels against bundled public OSS documentation and GitHub issue snippets from Qdrant, Chroma, Haystack, LangChain, and ContextTrace docs.
|
|
153
|
+
|
|
154
|
+
The v0.6.0 verifier uses local lexical heuristics by default. Claim extraction is rule-based, contradiction detection is conservative, and semantic or LLM-judge support can be added later.
|
|
155
|
+
|
|
156
|
+
## What It Catches
|
|
157
|
+
|
|
158
|
+
- `retrieval_miss`
|
|
159
|
+
- `citation_mismatch`
|
|
160
|
+
- `unsupported_answer`
|
|
161
|
+
- `contradicted_answer`
|
|
162
|
+
- `conflicting_sources`
|
|
163
|
+
- `should_have_abstained`
|
|
164
|
+
- agent failures such as `stale_memory_used` and `tool_error`
|
|
165
|
+
|
|
166
|
+
## Privacy
|
|
167
|
+
|
|
168
|
+
Local mode is the default. ContextTrace makes no network calls unless you configure an LLM judge provider or evaluate a RAG endpoint you provide.
|
|
169
|
+
|
|
170
|
+
## Links
|
|
171
|
+
|
|
172
|
+
- Repository: https://github.com/samarth1412/Context-Trace
|
|
173
|
+
- Documentation: https://github.com/samarth1412/Context-Trace/tree/main/docs
|
|
174
|
+
- Issues: https://github.com/samarth1412/Context-Trace/issues
|
|
@@ -1,36 +1,44 @@
|
|
|
1
|
-
from contexttrace._version import __version__
|
|
2
|
-
from contexttrace.
|
|
3
|
-
from contexttrace.
|
|
4
|
-
from contexttrace.
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
from contexttrace.integrations.
|
|
13
|
-
from contexttrace.integrations.
|
|
14
|
-
from contexttrace.integrations.
|
|
15
|
-
from contexttrace.
|
|
16
|
-
from contexttrace.
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
"
|
|
22
|
-
"
|
|
23
|
-
"
|
|
24
|
-
"
|
|
25
|
-
"
|
|
26
|
-
"
|
|
27
|
-
"
|
|
28
|
-
"
|
|
29
|
-
"
|
|
30
|
-
"
|
|
31
|
-
"
|
|
32
|
-
"
|
|
33
|
-
"
|
|
34
|
-
"
|
|
35
|
-
"
|
|
36
|
-
|
|
1
|
+
from contexttrace._version import __version__
|
|
2
|
+
from contexttrace.capture import capture_rag_trace, langchain_documents_to_contexts, write_rag_trace
|
|
3
|
+
from contexttrace.capture_endpoint import EndpointCapture, capture_endpoint_trace, capture_response_trace
|
|
4
|
+
from contexttrace.client import AsyncContextTrace, ContextTrace
|
|
5
|
+
from contexttrace.config import ContextTraceConfig
|
|
6
|
+
from contexttrace.errors import (
|
|
7
|
+
ContextTraceConfigError,
|
|
8
|
+
ContextTraceError,
|
|
9
|
+
ContextTraceHTTPError,
|
|
10
|
+
ContextTraceLocalError,
|
|
11
|
+
)
|
|
12
|
+
from contexttrace.integrations.fastapi import ContextTraceFastAPIMiddleware
|
|
13
|
+
from contexttrace.integrations.langchain import ContextTraceCallbackHandler
|
|
14
|
+
from contexttrace.integrations.langgraph import ContextTraceLangGraphTracer
|
|
15
|
+
from contexttrace.integrations.llamaindex import ContextTraceLlamaIndexCallbackHandler
|
|
16
|
+
from contexttrace.integrations.opentelemetry import OpenTelemetryExporter, export_contexttrace_trace
|
|
17
|
+
from contexttrace.reliability import ReliabilityScore, ReliabilityScorer
|
|
18
|
+
from contexttrace.report import ReportGenerator
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
"AsyncContextTrace",
|
|
22
|
+
"ContextTrace",
|
|
23
|
+
"ContextTraceConfig",
|
|
24
|
+
"ContextTraceConfigError",
|
|
25
|
+
"ContextTraceCallbackHandler",
|
|
26
|
+
"ContextTraceError",
|
|
27
|
+
"ContextTraceFastAPIMiddleware",
|
|
28
|
+
"ContextTraceHTTPError",
|
|
29
|
+
"ContextTraceLocalError",
|
|
30
|
+
"ContextTraceLangGraphTracer",
|
|
31
|
+
"ContextTraceLlamaIndexCallbackHandler",
|
|
32
|
+
"EndpointCapture",
|
|
33
|
+
"OpenTelemetryExporter",
|
|
34
|
+
"ReliabilityScore",
|
|
35
|
+
"ReliabilityScorer",
|
|
36
|
+
"ReportGenerator",
|
|
37
|
+
"capture_rag_trace",
|
|
38
|
+
"capture_endpoint_trace",
|
|
39
|
+
"capture_response_trace",
|
|
40
|
+
"export_contexttrace_trace",
|
|
41
|
+
"langchain_documents_to_contexts",
|
|
42
|
+
"write_rag_trace",
|
|
43
|
+
"__version__",
|
|
44
|
+
]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.6.0"
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any, Iterable
|
|
6
|
+
|
|
7
|
+
from contexttrace.verify.schema import RAGTrace, TraceCitation, TraceContext, load_trace
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def capture_rag_trace(
    *,
    query: str,
    answer: str,
    contexts: Iterable[Any],
    citations: Iterable[Any] | None = None,
    metadata: dict[str, Any] | None = None,
    context_id_prefix: str = "context",
) -> RAGTrace:
    """Create a portable ContextTrace verification trace from common RAG artifacts.

    Args:
        query: The user question that was sent to the RAG system.
        answer: The generated answer to verify.
        contexts: Retrieved items; each one is normalized through
            ``context_to_trace_context`` (dicts, document-like objects, or
            ``TraceContext`` instances).
        citations: Optional citations; each one is normalized through
            ``citation_to_trace_citation``. ``None`` means no citations.
        metadata: Optional trace-level metadata; it is shallow-copied so the
            caller's dict is never shared with the trace.
        context_id_prefix: Prefix used for generated ids when a context has
            neither an id nor a source.

    Returns:
        The trace produced by ``load_trace`` from the assembled payload.

    Raises:
        ValueError: If a context has no text or a citation lacks a claim or
            source id (raised by the conversion helpers).
    """
    normalized_contexts = [
        context_to_trace_context(context, index=index, id_prefix=context_id_prefix).to_dict()
        for index, context in enumerate(contexts)
    ]
    normalized_citations = [
        citation_to_trace_citation(citation).to_dict()
        for citation in (citations or [])
    ]
    payload = {
        "query": query,
        "answer": answer,
        "contexts": normalized_contexts,
        "citations": normalized_citations,
        "metadata": dict(metadata or {}),
    }
    # Round-trip through load_trace so a captured trace is built by the same
    # loader that handles trace payloads elsewhere.
    return load_trace(payload, source="captured RAG trace")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def write_rag_trace(trace: RAGTrace, path: str | Path) -> str:
    """Write a portable RAG trace JSON file that works with `contexttrace verify`.

    Args:
        trace: Trace object exposing ``to_dict()``.
        path: Destination file; missing parent directories are created.

    Returns:
        The destination path as a string.
    """
    output_path = Path(path)
    # Create intermediate directories so callers can target e.g. traces/x.json
    # without preparing the folder first.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(trace.to_dict(), indent=2)
    output_path.write_text(serialized, encoding="utf-8")
    return str(output_path)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def context_to_trace_context(
    context: Any,
    *,
    index: int = 0,
    id_prefix: str = "context",
) -> TraceContext:
    """Convert dicts, LangChain Documents, or document-like objects to TraceContext.

    Args:
        context: A ``TraceContext`` (returned unchanged), a dict with
            ``text``/``content``/``page_content``, or an object exposing
            ``page_content`` or ``text`` plus optional ``metadata``, ``id``
            and ``score`` attributes.
        index: Zero-based position of the context; used in error messages and
            generated ids.
        id_prefix: Prefix for generated ids when no id or source is available.

    Returns:
        A ``TraceContext`` with a resolved id, stripped text, and metadata
        that includes ``source``/``score`` when they were found outside it.

    Raises:
        ValueError: If no non-blank text could be found.
    """

    def _pick(*candidates: Any) -> Any:
        # First candidate that is neither None nor blank. Unlike an `or`
        # chain this keeps legitimate falsy values such as id 0 or score 0.0.
        for candidate in candidates:
            if candidate is not None and str(candidate).strip() != "":
                return candidate
        return None

    if isinstance(context, TraceContext):
        return context

    if isinstance(context, dict):
        text = _first_present(context, ("text", "content", "page_content"))
        metadata = dict(context.get("metadata") or {})
        context_id = _first_present(
            context,
            ("id", "chunk_id", "source_id", "source_chunk_id", "document_id"),
        )
        source = context.get("source")
        # Fall back to relevance_score even when "score" exists but is None.
        score = _first_present(context, ("score", "relevance_score"))
    else:
        text = _pick(getattr(context, "page_content", None), getattr(context, "text", None))
        metadata = dict(getattr(context, "metadata", None) or {})
        context_id = _pick(
            getattr(context, "id", None),
            metadata.get("chunk_id"),
            metadata.get("id"),
        )
        source = metadata.get("source")
        score = _pick(
            getattr(context, "score", None),
            metadata.get("score"),
            metadata.get("relevance_score"),
        )

    context_text = str(text or "").strip()
    if not context_text:
        raise ValueError("Captured context %s did not include text/content/page_content." % index)

    # Mirror top-level source/score into metadata without overriding values
    # the caller already placed there.
    if source is not None and "source" not in metadata:
        metadata["source"] = source
    if score is not None and "score" not in metadata and "relevance_score" not in metadata:
        metadata["score"] = score

    resolved_id = _context_id(
        context_id=context_id,
        metadata=metadata,
        id_prefix=id_prefix,
        index=index,
    )
    return TraceContext(id=resolved_id, text=context_text, metadata=metadata)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def citation_to_trace_citation(citation: Any) -> TraceCitation:
    """Convert a dict or citation-like object to a TraceCitation.

    Args:
        citation: A ``TraceCitation`` (returned unchanged), a dict, or an
            object. The claim is read from ``claim``; the source id from the
            first usable one of ``source_id``, ``source_chunk_id``,
            ``chunk_id``; ``metadata`` is optional.

    Returns:
        A ``TraceCitation`` with stripped string claim and source id.

    Raises:
        ValueError: If the claim or the source id is missing or blank.
    """
    if isinstance(citation, TraceCitation):
        return citation

    source_keys = ("source_id", "source_chunk_id", "chunk_id")
    if isinstance(citation, dict):
        claim = citation.get("claim")
        candidates = [citation.get(key) for key in source_keys]
        metadata = dict(citation.get("metadata") or {})
    else:
        claim = getattr(citation, "claim", None)
        candidates = [getattr(citation, key, None) for key in source_keys]
        metadata = dict(getattr(citation, "metadata", None) or {})

    # Choose the first id that is neither None nor blank. An `or` chain would
    # wrongly skip a valid numeric id of 0.
    source_id = next(
        (value for value in candidates if value is not None and str(value).strip() != ""),
        None,
    )

    if not str(claim or "").strip():
        raise ValueError("Captured citation did not include claim.")
    if source_id is None:
        raise ValueError("Captured citation did not include source_id/source_chunk_id/chunk_id.")
    return TraceCitation(claim=str(claim).strip(), source_id=str(source_id).strip(), metadata=metadata)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def langchain_documents_to_contexts(
    documents: Iterable[Any],
    *,
    id_prefix: str = "langchain_doc",
) -> list[TraceContext]:
    """Convert an iterable of document-like objects to a list of TraceContext.

    Each document is passed to ``context_to_trace_context`` together with its
    zero-based position, so generated ids follow the input order.

    Args:
        documents: Documents to convert.
        id_prefix: Prefix for generated ids when a document has neither an id
            nor a source.

    Returns:
        One ``TraceContext`` per input document, in input order.
    """
    converted: list[TraceContext] = []
    for position, document in enumerate(documents):
        converted.append(
            context_to_trace_context(document, index=position, id_prefix=id_prefix)
        )
    return converted
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _first_present(payload: dict[str, Any], keys: tuple[str, ...]) -> Any:
|
|
127
|
+
for key in keys:
|
|
128
|
+
value = payload.get(key)
|
|
129
|
+
if value is not None and str(value).strip() != "":
|
|
130
|
+
return value
|
|
131
|
+
return None
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _context_id(
|
|
135
|
+
*,
|
|
136
|
+
context_id: Any,
|
|
137
|
+
metadata: dict[str, Any],
|
|
138
|
+
id_prefix: str,
|
|
139
|
+
index: int,
|
|
140
|
+
) -> str:
|
|
141
|
+
if context_id is not None and str(context_id).strip():
|
|
142
|
+
return str(context_id).strip()
|
|
143
|
+
source = metadata.get("source")
|
|
144
|
+
chunk_marker = (
|
|
145
|
+
metadata.get("chunk_id")
|
|
146
|
+
or metadata.get("chunk_index")
|
|
147
|
+
or metadata.get("page")
|
|
148
|
+
or metadata.get("start_index")
|
|
149
|
+
)
|
|
150
|
+
if source is not None and str(source).strip() and chunk_marker is not None:
|
|
151
|
+
return "%s:%s" % (str(source).strip(), str(chunk_marker).strip())
|
|
152
|
+
if source is not None and str(source).strip():
|
|
153
|
+
return "%s:%s" % (str(source).strip(), index + 1)
|
|
154
|
+
return "%s_%s" % (id_prefix, index + 1)
|