contexttrace 0.5.0__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. {contexttrace-0.5.0 → contexttrace-0.7.0}/PKG-INFO +52 -5
  2. {contexttrace-0.5.0 → contexttrace-0.7.0}/README.md +173 -126
  3. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/__init__.py +44 -36
  4. contexttrace-0.7.0/contexttrace/_version.py +1 -0
  5. contexttrace-0.7.0/contexttrace/capture.py +154 -0
  6. contexttrace-0.7.0/contexttrace/capture_endpoint.py +174 -0
  7. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/cli.py +1457 -731
  8. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/endpoint_eval.py +315 -314
  9. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/verify/__init__.py +26 -18
  10. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/verify/audit.py +688 -449
  11. contexttrace-0.7.0/contexttrace/verify/audit_benchmark.py +439 -0
  12. contexttrace-0.7.0/contexttrace/verify/audit_benchmark_cases.json +574 -0
  13. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/verify/audit_report.py +415 -372
  14. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/verify/evidence.py +2 -0
  15. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/verify/facts.py +69 -3
  16. contexttrace-0.7.0/contexttrace/verify/qa.py +268 -0
  17. contexttrace-0.7.0/contexttrace/verify/qa_report.py +361 -0
  18. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/verify/runner.py +1 -1
  19. contexttrace-0.7.0/contexttrace/verify/suite.py +662 -0
  20. contexttrace-0.7.0/contexttrace/verify/suite_report.py +316 -0
  21. contexttrace-0.7.0/contexttrace/verify/trace_inspect.py +92 -0
  22. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/verify/verdicts.py +10 -3
  23. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace.egg-info/SOURCES.txt +9 -0
  24. {contexttrace-0.5.0 → contexttrace-0.7.0}/pyproject.toml +99 -99
  25. contexttrace-0.5.0/contexttrace/_version.py +0 -1
  26. {contexttrace-0.5.0 → contexttrace-0.7.0}/MANIFEST.in +0 -0
  27. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/client.py +0 -0
  28. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/config.py +0 -0
  29. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/demo.py +0 -0
  30. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/demo_data.py +0 -0
  31. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/errors.py +0 -0
  32. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/evaluator.py +0 -0
  33. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/integrations/__init__.py +0 -0
  34. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/integrations/fastapi.py +0 -0
  35. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/integrations/langchain.py +0 -0
  36. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/integrations/langgraph.py +0 -0
  37. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/integrations/llamaindex.py +0 -0
  38. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/integrations/opentelemetry.py +0 -0
  39. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/local.py +0 -0
  40. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/py.typed +0 -0
  41. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/regression.py +0 -0
  42. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/reliability.py +0 -0
  43. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/report.py +0 -0
  44. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/storage/__init__.py +0 -0
  45. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/storage/sqlite_store.py +0 -0
  46. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/thresholds.py +0 -0
  47. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/transport.py +0 -0
  48. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/verify/abstention.py +0 -0
  49. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/verify/benchmark.py +0 -0
  50. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/verify/citations.py +0 -0
  51. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/verify/claims.py +0 -0
  52. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/verify/compare.py +0 -0
  53. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/verify/compare_report.py +0 -0
  54. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/verify/demos.py +0 -0
  55. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/verify/external_benchmark_cases.json +0 -0
  56. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/verify/real_benchmark_cases.json +0 -0
  57. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/verify/report.py +0 -0
  58. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/verify/root_cause.py +0 -0
  59. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/verify/schema.py +0 -0
  60. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/verify/spans.py +0 -0
  61. {contexttrace-0.5.0 → contexttrace-0.7.0}/contexttrace/viewer.py +0 -0
  62. {contexttrace-0.5.0 → contexttrace-0.7.0}/setup.cfg +0 -0
  63. {contexttrace-0.5.0 → contexttrace-0.7.0}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: contexttrace
3
- Version: 0.5.0
3
+ Version: 0.7.0
4
4
  Summary: Local-first SDK and CLI for RAG and agent reliability tracing, citation checks, and failure diagnosis.
5
5
  Author: ContextTrace contributors
6
6
  License: MIT
@@ -119,7 +119,31 @@ with ct.trace(query="What is the refund policy?") as trace:
119
119
 
120
120
  ## BYO RAG Endpoint
121
121
 
122
- Evaluate a running local or hosted RAG API without adding SDK code:
122
+ Capture and verify one live response from a running local or hosted RAG API without adding SDK code:
123
+
124
+ ```bash
125
+ contexttrace capture endpoint \
126
+ --endpoint http://localhost:8000/query \
127
+ --query "What is the refund policy?" \
128
+ --answer-path $.answer \
129
+ --contexts-path $.contexts \
130
+ --citations-path $.citations \
131
+ --out traces/refund_trace.json \
132
+ --verify \
133
+ --report
134
+ ```
135
+
136
+ If you already have a saved endpoint response:
137
+
138
+ ```bash
139
+ contexttrace capture response response.json \
140
+ --query "What is the refund policy?" \
141
+ --out traces/refund_trace.json \
142
+ --verify \
143
+ --report
144
+ ```
145
+
146
+ Evaluate a dataset through the same endpoint when you are ready to regression test:
123
147
 
124
148
  ```bash
125
149
  contexttrace eval \
@@ -139,6 +163,8 @@ Verify a portable RAG trace artifact without a hosted dashboard:
139
163
 
140
164
  ```bash
141
165
  contexttrace verify-demo unsupported_claim --report
166
+ contexttrace inspect trace.json
167
+ contexttrace qa trace.json --corpus docs/ --report
142
168
  contexttrace verify trace.json
143
169
  contexttrace verify trace.json --json
144
170
  contexttrace verify trace.json --report --out reports/example.html
@@ -150,26 +176,47 @@ contexttrace verify-benchmark --case-set external --mode semantic --report
150
176
  contexttrace compare baseline.json current.json
151
177
  contexttrace compare baseline.json current.json --report
152
178
  contexttrace compare baseline.json current.json --fail-on new_failure
179
+ contexttrace suite create traces/*.json --out contexttrace-suite.json
180
+ contexttrace suite add contexttrace-suite.json traces/new_failure.json
181
+ contexttrace suite list contexttrace-suite.json
182
+ contexttrace suite run contexttrace-suite.json --endpoint http://localhost:8000/query --report
183
+ contexttrace suite prune contexttrace-suite.json --results .contexttrace/suites/contexttrace-regression-suite_results.json
184
+ contexttrace suite report .contexttrace/suites/contexttrace-regression-suite_results.json
153
185
  contexttrace audit trace.json --corpus docs/
154
186
  contexttrace audit trace.json --corpus docs/ --report
155
187
  contexttrace audit trace.json --corpus docs/ --fail-on retrieval_miss
188
+ contexttrace audit-benchmark --case-set real --mode semantic
189
+ contexttrace audit-benchmark --case-set real --mode semantic --report
156
190
  ```
157
191
 
158
192
  Input requires `query`, `answer`, and `contexts` with `id` and `text`. Optional `citations` are checked to catch cited sources that do not actually support the matched claim.
159
193
 
160
194
  `verify-demo` uses bundled demo traces, so it works immediately after `pip install contexttrace`. Available demos include `unsupported_claim`, `partial_support`, `citation_mismatch`, `should_abstain`, and `supported_answer`.
161
195
 
162
- Use `--mode semantic` for local paraphrase-aware matching, and `verify-benchmark` to inspect bundled precision/recall metrics. The default benchmark includes 32 real ContextTrace docs and release-artifact cases. `--case-set external` adds public OSS documentation and GitHub issue cases from Qdrant, Chroma, Haystack, and LangChain, while `--case-set all` runs both packs. `--report` writes an HTML report with misses to inspect.
196
+ Use `--mode semantic` for local paraphrase-aware matching, and `verify-benchmark` to inspect bundled precision/recall metrics. The default benchmark includes 32 ContextTrace docs and release-artifact cases. `--case-set external` adds public OSS documentation and GitHub issue cases from Qdrant, Chroma, Haystack, and LangChain, while `--case-set all` runs both packs. `--report` writes an HTML report with misses to inspect.
163
197
 
164
198
  Verification output includes evidence span offsets, stable span hashes, multiple supporting spans, typed matched/missing facts, and claim-level root causes so partial support failures are easier to inspect.
165
199
 
166
200
  ContextTrace verifies whether each generated claim is actually supported by retrieved evidence. Instead of only showing a trace or a score, it tells you where the evidence chain broke: unsupported claim, citation mismatch, retrieval miss, answer overreach, conflicting context, or should-have-abstained.
167
201
 
202
+ Use the capture helper when you have RAG artifacts in memory:
203
+
204
+ ```python
205
+ from contexttrace import capture_rag_trace, write_rag_trace
206
+
207
+ trace = capture_rag_trace(query=question, answer=answer, contexts=retrieved_docs)
208
+ write_rag_trace(trace, "trace.json")
209
+ ```
210
+
168
211
  Use `contexttrace compare baseline.json current.json` to diff two portable traces or saved `verify --json` outputs. It reports support-rate deltas, new unsupported claims, citation regressions, should-abstain flips, and new root causes, with `--fail-on` gates for CI.
169
212
 
170
- Use `contexttrace audit trace.json --corpus docs/` to diagnose whether an unsupported claim failed because retrieval missed evidence, chunking omitted the supporting span, the corpus lacks coverage, or generation overclaimed.
213
+ Use `contexttrace suite create`, `suite add`, and `suite run` to turn saved failures into replayable endpoint tests. Suite runs call your current RAG endpoint with the saved query, verify the new answer, compare it with the baseline trace, and exit non-zero when a saved failure still reproduces or a good case regresses. Use `suite list`, `suite remove`, and `suite prune` to manage the suite as failures are fixed or retired.
214
+
215
+ Use `contexttrace audit trace.json --corpus docs/` to diagnose whether an unsupported claim failed because retrieval missed evidence, reranking buried it, chunking omitted the supporting span, the corpus lacks coverage, or generation overclaimed. Audit output includes failure stages, diagnostic signals, and prioritized next actions.
216
+
217
+ Use `contexttrace audit-benchmark --case-set real --mode semantic` to test retrieval-audit labels against bundled public OSS documentation and GitHub issue snippets from Qdrant, Chroma, Haystack, LangChain, and ContextTrace docs.
171
218
 
172
- The v0.5.0 verifier uses local lexical heuristics by default. Claim extraction is rule-based, contradiction detection is conservative, and semantic or LLM-judge support can be added later.
219
+ The v0.7.0 verifier uses local lexical heuristics by default. Claim extraction is rule-based and contradiction detection is conservative; pass `--mode semantic` for local paraphrase-aware matching. LLM-judge support for the verifier can be added later.
173
220
 
174
221
  ## What It Catches
175
222
 
@@ -1,135 +1,182 @@
1
- # ContextTrace
2
-
3
- **Debug RAG failures before users find them.**
4
-
5
- ContextTrace is a local-first Python SDK and CLI for evaluating existing RAG and AI agent systems. It records retrieved chunks, selected context, answer claims, citations, token usage, latency, and agent events, then writes local traces and HTML reports without requiring a hosted dashboard.
6
-
7
- ## Install
8
-
9
- ```bash
10
- pip install contexttrace
11
- contexttrace --version
12
- contexttrace init
13
- ```
14
-
15
- Optional integrations:
16
-
17
- ```bash
18
- pip install "contexttrace[langchain]"
19
- pip install "contexttrace[llamaindex]"
20
- pip install "contexttrace[fastapi]"
21
- pip install "contexttrace[langgraph]"
22
- pip install "contexttrace[otel]"
23
- pip install "contexttrace[all]"
24
- ```
25
-
26
- ## Quickstart
27
-
28
- ```bash
29
- contexttrace init
30
- contexttrace demo --dataset refund_policy
31
- contexttrace report --last
32
- contexttrace doctor
33
- ```
34
-
35
- By default, traces are stored locally in:
36
-
37
- ```text
38
- .contexttrace/contexttrace.db
39
- ```
40
-
41
- ## SDK Example
42
-
43
- ```python
44
- from contexttrace import ContextTrace
45
-
46
- ct = ContextTrace(project="support-rag")
47
-
48
- with ct.trace(query="What is the refund policy?") as trace:
49
- chunks = retriever.search("What is the refund policy?")
50
- trace.log_retrieval(chunks)
51
- trace.log_context(chunks[:5])
52
-
53
- answer = llm.generate("What is the refund policy?", chunks[:5])
54
- trace.log_answer(answer, usage={"total_tokens": 1200})
55
- trace.log_citations([
56
- {"claim": "Refunds are available within 30 days.", "source_chunk_id": "chunk_12"}
57
- ])
58
-
59
- result = trace.evaluate()
60
- print(result["failure"]["failure_type"])
61
- ```
62
-
63
- ## BYO RAG Endpoint
64
-
65
- Evaluate a running local or hosted RAG API without adding SDK code:
66
-
67
- ```bash
68
- contexttrace eval \
69
- --dataset evals/questions.json \
70
- --endpoint http://localhost:8000/query \
71
- --method POST \
72
- --input-key question \
73
- --answer-path $.answer \
74
- --contexts-path $.contexts \
75
- --citations-path $.citations \
76
- --fail-on "failure_rate>0.25"
77
- ```
78
-
79
- ## Claim-Level Evidence Verification
80
-
81
- Verify a portable RAG trace artifact without a hosted dashboard:
82
-
83
- ```bash
84
- contexttrace verify-demo unsupported_claim --report
85
- contexttrace verify trace.json
86
- contexttrace verify trace.json --json
87
- contexttrace verify trace.json --report --out reports/example.html
88
- contexttrace verify trace.json --mode semantic
89
- contexttrace verify trace.json --fail-on unsupported --fail-on citation_mismatch
90
- contexttrace verify-benchmark --mode semantic
91
- contexttrace verify-benchmark --mode semantic --report
92
- contexttrace verify-benchmark --case-set external --mode semantic --report
1
+ # ContextTrace
2
+
3
+ **Debug RAG failures before users find them.**
4
+
5
+ ContextTrace is a local-first Python SDK and CLI for evaluating existing RAG and AI agent systems. It records retrieved chunks, selected context, answer claims, citations, token usage, latency, and agent events, then writes local traces and HTML reports without requiring a hosted dashboard.
6
+
7
+ ## Install
8
+
9
+ ```bash
10
+ pip install contexttrace
11
+ contexttrace --version
12
+ contexttrace init
13
+ ```
14
+
15
+ Optional integrations:
16
+
17
+ ```bash
18
+ pip install "contexttrace[langchain]"
19
+ pip install "contexttrace[llamaindex]"
20
+ pip install "contexttrace[fastapi]"
21
+ pip install "contexttrace[langgraph]"
22
+ pip install "contexttrace[otel]"
23
+ pip install "contexttrace[all]"
24
+ ```
25
+
26
+ ## Quickstart
27
+
28
+ ```bash
29
+ contexttrace init
30
+ contexttrace demo --dataset refund_policy
31
+ contexttrace report --last
32
+ contexttrace doctor
33
+ ```
34
+
35
+ By default, traces are stored locally in:
36
+
37
+ ```text
38
+ .contexttrace/contexttrace.db
39
+ ```
40
+
41
+ ## SDK Example
42
+
43
+ ```python
44
+ from contexttrace import ContextTrace
45
+
46
+ ct = ContextTrace(project="support-rag")
47
+
48
+ with ct.trace(query="What is the refund policy?") as trace:
49
+ chunks = retriever.search("What is the refund policy?")
50
+ trace.log_retrieval(chunks)
51
+ trace.log_context(chunks[:5])
52
+
53
+ answer = llm.generate("What is the refund policy?", chunks[:5])
54
+ trace.log_answer(answer, usage={"total_tokens": 1200})
55
+ trace.log_citations([
56
+ {"claim": "Refunds are available within 30 days.", "source_chunk_id": "chunk_12"}
57
+ ])
58
+
59
+ result = trace.evaluate()
60
+ print(result["failure"]["failure_type"])
61
+ ```
62
+
63
+ ## BYO RAG Endpoint
64
+
65
+ Capture and verify one live response from a running local or hosted RAG API without adding SDK code:
66
+
67
+ ```bash
68
+ contexttrace capture endpoint \
69
+ --endpoint http://localhost:8000/query \
70
+ --query "What is the refund policy?" \
71
+ --answer-path $.answer \
72
+ --contexts-path $.contexts \
73
+ --citations-path $.citations \
74
+ --out traces/refund_trace.json \
75
+ --verify \
76
+ --report
77
+ ```
78
+
79
+ If you already have a saved endpoint response:
80
+
81
+ ```bash
82
+ contexttrace capture response response.json \
83
+ --query "What is the refund policy?" \
84
+ --out traces/refund_trace.json \
85
+ --verify \
86
+ --report
87
+ ```
88
+
89
+ Evaluate a dataset through the same endpoint when you are ready to regression test:
90
+
91
+ ```bash
92
+ contexttrace eval \
93
+ --dataset evals/questions.json \
94
+ --endpoint http://localhost:8000/query \
95
+ --method POST \
96
+ --input-key question \
97
+ --answer-path $.answer \
98
+ --contexts-path $.contexts \
99
+ --citations-path $.citations \
100
+ --fail-on "failure_rate>0.25"
101
+ ```
102
+
103
+ ## Claim-Level Evidence Verification
104
+
105
+ Verify a portable RAG trace artifact without a hosted dashboard:
106
+
107
+ ```bash
108
+ contexttrace verify-demo unsupported_claim --report
109
+ contexttrace inspect trace.json
110
+ contexttrace qa trace.json --corpus docs/ --report
111
+ contexttrace verify trace.json
112
+ contexttrace verify trace.json --json
113
+ contexttrace verify trace.json --report --out reports/example.html
114
+ contexttrace verify trace.json --mode semantic
115
+ contexttrace verify trace.json --fail-on unsupported --fail-on citation_mismatch
116
+ contexttrace verify-benchmark --mode semantic
117
+ contexttrace verify-benchmark --mode semantic --report
118
+ contexttrace verify-benchmark --case-set external --mode semantic --report
93
119
  contexttrace compare baseline.json current.json
94
120
  contexttrace compare baseline.json current.json --report
95
121
  contexttrace compare baseline.json current.json --fail-on new_failure
122
+ contexttrace suite create traces/*.json --out contexttrace-suite.json
123
+ contexttrace suite add contexttrace-suite.json traces/new_failure.json
124
+ contexttrace suite list contexttrace-suite.json
125
+ contexttrace suite run contexttrace-suite.json --endpoint http://localhost:8000/query --report
126
+ contexttrace suite prune contexttrace-suite.json --results .contexttrace/suites/contexttrace-regression-suite_results.json
127
+ contexttrace suite report .contexttrace/suites/contexttrace-regression-suite_results.json
96
128
  contexttrace audit trace.json --corpus docs/
97
129
  contexttrace audit trace.json --corpus docs/ --report
98
130
  contexttrace audit trace.json --corpus docs/ --fail-on retrieval_miss
99
- ```
100
-
101
- Input requires `query`, `answer`, and `contexts` with `id` and `text`. Optional `citations` are checked to catch cited sources that do not actually support the matched claim.
102
-
103
- `verify-demo` uses bundled demo traces, so it works immediately after `pip install contexttrace`. Available demos include `unsupported_claim`, `partial_support`, `citation_mismatch`, `should_abstain`, and `supported_answer`.
104
-
105
- Use `--mode semantic` for local paraphrase-aware matching, and `verify-benchmark` to inspect bundled precision/recall metrics. The default benchmark includes 32 real ContextTrace docs and release-artifact cases. `--case-set external` adds public OSS documentation and GitHub issue cases from Qdrant, Chroma, Haystack, and LangChain, while `--case-set all` runs both packs. `--report` writes an HTML report with misses to inspect.
106
-
107
- Verification output includes evidence span offsets, stable span hashes, multiple supporting spans, typed matched/missing facts, and claim-level root causes so partial support failures are easier to inspect.
108
-
109
- ContextTrace verifies whether each generated claim is actually supported by retrieved evidence. Instead of only showing a trace or a score, it tells you where the evidence chain broke: unsupported claim, citation mismatch, retrieval miss, answer overreach, conflicting context, or should-have-abstained.
131
+ contexttrace audit-benchmark --case-set real --mode semantic
132
+ contexttrace audit-benchmark --case-set real --mode semantic --report
133
+ ```
134
+
135
+ Input requires `query`, `answer`, and `contexts` with `id` and `text`. Optional `citations` are checked to catch cited sources that do not actually support the matched claim.
136
+
137
+ `verify-demo` uses bundled demo traces, so it works immediately after `pip install contexttrace`. Available demos include `unsupported_claim`, `partial_support`, `citation_mismatch`, `should_abstain`, and `supported_answer`.
138
+
139
+ Use `--mode semantic` for local paraphrase-aware matching, and `verify-benchmark` to inspect bundled precision/recall metrics. The default benchmark includes 32 ContextTrace docs and release-artifact cases. `--case-set external` adds public OSS documentation and GitHub issue cases from Qdrant, Chroma, Haystack, and LangChain, while `--case-set all` runs both packs. `--report` writes an HTML report with misses to inspect.
140
+
141
+ Verification output includes evidence span offsets, stable span hashes, multiple supporting spans, typed matched/missing facts, and claim-level root causes so partial support failures are easier to inspect.
142
+
143
+ ContextTrace verifies whether each generated claim is actually supported by retrieved evidence. Instead of only showing a trace or a score, it tells you where the evidence chain broke: unsupported claim, citation mismatch, retrieval miss, answer overreach, conflicting context, or should-have-abstained.
144
+
145
+ Use the capture helper when you have RAG artifacts in memory:
146
+
147
+ ```python
148
+ from contexttrace import capture_rag_trace, write_rag_trace
149
+
150
+ trace = capture_rag_trace(query=question, answer=answer, contexts=retrieved_docs)
151
+ write_rag_trace(trace, "trace.json")
152
+ ```
110
153
 
111
154
  Use `contexttrace compare baseline.json current.json` to diff two portable traces or saved `verify --json` outputs. It reports support-rate deltas, new unsupported claims, citation regressions, should-abstain flips, and new root causes, with `--fail-on` gates for CI.
112
155
 
113
- Use `contexttrace audit trace.json --corpus docs/` to diagnose whether an unsupported claim failed because retrieval missed evidence, chunking omitted the supporting span, the corpus lacks coverage, or generation overclaimed.
114
-
115
- The v0.5.0 verifier uses local lexical heuristics by default. Claim extraction is rule-based, contradiction detection is conservative, and semantic or LLM-judge support can be added later.
116
-
117
- ## What It Catches
118
-
119
- - `retrieval_miss`
120
- - `citation_mismatch`
121
- - `unsupported_answer`
122
- - `contradicted_answer`
123
- - `conflicting_sources`
124
- - `should_have_abstained`
125
- - agent failures such as `stale_memory_used` and `tool_error`
126
-
127
- ## Privacy
128
-
129
- Local mode is the default. ContextTrace makes no network calls unless you configure an LLM judge provider or evaluate a RAG endpoint you provide.
130
-
131
- ## Links
132
-
133
- - Repository: https://github.com/samarth1412/Context-Trace
134
- - Documentation: https://github.com/samarth1412/Context-Trace/tree/main/docs
135
- - Issues: https://github.com/samarth1412/Context-Trace/issues
156
+ Use `contexttrace suite create`, `suite add`, and `suite run` to turn saved failures into replayable endpoint tests. Suite runs call your current RAG endpoint with the saved query, verify the new answer, compare it with the baseline trace, and exit non-zero when a saved failure still reproduces or a good case regresses. Use `suite list`, `suite remove`, and `suite prune` to manage the suite as failures are fixed or retired.
157
+
158
+ Use `contexttrace audit trace.json --corpus docs/` to diagnose whether an unsupported claim failed because retrieval missed evidence, reranking buried it, chunking omitted the supporting span, the corpus lacks coverage, or generation overclaimed. Audit output includes failure stages, diagnostic signals, and prioritized next actions.
159
+
160
+ Use `contexttrace audit-benchmark --case-set real --mode semantic` to test retrieval-audit labels against bundled public OSS documentation and GitHub issue snippets from Qdrant, Chroma, Haystack, LangChain, and ContextTrace docs.
161
+
162
+ The v0.7.0 verifier uses local lexical heuristics by default. Claim extraction is rule-based and contradiction detection is conservative; pass `--mode semantic` for local paraphrase-aware matching. LLM-judge support for the verifier can be added later.
163
+
164
+ ## What It Catches
165
+
166
+ - `retrieval_miss`
167
+ - `citation_mismatch`
168
+ - `unsupported_answer`
169
+ - `contradicted_answer`
170
+ - `conflicting_sources`
171
+ - `should_have_abstained`
172
+ - agent failures such as `stale_memory_used` and `tool_error`
173
+
174
+ ## Privacy
175
+
176
+ Local mode is the default. ContextTrace makes no network calls unless you configure an LLM judge provider or evaluate a RAG endpoint you provide.
177
+
178
+ ## Links
179
+
180
+ - Repository: https://github.com/samarth1412/Context-Trace
181
+ - Documentation: https://github.com/samarth1412/Context-Trace/tree/main/docs
182
+ - Issues: https://github.com/samarth1412/Context-Trace/issues
@@ -1,36 +1,44 @@
1
- from contexttrace._version import __version__
2
- from contexttrace.client import AsyncContextTrace, ContextTrace
3
- from contexttrace.config import ContextTraceConfig
4
- from contexttrace.errors import (
5
- ContextTraceConfigError,
6
- ContextTraceError,
7
- ContextTraceHTTPError,
8
- ContextTraceLocalError,
9
- )
10
- from contexttrace.integrations.fastapi import ContextTraceFastAPIMiddleware
11
- from contexttrace.integrations.langchain import ContextTraceCallbackHandler
12
- from contexttrace.integrations.langgraph import ContextTraceLangGraphTracer
13
- from contexttrace.integrations.llamaindex import ContextTraceLlamaIndexCallbackHandler
14
- from contexttrace.integrations.opentelemetry import OpenTelemetryExporter, export_contexttrace_trace
15
- from contexttrace.reliability import ReliabilityScore, ReliabilityScorer
16
- from contexttrace.report import ReportGenerator
17
-
18
- __all__ = [
19
- "AsyncContextTrace",
20
- "ContextTrace",
21
- "ContextTraceConfig",
22
- "ContextTraceConfigError",
23
- "ContextTraceCallbackHandler",
24
- "ContextTraceError",
25
- "ContextTraceFastAPIMiddleware",
26
- "ContextTraceHTTPError",
27
- "ContextTraceLocalError",
28
- "ContextTraceLangGraphTracer",
29
- "ContextTraceLlamaIndexCallbackHandler",
30
- "OpenTelemetryExporter",
31
- "ReliabilityScore",
32
- "ReliabilityScorer",
33
- "ReportGenerator",
34
- "export_contexttrace_trace",
35
- "__version__",
36
- ]
1
+ from contexttrace._version import __version__
2
+ from contexttrace.capture import capture_rag_trace, langchain_documents_to_contexts, write_rag_trace
3
+ from contexttrace.capture_endpoint import EndpointCapture, capture_endpoint_trace, capture_response_trace
4
+ from contexttrace.client import AsyncContextTrace, ContextTrace
5
+ from contexttrace.config import ContextTraceConfig
6
+ from contexttrace.errors import (
7
+ ContextTraceConfigError,
8
+ ContextTraceError,
9
+ ContextTraceHTTPError,
10
+ ContextTraceLocalError,
11
+ )
12
+ from contexttrace.integrations.fastapi import ContextTraceFastAPIMiddleware
13
+ from contexttrace.integrations.langchain import ContextTraceCallbackHandler
14
+ from contexttrace.integrations.langgraph import ContextTraceLangGraphTracer
15
+ from contexttrace.integrations.llamaindex import ContextTraceLlamaIndexCallbackHandler
16
+ from contexttrace.integrations.opentelemetry import OpenTelemetryExporter, export_contexttrace_trace
17
+ from contexttrace.reliability import ReliabilityScore, ReliabilityScorer
18
+ from contexttrace.report import ReportGenerator
19
+
20
+ __all__ = [
21
+ "AsyncContextTrace",
22
+ "ContextTrace",
23
+ "ContextTraceConfig",
24
+ "ContextTraceConfigError",
25
+ "ContextTraceCallbackHandler",
26
+ "ContextTraceError",
27
+ "ContextTraceFastAPIMiddleware",
28
+ "ContextTraceHTTPError",
29
+ "ContextTraceLocalError",
30
+ "ContextTraceLangGraphTracer",
31
+ "ContextTraceLlamaIndexCallbackHandler",
32
+ "EndpointCapture",
33
+ "OpenTelemetryExporter",
34
+ "ReliabilityScore",
35
+ "ReliabilityScorer",
36
+ "ReportGenerator",
37
+ "capture_rag_trace",
38
+ "capture_endpoint_trace",
39
+ "capture_response_trace",
40
+ "export_contexttrace_trace",
41
+ "langchain_documents_to_contexts",
42
+ "write_rag_trace",
43
+ "__version__",
44
+ ]
@@ -0,0 +1 @@
1
+ __version__ = "0.7.0"
@@ -0,0 +1,154 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Any, Iterable
6
+
7
+ from contexttrace.verify.schema import RAGTrace, TraceCitation, TraceContext, load_trace
8
+
9
+
10
def capture_rag_trace(
    *,
    query: str,
    answer: str,
    contexts: Iterable[Any],
    citations: Iterable[Any] | None = None,
    metadata: dict[str, Any] | None = None,
    context_id_prefix: str = "context",
) -> RAGTrace:
    """Build a portable verification trace from in-memory RAG artifacts.

    Each item in ``contexts`` is normalized with ``context_to_trace_context``
    (receiving its position and ``context_id_prefix``) and each item in
    ``citations`` with ``citation_to_trace_citation``. The resulting payload
    is handed to ``load_trace``, whose return value is returned unchanged.

    Args:
        query: The question that was asked.
        answer: The generated answer.
        contexts: Retrieved context items to normalize.
        citations: Optional citation items; ``None`` is treated as empty.
        metadata: Optional trace-level metadata; it is shallow-copied.
        context_id_prefix: Prefix passed to the context converter for
            fallback ids.
    """

    context_rows = []
    for position, item in enumerate(contexts):
        converted = context_to_trace_context(
            item,
            index=position,
            id_prefix=context_id_prefix,
        )
        context_rows.append(converted.to_dict())

    citation_rows = []
    for item in citations or []:
        citation_rows.append(citation_to_trace_citation(item).to_dict())

    trace_payload = {
        "query": query,
        "answer": answer,
        "contexts": context_rows,
        "citations": citation_rows,
        # Copy so the caller's dict is not shared with the trace payload.
        "metadata": dict(metadata or {}),
    }
    return load_trace(trace_payload, source="captured RAG trace")
35
+
36
+
37
def write_rag_trace(trace: RAGTrace, path: str | Path) -> str:
    """Serialize ``trace`` as indented JSON at ``path`` and return the path.

    Missing parent directories are created first. The file is written as
    UTF-8 and the returned value is ``str(Path(path))``.
    """

    destination = Path(path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(trace.to_dict(), indent=2)
    with destination.open("w", encoding="utf-8") as handle:
        handle.write(serialized)
    return str(destination)
44
+
45
+
46
def context_to_trace_context(
    context: Any,
    *,
    index: int = 0,
    id_prefix: str = "context",
) -> TraceContext:
    """Convert a dict, LangChain Document, or document-like object to TraceContext.

    A ``TraceContext`` instance is returned as-is. For dicts, text is read
    from ``text``/``content``/``page_content`` and the id from
    ``id``/``chunk_id``/``source_id``/``source_chunk_id``/``document_id``.
    For other objects, text is read from the ``page_content`` or ``text``
    attribute, and id/source/score from attributes or the object's
    ``metadata`` mapping.

    Args:
        context: The item to convert.
        index: Zero-based position of the item; used in the error message
            and for fallback ids.
        id_prefix: Prefix for the fallback id when nothing else identifies
            the context.

    Raises:
        ValueError: If no non-blank text could be found.
    """

    if isinstance(context, TraceContext):
        return context

    if isinstance(context, dict):
        text = _first_present(context, ("text", "content", "page_content"))
        metadata = dict(context.get("metadata") or {})
        context_id = _first_present(
            context,
            ("id", "chunk_id", "source_id", "source_chunk_id", "document_id"),
        )
        source = context.get("source")
        # Fall back to relevance_score when "score" is absent OR explicitly None.
        score = _first_usable(context.get("score"), context.get("relevance_score"))
    else:
        metadata = dict(getattr(context, "metadata", None) or {})
        # Select by "not None and not blank" rather than truthiness, so that a
        # score of 0.0 or an integer id of 0 is kept (as the dict branch does).
        text = _first_usable(
            getattr(context, "page_content", None),
            getattr(context, "text", None),
        )
        context_id = _first_usable(
            getattr(context, "id", None),
            metadata.get("chunk_id"),
            metadata.get("id"),
        )
        source = metadata.get("source")
        score = _first_usable(
            getattr(context, "score", None),
            metadata.get("score"),
            metadata.get("relevance_score"),
        )

    context_text = str(text or "").strip()
    if not context_text:
        raise ValueError("Captured context %s did not include text/content/page_content." % index)

    # Surface top-level source/score in metadata without overwriting existing keys.
    if source is not None and "source" not in metadata:
        metadata["source"] = source
    if score is not None and "score" not in metadata and "relevance_score" not in metadata:
        metadata["score"] = score

    resolved_id = _context_id(
        context_id=context_id,
        metadata=metadata,
        id_prefix=id_prefix,
        index=index,
    )
    return TraceContext(id=resolved_id, text=context_text, metadata=metadata)


def _first_usable(*values: Any) -> Any:
    """Return the first value that is neither ``None`` nor blank when stringified."""
    for value in values:
        if value is not None and str(value).strip() != "":
            return value
    return None
89
+
90
+
91
def citation_to_trace_citation(citation: Any) -> TraceCitation:
    """Convert a dict or citation-like object to a TraceCitation.

    A ``TraceCitation`` instance is returned as-is. Otherwise ``claim`` and
    ``metadata`` are read from dict keys or attributes, and the source id is
    the first usable value among ``source_id``, ``source_chunk_id`` and
    ``chunk_id``.

    Raises:
        ValueError: If the claim is missing/blank, or no source id is present.
    """
    if isinstance(citation, TraceCitation):
        return citation

    source_keys = ("source_id", "source_chunk_id", "chunk_id")
    if isinstance(citation, dict):
        claim = citation.get("claim")
        source_candidates = [citation.get(key) for key in source_keys]
        metadata = dict(citation.get("metadata") or {})
    else:
        claim = getattr(citation, "claim", None)
        source_candidates = [getattr(citation, key, None) for key in source_keys]
        metadata = dict(getattr(citation, "metadata", None) or {})

    # Select by "not None and not blank" rather than truthiness: an integer
    # source id of 0 is valid (context ids of 0 resolve to "0"), and a blank
    # first key must not hide a usable later key.
    source_text = ""
    for candidate in source_candidates:
        if candidate is not None and str(candidate).strip():
            source_text = str(candidate).strip()
            break

    if not str(claim or "").strip():
        raise ValueError("Captured citation did not include claim.")
    if not source_text:
        raise ValueError("Captured citation did not include source_id/source_chunk_id/chunk_id.")
    return TraceCitation(claim=str(claim).strip(), source_id=source_text, metadata=metadata)
113
+
114
+
115
def langchain_documents_to_contexts(
    documents: Iterable[Any],
    *,
    id_prefix: str = "langchain_doc",
) -> list[TraceContext]:
    """Convert each document to a TraceContext, in order.

    Every item is passed to ``context_to_trace_context`` together with its
    zero-based position and ``id_prefix``.
    """
    converted: list[TraceContext] = []
    for position, doc in enumerate(documents):
        converted.append(
            context_to_trace_context(doc, index=position, id_prefix=id_prefix)
        )
    return converted
124
+
125
+
126
+ def _first_present(payload: dict[str, Any], keys: tuple[str, ...]) -> Any:
127
+ for key in keys:
128
+ value = payload.get(key)
129
+ if value is not None and str(value).strip() != "":
130
+ return value
131
+ return None
132
+
133
+
134
+ def _context_id(
135
+ *,
136
+ context_id: Any,
137
+ metadata: dict[str, Any],
138
+ id_prefix: str,
139
+ index: int,
140
+ ) -> str:
141
+ if context_id is not None and str(context_id).strip():
142
+ return str(context_id).strip()
143
+ source = metadata.get("source")
144
+ chunk_marker = (
145
+ metadata.get("chunk_id")
146
+ or metadata.get("chunk_index")
147
+ or metadata.get("page")
148
+ or metadata.get("start_index")
149
+ )
150
+ if source is not None and str(source).strip() and chunk_marker is not None:
151
+ return "%s:%s" % (str(source).strip(), str(chunk_marker).strip())
152
+ if source is not None and str(source).strip():
153
+ return "%s:%s" % (str(source).strip(), index + 1)
154
+ return "%s_%s" % (id_prefix, index + 1)