contexttrace 0.4.0__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. {contexttrace-0.4.0 → contexttrace-0.5.0}/PKG-INFO +7 -2
  2. {contexttrace-0.4.0 → contexttrace-0.5.0}/README.md +6 -1
  3. contexttrace-0.5.0/contexttrace/_version.py +1 -0
  4. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/cli.py +60 -0
  5. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/verify/__init__.py +5 -0
  6. contexttrace-0.5.0/contexttrace/verify/audit.py +449 -0
  7. contexttrace-0.5.0/contexttrace/verify/audit_report.py +372 -0
  8. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace.egg-info/SOURCES.txt +2 -0
  9. {contexttrace-0.4.0 → contexttrace-0.5.0}/pyproject.toml +1 -1
  10. contexttrace-0.4.0/contexttrace/_version.py +0 -1
  11. {contexttrace-0.4.0 → contexttrace-0.5.0}/MANIFEST.in +0 -0
  12. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/__init__.py +0 -0
  13. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/client.py +0 -0
  14. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/config.py +0 -0
  15. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/demo.py +0 -0
  16. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/demo_data.py +0 -0
  17. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/endpoint_eval.py +0 -0
  18. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/errors.py +0 -0
  19. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/evaluator.py +0 -0
  20. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/integrations/__init__.py +0 -0
  21. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/integrations/fastapi.py +0 -0
  22. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/integrations/langchain.py +0 -0
  23. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/integrations/langgraph.py +0 -0
  24. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/integrations/llamaindex.py +0 -0
  25. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/integrations/opentelemetry.py +0 -0
  26. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/local.py +0 -0
  27. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/py.typed +0 -0
  28. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/regression.py +0 -0
  29. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/reliability.py +0 -0
  30. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/report.py +0 -0
  31. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/storage/__init__.py +0 -0
  32. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/storage/sqlite_store.py +0 -0
  33. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/thresholds.py +0 -0
  34. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/transport.py +0 -0
  35. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/verify/abstention.py +0 -0
  36. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/verify/benchmark.py +0 -0
  37. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/verify/citations.py +0 -0
  38. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/verify/claims.py +0 -0
  39. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/verify/compare.py +0 -0
  40. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/verify/compare_report.py +0 -0
  41. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/verify/demos.py +0 -0
  42. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/verify/evidence.py +0 -0
  43. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/verify/external_benchmark_cases.json +0 -0
  44. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/verify/facts.py +0 -0
  45. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/verify/real_benchmark_cases.json +0 -0
  46. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/verify/report.py +0 -0
  47. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/verify/root_cause.py +0 -0
  48. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/verify/runner.py +0 -0
  49. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/verify/schema.py +0 -0
  50. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/verify/spans.py +0 -0
  51. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/verify/verdicts.py +0 -0
  52. {contexttrace-0.4.0 → contexttrace-0.5.0}/contexttrace/viewer.py +0 -0
  53. {contexttrace-0.4.0 → contexttrace-0.5.0}/setup.cfg +0 -0
  54. {contexttrace-0.4.0 → contexttrace-0.5.0}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: contexttrace
3
- Version: 0.4.0
3
+ Version: 0.5.0
4
4
  Summary: Local-first SDK and CLI for RAG and agent reliability tracing, citation checks, and failure diagnosis.
5
5
  Author: ContextTrace contributors
6
6
  License: MIT
@@ -150,6 +150,9 @@ contexttrace verify-benchmark --case-set external --mode semantic --report
150
150
  contexttrace compare baseline.json current.json
151
151
  contexttrace compare baseline.json current.json --report
152
152
  contexttrace compare baseline.json current.json --fail-on new_failure
153
+ contexttrace audit trace.json --corpus docs/
154
+ contexttrace audit trace.json --corpus docs/ --report
155
+ contexttrace audit trace.json --corpus docs/ --fail-on retrieval_miss
153
156
  ```
154
157
 
155
158
  Input requires `query`, `answer`, and `contexts` with `id` and `text`. Optional `citations` are checked to catch cited sources that do not actually support the matched claim.
@@ -164,7 +167,9 @@ ContextTrace verifies whether each generated claim is actually supported by retr
164
167
 
165
168
  Use `contexttrace compare baseline.json current.json` to diff two portable traces or saved `verify --json` outputs. It reports support-rate deltas, new unsupported claims, citation regressions, should-abstain flips, and new root causes, with `--fail-on` gates for CI.
166
169
 
167
- The v0.4.0 verifier uses local lexical heuristics by default. Claim extraction is rule-based, contradiction detection is conservative, and semantic or LLM-judge support can be added later.
170
+ Use `contexttrace audit trace.json --corpus docs/` to diagnose whether an unsupported claim failed because retrieval missed evidence, chunking omitted the supporting span, the corpus lacks coverage, or generation overclaimed.
171
+
172
+ The v0.5.0 verifier uses local lexical heuristics by default. Claim extraction is rule-based, contradiction detection is conservative, and semantic or LLM-judge support can be added later.
168
173
 
169
174
  ## What It Catches
170
175
 
@@ -93,6 +93,9 @@ contexttrace verify-benchmark --case-set external --mode semantic --report
93
93
  contexttrace compare baseline.json current.json
94
94
  contexttrace compare baseline.json current.json --report
95
95
  contexttrace compare baseline.json current.json --fail-on new_failure
96
+ contexttrace audit trace.json --corpus docs/
97
+ contexttrace audit trace.json --corpus docs/ --report
98
+ contexttrace audit trace.json --corpus docs/ --fail-on retrieval_miss
96
99
  ```
97
100
 
98
101
  Input requires `query`, `answer`, and `contexts` with `id` and `text`. Optional `citations` are checked to catch cited sources that do not actually support the matched claim.
@@ -107,7 +110,9 @@ ContextTrace verifies whether each generated claim is actually supported by retr
107
110
 
108
111
  Use `contexttrace compare baseline.json current.json` to diff two portable traces or saved `verify --json` outputs. It reports support-rate deltas, new unsupported claims, citation regressions, should-abstain flips, and new root causes, with `--fail-on` gates for CI.
109
112
 
110
- The v0.4.0 verifier uses local lexical heuristics by default. Claim extraction is rule-based, contradiction detection is conservative, and semantic or LLM-judge support can be added later.
113
+ Use `contexttrace audit trace.json --corpus docs/` to diagnose whether an unsupported claim failed because retrieval missed evidence, chunking omitted the supporting span, the corpus lacks coverage, or generation overclaimed.
114
+
115
+ The v0.5.0 verifier uses local lexical heuristics by default. Claim extraction is rule-based, contradiction detection is conservative, and semantic or LLM-judge support can be added later.
111
116
 
112
117
  ## What It Catches
113
118
 
@@ -0,0 +1 @@
1
+ __version__ = "0.5.0"
@@ -24,6 +24,8 @@ from contexttrace.storage import SQLiteTraceStore
24
24
  from contexttrace.thresholds import parse_thresholds, threshold_failures
25
25
  from contexttrace.verify import (
26
26
  VerificationInputError,
27
+ audit_failures,
28
+ audit_trace,
27
29
  compare_failures,
28
30
  compare_trace_files,
29
31
  list_verify_demos,
@@ -32,6 +34,7 @@ from contexttrace.verify import (
32
34
  verify_trace,
33
35
  )
34
36
  from contexttrace.verify.benchmark import run_verify_benchmark, write_verify_benchmark_report
37
+ from contexttrace.verify.audit_report import AuditReportGenerator
35
38
  from contexttrace.verify.compare_report import CompareReportGenerator
36
39
  from contexttrace.verify.report import VerifyReportGenerator
37
40
  from contexttrace.viewer import serve_viewer
@@ -404,6 +407,63 @@ def compare_command(
404
407
  return 1 if fail_messages else 0
405
408
 
406
409
 
410
@cli.command("audit")
@click.argument("trace_json")
@click.option("--corpus", "corpus_path", required=True, help="Local corpus directory or file to search for supporting evidence.")
@click.option("--json", "json_output", is_flag=True, help="Print the full audit result as JSON.")
@click.option("--report", is_flag=True, help="Generate a local HTML retrieval audit report.")
@click.option("--out", default=None, help="HTML report path. Implies --report when provided.")
@click.option("--mode", default="lexical", show_default=True, type=click.Choice(["lexical", "semantic"]), help="Evidence scoring mode.")
@click.option("--fail-on", multiple=True, help="Fail on retrieval_miss, reranking_failure, chunking_issue, corpus_gap, answer_overreach, stale_source, insufficient_context, or any_failure.")
def audit_command(
    trace_json: str,
    corpus_path: str,
    json_output: bool,
    report: bool,
    out: Optional[str],
    mode: str,
    fail_on: tuple[str, ...],
) -> int:
    """Audit a verified trace against a broader local corpus."""

    # NOTE: the docstring above is deliberately left as a single line because
    # click renders it as the command's --help text.
    #
    # Flow: load the trace, audit it against the corpus, optionally write the
    # HTML report, then print either the JSON result or a text summary.
    # Returns 1 when any --fail-on rule produced a message, otherwise 0.

    # Invalid trace/corpus input is surfaced as a regular CLI error message.
    try:
        trace = load_trace_file(trace_json)
        result = audit_trace(trace, corpus_path=corpus_path, mode=mode)
    except VerificationInputError as exc:
        raise click.ClickException(str(exc)) from exc

    written_report = None
    if report or out:
        # --out alone is enough to request a report; without it the report is
        # written under .contexttrace/reports/<trace stem>_audit.html.
        default_name = "%s_audit.html" % Path(trace_json).stem
        output_path = out or str(Path(".contexttrace") / "reports" / default_name)
        written_report = AuditReportGenerator().generate(result, trace, path=output_path)

    fail_messages = audit_failures(result, fail_on)
    exit_code = 1 if fail_messages else 0

    if json_output:
        # Keep stdout pure JSON: the report path and failures go to stderr.
        if written_report:
            click.echo("Report: %s" % written_report, err=True)
        click.echo(json.dumps(result, indent=2))
        for message in fail_messages:
            click.echo("Audit failed: %s" % message, err=True)
        return exit_code

    summary = result["summary"]
    # One row per summary field. "Stale sources" is included so that every
    # label accepted by --fail-on is also visible in the text output.
    rows = (
        ("Primary audit label", "primary_audit_label"),
        ("Claims audited", "total_claims"),
        ("Corpus documents", "corpus_documents"),
        ("Retrieval misses", "retrieval_miss"),
        ("Chunking issues", "chunking_issue"),
        ("Reranking failures", "reranking_failure"),
        ("Corpus gaps", "corpus_gap"),
        ("Answer overreach", "answer_overreach"),
        ("Stale sources", "stale_source"),
        ("Insufficient context", "insufficient_context"),
    )
    for title, key in rows:
        click.echo("%s: %s" % (title, summary[key]))
    if written_report:
        click.echo("Report: %s" % written_report)
    for message in fail_messages:
        click.echo("Audit failed: %s" % message, err=True)
    return exit_code
465
+
466
+
407
467
  def _write_verify_report(
408
468
  result: dict,
409
469
  trace: object,
@@ -1,4 +1,5 @@
1
1
  from contexttrace.verify.runner import verify_trace, verify_trace_file
2
+ from contexttrace.verify.audit import audit_failures, audit_trace, audit_trace_file, load_corpus
2
3
  from contexttrace.verify.compare import compare_failures, compare_trace_files, compare_verifications
3
4
  from contexttrace.verify.schema import (
4
5
  RAGTrace,
@@ -14,10 +15,14 @@ __all__ = [
14
15
  "TraceCitation",
15
16
  "TraceContext",
16
17
  "VerificationInputError",
18
+ "audit_failures",
19
+ "audit_trace",
20
+ "audit_trace_file",
17
21
  "compare_failures",
18
22
  "compare_trace_files",
19
23
  "compare_verifications",
20
24
  "list_verify_demos",
25
+ "load_corpus",
21
26
  "load_trace_file",
22
27
  "load_verify_demo",
23
28
  "verify_trace",
@@ -0,0 +1,449 @@
1
+ from __future__ import annotations
2
+
3
+ from collections import Counter
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+ from contexttrace.verify.claims import Claim
8
+ from contexttrace.verify.evidence import find_best_evidence
9
+ from contexttrace.verify.runner import verify_trace
10
+ from contexttrace.verify.schema import RAGTrace, TraceContext, VerificationInputError, load_trace_file
11
+ from contexttrace.verify.verdicts import classify_claim
12
+
13
+
14
# Audit labels. NO_FAILURE marks a claim that needs no retrieval or corpus
# fix; each remaining label names one failure category the audit can assign.
NO_FAILURE = "no_failure_detected"
RETRIEVAL_MISS = "retrieval_miss"
RERANKING_FAILURE = "reranking_failure"
CHUNKING_ISSUE = "chunking_issue"
CORPUS_GAP = "corpus_gap"
ANSWER_OVERREACH = "answer_overreach"
STALE_SOURCE = "stale_source"
INSUFFICIENT_CONTEXT = "insufficient_context"

# Every label that counts as an audit failure in the summary.
AUDIT_FAILURE_LABELS = {
    ANSWER_OVERREACH,
    CHUNKING_ISSUE,
    CORPUS_GAP,
    INSUFFICIENT_CONTEXT,
    RERANKING_FAILURE,
    RETRIEVAL_MISS,
    STALE_SOURCE,
}

# Citation statuses treated as a citation problem on a claim.
BAD_CITATIONS = {
    "cited_source_does_not_support_claim",
    "cited_source_missing",
    "claim_supported_by_different_source",
}

# Verdicts that mean a claim is supported.
SUPPORTED_VERDICTS = {"supported"}

# File suffixes (lower-case, with the dot) collected from a corpus directory.
CORPUS_EXTENSIONS = {
    ".csv", ".tsv",
    ".html",
    ".json", ".jsonl",
    ".md", ".markdown", ".rst",
    ".text", ".txt",
    ".yaml", ".yml",
}

# Directory names ignored while walking a corpus directory.
SKIP_DIRECTORIES = {
    ".contexttrace",
    ".git", ".hg", ".svn",
    ".mypy_cache", ".pytest_cache", ".ruff_cache",
    "__pycache__",
    "build", "dist",
    "node_modules",
}

# Files larger than this many bytes are skipped while walking a corpus directory.
MAX_FILE_BYTES = 1_000_000

# Zero-based retrieved rank at or beyond which a related source is diagnosed
# as a reranking failure rather than a chunking issue.
RERANKING_CUTOFF = 3
67
+
68
+
69
def audit_trace_file(
    trace_path: str | Path,
    *,
    corpus_path: str | Path,
    mode: str = "lexical",
) -> dict[str, Any]:
    """Load the trace stored at *trace_path* and audit it against a corpus.

    Convenience wrapper: the file is parsed with ``load_trace_file`` and the
    resulting trace is handed to ``audit_trace`` with the same *corpus_path*
    and *mode*. Returns whatever ``audit_trace`` returns.
    """
    return audit_trace(load_trace_file(trace_path), corpus_path=corpus_path, mode=mode)
77
+
78
+
79
def audit_trace(
    trace: RAGTrace,
    *,
    corpus_path: str | Path,
    mode: str = "lexical",
) -> dict[str, Any]:
    """Verify *trace* and audit each verified claim against a local corpus.

    The corpus is loaded first, so an unusable corpus path fails before any
    verification work is done. Every claim produced by ``verify_trace`` is
    then diagnosed against the corpus documents.

    Returns a dict with the keys ``query``, ``answer``, ``summary``,
    ``claims``, ``verification``, ``corpus`` and ``metadata``.
    """
    documents = load_corpus(corpus_path)
    verification = verify_trace(trace, mode=mode)

    audited_claims = []
    for verified_claim in verification.get("claims") or []:
        audited_claims.append(_audit_claim(verified_claim, trace, documents, mode=mode))

    summary = _summary(audited_claims, verification, documents, mode=mode)

    # Only these sections of the verification output are carried over; a
    # missing or empty section is reported as an empty dict.
    verification_view = {
        "summary": verification.get("summary") or {},
        "abstention": verification.get("abstention") or {},
        "diagnostics": verification.get("diagnostics") or {},
    }
    corpus_view = {
        "path": str(Path(corpus_path)),
        "documents": len(documents),
    }
    return {
        "query": trace.query,
        "answer": trace.answer,
        "summary": summary,
        "claims": audited_claims,
        "verification": verification_view,
        "corpus": corpus_view,
        "metadata": dict(trace.metadata),
    }
108
+
109
+
110
def load_corpus(corpus_path: str | Path) -> list[TraceContext]:
    """Read a corpus file or directory into a list of ``TraceContext`` objects.

    A file path is used as the only document; a directory is expanded with
    ``_corpus_files``. Documents whose text is empty or whitespace-only are
    dropped.

    Raises:
        VerificationInputError: if the path does not exist, or if no document
            with readable text was found.
    """
    root = Path(corpus_path)
    if not root.exists():
        raise VerificationInputError("Corpus path %s does not exist." % root)

    if root.is_file():
        candidates = [root]
    else:
        candidates = _corpus_files(root)

    contexts: list[TraceContext] = []
    for document in candidates:
        body = _read_text(document)
        if not body.strip():
            continue
        document_id = _context_id(document, root)
        metadata = {
            "path": str(document),
            "source": document_id,
            "size_bytes": document.stat().st_size,
            "kind": "corpus_document",
        }
        contexts.append(TraceContext(id=document_id, text=body, metadata=metadata))

    if contexts:
        return contexts
    raise VerificationInputError("Corpus path %s did not contain readable text documents." % root)
138
+
139
+
140
def audit_failures(result: dict[str, Any], fail_on: tuple[str, ...]) -> list[str]:
    """Translate ``--fail-on`` rules into failure messages for an audit result.

    Each rule is normalised (trimmed, lower-cased, ``-`` replaced by ``_``)
    and checked against ``result["summary"]``:

    * ``any_failure`` fires when ``has_audit_failures`` is truthy;
    * an audit failure label fires when its summary count is above zero;
    * anything else is reported as an unknown rule, quoting the raw value.

    Returns the messages in rule order; an empty list means nothing fired.
    """
    if not fail_on:
        return []

    detected_messages = {
        RETRIEVAL_MISS: "retrieval miss detected",
        RERANKING_FAILURE: "reranking failure detected",
        CHUNKING_ISSUE: "chunking issue detected",
        CORPUS_GAP: "corpus gap detected",
        ANSWER_OVERREACH: "answer overreach detected",
        STALE_SOURCE: "stale source detected",
        INSUFFICIENT_CONTEXT: "insufficient context detected",
    }
    summary = result.get("summary") or {}
    messages = []
    for raw_rule in fail_on:
        rule = raw_rule.strip().lower().replace("-", "_")
        if rule == "any_failure":
            if bool(summary.get("has_audit_failures")):
                messages.append("audit failure detected")
        elif rule in detected_messages:
            if int(summary.get(rule) or 0) > 0:
                messages.append(detected_messages[rule])
        else:
            messages.append("unknown --fail-on rule %s" % raw_rule)
    return messages
166
+
167
+
168
def _audit_claim(
    claim: dict[str, Any],
    trace: RAGTrace,
    corpus_contexts: list[TraceContext],
    *,
    mode: str,
) -> dict[str, Any]:
    """Audit one verified claim against the corpus documents.

    The claim text is re-scored against *corpus_contexts*, classified again on
    that corpus evidence, and the two views are combined by ``_diagnose``.

    Returns a dict holding the diagnosis (``audit_label``, ``confidence``,
    ``reason``, ``suggested_fix``) together with a ``retrieved`` section copied
    from the original verification and a ``corpus`` section describing the
    best corpus match.
    """
    claim_text = str(claim.get("claim") or "")
    claim_id = str(claim.get("claim_id") or "")

    match = find_best_evidence(claim_text, corpus_contexts, mode=mode)
    # A placeholder id is used for classification when the claim carries none.
    corpus_claim = Claim(id=claim_id or "claim", text=claim_text)
    corpus_result = classify_claim(corpus_claim, match, has_contexts=bool(corpus_contexts))
    diagnosis = _diagnose(claim, trace, match, corpus_result)

    retrieved_view = {
        "verdict": claim.get("verdict"),
        "best_context_id": claim.get("best_context_id"),
        "best_score": claim.get("best_score"),
        "evidence": claim.get("evidence"),
        "matched_terms": list(claim.get("matched_terms") or []),
        "root_cause": (claim.get("root_cause") or {}).get("label"),
        "citation_status": claim.get("citation_status"),
    }
    corpus_view = {
        "verdict": corpus_result.verdict,
        "best_document_id": match.context_id,
        "best_score": match.score,
        "evidence": match.snippet,
        "matched_terms": list(match.matched_terms),
        "evidence_span": match.span_dict(),
        "supporting_spans": list(match.supporting_spans or []),
        "required_facts": list(corpus_result.required_facts),
        "matched_facts": list(corpus_result.matched_facts),
        "missing_facts": list(corpus_result.missing_facts),
        "conflicting_facts": list(corpus_result.conflicting_facts),
    }
    return {
        "claim_id": claim_id,
        "claim": claim_text,
        "audit_label": diagnosis["label"],
        "confidence": diagnosis["confidence"],
        "reason": diagnosis["reason"],
        "suggested_fix": diagnosis["suggested_fix"],
        "retrieved": retrieved_view,
        "corpus": corpus_view,
    }
214
+
215
+
216
def _diagnose(
    claim: dict[str, Any],
    trace: RAGTrace,
    corpus_match: object,
    corpus_verification: object,
) -> dict[str, Any]:
    """Pick the audit label for one claim.

    *claim* is the verification result based on the retrieved contexts;
    *corpus_match* and *corpus_verification* describe the same claim evaluated
    against the corpus. The checks below run in order and the first one that
    applies decides the label, so their order is significant.

    Returns the dict built by ``_result`` (label, confidence, reason,
    suggested fix).
    """
    root_cause = claim.get("root_cause") or {}
    verdict = str(claim.get("verdict") or "")
    root_label = str(root_cause.get("label") or NO_FAILURE)
    citation_status = str(claim.get("citation_status") or "")
    corpus_verdict = str(getattr(corpus_verification, "verdict", ""))
    corpus_score = float(getattr(corpus_match, "score", 0.0) or 0.0)
    matched_document = str(getattr(corpus_match, "context_id", "") or "")
    # Position of the best corpus document among the retrieved contexts,
    # or None when that document was not retrieved at all.
    same_source_rank = _same_source_retrieved_rank(matched_document, trace)

    # 1. Supported claim whose only problem is its citation.
    if _is_citation_only_failure(claim):
        return _result(
            NO_FAILURE,
            0.92,
            "The claim is supported by retrieved evidence; the remaining issue is citation-level, not a retrieval or corpus failure.",
            "Fix the claim-level citation, but do not treat this as a retrieval miss.",
        )

    # 2. Nothing wrong with the claim.
    if not _is_failure(claim):
        return _result(
            NO_FAILURE,
            0.99,
            "The claim is already supported by the retrieved contexts.",
            "No fix needed for this claim.",
        )

    # 3. Contradiction on either side, or a stale/conflicting root cause.
    if "contradicted" in (verdict, corpus_verdict) or root_label in {"stale_context", "conflicting_contexts"}:
        return _result(
            STALE_SOURCE,
            0.86,
            "The claim appears to conflict with retrieved or corpus evidence.",
            "Resolve stale or conflicting sources before allowing the answer to use this fact.",
        )

    # 4. The corpus supports the claim: the failure is on the retrieval side.
    if corpus_verdict in SUPPORTED_VERDICTS:
        if same_source_rank is None:
            return _result(
                RETRIEVAL_MISS,
                max(0.82, min(0.98, corpus_score + 0.12)),
                "The broader corpus contains evidence for this claim, but the retrieved contexts did not include it.",
                "Improve retrieval recall, filters, query rewriting, or top_k so this source is retrieved.",
            )
        related_confidence = max(0.78, min(0.95, corpus_score + 0.08))
        if same_source_rank >= RERANKING_CUTOFF:
            return _result(
                RERANKING_FAILURE,
                related_confidence,
                "A related source was retrieved, but it appeared too low in the retrieved context list for reliable generation.",
                "Add a reranker or raise high-evidence chunks from this source before generation.",
            )
        return _result(
            CHUNKING_ISSUE,
            related_confidence,
            "The retrieved source appears related, but the retrieved chunk omitted the supporting span found in the corpus.",
            "Adjust chunk boundaries, overlap, or parent-document retrieval so the answerable span is included.",
        )

    # 5. Retrieved evidence covers only part of the claim.
    if root_label == "answer_overreach" or verdict == "partially_supported":
        return _result(
            ANSWER_OVERREACH,
            0.82,
            "The evidence supports part of the claim, but not every required fact.",
            "Remove unsupported details or retrieve evidence that explicitly supports each detail.",
        )

    # 6. Corpus evidence covers only part of the claim.
    if corpus_verdict == "partially_supported":
        return _result(
            ANSWER_OVERREACH,
            0.78,
            "The corpus supports only part of the claim, so the answer likely added unsupported detail.",
            "Split the claim and require support for every required fact before answering.",
        )

    # 7. Either side could not verify the claim.
    if "unverifiable" in (corpus_verdict, verdict):
        return _result(
            INSUFFICIENT_CONTEXT,
            0.72,
            "The closest corpus evidence is related but too weak or ambiguous to verify the claim.",
            "Retrieve more specific evidence or force the model to qualify/abstain.",
        )

    # 8. Citation problem combined with a moderately scored corpus match.
    if citation_status in BAD_CITATIONS and corpus_score >= 0.35:
        return _result(
            INSUFFICIENT_CONTEXT,
            0.7,
            "The claim has a citation problem and the broader corpus evidence is still not strong enough.",
            "Regenerate claim-level citations and require cited sources to cover all required facts.",
        )

    # 9. Fallback: no usable support anywhere.
    return _result(
        CORPUS_GAP,
        max(0.7, min(0.95, 1.0 - corpus_score)),
        "Neither the retrieved contexts nor the broader corpus provide enough support for this claim.",
        "Add the missing source to the corpus or make the answer abstain when the corpus lacks this fact.",
    )
313
+
314
+
315
def _summary(
    claim_audits: list[dict[str, Any]],
    verification: dict[str, Any],
    corpus_contexts: list[TraceContext],
    *,
    mode: str,
) -> dict[str, Any]:
    """Aggregate per-claim audit results into the summary dict.

    Besides the overall fields, the summary carries one count per audit label:
    ``NO_FAILURE`` first, then the failure labels in sorted order. A claim
    without an ``audit_label`` is counted under ``NO_FAILURE``.
    """
    counts = Counter(str(audit.get("audit_label") or NO_FAILURE) for audit in claim_audits)
    verification_summary = verification.get("summary") or {}

    summary: dict[str, Any] = {
        "mode": mode,
        "total_claims": len(claim_audits),
        "audited_claims": sum(1 for audit in claim_audits if audit.get("audit_label") != NO_FAILURE),
        "corpus_documents": len(corpus_contexts),
        "has_audit_failures": any(counts[label] for label in AUDIT_FAILURE_LABELS),
        "primary_audit_label": _primary_label(counts),
        "verification_failure_type": verification_summary.get("failure_type"),
        "verification_primary_root_cause": verification_summary.get("primary_root_cause"),
    }
    for label in [NO_FAILURE, *sorted(AUDIT_FAILURE_LABELS)]:
        summary[label] = counts[label]
    return summary
336
+
337
+
338
def _primary_label(counts: Counter) -> str:
    """Return the most frequent failure label, or ``NO_FAILURE`` if none occurred.

    When several labels share the highest count, the one listed earliest in
    the priority order below wins.
    """
    priority = [
        RETRIEVAL_MISS,
        CHUNKING_ISSUE,
        RERANKING_FAILURE,
        CORPUS_GAP,
        ANSWER_OVERREACH,
        STALE_SOURCE,
        INSUFFICIENT_CONTEXT,
    ]
    failures = {}
    for label in AUDIT_FAILURE_LABELS:
        if counts[label]:
            failures[label] = counts[label]
    if not failures:
        return NO_FAILURE

    def sort_key(label: str) -> tuple:
        # Higher count first, then earlier priority position.
        position = priority.index(label) if label in priority else len(priority)
        return (-failures[label], position)

    return min(failures, key=sort_key)
358
+
359
+
360
def _is_failure(claim: dict[str, Any]) -> bool:
    """Report whether a verified claim shows any kind of problem.

    A claim counts as failing when its verdict is not a supported one, when
    its citation status is a bad one, or when it carries a root-cause label
    other than ``NO_FAILURE``.
    """
    if str(claim.get("verdict") or "") not in SUPPORTED_VERDICTS:
        return True
    if str(claim.get("citation_status") or "") in BAD_CITATIONS:
        return True
    root_label = str((claim.get("root_cause") or {}).get("label") or NO_FAILURE)
    return root_label != NO_FAILURE
366
+
367
+
368
def _is_citation_only_failure(claim: dict[str, Any]) -> bool:
    """Report whether a supported claim's only problem is its citation.

    True when the verdict is a supported one, the citation status is a bad
    one, and the root cause is either citation-related
    (``wrong_source_cited`` / ``missing_cited_source``) or absent.
    """
    if str(claim.get("verdict") or "") not in SUPPORTED_VERDICTS:
        return False
    if str(claim.get("citation_status") or "") not in BAD_CITATIONS:
        return False
    root_label = str((claim.get("root_cause") or {}).get("label") or NO_FAILURE)
    return root_label in {"wrong_source_cited", "missing_cited_source", NO_FAILURE}
375
+
376
+
377
def _same_source_retrieved_rank(corpus_context_id: str, trace: RAGTrace) -> int | None:
    """Find where the corpus document appears among the retrieved contexts.

    Each retrieved context is compared by its id and by the ``source``,
    ``path``, ``file`` and ``document`` entries of its metadata.

    Returns the zero-based index of the first matching context, or ``None``
    when the corpus id is empty or nothing matches.
    """
    corpus_key = _source_key(corpus_context_id)
    if not corpus_key:
        return None

    metadata_fields = ("source", "path", "file", "document")
    for rank, retrieved in enumerate(trace.contexts):
        identifiers = [
            retrieved.id,
            *(retrieved.metadata.get(field) for field in metadata_fields),
        ]
        for identifier in identifiers:
            if _sources_match(corpus_key, _source_key(identifier)):
                return rank
    return None
392
+
393
+
394
+ def _sources_match(left: str, right: str) -> bool:
395
+ if not left or not right:
396
+ return False
397
+ if left == right:
398
+ return True
399
+ return Path(left).name == Path(right).name
400
+
401
+
402
+ def _source_key(value: Any) -> str:
403
+ text = str(value or "").strip().replace("\\", "/").lower()
404
+ return text.strip("./")
405
+
406
+
407
+ def _result(label: str, confidence: float, reason: str, suggested_fix: str) -> dict[str, Any]:
408
+ return {
409
+ "label": label,
410
+ "confidence": round(confidence, 3),
411
+ "reason": reason,
412
+ "suggested_fix": suggested_fix,
413
+ }
414
+
415
+
416
+ def _corpus_files(root: Path) -> list[Path]:
417
+ files: list[Path] = []
418
+ for path in root.rglob("*"):
419
+ if not path.is_file():
420
+ continue
421
+ if any(part in SKIP_DIRECTORIES for part in path.parts):
422
+ continue
423
+ if path.suffix.lower() not in CORPUS_EXTENSIONS:
424
+ continue
425
+ if path.stat().st_size > MAX_FILE_BYTES:
426
+ continue
427
+ files.append(path)
428
+ return sorted(files, key=lambda item: str(item).lower())
429
+
430
+
431
+ def _read_text(path: Path) -> str:
432
+ try:
433
+ return path.read_text(encoding="utf-8")
434
+ except UnicodeDecodeError:
435
+ try:
436
+ return path.read_text(encoding="utf-8", errors="ignore")
437
+ except OSError:
438
+ return ""
439
+ except OSError:
440
+ return ""
441
+
442
+
443
+ def _context_id(path: Path, root: Path) -> str:
444
+ if root.is_file():
445
+ return path.name
446
+ try:
447
+ return path.relative_to(root).as_posix()
448
+ except ValueError:
449
+ return path.name
@@ -0,0 +1,372 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from html import escape
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ from contexttrace.verify.schema import RAGTrace
9
+
10
+
11
class AuditReportGenerator:
    """Turns a retrieval-audit result into a standalone HTML report."""

    def generate(self, result: dict[str, Any], trace: RAGTrace, *, path: str) -> str:
        """Render the report and write it to ``path`` as UTF-8.

        Missing parent directories are created first. Returns the output
        path as a string.
        """
        destination = Path(path)
        destination.parent.mkdir(parents=True, exist_ok=True)
        html = self.render(result, trace)
        destination.write_text(html, encoding="utf-8")
        return str(destination)

    def render(self, result: dict[str, Any], trace: RAGTrace) -> str:
        """Build the report HTML by filling ``HTML_TEMPLATE``.

        Free-text values taken from ``result`` are HTML-escaped here; the
        section fragments come from the module-level helper functions.
        """
        summary = result.get("summary") or {}
        claims = list(result.get("claims") or [])
        fields = {
            "query": escape(_string(result.get("query"))),
            "answer": escape(_string(result.get("answer"))),
            "summary_cards": _summary_cards(summary),
            "claim_rows": _claim_rows(claims),
            "retrieval_misses": _claim_cards(
                claims, {"retrieval_miss"}, "No retrieval misses detected."
            ),
            "chunking_issues": _claim_cards(
                claims,
                {"chunking_issue", "reranking_failure"},
                "No chunking or reranking failures detected.",
            ),
            "corpus_gaps": _claim_cards(
                claims, {"corpus_gap"}, "No corpus coverage gaps detected."
            ),
            "answer_overreach": _claim_cards(
                claims,
                {"answer_overreach", "insufficient_context", "stale_source"},
                "No answer overreach, stale source, or insufficient-context failures detected.",
            ),
            "retrieved_contexts": _retrieved_contexts(trace),
            "corpus_summary": escape(json.dumps(result.get("corpus") or {}, indent=2)),
            "why_failed": _why_failed(claims),
            "raw_json": escape(json.dumps(_raw_summary(result), indent=2)),
        }
        return HTML_TEMPLATE.format(**fields)
43
+
44
+
45
def _summary_cards(summary: dict[str, Any]) -> str:
    """Render the summary dictionary as a series of ``.card`` elements.

    The first and last cards show free-form values (blank when absent); the
    ten cards in between are counters that default to ``0``.
    """
    counters = (
        ("Total Claims", "total_claims"),
        ("Audited Failures", "audited_claims"),
        ("Corpus Documents", "corpus_documents"),
        ("Retrieval Misses", "retrieval_miss"),
        ("Chunking Issues", "chunking_issue"),
        ("Reranking Failures", "reranking_failure"),
        ("Corpus Gaps", "corpus_gap"),
        ("Answer Overreach", "answer_overreach"),
        ("Stale Sources", "stale_source"),
        ("Insufficient Context", "insufficient_context"),
    )
    cards = [("Primary Audit Label", summary.get("primary_audit_label"))]
    cards.extend((title, summary.get(key, 0)) for title, key in counters)
    cards.append(("Verification Failure", summary.get("verification_failure_type")))

    card_html = """
        <div class="card">
          <div class="label">{label}</div>
          <div class="value">{value}</div>
        </div>
        """
    rendered = []
    for title, value in cards:
        rendered.append(card_html.format(label=escape(title), value=escape(_string(value))))
    return "\n".join(rendered)
69
+
70
+
71
+ def _claim_rows(claims: list[dict[str, Any]]) -> str:
72
+ if not claims:
73
+ return "<tr><td colspan=\"7\" class=\"muted\">No factual claims were extracted.</td></tr>"
74
+ rows = []
75
+ for claim in claims:
76
+ retrieved = claim.get("retrieved") or {}
77
+ corpus = claim.get("corpus") or {}
78
+ label = _string(claim.get("audit_label"))
79
+ rows.append(
80
+ """
81
+ <tr>
82
+ <td><span class="badge audit-{label_class}">{label}</span></td>
83
+ <td>{claim}</td>
84
+ <td>{retrieved_verdict}</td>
85
+ <td>{retrieved_context}</td>
86
+ <td>{corpus_verdict}</td>
87
+ <td>{corpus_document}</td>
88
+ <td>{fix}</td>
89
+ </tr>
90
+ """.format(
91
+ label_class=escape(_css_token(label)),
92
+ label=escape(label),
93
+ claim=escape(_string(claim.get("claim"))),
94
+ retrieved_verdict=escape(_string(retrieved.get("verdict"))),
95
+ retrieved_context=escape(_string(retrieved.get("best_context_id") or "none")),
96
+ corpus_verdict=escape(_string(corpus.get("verdict"))),
97
+ corpus_document=escape(_string(corpus.get("best_document_id") or "none")),
98
+ fix=escape(_string(claim.get("suggested_fix"))),
99
+ )
100
+ )
101
+ return "\n".join(rows)
102
+
103
+
104
+ def _claim_cards(claims: list[dict[str, Any]], labels: set[str], empty: str) -> str:
105
+ selected = [claim for claim in claims if claim.get("audit_label") in labels]
106
+ if not selected:
107
+ return "<p class=\"muted\">%s</p>" % escape(empty)
108
+ return "\n".join(_claim_card(claim) for claim in selected)
109
+
110
+
111
def _claim_card(claim: dict[str, Any]) -> str:
    """Render the detailed ``<article>`` card for a single audited claim.

    Shows the claim, its diagnosis, the best retrieved and corpus evidence
    (``"none"`` when missing or empty) and the suggested fix. All values are
    HTML-escaped.
    """
    retrieved = claim.get("retrieved") or {}
    corpus = claim.get("corpus") or {}

    def text(value: Any) -> str:
        # Stringify, then escape, exactly once per field.
        return escape(_string(value))

    card_html = """
        <article class="item">
          <div class="item-meta">{claim_id} | {label} | confidence {confidence}</div>
          <h3>{claim}</h3>
          <p><strong>Diagnosis:</strong> {reason}</p>
          <p><strong>Retrieved evidence:</strong> {retrieved_evidence}</p>
          <p class="muted">Retrieved context: {retrieved_context} | verdict {retrieved_verdict} | score {retrieved_score}</p>
          <p><strong>Corpus evidence:</strong> {corpus_evidence}</p>
          <p class="muted">Corpus document: {corpus_document} | verdict {corpus_verdict} | score {corpus_score}</p>
          <p><strong>Suggested fix:</strong> {fix}</p>
        </article>
        """
    return card_html.format(
        claim_id=text(claim.get("claim_id")),
        label=text(claim.get("audit_label")),
        confidence=text(claim.get("confidence")),
        claim=text(claim.get("claim")),
        reason=text(claim.get("reason")),
        retrieved_evidence=text(retrieved.get("evidence") or "none"),
        retrieved_context=text(retrieved.get("best_context_id") or "none"),
        retrieved_verdict=text(retrieved.get("verdict")),
        retrieved_score=text(retrieved.get("best_score")),
        corpus_evidence=text(corpus.get("evidence") or "none"),
        corpus_document=text(corpus.get("best_document_id") or "none"),
        corpus_verdict=text(corpus.get("verdict")),
        corpus_score=text(corpus.get("best_score")),
        fix=text(claim.get("suggested_fix")),
    )
141
+
142
+
143
+ def _retrieved_contexts(trace: RAGTrace) -> str:
144
+ if not trace.contexts:
145
+ return "<p class=\"muted\">No retrieved contexts were supplied.</p>"
146
+ cards = []
147
+ for index, context in enumerate(trace.contexts, start=1):
148
+ cards.append(
149
+ """
150
+ <article class="item">
151
+ <div class="item-meta">rank {rank} | {context_id} | {metadata}</div>
152
+ <p>{text}</p>
153
+ </article>
154
+ """.format(
155
+ rank=index,
156
+ context_id=escape(context.id),
157
+ metadata=escape(json.dumps(context.metadata, sort_keys=True) if context.metadata else "no metadata"),
158
+ text=escape(context.text),
159
+ )
160
+ )
161
+ return "\n".join(cards)
162
+
163
+
164
+ def _why_failed(claims: list[dict[str, Any]]) -> str:
165
+ explanations = []
166
+ for claim in claims:
167
+ label = _string(claim.get("audit_label"))
168
+ if label == "no_failure_detected":
169
+ continue
170
+ explanations.append(
171
+ "%s: %s Suggested fix: %s"
172
+ % (
173
+ label,
174
+ _string(claim.get("reason")),
175
+ _string(claim.get("suggested_fix")),
176
+ )
177
+ )
178
+ if not explanations:
179
+ explanations.append("No corpus-level evidence-chain failure was detected.")
180
+ return "<ul>%s</ul>" % "\n".join("<li>%s</li>" % escape(item) for item in explanations)
181
+
182
+
183
+ def _raw_summary(result: dict[str, Any]) -> dict[str, Any]:
184
+ return {
185
+ "summary": result.get("summary"),
186
+ "claims": result.get("claims"),
187
+ "verification": result.get("verification"),
188
+ "corpus": result.get("corpus"),
189
+ }
190
+
191
+
192
def _css_token(value: Any) -> str:
    """Convert ``value`` into a fragment usable inside a CSS class name.

    The text is lower-cased, underscores and spaces become hyphens, and any
    character that is neither alphanumeric nor a hyphen is dropped. An empty
    result is replaced by ``"unknown"``.
    """
    lowered = _string(value).lower()
    hyphenated = lowered.replace("_", "-").replace(" ", "-")
    kept = [char for char in hyphenated if char == "-" or char.isalnum()]
    if not kept:
        return "unknown"
    return "".join(kept)
195
+
196
+
197
+ def _string(value: Any) -> str:
198
+ if value is None:
199
+ return ""
200
+ return str(value)
201
+
202
+
203
# Page skeleton for the audit report, filled in with ``str.format``.
#
# Named placeholders: summary_cards, query, answer, claim_rows,
# retrieval_misses, chunking_issues, corpus_gaps, answer_overreach,
# retrieved_contexts, corpus_summary, why_failed, raw_json.
#
# Braces in the embedded CSS are doubled ("{{" / "}}") so that ``format``
# emits them as literal braces instead of treating them as fields.
# The ``.audit-*`` badge classes correspond to the ``audit-{label_class}``
# class name produced by ``_claim_rows``.
HTML_TEMPLATE = """<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>ContextTrace Retrieval Audit Report</title>
<style>
:root {{
color-scheme: light;
--bg: #f7f8fa;
--panel: #ffffff;
--subtle: #fbfcfe;
--text: #202832;
--muted: #657286;
--line: #d9e0ea;
--ok: #176f44;
--warn: #946200;
--bad: #b42318;
--accent: #2458d3;
}}
* {{ box-sizing: border-box; }}
body {{
margin: 0;
background: var(--bg);
color: var(--text);
font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
line-height: 1.5;
}}
main {{ max-width: 1160px; margin: 0 auto; padding: 32px 20px 56px; }}
header {{ border-bottom: 1px solid var(--line); margin-bottom: 22px; padding-bottom: 18px; }}
h1, h2, h3 {{ margin: 0; }}
h1 {{ font-size: 30px; }}
h2 {{ font-size: 18px; margin-bottom: 12px; }}
h3 {{ font-size: 15px; margin-bottom: 8px; }}
section {{
background: var(--panel);
border: 1px solid var(--line);
border-radius: 8px;
margin: 16px 0;
padding: 18px;
}}
.summary {{
display: grid;
gap: 12px;
grid-template-columns: repeat(auto-fit, minmax(155px, 1fr));
}}
.card, .item {{
border: 1px solid var(--line);
border-radius: 8px;
background: var(--subtle);
padding: 12px;
}}
.item + .item {{ margin-top: 10px; }}
.label, .item-meta {{
color: var(--muted);
font-size: 12px;
font-weight: 700;
text-transform: uppercase;
}}
.value {{ margin-top: 4px; font-size: 18px; overflow-wrap: anywhere; }}
.muted {{ color: var(--muted); }}
.answer, .item p {{ white-space: pre-wrap; }}
table {{ width: 100%; border-collapse: collapse; font-size: 14px; }}
th, td {{ border-bottom: 1px solid var(--line); padding: 10px; text-align: left; vertical-align: top; }}
th {{ color: var(--muted); font-size: 12px; text-transform: uppercase; }}
.badge {{
display: inline-block;
border-radius: 999px;
border: 1px solid var(--line);
background: #eef2f7;
padding: 3px 8px;
font-size: 12px;
font-weight: 700;
white-space: nowrap;
}}
.audit-no-failure-detected {{ color: var(--ok); background: #e9f7ef; }}
.audit-retrieval-miss, .audit-corpus-gap, .audit-stale-source {{ color: var(--bad); background: #fdeceb; }}
.audit-chunking-issue, .audit-reranking-failure,
.audit-answer-overreach, .audit-insufficient-context {{ color: var(--warn); background: #fff7df; }}
pre {{
margin: 0;
overflow: auto;
background: #101828;
color: #f8fafc;
border-radius: 8px;
padding: 14px;
font-size: 13px;
}}
</style>
</head>
<body>
<main>
<header>
<h1>ContextTrace Retrieval Audit Report</h1>
<p class="muted">Local corpus-level diagnosis for claim evidence failures.</p>
</header>

<section>
<h2>Audit Summary</h2>
<div class="summary">{summary_cards}</div>
</section>

<section>
<h2>Query</h2>
<p>{query}</p>
<h2>Answer</h2>
<p class="answer">{answer}</p>
</section>

<section>
<h2>Claim Failure Diagnosis</h2>
<table>
<thead>
<tr>
<th>Audit Label</th>
<th>Claim</th>
<th>Retrieved Verdict</th>
<th>Retrieved Context</th>
<th>Corpus Verdict</th>
<th>Corpus Document</th>
<th>Suggested Fix</th>
</tr>
</thead>
<tbody>{claim_rows}</tbody>
</table>
</section>

<section>
<h2>Retrieval Misses</h2>
{retrieval_misses}
</section>

<section>
<h2>Chunking And Reranking Issues</h2>
{chunking_issues}
</section>

<section>
<h2>Corpus Gaps</h2>
{corpus_gaps}
</section>

<section>
<h2>Answer Overreach And Ambiguous Evidence</h2>
{answer_overreach}
</section>

<section>
<h2>Retrieved Contexts</h2>
{retrieved_contexts}
</section>

<section>
<h2>Corpus Summary</h2>
<pre>{corpus_summary}</pre>
</section>

<section>
<h2>Why This Failed</h2>
{why_failed}
</section>

<section>
<h2>Raw JSON Summary</h2>
<pre>{raw_json}</pre>
</section>
</main>
</body>
</html>
"""
@@ -30,6 +30,8 @@ contexttrace/storage/__init__.py
30
30
  contexttrace/storage/sqlite_store.py
31
31
  contexttrace/verify/__init__.py
32
32
  contexttrace/verify/abstention.py
33
+ contexttrace/verify/audit.py
34
+ contexttrace/verify/audit_report.py
33
35
  contexttrace/verify/benchmark.py
34
36
  contexttrace/verify/citations.py
35
37
  contexttrace/verify/claims.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "contexttrace"
7
- version = "0.4.0"
7
+ version = "0.5.0"
8
8
  description = "Local-first SDK and CLI for RAG and agent reliability tracing, citation checks, and failure diagnosis."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -1 +0,0 @@
1
- __version__ = "0.4.0"
File without changes
File without changes
File without changes