contexttrace 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,36 @@
1
+ from contexttrace._version import __version__
2
+ from contexttrace.client import AsyncContextTrace, ContextTrace
3
+ from contexttrace.config import ContextTraceConfig
4
+ from contexttrace.errors import (
5
+ ContextTraceConfigError,
6
+ ContextTraceError,
7
+ ContextTraceHTTPError,
8
+ ContextTraceLocalError,
9
+ )
10
+ from contexttrace.integrations.fastapi import ContextTraceFastAPIMiddleware
11
+ from contexttrace.integrations.langchain import ContextTraceCallbackHandler
12
+ from contexttrace.integrations.langgraph import ContextTraceLangGraphTracer
13
+ from contexttrace.integrations.llamaindex import ContextTraceLlamaIndexCallbackHandler
14
+ from contexttrace.integrations.opentelemetry import OpenTelemetryExporter, export_contexttrace_trace
15
+ from contexttrace.reliability import ReliabilityScore, ReliabilityScorer
16
+ from contexttrace.report import ReportGenerator
17
+
18
# Names exported by ``from contexttrace import *``.
__all__ = [
    "AsyncContextTrace", "ContextTrace", "ContextTraceConfig",
    "ContextTraceConfigError", "ContextTraceCallbackHandler", "ContextTraceError",
    "ContextTraceFastAPIMiddleware", "ContextTraceHTTPError", "ContextTraceLocalError",
    "ContextTraceLangGraphTracer", "ContextTraceLlamaIndexCallbackHandler",
    "OpenTelemetryExporter", "ReliabilityScore", "ReliabilityScorer",
    "ReportGenerator", "export_contexttrace_trace", "__version__",
]
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"
contexttrace/cli.py ADDED
@@ -0,0 +1,474 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import sys
5
+ import urllib.error
6
+ import urllib.request
7
+ import webbrowser
8
+ from dataclasses import asdict
9
+ from pathlib import Path
10
+ from typing import Optional
11
+
12
+ import click
13
+
14
+ from contexttrace._version import __version__
15
+ from contexttrace.client import ContextTrace
16
+ from contexttrace.config import ContextTraceConfig, load_config, write_default_config
17
+ from contexttrace.demo import run_demo_dataset
18
+ from contexttrace.demo_data import list_demo_datasets
19
+ from contexttrace.endpoint_eval import run_endpoint_eval
20
+ from contexttrace.errors import ContextTraceError
21
+ from contexttrace.regression import BENCHMARK_STRATEGIES, run_local_benchmark
22
+ from contexttrace.report import ReportGenerator
23
+ from contexttrace.storage import SQLiteTraceStore
24
+ from contexttrace.thresholds import parse_thresholds, threshold_failures
25
+ from contexttrace.viewer import serve_viewer
26
+
27
+
28
# Starter eval dataset that `init` serializes into the evals/ directory.
SAMPLE_QUESTIONS = [
    dict(
        id="refund_policy",
        query="What is the refund policy?",
        expected_sources=["refund_policy.md"],
    ),
]
35
+
36
+
37
# Root command group. It only records the --config path on the click context;
# subcommands read it back through the _load() / _client() helpers.
@click.group(context_settings={"help_option_names": ["-h", "--help"]})
@click.option("--config", "config_path", default=None, help="Path to contexttrace.yaml.")
@click.version_option(version=__version__, prog_name="contexttrace")
@click.pass_context
def cli(ctx: click.Context, config_path: Optional[str]) -> None:
    ctx.obj = dict(config_path=config_path)
43
+
44
+
45
# `init`: write a default config file, create the local store / reports / evals
# directories, drop the sample eval dataset, and instantiate the SQLite store.
@cli.command()
@click.option("--path", default="contexttrace.yaml", help="Configuration file to write.")
@click.option("--force", is_flag=True, help="Overwrite an existing config file.")
def init(path: str, force: bool) -> None:
    written_config = write_default_config(path, overwrite=force)
    settings = load_config(config_path=written_config)

    store_dir = Path(settings.local_store_dir)
    store_dir.mkdir(parents=True, exist_ok=True)
    (store_dir / "reports").mkdir(parents=True, exist_ok=True)

    evals_dir = Path("evals")
    evals_dir.mkdir(parents=True, exist_ok=True)
    sample_path = evals_dir / "questions.json"
    legacy_sample_path = evals_dir / "sample_questions.json"
    # Both files receive the same content; an existing file is only replaced
    # when --force is given.
    for target in (sample_path, legacy_sample_path):
        if force or not target.exists():
            target.write_text(json.dumps(SAMPLE_QUESTIONS, indent=2), encoding="utf-8")

    # Constructed for its side effect only; presumably this creates the
    # database file (verify against SQLiteTraceStore).
    SQLiteTraceStore(settings.storage_path)

    click.echo(f"Wrote {written_config}")
    click.echo(f"Initialized local trace store: {settings.storage_path}")
    click.echo(f"Created sample eval dataset: {sample_path}")
64
+
65
+
66
# `status`: print a short summary of the resolved configuration and local store.
@cli.command()
@click.pass_context
def status(ctx: click.Context) -> None:
    settings = _load(ctx)
    store = SQLiteTraceStore(settings.storage_path)
    latest_run = store.last_eval_run()
    # A missing run, or a run without an id, is shown as the literal "None".
    latest_run_id = (latest_run or {}).get("id") or "None"
    click.echo(f"Project: {settings.project}")
    click.echo(f"Mode: {settings.mode}")
    click.echo(f"Local DB: {settings.storage_path}")
    click.echo(f"Trace count: {store.trace_count()}")
    click.echo(f"Last eval run: {latest_run_id}")
    click.echo(f"Judge provider: {settings.judge_provider}")
78
+
79
+
80
@cli.group()
def config() -> None:
    """Inspect ContextTrace configuration."""


# `config show`: dump the resolved configuration as sorted, indented JSON.
# The api_key field is masked unless --show-secrets is passed.
@config.command("show")
@click.option("--show-secrets", is_flag=True, help="Show API keys instead of masking them.")
@click.pass_context
def config_show(ctx: click.Context, show_secrets: bool) -> None:
    settings = asdict(_load(ctx))
    api_key = settings.get("api_key")
    if api_key and not show_secrets:
        settings["api_key"] = _mask_secret(str(api_key))
    rendered = json.dumps(settings, indent=2, sort_keys=True)
    click.echo(rendered)
94
+
95
+
96
@cli.group()
def traces() -> None:
    """Inspect local traces."""


# `traces list`: print one tab-separated row per stored trace, after a header row.
@traces.command("list")
@click.option("--limit", default=20, show_default=True, help="Maximum traces to show.")
@click.pass_context
def traces_list(ctx: click.Context, limit: int) -> None:
    rows = _client(ctx).list_traces(limit=limit)
    if not rows:
        click.echo("No traces found.")
        return

    click.echo("trace_id\tquery\tfailure_type\tcitation_support\tcreated_at")
    for row in rows:
        # Any level of the evaluation payload may be absent or None.
        evaluation = row.get("evaluation") or {}
        failure = evaluation.get("failure") or {}
        scores = evaluation.get("scores") or {}

        identifier = row.get("id") or row.get("trace_id")
        query_preview = _preview(row.get("query"))
        failure_type = failure.get("failure_type") or "not_evaluated"
        citation_support = scores.get("citation_support", "")
        created_at = row.get("created_at") or ""
        click.echo(
            "%s\t%s\t%s\t%s\t%s"
            % (identifier, query_preview, failure_type, citation_support, created_at)
        )
125
+
126
+
127
# `traces show TRACE_ID`: print the key fields of a single stored trace.
#
# Exits with a click error when the client returns no trace for the given ID,
# instead of failing with an AttributeError on None. The `report` command
# already treats the client's trace lookups as possibly returning None.
# The printed ID falls back to the "trace_id" key, as `traces list` does.
@traces.command("show")
@click.argument("trace_id")
@click.pass_context
def traces_show(ctx: click.Context, trace_id: str) -> None:
    trace = _client(ctx).get_trace(trace_id)
    if trace is None:
        raise click.ClickException("Trace not found: %s" % trace_id)

    # Every nested section may be absent or None; normalize to empty dicts.
    answer = trace.get("answer") or {}
    evaluation = trace.get("evaluation") or {}
    failure = evaluation.get("failure") or {}
    scores = evaluation.get("scores") or {}

    click.echo("Trace: %s" % (trace.get("id") or trace.get("trace_id")))
    click.echo("Project: %s" % trace.get("project"))
    click.echo("Query: %s" % trace.get("query"))
    click.echo("Answer: %s" % _preview(answer.get("answer"), limit=500))
    click.echo("Failure type: %s" % (failure.get("failure_type") or "not_evaluated"))
    click.echo("Severity: %s" % (failure.get("severity") or "unknown"))
    click.echo("Citation support: %s" % scores.get("citation_support", ""))
    click.echo("Unsupported claim rate: %s" % scores.get("unsupported_claim_rate", ""))
    click.echo("Chunks: %s" % len(trace.get("chunks") or []))
    click.echo("Citation checks: %s" % len(trace.get("citation_checks") or []))
146
+
147
+
148
@cli.group("trace")
def trace_alias() -> None:
    """Backward-compatible alias for traces."""


# Register the existing `traces` subcommands under the singular `trace` group.
for _alias_name, _alias_command in (("list", traces_list), ("show", traces_show)):
    trace_alias.add_command(_alias_command, _alias_name)
155
+
156
+
157
# `report`: render an HTML report for an eval run (--eval-run) or a single
# trace. Without --trace-id the most recent trace is used, and --last takes
# precedence over --trace-id. Output defaults to <local_store_dir>/reports/<id>.html.
@cli.command()
@click.option("--last", is_flag=True, help="Export the most recent trace.")
@click.option("--trace-id", default=None, help="Trace ID to export.")
@click.option("--eval-run", default=None, help="Eval run ID to export.")
@click.option("--output", default=None, help="HTML file to write.")
@click.option("--open", "open_browser", is_flag=True, help="Open the report in the default browser.")
@click.pass_context
def report(
    ctx: click.Context,
    last: bool,
    trace_id: Optional[str],
    eval_run: Optional[str],
    output: Optional[str],
    open_browser: bool,
) -> None:
    settings = _load(ctx)
    client = _client(ctx)
    reports_root = Path(settings.local_store_dir) / "reports"
    reports_root.mkdir(parents=True, exist_ok=True)

    if eval_run:
        store = SQLiteTraceStore(settings.storage_path)
        run = store.get_eval_run(eval_run)
        # Collect the trace behind every question that recorded a trace_id.
        run_traces = []
        for question in run.get("questions") or []:
            if question.get("trace_id"):
                run_traces.append(store.get_trace(question["trace_id"]))
        destination = output or str(reports_root / ("%s.html" % eval_run))
        written = ReportGenerator().generate_eval_report(run, run_traces, path=destination)
    else:
        use_latest = last or not trace_id
        if use_latest:
            selected = client.last_trace()
        else:
            selected = client.get_trace(str(trace_id))
        if selected is None:
            raise click.ClickException("No traces found.")
        destination = output or str(reports_root / ("%s.html" % selected["id"]))
        written = ReportGenerator().generate(selected, path=destination)

    click.echo("Wrote %s" % written)
    if open_browser:
        report_uri = Path(written).resolve().as_uri()
        webbrowser.open(report_uri)
199
+
200
+
201
# `eval`: run the eval dataset against a RAG endpoint, print the aggregate
# metrics, optionally write markdown/JSON outputs, and apply thresholds.
#
# Returns 1 when any threshold fails and 0 otherwise; main() converts the
# return value into the process exit code. Raises click.ClickException when no
# endpoint is configured and click.BadParameter when --body-template is not
# valid JSON. --api-key and --contexttrace-url are accepted for compatibility
# and are not used here.
@cli.command("eval")
@click.option("--dataset", required=True, help="Path to eval questions JSON.")
@click.option("--endpoint", default=None, help="RAG endpoint URL. Defaults to config eval_endpoint.")
@click.option("--method", default="POST", type=click.Choice(["GET", "POST"], case_sensitive=False), help="Endpoint method.")
@click.option("--input-key", default="question", show_default=True, help="Request body/query key for the question.")
@click.option("--answer-path", default="$.answer", show_default=True, help="JSONPath for answer extraction.")
@click.option("--contexts-path", default="$.contexts", show_default=True, help="JSONPath for context extraction.")
@click.option("--citations-path", default="$.citations", show_default=True, help="JSONPath for citation extraction.")
@click.option("--body-template", default=None, help="JSON body template. Use {{query}} where the question should be inserted.")
@click.option("--endpoint-header", multiple=True, help="Header formatted as Name:Value. May be repeated.")
@click.option("--timeout", default=30.0, show_default=True, type=float, help="Per-request timeout.")
@click.option("--report-path", default=None, help="HTML report path. Defaults to .contexttrace/reports/eval_<id>.html.")
@click.option("--api-key", default=None, help="Accepted for compatibility; local mode does not require it.")
@click.option("--contexttrace-url", default=None, help="Accepted for compatibility; local mode stores traces locally.")
@click.option("--min-citation-support", default=0.0, show_default=True, type=float, help="Fail when average citation support is below this value.")
@click.option("--max-unsupported-claim-rate", default=1.0, show_default=True, type=float, help="Fail when unsupported claim rate is above this value.")
@click.option("--max-failure-rate", default=1.0, show_default=True, type=float, help="Fail when failure rate is above this value.")
@click.option("--summary-path", default=None, help="Optional markdown summary output path.")
@click.option("--fail-on", multiple=True, help="Threshold rule such as failure_rate>0.25. May be repeated.")
@click.option("--results-path", default=None, help="Optional JSON results output path.")
@click.pass_context
def eval_command(
    ctx: click.Context,
    dataset: str,
    endpoint: Optional[str],
    method: str,
    input_key: str,
    answer_path: str,
    contexts_path: str,
    citations_path: str,
    body_template: Optional[str],
    endpoint_header: tuple[str, ...],
    timeout: float,
    report_path: Optional[str],
    api_key: Optional[str],
    contexttrace_url: Optional[str],
    min_citation_support: float,
    max_unsupported_claim_rate: float,
    max_failure_rate: float,
    summary_path: Optional[str],
    fail_on: tuple[str, ...],
    results_path: Optional[str],
) -> int:
    config = _load(ctx)
    resolved_endpoint = endpoint or config.eval_endpoint
    if not resolved_endpoint:
        raise click.ClickException("--endpoint or eval_endpoint in contexttrace.yaml is required.")

    # Report malformed JSON as a usage error naming the offending option.
    # BadParameter carries exit code 2, the same code main() returns for a
    # bare ValueError.
    body = None
    if body_template:
        try:
            body = json.loads(body_template)
        except json.JSONDecodeError as exc:
            raise click.BadParameter(
                "not valid JSON (%s)" % exc, param_hint="--body-template"
            ) from exc

    result = run_endpoint_eval(
        dataset_path=dataset,
        endpoint=resolved_endpoint,
        contexttrace=_client(ctx),
        method=method,
        headers=_parse_headers(list(endpoint_header)),
        body_template=body,
        input_key=input_key,
        answer_path=answer_path,
        contexts_path=contexts_path,
        citations_path=citations_path,
        timeout=timeout,
        report_path=report_path,
    )

    click.echo("Questions tested: %s" % result.questions_tested)
    click.echo("Reliability score: %s" % result.reliability_score)
    click.echo("Failure rate: %s" % result.failure_rate)
    click.echo("Avg citation support: %s" % result.avg_citation_support)
    click.echo("Unsupported claim rate: %s" % result.unsupported_claim_rate)
    click.echo("Top failures: %s" % (", ".join(result.top_failures) or "None"))
    if result.report_path:
        click.echo("Report: %s" % result.report_path)

    if summary_path:
        summary_file = Path(summary_path)
        # Create missing parent directories, as is done for --results-path.
        summary_file.parent.mkdir(parents=True, exist_ok=True)
        summary_file.write_text(_eval_markdown(result), encoding="utf-8")
        click.echo("Summary: %s" % summary_path)
    if results_path:
        results_file = Path(results_path)
        results_file.parent.mkdir(parents=True, exist_ok=True)
        results_file.write_text(json.dumps(result.to_dict(), indent=2), encoding="utf-8")
        click.echo("Results: %s" % results_path)

    # Metrics addressable from --fail-on rules; citation support is exposed
    # under both its short and its "avg_" name.
    metrics = {
        "failure_rate": result.failure_rate,
        "citation_support": result.avg_citation_support,
        "avg_citation_support": result.avg_citation_support,
        "unsupported_claim_rate": result.unsupported_claim_rate,
        "reliability_score": result.reliability_score,
    }
    fail_on_messages = threshold_failures(metrics, parse_thresholds(fail_on))
    for message in fail_on_messages:
        click.echo("Threshold failed: %s" % message, err=True)

    failed = (
        result.avg_citation_support < min_citation_support
        or result.unsupported_claim_rate > max_unsupported_claim_rate
        or result.failure_rate > max_failure_rate
        or bool(fail_on_messages)
    )
    return 1 if failed else 0
298
+
299
+
300
# `demo`: run a bundled (or path-specified) demo dataset through the local
# client and print its summary; the HTML report goes under
# <local_store_dir>/reports/<dataset-name>_demo.html.
@cli.command()
@click.option("--dataset", default="refund_policy", show_default=True, help="Demo dataset name or path.")
@click.option("--strategy", default="adaptive", show_default=True, help="Demo retrieval strategy.")
@click.pass_context
def demo(ctx: click.Context, dataset: str, strategy: str) -> None:
    client = _client(ctx)
    settings = _load(ctx)
    # Only the final path component of --dataset is used in the report name.
    report_name = "%s_demo.html" % Path(dataset).name
    report_file = Path(settings.local_store_dir) / "reports" / report_name
    outcome = run_demo_dataset(
        dataset=dataset,
        contexttrace=client,
        strategy=strategy,
        report_path=str(report_file),
    )
    click.echo(f"Dataset: {outcome.dataset}")
    click.echo(f"Traces created: {len(outcome.trace_ids)}")
    click.echo(f"Reliability score: {outcome.summary.get('reliability_score')}")
    click.echo(f"Failure rate: {outcome.summary.get('failure_rate')}")
    click.echo(f"Citation support: {outcome.summary.get('citation_support')}")
    top_failures = ", ".join(outcome.summary.get("top_failures") or []) or "None"
    click.echo(f"Top failures: {top_failures}")
    click.echo(f"Report: {outcome.report_path}")
321
+
322
+
323
# `benchmark`: run the local benchmark over one or more strategies (all of
# BENCHMARK_STRATEGIES when none is given) and print the result summary.
#
# Returns 1 when any --fail-on threshold fails and 0 otherwise; main() turns
# the return value into the process exit code, so the annotation is `int`.
@cli.command()
@click.option("--dataset", required=True, help="Demo dataset name or path.")
@click.option("--strategy", "strategies", multiple=True, help="Strategy to run. May be repeated.")
@click.option("--output-dir", default=".contexttrace/benchmarks", show_default=True, help="Benchmark output directory.")
@click.option("--fail-on", multiple=True, help="Threshold rule such as failure_rate>0.25. May be repeated.")
@click.option("--report-path", default=None, help="Optional benchmark HTML report path.")
@click.pass_context
def benchmark(
    ctx: click.Context,
    dataset: str,
    strategies: tuple[str, ...],
    output_dir: str,
    fail_on: tuple[str, ...],
    report_path: Optional[str],
) -> int:
    result = run_local_benchmark(
        dataset=dataset,
        contexttrace=_client(ctx),
        output_dir=output_dir,
        strategies=strategies or BENCHMARK_STRATEGIES,
        fail_on=fail_on,
        report_path=report_path,
    )
    summary = result["summary"]
    click.echo("Status: %s" % result["status"])
    click.echo("Questions tested: %s" % summary.get("questions_tested"))
    click.echo("Reliability score: %s" % summary.get("reliability_score"))
    click.echo("Failure rate: %s" % summary.get("failure_rate"))
    click.echo("Citation support: %s" % summary.get("citation_support"))
    click.echo("Unsupported claim rate: %s" % summary.get("unsupported_claim_rate"))
    click.echo("Results: %s" % result["results_path"])
    click.echo("Summary: %s" % result["summary_path"])
    click.echo("Report: %s" % result["report_path"])
    # Threshold failures go to stderr so they stay visible when stdout is piped.
    for failure in result["threshold_failures"]:
        click.echo("Threshold failed: %s" % failure, err=True)
    return 1 if result["threshold_failures"] else 0
361
+
362
+
363
# `doctor`: run a series of environment checks, print OK/FAIL for each one,
# and exit with a click error when at least one check failed.
@cli.command()
@click.pass_context
def doctor(ctx: click.Context) -> None:
    config_file = Path(ctx.obj.get("config_path") or "contexttrace.yaml")
    settings = _load(ctx)

    results = [("config exists", config_file.exists())]

    # Best effort: any error while constructing the store counts as "not writable".
    try:
        SQLiteTraceStore(settings.storage_path)
    except Exception:
        sqlite_ok = False
    else:
        sqlite_ok = True
    results.append(("SQLite writable", sqlite_ok))

    results.append(("demo datasets available", bool(list_demo_datasets())))

    # An API key is only required for the OpenAI-style judge providers.
    needs_key = settings.judge_provider in {"openai", "openai-compatible"}
    results.append(("LLM API key present", bool(settings.api_key) if needs_key else True))

    if settings.eval_endpoint:
        results.append(("endpoint reachable", _endpoint_reachable(settings.eval_endpoint)))

    for name, ok in results:
        click.echo("%s: %s" % ("OK" if ok else "FAIL", name))
    failure_count = sum(1 for _, ok in results if not ok)
    if failure_count:
        raise click.ClickException("Doctor found %s failed check(s)." % failure_count)
387
+
388
+
389
# `viewer`: serve the trace viewer for the configured storage path on host:port.
@cli.command()
@click.option("--host", default="127.0.0.1", show_default=True)
@click.option("--port", default=8765, show_default=True, type=int)
@click.pass_context
def viewer(ctx: click.Context, host: str, port: int) -> None:
    storage_path = _load(ctx).storage_path
    serve_viewer(storage_path=storage_path, host=host, port=port)
396
+
397
+
398
def main(argv: Optional[list[str]] = None) -> int:
    """Run the CLI and return a process exit code instead of exiting.

    Click is invoked with ``standalone_mode=False`` so that the integer a
    command returns (for example the 0/1 from ``eval`` and ``benchmark``)
    comes back to this function.

    Args:
        argv: Argument list to parse; ``None`` lets click read ``sys.argv``.

    Returns:
        The command's return value (0 when it returned nothing), the exit code
        carried by a click ``Exit`` or ``ClickException``, 1 when the run was
        aborted (Ctrl-C / EOF), or 2 for a ``ContextTraceError`` or
        ``ValueError``.
    """
    try:
        result = cli.main(args=argv, prog_name="contexttrace", standalone_mode=False)
        return int(result or 0)
    except click.exceptions.Exit as exc:
        return int(exc.exit_code or 0)
    except click.exceptions.Abort:
        # With standalone_mode=False click re-raises Abort instead of handling
        # it; mirror its standalone behavior (message on stderr, exit code 1)
        # rather than letting a traceback escape.
        click.echo("Aborted!", err=True)
        return 1
    except click.ClickException as exc:
        exc.show(file=sys.stderr)
        return int(exc.exit_code)
    except (ContextTraceError, ValueError) as exc:
        click.echo("ContextTrace failed: %s" % exc, err=True)
        return 2
413
+
414
+
415
def _load(ctx: click.Context) -> ContextTraceConfig:
    """Load the configuration using the --config path stored on the context, if any."""
    shared = ctx.obj or {}
    return load_config(config_path=shared.get("config_path"))
417
+
418
+
419
def _client(ctx: click.Context) -> ContextTrace:
    """Build a ContextTrace client from the --config path stored on the context, if any."""
    shared = ctx.obj or {}
    return ContextTrace(config_path=shared.get("config_path"))
422
+
423
+
424
+ def _parse_headers(values: list[str]) -> dict[str, str]:
425
+ headers = {}
426
+ for value in values:
427
+ if ":" not in value:
428
+ raise click.ClickException("Endpoint headers must be formatted as Name:Value.")
429
+ name, header_value = value.split(":", 1)
430
+ headers[name.strip()] = header_value.strip()
431
+ return headers
432
+
433
+
434
+ def _mask_secret(value: str) -> str:
435
+ if len(value) <= 6:
436
+ return "***"
437
+ return "%s***%s" % (value[:3], value[-3:])
438
+
439
+
440
+ def _preview(value: object, *, limit: int = 100) -> str:
441
+ text = "" if value is None else str(value).replace("\n", " ")
442
+ return text if len(text) <= limit else text[: limit - 1] + "..."
443
+
444
+
445
+ def _endpoint_reachable(endpoint: str) -> bool:
446
+ request = urllib.request.Request(endpoint, method="GET")
447
+ try:
448
+ urllib.request.urlopen(request, timeout=2).close()
449
+ return True
450
+ except urllib.error.HTTPError:
451
+ return True
452
+ except urllib.error.URLError:
453
+ return False
454
+
455
+
456
+ def _eval_markdown(result: object) -> str:
457
+ return "\n".join(
458
+ [
459
+ "# ContextTrace Local Eval Summary",
460
+ "",
461
+ "- Questions tested: %s" % getattr(result, "questions_tested", 0),
462
+ "- Reliability score: %s" % getattr(result, "reliability_score", 0),
463
+ "- Failure rate: %s" % getattr(result, "failure_rate", 0),
464
+ "- Average citation support: %s" % getattr(result, "avg_citation_support", 0),
465
+ "- Unsupported claim rate: %s" % getattr(result, "unsupported_claim_rate", 0),
466
+ "- Top failures: %s" % (", ".join(getattr(result, "top_failures", []) or []) or "None"),
467
+ "- Report: %s" % (getattr(result, "report_path", None) or "Not generated"),
468
+ "",
469
+ ]
470
+ )
471
+
472
+
473
# Script entry point: exit the process with the code returned by main().
if __name__ == "__main__":  # pragma: no cover
    sys.exit(main())