dprovenancekit 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. dprovenancekit-0.1.0/LICENSE +29 -0
  2. dprovenancekit-0.1.0/PKG-INFO +345 -0
  3. dprovenancekit-0.1.0/README.md +310 -0
  4. dprovenancekit-0.1.0/pyproject.toml +55 -0
  5. dprovenancekit-0.1.0/setup.cfg +4 -0
  6. dprovenancekit-0.1.0/src/dprovenancekit/__init__.py +212 -0
  7. dprovenancekit-0.1.0/src/dprovenancekit/alignment_config.py +182 -0
  8. dprovenancekit-0.1.0/src/dprovenancekit/alignment_contract.py +73 -0
  9. dprovenancekit-0.1.0/src/dprovenancekit/alignment_engine.py +111 -0
  10. dprovenancekit-0.1.0/src/dprovenancekit/alignment_evidence.py +103 -0
  11. dprovenancekit-0.1.0/src/dprovenancekit/alignment_findings.py +69 -0
  12. dprovenancekit-0.1.0/src/dprovenancekit/alignment_interpreter.py +220 -0
  13. dprovenancekit-0.1.0/src/dprovenancekit/alignment_matcher.py +57 -0
  14. dprovenancekit-0.1.0/src/dprovenancekit/alignment_meta.py +81 -0
  15. dprovenancekit-0.1.0/src/dprovenancekit/alignment_models.py +270 -0
  16. dprovenancekit-0.1.0/src/dprovenancekit/alignment_narrative.py +73 -0
  17. dprovenancekit-0.1.0/src/dprovenancekit/alignment_render.py +99 -0
  18. dprovenancekit-0.1.0/src/dprovenancekit/alignment_semantics.py +44 -0
  19. dprovenancekit-0.1.0/src/dprovenancekit/alignment_snapshot.py +61 -0
  20. dprovenancekit-0.1.0/src/dprovenancekit/anomaly.py +72 -0
  21. dprovenancekit-0.1.0/src/dprovenancekit/benchmark.py +764 -0
  22. dprovenancekit-0.1.0/src/dprovenancekit/circuit_breaker.py +69 -0
  23. dprovenancekit-0.1.0/src/dprovenancekit/cli.py +196 -0
  24. dprovenancekit-0.1.0/src/dprovenancekit/cloud_store.py +391 -0
  25. dprovenancekit-0.1.0/src/dprovenancekit/config.py +27 -0
  26. dprovenancekit-0.1.0/src/dprovenancekit/context.py +39 -0
  27. dprovenancekit-0.1.0/src/dprovenancekit/corpus.py +402 -0
  28. dprovenancekit-0.1.0/src/dprovenancekit/diff.py +117 -0
  29. dprovenancekit-0.1.0/src/dprovenancekit/drop_stats.py +94 -0
  30. dprovenancekit-0.1.0/src/dprovenancekit/edge.py +23 -0
  31. dprovenancekit-0.1.0/src/dprovenancekit/event.py +148 -0
  32. dprovenancekit-0.1.0/src/dprovenancekit/graph.py +41 -0
  33. dprovenancekit-0.1.0/src/dprovenancekit/instrument.py +389 -0
  34. dprovenancekit-0.1.0/src/dprovenancekit/integrations/__init__.py +16 -0
  35. dprovenancekit-0.1.0/src/dprovenancekit/integrations/langchain.py +650 -0
  36. dprovenancekit-0.1.0/src/dprovenancekit/integrations/openai_agents.py +455 -0
  37. dprovenancekit-0.1.0/src/dprovenancekit/kit.py +126 -0
  38. dprovenancekit-0.1.0/src/dprovenancekit/live_engine.py +86 -0
  39. dprovenancekit-0.1.0/src/dprovenancekit/perturbation.py +58 -0
  40. dprovenancekit-0.1.0/src/dprovenancekit/priority.py +34 -0
  41. dprovenancekit-0.1.0/src/dprovenancekit/py.typed +0 -0
  42. dprovenancekit-0.1.0/src/dprovenancekit/query.py +371 -0
  43. dprovenancekit-0.1.0/src/dprovenancekit/raw_store.py +100 -0
  44. dprovenancekit-0.1.0/src/dprovenancekit/render_hints.py +21 -0
  45. dprovenancekit-0.1.0/src/dprovenancekit/replay.py +244 -0
  46. dprovenancekit-0.1.0/src/dprovenancekit/snapshot_diff.py +279 -0
  47. dprovenancekit-0.1.0/src/dprovenancekit/sqlite_store.py +573 -0
  48. dprovenancekit-0.1.0/src/dprovenancekit/store.py +262 -0
  49. dprovenancekit-0.1.0/src/dprovenancekit/testing.py +277 -0
  50. dprovenancekit-0.1.0/src/dprovenancekit/verification.py +231 -0
  51. dprovenancekit-0.1.0/src/dprovenancekit/viewmodel.py +112 -0
  52. dprovenancekit-0.1.0/src/dprovenancekit/write_buffer.py +236 -0
  53. dprovenancekit-0.1.0/src/dprovenancekit.egg-info/PKG-INFO +345 -0
  54. dprovenancekit-0.1.0/src/dprovenancekit.egg-info/SOURCES.txt +83 -0
  55. dprovenancekit-0.1.0/src/dprovenancekit.egg-info/dependency_links.txt +1 -0
  56. dprovenancekit-0.1.0/src/dprovenancekit.egg-info/entry_points.txt +2 -0
  57. dprovenancekit-0.1.0/src/dprovenancekit.egg-info/requires.txt +9 -0
  58. dprovenancekit-0.1.0/src/dprovenancekit.egg-info/top_level.txt +1 -0
  59. dprovenancekit-0.1.0/tests/test_alignment_engine.py +176 -0
  60. dprovenancekit-0.1.0/tests/test_benchmark_runner.py +139 -0
  61. dprovenancekit-0.1.0/tests/test_cloud_chaos.py +157 -0
  62. dprovenancekit-0.1.0/tests/test_cloud_store.py +78 -0
  63. dprovenancekit-0.1.0/tests/test_conformance.py +187 -0
  64. dprovenancekit-0.1.0/tests/test_corpus.py +85 -0
  65. dprovenancekit-0.1.0/tests/test_diff_engine.py +125 -0
  66. dprovenancekit-0.1.0/tests/test_example_regression.py +19 -0
  67. dprovenancekit-0.1.0/tests/test_explainability_auditor.py +132 -0
  68. dprovenancekit-0.1.0/tests/test_identity_stability.py +70 -0
  69. dprovenancekit-0.1.0/tests/test_in_memory_store.py +98 -0
  70. dprovenancekit-0.1.0/tests/test_instrument.py +365 -0
  71. dprovenancekit-0.1.0/tests/test_integration_langchain.py +371 -0
  72. dprovenancekit-0.1.0/tests/test_integration_openai_agents.py +421 -0
  73. dprovenancekit-0.1.0/tests/test_query_parity.py +81 -0
  74. dprovenancekit-0.1.0/tests/test_raw_store_roundtrip.py +43 -0
  75. dprovenancekit-0.1.0/tests/test_regression_gate.py +283 -0
  76. dprovenancekit-0.1.0/tests/test_replay_engine.py +112 -0
  77. dprovenancekit-0.1.0/tests/test_snapshot_diff.py +222 -0
  78. dprovenancekit-0.1.0/tests/test_span_tree.py +61 -0
  79. dprovenancekit-0.1.0/tests/test_sqlite_encode_drop.py +69 -0
  80. dprovenancekit-0.1.0/tests/test_sqlite_get_run.py +46 -0
  81. dprovenancekit-0.1.0/tests/test_sqlite_insert_failure_drop.py +94 -0
  82. dprovenancekit-0.1.0/tests/test_sqlite_stress.py +113 -0
  83. dprovenancekit-0.1.0/tests/test_stability_evaluation.py +90 -0
  84. dprovenancekit-0.1.0/tests/test_trace_graph.py +165 -0
  85. dprovenancekit-0.1.0/tests/test_write_buffer.py +104 -0
@@ -0,0 +1,29 @@
1
+ Business Source License 1.1
2
+
3
+ Parameters
4
+ Licensor: Danny Kissel
5
+ Licensed Work: DProvenanceKit
6
+ Additional Use Grant: You may use the Licensed Work for production purposes, provided that you do not offer it as a commercial service.
7
+ Change Date: 2030-06-16
8
+ Change License: Version 2.0 or later of the Apache License
9
+
10
+ Terms
11
+
12
+ The Licensor hereby grants you the right to copy, modify, create derivative works, redistribute, and make non-production use of the Licensed Work. The Licensor may make an Additional Use Grant, above, permitting limited production use.
13
+
14
+ Effective on the Change Date, or the fourth anniversary of the first publicly available distribution of a specific version of the Licensed Work under this License, whichever comes first, the Licensor hereby grants you rights under the terms of the Change License, and the rights granted in the paragraph above terminate.
15
+
16
+ If your use of the Licensed Work does not comply with the requirements currently in effect as described in this License, you must purchase a commercial license from the Licensor, its affiliated entities, or authorized resellers, or you must refrain from using the Licensed Work.
17
+
18
+ All copies of the original and modified Licensed Work, and derivative works of the Licensed Work, are subject to this License. This License applies separately for each version of the Licensed Work and the Change Date may vary for each version of the Licensed Work released by Licensor.
19
+
20
+ You must conspicuously display this License on each original or modified copy of the Licensed Work. If you receive the Licensed Work in original or modified form from a third party, the terms and conditions set forth in this License apply to your use of that work.
21
+
22
+ Any use of the Licensed Work in violation of this License will automatically terminate your rights under this License for the current and all other versions of the Licensed Work.
23
+
24
+ This License does not grant you any right in any trademark or logo of Licensor or its affiliates (provided that you may use a trademark or logo of Licensor as expressly required by this License).
25
+
26
+ TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND TITLE.
27
+
28
+ License text copyright (c) 2017 MariaDB Corporation Ab, All Rights Reserved.
29
+ "Business Source License" is a trademark of MariaDB Corporation Ab.
@@ -0,0 +1,345 @@
1
+ Metadata-Version: 2.4
2
+ Name: dprovenancekit
3
+ Version: 0.1.0
4
+ Summary: Reasoning observability and regression testing for AI systems — a Python port of DProvenanceKit.
5
+ Author: DProvenanceKit
6
+ License: BSL-1.1
7
+ Project-URL: Homepage, https://github.com/Therealdk8890/DProvenanceKitPython
8
+ Project-URL: Repository, https://github.com/Therealdk8890/DProvenanceKitPython
9
+ Project-URL: Issues, https://github.com/Therealdk8890/DProvenanceKitPython/issues
10
+ Project-URL: Swift original, https://github.com/Therealdk8890/DProvenanceKit
11
+ Keywords: observability,ai,agents,tracing,provenance,regression
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Intended Audience :: Developers
19
+ Classifier: Topic :: Software Development :: Debuggers
20
+ Classifier: Topic :: Software Development :: Quality Assurance
21
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
+ Classifier: License :: Other/Proprietary License
23
+ Classifier: Operating System :: OS Independent
24
+ Classifier: Typing :: Typed
25
+ Requires-Python: >=3.9
26
+ Description-Content-Type: text/markdown
27
+ License-File: LICENSE
28
+ Provides-Extra: dev
29
+ Requires-Dist: pytest>=7.0; extra == "dev"
30
+ Provides-Extra: langchain
31
+ Requires-Dist: langchain-core>=0.2; extra == "langchain"
32
+ Provides-Extra: openai-agents
33
+ Requires-Dist: openai-agents>=0.1; extra == "openai-agents"
34
+ Dynamic: license-file
35
+
36
+ # DProvenanceKit (Python)
37
+
38
+ **Reasoning observability and regression testing for AI systems — a Python port of the Swift [DProvenanceKit](https://github.com/Therealdk8890/DProvenanceKit).**
39
+
40
+ When an agent's reasoning drifts between runs, DProvenanceKit turns each execution into a queryable, diffable trace so you can see *what changed and why* — not just *what happened*.
41
+
42
+ > Run → Record → Query → Diff → Detect Regressions
43
+
44
+ This is a faithful, dependency-free port of the Swift library to Python. It keeps the same architecture and guarantees — synchronous non-blocking recording, priority-aware backpressure, one query language over two backends held at parity, structural diffing, formally-modeled semantic alignment, and by-tier drop accounting so load-shedding is never silent.
45
+
46
+ The original Swift package is unchanged; this is a parallel implementation.
47
+
48
+ ---
49
+
50
+ ## Why a Python port
51
+
52
+ The Swift library targets Apple-platform and on-device AI. This port brings the same reasoning-layer observability to Python codebases — agent frameworks, LLM workflows, tool-using models — with **zero third-party dependencies** (it uses only the standard library: `sqlite3`, `contextvars`, `threading`, `json`, `hashlib`, `uuid`, `urllib`).
53
+
54
+ ---
55
+
56
+ ## Install
57
+
58
+ From PyPI (released builds):
59
+
60
+ ```bash
61
+ pip install dprovenancekit
62
+ pip install "dprovenancekit[langchain]" # + LangChain adapter
63
+ pip install "dprovenancekit[openai-agents]" # + OpenAI Agents adapter
64
+ ```
65
+
66
+ From a checkout (development):
67
+
68
+ ```bash
69
+ pip install -e ".[dev]"
70
+ ```
71
+
72
+ Requires Python 3.9+; the core has **zero third-party dependencies**. Releasing is documented
73
+ in [RELEASING.md](RELEASING.md).
74
+
75
+ ---
76
+
77
+ ## 5-minute demo
78
+
79
+ ### 1. Define your events
80
+
81
+ Any frozen dataclass that subclasses `TraceableEvent`, exposing a stable `type_identifier` and a `priority`:
82
+
83
+ ```python
84
+ from dataclasses import dataclass
85
+ from dprovenancekit import TraceableEvent, TracePriority
86
+
87
+ @dataclass(frozen=True)
88
+ class MyAIDecision(TraceableEvent):
89
+     kind: str  # "promptGenerated" | "documentEvaluated" | "conflictDetected" | "finalDecisionMade"
90
+     token_count: int = 0
91
+     document_id: str = ""
92
+     score: float = 0.0
93
+     reason: str = ""
94
+     approved: bool = False
95
+
96
+     @property
97
+     def type_identifier(self) -> str:
98
+         return self.kind
99
+
100
+     @property
101
+     def priority(self) -> TracePriority:
102
+         if self.kind == "finalDecisionMade":
103
+             return TracePriority.CRITICAL
104
+         if self.kind == "conflictDetected":
105
+             return TracePriority.DIAGNOSTIC
106
+         return TracePriority.TELEMETRY
107
+ ```
108
+
109
+ ### 2. Record an execution run
110
+
111
+ `record(...)` is synchronous and never blocks — it touches only an in-memory buffer. Ambient run / engine / span context propagates through `contextvars`, so nested scopes attribute events correctly with no plumbing.
112
+
113
+ ```python
114
+ from dprovenancekit import DProvenanceKit, InMemoryTraceStore
115
+
116
+ kit = DProvenanceKit(MyAIDecision)
117
+ store = InMemoryTraceStore()
118
+
119
+ with kit.run(context_id="demo_case", store=store):
120
+     kit.record(MyAIDecision(kind="documentEvaluated", document_id="DocA", score=0.95))
121
+     kit.record(MyAIDecision(kind="conflictDetected", reason="timeline_inconsistency"))
122
+     kit.record(MyAIDecision(kind="finalDecisionMade", approved=False))
123
+ ```
124
+
125
+ ### 3. Query reasoning patterns
126
+
127
+ ```python
128
+ from dprovenancekit import TraceQueryDSL
129
+
130
+ suspicious = store.query_runs(
131
+     TraceQueryDSL()
132
+         .requiring_step("conflictDetected")
133
+         .missing_step("documentEvaluated")
134
+ )
135
+ ```
136
+
137
+ Find runs where a conflict was reported but no document was ever evaluated. The same DSL compiles to SQL for `SQLiteTraceStore` and is evaluated in memory for `InMemoryTraceStore` — the two backends are held in lockstep by a parity test suite.
138
+
139
+ ### 4. Diff runs
140
+
141
+ ```python
142
+ from dprovenancekit import TraceDiffEngine
143
+
144
+ diff = TraceDiffEngine().diff(base=run_a, comparison=run_b)
145
+ print(diff.changes) # structural steps that appeared, disappeared, or moved
146
+ ```
147
+
148
+ ### 5. Semantic alignment
149
+
150
+ `TraceAlignmentEngine` decides whether two executions are behaviorally equivalent within a formally-defined semantic model, even when payloads vary slightly:
151
+
152
+ ```python
153
+ from dprovenancekit import (
154
+     AlignmentConfiguration, AlignmentProfile, AnyEquivalenceEvaluator, TraceAlignmentEngine,
155
+ )
156
+
157
+ config = AlignmentConfiguration(
158
+     profile=AlignmentProfile.strict_audit_v1,
159
+     equivalence_evaluator=AnyEquivalenceEvaluator(
160
+         evaluator_identifier="MyAIDecision_Semantic",
161
+         evaluator=lambda a, b: 1.0 if a == b else 0.0,
162
+     ),
163
+ )
164
+ result = TraceAlignmentEngine(config).align(base=run_a, comparison=run_b)
165
+ print(result.regression_risk.level)
166
+ ```
167
+
168
+ ### 6. Detect regressions automatically
169
+
170
+ ```python
171
+ from dprovenancekit import AnomalyDetector, AnomalyRule, TraceQueryDSL
172
+
173
+ class UnverifiedConflictRule(AnomalyRule):
174
+     @property
175
+     def name(self): return "unverified_conflict"
176
+     @property
177
+     def anomaly_query(self):
178
+         return TraceQueryDSL().requiring_step("conflictDetected").missing_step("documentEvaluated")
179
+     def describe(self, run): return "Conflict detected with no supporting evaluation"
180
+
181
+ anomalies = AnomalyDetector(store).detect_anomalies([UnverifiedConflictRule()])
182
+ ```
183
+
184
+ ---
185
+
186
+ ## Benchmark corpus
187
+
188
+ The library ships the same validation corpus as the Swift version. The headless CLI runs it through the real benchmark runner:
189
+
190
+ ```bash
191
+ dprovenancekit evaluate # precision/recall/F1 over the standard + adversarial corpora
192
+ dprovenancekit diagnose # causal ranking of failure modes
193
+ dprovenancekit stability # determinism boundary: isolated vs perturbed F1 variance
194
+ ```
195
+
196
+ The standard corpus scores **Precision 1.000 / Recall 1.000 / F1 1.000** across 8 scenarios (reordering, semantic evolution, noise injection, branch collapse, …), matching the Swift implementation.
197
+
198
+ ---
199
+
200
+ ## What's included
201
+
202
+ | Component | Module |
203
+ | --- | --- |
204
+ | Event model, priority tiers, drop accounting | `event`, `priority`, `drop_stats` |
205
+ | Recording API + ambient context | `kit`, `context` |
206
+ | Stores (in-memory, WAL SQLite, raw read, cloud) | `store`, `sqlite_store`, `raw_store`, `cloud_store` |
207
+ | Priority-aware write buffer | `write_buffer` |
208
+ | Query DSL + two backends (AST eval + SQL compiler) | `query` |
209
+ | Live querying + anomaly detection | `live_engine`, `anomaly` |
210
+ | Structural diff + span-aware snapshot diff | `diff`, `snapshot_diff` |
211
+ | Deterministic replay | `replay` |
212
+ | Semantic alignment engine + evidence + verification | `alignment_*`, `verification` |
213
+ | Benchmark harness, failure diagnoser, corpus | `benchmark`, `corpus` |
214
+ | Pure view models for a trace viewer | `viewmodel` |
215
+ | Framework adapters (LangChain / LangGraph) | `integrations.langchain` |
216
+ | Framework adapters (OpenAI Agents SDK) | `integrations.openai_agents` |
217
+ | Regression-gate test helper | `testing` |
218
+ | Framework-agnostic instrumentation (decorators) | `instrument` |
219
+
220
+ The SwiftUI `DProvenanceUI` target is intentionally **not** ported (it is Apple-platform UI); its pure value-model layer (`SpanViewModel`, flattening) is ported in `viewmodel`.
221
+
222
+ ---
223
+
224
+ ## Cross-language conformance
225
+
226
+ Keeping the Swift and Python SDKs behaviorally equivalent is enforced, not hoped for. [`conformance/`](conformance/) holds **Trace Specification v1** — a language-neutral contract plus frozen golden vectors that pin the run fingerprint, the alignment profile hash, canonical payload encoding, query semantics, and alignment verdicts.
227
+
228
+ ```bash
229
+ python -m pytest tests/test_conformance.py # the Python SDK's claim of conformance
230
+ python conformance/generate_vectors.py # intentionally re-freeze the contract
231
+ ```
232
+
233
+ The committed `conformance/vectors/*.json` are the contract: any SDK — Swift today, Rust or TypeScript later — proves equivalence by reproducing the same files. See [`conformance/TRACE_SPEC_v1.md`](conformance/TRACE_SPEC_v1.md).
234
+
235
+ ---
236
+
237
+ ## Integrations
238
+
239
+ Framework adapters live in `dprovenancekit.integrations` and are the only parts of the package with third-party dependencies — the core stays pure standard library, and nothing imports an adapter unless you do.
240
+
241
+ ### LangChain / LangGraph
242
+
243
+ ```bash
244
+ pip install "dprovenancekit[langchain]"
245
+ ```
246
+
247
+ ```python
248
+ from dprovenancekit import SQLiteTraceStore
249
+ from dprovenancekit.integrations.langchain import DProvenanceTracer, LangChainTraceEvent
250
+
251
+ store = SQLiteTraceStore(LangChainTraceEvent, "traces.sqlite")
252
+ tracer = DProvenanceTracer(store)
253
+
254
+ with tracer.trace(context_id="customer-42") as cb:
255
+     answer = chain.invoke(question, config={"callbacks": [cb]})
256
+
257
+ # The run is now recorded — query it, diff it against a known-good run, or
258
+ # compare run fingerprints to detect when the agent took a different path.
259
+ ```
260
+
261
+ [`DProvenanceCallbackHandler`](src/dprovenancekit/integrations/langchain.py) translates LangChain's callback stream into a trace: each `on_llm_start` / `on_tool_start` / `on_retriever_start` / `on_chain_start` (and its completion) becomes a typed event in execution order, LangChain's `run_id`/`parent_run_id` become the trace's **span tree**, the active model/tool/retriever becomes the **engine**, and (by default) lifecycle **provenance edges** are emitted (`DERIVED_FROM` start→completion, `INFORMED` parent→child). Because events flow through the same recording path as hand-written ones, the whole toolkit applies: a run's **fingerprint** is the structural identity of the agent's execution path, so two runs that diverge (a tool called in a different order, a retrieval step skipped) produce different fingerprints — a cheap regression signal. Options: `capture_payloads` (prompt/completion/IO previews), `link_lifecycle` (edges), `record_chains` (LCEL/LangGraph chain noise).
262
+
263
+ ### OpenAI Agents SDK
264
+
265
+ ```bash
266
+ pip install "dprovenancekit[openai-agents]"
267
+ ```
268
+
269
+ ```python
270
+ from dprovenancekit import SQLiteTraceStore
271
+ from dprovenancekit.integrations.openai_agents import register, OpenAIAgentsTraceEvent
272
+
273
+ store = SQLiteTraceStore(OpenAIAgentsTraceEvent, "traces.sqlite")
274
+ register(store) # registers a global tracing processor
275
+
276
+ # ... run your agents normally; each run is recorded ...
277
+ ```
278
+
279
+ [`DProvenanceTracingProcessor`](src/dprovenancekit/integrations/openai_agents.py) implements the SDK's `TracingProcessor`: each agent run becomes a trace-run (`context_id` = the trace name), and every span start/end becomes a typed event — `agent.start`, `generation.end`, `function.start`, `guardrail.error`, … — in execution order. The span's `span_id`/`parent_id` become the **span tree**, the active agent/tool/model becomes the **engine**, errors and triggered guardrails are recorded at `CRITICAL`, and lifecycle **provenance edges** are emitted (same `DERIVED_FROM`/`INFORMED` model). One registered processor captures every run; the same `fingerprint`/diff/align tooling then applies.
280
+
281
+ ---
282
+
283
+ ## Regression gate
284
+
285
+ `dprovenancekit.testing` turns "did my agent regress?" into one assertion you can drop into any test or CI step. Give it a *golden* run (known-good) and a *candidate* run (what your current code produced); it aligns them and fails with a readable diagnostic if the candidate diverged.
286
+
287
+ ```python
288
+ from dprovenancekit.testing import assert_no_regression
289
+
290
+ assert_no_regression(golden=golden_run, candidate=candidate_run)
291
+ ```
292
+
293
+ Strict by default — any removed, added, or changed (ambiguous) step fails, and a removed CRITICAL step is additionally a HIGH-severity regression. Loosen with `max_regression_level` (gate only on severity) or `allow_divergent_steps` (tolerate benign per-step changes), or pass a custom `evaluator` to define what "equivalent" means (e.g. ignore volatile fields like token counts). `RegressionGate(...).check(...)` returns a `RegressionReport` (no raise) for richer assertions. Detecting *reordered* steps requires a span-aware profile (`AlignmentProfile.developer_debug_v1`); the default linear profile treats a pure reorder as still-matching. Complements `AlignmentSnapshotValidator` (an exact output-hash snapshot): the gate works on two runs and reasons about regression severity.
294
+
295
+ ---
296
+
297
+ ## Example: regression testing
298
+
299
+ [`examples/regression_testing.py`](examples/regression_testing.py) is the end-to-end story in ~150 readable lines: record a **golden** run of a fact-checking agent (retrieve → verify → decide), then catch a later run that skips its verification step — via both the fast **fingerprint** check and the detailed **alignment** verdict (which flags the dropped `claimVerified` step as a HIGH regression).
300
+
301
+ ```bash
302
+ python examples/regression_testing.py
303
+ ```
304
+
305
+ It self-asserts its verdicts, so it doubles as an executable test of the headline use case.
306
+
307
+ ---
308
+
309
+ ## Instrumenting plain code (no framework)
310
+
311
+ Not using a framework? Instrument a hand-written agent loop directly — no event type to define, zero dependencies (ships in core as `dprovenancekit.instrument`):
312
+
313
+ ```python
314
+ from dprovenancekit import InMemoryTraceStore, traced, traced_run, record_event
315
+
316
+ @traced
317
+ def search(query): ...
318
+
319
+ @traced
320
+ def answer(question, sources): ...
321
+
322
+ store = InMemoryTraceStore()
323
+ with traced_run(store, context_id="ticket-42"):
324
+     sources = search(question)
325
+     record_event("plan.chosen", {"strategy": "rag"})
326
+     reply = answer(question, sources)
327
+ ```
328
+
329
+ `@traced` records a `"<name>.start"` event and a matching `"<name>.end"` (or `"<name>.error"`) event for each call, in its own **span** (the function name is the **engine**), nests calls in the span tree, and emits the same `DERIVED_FROM` / `INFORMED` provenance edges as the framework adapters. `record_event(...)` records an ad-hoc event (a decision, a chosen branch). Plain functions, `async def`, generators, and async generators are all supported (for a generator, start/end bracket the full iteration). Instrumentation never changes behavior — capture is failure-proof and exceptions pass through unchanged. Outside a `traced_run` the decorators are transparent, so instrumented code is safe to call untraced. The trace it produces is identical in shape to the adapter-produced ones, so fingerprint / diff / align / the regression gate all apply.
330
+
331
+ ---
332
+
333
+ ## Tests
334
+
335
+ ```bash
336
+ python -m pytest
337
+ ```
338
+
339
+ 167 tests: 80 ported from the Swift suite (query parity, write-buffer backpressure, SQLite stress + drop accounting, alignment, replay, snapshot diff, explainability fidelity, benchmark scoring, cloud chaos, …), 27 cross-language conformance checks against the frozen Trace Specification v1 vectors, 14 LangChain integration tests, 16 OpenAI Agents SDK integration tests, 16 instrumentation-layer tests, 13 regression-gate tests, and the regression-testing example run as a self-asserting test. (The real-framework tests run only when `langchain-core` / `openai-agents` are installed, otherwise skipped.)
340
+
341
+ ---
342
+
343
+ ## License
344
+
345
+ Distributed under the **Business Source License 1.1**, same as the upstream Swift project. See [LICENSE](LICENSE).