dprovenancekit 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dprovenancekit/__init__.py +212 -0
- dprovenancekit/alignment_config.py +182 -0
- dprovenancekit/alignment_contract.py +73 -0
- dprovenancekit/alignment_engine.py +111 -0
- dprovenancekit/alignment_evidence.py +103 -0
- dprovenancekit/alignment_findings.py +69 -0
- dprovenancekit/alignment_interpreter.py +220 -0
- dprovenancekit/alignment_matcher.py +57 -0
- dprovenancekit/alignment_meta.py +81 -0
- dprovenancekit/alignment_models.py +270 -0
- dprovenancekit/alignment_narrative.py +73 -0
- dprovenancekit/alignment_render.py +99 -0
- dprovenancekit/alignment_semantics.py +44 -0
- dprovenancekit/alignment_snapshot.py +61 -0
- dprovenancekit/anomaly.py +72 -0
- dprovenancekit/benchmark.py +764 -0
- dprovenancekit/circuit_breaker.py +69 -0
- dprovenancekit/cli.py +196 -0
- dprovenancekit/cloud_store.py +391 -0
- dprovenancekit/config.py +27 -0
- dprovenancekit/context.py +39 -0
- dprovenancekit/corpus.py +402 -0
- dprovenancekit/diff.py +117 -0
- dprovenancekit/drop_stats.py +94 -0
- dprovenancekit/edge.py +23 -0
- dprovenancekit/event.py +148 -0
- dprovenancekit/graph.py +41 -0
- dprovenancekit/instrument.py +389 -0
- dprovenancekit/integrations/__init__.py +16 -0
- dprovenancekit/integrations/langchain.py +650 -0
- dprovenancekit/integrations/openai_agents.py +455 -0
- dprovenancekit/kit.py +126 -0
- dprovenancekit/live_engine.py +86 -0
- dprovenancekit/perturbation.py +58 -0
- dprovenancekit/priority.py +34 -0
- dprovenancekit/py.typed +0 -0
- dprovenancekit/query.py +371 -0
- dprovenancekit/raw_store.py +100 -0
- dprovenancekit/render_hints.py +21 -0
- dprovenancekit/replay.py +244 -0
- dprovenancekit/snapshot_diff.py +279 -0
- dprovenancekit/sqlite_store.py +573 -0
- dprovenancekit/store.py +262 -0
- dprovenancekit/testing.py +277 -0
- dprovenancekit/verification.py +231 -0
- dprovenancekit/viewmodel.py +112 -0
- dprovenancekit/write_buffer.py +236 -0
- dprovenancekit-0.1.0.dist-info/METADATA +345 -0
- dprovenancekit-0.1.0.dist-info/RECORD +53 -0
- dprovenancekit-0.1.0.dist-info/WHEEL +5 -0
- dprovenancekit-0.1.0.dist-info/entry_points.txt +2 -0
- dprovenancekit-0.1.0.dist-info/licenses/LICENSE +29 -0
- dprovenancekit-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
"""DProvenanceKit — reasoning observability and regression testing for AI systems.
|
|
2
|
+
|
|
3
|
+
A Python port of the Swift DProvenanceKit. Run → Record → Query → Diff → Detect
|
|
4
|
+
Regressions.
|
|
5
|
+
|
|
6
|
+
kit = DProvenanceKit(MyEvent)
|
|
7
|
+
store = InMemoryTraceStore()
|
|
8
|
+
with kit.run(context_id="case-1", store=store):
|
|
9
|
+
kit.record(MyEvent.document_evaluated("DocA", 0.95))
|
|
10
|
+
kit.record(MyEvent.conflict_detected("timeline_inconsistency"))
|
|
11
|
+
|
|
12
|
+
runs = store.query_runs(
|
|
13
|
+
TraceQueryDSL().requiring_step("conflictDetected").missing_step("documentEvaluated")
|
|
14
|
+
)
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
# Core event model
|
|
20
|
+
from .priority import TracePriority
|
|
21
|
+
from .event import (
|
|
22
|
+
TraceableEvent,
|
|
23
|
+
TraceEvent,
|
|
24
|
+
TraceEventRow,
|
|
25
|
+
RunRow,
|
|
26
|
+
AnyTraceableEvent,
|
|
27
|
+
)
|
|
28
|
+
from .edge import TraceEdge, TraceEdgeType
|
|
29
|
+
from .graph import TraceGraph, TraceExplanation
|
|
30
|
+
from .drop_stats import TraceDropStats, TraceDropTally
|
|
31
|
+
from .config import BufferCapacity, EvictionPolicy, OfflineConfig
|
|
32
|
+
|
|
33
|
+
# Recording + context
|
|
34
|
+
from .context import TraceContext, AnyActiveTraceRun
|
|
35
|
+
from .kit import DProvenanceKit, ActiveTraceRun
|
|
36
|
+
|
|
37
|
+
# Query
|
|
38
|
+
from .query import (
|
|
39
|
+
TraceRun,
|
|
40
|
+
TraceQueryDSL,
|
|
41
|
+
TraceQueryNode,
|
|
42
|
+
TraceQueryPlanner,
|
|
43
|
+
TraceQueryCompiler,
|
|
44
|
+
CompiledSQLQuery,
|
|
45
|
+
IndexConstraint,
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
# Buffer + stores
|
|
49
|
+
from .write_buffer import TraceWriteBuffer
|
|
50
|
+
from .store import (
|
|
51
|
+
TraceStore,
|
|
52
|
+
InMemoryTraceStore,
|
|
53
|
+
TraceError,
|
|
54
|
+
NodeNotFoundError,
|
|
55
|
+
NotImplementedTraceError,
|
|
56
|
+
)
|
|
57
|
+
from .sqlite_store import SQLiteTraceStore, SQLiteConnection, SQLiteWriter
|
|
58
|
+
from .raw_store import RawTraceStore, RawTraceRun, RawTraceEvent
|
|
59
|
+
from .cloud_store import (
|
|
60
|
+
CloudTraceStore,
|
|
61
|
+
CloudWriter,
|
|
62
|
+
CloudWriterError,
|
|
63
|
+
FlushTimedOut,
|
|
64
|
+
CloudTraceStoreError,
|
|
65
|
+
UnsupportedSchemaError,
|
|
66
|
+
ServerError,
|
|
67
|
+
default_transport,
|
|
68
|
+
)
|
|
69
|
+
from .circuit_breaker import CircuitBreaker, CircuitState
|
|
70
|
+
|
|
71
|
+
# Live querying + anomalies
|
|
72
|
+
from .live_engine import LiveTraceQueryEngine, TraceQuerySubscription, QueryState
|
|
73
|
+
from .anomaly import Anomaly, AnomalyRule, AnomalyDetector, LiveAnomalySubscription
|
|
74
|
+
|
|
75
|
+
# Diff + replay
|
|
76
|
+
from .diff import TraceDiffEngine, TraceDiffResult, Change, ChangeKind
|
|
77
|
+
from .replay import (
|
|
78
|
+
TraceReplayEngine,
|
|
79
|
+
ReplaySnapshot,
|
|
80
|
+
ReplayEvent,
|
|
81
|
+
ReplaySource,
|
|
82
|
+
ReplayManifest,
|
|
83
|
+
ReplaySnapshotMetadata,
|
|
84
|
+
SpanNode,
|
|
85
|
+
SequenceGap,
|
|
86
|
+
)
|
|
87
|
+
from .snapshot_diff import (
|
|
88
|
+
SnapshotDiffEngine,
|
|
89
|
+
SnapshotDiffResult,
|
|
90
|
+
SpanChange,
|
|
91
|
+
SpanChangeKind,
|
|
92
|
+
EventChange,
|
|
93
|
+
EventChangeKind,
|
|
94
|
+
DivergencePoint,
|
|
95
|
+
DiffSummary,
|
|
96
|
+
)
|
|
97
|
+
from .render_hints import RenderHints, DiffPresentationMode
|
|
98
|
+
|
|
99
|
+
# Alignment
|
|
100
|
+
from .alignment_config import (
|
|
101
|
+
AlignmentConfiguration,
|
|
102
|
+
AlignmentProfile,
|
|
103
|
+
AlignmentMode,
|
|
104
|
+
AlignmentStrategy,
|
|
105
|
+
AnyEquivalenceEvaluator,
|
|
106
|
+
)
|
|
107
|
+
from .alignment_engine import TraceAlignmentEngine, VerificationCaptureMode
|
|
108
|
+
from .alignment_models import (
|
|
109
|
+
TraceAlignmentResult,
|
|
110
|
+
EventAlignment,
|
|
111
|
+
AlignmentState,
|
|
112
|
+
AlignmentStateKind,
|
|
113
|
+
AlignmentStrength,
|
|
114
|
+
AlignmentStrengthCategory,
|
|
115
|
+
AmbiguousMatch,
|
|
116
|
+
AlignmentExplanation,
|
|
117
|
+
HeuristicEvidence,
|
|
118
|
+
HeuristicEvidenceCategory,
|
|
119
|
+
RegressionRisk,
|
|
120
|
+
RegressionLevel,
|
|
121
|
+
AlignmentFinding,
|
|
122
|
+
AlignmentFindingKind,
|
|
123
|
+
DecisionTimelineEntry,
|
|
124
|
+
)
|
|
125
|
+
from .alignment_meta import AlignmentMetaEvent, MetaEventKind
|
|
126
|
+
from .alignment_contract import AlignmentExecutionContract
|
|
127
|
+
from .alignment_evidence import (
|
|
128
|
+
AlignmentBinding,
|
|
129
|
+
BindingDecision,
|
|
130
|
+
EquivalenceDecisionRecord,
|
|
131
|
+
EquivalenceReason,
|
|
132
|
+
InterpretationStep,
|
|
133
|
+
AlignmentEvidence,
|
|
134
|
+
AlignmentEvidenceCollector,
|
|
135
|
+
NullEvidenceCollector,
|
|
136
|
+
EvidenceCollector,
|
|
137
|
+
VerificationArtifacts,
|
|
138
|
+
)
|
|
139
|
+
from .alignment_semantics import EquivalenceDecision, DefaultEquivalenceModel
|
|
140
|
+
from .alignment_matcher import DefaultTraceMatcher
|
|
141
|
+
from .alignment_interpreter import DefaultAlignmentInterpreter
|
|
142
|
+
from .alignment_findings import AlignmentFindingsExtractor
|
|
143
|
+
from .alignment_narrative import AlignmentNarrativeCompiler
|
|
144
|
+
from .alignment_render import AlignmentRenderNode, RenderHint, render_models
|
|
145
|
+
from .alignment_snapshot import (
|
|
146
|
+
AlignmentSnapshot,
|
|
147
|
+
AlignmentSnapshotValidator,
|
|
148
|
+
DriftToleranceMode,
|
|
149
|
+
SnapshotValidationError,
|
|
150
|
+
)
|
|
151
|
+
|
|
152
|
+
# Verification
|
|
153
|
+
from .verification import (
|
|
154
|
+
FidelityVector,
|
|
155
|
+
FormalizationMap,
|
|
156
|
+
DefaultFormalizationMapBuilder,
|
|
157
|
+
CoverageInvariant,
|
|
158
|
+
CompletenessInvariant,
|
|
159
|
+
CausalOrderingInvariant,
|
|
160
|
+
NoHallucinationInvariant,
|
|
161
|
+
ExplainabilityAuditor,
|
|
162
|
+
TraceGraphValidator,
|
|
163
|
+
TraceGraphProvenanceValidator,
|
|
164
|
+
TraceGraphValidationError,
|
|
165
|
+
StructuralCycleDetected,
|
|
166
|
+
SelfReferentialEdge,
|
|
167
|
+
)
|
|
168
|
+
|
|
169
|
+
# Benchmark + corpus
|
|
170
|
+
from .benchmark import (
|
|
171
|
+
BenchmarkRunner,
|
|
172
|
+
BenchmarkReport,
|
|
173
|
+
BenchmarkCase,
|
|
174
|
+
BenchmarkCaseResult,
|
|
175
|
+
BenchmarkDataset,
|
|
176
|
+
BenchmarkStabilityReport,
|
|
177
|
+
BenchmarkDeltaReport,
|
|
178
|
+
CategoryMetrics,
|
|
179
|
+
CategoryDeltaMetrics,
|
|
180
|
+
CausalRank,
|
|
181
|
+
ExpectedFinding,
|
|
182
|
+
DeterministicBoundary,
|
|
183
|
+
EnvironmentContext,
|
|
184
|
+
BenchmarkFailureDiagnoser,
|
|
185
|
+
DiagnosedFailure,
|
|
186
|
+
FailureCause,
|
|
187
|
+
FailureSeverityProfile,
|
|
188
|
+
SignalFailure,
|
|
189
|
+
ModelFailure,
|
|
190
|
+
SearchFailure,
|
|
191
|
+
DataFailure,
|
|
192
|
+
)
|
|
193
|
+
from .corpus import DProvenanceCorpus
|
|
194
|
+
from .perturbation import EvaluationPerturbationLayer, PerturbationMode
|
|
195
|
+
|
|
196
|
+
# View models (pure logic)
|
|
197
|
+
from .viewmodel import SpanViewModel, FlattenedSpanNode, flatten_span_tree
|
|
198
|
+
|
|
199
|
+
# Regression-gate test helper
|
|
200
|
+
from .testing import (
|
|
201
|
+
RegressionGate,
|
|
202
|
+
RegressionReport,
|
|
203
|
+
RegressionError,
|
|
204
|
+
assert_no_regression,
|
|
205
|
+
exact_equality_evaluator,
|
|
206
|
+
run_fingerprint,
|
|
207
|
+
)
|
|
208
|
+
|
|
209
|
+
# Framework-agnostic instrumentation (decorators / context manager)
|
|
210
|
+
from .instrument import TracedEvent, traced, traced_run, record_event
|
|
211
|
+
|
|
212
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
"""Alignment profiles, the pluggable equivalence evaluator, and the configuration."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from enum import Enum
|
|
7
|
+
from typing import Callable
|
|
8
|
+
|
|
9
|
+
from .alignment_contract import AlignmentExecutionContract
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class AlignmentMode(Enum):
    """Structural depth an alignment profile asks the scorer to consider.

    ``AlignmentConfiguration.score_match`` only awards structural (parent-span)
    credit when the profile's mode is something other than ``LINEAR``. The
    string values are also written into the profile hash.
    """

    # Events are compared as a flat sequence; no parent-span credit is given.
    LINEAR = "linear"
    # Parent span identity contributes to the match score.
    SPAN_AWARE = "spanAware"
    # Scored the same as SPAN_AWARE by ``score_match`` (any non-LINEAR mode).
    FULL_GRAPH = "fullGraph"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class AlignmentStrategy(Enum):
    """Named alignment strategy carried by an ``AlignmentProfile``.

    The string value of the member is one of the inputs to the profile hash,
    so renaming a value changes every hash derived from it.
    """

    STRICT_AUDIT = "strict_audit"
    DEVELOPER_DEBUG = "developer_debug"
    SEMANTIC_EXPLORATION = "semantic_exploration"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass(frozen=True)
class AlignmentProfile:
    """Immutable set of tuning knobs for trace alignment.

    Every field is serialized into the profile hash, so two profiles that differ
    in any field produce different hashes. The four ``*_weight`` fields are the
    multipliers applied by ``AlignmentConfiguration.score_match`` to the type,
    payload, structural and temporal similarity terms respectively.
    """

    # Identity of the profile.
    strategy: AlignmentStrategy
    version: int

    # Per-term multipliers used when scoring a candidate event pair.
    type_weight: float
    payload_weight: float
    structural_weight: float
    temporal_weight: float

    # Matching thresholds. NOTE(review): these are consumed outside this module
    # (presumably by the matcher/interpreter) -- confirm exact semantics there.
    semantic_threshold: float
    max_ambiguous_candidates: int
    ambiguity_delta_threshold: float

    # LINEAR disables the structural term in ``score_match``.
    alignment_mode: AlignmentMode
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# Built-in presets. They are attached to the class after its definition because a
# dataclass cannot instantiate itself inside its own body.

# Strict audit: only type and payload count (equal halves), near-exact threshold,
# a single candidate, and no structural or temporal credit.
AlignmentProfile.strict_audit_v1 = AlignmentProfile(  # type: ignore[attr-defined]
    strategy=AlignmentStrategy.STRICT_AUDIT,
    version=1,
    type_weight=0.5,
    payload_weight=0.5,
    structural_weight=0.0,
    temporal_weight=0.0,
    semantic_threshold=0.99,
    max_ambiguous_candidates=1,
    ambiguity_delta_threshold=0.0,
    alignment_mode=AlignmentMode.LINEAR,
)

# Developer debug: looser threshold, span-aware, with small structural and
# temporal contributions and up to three ambiguous candidates.
AlignmentProfile.developer_debug_v1 = AlignmentProfile(  # type: ignore[attr-defined]
    strategy=AlignmentStrategy.DEVELOPER_DEBUG,
    version=1,
    type_weight=0.4,
    payload_weight=0.4,
    structural_weight=0.15,
    temporal_weight=0.05,
    semantic_threshold=0.75,
    max_ambiguous_candidates=3,
    ambiguity_delta_threshold=0.10,
    alignment_mode=AlignmentMode.SPAN_AWARE,
)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@dataclass(frozen=True)
class AnyEquivalenceEvaluator:
    """A type-erased equivalence evaluator.

    ``evaluator`` scores two payloads in 0..1; ``ambiguity_threshold_fn`` returns the
    per-event ambiguity floor. When no threshold function is supplied, a single shared
    default returning ``0.4`` is used, so two evaluators built from the same
    identifier and scoring callable compare (and hash) equal.
    """

    evaluator_identifier: str
    evaluator: Callable
    ambiguity_threshold_fn: Callable = None  # type: ignore[assignment]

    def __post_init__(self):
        if self.ambiguity_threshold_fn is None:
            # The dataclass is frozen, so bypass the generated __setattr__.
            # Use one shared function rather than a fresh lambda per instance:
            # callables compare by identity, and a per-instance lambda would make
            # otherwise identical evaluators unequal.
            object.__setattr__(
                self, "ambiguity_threshold_fn", type(self)._default_ambiguity_threshold
            )

    @staticmethod
    def _default_ambiguity_threshold(_event) -> float:
        """Default ambiguity floor applied to every event."""
        return 0.4

    def evaluate_similarity(self, base, comparison) -> float:
        """Return the wrapped evaluator's similarity score for the two payloads."""
        return self.evaluator(base, comparison)

    def ambiguity_threshold(self, event) -> float:
        """Return the ambiguity floor for ``event`` from the threshold function."""
        return self.ambiguity_threshold_fn(event)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@dataclass(frozen=True)
class AlignmentConfiguration:
    """Pairs an ``AlignmentProfile`` with the evaluator used to compare payloads.

    ``engine_version`` defaults to ``"1.0.0"`` and is one of the inputs to
    ``profile_hash``.
    """

    profile: AlignmentProfile
    equivalence_evaluator: AnyEquivalenceEvaluator
    engine_version: str = "1.0.0"

    @property
    def profile_hash(self) -> str:
        """Hash over the profile, the evaluator identifier and the engine version.

        Delegates to ``AlignmentExecutionContract.compute_profile_hash``.
        """
        evaluator_id = self.equivalence_evaluator.evaluator_identifier
        return AlignmentExecutionContract.compute_profile_hash(
            profile=self.profile,
            evaluator_identifier=evaluator_id,
            engine_version=self.engine_version,
        )

    def score_match(self, base, comp):
        """Score how well ``comp`` matches ``base`` as a weighted sum of four terms.

        The terms are type equality, payload similarity (from the configured
        evaluator), shared parent span, and sequence-index proximity; each is
        multiplied by the matching ``*_weight`` on the profile. A piece of
        evidence is recorded for every term whose contribution is positive.

        Returns ``(score, AlignmentExplanation)``.
        """
        # Imported lazily, as in the rest of this module's design, to avoid a
        # module-level import cycle with alignment_models.
        from .alignment_models import (
            AlignmentExplanation,
            HeuristicEvidence,
            HeuristicEvidenceCategory,
        )

        weights = self.profile
        ranked = []
        reason = ""
        # Terms are accumulated in a fixed order so the float result is stable.
        total = 0.0

        # 1. Type match: all-or-nothing on the payload type identifier.
        same_type = base.payload.type_identifier == comp.payload.type_identifier
        type_part = (1.0 if same_type else 0.0) * weights.type_weight
        total += type_part
        if type_part > 0:
            ranked.append(
                HeuristicEvidence(
                    HeuristicEvidenceCategory.TYPE_MATCH,
                    type_part,
                    f"Type match ({base.payload.type_identifier})",
                )
            )
            reason = "Exact Type Match"

        # 2. Payload similarity, delegated to the pluggable evaluator.
        payload_sim = self.equivalence_evaluator.evaluate_similarity(base.payload, comp.payload)
        payload_part = payload_sim * weights.payload_weight
        total += payload_part
        if payload_part > 0:
            ranked.append(
                HeuristicEvidence(
                    HeuristicEvidenceCategory.PAYLOAD_SIMILARITY,
                    payload_part,
                    f"Semantic equivalence score: {payload_sim:.2f}",
                )
            )
            # A type match, when present, stays the primary reason.
            if not reason:
                reason = "Semantic Payload Match"

        # 3. Structural context (span awareness): skipped entirely in LINEAR mode.
        structural_sim = 0.0
        if weights.alignment_mode != AlignmentMode.LINEAR:
            base_parent = base.parent_span_id
            comp_parent = comp.parent_span_id
            shared_parent = base_parent == comp_parent and base_parent is not None
            both_unparented = base_parent is None and comp_parent is None
            if shared_parent or both_unparented:
                structural_sim = 1.0
        structural_part = structural_sim * weights.structural_weight
        total += structural_part
        if structural_part > 0:
            ranked.append(
                HeuristicEvidence(
                    HeuristicEvidenceCategory.STRUCTURAL_CONTEXT,
                    structural_part,
                    "Parent span matched",
                )
            )

        # 4. Temporal locality (rough heuristic on sequence index distance):
        #    full credit at distance 0, falling linearly to zero at 10 or more.
        seq_diff = abs(int(base.sequence) - int(comp.sequence))
        temporal_sim = max(0.0, 1.0 - (seq_diff / 10.0))
        temporal_part = temporal_sim * weights.temporal_weight
        total += temporal_part
        if temporal_part > 0:
            ranked.append(
                HeuristicEvidence(
                    HeuristicEvidenceCategory.TEMPORAL_LOCALITY,
                    temporal_part,
                    f"Temporal locality (+/-{seq_diff} events)",
                )
            )

        # Neither the type nor the payload term contributed anything.
        if not reason:
            reason = "Low Confidence Match"

        return total, AlignmentExplanation(
            primary_reason=reason, final_score=total, ranked_evidence=ranked
        )
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""Canonical ordering, normalization, and the profile hash — a frozen execution spec.
|
|
2
|
+
|
|
3
|
+
Strips out non-deterministic ordering flicker so two runs of the engine over the same
|
|
4
|
+
inputs produce byte-identical render output (and therefore the same snapshot hash).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import hashlib
|
|
10
|
+
from typing import List
|
|
11
|
+
|
|
12
|
+
from .alignment_models import (
|
|
13
|
+
AmbiguousMatch,
|
|
14
|
+
EventAlignment,
|
|
15
|
+
HeuristicEvidence,
|
|
16
|
+
sort_evidence,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
CONTRACT_VERSION = "1.0.0"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class AlignmentExecutionContract:
    """Canonical orderings and the profile hash for alignment output.

    All members are static; the class is a namespace. The sort helpers return new
    lists and leave their inputs untouched.
    """

    contract_version = CONTRACT_VERSION

    @staticmethod
    def canonical_sort_evidence(evidence: List[HeuristicEvidence]) -> List[HeuristicEvidence]:
        """Return ``evidence`` in the order defined by ``sort_evidence``."""
        return sort_evidence(evidence)

    @staticmethod
    def canonical_sort_ambiguity(ambiguity: List[AmbiguousMatch]) -> List[AmbiguousMatch]:
        """Return candidates ordered by descending strength, then ascending sequence."""
        return sorted(ambiguity, key=lambda a: (-a.strength, a.event.sequence))

    @staticmethod
    def canonical_sort_alignments(alignments: List[EventAlignment]) -> List[EventAlignment]:
        """Return alignments ordered by ``(sequence, id)`` of their anchor event.

        The anchor is the base event when present, otherwise the comparison event.
        An alignment with neither sorts with the key ``(0, "")``.
        """

        def key(a: EventAlignment):
            # Pick the anchor with an explicit None check for BOTH key parts, so
            # the sequence and the id always come from the same event.
            anchor = a.base_event if a.base_event is not None else a.comparison_event
            if anchor is None:
                return (0, "")
            return (anchor.sequence, str(anchor.id))

        return sorted(alignments, key=key)

    @staticmethod
    def compute_profile_hash(profile, evaluator_identifier: str, engine_version: str) -> str:
        """Return the SHA-256 hex digest identifying a profile/evaluator/engine triple.

        The digest covers a newline-separated ``key:value`` listing in a fixed
        order. Field order and key spelling are part of the contract: changing
        either changes every hash.
        """
        fields = [
            f"contractVersion:{CONTRACT_VERSION}",
            f"engineVersion:{engine_version}",
            f"strategy:{profile.strategy.value}",
            f"profileVersion:{profile.version}",
            f"typeWeight:{_fmt(profile.type_weight)}",
            f"payloadWeight:{_fmt(profile.payload_weight)}",
            f"structuralWeight:{_fmt(profile.structural_weight)}",
            f"temporalWeight:{_fmt(profile.temporal_weight)}",
            f"semanticThreshold:{_fmt(profile.semantic_threshold)}",
            f"maxAmbiguousCandidates:{profile.max_ambiguous_candidates}",
            f"ambiguityDeltaThreshold:{_fmt(profile.ambiguity_delta_threshold)}",
            f"alignmentMode:{profile.alignment_mode.value}",
            f"evaluatorIdentifier:{evaluator_identifier}",
        ]
        # No trailing newline after the last field.
        payload = "\n".join(fields)
        return hashlib.sha256(payload.encode("utf-8")).hexdigest()
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _fmt(value: float) -> str:
|
|
69
|
+
"""Format a float the way Swift's default ``Double`` interpolation would: an integral
|
|
70
|
+
value keeps one decimal (``0.0``), others use the shortest round-trip repr."""
|
|
71
|
+
if value == int(value):
|
|
72
|
+
return f"{value:.1f}"
|
|
73
|
+
return repr(value)
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""The behavioral-equivalence engine: ``TraceAlignmentEngine``."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from enum import Enum
|
|
6
|
+
from typing import Callable, Optional
|
|
7
|
+
|
|
8
|
+
from .alignment_config import AlignmentConfiguration
|
|
9
|
+
from .alignment_evidence import (
|
|
10
|
+
AlignmentEvidenceCollector,
|
|
11
|
+
NullEvidenceCollector,
|
|
12
|
+
VerificationArtifacts,
|
|
13
|
+
)
|
|
14
|
+
from .alignment_interpreter import DefaultAlignmentInterpreter
|
|
15
|
+
from .alignment_matcher import DefaultTraceMatcher
|
|
16
|
+
from .alignment_models import (
|
|
17
|
+
AlignmentStateKind,
|
|
18
|
+
RegressionLevel,
|
|
19
|
+
RegressionRisk,
|
|
20
|
+
TraceAlignmentResult,
|
|
21
|
+
)
|
|
22
|
+
from .alignment_semantics import DefaultEquivalenceModel
|
|
23
|
+
from .priority import TracePriority
|
|
24
|
+
from .query import TraceRun
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class VerificationCaptureMode(Enum):
    """Whether ``TraceAlignmentEngine.align`` collects auditable evidence."""

    # A no-op collector is used and the result carries no verification artifacts.
    DISABLED = "disabled"
    # Bindings, equivalence decisions and interpretation steps are recorded and
    # attached to the result.
    EVIDENCE_ONLY = "evidenceOnly"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class TraceAlignmentEngine:
    """Aligns two trace runs and reports how the comparison differs from the base.

    The engine wires together a matcher, an equivalence model and an interpreter,
    all built from the same ``AlignmentConfiguration``.
    """

    def __init__(
        self,
        configuration: AlignmentConfiguration,
        capture_mode: VerificationCaptureMode = VerificationCaptureMode.DISABLED,
        meta_trace_callback: Optional[Callable] = None,
    ):
        """Store the configuration and build the three pipeline stages.

        ``meta_trace_callback`` is handed to the interpreter unchanged.
        """
        self.configuration = configuration
        self.capture_mode = capture_mode
        self.meta_trace_callback = meta_trace_callback
        self._matcher = DefaultTraceMatcher(configuration)
        self._semantics = DefaultEquivalenceModel(configuration)
        self._interpreter = DefaultAlignmentInterpreter(configuration, meta_trace_callback)

    def align(
        self,
        base: TraceRun,
        comparison: TraceRun,
        minimum_priority: TracePriority = TracePriority.STRUCTURAL,
    ) -> TraceAlignmentResult:
        """Align ``comparison`` against ``base`` and assess regression risk.

        Events whose payload priority is below ``minimum_priority`` are ignored on
        both sides. Risk is ``HIGH`` when any removed alignment has a base event of
        ``CRITICAL`` priority, and ``NONE`` otherwise. Verification artifacts are
        attached only when evidence was actually collected.
        """

        def passes_filter(event) -> bool:
            return event.payload.priority >= minimum_priority

        base_events = [event for event in base.events if passes_filter(event)]
        comp_events = [event for event in comparison.events if passes_filter(event)]

        # Pick a real collector only when evidence capture was requested.
        if self.capture_mode == VerificationCaptureMode.EVIDENCE_ONLY:
            collector = AlignmentEvidenceCollector()
        else:
            collector = NullEvidenceCollector()

        bindings = self._matcher.match(base_events, comp_events, evidence_collector=collector)

        alignments = self._interpreter.interpret(
            base=base_events,
            comparison=comp_events,
            bindings=bindings,
            # The interpreter calls back into the equivalence model; every call
            # shares this run's collector.
            equivalence=lambda a, b: self._semantics.evaluate(
                a, b, evidence_collector=collector
            ),
            evidence_collector=collector,
        )

        # Regression risk: removing a critical reasoning step is a high risk.
        removed_critical_types = [
            alignment.base_event.payload.type_identifier
            for alignment in alignments
            if alignment.state.is_removed
            and alignment.base_event is not None
            and alignment.base_event.payload.priority == TracePriority.CRITICAL
        ]
        if removed_critical_types:
            critical_types = ", ".join(removed_critical_types)
            risk = RegressionRisk(
                level=RegressionLevel.HIGH,
                strength=0.95,
                reasoning=f"Critical reasoning steps removed: {critical_types}",
            )
        else:
            risk = RegressionRisk(
                level=RegressionLevel.NONE,
                strength=1.0,
                reasoning="No critical steps removed.",
            )

        # The no-op collector has nothing to export.
        if isinstance(collector, AlignmentEvidenceCollector):
            v_artifacts = VerificationArtifacts(evidence=collector.export_evidence())
        else:
            v_artifacts = None

        return TraceAlignmentResult(
            base_run_id=base.run_id,
            comparison_run_id=comparison.run_id,
            profile_hash=self.configuration.profile_hash,
            engine_version="v2-causal-strict",
            alignments=alignments,
            regression_risk=risk,
            verification_artifacts=v_artifacts,
        )

    def evaluate_score(self, base, comparison):
        """Score a single event pair via the configuration's ``score_match``."""
        return self.configuration.score_match(base, comparison)
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"""Auditable evidence emitted during alignment: bindings, equivalence decisions, steps."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import threading
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from typing import List, Optional
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass(frozen=True)
class EquivalenceReason:
    """Immutable wrapper around the textual justification of an equivalence decision."""

    # Free-form explanation text.
    description: str
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass(frozen=True)
class AlignmentBinding:
    """Immutable pairing of one base event with one comparison event, plus its score.

    The two ids are annotated loosely as ``object``; they are expected to be
    ``uuid.UUID`` values.
    """

    base_event_id: "object"  # uuid.UUID
    comparison_event_id: "object"  # uuid.UUID
    # Similarity assigned to this pairing.
    similarity_score: float
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass(frozen=True)
class BindingDecision:
    """Immutable evidence record of one base/comparison pairing, with ids as strings.

    Instances are what ``EvidenceCollector.record_binding`` receives.
    """

    base_id: str
    comparison_id: str
    # Similarity assigned to this pairing.
    similarity_score: float
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass(frozen=True)
class EquivalenceDecisionRecord:
    """Immutable evidence record of one equivalence evaluation between two sides.

    Instances are what ``EvidenceCollector.record_equivalence`` receives.
    """

    # String identifiers of the two operands that were compared.
    lhs: str
    rhs: str
    # Outcome of the evaluation and how confident it was.
    confidence: float
    equivalent: bool
    # Textual justification for the outcome.
    reason: EquivalenceReason
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass(frozen=True)
class InterpretationStep:
    """Immutable evidence record of one interpretation outcome.

    Instances are what ``EvidenceCollector.record_interpretation`` receives. Every
    reference to an event or binding is optional; the two sequence fields default
    to ``None``.
    """

    # Binding this step was derived from, if any.
    source_binding: Optional[AlignmentBinding]
    # String ids of the events involved, if any.
    base_id: Optional[str]
    comparison_id: Optional[str]
    # Resulting state name and the reasoning behind it.
    output_state: str
    rationale: str
    # Sequence numbers of the events involved, when known.
    base_sequence: Optional[int] = None
    comparison_sequence: Optional[int] = None
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@dataclass(frozen=True)
class AlignmentEvidence:
    """Immutable bundle of everything an evidence collector recorded.

    The three lists hold the records in the order they were recorded.
    """

    bindings: List[BindingDecision]
    equivalence_decisions: List[EquivalenceDecisionRecord]
    interpretation_steps: List[InterpretationStep]
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@dataclass(frozen=True)
class VerificationArtifacts:
    """Immutable container for the evidence attached to an alignment result."""

    # Everything the evidence collector recorded during the run.
    evidence: AlignmentEvidence
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class EvidenceCollector:
    """Interface for sinks that receive alignment evidence.

    The base implementations do nothing and return ``None``; subclasses override
    the hooks they care about.
    """

    def record_binding(self, decision: BindingDecision) -> None:
        """Receive one binding decision."""

    def record_equivalence(self, record: EquivalenceDecisionRecord) -> None:
        """Receive one equivalence decision."""

    def record_interpretation(self, step: InterpretationStep) -> None:
        """Receive one interpretation step."""
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class NullEvidenceCollector(EvidenceCollector):
    """Collector that discards everything it is given.

    Used when evidence capture is disabled, so callers can record unconditionally.
    """

    def record_binding(self, decision: BindingDecision) -> None:
        """Ignore the binding decision."""
        return None

    def record_equivalence(self, record: EquivalenceDecisionRecord) -> None:
        """Ignore the equivalence decision."""
        return None

    def record_interpretation(self, step: InterpretationStep) -> None:
        """Ignore the interpretation step."""
        return None
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class AlignmentEvidenceCollector(EvidenceCollector):
    """Collector that keeps every record in memory, in arrival order.

    All reads and writes of the internal lists happen while holding one lock.
    """

    def __init__(self) -> None:
        self._lock = threading.Lock()
        self._bindings: List[BindingDecision] = []
        self._equivalence_decisions: List[EquivalenceDecisionRecord] = []
        self._interpretation_steps: List[InterpretationStep] = []

    def record_binding(self, decision: BindingDecision) -> None:
        """Append a binding decision."""
        with self._lock:
            self._bindings.append(decision)

    def record_equivalence(self, record: EquivalenceDecisionRecord) -> None:
        """Append an equivalence decision."""
        with self._lock:
            self._equivalence_decisions.append(record)

    def record_interpretation(self, step: InterpretationStep) -> None:
        """Append an interpretation step."""
        with self._lock:
            self._interpretation_steps.append(step)

    def export_evidence(self) -> AlignmentEvidence:
        """Return a snapshot of everything recorded so far.

        The lists in the result are copies, so later recording does not change
        an evidence object that was already exported.
        """
        with self._lock:
            bindings = list(self._bindings)
            decisions = list(self._equivalence_decisions)
            steps = list(self._interpretation_steps)
        return AlignmentEvidence(
            bindings=bindings,
            equivalence_decisions=decisions,
            interpretation_steps=steps,
        )
|