agent_os_kernel 3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337)
  1. agent_control_plane/__init__.py +662 -0
  2. agent_control_plane/a2a_adapter.py +543 -0
  3. agent_control_plane/adapter.py +417 -0
  4. agent_control_plane/agent_hibernation.py +394 -0
  5. agent_control_plane/agent_kernel.py +470 -0
  6. agent_control_plane/compliance.py +720 -0
  7. agent_control_plane/constraint_graphs.py +478 -0
  8. agent_control_plane/control_plane.py +854 -0
  9. agent_control_plane/example_executors.py +195 -0
  10. agent_control_plane/execution_engine.py +231 -0
  11. agent_control_plane/flight_recorder.py +846 -0
  12. agent_control_plane/governance_layer.py +435 -0
  13. agent_control_plane/hf_utils.py +563 -0
  14. agent_control_plane/interfaces/__init__.py +55 -0
  15. agent_control_plane/interfaces/kernel_interface.py +361 -0
  16. agent_control_plane/interfaces/plugin_interface.py +497 -0
  17. agent_control_plane/interfaces/protocol_interfaces.py +387 -0
  18. agent_control_plane/kernel_space.py +1009 -0
  19. agent_control_plane/langchain_adapter.py +424 -0
  20. agent_control_plane/lifecycle.py +3113 -0
  21. agent_control_plane/mcp_adapter.py +653 -0
  22. agent_control_plane/ml_safety.py +563 -0
  23. agent_control_plane/multimodal.py +727 -0
  24. agent_control_plane/mute_agent.py +422 -0
  25. agent_control_plane/observability.py +787 -0
  26. agent_control_plane/orchestrator.py +482 -0
  27. agent_control_plane/plugin_registry.py +750 -0
  28. agent_control_plane/policy_engine.py +954 -0
  29. agent_control_plane/process_isolation.py +777 -0
  30. agent_control_plane/shadow_mode.py +310 -0
  31. agent_control_plane/signals.py +493 -0
  32. agent_control_plane/supervisor_agents.py +430 -0
  33. agent_control_plane/time_travel_debugger.py +557 -0
  34. agent_control_plane/tool_registry.py +452 -0
  35. agent_control_plane/vfs.py +697 -0
  36. agent_kernel/__init__.py +69 -0
  37. agent_kernel/analyzer.py +435 -0
  38. agent_kernel/auditor.py +36 -0
  39. agent_kernel/completeness_auditor.py +237 -0
  40. agent_kernel/detector.py +203 -0
  41. agent_kernel/kernel.py +744 -0
  42. agent_kernel/memory_manager.py +85 -0
  43. agent_kernel/models.py +374 -0
  44. agent_kernel/nudge_mechanism.py +263 -0
  45. agent_kernel/outcome_analyzer.py +338 -0
  46. agent_kernel/patcher.py +582 -0
  47. agent_kernel/semantic_analyzer.py +316 -0
  48. agent_kernel/semantic_purge.py +349 -0
  49. agent_kernel/simulator.py +449 -0
  50. agent_kernel/teacher.py +85 -0
  51. agent_kernel/triage.py +152 -0
  52. agent_os/__init__.py +409 -0
  53. agent_os/_adversarial_impl.py +200 -0
  54. agent_os/_circuit_breaker_impl.py +232 -0
  55. agent_os/_mcp_metrics.py +193 -0
  56. agent_os/adversarial.py +20 -0
  57. agent_os/agents_compat.py +490 -0
  58. agent_os/audit_logger.py +135 -0
  59. agent_os/base_agent.py +651 -0
  60. agent_os/circuit_breaker.py +34 -0
  61. agent_os/cli/__init__.py +659 -0
  62. agent_os/cli/cmd_audit.py +128 -0
  63. agent_os/cli/cmd_init.py +152 -0
  64. agent_os/cli/cmd_policy.py +41 -0
  65. agent_os/cli/cmd_policy_gen.py +180 -0
  66. agent_os/cli/cmd_validate.py +258 -0
  67. agent_os/cli/mcp_scan.py +265 -0
  68. agent_os/cli/output.py +192 -0
  69. agent_os/cli/policy_checker.py +330 -0
  70. agent_os/compat.py +74 -0
  71. agent_os/constraint_graph.py +234 -0
  72. agent_os/content_governance.py +140 -0
  73. agent_os/context_budget.py +305 -0
  74. agent_os/credential_redactor.py +224 -0
  75. agent_os/diff_policy.py +89 -0
  76. agent_os/egress_policy.py +159 -0
  77. agent_os/escalation.py +276 -0
  78. agent_os/event_bus.py +124 -0
  79. agent_os/exceptions.py +180 -0
  80. agent_os/execution_context_policy.py +141 -0
  81. agent_os/github_enterprise.py +96 -0
  82. agent_os/health.py +20 -0
  83. agent_os/integrations/__init__.py +279 -0
  84. agent_os/integrations/a2a_adapter.py +279 -0
  85. agent_os/integrations/agent_lightning/__init__.py +30 -0
  86. agent_os/integrations/anthropic_adapter.py +420 -0
  87. agent_os/integrations/autogen_adapter.py +620 -0
  88. agent_os/integrations/base.py +1137 -0
  89. agent_os/integrations/compat.py +229 -0
  90. agent_os/integrations/config.py +98 -0
  91. agent_os/integrations/conversation_guardian.py +957 -0
  92. agent_os/integrations/crewai_adapter.py +467 -0
  93. agent_os/integrations/drift_detector.py +425 -0
  94. agent_os/integrations/dry_run.py +124 -0
  95. agent_os/integrations/escalation.py +582 -0
  96. agent_os/integrations/gemini_adapter.py +364 -0
  97. agent_os/integrations/google_adk_adapter.py +633 -0
  98. agent_os/integrations/guardrails_adapter.py +394 -0
  99. agent_os/integrations/health.py +197 -0
  100. agent_os/integrations/langchain_adapter.py +654 -0
  101. agent_os/integrations/llamafirewall.py +343 -0
  102. agent_os/integrations/llamaindex_adapter.py +188 -0
  103. agent_os/integrations/logging.py +191 -0
  104. agent_os/integrations/maf_adapter.py +631 -0
  105. agent_os/integrations/mistral_adapter.py +365 -0
  106. agent_os/integrations/openai_adapter.py +816 -0
  107. agent_os/integrations/openai_agents_sdk.py +406 -0
  108. agent_os/integrations/policy_compose.py +171 -0
  109. agent_os/integrations/profiling.py +144 -0
  110. agent_os/integrations/pydantic_ai_adapter.py +420 -0
  111. agent_os/integrations/rate_limiter.py +130 -0
  112. agent_os/integrations/rbac.py +143 -0
  113. agent_os/integrations/registry.py +113 -0
  114. agent_os/integrations/scope_guard.py +303 -0
  115. agent_os/integrations/semantic_kernel_adapter.py +769 -0
  116. agent_os/integrations/smolagents_adapter.py +629 -0
  117. agent_os/integrations/templates.py +178 -0
  118. agent_os/integrations/token_budget.py +134 -0
  119. agent_os/integrations/tool_aliases.py +190 -0
  120. agent_os/integrations/webhooks.py +177 -0
  121. agent_os/lite.py +208 -0
  122. agent_os/mcp_gateway.py +385 -0
  123. agent_os/mcp_message_signer.py +273 -0
  124. agent_os/mcp_protocols.py +161 -0
  125. agent_os/mcp_response_scanner.py +232 -0
  126. agent_os/mcp_security.py +924 -0
  127. agent_os/mcp_session_auth.py +231 -0
  128. agent_os/mcp_sliding_rate_limiter.py +184 -0
  129. agent_os/memory_guard.py +409 -0
  130. agent_os/metrics.py +134 -0
  131. agent_os/mute.py +428 -0
  132. agent_os/mute_agent.py +209 -0
  133. agent_os/policies/__init__.py +77 -0
  134. agent_os/policies/async_evaluator.py +275 -0
  135. agent_os/policies/backends.py +670 -0
  136. agent_os/policies/bridge.py +169 -0
  137. agent_os/policies/budget.py +85 -0
  138. agent_os/policies/cli.py +294 -0
  139. agent_os/policies/conflict_resolution.py +270 -0
  140. agent_os/policies/data_classification.py +252 -0
  141. agent_os/policies/evaluator.py +239 -0
  142. agent_os/policies/policy_schema.json +228 -0
  143. agent_os/policies/rate_limiting.py +145 -0
  144. agent_os/policies/schema.py +115 -0
  145. agent_os/policies/shared.py +331 -0
  146. agent_os/prompt_injection.py +694 -0
  147. agent_os/providers.py +182 -0
  148. agent_os/py.typed +0 -0
  149. agent_os/retry.py +81 -0
  150. agent_os/reversibility.py +251 -0
  151. agent_os/sandbox.py +432 -0
  152. agent_os/sandbox_provider.py +140 -0
  153. agent_os/secure_codegen.py +525 -0
  154. agent_os/security_skills.py +538 -0
  155. agent_os/semantic_policy.py +422 -0
  156. agent_os/server/__init__.py +15 -0
  157. agent_os/server/__main__.py +25 -0
  158. agent_os/server/app.py +277 -0
  159. agent_os/server/models.py +104 -0
  160. agent_os/shift_left_metrics.py +130 -0
  161. agent_os/stateless.py +742 -0
  162. agent_os/supervisor.py +148 -0
  163. agent_os/task_outcome.py +148 -0
  164. agent_os/transparency.py +181 -0
  165. agent_os/trust_root.py +128 -0
  166. agent_os_kernel-3.1.0.dist-info/METADATA +1269 -0
  167. agent_os_kernel-3.1.0.dist-info/RECORD +337 -0
  168. agent_os_kernel-3.1.0.dist-info/WHEEL +4 -0
  169. agent_os_kernel-3.1.0.dist-info/entry_points.txt +2 -0
  170. agent_os_kernel-3.1.0.dist-info/licenses/LICENSE +21 -0
  171. agent_os_observability/__init__.py +27 -0
  172. agent_os_observability/dashboards.py +898 -0
  173. agent_os_observability/metrics.py +398 -0
  174. agent_os_observability/server.py +223 -0
  175. agent_os_observability/tracer.py +232 -0
  176. agent_primitives/__init__.py +24 -0
  177. agent_primitives/failures.py +84 -0
  178. agent_primitives/py.typed +0 -0
  179. amb_core/__init__.py +177 -0
  180. amb_core/adapters/__init__.py +57 -0
  181. amb_core/adapters/aws_sqs_broker.py +376 -0
  182. amb_core/adapters/azure_servicebus_broker.py +340 -0
  183. amb_core/adapters/kafka_broker.py +260 -0
  184. amb_core/adapters/nats_broker.py +285 -0
  185. amb_core/adapters/rabbitmq_broker.py +235 -0
  186. amb_core/adapters/redis_broker.py +262 -0
  187. amb_core/broker.py +145 -0
  188. amb_core/bus.py +481 -0
  189. amb_core/cloudevents.py +509 -0
  190. amb_core/dlq.py +345 -0
  191. amb_core/hf_utils.py +536 -0
  192. amb_core/memory_broker.py +410 -0
  193. amb_core/models.py +141 -0
  194. amb_core/persistence.py +529 -0
  195. amb_core/schema.py +294 -0
  196. amb_core/tracing.py +358 -0
  197. atr/__init__.py +640 -0
  198. atr/access.py +348 -0
  199. atr/composition.py +645 -0
  200. atr/decorator.py +357 -0
  201. atr/executor.py +384 -0
  202. atr/health.py +557 -0
  203. atr/hf_utils.py +449 -0
  204. atr/injection.py +422 -0
  205. atr/metrics.py +440 -0
  206. atr/policies.py +403 -0
  207. atr/py.typed +2 -0
  208. atr/registry.py +452 -0
  209. atr/schema.py +480 -0
  210. atr/tools/safe/__init__.py +75 -0
  211. atr/tools/safe/calculator.py +467 -0
  212. atr/tools/safe/datetime_tool.py +443 -0
  213. atr/tools/safe/file_reader.py +402 -0
  214. atr/tools/safe/http_client.py +316 -0
  215. atr/tools/safe/json_parser.py +374 -0
  216. atr/tools/safe/text_tool.py +537 -0
  217. atr/tools/safe/toolkit.py +175 -0
  218. caas/__init__.py +162 -0
  219. caas/api/__init__.py +7 -0
  220. caas/api/server.py +1328 -0
  221. caas/caching.py +834 -0
  222. caas/cli.py +210 -0
  223. caas/conversation.py +223 -0
  224. caas/decay.py +72 -0
  225. caas/detection/__init__.py +9 -0
  226. caas/detection/detector.py +238 -0
  227. caas/enrichment.py +130 -0
  228. caas/gateway/__init__.py +27 -0
  229. caas/gateway/trust_gateway.py +474 -0
  230. caas/hf_utils.py +479 -0
  231. caas/ingestion/__init__.py +23 -0
  232. caas/ingestion/processors.py +253 -0
  233. caas/ingestion/structure_parser.py +188 -0
  234. caas/models.py +356 -0
  235. caas/pragmatic_truth.py +444 -0
  236. caas/routing/__init__.py +10 -0
  237. caas/routing/heuristic_router.py +58 -0
  238. caas/storage/__init__.py +9 -0
  239. caas/storage/store.py +389 -0
  240. caas/triad.py +213 -0
  241. caas/tuning/__init__.py +9 -0
  242. caas/tuning/tuner.py +329 -0
  243. caas/vfs/__init__.py +14 -0
  244. caas/vfs/filesystem.py +452 -0
  245. cmvk/__init__.py +218 -0
  246. cmvk/audit.py +402 -0
  247. cmvk/benchmarks.py +478 -0
  248. cmvk/constitutional.py +904 -0
  249. cmvk/hf_utils.py +301 -0
  250. cmvk/metrics.py +473 -0
  251. cmvk/profiles.py +300 -0
  252. cmvk/py.typed +0 -0
  253. cmvk/types.py +12 -0
  254. cmvk/verification.py +956 -0
  255. emk/__init__.py +89 -0
  256. emk/causal.py +352 -0
  257. emk/hf_utils.py +421 -0
  258. emk/indexer.py +83 -0
  259. emk/py.typed +0 -0
  260. emk/schema.py +204 -0
  261. emk/sleep_cycle.py +347 -0
  262. emk/store.py +281 -0
  263. iatp/__init__.py +166 -0
  264. iatp/attestation.py +461 -0
  265. iatp/cli.py +317 -0
  266. iatp/hf_utils.py +472 -0
  267. iatp/ipc_pipes.py +580 -0
  268. iatp/main.py +412 -0
  269. iatp/models/__init__.py +447 -0
  270. iatp/policy_engine.py +337 -0
  271. iatp/py.typed +2 -0
  272. iatp/recovery.py +321 -0
  273. iatp/security/__init__.py +270 -0
  274. iatp/sidecar/__init__.py +519 -0
  275. iatp/telemetry/__init__.py +164 -0
  276. iatp/tests/__init__.py +1 -0
  277. iatp/tests/test_attestation.py +370 -0
  278. iatp/tests/test_cli.py +131 -0
  279. iatp/tests/test_ed25519_attestation.py +211 -0
  280. iatp/tests/test_models.py +130 -0
  281. iatp/tests/test_policy_engine.py +347 -0
  282. iatp/tests/test_recovery.py +281 -0
  283. iatp/tests/test_security.py +222 -0
  284. iatp/tests/test_sidecar.py +167 -0
  285. iatp/tests/test_telemetry.py +175 -0
  286. mcp_kernel_server/__init__.py +28 -0
  287. mcp_kernel_server/cli.py +274 -0
  288. mcp_kernel_server/resources.py +217 -0
  289. mcp_kernel_server/server.py +564 -0
  290. mcp_kernel_server/tools.py +1174 -0
  291. mute_agent/__init__.py +68 -0
  292. mute_agent/core/__init__.py +1 -0
  293. mute_agent/core/execution_agent.py +166 -0
  294. mute_agent/core/handshake_protocol.py +201 -0
  295. mute_agent/core/reasoning_agent.py +238 -0
  296. mute_agent/knowledge_graph/__init__.py +1 -0
  297. mute_agent/knowledge_graph/graph_elements.py +65 -0
  298. mute_agent/knowledge_graph/multidimensional_graph.py +170 -0
  299. mute_agent/knowledge_graph/subgraph.py +224 -0
  300. mute_agent/listener/__init__.py +43 -0
  301. mute_agent/listener/adapters/__init__.py +31 -0
  302. mute_agent/listener/adapters/base_adapter.py +189 -0
  303. mute_agent/listener/adapters/caas_adapter.py +344 -0
  304. mute_agent/listener/adapters/control_plane_adapter.py +436 -0
  305. mute_agent/listener/adapters/iatp_adapter.py +332 -0
  306. mute_agent/listener/adapters/scak_adapter.py +251 -0
  307. mute_agent/listener/listener.py +610 -0
  308. mute_agent/listener/state_observer.py +436 -0
  309. mute_agent/listener/threshold_config.py +313 -0
  310. mute_agent/super_system/__init__.py +1 -0
  311. mute_agent/super_system/router.py +204 -0
  312. mute_agent/visualization/__init__.py +10 -0
  313. mute_agent/visualization/graph_debugger.py +502 -0
  314. nexus/README.md +60 -0
  315. nexus/__init__.py +51 -0
  316. nexus/arbiter.py +359 -0
  317. nexus/client.py +466 -0
  318. nexus/dmz.py +444 -0
  319. nexus/escrow.py +430 -0
  320. nexus/exceptions.py +286 -0
  321. nexus/pyproject.toml +36 -0
  322. nexus/registry.py +393 -0
  323. nexus/reputation.py +425 -0
  324. nexus/schemas/__init__.py +51 -0
  325. nexus/schemas/compliance.py +276 -0
  326. nexus/schemas/escrow.py +251 -0
  327. nexus/schemas/manifest.py +225 -0
  328. nexus/schemas/receipt.py +208 -0
  329. nexus/tests/__init__.py +0 -0
  330. nexus/tests/conftest.py +146 -0
  331. nexus/tests/test_arbiter.py +192 -0
  332. nexus/tests/test_dmz.py +194 -0
  333. nexus/tests/test_escrow.py +276 -0
  334. nexus/tests/test_exceptions.py +225 -0
  335. nexus/tests/test_registry.py +232 -0
  336. nexus/tests/test_reputation.py +328 -0
  337. nexus/tests/test_schemas.py +295 -0
cmvk/verification.py ADDED
@@ -0,0 +1,956 @@
1
+ # Copyright (c) Microsoft Corporation.
2
+ # Licensed under the MIT License.
3
+ """
4
+ CMVK Verification Module - Pure Mathematical Functions
5
+
6
+ This module provides pure functions for calculating drift/hallucination scores
7
+ between two outputs. These functions have no side effects and use only
8
+ numpy/scipy for mathematical operations.
9
+
10
+ Layer 1: The Primitive - Mathematical and adversarial verification.
11
+
12
+ Enhanced Features (v0.2.0):
13
+ - Configurable distance metrics (cosine, euclidean, manhattan, etc.)
14
+ - Dimensional weighting for importance-based drift calculation
15
+ - Threshold profiles for domain-specific verification
16
+ - Explainable drift with per-dimension contributions
17
+ - Batch verification for efficiency
18
+ - Audit trail integration
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ from collections.abc import Sequence
24
+ from dataclasses import dataclass
25
+ from enum import Enum
26
+ from typing import TYPE_CHECKING, Any
27
+
28
+ import numpy as np
29
+ from numpy.typing import ArrayLike
30
+
31
+ try:
32
+ from scipy import stats
33
+
34
+ HAS_SCIPY = True
35
+ except ImportError:
36
+ HAS_SCIPY = False
37
+
38
+ if TYPE_CHECKING:
39
+ from .audit import AuditTrail
40
+
41
+
42
class DriftType(Enum):
    """Types of drift/divergence detected between outputs."""

    SEMANTIC = "semantic"      # meaning-level divergence (reported by the embedding-based check)
    STRUCTURAL = "structural"  # shape/format divergence (e.g. one output empty, embedding shape mismatch)
    NUMERICAL = "numerical"    # divergence in numeric content (from _numerical_drift)
    LEXICAL = "lexical"        # surface/token-level divergence (from _lexical_drift; also "both empty")
49
+
50
+
51
@dataclass(frozen=True)
class VerificationScore:
    """
    Immutable outcome of comparing two outputs.

    Attributes:
        drift_score: Overall drift in [0.0, 1.0] — 0.0 means identical,
            1.0 means completely different.
        confidence: Confidence in the score, in [0.0, 1.0].
        drift_type: Dominant kind of drift that was detected.
        details: Component scores and diagnostic data.
        explanation: Optional per-dimension drift breakdown (CMVK-010).
    """

    drift_score: float
    confidence: float
    drift_type: DriftType
    details: dict
    explanation: dict | None = None

    def passed(self, threshold: float = 0.3) -> bool:
        """Return True when the drift score does not exceed *threshold*."""
        within_bounds = self.drift_score <= threshold
        return within_bounds

    def to_dict(self) -> dict:
        """Serialize this score to a plain dictionary."""
        return dict(
            drift_score=self.drift_score,
            confidence=self.confidence,
            drift_type=self.drift_type.value,
            details=self.details,
            explanation=self.explanation,
        )
83
+
84
+
85
@dataclass(frozen=True)
class DriftExplanation:
    """
    Detailed explanation of drift between two vectors (CMVK-010).

    Attributes:
        primary_drift_dimension: Index or name of the dimension contributing most.
        dimension_contributions: Dimension -> fractional share of total drift.
        top_contributors: Highest-contributing dimensions, largest first.
        metric_used: Name of the distance metric that produced the drift.
        interpretation: Human-readable summary of the drift.
    """

    primary_drift_dimension: str | int
    dimension_contributions: dict[str | int, float]
    top_contributors: list[tuple[str | int, float]]
    metric_used: str
    interpretation: str

    def to_dict(self) -> dict:
        """Serialize this explanation to a plain dictionary."""
        return dict(
            primary_drift_dimension=self.primary_drift_dimension,
            dimension_contributions=self.dimension_contributions,
            top_contributors=self.top_contributors,
            metric_used=self.metric_used,
            interpretation=self.interpretation,
        )
113
+
114
+
115
def verify(output_a: str, output_b: str) -> VerificationScore:
    """
    Calculate a drift/hallucination score between two text outputs.

    Pure function with no side effects: it blends lexical, structural,
    and numerical drift components into one weighted score.

    Args:
        output_a: First output (typically from model A / generator).
        output_b: Second output (typically from model B / verifier).

    Returns:
        VerificationScore carrying the combined drift score, a confidence
        derived from how well the component methods agree, and per-component
        details.

    Example:
        >>> score = verify("def add(a, b): return a + b", "def add(x, y): return x + y")
        >>> score.drift_score  # Low score - semantically similar
        0.15
    """
    # Degenerate inputs: both empty means no drift; exactly one empty is maximal drift.
    if not output_a and not output_b:
        return VerificationScore(
            drift_score=0.0,
            confidence=1.0,
            drift_type=DriftType.LEXICAL,
            details={"reason": "both_empty"},
        )
    if not output_a or not output_b:
        return VerificationScore(
            drift_score=1.0,
            confidence=1.0,
            drift_type=DriftType.STRUCTURAL,
            details={"reason": "one_empty"},
        )

    # Component analyses, each returning a dict with at least a "score" entry.
    components = {
        DriftType.LEXICAL: _lexical_drift(output_a, output_b),
        DriftType.STRUCTURAL: _structural_drift(output_a, output_b),
        DriftType.NUMERICAL: _numerical_drift(output_a, output_b),
    }

    # Fixed blend: structure weighted slightly above the other two components.
    weights = {"lexical": 0.3, "structural": 0.4, "numerical": 0.3}
    blended = (
        weights["lexical"] * components[DriftType.LEXICAL]["score"]
        + weights["structural"] * components[DriftType.STRUCTURAL]["score"]
        + weights["numerical"] * components[DriftType.NUMERICAL]["score"]
    )

    # The dominant drift type is the component with the highest raw score.
    component_scores = {kind: comp["score"] for kind, comp in components.items()}
    dominant = max(component_scores, key=component_scores.get)

    # Agreement between methods drives confidence: low spread -> high confidence.
    raw_values = list(component_scores.values())
    agreement = 1.0 - np.std(raw_values) if len(raw_values) > 1 else 0.8

    return VerificationScore(
        drift_score=float(np.clip(blended, 0.0, 1.0)),
        confidence=float(np.clip(agreement, 0.0, 1.0)),
        drift_type=dominant,
        details={
            "lexical": components[DriftType.LEXICAL],
            "structural": components[DriftType.STRUCTURAL],
            "numerical": components[DriftType.NUMERICAL],
            "weights": weights,
        },
    )
187
+
188
+
189
def verify_embeddings(
    embedding_a: ArrayLike,
    embedding_b: ArrayLike,
    metric: str = "cosine",
    weights: ArrayLike | None = None,
    threshold_profile: str | None = None,
    explain: bool = False,
    dimension_names: list[str] | None = None,
    audit_trail: AuditTrail | None = None,
) -> VerificationScore:
    """
    Calculate a drift score between two embedding vectors.

    Enhanced verification with configurable metrics, dimensional weighting,
    threshold profiles, and explainability (CMVK-001 through CMVK-010).

    Args:
        embedding_a: Embedding vector for output A (e.g., claimed values).
        embedding_b: Embedding vector for output B (e.g., observed values).
        metric: Distance metric — "cosine" (default), "euclidean",
            "manhattan", "chebyshev", or "mahalanobis".
        weights: Optional per-dimension weights (CMVK-008); larger weight
            means the dimension contributes more to drift.
        threshold_profile: Optional profile name (CMVK-005), e.g. "carbon",
            "financial", "medical", "general", "strict". When set and the
            caller left `metric` at its default, the profile's preferred
            metric is used instead.
        explain: When True, attach a per-dimension explanation (CMVK-010).
        dimension_names: Optional dimension labels used by the explanation.
        audit_trail: Optional AuditTrail to log the verification (CMVK-006).

    Returns:
        VerificationScore with drift score, confidence, details, and an
        optional explanation. A shape mismatch short-circuits to drift 1.0
        with reduced confidence.
    """
    from .metrics import calculate_distance, calculate_weighted_distance

    a = np.asarray(embedding_a, dtype=np.float64)
    b = np.asarray(embedding_b, dtype=np.float64)

    # Resolve the threshold profile, letting it override the default metric.
    profile = None
    if threshold_profile:
        from .profiles import get_profile

        profile = get_profile(threshold_profile)
        if metric == "cosine" and profile.default_metric != "cosine":
            metric = profile.default_metric

    # Incompatible shapes cannot be compared; report maximal structural drift.
    if a.shape != b.shape:
        mismatch = VerificationScore(
            drift_score=1.0,
            confidence=0.5,
            drift_type=DriftType.STRUCTURAL,
            details={"reason": "shape_mismatch", "shape_a": a.shape, "shape_b": b.shape},
        )
        if audit_trail:
            _log_to_audit(audit_trail, a, b, mismatch, metric, threshold_profile)
        return mismatch

    # Distance computation, weighted when per-dimension weights were given.
    if weights is not None:
        dist_result = calculate_weighted_distance(a, b, weights=weights, metric=metric)
    else:
        dist_result = calculate_distance(a, b, metric=metric)

    score = float(np.clip(dist_result.normalized, 0.0, 1.0))
    conf = _calculate_embedding_confidence(a, b)

    # Optional explainability payload.
    expl_dict = None
    if explain:
        expl_dict = _build_drift_explanation(a, b, dist_result, weights, dimension_names).to_dict()

    info = {
        "metric": metric,
        "raw_distance": dist_result.distance,
        "normalized_distance": dist_result.normalized,
        **dist_result.details,
    }

    # Attach profile verdict (pass/fail + severity) when a profile is active.
    if profile:
        info["profile"] = {
            "name": profile.name,
            "drift_threshold": profile.drift_threshold,
            "passed": profile.is_within_threshold(score, conf),
            "severity": profile.get_severity(score),
        }

    outcome = VerificationScore(
        drift_score=score,
        confidence=conf,
        drift_type=DriftType.SEMANTIC,
        details=info,
        explanation=expl_dict,
    )

    if audit_trail:
        _log_to_audit(audit_trail, a, b, outcome, metric, threshold_profile)

    return outcome
325
+
326
+
327
+ def verify_embeddings_batch(
328
+ embeddings_a: Sequence[ArrayLike],
329
+ embeddings_b: Sequence[ArrayLike],
330
+ metric: str = "cosine",
331
+ weights: ArrayLike | None = None,
332
+ threshold_profile: str | None = None,
333
+ explain: bool = False,
334
+ dimension_names: list[str] | None = None,
335
+ audit_trail: AuditTrail | None = None,
336
+ ) -> list[VerificationScore]:
337
+ """
338
+ Verify multiple embedding pairs efficiently (CMVK-004).
339
+
340
+ Processes all pairs with consistent settings and optional audit logging.
341
+
342
+ Args:
343
+ embeddings_a: Sequence of embedding vectors from source A
344
+ embeddings_b: Sequence of embedding vectors from source B
345
+ metric: Distance metric (applied to all pairs)
346
+ weights: Dimensional weights (applied to all pairs)
347
+ threshold_profile: Threshold profile name
348
+ explain: Whether to include explanations
349
+ dimension_names: Optional dimension names for explainability
350
+ audit_trail: Optional AuditTrail for logging
351
+
352
+ Returns:
353
+ List of VerificationScore for each pair
354
+
355
+ Raises:
356
+ ValueError: If sequence lengths don't match
357
+ """
358
+ if len(embeddings_a) != len(embeddings_b):
359
+ raise ValueError(
360
+ f"Length mismatch: embeddings_a has {len(embeddings_a)} items, "
361
+ f"embeddings_b has {len(embeddings_b)} items"
362
+ )
363
+
364
+ results = []
365
+ for vec_a, vec_b in zip(embeddings_a, embeddings_b, strict=True):
366
+ score = verify_embeddings(
367
+ vec_a,
368
+ vec_b,
369
+ metric=metric,
370
+ weights=weights,
371
+ threshold_profile=threshold_profile,
372
+ explain=explain,
373
+ dimension_names=dimension_names,
374
+ audit_trail=audit_trail,
375
+ )
376
+ results.append(score)
377
+
378
+ return results
379
+
380
+
381
def aggregate_embedding_scores(
    scores: Sequence[VerificationScore],
    threshold_profile: str | None = None,
) -> dict[str, Any]:
    """
    Aggregate multiple embedding verification scores with profile context.

    Args:
        scores: Sequence of VerificationScore objects.
        threshold_profile: Optional profile name used for pass/fail and
            severity classification. Without it, a fixed drift threshold of
            0.3 decides pass/fail and no severity histogram is produced.

    Returns:
        Dictionary with counts, pass rate, and drift/confidence statistics.
        When a profile is used, it also contains "severity_distribution"
        and "profile_used". Empty input yields {"count": 0}.
    """
    if not scores:
        return {"count": 0}

    profile = None
    if threshold_profile:
        from .profiles import get_profile

        profile = get_profile(threshold_profile)

    drift_values = [s.drift_score for s in scores]
    confidence_values = [s.confidence for s in scores]

    # Classify pass/fail (and severity, when a profile is available).
    if profile:
        passed_count = sum(
            1 for s in scores if profile.is_within_threshold(s.drift_score, s.confidence)
        )
        # Seed the standard buckets, but tolerate any label get_severity may
        # return — the previous fixed-key dict raised KeyError on labels
        # outside {pass, warning, critical, severe}.
        severity_counts: dict[str, int] = {
            "pass": 0,
            "warning": 0,
            "critical": 0,
            "severe": 0,
        }
        for s in scores:
            severity = profile.get_severity(s.drift_score)
            severity_counts[severity] = severity_counts.get(severity, 0) + 1
    else:
        passed_count = sum(1 for s in scores if s.drift_score <= 0.3)
        severity_counts = {}

    result: dict[str, Any] = {
        "count": len(scores),
        "passed_count": passed_count,
        "failed_count": len(scores) - passed_count,
        "pass_rate": passed_count / len(scores),
        "mean_drift": float(np.mean(drift_values)),
        "std_drift": float(np.std(drift_values)),
        "min_drift": float(np.min(drift_values)),
        "max_drift": float(np.max(drift_values)),
        "median_drift": float(np.median(drift_values)),
        "mean_confidence": float(np.mean(confidence_values)),
        "p95_drift": float(np.percentile(drift_values, 95)),
    }

    if severity_counts and profile:
        result["severity_distribution"] = severity_counts
        result["profile_used"] = profile.name

    return result
444
+
445
+
446
+ # ============================================================================
447
+ # Explainability Functions (CMVK-010)
448
+ # ============================================================================
449
+
450
+
451
def _build_drift_explanation(
    vec_a: np.ndarray,
    vec_b: np.ndarray,
    metric_result: Any,
    weights: ArrayLike | None,
    dimension_names: list[str] | None,
) -> DriftExplanation:
    """Build a per-dimension explanation of where the drift comes from."""
    abs_diff = np.abs(vec_a - vec_b)

    # Scale differences by importance weights when the caller supplied them.
    if weights is not None:
        scaled = abs_diff * np.asarray(weights, dtype=np.float64)
    else:
        scaled = abs_diff

    # Normalize to fractional shares of the total (weighted) drift.
    total = np.sum(scaled)
    shares = scaled / total if total > 0 else np.zeros_like(abs_diff)

    # Label dimensions by name when names line up with the vector, else by index.
    labels: list[str | int]
    if dimension_names and len(dimension_names) == len(shares):
        labels = list(dimension_names)
    else:
        labels = list(range(len(shares)))

    contrib_dict: dict[str | int, float] = {
        label: float(share) for label, share in zip(labels, shares, strict=False)
    }
    ranked = sorted(contrib_dict.items(), key=lambda item: item[1], reverse=True)
    primary_dim: str | int = ranked[0][0]

    # Keep at most the five largest contributors.
    top_contributors: list[tuple[str | int, float]] = ranked[:5]

    summary = _generate_interpretation(
        vec_a, vec_b, primary_dim, top_contributors, dimension_names
    )

    return DriftExplanation(
        primary_drift_dimension=primary_dim,
        dimension_contributions=contrib_dict,
        top_contributors=top_contributors,
        metric_used=metric_result.metric.value,
        interpretation=summary,
    )
503
+
504
+
505
+ def _generate_interpretation(
506
+ vec_a: np.ndarray,
507
+ vec_b: np.ndarray,
508
+ primary_dim: str | int,
509
+ top_contributors: list[tuple[str | int, float]],
510
+ dimension_names: list[str] | None,
511
+ ) -> str:
512
+ """Generate human-readable interpretation of drift."""
513
+ # Get primary dimension index
514
+ if isinstance(primary_dim, str) and dimension_names:
515
+ idx = dimension_names.index(primary_dim)
516
+ else:
517
+ idx = primary_dim if isinstance(primary_dim, int) else 0
518
+
519
+ diff_value = abs(vec_a[idx] - vec_b[idx])
520
+ pct_diff = (diff_value / abs(vec_a[idx])) * 100 if vec_a[idx] != 0 else float("inf")
521
+
522
+ dim_name = primary_dim if isinstance(primary_dim, str) else f"dimension {primary_dim}"
523
+
524
+ if len(top_contributors) > 1 and top_contributors[0][1] > 0.5:
525
+ return (
526
+ f"Drift primarily driven by {dim_name} "
527
+ f"({top_contributors[0][1]*100:.1f}% of total drift). "
528
+ f"Value changed from {vec_a[idx]:.4f} to {vec_b[idx]:.4f} "
529
+ f"({pct_diff:.1f}% difference)."
530
+ )
531
+ elif len(top_contributors) > 1:
532
+ top_names = [str(c[0]) for c, _ in zip(top_contributors[:3], range(3), strict=False)]
533
+ return (
534
+ f"Drift distributed across multiple dimensions. "
535
+ f"Top contributors: {', '.join(top_names)}. "
536
+ f"Largest single change in {dim_name}."
537
+ )
538
+ else:
539
+ return f"Single dimension drift in {dim_name}."
540
+
541
+
542
+ def _calculate_embedding_confidence(
543
+ vec_a: np.ndarray,
544
+ vec_b: np.ndarray,
545
+ ) -> float:
546
+ """Calculate confidence score for embedding verification."""
547
+ # Base confidence
548
+ confidence = 0.9
549
+
550
+ # Reduce confidence for very small vectors (less reliable)
551
+ if len(vec_a) < 10:
552
+ confidence *= 0.9
553
+
554
+ # Reduce confidence if vectors have very different magnitudes
555
+ norm_a = np.linalg.norm(vec_a)
556
+ norm_b = np.linalg.norm(vec_b)
557
+ if norm_a > 0 and norm_b > 0:
558
+ magnitude_ratio = min(norm_a, norm_b) / max(norm_a, norm_b)
559
+ if magnitude_ratio < 0.5:
560
+ confidence *= 0.85
561
+
562
+ # Reduce confidence for near-zero vectors
563
+ if norm_a < 1e-6 or norm_b < 1e-6:
564
+ confidence *= 0.7
565
+
566
+ return float(np.clip(confidence, 0.0, 1.0))
567
+
568
+
569
def _log_to_audit(
    audit_trail: AuditTrail,
    vec_a: np.ndarray,
    vec_b: np.ndarray,
    result: VerificationScore,
    metric: str,
    profile_name: str | None,
) -> None:
    """Record an embedding verification event on the audit trail."""
    # Pass/fail comes from the profile result when present; otherwise fall
    # back to a fixed 0.3 drift threshold.
    profile_details = result.details.get("profile", {})
    passed = profile_details.get("passed", result.drift_score <= 0.3)

    input_summary = {
        "embedding_a_shape": vec_a.shape,
        "embedding_b_shape": vec_b.shape,
        "embedding_a_norm": float(np.linalg.norm(vec_a)),
        "embedding_b_norm": float(np.linalg.norm(vec_b)),
    }

    audit_trail.log(
        operation="verify_embeddings",
        inputs=input_summary,
        drift_score=result.drift_score,
        confidence=result.confidence,
        metric_used=metric,
        profile_used=profile_name,
        passed=passed,
        result_details={
            "drift_type": result.drift_type.value,
            "raw_distance": result.details.get("raw_distance"),
        },
    )
598
+
599
+
600
def verify_distributions(dist_a: ArrayLike, dist_b: ArrayLike) -> VerificationScore:
    """
    Calculate drift between two probability distributions.

    Compares the inputs with KL divergence, Jensen-Shannon divergence, and
    total variation distance; the normalized JS divergence is the drift score.

    Args:
        dist_a: First probability distribution
        dist_b: Second probability distribution

    Returns:
        VerificationScore with distribution-based drift score
    """
    p = np.asarray(dist_a, dtype=np.float64)
    q = np.asarray(dist_b, dtype=np.float64)

    # Force both inputs into valid probability distributions; the epsilon in
    # the denominator guards against an all-zero input.
    p = p / (p.sum() + 1e-10)
    q = q / (q.sum() + 1e-10)

    # Clip away zeros so the logarithms below stay finite.
    eps = 1e-10
    p = np.clip(p, eps, 1.0)
    q = np.clip(q, eps, 1.0)

    # Midpoint distribution used by the (symmetric) Jensen-Shannon divergence.
    m = 0.5 * (p + q)

    if HAS_SCIPY:
        kl_div = stats.entropy(p, q)
        js_div = 0.5 * stats.entropy(p, m) + 0.5 * stats.entropy(q, m)
    else:
        # Pure-numpy fallbacks when scipy is unavailable.
        kl_div = float(np.sum(p * np.log(p / q)))
        js_div = 0.5 * np.sum(p * np.log(p / m)) + 0.5 * np.sum(q * np.log(q / m))

    # Total variation distance, reported in details only.
    tv_dist = 0.5 * np.sum(np.abs(p - q))

    # JS divergence is bounded by ln(2); divide to land in [0, 1].
    drift_score = js_div / np.log(2)

    return VerificationScore(
        drift_score=float(np.clip(drift_score, 0.0, 1.0)),
        confidence=0.9,
        drift_type=DriftType.NUMERICAL,
        details={
            "kl_divergence": float(kl_div),
            "js_divergence": float(js_div),
            "total_variation": float(tv_dist),
        },
    )
653
+
654
+
655
def verify_sequences(seq_a: Sequence[str], seq_b: Sequence[str]) -> VerificationScore:
    """
    Calculate drift between two sequences of tokens/items.

    Blends edit distance, set overlap, and longest-common-subsequence
    alignment into a single drift score.

    Args:
        seq_a: First sequence
        seq_b: Second sequence

    Returns:
        VerificationScore with sequence-based drift score
    """
    # Two empty sequences are trivially identical.
    if not seq_a and not seq_b:
        return VerificationScore(
            drift_score=0.0,
            confidence=1.0,
            drift_type=DriftType.LEXICAL,
            details={"reason": "both_empty"},
        )

    # Edit distance, normalized by the longer sequence's length.
    edit_dist = _levenshtein_distance(seq_a, seq_b)
    longest = max(len(seq_a), len(seq_b))
    normalized_edit = edit_dist / longest if longest > 0 else 0.0

    # Set overlap, ignoring order and multiplicity.
    unique_a, unique_b = set(seq_a), set(seq_b)
    union_size = len(unique_a | unique_b)
    jaccard = len(unique_a & unique_b) / union_size if union_size > 0 else 1.0
    jaccard_drift = 1.0 - jaccard

    # Order-aware overlap via longest common subsequence.
    total_len = len(seq_a) + len(seq_b)
    lcs_ratio = 2 * _lcs_length(seq_a, seq_b) / total_len if total_len > 0 else 1.0
    lcs_drift = 1.0 - lcs_ratio

    # Weighted blend of the three drift signals.
    drift_score = 0.4 * normalized_edit + 0.3 * jaccard_drift + 0.3 * lcs_drift

    # NOTE(review): non-empty inputs report STRUCTURAL while the empty case
    # above reports LEXICAL — looks inconsistent; confirm which is intended.
    return VerificationScore(
        drift_score=float(np.clip(drift_score, 0.0, 1.0)),
        confidence=0.85,
        drift_type=DriftType.STRUCTURAL,
        details={
            "edit_distance": edit_dist,
            "normalized_edit": float(normalized_edit),
            "jaccard_similarity": float(jaccard),
            "lcs_ratio": float(lcs_ratio),
        },
    )
708
+
709
+
710
+ # ============================================================================
711
+ # Internal pure functions
712
+ # ============================================================================
713
+
714
+
715
+ def _lexical_drift(text_a: str, text_b: str) -> dict:
716
+ """
717
+ Calculate lexical drift between two texts.
718
+
719
+ Pure function - no side effects.
720
+ """
721
+ # Character-level comparison
722
+ chars_a = set(text_a)
723
+ chars_b = set(text_b)
724
+ char_jaccard = len(chars_a & chars_b) / len(chars_a | chars_b) if (chars_a | chars_b) else 1.0
725
+
726
+ # Word-level comparison
727
+ words_a = set(text_a.split())
728
+ words_b = set(text_b.split())
729
+ word_jaccard = len(words_a & words_b) / len(words_a | words_b) if (words_a | words_b) else 1.0
730
+
731
+ # Length ratio
732
+ len_a, len_b = len(text_a), len(text_b)
733
+ length_ratio = min(len_a, len_b) / max(len_a, len_b) if max(len_a, len_b) > 0 else 1.0
734
+
735
+ # Combined score (lower similarity = higher drift)
736
+ similarity = 0.3 * char_jaccard + 0.5 * word_jaccard + 0.2 * length_ratio
737
+ drift = 1.0 - similarity
738
+
739
+ return {
740
+ "score": drift,
741
+ "char_jaccard": char_jaccard,
742
+ "word_jaccard": word_jaccard,
743
+ "length_ratio": length_ratio,
744
+ }
745
+
746
+
747
+ def _structural_drift(text_a: str, text_b: str) -> dict:
748
+ """
749
+ Calculate structural drift between two texts.
750
+
751
+ Analyzes structure like line count, indentation, code patterns.
752
+ Pure function - no side effects.
753
+ """
754
+ lines_a = text_a.split("\n")
755
+ lines_b = text_b.split("\n")
756
+
757
+ # Line count difference
758
+ line_count_a, line_count_b = len(lines_a), len(lines_b)
759
+ line_ratio = (
760
+ min(line_count_a, line_count_b) / max(line_count_a, line_count_b)
761
+ if max(line_count_a, line_count_b) > 0
762
+ else 1.0
763
+ )
764
+
765
+ # Indentation pattern
766
+ indent_a = [len(line) - len(line.lstrip()) for line in lines_a if line.strip()]
767
+ indent_b = [len(line) - len(line.lstrip()) for line in lines_b if line.strip()]
768
+
769
+ if indent_a and indent_b:
770
+ avg_indent_a = np.mean(indent_a)
771
+ avg_indent_b = np.mean(indent_b)
772
+ max_indent = max(avg_indent_a, avg_indent_b, 1)
773
+ indent_similarity = 1.0 - abs(avg_indent_a - avg_indent_b) / max_indent
774
+ else:
775
+ indent_similarity = 1.0 if (not indent_a and not indent_b) else 0.5
776
+
777
+ # Code pattern markers (for code comparison)
778
+ patterns = ["def ", "class ", "import ", "return ", "if ", "for ", "while ", "try:", "except"]
779
+ pattern_a = {p for p in patterns if p in text_a}
780
+ pattern_b = {p for p in patterns if p in text_b}
781
+ pattern_jaccard = (
782
+ len(pattern_a & pattern_b) / len(pattern_a | pattern_b) if (pattern_a | pattern_b) else 1.0
783
+ )
784
+
785
+ # Combined
786
+ similarity = 0.3 * line_ratio + 0.3 * indent_similarity + 0.4 * pattern_jaccard
787
+ drift = 1.0 - similarity
788
+
789
+ return {
790
+ "score": drift,
791
+ "line_ratio": line_ratio,
792
+ "indent_similarity": indent_similarity,
793
+ "pattern_jaccard": pattern_jaccard,
794
+ }
795
+
796
+
797
+ def _numerical_drift(text_a: str, text_b: str) -> dict:
798
+ """
799
+ Calculate numerical drift by extracting and comparing numbers.
800
+
801
+ Pure function - no side effects.
802
+ """
803
+ import re
804
+
805
+ # Extract numbers from both texts
806
+ number_pattern = r"-?\d+\.?\d*"
807
+ numbers_a = [float(n) for n in re.findall(number_pattern, text_a)]
808
+ numbers_b = [float(n) for n in re.findall(number_pattern, text_b)]
809
+
810
+ if not numbers_a and not numbers_b:
811
+ return {"score": 0.0, "reason": "no_numbers"}
812
+
813
+ if not numbers_a or not numbers_b:
814
+ return {"score": 0.5, "reason": "numbers_only_in_one"}
815
+
816
+ # Compare statistics
817
+ mean_a, mean_b = np.mean(numbers_a), np.mean(numbers_b)
818
+ std_a, std_b = np.std(numbers_a), np.std(numbers_b)
819
+
820
+ # Relative difference in means
821
+ max_mean = max(abs(mean_a), abs(mean_b), 1e-10)
822
+ mean_diff = abs(mean_a - mean_b) / max_mean
823
+
824
+ # Relative difference in stds
825
+ max_std = max(std_a, std_b, 1e-10)
826
+ std_diff = abs(std_a - std_b) / max_std if max_std > 1e-10 else 0.0
827
+
828
+ # Count difference
829
+ count_ratio = min(len(numbers_a), len(numbers_b)) / max(len(numbers_a), len(numbers_b))
830
+
831
+ # Combined
832
+ drift = 0.4 * min(mean_diff, 1.0) + 0.3 * min(std_diff, 1.0) + 0.3 * (1.0 - count_ratio)
833
+
834
+ return {
835
+ "score": drift,
836
+ "mean_a": mean_a,
837
+ "mean_b": mean_b,
838
+ "std_a": std_a,
839
+ "std_b": std_b,
840
+ "count_a": len(numbers_a),
841
+ "count_b": len(numbers_b),
842
+ }
843
+
844
+
845
+ def _levenshtein_distance(seq_a: Sequence, seq_b: Sequence) -> int:
846
+ """
847
+ Calculate Levenshtein edit distance between two sequences.
848
+
849
+ Pure function using dynamic programming.
850
+ """
851
+ m, n = len(seq_a), len(seq_b)
852
+
853
+ if m == 0:
854
+ return n
855
+ if n == 0:
856
+ return m
857
+
858
+ # Use numpy for efficiency
859
+ dp = np.zeros((m + 1, n + 1), dtype=np.int32)
860
+ dp[:, 0] = np.arange(m + 1)
861
+ dp[0, :] = np.arange(n + 1)
862
+
863
+ for i in range(1, m + 1):
864
+ for j in range(1, n + 1):
865
+ cost = 0 if seq_a[i - 1] == seq_b[j - 1] else 1
866
+ dp[i, j] = min(
867
+ dp[i - 1, j] + 1, # deletion
868
+ dp[i, j - 1] + 1, # insertion
869
+ dp[i - 1, j - 1] + cost, # substitution
870
+ )
871
+
872
+ return int(dp[m, n])
873
+
874
+
875
+ def _lcs_length(seq_a: Sequence, seq_b: Sequence) -> int:
876
+ """
877
+ Calculate length of Longest Common Subsequence.
878
+
879
+ Pure function using dynamic programming.
880
+ """
881
+ m, n = len(seq_a), len(seq_b)
882
+
883
+ if m == 0 or n == 0:
884
+ return 0
885
+
886
+ dp = np.zeros((m + 1, n + 1), dtype=np.int32)
887
+
888
+ for i in range(1, m + 1):
889
+ for j in range(1, n + 1):
890
+ if seq_a[i - 1] == seq_b[j - 1]:
891
+ dp[i, j] = dp[i - 1, j - 1] + 1
892
+ else:
893
+ dp[i, j] = max(dp[i - 1, j], dp[i, j - 1])
894
+
895
+ return int(dp[m, n])
896
+
897
+
898
+ # ============================================================================
899
+ # Batch verification functions
900
+ # ============================================================================
901
+
902
+
903
def verify_batch(outputs_a: Sequence[str], outputs_b: Sequence[str]) -> list[VerificationScore]:
    """
    Verify multiple output pairs element-wise.

    Pure function that processes pairs in sequence.

    Args:
        outputs_a: Sequence of outputs from source A
        outputs_b: Sequence of outputs from source B (same length as outputs_a)

    Returns:
        List of VerificationScore for each pair

    Raises:
        ValueError: If the two sequences differ in length.
    """
    # Fail fast on mismatched inputs rather than silently truncating.
    if len(outputs_a) != len(outputs_b):
        raise ValueError(
            f"Length mismatch: outputs_a has {len(outputs_a)} items, "
            f"outputs_b has {len(outputs_b)} items"
        )

    # map() pairs the sequences positionally, just like zip() would.
    return list(map(verify, outputs_a, outputs_b))
923
+
924
+
925
+ def aggregate_scores(scores: Sequence[VerificationScore]) -> dict:
926
+ """
927
+ Aggregate multiple verification scores into summary statistics.
928
+
929
+ Pure function.
930
+
931
+ Args:
932
+ scores: Sequence of VerificationScore objects
933
+
934
+ Returns:
935
+ Dictionary with aggregate statistics
936
+ """
937
+ if not scores:
938
+ return {"count": 0}
939
+
940
+ drift_values = [s.drift_score for s in scores]
941
+ confidence_values = [s.confidence for s in scores]
942
+
943
+ drift_types: dict[str, int] = {}
944
+ for s in scores:
945
+ drift_types[s.drift_type.value] = drift_types.get(s.drift_type.value, 0) + 1
946
+
947
+ return {
948
+ "count": len(scores),
949
+ "mean_drift": float(np.mean(drift_values)),
950
+ "std_drift": float(np.std(drift_values)),
951
+ "min_drift": float(np.min(drift_values)),
952
+ "max_drift": float(np.max(drift_values)),
953
+ "median_drift": float(np.median(drift_values)),
954
+ "mean_confidence": float(np.mean(confidence_values)),
955
+ "drift_type_distribution": drift_types,
956
+ }