agent-os-kernel 1.1.0__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1051) hide show
  1. agent_os/__init__.py +66 -4
  2. agent_os/agents_compat.py +286 -0
  3. agent_os/base_agent.py +308 -0
  4. agent_os/cli.py +1079 -19
  5. agent_os/integrations/__init__.py +37 -2
  6. agent_os/integrations/openai_adapter.py +502 -0
  7. agent_os/integrations/semantic_kernel_adapter.py +569 -0
  8. agent_os/stateless.py +349 -0
  9. agent_os_kernel-1.3.0.dist-info/METADATA +676 -0
  10. agent_os_kernel-1.3.0.dist-info/RECORD +1053 -0
  11. {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.3.0.dist-info}/entry_points.txt +0 -1
  12. modules/amb/.github/workflows/ci.yml +102 -0
  13. modules/amb/.github/workflows/publish.yml +146 -0
  14. modules/amb/.gitignore +134 -0
  15. modules/amb/CHANGELOG.md +118 -0
  16. modules/amb/CONTRIBUTING.md +141 -0
  17. modules/amb/LICENSE +21 -0
  18. modules/amb/README.md +188 -0
  19. modules/amb/amb_core/__init__.py +175 -0
  20. modules/amb/amb_core/adapters/__init__.py +55 -0
  21. modules/amb/amb_core/adapters/aws_sqs_broker.py +374 -0
  22. modules/amb/amb_core/adapters/azure_servicebus_broker.py +338 -0
  23. modules/amb/amb_core/adapters/kafka_broker.py +258 -0
  24. modules/amb/amb_core/adapters/nats_broker.py +283 -0
  25. modules/amb/amb_core/adapters/rabbitmq_broker.py +233 -0
  26. modules/amb/amb_core/adapters/redis_broker.py +260 -0
  27. modules/amb/amb_core/broker.py +143 -0
  28. modules/amb/amb_core/bus.py +479 -0
  29. modules/amb/amb_core/cloudevents.py +507 -0
  30. modules/amb/amb_core/dlq.py +343 -0
  31. modules/amb/amb_core/hf_utils.py +534 -0
  32. modules/amb/amb_core/memory_broker.py +408 -0
  33. modules/amb/amb_core/models.py +139 -0
  34. modules/amb/amb_core/persistence.py +527 -0
  35. modules/amb/amb_core/schema.py +292 -0
  36. modules/amb/amb_core/tracing.py +356 -0
  37. modules/amb/examples/advanced_features.py +223 -0
  38. modules/amb/examples/backpressure_demo.py +225 -0
  39. modules/amb/examples/basic_usage.py +117 -0
  40. modules/amb/examples/tracing_demo.py +104 -0
  41. modules/amb/experiments/README.md +52 -0
  42. modules/amb/experiments/reproduce_results.py +467 -0
  43. modules/amb/experiments/results.json +324 -0
  44. modules/amb/paper/README.md +40 -0
  45. modules/amb/paper/paper.tex +365 -0
  46. modules/amb/paper/whitepaper.md +377 -0
  47. modules/amb/pyproject.toml +117 -0
  48. modules/amb/tests/__init__.py +1 -0
  49. modules/amb/tests/test_backpressure_priority.py +280 -0
  50. modules/amb/tests/test_bus.py +198 -0
  51. modules/amb/tests/test_cloudevents.py +443 -0
  52. modules/amb/tests/test_features.py +531 -0
  53. modules/amb/tests/test_models.py +74 -0
  54. modules/amb/tests/test_tracing.py +254 -0
  55. modules/atr/.github/workflows/ci.yml +101 -0
  56. modules/atr/.github/workflows/publish.yml +140 -0
  57. modules/atr/.gitignore +134 -0
  58. modules/atr/.pre-commit-config.yaml +37 -0
  59. modules/atr/CHANGELOG.md +39 -0
  60. modules/atr/CONTRIBUTING.md +96 -0
  61. modules/atr/IMPLEMENTATION_SUMMARY.md +143 -0
  62. modules/atr/README.md +180 -0
  63. modules/atr/atr/__init__.py +638 -0
  64. modules/atr/atr/access.py +346 -0
  65. modules/atr/atr/composition.py +643 -0
  66. modules/atr/atr/decorator.py +355 -0
  67. modules/atr/atr/executor.py +382 -0
  68. modules/atr/atr/health.py +555 -0
  69. modules/atr/atr/hf_utils.py +447 -0
  70. modules/atr/atr/injection.py +420 -0
  71. modules/atr/atr/metrics.py +438 -0
  72. modules/atr/atr/policies.py +401 -0
  73. modules/atr/atr/py.typed +2 -0
  74. modules/atr/atr/registry.py +450 -0
  75. modules/atr/atr/schema.py +478 -0
  76. modules/atr/atr/tools/safe/__init__.py +73 -0
  77. modules/atr/atr/tools/safe/calculator.py +380 -0
  78. modules/atr/atr/tools/safe/datetime_tool.py +441 -0
  79. modules/atr/atr/tools/safe/file_reader.py +400 -0
  80. modules/atr/atr/tools/safe/http_client.py +314 -0
  81. modules/atr/atr/tools/safe/json_parser.py +372 -0
  82. modules/atr/atr/tools/safe/text_tool.py +526 -0
  83. modules/atr/atr/tools/safe/toolkit.py +173 -0
  84. modules/atr/docs/PYPI_SETUP.md +113 -0
  85. modules/atr/examples/README.md +27 -0
  86. modules/atr/examples/demo.py +144 -0
  87. modules/atr/examples/sandbox_demo.py +218 -0
  88. modules/atr/experiments/README.md +69 -0
  89. modules/atr/experiments/reproduce_results.py +509 -0
  90. modules/atr/experiments/results/.gitkeep +0 -0
  91. modules/atr/experiments/results/results_20260123_140334.json +71 -0
  92. modules/atr/paper/README.md +36 -0
  93. modules/atr/paper/figures/.gitkeep +0 -0
  94. modules/atr/paper/references.bib +84 -0
  95. modules/atr/paper/structure.tex +293 -0
  96. modules/atr/paper/whitepaper.md +234 -0
  97. modules/atr/pyproject.toml +148 -0
  98. modules/atr/requirements.txt +1 -0
  99. modules/atr/setup.py +30 -0
  100. modules/atr/tests/__init__.py +1 -0
  101. modules/atr/tests/test_decorator.py +317 -0
  102. modules/atr/tests/test_executor.py +245 -0
  103. modules/atr/tests/test_integration_executor.py +184 -0
  104. modules/atr/tests/test_registry.py +312 -0
  105. modules/atr/tests/test_schema.py +182 -0
  106. modules/atr/tests/test_v2_features.py +708 -0
  107. modules/caas/.dockerignore +63 -0
  108. modules/caas/.github/ISSUE_TEMPLATE/bug_report.md +38 -0
  109. modules/caas/.github/ISSUE_TEMPLATE/custom.md +10 -0
  110. modules/caas/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
  111. modules/caas/.github/workflows/ci.yml +100 -0
  112. modules/caas/.github/workflows/lint.yml +39 -0
  113. modules/caas/.github/workflows/publish-pypi.yml +124 -0
  114. modules/caas/.gitignore +73 -0
  115. modules/caas/.pre-commit-config.yaml +33 -0
  116. modules/caas/CHANGELOG.md +58 -0
  117. modules/caas/CONTRIBUTING.md +346 -0
  118. modules/caas/Dockerfile +41 -0
  119. modules/caas/LICENSE +21 -0
  120. modules/caas/MANIFEST.in +11 -0
  121. modules/caas/README.md +158 -0
  122. modules/caas/benchmarks/README.md +255 -0
  123. modules/caas/benchmarks/create_hf_dataset.py +502 -0
  124. modules/caas/benchmarks/data/sample_corpus/README.md +86 -0
  125. modules/caas/benchmarks/data/sample_corpus/auth_module.py +211 -0
  126. modules/caas/benchmarks/data/sample_corpus/contribution_guide.md +185 -0
  127. modules/caas/benchmarks/data/sample_corpus/remote_work_policy.html +57 -0
  128. modules/caas/benchmarks/hf_dataset/README.md +214 -0
  129. modules/caas/benchmarks/hf_dataset/caas_benchmark_corpus.py +73 -0
  130. modules/caas/benchmarks/hf_dataset/corpus_preview.json +193 -0
  131. modules/caas/benchmarks/results/README.md +66 -0
  132. modules/caas/benchmarks/results/evaluation_2026-01-20.json +121 -0
  133. modules/caas/benchmarks/run_evaluation.py +561 -0
  134. modules/caas/benchmarks/statistical_tests.py +289 -0
  135. modules/caas/benchmarks/verify_sample_corpus.py +83 -0
  136. modules/caas/docker-compose.yml +38 -0
  137. modules/caas/docs/CONTEXT_TRIAD.md +462 -0
  138. modules/caas/docs/CONTRIBUTING.md +346 -0
  139. modules/caas/docs/ETHICS_AND_LIMITATIONS.md +336 -0
  140. modules/caas/docs/HEURISTIC_ROUTER.md +442 -0
  141. modules/caas/docs/IMPLEMENTATION_SUMMARY.md +363 -0
  142. modules/caas/docs/IMPLEMENTATION_SUMMARY_CONTEXT_TRIAD.md +277 -0
  143. modules/caas/docs/IMPLEMENTATION_SUMMARY_HEURISTIC_ROUTER.md +231 -0
  144. modules/caas/docs/IMPLEMENTATION_SUMMARY_METADATA_INJECTION.md +258 -0
  145. modules/caas/docs/IMPLEMENTATION_SUMMARY_PRAGMATIC_TRUTH.md +212 -0
  146. modules/caas/docs/IMPLEMENTATION_SUMMARY_TRUST_GATEWAY.md +319 -0
  147. modules/caas/docs/LAYER_1_PRIMITIVE.md +202 -0
  148. modules/caas/docs/METADATA_INJECTION.md +404 -0
  149. modules/caas/docs/PRAGMATIC_TRUTH.md +431 -0
  150. modules/caas/docs/RELATED_WORK.md +312 -0
  151. modules/caas/docs/RELEASE_CHECKLIST.md +219 -0
  152. modules/caas/docs/RELEASE_GUIDE.md +285 -0
  153. modules/caas/docs/REPRODUCIBILITY.md +386 -0
  154. modules/caas/docs/SLIDING_WINDOW.md +387 -0
  155. modules/caas/docs/STRUCTURE_AWARE_INDEXING.md +158 -0
  156. modules/caas/docs/TESTING.md +259 -0
  157. modules/caas/docs/THREAT_MODEL.md +247 -0
  158. modules/caas/docs/TRUST_GATEWAY.md +575 -0
  159. modules/caas/docs/VFS.md +298 -0
  160. modules/caas/examples/agents/enterprise_security_agent.py +414 -0
  161. modules/caas/examples/agents/intelligent_document_analyzer.py +380 -0
  162. modules/caas/examples/demos/demo.py +309 -0
  163. modules/caas/examples/demos/demo_context_triad.py +225 -0
  164. modules/caas/examples/demos/demo_conversation_manager.py +285 -0
  165. modules/caas/examples/demos/demo_heuristic_router.py +133 -0
  166. modules/caas/examples/demos/demo_metadata_injection.py +198 -0
  167. modules/caas/examples/demos/demo_pragmatic_truth.py +303 -0
  168. modules/caas/examples/demos/demo_structure_aware.py +140 -0
  169. modules/caas/examples/demos/demo_time_decay.py +247 -0
  170. modules/caas/examples/demos/demo_trust_gateway.py +383 -0
  171. modules/caas/examples/multi_agent/README.md +159 -0
  172. modules/caas/examples/multi_agent/research_team.py +369 -0
  173. modules/caas/examples/multi_agent/vfs_collaboration.py +393 -0
  174. modules/caas/examples/usage/auth_module.py +142 -0
  175. modules/caas/examples/usage/usage_example.py +173 -0
  176. modules/caas/experiments/README.md +42 -0
  177. modules/caas/experiments/reproduce_results.py +462 -0
  178. modules/caas/paper/ARXIV_METADATA.md +145 -0
  179. modules/caas/paper/ARXIV_README.md +47 -0
  180. modules/caas/paper/CHECKLIST.md +103 -0
  181. modules/caas/paper/GITHUB_RELEASE_NOTES.md +105 -0
  182. modules/caas/paper/README.md +71 -0
  183. modules/caas/paper/abstract.md +24 -0
  184. modules/caas/paper/arxiv_submission.tar +0 -0
  185. modules/caas/paper/arxiv_submission.zip +0 -0
  186. modules/caas/paper/build_pdf.py +355 -0
  187. modules/caas/paper/experiments.md +149 -0
  188. modules/caas/paper/figures/.gitkeep +0 -0
  189. modules/caas/paper/figures/README.md +237 -0
  190. modules/caas/paper/figures/fig1_system_architecture.png +0 -0
  191. modules/caas/paper/figures/fig1_system_architecture.svg +198 -0
  192. modules/caas/paper/figures/fig2_context_triad.png +0 -0
  193. modules/caas/paper/figures/fig2_context_triad.svg +105 -0
  194. modules/caas/paper/figures/fig3_ablation_results.png +0 -0
  195. modules/caas/paper/figures/fig3_ablation_results.svg +113 -0
  196. modules/caas/paper/figures/fig4_routing_latency.png +0 -0
  197. modules/caas/paper/figures/fig4_routing_latency.svg +97 -0
  198. modules/caas/paper/intro.md +103 -0
  199. modules/caas/paper/latex/figures/fig1_system_architecture.png +0 -0
  200. modules/caas/paper/latex/figures/fig2_context_triad.png +0 -0
  201. modules/caas/paper/latex/figures/fig3_ablation_results.png +0 -0
  202. modules/caas/paper/latex/figures/fig4_routing_latency.png +0 -0
  203. modules/caas/paper/latex/main.tex +468 -0
  204. modules/caas/paper/latex/references.bib +140 -0
  205. modules/caas/paper/method.md +350 -0
  206. modules/caas/paper/outline.md +123 -0
  207. modules/caas/paper/related_work.md +101 -0
  208. modules/caas/paper/tables/.gitkeep +0 -0
  209. modules/caas/paper/tables/results_tables.md +50 -0
  210. modules/caas/pyproject.toml +172 -0
  211. modules/caas/requirements.txt +11 -0
  212. modules/caas/src/caas/__init__.py +232 -0
  213. modules/caas/src/caas/api/__init__.py +7 -0
  214. modules/caas/src/caas/api/server.py +1326 -0
  215. modules/caas/src/caas/caching.py +832 -0
  216. modules/caas/src/caas/cli.py +208 -0
  217. modules/caas/src/caas/conversation.py +221 -0
  218. modules/caas/src/caas/decay.py +118 -0
  219. modules/caas/src/caas/detection/__init__.py +7 -0
  220. modules/caas/src/caas/detection/detector.py +236 -0
  221. modules/caas/src/caas/enrichment.py +127 -0
  222. modules/caas/src/caas/gateway/__init__.py +24 -0
  223. modules/caas/src/caas/gateway/trust_gateway.py +471 -0
  224. modules/caas/src/caas/hf_utils.py +477 -0
  225. modules/caas/src/caas/ingestion/__init__.py +21 -0
  226. modules/caas/src/caas/ingestion/processors.py +251 -0
  227. modules/caas/src/caas/ingestion/structure_parser.py +185 -0
  228. modules/caas/src/caas/models.py +354 -0
  229. modules/caas/src/caas/pragmatic_truth.py +441 -0
  230. modules/caas/src/caas/routing/__init__.py +8 -0
  231. modules/caas/src/caas/routing/heuristic_router.py +242 -0
  232. modules/caas/src/caas/storage/__init__.py +7 -0
  233. modules/caas/src/caas/storage/store.py +450 -0
  234. modules/caas/src/caas/triad.py +472 -0
  235. modules/caas/src/caas/tuning/__init__.py +7 -0
  236. modules/caas/src/caas/tuning/tuner.py +322 -0
  237. modules/caas/src/caas/vfs/__init__.py +12 -0
  238. modules/caas/src/caas/vfs/filesystem.py +450 -0
  239. modules/caas/tests/__init__.py +3 -0
  240. modules/caas/tests/conftest.py +8 -0
  241. modules/caas/tests/test_caching.py +628 -0
  242. modules/caas/tests/test_context_triad.py +385 -0
  243. modules/caas/tests/test_conversation_manager.py +289 -0
  244. modules/caas/tests/test_functionality.py +215 -0
  245. modules/caas/tests/test_heuristic_router.py +370 -0
  246. modules/caas/tests/test_metadata_injection.py +328 -0
  247. modules/caas/tests/test_pragmatic_truth.py +322 -0
  248. modules/caas/tests/test_structure_aware_indexing.py +283 -0
  249. modules/caas/tests/test_time_decay.py +268 -0
  250. modules/caas/tests/test_trust_gateway.py +445 -0
  251. modules/caas/tests/test_vfs.py +298 -0
  252. modules/cmvk/.github/FUNDING.yml +9 -0
  253. modules/cmvk/.github/dependabot.yml +54 -0
  254. modules/cmvk/.github/workflows/ci.yml +205 -0
  255. modules/cmvk/.github/workflows/publish.yml +143 -0
  256. modules/cmvk/.gitignore +147 -0
  257. modules/cmvk/.pre-commit-config.yaml +58 -0
  258. modules/cmvk/CHANGELOG.md +146 -0
  259. modules/cmvk/CITATION.cff +48 -0
  260. modules/cmvk/CONTRIBUTING.md +229 -0
  261. modules/cmvk/Dockerfile +87 -0
  262. modules/cmvk/HF_MODEL_CARD.md +185 -0
  263. modules/cmvk/LICENSE +21 -0
  264. modules/cmvk/README.md +149 -0
  265. modules/cmvk/SECURITY.md +114 -0
  266. modules/cmvk/config/prompts/generator_v1.txt +23 -0
  267. modules/cmvk/config/prompts/verifier_hostile.txt +32 -0
  268. modules/cmvk/config/settings.yaml +40 -0
  269. modules/cmvk/coverage_html/.gitignore +2 -0
  270. modules/cmvk/coverage_html/class_index.html +658 -0
  271. modules/cmvk/coverage_html/coverage_html_cb_188fc9a4.js +735 -0
  272. modules/cmvk/coverage_html/favicon_32_cb_c827f16f.png +0 -0
  273. modules/cmvk/coverage_html/function_index.html +1978 -0
  274. modules/cmvk/coverage_html/index.html +255 -0
  275. modules/cmvk/coverage_html/keybd_closed_cb_900cfef5.png +0 -0
  276. modules/cmvk/coverage_html/status.json +1 -0
  277. modules/cmvk/coverage_html/style_cb_5c747636.css +389 -0
  278. modules/cmvk/coverage_html/z_2c49bd2ed3e01e38___init___py.html +315 -0
  279. modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_audit_py.html +499 -0
  280. modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_benchmarks_py.html +575 -0
  281. modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_constitutional_py.html +1001 -0
  282. modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_hf_utils_py.html +398 -0
  283. modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_metrics_py.html +570 -0
  284. modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_profiles_py.html +397 -0
  285. modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_types_py.html +109 -0
  286. modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_verification_py.html +1053 -0
  287. modules/cmvk/docs/DIAGRAMS.md +325 -0
  288. modules/cmvk/docs/architecture.md +345 -0
  289. modules/cmvk/docs/features.md +308 -0
  290. modules/cmvk/docs/getting_started.md +279 -0
  291. modules/cmvk/docs/innovation_layer.md +377 -0
  292. modules/cmvk/docs/safety.md +281 -0
  293. modules/cmvk/docs/traceability.md +150 -0
  294. modules/cmvk/examples/basic_example.py +62 -0
  295. modules/cmvk/examples/demo_complete_pipeline.py +209 -0
  296. modules/cmvk/examples/demo_innovation_layer.py +197 -0
  297. modules/cmvk/examples/example.py +112 -0
  298. modules/cmvk/examples/model_diversity_comparison.py +110 -0
  299. modules/cmvk/examples/real_api_integration.py +121 -0
  300. modules/cmvk/examples/test_full_pipeline.py +303 -0
  301. modules/cmvk/experiments/FEATURE_2_LATERAL_THINKING.md +187 -0
  302. modules/cmvk/experiments/README.md +216 -0
  303. modules/cmvk/experiments/ablation_runner.py +666 -0
  304. modules/cmvk/experiments/baseline_runner.py +158 -0
  305. modules/cmvk/experiments/blind_spot_benchmark.py +364 -0
  306. modules/cmvk/experiments/datasets/README.md +85 -0
  307. modules/cmvk/experiments/datasets/humaneval_50.json +352 -0
  308. modules/cmvk/experiments/datasets/humaneval_full.json +1150 -0
  309. modules/cmvk/experiments/datasets/humaneval_sample.json +32 -0
  310. modules/cmvk/experiments/datasets/sabotage.json +262 -0
  311. modules/cmvk/experiments/datasets/sample.json +40 -0
  312. modules/cmvk/experiments/demo_with_traces.py +110 -0
  313. modules/cmvk/experiments/efficiency_curve.py +259 -0
  314. modules/cmvk/experiments/experiment_runner.py +243 -0
  315. modules/cmvk/experiments/paper_data_generator.py +183 -0
  316. modules/cmvk/experiments/reproduce_results.py +407 -0
  317. modules/cmvk/experiments/reproducible_runner.py +352 -0
  318. modules/cmvk/experiments/sabotage_stress_test.py +311 -0
  319. modules/cmvk/experiments/test_lateral_thinking.py +116 -0
  320. modules/cmvk/experiments/test_prosecutor.py +41 -0
  321. modules/cmvk/experiments/visualize_results.py +735 -0
  322. modules/cmvk/logs/traces/demo_HumanEval_0_20260121-204900.json +36 -0
  323. modules/cmvk/notebooks/analysis.ipynb +124 -0
  324. modules/cmvk/paper/PAPER.md +561 -0
  325. modules/cmvk/paper/arxiv_checklist.md +230 -0
  326. modules/cmvk/paper/cmvk_neurips.aux +77 -0
  327. modules/cmvk/paper/cmvk_neurips.bbl +81 -0
  328. modules/cmvk/paper/cmvk_neurips.blg +48 -0
  329. modules/cmvk/paper/cmvk_neurips.out +16 -0
  330. modules/cmvk/paper/cmvk_neurips.pdf +0 -0
  331. modules/cmvk/paper/cmvk_neurips.tex +309 -0
  332. modules/cmvk/paper/figures/ablation.png +0 -0
  333. modules/cmvk/paper/figures/ablation.svg +39 -0
  334. modules/cmvk/paper/figures/architecture.png +0 -0
  335. modules/cmvk/paper/figures/architecture.svg +115 -0
  336. modules/cmvk/paper/figures/results_bar.png +0 -0
  337. modules/cmvk/paper/figures/results_bar.svg +70 -0
  338. modules/cmvk/paper/generate_figures.py +383 -0
  339. modules/cmvk/paper/neurips_2024.sty +101 -0
  340. modules/cmvk/paper/references.bib +98 -0
  341. modules/cmvk/paper/structure.tex +200 -0
  342. modules/cmvk/pyproject.toml +189 -0
  343. modules/cmvk/requirements-dev.txt +19 -0
  344. modules/cmvk/requirements.txt +14 -0
  345. modules/cmvk/src/cmvk/__init__.py +216 -0
  346. modules/cmvk/src/cmvk/audit.py +400 -0
  347. modules/cmvk/src/cmvk/benchmarks.py +476 -0
  348. modules/cmvk/src/cmvk/constitutional.py +902 -0
  349. modules/cmvk/src/cmvk/hf_utils.py +299 -0
  350. modules/cmvk/src/cmvk/metrics.py +471 -0
  351. modules/cmvk/src/cmvk/profiles.py +298 -0
  352. modules/cmvk/src/cmvk/py.typed +0 -0
  353. modules/cmvk/src/cmvk/types.py +10 -0
  354. modules/cmvk/src/cmvk/verification.py +954 -0
  355. modules/cmvk/src/cross_model_verification_kernel/__init__.py +91 -0
  356. modules/cmvk/src/cross_model_verification_kernel/__main__.py +10 -0
  357. modules/cmvk/src/cross_model_verification_kernel/agents/__init__.py +16 -0
  358. modules/cmvk/src/cross_model_verification_kernel/agents/base_agent.py +142 -0
  359. modules/cmvk/src/cross_model_verification_kernel/agents/generator_openai.py +223 -0
  360. modules/cmvk/src/cross_model_verification_kernel/agents/verifier_anthropic.py +448 -0
  361. modules/cmvk/src/cross_model_verification_kernel/agents/verifier_gemini.py +481 -0
  362. modules/cmvk/src/cross_model_verification_kernel/cli.py +570 -0
  363. modules/cmvk/src/cross_model_verification_kernel/core/__init__.py +26 -0
  364. modules/cmvk/src/cross_model_verification_kernel/core/graph_memory.py +308 -0
  365. modules/cmvk/src/cross_model_verification_kernel/core/kernel.py +413 -0
  366. modules/cmvk/src/cross_model_verification_kernel/core/trace_logger.py +75 -0
  367. modules/cmvk/src/cross_model_verification_kernel/core/types.py +121 -0
  368. modules/cmvk/src/cross_model_verification_kernel/datasets/__init__.py +20 -0
  369. modules/cmvk/src/cross_model_verification_kernel/datasets/humaneval_loader.py +271 -0
  370. modules/cmvk/src/cross_model_verification_kernel/generator.py +118 -0
  371. modules/cmvk/src/cross_model_verification_kernel/kernel.py +292 -0
  372. modules/cmvk/src/cross_model_verification_kernel/models.py +111 -0
  373. modules/cmvk/src/cross_model_verification_kernel/py.typed +1 -0
  374. modules/cmvk/src/cross_model_verification_kernel/simple_kernel.py +185 -0
  375. modules/cmvk/src/cross_model_verification_kernel/tools/__init__.py +94 -0
  376. modules/cmvk/src/cross_model_verification_kernel/tools/huggingface_upload.py +394 -0
  377. modules/cmvk/src/cross_model_verification_kernel/tools/sandbox.py +159 -0
  378. modules/cmvk/src/cross_model_verification_kernel/tools/statistics.py +468 -0
  379. modules/cmvk/src/cross_model_verification_kernel/tools/visualizer.py +312 -0
  380. modules/cmvk/src/cross_model_verification_kernel/tools/web_search.py +86 -0
  381. modules/cmvk/src/cross_model_verification_kernel/verifier.py +257 -0
  382. modules/cmvk/tests/__init__.py +3 -0
  383. modules/cmvk/tests/conftest.py +61 -0
  384. modules/cmvk/tests/integration/__init__.py +1 -0
  385. modules/cmvk/tests/integration/test_anthropic_verifier.py +269 -0
  386. modules/cmvk/tests/integration/test_integration.py +53 -0
  387. modules/cmvk/tests/integration/test_lateral_thinking_integration.py +199 -0
  388. modules/cmvk/tests/integration/test_lateral_thinking_witness.py +208 -0
  389. modules/cmvk/tests/integration/test_prosecutor_mode.py +131 -0
  390. modules/cmvk/tests/test_constitutional.py +611 -0
  391. modules/cmvk/tests/test_enhanced_features.py +603 -0
  392. modules/cmvk/tests/test_verification.py +255 -0
  393. modules/cmvk/tests/unit/__init__.py +1 -0
  394. modules/cmvk/tests/unit/test_agents.py +64 -0
  395. modules/cmvk/tests/unit/test_cli.py +224 -0
  396. modules/cmvk/tests/unit/test_core.py +126 -0
  397. modules/cmvk/tests/unit/test_humaneval_loader.py +197 -0
  398. modules/cmvk/tests/unit/test_kernel.py +255 -0
  399. modules/cmvk/tests/unit/test_reproducibility.py +160 -0
  400. modules/cmvk/tests/unit/test_trace_logger.py +115 -0
  401. modules/cmvk/tests/unit/test_visualizer.py +218 -0
  402. modules/control-plane/.github/ISSUE_TEMPLATE/bug_report.yml +82 -0
  403. modules/control-plane/.github/ISSUE_TEMPLATE/config.yml +11 -0
  404. modules/control-plane/.github/ISSUE_TEMPLATE/feature_request.yml +104 -0
  405. modules/control-plane/.github/ISSUE_TEMPLATE/question.yml +70 -0
  406. modules/control-plane/.github/ISSUE_TEMPLATE/security_vulnerability.yml +84 -0
  407. modules/control-plane/.github/discussions.yml +73 -0
  408. modules/control-plane/.github/pull_request_template.md +82 -0
  409. modules/control-plane/.github/workflows/publish.yml +146 -0
  410. modules/control-plane/.github/workflows/release.yml +39 -0
  411. modules/control-plane/.github/workflows/tests.yml +58 -0
  412. modules/control-plane/.gitignore +55 -0
  413. modules/control-plane/CHANGELOG.md +203 -0
  414. modules/control-plane/CONTRIBUTING.md +311 -0
  415. modules/control-plane/CONTRIBUTORS.md +88 -0
  416. modules/control-plane/Dockerfile +82 -0
  417. modules/control-plane/LICENSE +21 -0
  418. modules/control-plane/MANIFEST.in +17 -0
  419. modules/control-plane/README.md +1264 -0
  420. modules/control-plane/ROADMAP.md +228 -0
  421. modules/control-plane/SECURITY.md +210 -0
  422. modules/control-plane/SUPPORT.md +106 -0
  423. modules/control-plane/acp-cli.py +212 -0
  424. modules/control-plane/benchmark/README.md +257 -0
  425. modules/control-plane/benchmark/__init__.py +19 -0
  426. modules/control-plane/benchmark/red_team_dataset.py +517 -0
  427. modules/control-plane/benchmark.py +563 -0
  428. modules/control-plane/build_and_publish.sh +130 -0
  429. modules/control-plane/docker-compose.yml +74 -0
  430. modules/control-plane/docs/ABLATION_STUDIES.md +528 -0
  431. modules/control-plane/docs/ADAPTER_GUIDE.md +544 -0
  432. modules/control-plane/docs/ADVANCED_FEATURES.md +543 -0
  433. modules/control-plane/docs/AIOS_COMPARISON.md +296 -0
  434. modules/control-plane/docs/BIBLIOGRAPHY.md +367 -0
  435. modules/control-plane/docs/CASE_STUDIES.md +645 -0
  436. modules/control-plane/docs/DOCKER_DEPLOYMENT.md +184 -0
  437. modules/control-plane/docs/ECOSYSTEM_STATUS.md +98 -0
  438. modules/control-plane/docs/HF_MODEL_CARD.md +168 -0
  439. modules/control-plane/docs/KERNEL_V1_RELEASE.md +454 -0
  440. modules/control-plane/docs/LAYER3_FRAMEWORK.md +227 -0
  441. modules/control-plane/docs/LIMITATIONS.md +523 -0
  442. modules/control-plane/docs/PYPI_PUBLISHING.md +195 -0
  443. modules/control-plane/docs/README.md +58 -0
  444. modules/control-plane/docs/RELATED_WORK.md +319 -0
  445. modules/control-plane/docs/RELEASE_v1.1.0.md +252 -0
  446. modules/control-plane/docs/REPRODUCIBILITY.md +540 -0
  447. modules/control-plane/docs/RESEARCH_FOUNDATION.md +197 -0
  448. modules/control-plane/docs/api/CORE.md +270 -0
  449. modules/control-plane/docs/architecture/architecture.md +120 -0
  450. modules/control-plane/docs/community/ANNOUNCEMENT_TEMPLATES.md +52 -0
  451. modules/control-plane/docs/guides/IMPLEMENTATION.md +225 -0
  452. modules/control-plane/docs/guides/PHILOSOPHY.md +354 -0
  453. modules/control-plane/docs/guides/QUICKSTART.md +217 -0
  454. modules/control-plane/examples/README.md +138 -0
  455. modules/control-plane/examples/a2a_demo.py +410 -0
  456. modules/control-plane/examples/adapter_demo.py +347 -0
  457. modules/control-plane/examples/advanced_features.py +403 -0
  458. modules/control-plane/examples/basic_usage.py +261 -0
  459. modules/control-plane/examples/benchmark_demo.py +186 -0
  460. modules/control-plane/examples/compliance_demo.py +333 -0
  461. modules/control-plane/examples/configuration.py +265 -0
  462. modules/control-plane/examples/getting_started.py +178 -0
  463. modules/control-plane/examples/hibernation_and_time_travel_demo.py +406 -0
  464. modules/control-plane/examples/interactive_tutorial.ipynb +497 -0
  465. modules/control-plane/examples/kernel_interceptor_demo.py +202 -0
  466. modules/control-plane/examples/kernel_v1_demo.py +273 -0
  467. modules/control-plane/examples/langchain_demo.py +281 -0
  468. modules/control-plane/examples/lifecycle_demo.py +724 -0
  469. modules/control-plane/examples/mcp_demo.py +378 -0
  470. modules/control-plane/examples/ml_safety_demo.py +157 -0
  471. modules/control-plane/examples/multimodal_demo.py +347 -0
  472. modules/control-plane/examples/observability_demo.py +370 -0
  473. modules/control-plane/examples/use_cases.py +336 -0
  474. modules/control-plane/experiments/long_horizon_purge.py +235 -0
  475. modules/control-plane/experiments/multi_agent_rag.py +165 -0
  476. modules/control-plane/experiments/reproduce_results.py +667 -0
  477. modules/control-plane/paper/ARXIV_SUBMISSION_INFO.txt +122 -0
  478. modules/control-plane/paper/ETHICS_STATEMENT.md +248 -0
  479. modules/control-plane/paper/PAPER_CHECKLIST.md +72 -0
  480. modules/control-plane/paper/Paper.pdf +0 -0
  481. modules/control-plane/paper/README.md +71 -0
  482. modules/control-plane/paper/appendix.md +152 -0
  483. modules/control-plane/paper/architecture.md +15 -0
  484. modules/control-plane/paper/arxiv/figures/ablation_chart.png +0 -0
  485. modules/control-plane/paper/arxiv/figures/architecture.png +0 -0
  486. modules/control-plane/paper/arxiv/figures/constraint_graphs.png +0 -0
  487. modules/control-plane/paper/arxiv/figures/results_chart.png +0 -0
  488. modules/control-plane/paper/arxiv/main.aux +97 -0
  489. modules/control-plane/paper/arxiv/main.bbl +112 -0
  490. modules/control-plane/paper/arxiv/main.blg +48 -0
  491. modules/control-plane/paper/arxiv/main.out +33 -0
  492. modules/control-plane/paper/arxiv/main.pdf +0 -0
  493. modules/control-plane/paper/arxiv/main.tex +479 -0
  494. modules/control-plane/paper/arxiv/references.bib +234 -0
  495. modules/control-plane/paper/arxiv_submission.tar +0 -0
  496. modules/control-plane/paper/arxiv_submission.zip +0 -0
  497. modules/control-plane/paper/build.sh +68 -0
  498. modules/control-plane/paper/figures/README.md +47 -0
  499. modules/control-plane/paper/figures/ablation_chart.pdf +0 -0
  500. modules/control-plane/paper/figures/ablation_chart.png +0 -0
  501. modules/control-plane/paper/figures/architecture.pdf +0 -0
  502. modules/control-plane/paper/figures/architecture.png +0 -0
  503. modules/control-plane/paper/figures/constraint_graphs.pdf +0 -0
  504. modules/control-plane/paper/figures/constraint_graphs.png +0 -0
  505. modules/control-plane/paper/figures/generate_figures.py +252 -0
  506. modules/control-plane/paper/figures/results_chart.pdf +0 -0
  507. modules/control-plane/paper/figures/results_chart.png +0 -0
  508. modules/control-plane/paper/main.md +273 -0
  509. modules/control-plane/paper/main.tex +214 -0
  510. modules/control-plane/paper/main_arxiv.aux +53 -0
  511. modules/control-plane/paper/main_arxiv.out +17 -0
  512. modules/control-plane/paper/main_arxiv.pdf +0 -0
  513. modules/control-plane/paper/main_arxiv.tex +264 -0
  514. modules/control-plane/paper/references.bib +234 -0
  515. modules/control-plane/pyproject.toml +124 -0
  516. modules/control-plane/reproducibility/ABLATIONS.md +136 -0
  517. modules/control-plane/reproducibility/README.md +288 -0
  518. modules/control-plane/reproducibility/commands.md +467 -0
  519. modules/control-plane/reproducibility/docker_config/Dockerfile +39 -0
  520. modules/control-plane/reproducibility/experiment_configs/purge_config.json +46 -0
  521. modules/control-plane/reproducibility/experiment_configs/rag_config.json +36 -0
  522. modules/control-plane/reproducibility/hardware_specs.md +317 -0
  523. modules/control-plane/reproducibility/requirements_frozen.txt +0 -0
  524. modules/control-plane/reproducibility/run_all_experiments.sh +45 -0
  525. modules/control-plane/reproducibility/seeds.json +106 -0
  526. modules/control-plane/scripts/prepare_pypi.py +46 -0
  527. modules/control-plane/scripts/prepare_release.py +176 -0
  528. modules/control-plane/scripts/upload_dataset_to_hf.py +316 -0
  529. modules/control-plane/setup.py +69 -0
  530. modules/control-plane/src/agent_control_plane/__init__.py +639 -0
  531. modules/control-plane/src/agent_control_plane/a2a_adapter.py +541 -0
  532. modules/control-plane/src/agent_control_plane/adapter.py +415 -0
  533. modules/control-plane/src/agent_control_plane/agent_hibernation.py +364 -0
  534. modules/control-plane/src/agent_control_plane/agent_kernel.py +464 -0
  535. modules/control-plane/src/agent_control_plane/compliance.py +718 -0
  536. modules/control-plane/src/agent_control_plane/constraint_graphs.py +475 -0
  537. modules/control-plane/src/agent_control_plane/control_plane.py +848 -0
  538. modules/control-plane/src/agent_control_plane/example_executors.py +193 -0
  539. modules/control-plane/src/agent_control_plane/execution_engine.py +229 -0
  540. modules/control-plane/src/agent_control_plane/flight_recorder.py +600 -0
  541. modules/control-plane/src/agent_control_plane/governance_layer.py +432 -0
  542. modules/control-plane/src/agent_control_plane/hf_utils.py +561 -0
  543. modules/control-plane/src/agent_control_plane/interfaces/__init__.py +53 -0
  544. modules/control-plane/src/agent_control_plane/interfaces/kernel_interface.py +359 -0
  545. modules/control-plane/src/agent_control_plane/interfaces/plugin_interface.py +495 -0
  546. modules/control-plane/src/agent_control_plane/interfaces/protocol_interfaces.py +385 -0
  547. modules/control-plane/src/agent_control_plane/kernel_space.py +707 -0
  548. modules/control-plane/src/agent_control_plane/langchain_adapter.py +422 -0
  549. modules/control-plane/src/agent_control_plane/lifecycle.py +3111 -0
  550. modules/control-plane/src/agent_control_plane/mcp_adapter.py +517 -0
  551. modules/control-plane/src/agent_control_plane/ml_safety.py +560 -0
  552. modules/control-plane/src/agent_control_plane/multimodal.py +724 -0
  553. modules/control-plane/src/agent_control_plane/mute_agent.py +419 -0
  554. modules/control-plane/src/agent_control_plane/observability.py +785 -0
  555. modules/control-plane/src/agent_control_plane/orchestrator.py +480 -0
  556. modules/control-plane/src/agent_control_plane/plugin_registry.py +748 -0
  557. modules/control-plane/src/agent_control_plane/policy_engine.py +525 -0
  558. modules/control-plane/src/agent_control_plane/shadow_mode.py +307 -0
  559. modules/control-plane/src/agent_control_plane/signals.py +491 -0
  560. modules/control-plane/src/agent_control_plane/supervisor_agents.py +427 -0
  561. modules/control-plane/src/agent_control_plane/time_travel_debugger.py +554 -0
  562. modules/control-plane/src/agent_control_plane/tool_registry.py +350 -0
  563. modules/control-plane/src/agent_control_plane/vfs.py +695 -0
  564. modules/control-plane/tests/README.md +33 -0
  565. modules/control-plane/tests/test_a2a_adapter.py +336 -0
  566. modules/control-plane/tests/test_adapter.py +422 -0
  567. modules/control-plane/tests/test_advanced_features.py +389 -0
  568. modules/control-plane/tests/test_benchmark.py +223 -0
  569. modules/control-plane/tests/test_compliance.py +214 -0
  570. modules/control-plane/tests/test_control_plane.py +295 -0
  571. modules/control-plane/tests/test_hibernation.py +274 -0
  572. modules/control-plane/tests/test_kernel_interception.py +284 -0
  573. modules/control-plane/tests/test_langchain_adapter.py +258 -0
  574. modules/control-plane/tests/test_lifecycle.py +1174 -0
  575. modules/control-plane/tests/test_mcp_adapter.py +293 -0
  576. modules/control-plane/tests/test_ml_safety.py +142 -0
  577. modules/control-plane/tests/test_multimodal.py +317 -0
  578. modules/control-plane/tests/test_new_features.py +435 -0
  579. modules/control-plane/tests/test_observability.py +338 -0
  580. modules/control-plane/tests/test_time_travel.py +387 -0
  581. modules/emk/.github/workflows/ci.yml +105 -0
  582. modules/emk/.github/workflows/publish.yml +144 -0
  583. modules/emk/.gitignore +74 -0
  584. modules/emk/CHANGELOG.md +41 -0
  585. modules/emk/CONTRIBUTING.md +295 -0
  586. modules/emk/IMPLEMENTATION.md +174 -0
  587. modules/emk/LICENSE +21 -0
  588. modules/emk/MANIFEST.in +8 -0
  589. modules/emk/README.md +135 -0
  590. modules/emk/RELEASE_NOTES.md +82 -0
  591. modules/emk/SECURITY.md +52 -0
  592. modules/emk/codecov.yml +39 -0
  593. modules/emk/docs/MEMORY_MANAGEMENT.md +285 -0
  594. modules/emk/emk/__init__.py +106 -0
  595. modules/emk/emk/hf_utils.py +419 -0
  596. modules/emk/emk/indexer.py +144 -0
  597. modules/emk/emk/py.typed +0 -0
  598. modules/emk/emk/schema.py +204 -0
  599. modules/emk/emk/sleep_cycle.py +345 -0
  600. modules/emk/emk/store.py +479 -0
  601. modules/emk/examples/basic_usage.py +123 -0
  602. modules/emk/examples/memory_features_demo.py +154 -0
  603. modules/emk/experiments/README.md +59 -0
  604. modules/emk/experiments/reproduce_results.py +461 -0
  605. modules/emk/experiments/results.json +61 -0
  606. modules/emk/paper/structure.tex +192 -0
  607. modules/emk/paper/whitepaper.md +273 -0
  608. modules/emk/pyproject.toml +91 -0
  609. modules/emk/setup.py +5 -0
  610. modules/emk/tests/test_file_adapter.py +195 -0
  611. modules/emk/tests/test_indexer.py +174 -0
  612. modules/emk/tests/test_init.py +55 -0
  613. modules/emk/tests/test_negative_memory.py +83 -0
  614. modules/emk/tests/test_schema.py +150 -0
  615. modules/emk/tests/test_semantic_rules.py +175 -0
  616. modules/emk/tests/test_sleep_cycle.py +335 -0
  617. modules/emk/tests/test_store_anti_patterns.py +239 -0
  618. modules/iatp/.github/workflows/docker-build.yml +124 -0
  619. modules/iatp/.github/workflows/publish.yml +174 -0
  620. modules/iatp/.github/workflows/python-package.yml +121 -0
  621. modules/iatp/.gitignore +67 -0
  622. modules/iatp/.pre-commit-config.yaml +64 -0
  623. modules/iatp/CHANGELOG.md +120 -0
  624. modules/iatp/Dockerfile +91 -0
  625. modules/iatp/IMPLEMENTATION_SUMMARY.md +218 -0
  626. modules/iatp/MANIFEST.in +9 -0
  627. modules/iatp/README.md +180 -0
  628. modules/iatp/docker/Dockerfile.agent +27 -0
  629. modules/iatp/docker/Dockerfile.sidecar-python +86 -0
  630. modules/iatp/docker/README.md +258 -0
  631. modules/iatp/docker-compose.yml +194 -0
  632. modules/iatp/docs/ARCHITECTURE.md +243 -0
  633. modules/iatp/docs/CLI_GUIDE.md +220 -0
  634. modules/iatp/docs/DEPLOYMENT.md +304 -0
  635. modules/iatp/examples/README.md +132 -0
  636. modules/iatp/examples/backend_agent.py +39 -0
  637. modules/iatp/examples/client.py +168 -0
  638. modules/iatp/examples/demo_attestation_reputation.py +274 -0
  639. modules/iatp/examples/demo_client.py +240 -0
  640. modules/iatp/examples/demo_rbac.py +143 -0
  641. modules/iatp/examples/integration_demo.py +245 -0
  642. modules/iatp/examples/manifests/coder_agent.json +20 -0
  643. modules/iatp/examples/manifests/reviewer_agent.json +19 -0
  644. modules/iatp/examples/manifests/secure_bank.json +14 -0
  645. modules/iatp/examples/manifests/standard_agent.json +14 -0
  646. modules/iatp/examples/manifests/untrusted_honeypot.json +14 -0
  647. modules/iatp/examples/run_secure_bank_sidecar.py +85 -0
  648. modules/iatp/examples/run_sidecar.py +105 -0
  649. modules/iatp/examples/run_untrusted_sidecar.py +77 -0
  650. modules/iatp/examples/secure_bank_agent.py +138 -0
  651. modules/iatp/examples/test_untrusted.py +82 -0
  652. modules/iatp/examples/untrusted_agent.py +119 -0
  653. modules/iatp/experiments/README.md +58 -0
  654. modules/iatp/experiments/cascading_hallucination/README.md +149 -0
  655. modules/iatp/experiments/cascading_hallucination/agent_a_user.py +41 -0
  656. modules/iatp/experiments/cascading_hallucination/agent_b_summarizer.py +54 -0
  657. modules/iatp/experiments/cascading_hallucination/agent_c_database.py +47 -0
  658. modules/iatp/experiments/cascading_hallucination/proof_of_concept.py +290 -0
  659. modules/iatp/experiments/cascading_hallucination/run_experiment.py +226 -0
  660. modules/iatp/experiments/cascading_hallucination/sidecar_c.py +61 -0
  661. modules/iatp/experiments/reproduce_results.py +574 -0
  662. modules/iatp/experiments/results.json +2336 -0
  663. modules/iatp/iatp/__init__.py +164 -0
  664. modules/iatp/iatp/attestation.py +401 -0
  665. modules/iatp/iatp/cli.py +253 -0
  666. modules/iatp/iatp/hf_utils.py +469 -0
  667. modules/iatp/iatp/ipc_pipes.py +578 -0
  668. modules/iatp/iatp/main.py +410 -0
  669. modules/iatp/iatp/models/__init__.py +445 -0
  670. modules/iatp/iatp/policy_engine.py +335 -0
  671. modules/iatp/iatp/py.typed +2 -0
  672. modules/iatp/iatp/recovery.py +319 -0
  673. modules/iatp/iatp/security/__init__.py +268 -0
  674. modules/iatp/iatp/sidecar/__init__.py +517 -0
  675. modules/iatp/iatp/telemetry/__init__.py +162 -0
  676. modules/iatp/iatp/tests/__init__.py +1 -0
  677. modules/iatp/iatp/tests/test_attestation.py +368 -0
  678. modules/iatp/iatp/tests/test_cli.py +129 -0
  679. modules/iatp/iatp/tests/test_models.py +128 -0
  680. modules/iatp/iatp/tests/test_policy_engine.py +345 -0
  681. modules/iatp/iatp/tests/test_recovery.py +279 -0
  682. modules/iatp/iatp/tests/test_security.py +220 -0
  683. modules/iatp/iatp/tests/test_sidecar.py +165 -0
  684. modules/iatp/iatp/tests/test_telemetry.py +173 -0
  685. modules/iatp/paper/BLOG.md +307 -0
  686. modules/iatp/paper/PAPER.md +236 -0
  687. modules/iatp/paper/RFC_SUBMISSION.md +299 -0
  688. modules/iatp/paper/whitepaper.md +369 -0
  689. modules/iatp/proto/README.md +200 -0
  690. modules/iatp/proto/generate_stubs.py +81 -0
  691. modules/iatp/proto/iatp.proto +552 -0
  692. modules/iatp/pyproject.toml +180 -0
  693. modules/iatp/requirements-dev.txt +2 -0
  694. modules/iatp/requirements.txt +6 -0
  695. modules/iatp/setup.py +60 -0
  696. modules/iatp/sidecar/README.md +487 -0
  697. modules/iatp/sidecar/go/Dockerfile +32 -0
  698. modules/iatp/sidecar/go/README.md +237 -0
  699. modules/iatp/sidecar/go/go.mod +8 -0
  700. modules/iatp/sidecar/go/main.go +488 -0
  701. modules/iatp/spec/001-handshake.md +436 -0
  702. modules/iatp/spec/002-reversibility.md +394 -0
  703. modules/iatp/spec/schema/capability_manifest.json +266 -0
  704. modules/iatp/test_integration.py +310 -0
  705. modules/mcp-kernel-server/README.md +261 -0
  706. modules/mcp-kernel-server/pyproject.toml +60 -0
  707. modules/mcp-kernel-server/src/mcp_kernel_server/__init__.py +26 -0
  708. modules/mcp-kernel-server/src/mcp_kernel_server/cli.py +229 -0
  709. modules/mcp-kernel-server/src/mcp_kernel_server/resources.py +215 -0
  710. modules/mcp-kernel-server/src/mcp_kernel_server/server.py +562 -0
  711. modules/mcp-kernel-server/src/mcp_kernel_server/tools.py +1172 -0
  712. modules/mute-agent/.github/workflows/safety_check.yml +45 -0
  713. modules/mute-agent/.gitignore +53 -0
  714. modules/mute-agent/ARCHITECTURE.md +531 -0
  715. modules/mute-agent/BENCHMARK_GUIDE.md +384 -0
  716. modules/mute-agent/COMPLETION_SUMMARY.md +293 -0
  717. modules/mute-agent/EXPERIMENT_SUMMARY.md +318 -0
  718. modules/mute-agent/IMPLEMENTATION_SUMMARY.md +212 -0
  719. modules/mute-agent/LICENSE +21 -0
  720. modules/mute-agent/PHASE3_SUMMARY.md +297 -0
  721. modules/mute-agent/README.md +360 -0
  722. modules/mute-agent/STEEL_MAN_RESULTS.md +353 -0
  723. modules/mute-agent/USAGE.md +505 -0
  724. modules/mute-agent/V2_IMPLEMENTATION_SUMMARY.md +253 -0
  725. modules/mute-agent/V2_STEEL_MAN_IMPLEMENTATION.md +274 -0
  726. modules/mute-agent/VERIFICATION_REPORT.md +435 -0
  727. modules/mute-agent/charts/cost_comparison.png +0 -0
  728. modules/mute-agent/charts/cost_vs_ambiguity.png +0 -0
  729. modules/mute-agent/charts/metrics_comparison.png +0 -0
  730. modules/mute-agent/charts/scenario_breakdown.png +0 -0
  731. modules/mute-agent/charts/trace_attack_blocked.html +140 -0
  732. modules/mute-agent/charts/trace_attack_blocked.png +0 -0
  733. modules/mute-agent/charts/trace_failure.html +140 -0
  734. modules/mute-agent/charts/trace_failure.png +0 -0
  735. modules/mute-agent/charts/trace_success.html +140 -0
  736. modules/mute-agent/charts/trace_success.png +0 -0
  737. modules/mute-agent/examples/__init__.py +1 -0
  738. modules/mute-agent/examples/advanced_example.py +384 -0
  739. modules/mute-agent/examples/graph_debugger_demo.py +241 -0
  740. modules/mute-agent/examples/listener_example.py +297 -0
  741. modules/mute-agent/examples/simple_example.py +242 -0
  742. modules/mute-agent/examples/steel_man_demo.py +297 -0
  743. modules/mute-agent/experiments/README.md +135 -0
  744. modules/mute-agent/experiments/__init__.py +3 -0
  745. modules/mute-agent/experiments/agent_comparison.csv +6 -0
  746. modules/mute-agent/experiments/agent_comparison_50runs.csv +6 -0
  747. modules/mute-agent/experiments/ambiguity_test.py +335 -0
  748. modules/mute-agent/experiments/ambiguity_test_results.csv +31 -0
  749. modules/mute-agent/experiments/ambiguity_test_results_50runs.csv +51 -0
  750. modules/mute-agent/experiments/baseline_agent.py +189 -0
  751. modules/mute-agent/experiments/benchmark.py +402 -0
  752. modules/mute-agent/experiments/demo.py +172 -0
  753. modules/mute-agent/experiments/generate_cost_curve.py +474 -0
  754. modules/mute-agent/experiments/jailbreak_test.py +137 -0
  755. modules/mute-agent/experiments/latent_state_scenario.py +361 -0
  756. modules/mute-agent/experiments/mute_agent_experiment.py +349 -0
  757. modules/mute-agent/experiments/run_extended_experiment.py +40 -0
  758. modules/mute-agent/experiments/run_v2_experiments.py +266 -0
  759. modules/mute-agent/experiments/run_v2_experiments_auto.py +247 -0
  760. modules/mute-agent/experiments/v2_scenarios/README.md +214 -0
  761. modules/mute-agent/experiments/v2_scenarios/__init__.py +4 -0
  762. modules/mute-agent/experiments/v2_scenarios/scenario_1_deep_dependency.py +325 -0
  763. modules/mute-agent/experiments/v2_scenarios/scenario_2_adversarial.py +328 -0
  764. modules/mute-agent/experiments/v2_scenarios/scenario_3_false_positive.py +303 -0
  765. modules/mute-agent/experiments/v2_scenarios/scenario_4_performance.py +319 -0
  766. modules/mute-agent/experiments/visualize.py +400 -0
  767. modules/mute-agent/mute_agent/__init__.py +66 -0
  768. modules/mute-agent/mute_agent/core/__init__.py +1 -0
  769. modules/mute-agent/mute_agent/core/execution_agent.py +164 -0
  770. modules/mute-agent/mute_agent/core/handshake_protocol.py +199 -0
  771. modules/mute-agent/mute_agent/core/reasoning_agent.py +236 -0
  772. modules/mute-agent/mute_agent/knowledge_graph/__init__.py +1 -0
  773. modules/mute-agent/mute_agent/knowledge_graph/graph_elements.py +63 -0
  774. modules/mute-agent/mute_agent/knowledge_graph/multidimensional_graph.py +168 -0
  775. modules/mute-agent/mute_agent/knowledge_graph/subgraph.py +222 -0
  776. modules/mute-agent/mute_agent/listener/__init__.py +41 -0
  777. modules/mute-agent/mute_agent/listener/adapters/__init__.py +29 -0
  778. modules/mute-agent/mute_agent/listener/adapters/base_adapter.py +187 -0
  779. modules/mute-agent/mute_agent/listener/adapters/caas_adapter.py +342 -0
  780. modules/mute-agent/mute_agent/listener/adapters/control_plane_adapter.py +434 -0
  781. modules/mute-agent/mute_agent/listener/adapters/iatp_adapter.py +330 -0
  782. modules/mute-agent/mute_agent/listener/adapters/scak_adapter.py +249 -0
  783. modules/mute-agent/mute_agent/listener/listener.py +608 -0
  784. modules/mute-agent/mute_agent/listener/state_observer.py +434 -0
  785. modules/mute-agent/mute_agent/listener/threshold_config.py +311 -0
  786. modules/mute-agent/mute_agent/super_system/__init__.py +1 -0
  787. modules/mute-agent/mute_agent/super_system/router.py +202 -0
  788. modules/mute-agent/mute_agent/visualization/__init__.py +8 -0
  789. modules/mute-agent/mute_agent/visualization/graph_debugger.py +495 -0
  790. modules/mute-agent/requirements-dev.txt +6 -0
  791. modules/mute-agent/requirements.txt +9 -0
  792. modules/mute-agent/setup.py +64 -0
  793. modules/mute-agent/src/__init__.py +0 -0
  794. modules/mute-agent/src/agents/__init__.py +0 -0
  795. modules/mute-agent/src/agents/baseline_agent.py +524 -0
  796. modules/mute-agent/src/agents/interactive_agent.py +113 -0
  797. modules/mute-agent/src/agents/mute_agent.py +622 -0
  798. modules/mute-agent/src/benchmarks/__init__.py +0 -0
  799. modules/mute-agent/src/benchmarks/evaluator.py +481 -0
  800. modules/mute-agent/src/benchmarks/scenarios.json +985 -0
  801. modules/mute-agent/src/core/__init__.py +0 -0
  802. modules/mute-agent/src/core/mock_state.py +320 -0
  803. modules/mute-agent/src/core/tools.py +441 -0
  804. modules/nexus/__init__.py +49 -0
  805. modules/nexus/arbiter.py +357 -0
  806. modules/nexus/client.py +464 -0
  807. modules/nexus/dmz.py +417 -0
  808. modules/nexus/escrow.py +428 -0
  809. modules/nexus/exceptions.py +284 -0
  810. modules/nexus/registry.py +391 -0
  811. modules/nexus/reputation.py +423 -0
  812. modules/nexus/schemas/__init__.py +49 -0
  813. modules/nexus/schemas/compliance.py +274 -0
  814. modules/nexus/schemas/escrow.py +249 -0
  815. modules/nexus/schemas/manifest.py +223 -0
  816. modules/nexus/schemas/receipt.py +206 -0
  817. modules/observability/README.md +192 -0
  818. modules/observability/alertmanager/alertmanager.yml +116 -0
  819. modules/observability/alerts/agent-os-alerts.yaml +197 -0
  820. modules/observability/docker-compose.yml +128 -0
  821. modules/observability/grafana/dashboards/agent-os-amb.json +448 -0
  822. modules/observability/grafana/dashboards/agent-os-cmvk.json +441 -0
  823. modules/observability/grafana/dashboards/agent-os-overview.json +268 -0
  824. modules/observability/grafana/dashboards/agent-os-performance.json +15 -0
  825. modules/observability/grafana/dashboards/agent-os-safety.json +50 -0
  826. modules/observability/grafana/provisioning/dashboards/dashboards.yml +15 -0
  827. modules/observability/grafana/provisioning/datasources/datasources.yml +33 -0
  828. modules/observability/otel/otel-collector-config.yml +61 -0
  829. modules/observability/prometheus/prometheus.yml +63 -0
  830. modules/observability/pyproject.toml +53 -0
  831. modules/observability/scripts/export_dashboards.py +55 -0
  832. modules/observability/src/agent_os_observability/__init__.py +25 -0
  833. modules/observability/src/agent_os_observability/dashboards.py +896 -0
  834. modules/observability/src/agent_os_observability/metrics.py +396 -0
  835. modules/observability/src/agent_os_observability/server.py +221 -0
  836. modules/observability/src/agent_os_observability/tracer.py +226 -0
  837. modules/primitives/.gitignore +8 -0
  838. modules/primitives/README.md +62 -0
  839. modules/primitives/agent_primitives/__init__.py +22 -0
  840. modules/primitives/agent_primitives/failures.py +82 -0
  841. modules/primitives/agent_primitives/py.typed +0 -0
  842. modules/primitives/pyproject.toml +68 -0
  843. modules/scak/.github/copilot-instructions.md +396 -0
  844. modules/scak/.github/workflows/release.yml +117 -0
  845. modules/scak/.gitignore +32 -0
  846. modules/scak/CHANGELOG.md +173 -0
  847. modules/scak/CITATION.cff +62 -0
  848. modules/scak/CONTRIBUTING.md +429 -0
  849. modules/scak/Dockerfile +58 -0
  850. modules/scak/ENTERPRISE_FEATURES.md +518 -0
  851. modules/scak/IMPLEMENTATION_SUMMARY.md +206 -0
  852. modules/scak/LIMITATIONS.md +565 -0
  853. modules/scak/MANIFEST.in +16 -0
  854. modules/scak/NOVELTY.md +535 -0
  855. modules/scak/README.md +928 -0
  856. modules/scak/RESEARCH.md +670 -0
  857. modules/scak/agent_kernel/__init__.py +66 -0
  858. modules/scak/agent_kernel/analyzer.py +432 -0
  859. modules/scak/agent_kernel/auditor.py +31 -0
  860. modules/scak/agent_kernel/completeness_auditor.py +234 -0
  861. modules/scak/agent_kernel/detector.py +200 -0
  862. modules/scak/agent_kernel/kernel.py +741 -0
  863. modules/scak/agent_kernel/memory_manager.py +82 -0
  864. modules/scak/agent_kernel/models.py +372 -0
  865. modules/scak/agent_kernel/nudge_mechanism.py +260 -0
  866. modules/scak/agent_kernel/outcome_analyzer.py +335 -0
  867. modules/scak/agent_kernel/patcher.py +579 -0
  868. modules/scak/agent_kernel/semantic_analyzer.py +313 -0
  869. modules/scak/agent_kernel/semantic_purge.py +346 -0
  870. modules/scak/agent_kernel/simulator.py +447 -0
  871. modules/scak/agent_kernel/teacher.py +82 -0
  872. modules/scak/agent_kernel/triage.py +149 -0
  873. modules/scak/build_and_publish.ps1 +74 -0
  874. modules/scak/build_and_publish.sh +74 -0
  875. modules/scak/cli.py +471 -0
  876. modules/scak/dashboard.py +462 -0
  877. modules/scak/datasets/DATASET_CARD.md +219 -0
  878. modules/scak/datasets/README.md +143 -0
  879. modules/scak/datasets/gaia_vague_queries/vague_queries.json +262 -0
  880. modules/scak/datasets/hf_upload/README.md +219 -0
  881. modules/scak/datasets/hf_upload/scak_gaia_laziness.jsonl +50 -0
  882. modules/scak/datasets/prepare_hf_datasets.py +145 -0
  883. modules/scak/datasets/red_team/jailbreak_patterns.json +202 -0
  884. modules/scak/docker-compose.yml +99 -0
  885. modules/scak/docs/Adaptive-Memory-Hierarchy.md +319 -0
  886. modules/scak/docs/Data-Contracts-and-Schemas.md +285 -0
  887. modules/scak/docs/Dual-Loop-Architecture.md +344 -0
  888. modules/scak/docs/Enhanced-Features.md +612 -0
  889. modules/scak/docs/LANGCHAIN_INTEGRATION.md +572 -0
  890. modules/scak/docs/README.md +128 -0
  891. modules/scak/docs/Reference-Implementations.md +163 -0
  892. modules/scak/docs/SCAK_V2.md +374 -0
  893. modules/scak/docs/Three-Failure-Types.md +178 -0
  894. modules/scak/examples/basic_example.py +155 -0
  895. modules/scak/examples/circuit_breaker_lazy_eval_demo.py +243 -0
  896. modules/scak/examples/langchain_integration_example.py +339 -0
  897. modules/scak/examples/layer4_demo.py +243 -0
  898. modules/scak/examples/production_features_demo.py +353 -0
  899. modules/scak/examples/quick_demo.py +79 -0
  900. modules/scak/examples/scak_v2_demo.py +252 -0
  901. modules/scak/experiments/README.md +438 -0
  902. modules/scak/experiments/ablation_studies/README.md +192 -0
  903. modules/scak/experiments/ablation_studies/ablation_no_audit.py +116 -0
  904. modules/scak/experiments/ablation_studies/ablation_no_purge.py +133 -0
  905. modules/scak/experiments/chaos_engineering/README.md +332 -0
  906. modules/scak/experiments/context_efficiency_test.py +328 -0
  907. modules/scak/experiments/gaia_benchmark/README.md +208 -0
  908. modules/scak/experiments/laziness_benchmark.py +179 -0
  909. modules/scak/experiments/long_horizon_task_experiment.py +252 -0
  910. modules/scak/experiments/multi_agent_rag_experiment.py +284 -0
  911. modules/scak/experiments/results/ablation_table.md +12 -0
  912. modules/scak/experiments/results/long_horizon.json +36 -0
  913. modules/scak/experiments/results/multi_agent_rag.json +66 -0
  914. modules/scak/experiments/run_comprehensive_ablations.py +332 -0
  915. modules/scak/experiments/test_auditor_patcher_integration.py +251 -0
  916. modules/scak/notebooks/getting_started.ipynb +33 -0
  917. modules/scak/paper/ARXIV_SUBMISSION_METADATA.txt +109 -0
  918. modules/scak/paper/PAPER_CHECKLIST.md +304 -0
  919. modules/scak/paper/Paper.pdf +0 -0
  920. modules/scak/paper/README.md +113 -0
  921. modules/scak/paper/appendix.md +351 -0
  922. modules/scak/paper/arxiv/bibliography.bib +284 -0
  923. modules/scak/paper/arxiv/fig1_ooda_architecture.pdf +0 -0
  924. modules/scak/paper/arxiv/fig2_memory_hierarchy.pdf +0 -0
  925. modules/scak/paper/arxiv/fig3_gaia_results.pdf +0 -0
  926. modules/scak/paper/arxiv/fig4_ablation_heatmap.pdf +0 -0
  927. modules/scak/paper/arxiv/fig5_context_reduction.pdf +0 -0
  928. modules/scak/paper/arxiv/fig6_mttr_boxplot.pdf +0 -0
  929. modules/scak/paper/arxiv/main.aux +103 -0
  930. modules/scak/paper/arxiv/main.bbl +113 -0
  931. modules/scak/paper/arxiv/main.blg +55 -0
  932. modules/scak/paper/arxiv/main.out +31 -0
  933. modules/scak/paper/arxiv/main.pdf +0 -0
  934. modules/scak/paper/arxiv/main.tex +482 -0
  935. modules/scak/paper/arxiv_submission/bibliography.bib +284 -0
  936. modules/scak/paper/arxiv_submission/fig1_ooda_architecture.pdf +0 -0
  937. modules/scak/paper/arxiv_submission/fig2_memory_hierarchy.pdf +0 -0
  938. modules/scak/paper/arxiv_submission/fig3_gaia_results.pdf +0 -0
  939. modules/scak/paper/arxiv_submission/fig4_ablation_heatmap.pdf +0 -0
  940. modules/scak/paper/arxiv_submission/fig5_context_reduction.pdf +0 -0
  941. modules/scak/paper/arxiv_submission/fig6_mttr_boxplot.pdf +0 -0
  942. modules/scak/paper/arxiv_submission/main.aux +103 -0
  943. modules/scak/paper/arxiv_submission/main.bbl +113 -0
  944. modules/scak/paper/arxiv_submission/main.blg +55 -0
  945. modules/scak/paper/arxiv_submission/main.out +31 -0
  946. modules/scak/paper/arxiv_submission/main.pdf +0 -0
  947. modules/scak/paper/arxiv_submission/main.tex +482 -0
  948. modules/scak/paper/arxiv_submission.tar.gz +0 -0
  949. modules/scak/paper/bibliography.bib +284 -0
  950. modules/scak/paper/build.sh +55 -0
  951. modules/scak/paper/figures/README.md +32 -0
  952. modules/scak/paper/figures/fig1_ooda_architecture.md +75 -0
  953. modules/scak/paper/figures/fig1_ooda_architecture.pdf +0 -0
  954. modules/scak/paper/figures/fig1_ooda_architecture.png +0 -0
  955. modules/scak/paper/figures/fig2_memory_hierarchy.md +83 -0
  956. modules/scak/paper/figures/fig2_memory_hierarchy.pdf +0 -0
  957. modules/scak/paper/figures/fig2_memory_hierarchy.png +0 -0
  958. modules/scak/paper/figures/fig3_gaia_results.md +64 -0
  959. modules/scak/paper/figures/fig3_gaia_results.pdf +0 -0
  960. modules/scak/paper/figures/fig3_gaia_results.png +0 -0
  961. modules/scak/paper/figures/fig4_ablation_heatmap.md +64 -0
  962. modules/scak/paper/figures/fig4_ablation_heatmap.pdf +0 -0
  963. modules/scak/paper/figures/fig4_ablation_heatmap.png +0 -0
  964. modules/scak/paper/figures/fig5_context_reduction.md +71 -0
  965. modules/scak/paper/figures/fig5_context_reduction.pdf +0 -0
  966. modules/scak/paper/figures/fig5_context_reduction.png +0 -0
  967. modules/scak/paper/figures/fig6_mttr_boxplot.md +80 -0
  968. modules/scak/paper/figures/fig6_mttr_boxplot.pdf +0 -0
  969. modules/scak/paper/figures/fig6_mttr_boxplot.png +0 -0
  970. modules/scak/paper/figures/generate_figures.py +463 -0
  971. modules/scak/paper/main.aux +103 -0
  972. modules/scak/paper/main.bbl +113 -0
  973. modules/scak/paper/main.blg +55 -0
  974. modules/scak/paper/main.md +192 -0
  975. modules/scak/paper/main.out +31 -0
  976. modules/scak/paper/main.pdf +0 -0
  977. modules/scak/paper/main.tex +482 -0
  978. modules/scak/reproducibility/ABLATIONS.md +225 -0
  979. modules/scak/reproducibility/Dockerfile.reproducibility +34 -0
  980. modules/scak/reproducibility/README.md +421 -0
  981. modules/scak/reproducibility/requirements-pinned.txt +32 -0
  982. modules/scak/reproducibility/run_all_experiments.py +395 -0
  983. modules/scak/reproducibility/seed_control.py +53 -0
  984. modules/scak/reproducibility/statistical_analysis.py +302 -0
  985. modules/scak/requirements.txt +50 -0
  986. modules/scak/setup.py +93 -0
  987. modules/scak/src/__init__.py +124 -0
  988. modules/scak/src/agents/__init__.py +13 -0
  989. modules/scak/src/agents/conflict_resolution.py +732 -0
  990. modules/scak/src/agents/orchestrator.py +761 -0
  991. modules/scak/src/agents/pubsub.py +484 -0
  992. modules/scak/src/agents/shadow_teacher.py +344 -0
  993. modules/scak/src/agents/swarm.py +661 -0
  994. modules/scak/src/agents/worker.py +357 -0
  995. modules/scak/src/integrations/__init__.py +81 -0
  996. modules/scak/src/integrations/cmvk_adapter.py +430 -0
  997. modules/scak/src/integrations/control_plane_adapter.py +601 -0
  998. modules/scak/src/integrations/langchain_integration.py +902 -0
  999. modules/scak/src/interfaces/__init__.py +59 -0
  1000. modules/scak/src/interfaces/llm_clients.py +505 -0
  1001. modules/scak/src/interfaces/openapi_tools.py +611 -0
  1002. modules/scak/src/interfaces/plugin_system.py +605 -0
  1003. modules/scak/src/interfaces/protocols.py +365 -0
  1004. modules/scak/src/interfaces/telemetry.py +464 -0
  1005. modules/scak/src/interfaces/tool_registry.py +547 -0
  1006. modules/scak/src/kernel/__init__.py +100 -0
  1007. modules/scak/src/kernel/auditor.py +305 -0
  1008. modules/scak/src/kernel/circuit_breaker.py +398 -0
  1009. modules/scak/src/kernel/core.py +724 -0
  1010. modules/scak/src/kernel/distributed.py +667 -0
  1011. modules/scak/src/kernel/evolution.py +455 -0
  1012. modules/scak/src/kernel/failover.py +621 -0
  1013. modules/scak/src/kernel/governance.py +710 -0
  1014. modules/scak/src/kernel/governance_v2.py +603 -0
  1015. modules/scak/src/kernel/lazy_evaluator.py +514 -0
  1016. modules/scak/src/kernel/load_testing.py +633 -0
  1017. modules/scak/src/kernel/memory.py +945 -0
  1018. modules/scak/src/kernel/patcher.py +581 -0
  1019. modules/scak/src/kernel/rubric.py +419 -0
  1020. modules/scak/src/kernel/schemas.py +390 -0
  1021. modules/scak/src/kernel/skill_mapper.py +309 -0
  1022. modules/scak/src/kernel/triage.py +149 -0
  1023. modules/scak/src/mocks/__init__.py +99 -0
  1024. modules/scak/tests/__init__.py +1 -0
  1025. modules/scak/tests/test_circuit_breaker.py +403 -0
  1026. modules/scak/tests/test_conflict_resolution.py +287 -0
  1027. modules/scak/tests/test_dual_loop.py +463 -0
  1028. modules/scak/tests/test_enhanced_features.py +421 -0
  1029. modules/scak/tests/test_failover_and_load.py +438 -0
  1030. modules/scak/tests/test_governance.py +185 -0
  1031. modules/scak/tests/test_kernel.py +359 -0
  1032. modules/scak/tests/test_langchain_integration.py +451 -0
  1033. modules/scak/tests/test_lazy_evaluator.py +465 -0
  1034. modules/scak/tests/test_llm_clients.py +122 -0
  1035. modules/scak/tests/test_memory_controller.py +528 -0
  1036. modules/scak/tests/test_orchestrator.py +181 -0
  1037. modules/scak/tests/test_phase3_integration.py +265 -0
  1038. modules/scak/tests/test_pubsub_swarm.py +203 -0
  1039. modules/scak/tests/test_reference_implementations.py +240 -0
  1040. modules/scak/tests/test_rubric.py +363 -0
  1041. modules/scak/tests/test_scak_v2.py +651 -0
  1042. modules/scak/tests/test_skill_mapper.py +217 -0
  1043. modules/scak/tests/test_specific_failures.py +393 -0
  1044. modules/scak/tests/test_tool_registry.py +264 -0
  1045. modules/scak/tests/test_tools_and_plugins.py +303 -0
  1046. modules/scak/tests/test_triage.py +596 -0
  1047. modules/scak/tests/test_write_through.py +319 -0
  1048. agent_os_kernel-1.1.0.dist-info/METADATA +0 -400
  1049. agent_os_kernel-1.1.0.dist-info/RECORD +0 -12
  1050. {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.3.0.dist-info}/WHEEL +0 -0
  1051. {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.3.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,954 @@
1
+ """
2
+ CMVK Verification Module - Pure Mathematical Functions
3
+
4
+ This module provides pure functions for calculating drift/hallucination scores
5
+ between two outputs. These functions have no side effects and use only
6
+ numpy/scipy for mathematical operations.
7
+
8
+ Layer 1: The Primitive - Mathematical and adversarial verification.
9
+
10
+ Enhanced Features (v0.2.0):
11
+ - Configurable distance metrics (cosine, euclidean, manhattan, etc.)
12
+ - Dimensional weighting for importance-based drift calculation
13
+ - Threshold profiles for domain-specific verification
14
+ - Explainable drift with per-dimension contributions
15
+ - Batch verification for efficiency
16
+ - Audit trail integration
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ from collections.abc import Sequence
22
+ from dataclasses import dataclass
23
+ from enum import Enum
24
+ from typing import TYPE_CHECKING, Any
25
+
26
+ import numpy as np
27
+ from numpy.typing import ArrayLike
28
+
29
+ try:
30
+ from scipy import stats
31
+
32
+ HAS_SCIPY = True
33
+ except ImportError:
34
+ HAS_SCIPY = False
35
+
36
+ if TYPE_CHECKING:
37
+ from .audit import AuditTrail
38
+
39
+
40
class DriftType(Enum):
    """Types of drift/divergence detected between outputs."""

    # The string values are what VerificationScore.to_dict() and
    # aggregate_scores() emit through ``.value``.

    # Assigned by verify_embeddings() for a completed embedding comparison.
    SEMANTIC = "semantic"
    # Assigned when exactly one text is empty, when embedding shapes differ,
    # and by verify_sequences().
    STRUCTURAL = "structural"
    # Assigned by verify_distributions().
    NUMERICAL = "numerical"
    # Assigned when both inputs are empty.
    LEXICAL = "lexical"
47
+
48
+
49
@dataclass(frozen=True)
class VerificationScore:
    """
    Frozen record describing how far two outputs diverge.

    Attributes:
        drift_score: Overall drift, from 0.0 (identical) to 1.0 (completely different)
        confidence: How much the score can be trusted (0.0 to 1.0)
        drift_type: The dominant kind of drift that was detected
        details: Component scores and other per-method data
        explanation: Optional per-dimension drift explanation (CMVK-010)
    """

    drift_score: float
    confidence: float
    drift_type: DriftType
    details: dict
    explanation: dict | None = None

    def passed(self, threshold: float = 0.3) -> bool:
        """Return True when the drift score does not exceed ``threshold``."""
        return threshold >= self.drift_score

    def to_dict(self) -> dict:
        """Return a plain-dict view; the drift type is exported by its string value."""
        return dict(
            drift_score=self.drift_score,
            confidence=self.confidence,
            drift_type=self.drift_type.value,
            details=self.details,
            explanation=self.explanation,
        )
81
+
82
+
83
@dataclass(frozen=True)
class DriftExplanation:
    """
    Per-dimension breakdown of the drift between two vectors (CMVK-010).

    Attributes:
        primary_drift_dimension: Name or index of the dimension contributing most
        dimension_contributions: Dimension -> share of the total drift
        top_contributors: The highest-contributing (dimension, share) pairs
        metric_used: Name of the distance metric that produced the drift
        interpretation: Human-readable summary of the drift
    """

    primary_drift_dimension: str | int
    dimension_contributions: dict[str | int, float]
    top_contributors: list[tuple[str | int, float]]
    metric_used: str
    interpretation: str

    def to_dict(self) -> dict:
        """Return the explanation as a plain dictionary (fields are not copied)."""
        return dict(
            primary_drift_dimension=self.primary_drift_dimension,
            dimension_contributions=self.dimension_contributions,
            top_contributors=self.top_contributors,
            metric_used=self.metric_used,
            interpretation=self.interpretation,
        )
111
+
112
+
113
def verify(output_a: str, output_b: str) -> VerificationScore:
    """
    Score the drift/hallucination between two text outputs.

    Combines a lexical, a structural and a numerical drift component into one
    weighted score. No state is kept between calls.

    Args:
        output_a: First output (typically from model A / generator)
        output_b: Second output (typically from model B / verifier)

    Returns:
        VerificationScore whose ``details`` hold the three component results
        and the weights used to combine them.

    Example:
        >>> score = verify("def add(a, b): return a + b", "def add(x, y): return x + y")
        >>> drift = score.drift_score  # value in [0.0, 1.0]
    """
    # Guard clauses: nothing to compare when one or both sides are empty.
    if not (output_a or output_b):
        return VerificationScore(
            drift_score=0.0,
            confidence=1.0,
            drift_type=DriftType.LEXICAL,
            details={"reason": "both_empty"},
        )
    if not (output_a and output_b):
        return VerificationScore(
            drift_score=1.0,
            confidence=1.0,
            drift_type=DriftType.STRUCTURAL,
            details={"reason": "one_empty"},
        )

    lexical = _lexical_drift(output_a, output_b)
    structural = _structural_drift(output_a, output_b)
    numerical = _numerical_drift(output_a, output_b)

    weights = {"lexical": 0.3, "structural": 0.4, "numerical": 0.3}
    blended = (
        weights["lexical"] * lexical["score"]
        + weights["structural"] * structural["score"]
        + weights["numerical"] * numerical["score"]
    )

    # The component with the largest score names the primary drift type
    # (first one wins on ties, in the order listed here).
    by_type = {
        DriftType.LEXICAL: lexical["score"],
        DriftType.STRUCTURAL: structural["score"],
        DriftType.NUMERICAL: numerical["score"],
    }
    dominant = max(by_type, key=by_type.get)

    # Confidence drops as the three methods disagree (larger spread).
    component_scores = list(by_type.values())
    if len(component_scores) > 1:
        confidence = 1.0 - np.std(component_scores)
    else:
        confidence = 0.8

    return VerificationScore(
        drift_score=float(np.clip(blended, 0.0, 1.0)),
        confidence=float(np.clip(confidence, 0.0, 1.0)),
        drift_type=dominant,
        details={
            "lexical": lexical,
            "structural": structural,
            "numerical": numerical,
            "weights": weights,
        },
    )
185
+
186
+
187
def verify_embeddings(
    embedding_a: ArrayLike,
    embedding_b: ArrayLike,
    metric: str = "cosine",
    weights: ArrayLike | None = None,
    threshold_profile: str | None = None,
    explain: bool = False,
    dimension_names: list[str] | None = None,
    audit_trail: AuditTrail | None = None,
) -> VerificationScore:
    """
    Calculate drift score between two embedding vectors.

    Enhanced verification function with configurable metrics, weighting,
    threshold profiles, and explainability (CMVK-001 through CMVK-010).

    Args:
        embedding_a: Embedding vector for output A (e.g., claimed values)
        embedding_b: Embedding vector for output B (e.g., observed values)
        metric: Distance metric to use. Options:
            - "cosine": Cosine distance (default, normalizes vectors)
            - "euclidean": Euclidean distance (preserves magnitude - CMVK-001)
            - "manhattan": Manhattan/L1 distance
            - "chebyshev": Maximum absolute difference
            - "mahalanobis": Mahalanobis distance
        weights: Optional weights for each dimension (CMVK-008).
            Higher weights increase that dimension's contribution to drift.
        threshold_profile: Name of threshold profile to use (CMVK-005).
            Options: "carbon", "financial", "medical", "general", "strict"
        explain: If True, include detailed drift explanation (CMVK-010)
        dimension_names: Optional names for dimensions (for explainability)
        audit_trail: Optional AuditTrail instance for logging (CMVK-006).
            Any non-None trail is logged to, even if it evaluates as falsy.

    Returns:
        VerificationScore with drift score, confidence, and optional explanation.
        If the two inputs have different shapes, no distance is computed: the
        result has drift 1.0, confidence 0.5 and reason "shape_mismatch".

    Example:
        >>> # Basic usage
        >>> score = verify_embeddings(claim_vec, obs_vec)

        >>> # With Euclidean distance for magnitude-sensitive comparison
        >>> score = verify_embeddings(
        ...     claim_vec, obs_vec,
        ...     metric="euclidean",
        ...     threshold_profile="carbon",
        ...     explain=True
        ... )

        >>> # With dimensional weighting
        >>> score = verify_embeddings(
        ...     claim_vec, obs_vec,
        ...     metric="euclidean",
        ...     weights=[0.6, 0.4],  # NDVI more important than carbon
        ...     explain=True,
        ...     dimension_names=["ndvi", "carbon_stock"]
        ... )
    """
    from .metrics import calculate_distance, calculate_weighted_distance

    vec_a = np.asarray(embedding_a, dtype=np.float64)
    vec_b = np.asarray(embedding_b, dtype=np.float64)

    # Load threshold profile if specified
    profile = None
    if threshold_profile:
        from .profiles import get_profile

        profile = get_profile(threshold_profile)
        # "cosine" is also the parameter default, so an explicit metric="cosine"
        # cannot be told apart from "not specified": the profile's own default
        # metric replaces it in both cases.
        if metric == "cosine" and profile.default_metric != "cosine":
            metric = profile.default_metric

    # Shape validation
    if vec_a.shape != vec_b.shape:
        result = VerificationScore(
            drift_score=1.0,
            confidence=0.5,
            drift_type=DriftType.STRUCTURAL,
            details={"reason": "shape_mismatch", "shape_a": vec_a.shape, "shape_b": vec_b.shape},
        )
        # Identity check, not truthiness: a trail object that evaluates as
        # falsy (e.g. one with no entries yet) must still be logged to.
        if audit_trail is not None:
            _log_to_audit(audit_trail, vec_a, vec_b, result, metric, threshold_profile)
        return result

    # Calculate distance with appropriate function
    if weights is not None:
        metric_result = calculate_weighted_distance(vec_a, vec_b, weights=weights, metric=metric)
    else:
        metric_result = calculate_distance(vec_a, vec_b, metric=metric)

    # Build drift score from normalized distance
    drift_score = float(np.clip(metric_result.normalized, 0.0, 1.0))

    # Calculate confidence based on vector properties
    confidence = _calculate_embedding_confidence(vec_a, vec_b)

    # Build explanation if requested
    explanation_dict = None
    if explain:
        explanation = _build_drift_explanation(
            vec_a, vec_b, metric_result, weights, dimension_names
        )
        explanation_dict = explanation.to_dict()

    # Build details
    details = {
        "metric": metric,
        "raw_distance": metric_result.distance,
        "normalized_distance": metric_result.normalized,
        **metric_result.details,
    }

    # Add profile info if used
    if profile is not None:
        passed = profile.is_within_threshold(drift_score, confidence)
        severity = profile.get_severity(drift_score)
        details["profile"] = {
            "name": profile.name,
            "drift_threshold": profile.drift_threshold,
            "passed": passed,
            "severity": severity,
        }

    result = VerificationScore(
        drift_score=drift_score,
        confidence=confidence,
        drift_type=DriftType.SEMANTIC,
        details=details,
        explanation=explanation_dict,
    )

    # Log to audit trail if provided (identity check, see above)
    if audit_trail is not None:
        _log_to_audit(audit_trail, vec_a, vec_b, result, metric, threshold_profile)

    return result
323
+
324
+
325
+ def verify_embeddings_batch(
326
+ embeddings_a: Sequence[ArrayLike],
327
+ embeddings_b: Sequence[ArrayLike],
328
+ metric: str = "cosine",
329
+ weights: ArrayLike | None = None,
330
+ threshold_profile: str | None = None,
331
+ explain: bool = False,
332
+ dimension_names: list[str] | None = None,
333
+ audit_trail: AuditTrail | None = None,
334
+ ) -> list[VerificationScore]:
335
+ """
336
+ Verify multiple embedding pairs efficiently (CMVK-004).
337
+
338
+ Processes all pairs with consistent settings and optional audit logging.
339
+
340
+ Args:
341
+ embeddings_a: Sequence of embedding vectors from source A
342
+ embeddings_b: Sequence of embedding vectors from source B
343
+ metric: Distance metric (applied to all pairs)
344
+ weights: Dimensional weights (applied to all pairs)
345
+ threshold_profile: Threshold profile name
346
+ explain: Whether to include explanations
347
+ dimension_names: Optional dimension names for explainability
348
+ audit_trail: Optional AuditTrail for logging
349
+
350
+ Returns:
351
+ List of VerificationScore for each pair
352
+
353
+ Raises:
354
+ ValueError: If sequence lengths don't match
355
+ """
356
+ if len(embeddings_a) != len(embeddings_b):
357
+ raise ValueError(
358
+ f"Length mismatch: embeddings_a has {len(embeddings_a)} items, "
359
+ f"embeddings_b has {len(embeddings_b)} items"
360
+ )
361
+
362
+ results = []
363
+ for vec_a, vec_b in zip(embeddings_a, embeddings_b, strict=True):
364
+ score = verify_embeddings(
365
+ vec_a,
366
+ vec_b,
367
+ metric=metric,
368
+ weights=weights,
369
+ threshold_profile=threshold_profile,
370
+ explain=explain,
371
+ dimension_names=dimension_names,
372
+ audit_trail=audit_trail,
373
+ )
374
+ results.append(score)
375
+
376
+ return results
377
+
378
+
379
def aggregate_embedding_scores(
    scores: Sequence[VerificationScore],
    threshold_profile: str | None = None,
) -> dict[str, Any]:
    """
    Summarize a set of embedding verification scores.

    Args:
        scores: Sequence of VerificationScore objects
        threshold_profile: Optional profile name. When given, pass/fail and
            severity come from the profile; otherwise a score passes when its
            drift is at most 0.3.

    Returns:
        Dictionary of counts, pass rate and drift/confidence statistics.
        ``{"count": 0}`` for empty input. With a profile, the result also
        carries "severity_distribution" and "profile_used".
    """
    if not scores:
        return {"count": 0}

    profile = None
    if threshold_profile:
        from .profiles import get_profile

        profile = get_profile(threshold_profile)

    total = len(scores)
    drifts = [item.drift_score for item in scores]
    confidences = [item.confidence for item in scores]

    severity_counts: dict[str, int] = {}
    if profile:
        passed_count = len(
            [
                item
                for item in scores
                if profile.is_within_threshold(item.drift_score, item.confidence)
            ]
        )
        severity_counts = {
            "pass": 0,
            "warning": 0,
            "critical": 0,
            "severe": 0,
        }
        for item in scores:
            severity_counts[profile.get_severity(item.drift_score)] += 1
    else:
        passed_count = len([item for item in scores if item.drift_score <= 0.3])

    result: dict[str, Any] = {
        "count": total,
        "passed_count": passed_count,
        "failed_count": total - passed_count,
        "pass_rate": passed_count / total,
        "mean_drift": float(np.mean(drifts)),
        "std_drift": float(np.std(drifts)),
        "min_drift": float(np.min(drifts)),
        "max_drift": float(np.max(drifts)),
        "median_drift": float(np.median(drifts)),
        "mean_confidence": float(np.mean(confidences)),
        "p95_drift": float(np.percentile(drifts, 95)),
    }

    if severity_counts and profile:
        result["severity_distribution"] = severity_counts
        result["profile_used"] = profile.name

    return result
442
+
443
+
444
+ # ============================================================================
445
+ # Explainability Functions (CMVK-010)
446
+ # ============================================================================
447
+
448
+
449
def _build_drift_explanation(
    vec_a: np.ndarray,
    vec_b: np.ndarray,
    metric_result: Any,
    weights: ArrayLike | None,
    dimension_names: list[str] | None,
) -> DriftExplanation:
    """
    Break the drift between two vectors down by dimension.

    Each dimension's contribution is its (optionally weighted) absolute
    difference divided by the sum over all dimensions; all contributions are
    zero when that sum is not positive. Dimensions are keyed by name only when
    ``dimension_names`` has one entry per dimension, otherwise by index.
    """
    abs_delta = np.abs(vec_a - vec_b)

    # Scale each dimension's difference by its weight, when weights are given.
    if weights is None:
        scaled = abs_delta
    else:
        scaled = abs_delta * np.asarray(weights, dtype=np.float64)

    denominator = np.sum(scaled)
    if denominator > 0:
        contributions = scaled / denominator
    else:
        contributions = np.zeros_like(abs_delta)

    contrib_dict: dict[str | int, float]
    if dimension_names and len(dimension_names) == len(contributions):
        contrib_dict = dict(zip(dimension_names, map(float, contributions), strict=False))
    else:
        contrib_dict = {index: float(value) for index, value in enumerate(contributions)}

    # Largest share first; the sort is stable, so ties keep dimension order.
    ranked: list[tuple[str | int, float]] = sorted(
        contrib_dict.items(), key=lambda item: item[1], reverse=True
    )
    primary_dim: str | int = ranked[0][0]
    top_contributors: list[tuple[str | int, float]] = ranked[:5]

    return DriftExplanation(
        primary_drift_dimension=primary_dim,
        dimension_contributions=contrib_dict,
        top_contributors=top_contributors,
        metric_used=metric_result.metric.value,
        interpretation=_generate_interpretation(
            vec_a, vec_b, primary_dim, top_contributors, dimension_names
        ),
    )
501
+
502
+
503
+ def _generate_interpretation(
504
+ vec_a: np.ndarray,
505
+ vec_b: np.ndarray,
506
+ primary_dim: str | int,
507
+ top_contributors: list[tuple[str | int, float]],
508
+ dimension_names: list[str] | None,
509
+ ) -> str:
510
+ """Generate human-readable interpretation of drift."""
511
+ # Get primary dimension index
512
+ if isinstance(primary_dim, str) and dimension_names:
513
+ idx = dimension_names.index(primary_dim)
514
+ else:
515
+ idx = primary_dim if isinstance(primary_dim, int) else 0
516
+
517
+ diff_value = abs(vec_a[idx] - vec_b[idx])
518
+ pct_diff = (diff_value / abs(vec_a[idx])) * 100 if vec_a[idx] != 0 else float("inf")
519
+
520
+ dim_name = primary_dim if isinstance(primary_dim, str) else f"dimension {primary_dim}"
521
+
522
+ if len(top_contributors) > 1 and top_contributors[0][1] > 0.5:
523
+ return (
524
+ f"Drift primarily driven by {dim_name} "
525
+ f"({top_contributors[0][1]*100:.1f}% of total drift). "
526
+ f"Value changed from {vec_a[idx]:.4f} to {vec_b[idx]:.4f} "
527
+ f"({pct_diff:.1f}% difference)."
528
+ )
529
+ elif len(top_contributors) > 1:
530
+ top_names = [str(c[0]) for c, _ in zip(top_contributors[:3], range(3), strict=False)]
531
+ return (
532
+ f"Drift distributed across multiple dimensions. "
533
+ f"Top contributors: {', '.join(top_names)}. "
534
+ f"Largest single change in {dim_name}."
535
+ )
536
+ else:
537
+ return f"Single dimension drift in {dim_name}."
538
+
539
+
540
+ def _calculate_embedding_confidence(
541
+ vec_a: np.ndarray,
542
+ vec_b: np.ndarray,
543
+ ) -> float:
544
+ """Calculate confidence score for embedding verification."""
545
+ # Base confidence
546
+ confidence = 0.9
547
+
548
+ # Reduce confidence for very small vectors (less reliable)
549
+ if len(vec_a) < 10:
550
+ confidence *= 0.9
551
+
552
+ # Reduce confidence if vectors have very different magnitudes
553
+ norm_a = np.linalg.norm(vec_a)
554
+ norm_b = np.linalg.norm(vec_b)
555
+ if norm_a > 0 and norm_b > 0:
556
+ magnitude_ratio = min(norm_a, norm_b) / max(norm_a, norm_b)
557
+ if magnitude_ratio < 0.5:
558
+ confidence *= 0.85
559
+
560
+ # Reduce confidence for near-zero vectors
561
+ if norm_a < 1e-6 or norm_b < 1e-6:
562
+ confidence *= 0.7
563
+
564
+ return float(np.clip(confidence, 0.0, 1.0))
565
+
566
+
567
+ def _log_to_audit(
568
+ audit_trail: AuditTrail,
569
+ vec_a: np.ndarray,
570
+ vec_b: np.ndarray,
571
+ result: VerificationScore,
572
+ metric: str,
573
+ profile_name: str | None,
574
+ ) -> None:
575
+ """Log verification to audit trail."""
576
+ passed = result.details.get("profile", {}).get("passed", result.drift_score <= 0.3)
577
+
578
+ audit_trail.log(
579
+ operation="verify_embeddings",
580
+ inputs={
581
+ "embedding_a_shape": vec_a.shape,
582
+ "embedding_b_shape": vec_b.shape,
583
+ "embedding_a_norm": float(np.linalg.norm(vec_a)),
584
+ "embedding_b_norm": float(np.linalg.norm(vec_b)),
585
+ },
586
+ drift_score=result.drift_score,
587
+ confidence=result.confidence,
588
+ metric_used=metric,
589
+ profile_used=profile_name,
590
+ passed=passed,
591
+ result_details={
592
+ "drift_type": result.drift_type.value,
593
+ "raw_distance": result.details.get("raw_distance"),
594
+ },
595
+ )
596
+
597
+
598
def verify_distributions(dist_a: ArrayLike, dist_b: ArrayLike) -> VerificationScore:
    """
    Score the drift between two probability distributions.

    The drift score is the Jensen-Shannon divergence (natural log) divided by
    ln(2), clipped to [0, 1]. KL divergence and total variation distance are
    reported in ``details`` only.

    Args:
        dist_a: First distribution (rescaled to sum to 1)
        dist_b: Second distribution (rescaled to sum to 1)

    Returns:
        VerificationScore with fixed confidence 0.9 and NUMERICAL drift type
    """
    eps = 1e-10

    def _as_distribution(raw: ArrayLike) -> np.ndarray:
        # Rescale to unit mass (1e-10 guards a zero sum), then floor entries
        # at eps so no logarithm below sees a zero.
        values = np.asarray(raw, dtype=np.float64)
        values = values / (values.sum() + 1e-10)
        return np.clip(values, eps, 1.0)

    p = _as_distribution(dist_a)
    q = _as_distribution(dist_b)
    mixture = 0.5 * (p + q)

    if HAS_SCIPY:
        kl_div = stats.entropy(p, q)
        js_div = 0.5 * stats.entropy(p, mixture) + 0.5 * stats.entropy(q, mixture)
    else:
        # Pure-numpy fallback when scipy is not installed.
        kl_div = float(np.sum(p * np.log(p / q)))
        js_div = 0.5 * np.sum(p * np.log(p / mixture)) + 0.5 * np.sum(q * np.log(q / mixture))

    tv_dist = 0.5 * np.sum(np.abs(p - q))

    # JS divergence with natural log is bounded by ln(2); rescale to [0, 1].
    drift_score = js_div / np.log(2)

    return VerificationScore(
        drift_score=float(np.clip(drift_score, 0.0, 1.0)),
        confidence=0.9,
        drift_type=DriftType.NUMERICAL,
        details={
            "kl_divergence": float(kl_div),
            "js_divergence": float(js_div),
            "total_variation": float(tv_dist),
        },
    )
651
+
652
+
653
def verify_sequences(seq_a: Sequence[str], seq_b: Sequence[str]) -> VerificationScore:
    """
    Score the drift between two sequences of tokens/items.

    Blends three measures: normalized Levenshtein distance (weight 0.4),
    Jaccard drift over the item sets (0.3) and longest-common-subsequence
    drift (0.3).

    Args:
        seq_a: First sequence
        seq_b: Second sequence

    Returns:
        VerificationScore with fixed confidence 0.85 and STRUCTURAL drift type;
        two empty sequences yield drift 0.0 with confidence 1.0.
    """
    if not (seq_a or seq_b):
        return VerificationScore(
            drift_score=0.0,
            confidence=1.0,
            drift_type=DriftType.LEXICAL,
            details={"reason": "both_empty"},
        )

    len_a, len_b = len(seq_a), len(seq_b)

    # Edit distance, scaled by the longer sequence.
    edit_dist = _levenshtein_distance(seq_a, seq_b)
    longest = max(len_a, len_b)
    normalized_edit = edit_dist / longest if longest > 0 else 0.0

    # Order-insensitive overlap of distinct items.
    items_a, items_b = set(seq_a), set(seq_b)
    shared = len(items_a & items_b)
    combined = len(items_a | items_b)
    jaccard = shared / combined if combined > 0 else 1.0

    # Order-aware overlap via the longest common subsequence.
    lcs_len = _lcs_length(seq_a, seq_b)
    total_len = len_a + len_b
    lcs_ratio = 2 * lcs_len / total_len if total_len > 0 else 1.0

    drift_score = 0.4 * normalized_edit + 0.3 * (1.0 - jaccard) + 0.3 * (1.0 - lcs_ratio)

    return VerificationScore(
        drift_score=float(np.clip(drift_score, 0.0, 1.0)),
        confidence=0.85,
        drift_type=DriftType.STRUCTURAL,
        details={
            "edit_distance": edit_dist,
            "normalized_edit": float(normalized_edit),
            "jaccard_similarity": float(jaccard),
            "lcs_ratio": float(lcs_ratio),
        },
    )
706
+
707
+
708
+ # ============================================================================
709
+ # Internal pure functions
710
+ # ============================================================================
711
+
712
+
713
+ def _lexical_drift(text_a: str, text_b: str) -> dict:
714
+ """
715
+ Calculate lexical drift between two texts.
716
+
717
+ Pure function - no side effects.
718
+ """
719
+ # Character-level comparison
720
+ chars_a = set(text_a)
721
+ chars_b = set(text_b)
722
+ char_jaccard = len(chars_a & chars_b) / len(chars_a | chars_b) if (chars_a | chars_b) else 1.0
723
+
724
+ # Word-level comparison
725
+ words_a = set(text_a.split())
726
+ words_b = set(text_b.split())
727
+ word_jaccard = len(words_a & words_b) / len(words_a | words_b) if (words_a | words_b) else 1.0
728
+
729
+ # Length ratio
730
+ len_a, len_b = len(text_a), len(text_b)
731
+ length_ratio = min(len_a, len_b) / max(len_a, len_b) if max(len_a, len_b) > 0 else 1.0
732
+
733
+ # Combined score (lower similarity = higher drift)
734
+ similarity = 0.3 * char_jaccard + 0.5 * word_jaccard + 0.2 * length_ratio
735
+ drift = 1.0 - similarity
736
+
737
+ return {
738
+ "score": drift,
739
+ "char_jaccard": char_jaccard,
740
+ "word_jaccard": word_jaccard,
741
+ "length_ratio": length_ratio,
742
+ }
743
+
744
+
745
+ def _structural_drift(text_a: str, text_b: str) -> dict:
746
+ """
747
+ Calculate structural drift between two texts.
748
+
749
+ Analyzes structure like line count, indentation, code patterns.
750
+ Pure function - no side effects.
751
+ """
752
+ lines_a = text_a.split("\n")
753
+ lines_b = text_b.split("\n")
754
+
755
+ # Line count difference
756
+ line_count_a, line_count_b = len(lines_a), len(lines_b)
757
+ line_ratio = (
758
+ min(line_count_a, line_count_b) / max(line_count_a, line_count_b)
759
+ if max(line_count_a, line_count_b) > 0
760
+ else 1.0
761
+ )
762
+
763
+ # Indentation pattern
764
+ indent_a = [len(line) - len(line.lstrip()) for line in lines_a if line.strip()]
765
+ indent_b = [len(line) - len(line.lstrip()) for line in lines_b if line.strip()]
766
+
767
+ if indent_a and indent_b:
768
+ avg_indent_a = np.mean(indent_a)
769
+ avg_indent_b = np.mean(indent_b)
770
+ max_indent = max(avg_indent_a, avg_indent_b, 1)
771
+ indent_similarity = 1.0 - abs(avg_indent_a - avg_indent_b) / max_indent
772
+ else:
773
+ indent_similarity = 1.0 if (not indent_a and not indent_b) else 0.5
774
+
775
+ # Code pattern markers (for code comparison)
776
+ patterns = ["def ", "class ", "import ", "return ", "if ", "for ", "while ", "try:", "except"]
777
+ pattern_a = {p for p in patterns if p in text_a}
778
+ pattern_b = {p for p in patterns if p in text_b}
779
+ pattern_jaccard = (
780
+ len(pattern_a & pattern_b) / len(pattern_a | pattern_b) if (pattern_a | pattern_b) else 1.0
781
+ )
782
+
783
+ # Combined
784
+ similarity = 0.3 * line_ratio + 0.3 * indent_similarity + 0.4 * pattern_jaccard
785
+ drift = 1.0 - similarity
786
+
787
+ return {
788
+ "score": drift,
789
+ "line_ratio": line_ratio,
790
+ "indent_similarity": indent_similarity,
791
+ "pattern_jaccard": pattern_jaccard,
792
+ }
793
+
794
+
795
+ def _numerical_drift(text_a: str, text_b: str) -> dict:
796
+ """
797
+ Calculate numerical drift by extracting and comparing numbers.
798
+
799
+ Pure function - no side effects.
800
+ """
801
+ import re
802
+
803
+ # Extract numbers from both texts
804
+ number_pattern = r"-?\d+\.?\d*"
805
+ numbers_a = [float(n) for n in re.findall(number_pattern, text_a)]
806
+ numbers_b = [float(n) for n in re.findall(number_pattern, text_b)]
807
+
808
+ if not numbers_a and not numbers_b:
809
+ return {"score": 0.0, "reason": "no_numbers"}
810
+
811
+ if not numbers_a or not numbers_b:
812
+ return {"score": 0.5, "reason": "numbers_only_in_one"}
813
+
814
+ # Compare statistics
815
+ mean_a, mean_b = np.mean(numbers_a), np.mean(numbers_b)
816
+ std_a, std_b = np.std(numbers_a), np.std(numbers_b)
817
+
818
+ # Relative difference in means
819
+ max_mean = max(abs(mean_a), abs(mean_b), 1e-10)
820
+ mean_diff = abs(mean_a - mean_b) / max_mean
821
+
822
+ # Relative difference in stds
823
+ max_std = max(std_a, std_b, 1e-10)
824
+ std_diff = abs(std_a - std_b) / max_std if max_std > 1e-10 else 0.0
825
+
826
+ # Count difference
827
+ count_ratio = min(len(numbers_a), len(numbers_b)) / max(len(numbers_a), len(numbers_b))
828
+
829
+ # Combined
830
+ drift = 0.4 * min(mean_diff, 1.0) + 0.3 * min(std_diff, 1.0) + 0.3 * (1.0 - count_ratio)
831
+
832
+ return {
833
+ "score": drift,
834
+ "mean_a": mean_a,
835
+ "mean_b": mean_b,
836
+ "std_a": std_a,
837
+ "std_b": std_b,
838
+ "count_a": len(numbers_a),
839
+ "count_b": len(numbers_b),
840
+ }
841
+
842
+
843
+ def _levenshtein_distance(seq_a: Sequence, seq_b: Sequence) -> int:
844
+ """
845
+ Calculate Levenshtein edit distance between two sequences.
846
+
847
+ Pure function using dynamic programming.
848
+ """
849
+ m, n = len(seq_a), len(seq_b)
850
+
851
+ if m == 0:
852
+ return n
853
+ if n == 0:
854
+ return m
855
+
856
+ # Use numpy for efficiency
857
+ dp = np.zeros((m + 1, n + 1), dtype=np.int32)
858
+ dp[:, 0] = np.arange(m + 1)
859
+ dp[0, :] = np.arange(n + 1)
860
+
861
+ for i in range(1, m + 1):
862
+ for j in range(1, n + 1):
863
+ cost = 0 if seq_a[i - 1] == seq_b[j - 1] else 1
864
+ dp[i, j] = min(
865
+ dp[i - 1, j] + 1, # deletion
866
+ dp[i, j - 1] + 1, # insertion
867
+ dp[i - 1, j - 1] + cost, # substitution
868
+ )
869
+
870
+ return int(dp[m, n])
871
+
872
+
873
+ def _lcs_length(seq_a: Sequence, seq_b: Sequence) -> int:
874
+ """
875
+ Calculate length of Longest Common Subsequence.
876
+
877
+ Pure function using dynamic programming.
878
+ """
879
+ m, n = len(seq_a), len(seq_b)
880
+
881
+ if m == 0 or n == 0:
882
+ return 0
883
+
884
+ dp = np.zeros((m + 1, n + 1), dtype=np.int32)
885
+
886
+ for i in range(1, m + 1):
887
+ for j in range(1, n + 1):
888
+ if seq_a[i - 1] == seq_b[j - 1]:
889
+ dp[i, j] = dp[i - 1, j - 1] + 1
890
+ else:
891
+ dp[i, j] = max(dp[i - 1, j], dp[i, j - 1])
892
+
893
+ return int(dp[m, n])
894
+
895
+
896
+ # ============================================================================
897
+ # Batch verification functions
898
+ # ============================================================================
899
+
900
+
901
+ def verify_batch(outputs_a: Sequence[str], outputs_b: Sequence[str]) -> list[VerificationScore]:
902
+ """
903
+ Verify multiple output pairs.
904
+
905
+ Pure function that processes pairs in sequence.
906
+
907
+ Args:
908
+ outputs_a: Sequence of outputs from source A
909
+ outputs_b: Sequence of outputs from source B (same length as outputs_a)
910
+
911
+ Returns:
912
+ List of VerificationScore for each pair
913
+ """
914
+ if len(outputs_a) != len(outputs_b):
915
+ raise ValueError(
916
+ f"Length mismatch: outputs_a has {len(outputs_a)} items, "
917
+ f"outputs_b has {len(outputs_b)} items"
918
+ )
919
+
920
+ return [verify(a, b) for a, b in zip(outputs_a, outputs_b, strict=False)]
921
+
922
+
923
def aggregate_scores(scores: Sequence[VerificationScore]) -> dict:
    """
    Summarize verification scores into aggregate statistics.

    Args:
        scores: Sequence of VerificationScore objects

    Returns:
        Dictionary with the count, drift statistics (mean, std, min, max,
        median), mean confidence and a tally of drift types by string value.
        ``{"count": 0}`` for empty input.
    """
    if not scores:
        return {"count": 0}

    drifts = [item.drift_score for item in scores]
    confidences = [item.confidence for item in scores]

    # Tally drift types in first-seen order.
    type_tally: dict[str, int] = {}
    for label in (item.drift_type.value for item in scores):
        type_tally[label] = type_tally.get(label, 0) + 1

    summary = {"count": len(scores)}
    summary["mean_drift"] = float(np.mean(drifts))
    summary["std_drift"] = float(np.std(drifts))
    summary["min_drift"] = float(np.min(drifts))
    summary["max_drift"] = float(np.max(drifts))
    summary["median_drift"] = float(np.median(drifts))
    summary["mean_confidence"] = float(np.mean(confidences))
    summary["drift_type_distribution"] = type_tally
    return summary