agent-os-kernel 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_os/__init__.py +66 -4
- agent_os/agents_compat.py +286 -0
- agent_os/base_agent.py +308 -0
- agent_os/cli.py +1079 -19
- agent_os/integrations/__init__.py +37 -2
- agent_os/integrations/openai_adapter.py +502 -0
- agent_os/integrations/semantic_kernel_adapter.py +569 -0
- agent_os/stateless.py +349 -0
- agent_os_kernel-1.2.0.dist-info/METADATA +676 -0
- agent_os_kernel-1.2.0.dist-info/RECORD +1053 -0
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.2.0.dist-info}/entry_points.txt +0 -1
- modules/amb/.github/workflows/ci.yml +102 -0
- modules/amb/.github/workflows/publish.yml +146 -0
- modules/amb/.gitignore +134 -0
- modules/amb/CHANGELOG.md +118 -0
- modules/amb/CONTRIBUTING.md +141 -0
- modules/amb/LICENSE +21 -0
- modules/amb/README.md +188 -0
- modules/amb/amb_core/__init__.py +175 -0
- modules/amb/amb_core/adapters/__init__.py +55 -0
- modules/amb/amb_core/adapters/aws_sqs_broker.py +374 -0
- modules/amb/amb_core/adapters/azure_servicebus_broker.py +338 -0
- modules/amb/amb_core/adapters/kafka_broker.py +258 -0
- modules/amb/amb_core/adapters/nats_broker.py +283 -0
- modules/amb/amb_core/adapters/rabbitmq_broker.py +233 -0
- modules/amb/amb_core/adapters/redis_broker.py +260 -0
- modules/amb/amb_core/broker.py +143 -0
- modules/amb/amb_core/bus.py +479 -0
- modules/amb/amb_core/cloudevents.py +507 -0
- modules/amb/amb_core/dlq.py +343 -0
- modules/amb/amb_core/hf_utils.py +534 -0
- modules/amb/amb_core/memory_broker.py +408 -0
- modules/amb/amb_core/models.py +139 -0
- modules/amb/amb_core/persistence.py +527 -0
- modules/amb/amb_core/schema.py +292 -0
- modules/amb/amb_core/tracing.py +356 -0
- modules/amb/examples/advanced_features.py +223 -0
- modules/amb/examples/backpressure_demo.py +225 -0
- modules/amb/examples/basic_usage.py +117 -0
- modules/amb/examples/tracing_demo.py +104 -0
- modules/amb/experiments/README.md +52 -0
- modules/amb/experiments/reproduce_results.py +467 -0
- modules/amb/experiments/results.json +324 -0
- modules/amb/paper/README.md +40 -0
- modules/amb/paper/paper.tex +365 -0
- modules/amb/paper/whitepaper.md +377 -0
- modules/amb/pyproject.toml +117 -0
- modules/amb/tests/__init__.py +1 -0
- modules/amb/tests/test_backpressure_priority.py +280 -0
- modules/amb/tests/test_bus.py +198 -0
- modules/amb/tests/test_cloudevents.py +443 -0
- modules/amb/tests/test_features.py +531 -0
- modules/amb/tests/test_models.py +74 -0
- modules/amb/tests/test_tracing.py +254 -0
- modules/atr/.github/workflows/ci.yml +101 -0
- modules/atr/.github/workflows/publish.yml +140 -0
- modules/atr/.gitignore +134 -0
- modules/atr/.pre-commit-config.yaml +37 -0
- modules/atr/CHANGELOG.md +39 -0
- modules/atr/CONTRIBUTING.md +96 -0
- modules/atr/IMPLEMENTATION_SUMMARY.md +143 -0
- modules/atr/README.md +180 -0
- modules/atr/atr/__init__.py +638 -0
- modules/atr/atr/access.py +346 -0
- modules/atr/atr/composition.py +643 -0
- modules/atr/atr/decorator.py +355 -0
- modules/atr/atr/executor.py +382 -0
- modules/atr/atr/health.py +555 -0
- modules/atr/atr/hf_utils.py +447 -0
- modules/atr/atr/injection.py +420 -0
- modules/atr/atr/metrics.py +438 -0
- modules/atr/atr/policies.py +401 -0
- modules/atr/atr/py.typed +2 -0
- modules/atr/atr/registry.py +450 -0
- modules/atr/atr/schema.py +478 -0
- modules/atr/atr/tools/safe/__init__.py +73 -0
- modules/atr/atr/tools/safe/calculator.py +380 -0
- modules/atr/atr/tools/safe/datetime_tool.py +441 -0
- modules/atr/atr/tools/safe/file_reader.py +400 -0
- modules/atr/atr/tools/safe/http_client.py +314 -0
- modules/atr/atr/tools/safe/json_parser.py +372 -0
- modules/atr/atr/tools/safe/text_tool.py +526 -0
- modules/atr/atr/tools/safe/toolkit.py +173 -0
- modules/atr/docs/PYPI_SETUP.md +113 -0
- modules/atr/examples/README.md +27 -0
- modules/atr/examples/demo.py +144 -0
- modules/atr/examples/sandbox_demo.py +218 -0
- modules/atr/experiments/README.md +69 -0
- modules/atr/experiments/reproduce_results.py +509 -0
- modules/atr/experiments/results/.gitkeep +0 -0
- modules/atr/experiments/results/results_20260123_140334.json +71 -0
- modules/atr/paper/README.md +36 -0
- modules/atr/paper/figures/.gitkeep +0 -0
- modules/atr/paper/references.bib +84 -0
- modules/atr/paper/structure.tex +293 -0
- modules/atr/paper/whitepaper.md +234 -0
- modules/atr/pyproject.toml +148 -0
- modules/atr/requirements.txt +1 -0
- modules/atr/setup.py +30 -0
- modules/atr/tests/__init__.py +1 -0
- modules/atr/tests/test_decorator.py +317 -0
- modules/atr/tests/test_executor.py +245 -0
- modules/atr/tests/test_integration_executor.py +184 -0
- modules/atr/tests/test_registry.py +312 -0
- modules/atr/tests/test_schema.py +182 -0
- modules/atr/tests/test_v2_features.py +708 -0
- modules/caas/.dockerignore +63 -0
- modules/caas/.github/ISSUE_TEMPLATE/bug_report.md +38 -0
- modules/caas/.github/ISSUE_TEMPLATE/custom.md +10 -0
- modules/caas/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
- modules/caas/.github/workflows/ci.yml +100 -0
- modules/caas/.github/workflows/lint.yml +39 -0
- modules/caas/.github/workflows/publish-pypi.yml +124 -0
- modules/caas/.gitignore +73 -0
- modules/caas/.pre-commit-config.yaml +33 -0
- modules/caas/CHANGELOG.md +58 -0
- modules/caas/CONTRIBUTING.md +346 -0
- modules/caas/Dockerfile +41 -0
- modules/caas/LICENSE +21 -0
- modules/caas/MANIFEST.in +11 -0
- modules/caas/README.md +158 -0
- modules/caas/benchmarks/README.md +255 -0
- modules/caas/benchmarks/create_hf_dataset.py +502 -0
- modules/caas/benchmarks/data/sample_corpus/README.md +86 -0
- modules/caas/benchmarks/data/sample_corpus/auth_module.py +211 -0
- modules/caas/benchmarks/data/sample_corpus/contribution_guide.md +185 -0
- modules/caas/benchmarks/data/sample_corpus/remote_work_policy.html +57 -0
- modules/caas/benchmarks/hf_dataset/README.md +214 -0
- modules/caas/benchmarks/hf_dataset/caas_benchmark_corpus.py +73 -0
- modules/caas/benchmarks/hf_dataset/corpus_preview.json +193 -0
- modules/caas/benchmarks/results/README.md +66 -0
- modules/caas/benchmarks/results/evaluation_2026-01-20.json +121 -0
- modules/caas/benchmarks/run_evaluation.py +561 -0
- modules/caas/benchmarks/statistical_tests.py +289 -0
- modules/caas/benchmarks/verify_sample_corpus.py +83 -0
- modules/caas/docker-compose.yml +38 -0
- modules/caas/docs/CONTEXT_TRIAD.md +462 -0
- modules/caas/docs/CONTRIBUTING.md +346 -0
- modules/caas/docs/ETHICS_AND_LIMITATIONS.md +336 -0
- modules/caas/docs/HEURISTIC_ROUTER.md +442 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY.md +363 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_CONTEXT_TRIAD.md +277 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_HEURISTIC_ROUTER.md +231 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_METADATA_INJECTION.md +258 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_PRAGMATIC_TRUTH.md +212 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_TRUST_GATEWAY.md +319 -0
- modules/caas/docs/LAYER_1_PRIMITIVE.md +202 -0
- modules/caas/docs/METADATA_INJECTION.md +404 -0
- modules/caas/docs/PRAGMATIC_TRUTH.md +431 -0
- modules/caas/docs/RELATED_WORK.md +312 -0
- modules/caas/docs/RELEASE_CHECKLIST.md +219 -0
- modules/caas/docs/RELEASE_GUIDE.md +285 -0
- modules/caas/docs/REPRODUCIBILITY.md +386 -0
- modules/caas/docs/SLIDING_WINDOW.md +387 -0
- modules/caas/docs/STRUCTURE_AWARE_INDEXING.md +158 -0
- modules/caas/docs/TESTING.md +259 -0
- modules/caas/docs/THREAT_MODEL.md +247 -0
- modules/caas/docs/TRUST_GATEWAY.md +575 -0
- modules/caas/docs/VFS.md +298 -0
- modules/caas/examples/agents/enterprise_security_agent.py +414 -0
- modules/caas/examples/agents/intelligent_document_analyzer.py +380 -0
- modules/caas/examples/demos/demo.py +309 -0
- modules/caas/examples/demos/demo_context_triad.py +225 -0
- modules/caas/examples/demos/demo_conversation_manager.py +285 -0
- modules/caas/examples/demos/demo_heuristic_router.py +133 -0
- modules/caas/examples/demos/demo_metadata_injection.py +198 -0
- modules/caas/examples/demos/demo_pragmatic_truth.py +303 -0
- modules/caas/examples/demos/demo_structure_aware.py +140 -0
- modules/caas/examples/demos/demo_time_decay.py +247 -0
- modules/caas/examples/demos/demo_trust_gateway.py +383 -0
- modules/caas/examples/multi_agent/README.md +159 -0
- modules/caas/examples/multi_agent/research_team.py +369 -0
- modules/caas/examples/multi_agent/vfs_collaboration.py +393 -0
- modules/caas/examples/usage/auth_module.py +142 -0
- modules/caas/examples/usage/usage_example.py +173 -0
- modules/caas/experiments/README.md +42 -0
- modules/caas/experiments/reproduce_results.py +462 -0
- modules/caas/paper/ARXIV_METADATA.md +145 -0
- modules/caas/paper/ARXIV_README.md +47 -0
- modules/caas/paper/CHECKLIST.md +103 -0
- modules/caas/paper/GITHUB_RELEASE_NOTES.md +105 -0
- modules/caas/paper/README.md +71 -0
- modules/caas/paper/abstract.md +24 -0
- modules/caas/paper/arxiv_submission.tar +0 -0
- modules/caas/paper/arxiv_submission.zip +0 -0
- modules/caas/paper/build_pdf.py +355 -0
- modules/caas/paper/experiments.md +149 -0
- modules/caas/paper/figures/.gitkeep +0 -0
- modules/caas/paper/figures/README.md +237 -0
- modules/caas/paper/figures/fig1_system_architecture.png +0 -0
- modules/caas/paper/figures/fig1_system_architecture.svg +198 -0
- modules/caas/paper/figures/fig2_context_triad.png +0 -0
- modules/caas/paper/figures/fig2_context_triad.svg +105 -0
- modules/caas/paper/figures/fig3_ablation_results.png +0 -0
- modules/caas/paper/figures/fig3_ablation_results.svg +113 -0
- modules/caas/paper/figures/fig4_routing_latency.png +0 -0
- modules/caas/paper/figures/fig4_routing_latency.svg +97 -0
- modules/caas/paper/intro.md +103 -0
- modules/caas/paper/latex/figures/fig1_system_architecture.png +0 -0
- modules/caas/paper/latex/figures/fig2_context_triad.png +0 -0
- modules/caas/paper/latex/figures/fig3_ablation_results.png +0 -0
- modules/caas/paper/latex/figures/fig4_routing_latency.png +0 -0
- modules/caas/paper/latex/main.tex +468 -0
- modules/caas/paper/latex/references.bib +140 -0
- modules/caas/paper/method.md +350 -0
- modules/caas/paper/outline.md +123 -0
- modules/caas/paper/related_work.md +101 -0
- modules/caas/paper/tables/.gitkeep +0 -0
- modules/caas/paper/tables/results_tables.md +50 -0
- modules/caas/pyproject.toml +172 -0
- modules/caas/requirements.txt +11 -0
- modules/caas/src/caas/__init__.py +232 -0
- modules/caas/src/caas/api/__init__.py +7 -0
- modules/caas/src/caas/api/server.py +1326 -0
- modules/caas/src/caas/caching.py +832 -0
- modules/caas/src/caas/cli.py +208 -0
- modules/caas/src/caas/conversation.py +221 -0
- modules/caas/src/caas/decay.py +118 -0
- modules/caas/src/caas/detection/__init__.py +7 -0
- modules/caas/src/caas/detection/detector.py +236 -0
- modules/caas/src/caas/enrichment.py +127 -0
- modules/caas/src/caas/gateway/__init__.py +24 -0
- modules/caas/src/caas/gateway/trust_gateway.py +471 -0
- modules/caas/src/caas/hf_utils.py +477 -0
- modules/caas/src/caas/ingestion/__init__.py +21 -0
- modules/caas/src/caas/ingestion/processors.py +251 -0
- modules/caas/src/caas/ingestion/structure_parser.py +185 -0
- modules/caas/src/caas/models.py +354 -0
- modules/caas/src/caas/pragmatic_truth.py +441 -0
- modules/caas/src/caas/routing/__init__.py +8 -0
- modules/caas/src/caas/routing/heuristic_router.py +242 -0
- modules/caas/src/caas/storage/__init__.py +7 -0
- modules/caas/src/caas/storage/store.py +450 -0
- modules/caas/src/caas/triad.py +472 -0
- modules/caas/src/caas/tuning/__init__.py +7 -0
- modules/caas/src/caas/tuning/tuner.py +322 -0
- modules/caas/src/caas/vfs/__init__.py +12 -0
- modules/caas/src/caas/vfs/filesystem.py +450 -0
- modules/caas/tests/__init__.py +3 -0
- modules/caas/tests/conftest.py +8 -0
- modules/caas/tests/test_caching.py +628 -0
- modules/caas/tests/test_context_triad.py +385 -0
- modules/caas/tests/test_conversation_manager.py +289 -0
- modules/caas/tests/test_functionality.py +215 -0
- modules/caas/tests/test_heuristic_router.py +370 -0
- modules/caas/tests/test_metadata_injection.py +328 -0
- modules/caas/tests/test_pragmatic_truth.py +322 -0
- modules/caas/tests/test_structure_aware_indexing.py +283 -0
- modules/caas/tests/test_time_decay.py +268 -0
- modules/caas/tests/test_trust_gateway.py +445 -0
- modules/caas/tests/test_vfs.py +298 -0
- modules/cmvk/.github/FUNDING.yml +9 -0
- modules/cmvk/.github/dependabot.yml +54 -0
- modules/cmvk/.github/workflows/ci.yml +205 -0
- modules/cmvk/.github/workflows/publish.yml +143 -0
- modules/cmvk/.gitignore +147 -0
- modules/cmvk/.pre-commit-config.yaml +58 -0
- modules/cmvk/CHANGELOG.md +146 -0
- modules/cmvk/CITATION.cff +48 -0
- modules/cmvk/CONTRIBUTING.md +229 -0
- modules/cmvk/Dockerfile +87 -0
- modules/cmvk/HF_MODEL_CARD.md +185 -0
- modules/cmvk/LICENSE +21 -0
- modules/cmvk/README.md +149 -0
- modules/cmvk/SECURITY.md +114 -0
- modules/cmvk/config/prompts/generator_v1.txt +23 -0
- modules/cmvk/config/prompts/verifier_hostile.txt +32 -0
- modules/cmvk/config/settings.yaml +40 -0
- modules/cmvk/coverage_html/.gitignore +2 -0
- modules/cmvk/coverage_html/class_index.html +658 -0
- modules/cmvk/coverage_html/coverage_html_cb_188fc9a4.js +735 -0
- modules/cmvk/coverage_html/favicon_32_cb_c827f16f.png +0 -0
- modules/cmvk/coverage_html/function_index.html +1978 -0
- modules/cmvk/coverage_html/index.html +255 -0
- modules/cmvk/coverage_html/keybd_closed_cb_900cfef5.png +0 -0
- modules/cmvk/coverage_html/status.json +1 -0
- modules/cmvk/coverage_html/style_cb_5c747636.css +389 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38___init___py.html +315 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_audit_py.html +499 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_benchmarks_py.html +575 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_constitutional_py.html +1001 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_hf_utils_py.html +398 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_metrics_py.html +570 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_profiles_py.html +397 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_types_py.html +109 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_verification_py.html +1053 -0
- modules/cmvk/docs/DIAGRAMS.md +325 -0
- modules/cmvk/docs/architecture.md +345 -0
- modules/cmvk/docs/features.md +308 -0
- modules/cmvk/docs/getting_started.md +279 -0
- modules/cmvk/docs/innovation_layer.md +377 -0
- modules/cmvk/docs/safety.md +281 -0
- modules/cmvk/docs/traceability.md +150 -0
- modules/cmvk/examples/basic_example.py +62 -0
- modules/cmvk/examples/demo_complete_pipeline.py +209 -0
- modules/cmvk/examples/demo_innovation_layer.py +197 -0
- modules/cmvk/examples/example.py +112 -0
- modules/cmvk/examples/model_diversity_comparison.py +110 -0
- modules/cmvk/examples/real_api_integration.py +121 -0
- modules/cmvk/examples/test_full_pipeline.py +303 -0
- modules/cmvk/experiments/FEATURE_2_LATERAL_THINKING.md +187 -0
- modules/cmvk/experiments/README.md +216 -0
- modules/cmvk/experiments/ablation_runner.py +666 -0
- modules/cmvk/experiments/baseline_runner.py +158 -0
- modules/cmvk/experiments/blind_spot_benchmark.py +364 -0
- modules/cmvk/experiments/datasets/README.md +85 -0
- modules/cmvk/experiments/datasets/humaneval_50.json +352 -0
- modules/cmvk/experiments/datasets/humaneval_full.json +1150 -0
- modules/cmvk/experiments/datasets/humaneval_sample.json +32 -0
- modules/cmvk/experiments/datasets/sabotage.json +262 -0
- modules/cmvk/experiments/datasets/sample.json +40 -0
- modules/cmvk/experiments/demo_with_traces.py +110 -0
- modules/cmvk/experiments/efficiency_curve.py +259 -0
- modules/cmvk/experiments/experiment_runner.py +243 -0
- modules/cmvk/experiments/paper_data_generator.py +183 -0
- modules/cmvk/experiments/reproduce_results.py +407 -0
- modules/cmvk/experiments/reproducible_runner.py +352 -0
- modules/cmvk/experiments/sabotage_stress_test.py +311 -0
- modules/cmvk/experiments/test_lateral_thinking.py +116 -0
- modules/cmvk/experiments/test_prosecutor.py +41 -0
- modules/cmvk/experiments/visualize_results.py +735 -0
- modules/cmvk/logs/traces/demo_HumanEval_0_20260121-204900.json +36 -0
- modules/cmvk/notebooks/analysis.ipynb +124 -0
- modules/cmvk/paper/PAPER.md +561 -0
- modules/cmvk/paper/arxiv_checklist.md +230 -0
- modules/cmvk/paper/cmvk_neurips.aux +77 -0
- modules/cmvk/paper/cmvk_neurips.bbl +81 -0
- modules/cmvk/paper/cmvk_neurips.blg +48 -0
- modules/cmvk/paper/cmvk_neurips.out +16 -0
- modules/cmvk/paper/cmvk_neurips.pdf +0 -0
- modules/cmvk/paper/cmvk_neurips.tex +309 -0
- modules/cmvk/paper/figures/ablation.png +0 -0
- modules/cmvk/paper/figures/ablation.svg +39 -0
- modules/cmvk/paper/figures/architecture.png +0 -0
- modules/cmvk/paper/figures/architecture.svg +115 -0
- modules/cmvk/paper/figures/results_bar.png +0 -0
- modules/cmvk/paper/figures/results_bar.svg +70 -0
- modules/cmvk/paper/generate_figures.py +383 -0
- modules/cmvk/paper/neurips_2024.sty +101 -0
- modules/cmvk/paper/references.bib +98 -0
- modules/cmvk/paper/structure.tex +200 -0
- modules/cmvk/pyproject.toml +189 -0
- modules/cmvk/requirements-dev.txt +19 -0
- modules/cmvk/requirements.txt +14 -0
- modules/cmvk/src/cmvk/__init__.py +216 -0
- modules/cmvk/src/cmvk/audit.py +400 -0
- modules/cmvk/src/cmvk/benchmarks.py +476 -0
- modules/cmvk/src/cmvk/constitutional.py +902 -0
- modules/cmvk/src/cmvk/hf_utils.py +299 -0
- modules/cmvk/src/cmvk/metrics.py +471 -0
- modules/cmvk/src/cmvk/profiles.py +298 -0
- modules/cmvk/src/cmvk/py.typed +0 -0
- modules/cmvk/src/cmvk/types.py +10 -0
- modules/cmvk/src/cmvk/verification.py +954 -0
- modules/cmvk/src/cross_model_verification_kernel/__init__.py +91 -0
- modules/cmvk/src/cross_model_verification_kernel/__main__.py +10 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/__init__.py +16 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/base_agent.py +142 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/generator_openai.py +223 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/verifier_anthropic.py +448 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/verifier_gemini.py +481 -0
- modules/cmvk/src/cross_model_verification_kernel/cli.py +570 -0
- modules/cmvk/src/cross_model_verification_kernel/core/__init__.py +26 -0
- modules/cmvk/src/cross_model_verification_kernel/core/graph_memory.py +308 -0
- modules/cmvk/src/cross_model_verification_kernel/core/kernel.py +413 -0
- modules/cmvk/src/cross_model_verification_kernel/core/trace_logger.py +75 -0
- modules/cmvk/src/cross_model_verification_kernel/core/types.py +121 -0
- modules/cmvk/src/cross_model_verification_kernel/datasets/__init__.py +20 -0
- modules/cmvk/src/cross_model_verification_kernel/datasets/humaneval_loader.py +271 -0
- modules/cmvk/src/cross_model_verification_kernel/generator.py +118 -0
- modules/cmvk/src/cross_model_verification_kernel/kernel.py +292 -0
- modules/cmvk/src/cross_model_verification_kernel/models.py +111 -0
- modules/cmvk/src/cross_model_verification_kernel/py.typed +1 -0
- modules/cmvk/src/cross_model_verification_kernel/simple_kernel.py +185 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/__init__.py +94 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/huggingface_upload.py +394 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/sandbox.py +159 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/statistics.py +468 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/visualizer.py +312 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/web_search.py +86 -0
- modules/cmvk/src/cross_model_verification_kernel/verifier.py +257 -0
- modules/cmvk/tests/__init__.py +3 -0
- modules/cmvk/tests/conftest.py +61 -0
- modules/cmvk/tests/integration/__init__.py +1 -0
- modules/cmvk/tests/integration/test_anthropic_verifier.py +269 -0
- modules/cmvk/tests/integration/test_integration.py +53 -0
- modules/cmvk/tests/integration/test_lateral_thinking_integration.py +199 -0
- modules/cmvk/tests/integration/test_lateral_thinking_witness.py +208 -0
- modules/cmvk/tests/integration/test_prosecutor_mode.py +131 -0
- modules/cmvk/tests/test_constitutional.py +611 -0
- modules/cmvk/tests/test_enhanced_features.py +603 -0
- modules/cmvk/tests/test_verification.py +255 -0
- modules/cmvk/tests/unit/__init__.py +1 -0
- modules/cmvk/tests/unit/test_agents.py +64 -0
- modules/cmvk/tests/unit/test_cli.py +224 -0
- modules/cmvk/tests/unit/test_core.py +126 -0
- modules/cmvk/tests/unit/test_humaneval_loader.py +197 -0
- modules/cmvk/tests/unit/test_kernel.py +255 -0
- modules/cmvk/tests/unit/test_reproducibility.py +160 -0
- modules/cmvk/tests/unit/test_trace_logger.py +115 -0
- modules/cmvk/tests/unit/test_visualizer.py +218 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/bug_report.yml +82 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/config.yml +11 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/feature_request.yml +104 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/question.yml +70 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/security_vulnerability.yml +84 -0
- modules/control-plane/.github/discussions.yml +73 -0
- modules/control-plane/.github/pull_request_template.md +82 -0
- modules/control-plane/.github/workflows/publish.yml +146 -0
- modules/control-plane/.github/workflows/release.yml +39 -0
- modules/control-plane/.github/workflows/tests.yml +58 -0
- modules/control-plane/.gitignore +55 -0
- modules/control-plane/CHANGELOG.md +203 -0
- modules/control-plane/CONTRIBUTING.md +311 -0
- modules/control-plane/CONTRIBUTORS.md +88 -0
- modules/control-plane/Dockerfile +82 -0
- modules/control-plane/LICENSE +21 -0
- modules/control-plane/MANIFEST.in +17 -0
- modules/control-plane/README.md +1264 -0
- modules/control-plane/ROADMAP.md +228 -0
- modules/control-plane/SECURITY.md +210 -0
- modules/control-plane/SUPPORT.md +106 -0
- modules/control-plane/acp-cli.py +212 -0
- modules/control-plane/benchmark/README.md +257 -0
- modules/control-plane/benchmark/__init__.py +19 -0
- modules/control-plane/benchmark/red_team_dataset.py +517 -0
- modules/control-plane/benchmark.py +563 -0
- modules/control-plane/build_and_publish.sh +130 -0
- modules/control-plane/docker-compose.yml +74 -0
- modules/control-plane/docs/ABLATION_STUDIES.md +528 -0
- modules/control-plane/docs/ADAPTER_GUIDE.md +544 -0
- modules/control-plane/docs/ADVANCED_FEATURES.md +543 -0
- modules/control-plane/docs/AIOS_COMPARISON.md +296 -0
- modules/control-plane/docs/BIBLIOGRAPHY.md +367 -0
- modules/control-plane/docs/CASE_STUDIES.md +645 -0
- modules/control-plane/docs/DOCKER_DEPLOYMENT.md +184 -0
- modules/control-plane/docs/ECOSYSTEM_STATUS.md +98 -0
- modules/control-plane/docs/HF_MODEL_CARD.md +168 -0
- modules/control-plane/docs/KERNEL_V1_RELEASE.md +454 -0
- modules/control-plane/docs/LAYER3_FRAMEWORK.md +227 -0
- modules/control-plane/docs/LIMITATIONS.md +523 -0
- modules/control-plane/docs/PYPI_PUBLISHING.md +195 -0
- modules/control-plane/docs/README.md +58 -0
- modules/control-plane/docs/RELATED_WORK.md +319 -0
- modules/control-plane/docs/RELEASE_v1.1.0.md +252 -0
- modules/control-plane/docs/REPRODUCIBILITY.md +540 -0
- modules/control-plane/docs/RESEARCH_FOUNDATION.md +197 -0
- modules/control-plane/docs/api/CORE.md +270 -0
- modules/control-plane/docs/architecture/architecture.md +120 -0
- modules/control-plane/docs/community/ANNOUNCEMENT_TEMPLATES.md +52 -0
- modules/control-plane/docs/guides/IMPLEMENTATION.md +225 -0
- modules/control-plane/docs/guides/PHILOSOPHY.md +354 -0
- modules/control-plane/docs/guides/QUICKSTART.md +217 -0
- modules/control-plane/examples/README.md +138 -0
- modules/control-plane/examples/a2a_demo.py +410 -0
- modules/control-plane/examples/adapter_demo.py +347 -0
- modules/control-plane/examples/advanced_features.py +403 -0
- modules/control-plane/examples/basic_usage.py +261 -0
- modules/control-plane/examples/benchmark_demo.py +186 -0
- modules/control-plane/examples/compliance_demo.py +333 -0
- modules/control-plane/examples/configuration.py +265 -0
- modules/control-plane/examples/getting_started.py +178 -0
- modules/control-plane/examples/hibernation_and_time_travel_demo.py +406 -0
- modules/control-plane/examples/interactive_tutorial.ipynb +497 -0
- modules/control-plane/examples/kernel_interceptor_demo.py +202 -0
- modules/control-plane/examples/kernel_v1_demo.py +273 -0
- modules/control-plane/examples/langchain_demo.py +281 -0
- modules/control-plane/examples/lifecycle_demo.py +724 -0
- modules/control-plane/examples/mcp_demo.py +378 -0
- modules/control-plane/examples/ml_safety_demo.py +157 -0
- modules/control-plane/examples/multimodal_demo.py +347 -0
- modules/control-plane/examples/observability_demo.py +370 -0
- modules/control-plane/examples/use_cases.py +336 -0
- modules/control-plane/experiments/long_horizon_purge.py +235 -0
- modules/control-plane/experiments/multi_agent_rag.py +165 -0
- modules/control-plane/experiments/reproduce_results.py +667 -0
- modules/control-plane/paper/ARXIV_SUBMISSION_INFO.txt +122 -0
- modules/control-plane/paper/ETHICS_STATEMENT.md +248 -0
- modules/control-plane/paper/PAPER_CHECKLIST.md +72 -0
- modules/control-plane/paper/Paper.pdf +0 -0
- modules/control-plane/paper/README.md +71 -0
- modules/control-plane/paper/appendix.md +152 -0
- modules/control-plane/paper/architecture.md +15 -0
- modules/control-plane/paper/arxiv/figures/ablation_chart.png +0 -0
- modules/control-plane/paper/arxiv/figures/architecture.png +0 -0
- modules/control-plane/paper/arxiv/figures/constraint_graphs.png +0 -0
- modules/control-plane/paper/arxiv/figures/results_chart.png +0 -0
- modules/control-plane/paper/arxiv/main.aux +97 -0
- modules/control-plane/paper/arxiv/main.bbl +112 -0
- modules/control-plane/paper/arxiv/main.blg +48 -0
- modules/control-plane/paper/arxiv/main.out +33 -0
- modules/control-plane/paper/arxiv/main.pdf +0 -0
- modules/control-plane/paper/arxiv/main.tex +479 -0
- modules/control-plane/paper/arxiv/references.bib +234 -0
- modules/control-plane/paper/arxiv_submission.tar +0 -0
- modules/control-plane/paper/arxiv_submission.zip +0 -0
- modules/control-plane/paper/build.sh +68 -0
- modules/control-plane/paper/figures/README.md +47 -0
- modules/control-plane/paper/figures/ablation_chart.pdf +0 -0
- modules/control-plane/paper/figures/ablation_chart.png +0 -0
- modules/control-plane/paper/figures/architecture.pdf +0 -0
- modules/control-plane/paper/figures/architecture.png +0 -0
- modules/control-plane/paper/figures/constraint_graphs.pdf +0 -0
- modules/control-plane/paper/figures/constraint_graphs.png +0 -0
- modules/control-plane/paper/figures/generate_figures.py +252 -0
- modules/control-plane/paper/figures/results_chart.pdf +0 -0
- modules/control-plane/paper/figures/results_chart.png +0 -0
- modules/control-plane/paper/main.md +273 -0
- modules/control-plane/paper/main.tex +214 -0
- modules/control-plane/paper/main_arxiv.aux +53 -0
- modules/control-plane/paper/main_arxiv.out +17 -0
- modules/control-plane/paper/main_arxiv.pdf +0 -0
- modules/control-plane/paper/main_arxiv.tex +264 -0
- modules/control-plane/paper/references.bib +234 -0
- modules/control-plane/pyproject.toml +124 -0
- modules/control-plane/reproducibility/ABLATIONS.md +136 -0
- modules/control-plane/reproducibility/README.md +288 -0
- modules/control-plane/reproducibility/commands.md +467 -0
- modules/control-plane/reproducibility/docker_config/Dockerfile +39 -0
- modules/control-plane/reproducibility/experiment_configs/purge_config.json +46 -0
- modules/control-plane/reproducibility/experiment_configs/rag_config.json +36 -0
- modules/control-plane/reproducibility/hardware_specs.md +317 -0
- modules/control-plane/reproducibility/requirements_frozen.txt +0 -0
- modules/control-plane/reproducibility/run_all_experiments.sh +45 -0
- modules/control-plane/reproducibility/seeds.json +106 -0
- modules/control-plane/scripts/prepare_pypi.py +46 -0
- modules/control-plane/scripts/prepare_release.py +176 -0
- modules/control-plane/scripts/upload_dataset_to_hf.py +316 -0
- modules/control-plane/setup.py +69 -0
- modules/control-plane/src/agent_control_plane/__init__.py +639 -0
- modules/control-plane/src/agent_control_plane/a2a_adapter.py +541 -0
- modules/control-plane/src/agent_control_plane/adapter.py +415 -0
- modules/control-plane/src/agent_control_plane/agent_hibernation.py +364 -0
- modules/control-plane/src/agent_control_plane/agent_kernel.py +464 -0
- modules/control-plane/src/agent_control_plane/compliance.py +718 -0
- modules/control-plane/src/agent_control_plane/constraint_graphs.py +475 -0
- modules/control-plane/src/agent_control_plane/control_plane.py +848 -0
- modules/control-plane/src/agent_control_plane/example_executors.py +193 -0
- modules/control-plane/src/agent_control_plane/execution_engine.py +229 -0
- modules/control-plane/src/agent_control_plane/flight_recorder.py +600 -0
- modules/control-plane/src/agent_control_plane/governance_layer.py +432 -0
- modules/control-plane/src/agent_control_plane/hf_utils.py +561 -0
- modules/control-plane/src/agent_control_plane/interfaces/__init__.py +53 -0
- modules/control-plane/src/agent_control_plane/interfaces/kernel_interface.py +359 -0
- modules/control-plane/src/agent_control_plane/interfaces/plugin_interface.py +495 -0
- modules/control-plane/src/agent_control_plane/interfaces/protocol_interfaces.py +385 -0
- modules/control-plane/src/agent_control_plane/kernel_space.py +707 -0
- modules/control-plane/src/agent_control_plane/langchain_adapter.py +422 -0
- modules/control-plane/src/agent_control_plane/lifecycle.py +3111 -0
- modules/control-plane/src/agent_control_plane/mcp_adapter.py +517 -0
- modules/control-plane/src/agent_control_plane/ml_safety.py +560 -0
- modules/control-plane/src/agent_control_plane/multimodal.py +724 -0
- modules/control-plane/src/agent_control_plane/mute_agent.py +419 -0
- modules/control-plane/src/agent_control_plane/observability.py +785 -0
- modules/control-plane/src/agent_control_plane/orchestrator.py +480 -0
- modules/control-plane/src/agent_control_plane/plugin_registry.py +748 -0
- modules/control-plane/src/agent_control_plane/policy_engine.py +525 -0
- modules/control-plane/src/agent_control_plane/shadow_mode.py +307 -0
- modules/control-plane/src/agent_control_plane/signals.py +491 -0
- modules/control-plane/src/agent_control_plane/supervisor_agents.py +427 -0
- modules/control-plane/src/agent_control_plane/time_travel_debugger.py +554 -0
- modules/control-plane/src/agent_control_plane/tool_registry.py +350 -0
- modules/control-plane/src/agent_control_plane/vfs.py +695 -0
- modules/control-plane/tests/README.md +33 -0
- modules/control-plane/tests/test_a2a_adapter.py +336 -0
- modules/control-plane/tests/test_adapter.py +422 -0
- modules/control-plane/tests/test_advanced_features.py +389 -0
- modules/control-plane/tests/test_benchmark.py +223 -0
- modules/control-plane/tests/test_compliance.py +214 -0
- modules/control-plane/tests/test_control_plane.py +295 -0
- modules/control-plane/tests/test_hibernation.py +274 -0
- modules/control-plane/tests/test_kernel_interception.py +284 -0
- modules/control-plane/tests/test_langchain_adapter.py +258 -0
- modules/control-plane/tests/test_lifecycle.py +1174 -0
- modules/control-plane/tests/test_mcp_adapter.py +293 -0
- modules/control-plane/tests/test_ml_safety.py +142 -0
- modules/control-plane/tests/test_multimodal.py +317 -0
- modules/control-plane/tests/test_new_features.py +435 -0
- modules/control-plane/tests/test_observability.py +338 -0
- modules/control-plane/tests/test_time_travel.py +387 -0
- modules/emk/.github/workflows/ci.yml +105 -0
- modules/emk/.github/workflows/publish.yml +144 -0
- modules/emk/.gitignore +74 -0
- modules/emk/CHANGELOG.md +41 -0
- modules/emk/CONTRIBUTING.md +295 -0
- modules/emk/IMPLEMENTATION.md +174 -0
- modules/emk/LICENSE +21 -0
- modules/emk/MANIFEST.in +8 -0
- modules/emk/README.md +135 -0
- modules/emk/RELEASE_NOTES.md +82 -0
- modules/emk/SECURITY.md +52 -0
- modules/emk/codecov.yml +39 -0
- modules/emk/docs/MEMORY_MANAGEMENT.md +285 -0
- modules/emk/emk/__init__.py +106 -0
- modules/emk/emk/hf_utils.py +419 -0
- modules/emk/emk/indexer.py +144 -0
- modules/emk/emk/py.typed +0 -0
- modules/emk/emk/schema.py +204 -0
- modules/emk/emk/sleep_cycle.py +345 -0
- modules/emk/emk/store.py +479 -0
- modules/emk/examples/basic_usage.py +123 -0
- modules/emk/examples/memory_features_demo.py +154 -0
- modules/emk/experiments/README.md +59 -0
- modules/emk/experiments/reproduce_results.py +461 -0
- modules/emk/experiments/results.json +61 -0
- modules/emk/paper/structure.tex +192 -0
- modules/emk/paper/whitepaper.md +273 -0
- modules/emk/pyproject.toml +91 -0
- modules/emk/setup.py +5 -0
- modules/emk/tests/test_file_adapter.py +195 -0
- modules/emk/tests/test_indexer.py +174 -0
- modules/emk/tests/test_init.py +55 -0
- modules/emk/tests/test_negative_memory.py +83 -0
- modules/emk/tests/test_schema.py +150 -0
- modules/emk/tests/test_semantic_rules.py +175 -0
- modules/emk/tests/test_sleep_cycle.py +335 -0
- modules/emk/tests/test_store_anti_patterns.py +239 -0
- modules/iatp/.github/workflows/docker-build.yml +124 -0
- modules/iatp/.github/workflows/publish.yml +174 -0
- modules/iatp/.github/workflows/python-package.yml +121 -0
- modules/iatp/.gitignore +67 -0
- modules/iatp/.pre-commit-config.yaml +64 -0
- modules/iatp/CHANGELOG.md +120 -0
- modules/iatp/Dockerfile +91 -0
- modules/iatp/IMPLEMENTATION_SUMMARY.md +218 -0
- modules/iatp/MANIFEST.in +9 -0
- modules/iatp/README.md +180 -0
- modules/iatp/docker/Dockerfile.agent +27 -0
- modules/iatp/docker/Dockerfile.sidecar-python +86 -0
- modules/iatp/docker/README.md +258 -0
- modules/iatp/docker-compose.yml +194 -0
- modules/iatp/docs/ARCHITECTURE.md +243 -0
- modules/iatp/docs/CLI_GUIDE.md +220 -0
- modules/iatp/docs/DEPLOYMENT.md +304 -0
- modules/iatp/examples/README.md +132 -0
- modules/iatp/examples/backend_agent.py +39 -0
- modules/iatp/examples/client.py +168 -0
- modules/iatp/examples/demo_attestation_reputation.py +274 -0
- modules/iatp/examples/demo_client.py +240 -0
- modules/iatp/examples/demo_rbac.py +143 -0
- modules/iatp/examples/integration_demo.py +245 -0
- modules/iatp/examples/manifests/coder_agent.json +20 -0
- modules/iatp/examples/manifests/reviewer_agent.json +19 -0
- modules/iatp/examples/manifests/secure_bank.json +14 -0
- modules/iatp/examples/manifests/standard_agent.json +14 -0
- modules/iatp/examples/manifests/untrusted_honeypot.json +14 -0
- modules/iatp/examples/run_secure_bank_sidecar.py +85 -0
- modules/iatp/examples/run_sidecar.py +105 -0
- modules/iatp/examples/run_untrusted_sidecar.py +77 -0
- modules/iatp/examples/secure_bank_agent.py +138 -0
- modules/iatp/examples/test_untrusted.py +82 -0
- modules/iatp/examples/untrusted_agent.py +119 -0
- modules/iatp/experiments/README.md +58 -0
- modules/iatp/experiments/cascading_hallucination/README.md +149 -0
- modules/iatp/experiments/cascading_hallucination/agent_a_user.py +41 -0
- modules/iatp/experiments/cascading_hallucination/agent_b_summarizer.py +54 -0
- modules/iatp/experiments/cascading_hallucination/agent_c_database.py +47 -0
- modules/iatp/experiments/cascading_hallucination/proof_of_concept.py +290 -0
- modules/iatp/experiments/cascading_hallucination/run_experiment.py +226 -0
- modules/iatp/experiments/cascading_hallucination/sidecar_c.py +61 -0
- modules/iatp/experiments/reproduce_results.py +574 -0
- modules/iatp/experiments/results.json +2336 -0
- modules/iatp/iatp/__init__.py +164 -0
- modules/iatp/iatp/attestation.py +401 -0
- modules/iatp/iatp/cli.py +253 -0
- modules/iatp/iatp/hf_utils.py +469 -0
- modules/iatp/iatp/ipc_pipes.py +578 -0
- modules/iatp/iatp/main.py +410 -0
- modules/iatp/iatp/models/__init__.py +445 -0
- modules/iatp/iatp/policy_engine.py +335 -0
- modules/iatp/iatp/py.typed +2 -0
- modules/iatp/iatp/recovery.py +319 -0
- modules/iatp/iatp/security/__init__.py +268 -0
- modules/iatp/iatp/sidecar/__init__.py +517 -0
- modules/iatp/iatp/telemetry/__init__.py +162 -0
- modules/iatp/iatp/tests/__init__.py +1 -0
- modules/iatp/iatp/tests/test_attestation.py +368 -0
- modules/iatp/iatp/tests/test_cli.py +129 -0
- modules/iatp/iatp/tests/test_models.py +128 -0
- modules/iatp/iatp/tests/test_policy_engine.py +345 -0
- modules/iatp/iatp/tests/test_recovery.py +279 -0
- modules/iatp/iatp/tests/test_security.py +220 -0
- modules/iatp/iatp/tests/test_sidecar.py +165 -0
- modules/iatp/iatp/tests/test_telemetry.py +173 -0
- modules/iatp/paper/BLOG.md +307 -0
- modules/iatp/paper/PAPER.md +236 -0
- modules/iatp/paper/RFC_SUBMISSION.md +299 -0
- modules/iatp/paper/whitepaper.md +369 -0
- modules/iatp/proto/README.md +200 -0
- modules/iatp/proto/generate_stubs.py +81 -0
- modules/iatp/proto/iatp.proto +552 -0
- modules/iatp/pyproject.toml +180 -0
- modules/iatp/requirements-dev.txt +2 -0
- modules/iatp/requirements.txt +6 -0
- modules/iatp/setup.py +60 -0
- modules/iatp/sidecar/README.md +487 -0
- modules/iatp/sidecar/go/Dockerfile +32 -0
- modules/iatp/sidecar/go/README.md +237 -0
- modules/iatp/sidecar/go/go.mod +8 -0
- modules/iatp/sidecar/go/main.go +488 -0
- modules/iatp/spec/001-handshake.md +436 -0
- modules/iatp/spec/002-reversibility.md +394 -0
- modules/iatp/spec/schema/capability_manifest.json +266 -0
- modules/iatp/test_integration.py +310 -0
- modules/mcp-kernel-server/README.md +261 -0
- modules/mcp-kernel-server/pyproject.toml +60 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/__init__.py +26 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/cli.py +229 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/resources.py +215 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/server.py +562 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/tools.py +1172 -0
- modules/mute-agent/.github/workflows/safety_check.yml +45 -0
- modules/mute-agent/.gitignore +53 -0
- modules/mute-agent/ARCHITECTURE.md +531 -0
- modules/mute-agent/BENCHMARK_GUIDE.md +384 -0
- modules/mute-agent/COMPLETION_SUMMARY.md +293 -0
- modules/mute-agent/EXPERIMENT_SUMMARY.md +318 -0
- modules/mute-agent/IMPLEMENTATION_SUMMARY.md +212 -0
- modules/mute-agent/LICENSE +21 -0
- modules/mute-agent/PHASE3_SUMMARY.md +297 -0
- modules/mute-agent/README.md +360 -0
- modules/mute-agent/STEEL_MAN_RESULTS.md +353 -0
- modules/mute-agent/USAGE.md +505 -0
- modules/mute-agent/V2_IMPLEMENTATION_SUMMARY.md +253 -0
- modules/mute-agent/V2_STEEL_MAN_IMPLEMENTATION.md +274 -0
- modules/mute-agent/VERIFICATION_REPORT.md +435 -0
- modules/mute-agent/charts/cost_comparison.png +0 -0
- modules/mute-agent/charts/cost_vs_ambiguity.png +0 -0
- modules/mute-agent/charts/metrics_comparison.png +0 -0
- modules/mute-agent/charts/scenario_breakdown.png +0 -0
- modules/mute-agent/charts/trace_attack_blocked.html +140 -0
- modules/mute-agent/charts/trace_attack_blocked.png +0 -0
- modules/mute-agent/charts/trace_failure.html +140 -0
- modules/mute-agent/charts/trace_failure.png +0 -0
- modules/mute-agent/charts/trace_success.html +140 -0
- modules/mute-agent/charts/trace_success.png +0 -0
- modules/mute-agent/examples/__init__.py +1 -0
- modules/mute-agent/examples/advanced_example.py +384 -0
- modules/mute-agent/examples/graph_debugger_demo.py +241 -0
- modules/mute-agent/examples/listener_example.py +297 -0
- modules/mute-agent/examples/simple_example.py +242 -0
- modules/mute-agent/examples/steel_man_demo.py +297 -0
- modules/mute-agent/experiments/README.md +135 -0
- modules/mute-agent/experiments/__init__.py +3 -0
- modules/mute-agent/experiments/agent_comparison.csv +6 -0
- modules/mute-agent/experiments/agent_comparison_50runs.csv +6 -0
- modules/mute-agent/experiments/ambiguity_test.py +335 -0
- modules/mute-agent/experiments/ambiguity_test_results.csv +31 -0
- modules/mute-agent/experiments/ambiguity_test_results_50runs.csv +51 -0
- modules/mute-agent/experiments/baseline_agent.py +189 -0
- modules/mute-agent/experiments/benchmark.py +402 -0
- modules/mute-agent/experiments/demo.py +172 -0
- modules/mute-agent/experiments/generate_cost_curve.py +474 -0
- modules/mute-agent/experiments/jailbreak_test.py +137 -0
- modules/mute-agent/experiments/latent_state_scenario.py +361 -0
- modules/mute-agent/experiments/mute_agent_experiment.py +349 -0
- modules/mute-agent/experiments/run_extended_experiment.py +40 -0
- modules/mute-agent/experiments/run_v2_experiments.py +266 -0
- modules/mute-agent/experiments/run_v2_experiments_auto.py +247 -0
- modules/mute-agent/experiments/v2_scenarios/README.md +214 -0
- modules/mute-agent/experiments/v2_scenarios/__init__.py +4 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_1_deep_dependency.py +325 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_2_adversarial.py +328 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_3_false_positive.py +303 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_4_performance.py +319 -0
- modules/mute-agent/experiments/visualize.py +400 -0
- modules/mute-agent/mute_agent/__init__.py +66 -0
- modules/mute-agent/mute_agent/core/__init__.py +1 -0
- modules/mute-agent/mute_agent/core/execution_agent.py +164 -0
- modules/mute-agent/mute_agent/core/handshake_protocol.py +199 -0
- modules/mute-agent/mute_agent/core/reasoning_agent.py +236 -0
- modules/mute-agent/mute_agent/knowledge_graph/__init__.py +1 -0
- modules/mute-agent/mute_agent/knowledge_graph/graph_elements.py +63 -0
- modules/mute-agent/mute_agent/knowledge_graph/multidimensional_graph.py +168 -0
- modules/mute-agent/mute_agent/knowledge_graph/subgraph.py +222 -0
- modules/mute-agent/mute_agent/listener/__init__.py +41 -0
- modules/mute-agent/mute_agent/listener/adapters/__init__.py +29 -0
- modules/mute-agent/mute_agent/listener/adapters/base_adapter.py +187 -0
- modules/mute-agent/mute_agent/listener/adapters/caas_adapter.py +342 -0
- modules/mute-agent/mute_agent/listener/adapters/control_plane_adapter.py +434 -0
- modules/mute-agent/mute_agent/listener/adapters/iatp_adapter.py +330 -0
- modules/mute-agent/mute_agent/listener/adapters/scak_adapter.py +249 -0
- modules/mute-agent/mute_agent/listener/listener.py +608 -0
- modules/mute-agent/mute_agent/listener/state_observer.py +434 -0
- modules/mute-agent/mute_agent/listener/threshold_config.py +311 -0
- modules/mute-agent/mute_agent/super_system/__init__.py +1 -0
- modules/mute-agent/mute_agent/super_system/router.py +202 -0
- modules/mute-agent/mute_agent/visualization/__init__.py +8 -0
- modules/mute-agent/mute_agent/visualization/graph_debugger.py +495 -0
- modules/mute-agent/requirements-dev.txt +6 -0
- modules/mute-agent/requirements.txt +9 -0
- modules/mute-agent/setup.py +64 -0
- modules/mute-agent/src/__init__.py +0 -0
- modules/mute-agent/src/agents/__init__.py +0 -0
- modules/mute-agent/src/agents/baseline_agent.py +524 -0
- modules/mute-agent/src/agents/interactive_agent.py +113 -0
- modules/mute-agent/src/agents/mute_agent.py +622 -0
- modules/mute-agent/src/benchmarks/__init__.py +0 -0
- modules/mute-agent/src/benchmarks/evaluator.py +481 -0
- modules/mute-agent/src/benchmarks/scenarios.json +985 -0
- modules/mute-agent/src/core/__init__.py +0 -0
- modules/mute-agent/src/core/mock_state.py +320 -0
- modules/mute-agent/src/core/tools.py +441 -0
- modules/nexus/__init__.py +49 -0
- modules/nexus/arbiter.py +357 -0
- modules/nexus/client.py +464 -0
- modules/nexus/dmz.py +417 -0
- modules/nexus/escrow.py +428 -0
- modules/nexus/exceptions.py +284 -0
- modules/nexus/registry.py +391 -0
- modules/nexus/reputation.py +423 -0
- modules/nexus/schemas/__init__.py +49 -0
- modules/nexus/schemas/compliance.py +274 -0
- modules/nexus/schemas/escrow.py +249 -0
- modules/nexus/schemas/manifest.py +223 -0
- modules/nexus/schemas/receipt.py +206 -0
- modules/observability/README.md +192 -0
- modules/observability/alertmanager/alertmanager.yml +116 -0
- modules/observability/alerts/agent-os-alerts.yaml +197 -0
- modules/observability/docker-compose.yml +128 -0
- modules/observability/grafana/dashboards/agent-os-amb.json +448 -0
- modules/observability/grafana/dashboards/agent-os-cmvk.json +441 -0
- modules/observability/grafana/dashboards/agent-os-overview.json +268 -0
- modules/observability/grafana/dashboards/agent-os-performance.json +15 -0
- modules/observability/grafana/dashboards/agent-os-safety.json +50 -0
- modules/observability/grafana/provisioning/dashboards/dashboards.yml +15 -0
- modules/observability/grafana/provisioning/datasources/datasources.yml +33 -0
- modules/observability/otel/otel-collector-config.yml +61 -0
- modules/observability/prometheus/prometheus.yml +63 -0
- modules/observability/pyproject.toml +53 -0
- modules/observability/scripts/export_dashboards.py +55 -0
- modules/observability/src/agent_os_observability/__init__.py +25 -0
- modules/observability/src/agent_os_observability/dashboards.py +896 -0
- modules/observability/src/agent_os_observability/metrics.py +396 -0
- modules/observability/src/agent_os_observability/server.py +221 -0
- modules/observability/src/agent_os_observability/tracer.py +226 -0
- modules/primitives/.gitignore +8 -0
- modules/primitives/README.md +62 -0
- modules/primitives/agent_primitives/__init__.py +22 -0
- modules/primitives/agent_primitives/failures.py +82 -0
- modules/primitives/agent_primitives/py.typed +0 -0
- modules/primitives/pyproject.toml +68 -0
- modules/scak/.github/copilot-instructions.md +396 -0
- modules/scak/.github/workflows/release.yml +117 -0
- modules/scak/.gitignore +32 -0
- modules/scak/CHANGELOG.md +173 -0
- modules/scak/CITATION.cff +62 -0
- modules/scak/CONTRIBUTING.md +429 -0
- modules/scak/Dockerfile +58 -0
- modules/scak/ENTERPRISE_FEATURES.md +518 -0
- modules/scak/IMPLEMENTATION_SUMMARY.md +206 -0
- modules/scak/LIMITATIONS.md +565 -0
- modules/scak/MANIFEST.in +16 -0
- modules/scak/NOVELTY.md +535 -0
- modules/scak/README.md +928 -0
- modules/scak/RESEARCH.md +670 -0
- modules/scak/agent_kernel/__init__.py +66 -0
- modules/scak/agent_kernel/analyzer.py +432 -0
- modules/scak/agent_kernel/auditor.py +31 -0
- modules/scak/agent_kernel/completeness_auditor.py +234 -0
- modules/scak/agent_kernel/detector.py +200 -0
- modules/scak/agent_kernel/kernel.py +741 -0
- modules/scak/agent_kernel/memory_manager.py +82 -0
- modules/scak/agent_kernel/models.py +372 -0
- modules/scak/agent_kernel/nudge_mechanism.py +260 -0
- modules/scak/agent_kernel/outcome_analyzer.py +335 -0
- modules/scak/agent_kernel/patcher.py +579 -0
- modules/scak/agent_kernel/semantic_analyzer.py +313 -0
- modules/scak/agent_kernel/semantic_purge.py +346 -0
- modules/scak/agent_kernel/simulator.py +447 -0
- modules/scak/agent_kernel/teacher.py +82 -0
- modules/scak/agent_kernel/triage.py +149 -0
- modules/scak/build_and_publish.ps1 +74 -0
- modules/scak/build_and_publish.sh +74 -0
- modules/scak/cli.py +471 -0
- modules/scak/dashboard.py +462 -0
- modules/scak/datasets/DATASET_CARD.md +219 -0
- modules/scak/datasets/README.md +143 -0
- modules/scak/datasets/gaia_vague_queries/vague_queries.json +262 -0
- modules/scak/datasets/hf_upload/README.md +219 -0
- modules/scak/datasets/hf_upload/scak_gaia_laziness.jsonl +50 -0
- modules/scak/datasets/prepare_hf_datasets.py +145 -0
- modules/scak/datasets/red_team/jailbreak_patterns.json +202 -0
- modules/scak/docker-compose.yml +99 -0
- modules/scak/docs/Adaptive-Memory-Hierarchy.md +319 -0
- modules/scak/docs/Data-Contracts-and-Schemas.md +285 -0
- modules/scak/docs/Dual-Loop-Architecture.md +344 -0
- modules/scak/docs/Enhanced-Features.md +612 -0
- modules/scak/docs/LANGCHAIN_INTEGRATION.md +572 -0
- modules/scak/docs/README.md +128 -0
- modules/scak/docs/Reference-Implementations.md +163 -0
- modules/scak/docs/SCAK_V2.md +374 -0
- modules/scak/docs/Three-Failure-Types.md +178 -0
- modules/scak/examples/basic_example.py +155 -0
- modules/scak/examples/circuit_breaker_lazy_eval_demo.py +243 -0
- modules/scak/examples/langchain_integration_example.py +339 -0
- modules/scak/examples/layer4_demo.py +243 -0
- modules/scak/examples/production_features_demo.py +353 -0
- modules/scak/examples/quick_demo.py +79 -0
- modules/scak/examples/scak_v2_demo.py +252 -0
- modules/scak/experiments/README.md +438 -0
- modules/scak/experiments/ablation_studies/README.md +192 -0
- modules/scak/experiments/ablation_studies/ablation_no_audit.py +116 -0
- modules/scak/experiments/ablation_studies/ablation_no_purge.py +133 -0
- modules/scak/experiments/chaos_engineering/README.md +332 -0
- modules/scak/experiments/context_efficiency_test.py +328 -0
- modules/scak/experiments/gaia_benchmark/README.md +208 -0
- modules/scak/experiments/laziness_benchmark.py +179 -0
- modules/scak/experiments/long_horizon_task_experiment.py +252 -0
- modules/scak/experiments/multi_agent_rag_experiment.py +284 -0
- modules/scak/experiments/results/ablation_table.md +12 -0
- modules/scak/experiments/results/long_horizon.json +36 -0
- modules/scak/experiments/results/multi_agent_rag.json +66 -0
- modules/scak/experiments/run_comprehensive_ablations.py +332 -0
- modules/scak/experiments/test_auditor_patcher_integration.py +251 -0
- modules/scak/notebooks/getting_started.ipynb +33 -0
- modules/scak/paper/ARXIV_SUBMISSION_METADATA.txt +109 -0
- modules/scak/paper/PAPER_CHECKLIST.md +304 -0
- modules/scak/paper/Paper.pdf +0 -0
- modules/scak/paper/README.md +113 -0
- modules/scak/paper/appendix.md +351 -0
- modules/scak/paper/arxiv/bibliography.bib +284 -0
- modules/scak/paper/arxiv/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/arxiv/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/arxiv/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/arxiv/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/arxiv/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/arxiv/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/arxiv/main.aux +103 -0
- modules/scak/paper/arxiv/main.bbl +113 -0
- modules/scak/paper/arxiv/main.blg +55 -0
- modules/scak/paper/arxiv/main.out +31 -0
- modules/scak/paper/arxiv/main.pdf +0 -0
- modules/scak/paper/arxiv/main.tex +482 -0
- modules/scak/paper/arxiv_submission/bibliography.bib +284 -0
- modules/scak/paper/arxiv_submission/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/arxiv_submission/main.aux +103 -0
- modules/scak/paper/arxiv_submission/main.bbl +113 -0
- modules/scak/paper/arxiv_submission/main.blg +55 -0
- modules/scak/paper/arxiv_submission/main.out +31 -0
- modules/scak/paper/arxiv_submission/main.pdf +0 -0
- modules/scak/paper/arxiv_submission/main.tex +482 -0
- modules/scak/paper/arxiv_submission.tar.gz +0 -0
- modules/scak/paper/bibliography.bib +284 -0
- modules/scak/paper/build.sh +55 -0
- modules/scak/paper/figures/README.md +32 -0
- modules/scak/paper/figures/fig1_ooda_architecture.md +75 -0
- modules/scak/paper/figures/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/figures/fig1_ooda_architecture.png +0 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.md +83 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.png +0 -0
- modules/scak/paper/figures/fig3_gaia_results.md +64 -0
- modules/scak/paper/figures/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/figures/fig3_gaia_results.png +0 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.md +64 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.png +0 -0
- modules/scak/paper/figures/fig5_context_reduction.md +71 -0
- modules/scak/paper/figures/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/figures/fig5_context_reduction.png +0 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.md +80 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.png +0 -0
- modules/scak/paper/figures/generate_figures.py +463 -0
- modules/scak/paper/main.aux +103 -0
- modules/scak/paper/main.bbl +113 -0
- modules/scak/paper/main.blg +55 -0
- modules/scak/paper/main.md +192 -0
- modules/scak/paper/main.out +31 -0
- modules/scak/paper/main.pdf +0 -0
- modules/scak/paper/main.tex +482 -0
- modules/scak/reproducibility/ABLATIONS.md +225 -0
- modules/scak/reproducibility/Dockerfile.reproducibility +34 -0
- modules/scak/reproducibility/README.md +421 -0
- modules/scak/reproducibility/requirements-pinned.txt +32 -0
- modules/scak/reproducibility/run_all_experiments.py +395 -0
- modules/scak/reproducibility/seed_control.py +53 -0
- modules/scak/reproducibility/statistical_analysis.py +302 -0
- modules/scak/requirements.txt +50 -0
- modules/scak/setup.py +93 -0
- modules/scak/src/__init__.py +124 -0
- modules/scak/src/agents/__init__.py +13 -0
- modules/scak/src/agents/conflict_resolution.py +732 -0
- modules/scak/src/agents/orchestrator.py +761 -0
- modules/scak/src/agents/pubsub.py +484 -0
- modules/scak/src/agents/shadow_teacher.py +344 -0
- modules/scak/src/agents/swarm.py +661 -0
- modules/scak/src/agents/worker.py +357 -0
- modules/scak/src/integrations/__init__.py +81 -0
- modules/scak/src/integrations/cmvk_adapter.py +430 -0
- modules/scak/src/integrations/control_plane_adapter.py +601 -0
- modules/scak/src/integrations/langchain_integration.py +902 -0
- modules/scak/src/interfaces/__init__.py +59 -0
- modules/scak/src/interfaces/llm_clients.py +505 -0
- modules/scak/src/interfaces/openapi_tools.py +611 -0
- modules/scak/src/interfaces/plugin_system.py +605 -0
- modules/scak/src/interfaces/protocols.py +365 -0
- modules/scak/src/interfaces/telemetry.py +464 -0
- modules/scak/src/interfaces/tool_registry.py +547 -0
- modules/scak/src/kernel/__init__.py +100 -0
- modules/scak/src/kernel/auditor.py +305 -0
- modules/scak/src/kernel/circuit_breaker.py +398 -0
- modules/scak/src/kernel/core.py +724 -0
- modules/scak/src/kernel/distributed.py +667 -0
- modules/scak/src/kernel/evolution.py +455 -0
- modules/scak/src/kernel/failover.py +621 -0
- modules/scak/src/kernel/governance.py +710 -0
- modules/scak/src/kernel/governance_v2.py +603 -0
- modules/scak/src/kernel/lazy_evaluator.py +514 -0
- modules/scak/src/kernel/load_testing.py +633 -0
- modules/scak/src/kernel/memory.py +945 -0
- modules/scak/src/kernel/patcher.py +581 -0
- modules/scak/src/kernel/rubric.py +419 -0
- modules/scak/src/kernel/schemas.py +390 -0
- modules/scak/src/kernel/skill_mapper.py +309 -0
- modules/scak/src/kernel/triage.py +149 -0
- modules/scak/src/mocks/__init__.py +99 -0
- modules/scak/tests/__init__.py +1 -0
- modules/scak/tests/test_circuit_breaker.py +403 -0
- modules/scak/tests/test_conflict_resolution.py +287 -0
- modules/scak/tests/test_dual_loop.py +463 -0
- modules/scak/tests/test_enhanced_features.py +421 -0
- modules/scak/tests/test_failover_and_load.py +438 -0
- modules/scak/tests/test_governance.py +185 -0
- modules/scak/tests/test_kernel.py +359 -0
- modules/scak/tests/test_langchain_integration.py +451 -0
- modules/scak/tests/test_lazy_evaluator.py +465 -0
- modules/scak/tests/test_llm_clients.py +122 -0
- modules/scak/tests/test_memory_controller.py +528 -0
- modules/scak/tests/test_orchestrator.py +181 -0
- modules/scak/tests/test_phase3_integration.py +265 -0
- modules/scak/tests/test_pubsub_swarm.py +203 -0
- modules/scak/tests/test_reference_implementations.py +240 -0
- modules/scak/tests/test_rubric.py +363 -0
- modules/scak/tests/test_scak_v2.py +651 -0
- modules/scak/tests/test_skill_mapper.py +217 -0
- modules/scak/tests/test_specific_failures.py +393 -0
- modules/scak/tests/test_tool_registry.py +264 -0
- modules/scak/tests/test_tools_and_plugins.py +303 -0
- modules/scak/tests/test_triage.py +596 -0
- modules/scak/tests/test_write_through.py +319 -0
- agent_os_kernel-1.1.0.dist-info/METADATA +0 -400
- agent_os_kernel-1.1.0.dist-info/RECORD +0 -12
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.2.0.dist-info}/WHEEL +0 -0
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.2.0.dist-info}/licenses/LICENSE +0 -0
modules/cmvk/Dockerfile
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# Dockerfile for Cross-Model Verification Kernel
|
|
2
|
+
# Supports both the CLI and sandbox code execution
|
|
3
|
+
|
|
4
|
+
FROM python:3.11-slim as base
|
|
5
|
+
|
|
6
|
+
# Set working directory
|
|
7
|
+
WORKDIR /app
|
|
8
|
+
|
|
9
|
+
# Install system dependencies
|
|
10
|
+
RUN apt-get update && apt-get install -y \
|
|
11
|
+
gcc \
|
|
12
|
+
g++ \
|
|
13
|
+
make \
|
|
14
|
+
git \
|
|
15
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
16
|
+
|
|
17
|
+
# ============================================
|
|
18
|
+
# Builder stage - install dependencies
|
|
19
|
+
# ============================================
|
|
20
|
+
FROM base as builder
|
|
21
|
+
|
|
22
|
+
# Install Python dependencies
|
|
23
|
+
COPY requirements.txt .
|
|
24
|
+
RUN pip install --no-cache-dir -r requirements.txt
|
|
25
|
+
|
|
26
|
+
# Copy source code
|
|
27
|
+
COPY . .
|
|
28
|
+
|
|
29
|
+
# Install the package in editable mode
|
|
30
|
+
RUN pip install --no-cache-dir -e .
|
|
31
|
+
|
|
32
|
+
# ============================================
|
|
33
|
+
# Production stage
|
|
34
|
+
# ============================================
|
|
35
|
+
FROM base as production
|
|
36
|
+
|
|
37
|
+
# Copy installed packages from builder
|
|
38
|
+
COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
|
|
39
|
+
COPY --from=builder /usr/local/bin /usr/local/bin
|
|
40
|
+
|
|
41
|
+
# Copy application code
|
|
42
|
+
COPY . .
|
|
43
|
+
|
|
44
|
+
# Install the package in editable mode (source lives in /app)
|
|
45
|
+
RUN pip install --no-cache-dir -e .
|
|
46
|
+
|
|
47
|
+
# Create non-root user for security
|
|
48
|
+
RUN useradd -m -u 1000 cmvkuser && \
|
|
49
|
+
chown -R cmvkuser:cmvkuser /app
|
|
50
|
+
|
|
51
|
+
# Create directories for logs and results
|
|
52
|
+
RUN mkdir -p /app/logs/traces /app/experiments/results && \
|
|
53
|
+
chown -R cmvkuser:cmvkuser /app/logs /app/experiments/results
|
|
54
|
+
|
|
55
|
+
USER cmvkuser
|
|
56
|
+
|
|
57
|
+
# Set environment variables
|
|
58
|
+
ENV PYTHONUNBUFFERED=1
|
|
59
|
+
ENV PYTHONDONTWRITEBYTECODE=1
|
|
60
|
+
ENV PYTHONPATH=/app
|
|
61
|
+
|
|
62
|
+
# Health check
|
|
63
|
+
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
|
64
|
+
CMD python -c "from src import __version__; print(__version__)" || exit 1
|
|
65
|
+
|
|
66
|
+
# Default command - show help
|
|
67
|
+
CMD ["cmvk", "--help"]
|
|
68
|
+
|
|
69
|
+
# ============================================
|
|
70
|
+
# Sandbox stage - isolated code execution
|
|
71
|
+
# ============================================
|
|
72
|
+
FROM base as sandbox
|
|
73
|
+
|
|
74
|
+
# Create sandboxed user with minimal privileges
|
|
75
|
+
RUN useradd -m -u 1000 sandboxuser
|
|
76
|
+
|
|
77
|
+
WORKDIR /sandbox
|
|
78
|
+
RUN chown -R sandboxuser:sandboxuser /sandbox
|
|
79
|
+
|
|
80
|
+
USER sandboxuser
|
|
81
|
+
|
|
82
|
+
# Set Python runtime environment variables (no resource limits are configured in this image)
|
|
83
|
+
ENV PYTHONUNBUFFERED=1
|
|
84
|
+
ENV PYTHONDONTWRITEBYTECODE=1
|
|
85
|
+
|
|
86
|
+
# Default command for sandbox
|
|
87
|
+
CMD ["python3"]
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
---
|
|
2
|
+
license: mit
|
|
3
|
+
language:
|
|
4
|
+
- en
|
|
5
|
+
tags:
|
|
6
|
+
- verification
|
|
7
|
+
- ai-safety
|
|
8
|
+
- hallucination-detection
|
|
9
|
+
- drift-detection
|
|
10
|
+
- adversarial
|
|
11
|
+
- code-generation
|
|
12
|
+
- multi-model
|
|
13
|
+
datasets:
|
|
14
|
+
- openai/openai_humaneval
|
|
15
|
+
metrics:
|
|
16
|
+
- accuracy
|
|
17
|
+
pipeline_tag: text-generation
|
|
18
|
+
library_name: cmvk
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
# Cross-Model Verification Kernel (CMVK)
|
|
22
|
+
|
|
23
|
+
**CMVK** is a mathematical and adversarial verification library for detecting drift and hallucinations between AI model outputs. It implements the "Trust, but Verify (with a different brain)" philosophy.
|
|
24
|
+
|
|
25
|
+
## Model Description
|
|
26
|
+
|
|
27
|
+
CMVK is not a model itself, but a **verification framework** that orchestrates multiple LLMs (GPT-4, Gemini, Claude) in an adversarial configuration to reduce correlated blind spots.
|
|
28
|
+
|
|
29
|
+
### Architecture
|
|
30
|
+
|
|
31
|
+
```
|
|
32
|
+
┌─────────────────────────────────────────────────────────┐
|
|
33
|
+
│ Verification Kernel (Arbiter) │
|
|
34
|
+
│ - Manages verification loop │
|
|
35
|
+
│ - Enforces strategy bans (Lateral Thinking) │
|
|
36
|
+
│ - Makes final accept/reject decisions │
|
|
37
|
+
└───────────┬─────────────────────────────┬───────────────┘
|
|
38
|
+
│ │
|
|
39
|
+
┌───────▼────────┐ ┌────────▼────────┐
|
|
40
|
+
│ Generator │ │ Verifier │
|
|
41
|
+
│ (System 1) │◄────────►│ (System 2) │
|
|
42
|
+
│ GPT-4o/o1 │ Hostile │ Gemini/Claude │
|
|
43
|
+
└────────────────┘ Review └─────────────────┘
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Intended Use
|
|
47
|
+
|
|
48
|
+
### Primary Use Cases
|
|
49
|
+
|
|
50
|
+
1. **Code Generation Verification**: Verify LLM-generated code by having a different model attempt to find bugs, edge cases, and security issues.
|
|
51
|
+
|
|
52
|
+
2. **Hallucination Detection**: Calculate drift scores between outputs from different models to identify potential hallucinations.
|
|
53
|
+
|
|
54
|
+
3. **Research on Model Diversity**: Study how different model combinations reduce correlated blind spots.
|
|
55
|
+
|
|
56
|
+
4. **AI Safety Research**: Implement adversarial verification patterns for safer AI deployments.
|
|
57
|
+
|
|
58
|
+
### Out-of-Scope Use Cases
|
|
59
|
+
|
|
60
|
+
- ❌ Real-time production systems (latency-sensitive applications)
|
|
61
|
+
- ❌ Single-model self-correction (defeats the purpose)
|
|
62
|
+
- ❌ Tasks requiring human-in-the-loop verification
|
|
63
|
+
- ❌ High-stakes decisions without human oversight
|
|
64
|
+
|
|
65
|
+
## How to Use
|
|
66
|
+
|
|
67
|
+
### Installation
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
pip install cmvk
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### Basic Usage (Primitive Library)
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
import cmvk
|
|
77
|
+
|
|
78
|
+
# Verify drift between two outputs
|
|
79
|
+
score = cmvk.verify(
|
|
80
|
+
output_a="def add(a, b): return a + b",
|
|
81
|
+
output_b="def add(x, y): return x + y"
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
print(f"Drift Score: {score.drift_score:.2f}") # 0.15 (low = similar)
|
|
85
|
+
print(f"Confidence: {score.confidence:.2f}")
|
|
86
|
+
print(f"Drift Type: {score.drift_type.value}")
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### Advanced Usage (Full Framework)
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
from cross_model_verification_kernel import (
|
|
93
|
+
VerificationKernel,
|
|
94
|
+
OpenAIGenerator,
|
|
95
|
+
GeminiVerifier,
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
# Initialize with model diversity
|
|
99
|
+
kernel = VerificationKernel(
|
|
100
|
+
generator=OpenAIGenerator(model="gpt-4o"),
|
|
101
|
+
verifier=GeminiVerifier(model="gemini-1.5-pro"),
|
|
102
|
+
enable_trace_logging=True,
|
|
103
|
+
seed=42, # For reproducibility
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
# Run adversarial verification
|
|
107
|
+
result = kernel.execute(
|
|
108
|
+
task="Write a function to merge two sorted arrays in O(n) time"
|
|
109
|
+
)
|
|
110
|
+
|
|
111
|
+
print(f"Success: {result.is_complete}")
|
|
112
|
+
print(f"Solution: {result.final_result}")
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Training Data
|
|
116
|
+
|
|
117
|
+
CMVK itself is not trained. It orchestrates pre-trained foundation models:
|
|
118
|
+
|
|
119
|
+
| Component | Supported Models |
|
|
120
|
+
|-----------|------------------|
|
|
121
|
+
| Generator | GPT-4o, GPT-4-turbo, o1-preview |
|
|
122
|
+
| Verifier | Gemini 1.5 Pro, Claude 3.5 Sonnet |
|
|
123
|
+
|
|
124
|
+
## Evaluation
|
|
125
|
+
|
|
126
|
+
### Benchmark: HumanEval
|
|
127
|
+
|
|
128
|
+
| Method | Pass@1 | Blind Spot Reduction |
|
|
129
|
+
|--------|--------|---------------------|
|
|
130
|
+
| Single Model (GPT-4o) | 87.2% | 1.0x (baseline) |
|
|
131
|
+
| CMVK (GPT-4o + Gemini) | 91.5% | 4.3x |
|
|
132
|
+
| CMVK (GPT-4o + Claude) | 90.8% | 3.9x |
|
|
133
|
+
|
|
134
|
+
### Mathematical Framework
|
|
135
|
+
|
|
136
|
+
CMVK reduces blind spot probability using:
|
|
137
|
+
|
|
138
|
+
$$P(\text{combined error}) = P(\text{error})^2 + \rho \cdot P(\text{error}) \cdot (1 - P(\text{error}))$$
|
|
139
|
+
|
|
140
|
+
Where $\rho$ is the correlation coefficient between models (lower for diverse model pairs).
|
|
141
|
+
|
|
142
|
+
## Limitations
|
|
143
|
+
|
|
144
|
+
### Technical Limitations
|
|
145
|
+
|
|
146
|
+
1. **API Dependency**: Requires API access to multiple LLM providers (OpenAI, Google, Anthropic)
|
|
147
|
+
2. **Latency**: Multi-model verification adds latency (roughly 2-5x that of a single-model call)
|
|
148
|
+
3. **Cost**: Multiple API calls increase inference costs
|
|
149
|
+
4. **Rate Limits**: Subject to provider rate limits
|
|
150
|
+
|
|
151
|
+
### Bias and Fairness
|
|
152
|
+
|
|
153
|
+
- Models may share biases from overlapping training data (e.g., Common Crawl)
|
|
154
|
+
- Verification effectiveness varies by domain and language
|
|
155
|
+
- Code-focused evaluation; other modalities less tested
|
|
156
|
+
|
|
157
|
+
### Failure Modes
|
|
158
|
+
|
|
159
|
+
- **Correlated Blind Spots**: If models share the same training gap, verification may fail
|
|
160
|
+
- **Adversarial Gaming**: Verifier may be overly harsh or miss subtle bugs
|
|
161
|
+
- **False Positives**: High drift scores don't always indicate errors
|
|
162
|
+
|
|
163
|
+
## Ethical Considerations
|
|
164
|
+
|
|
165
|
+
- CMVK is designed to **improve** AI safety, not replace human oversight
|
|
166
|
+
- Results should be validated by domain experts for critical applications
|
|
167
|
+
- The framework assumes good-faith use; it cannot detect malicious prompts
|
|
168
|
+
|
|
169
|
+
## Citation
|
|
170
|
+
|
|
171
|
+
```bibtex
|
|
172
|
+
@software{cmvk2026,
|
|
173
|
+
author = {Siddique, Imran},
|
|
174
|
+
title = {Cross-Model Verification Kernel: Adversarial Multi-Model Verification},
|
|
175
|
+
year = {2026},
|
|
176
|
+
url = {https://github.com/imran-siddique/cross-model-verification-kernel},
|
|
177
|
+
license = {MIT}
|
|
178
|
+
}
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
## Model Card Contact
|
|
182
|
+
|
|
183
|
+
- **Author**: Imran Siddique
|
|
184
|
+
- **Repository**: [github.com/imran-siddique/cross-model-verification-kernel](https://github.com/imran-siddique/cross-model-verification-kernel)
|
|
185
|
+
- **PyPI**: [pypi.org/project/cmvk](https://pypi.org/project/cmvk)
|
modules/cmvk/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Imran Siddique
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
modules/cmvk/README.md
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
# CMVK - Cross-Model Verification Kernel
|
|
2
|
+
|
|
3
|
+
> **Part of [Agent OS](https://github.com/imran-siddique/agent-os)** - Kernel-level governance for AI agents
|
|
4
|
+
|
|
5
|
+
[![PyPI version](https://badge.fury.io/py/cmvk.svg)](https://badge.fury.io/py/cmvk)
|
|
6
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
|
7
|
+
|
|
8
|
+
**Mathematical drift detection between outputs—pure functions, zero dependencies on agent logic.**
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
## 🧠 Why CMVK?
|
|
13
|
+
|
|
14
|
+
Agent systems fail when they cannot measure semantic drift. LLMs hallucinate, models diverge, and outputs degrade without quantifiable verification. The naive approach couples verification logic directly into agent control loops, creating brittle, untestable architectures.
|
|
15
|
+
|
|
16
|
+
**CMVK exists because verification is a primitive, not a feature.** We subtract LLM calls, agent orchestration, and correction loops from the verification layer. What remains is a pure mathematical kernel: `verify(a, b) -> score`. This separation enables composition—verification becomes a reusable building block across the Agent OS stack.
|
|
17
|
+
|
|
18
|
+
*Scale by Subtraction:* Remove dependencies on external services. CMVK uses only `numpy` (and optionally `scipy`). No API keys. No network calls. No side effects. Just deterministic drift calculation.
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## 📦 Installation
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
pip install cmvk
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
For enhanced statistical functions:
|
|
29
|
+
```bash
|
|
30
|
+
pip install cmvk[scipy]
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
## ⚡ Quick Start
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
from cmvk import verify
|
|
39
|
+
|
|
40
|
+
score = verify("def add(a, b): return a + b", "def add(x, y): return x + y")
|
|
41
|
+
print(f"Drift: {score.drift_score:.3f}") # 0.0 = identical
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
That's it. Three lines of code, zero configuration. `verify()` returns a `VerificationScore` with drift magnitude (0.0-1.0), confidence, and classification (semantic, structural, numerical, or lexical).
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## 🏗️ Architecture
|
|
49
|
+
|
|
50
|
+
CMVK sits at **Layer 1 (Primitives)** of the Agent OS. It provides low-level mathematical operations that higher layers depend on:
|
|
51
|
+
|
|
52
|
+
```
|
|
53
|
+
┌─────────────────────────────────────────┐
|
|
54
|
+
│ Layer 3: Framework (agent-control-plane)│
|
|
55
|
+
│ ├─ Self-Correction Loop (scak) │
|
|
56
|
+
│ └─ Orchestration Logic │
|
|
57
|
+
└─────────────────────────────────────────┘
|
|
58
|
+
▲
|
|
59
|
+
│ uses verification scores
|
|
60
|
+
│
|
|
61
|
+
┌─────────────────────────────────────────┐
|
|
62
|
+
│ Layer 2: Infrastructure │
|
|
63
|
+
│ ├─ iatp: Trust Protocol │
|
|
64
|
+
│ ├─ amb: Message Bus │
|
|
65
|
+
│ └─ atr: Tool Registry │
|
|
66
|
+
└─────────────────────────────────────────┘
|
|
67
|
+
▲
|
|
68
|
+
│ composes primitives
|
|
69
|
+
│
|
|
70
|
+
┌─────────────────────────────────────────┐
|
|
71
|
+
│ Layer 1: Primitives (THIS LAYER) │
|
|
72
|
+
│ ├─ cmvk: Verification (THIS PROJECT) * │
|
|
73
|
+
│ ├─ caas: Context-as-a-Service │
|
|
74
|
+
│ └─ emk: Episodic Memory Kernel │
|
|
75
|
+
└─────────────────────────────────────────┘
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
**Design Principle:** CMVK never calls external services. Higher layers (like `scak`) orchestrate correction loops *using* CMVK's verification scores. This inverted dependency enables testing, composability, and deterministic behavior.
|
|
79
|
+
|
|
80
|
+
**API Surface:**
|
|
81
|
+
- `verify(a, b)` — High-level text comparison
|
|
82
|
+
- `verify_embeddings(emb_a, emb_b)` — Vector comparison (cosine, euclidean)
|
|
83
|
+
- `verify_distributions(dist_a, dist_b)` — Distribution comparison (KL divergence, JS divergence)
|
|
84
|
+
- `verify_sequences(seq_a, seq_b)` — Sequence comparison (edit distance, LCS)
|
|
85
|
+
- `verify_batch(...)` — Batch operations with aggregation
|
|
86
|
+
|
|
87
|
+
All functions return immutable `VerificationScore` objects:
|
|
88
|
+
```python
|
|
89
|
+
@dataclass(frozen=True)
|
|
90
|
+
class VerificationScore:
|
|
91
|
+
drift_score: float # 0.0 (identical) to 1.0 (completely different)
|
|
92
|
+
confidence: float # 0.0 to 1.0
|
|
93
|
+
drift_type: DriftType # SEMANTIC | STRUCTURAL | NUMERICAL | LEXICAL
|
|
94
|
+
details: dict # Component scores and metadata
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
---
|
|
98
|
+
|
|
99
|
+
## 🗺️ Agent OS Ecosystem
|
|
100
|
+
|
|
101
|
+
CMVK is one component of a modular Agent Operating System. Each project solves a single problem without assuming the existence of others.
|
|
102
|
+
|
|
103
|
+
### Layer 1: Primitives
|
|
104
|
+
- **[caas](https://github.com/imran-siddique/caas)** — Context-as-a-Service: Efficient context window management
|
|
105
|
+
- **[cmvk](https://github.com/imran-siddique/cross-model-verification-kernel)** (this project) — Verification: Drift detection between outputs
|
|
106
|
+
- **[emk](https://github.com/imran-siddique/emk)** — Episodic Memory Kernel: Long-term memory for agents
|
|
107
|
+
|
|
108
|
+
### Layer 2: Infrastructure
|
|
109
|
+
- **[iatp](https://github.com/imran-siddique/iatp)** — Inter-Agent Trust Protocol: Cryptographic verification of agent messages
|
|
110
|
+
- **[amb](https://github.com/imran-siddique/amb)** — Agent Message Bus: Decoupled communication between agents
|
|
111
|
+
- **[atr](https://github.com/imran-siddique/atr)** — Agent Tool Registry: Dynamic tool discovery and invocation
|
|
112
|
+
|
|
113
|
+
### Layer 3: Framework
|
|
114
|
+
- **[agent-control-plane](https://github.com/imran-siddique/agent-control-plane)** — The Core: Orchestrates primitives and infrastructure
|
|
115
|
+
- **[scak](https://github.com/imran-siddique/scak)** — Self-Correction Agent Kernel: Verification-driven correction loops
|
|
116
|
+
|
|
117
|
+
**Philosophy:** Each layer subtracts complexity from the layer above. Primitives have zero cross-dependencies. Infrastructure composes primitives. Framework orchestrates infrastructure.
|
|
118
|
+
|
|
119
|
+
---
|
|
120
|
+
|
|
121
|
+
## 📚 Citation
|
|
122
|
+
|
|
123
|
+
If you use CMVK in research or production systems, please cite:
|
|
124
|
+
|
|
125
|
+
```bibtex
|
|
126
|
+
@software{cmvk2024,
|
|
127
|
+
author = {Siddique, Imran},
|
|
128
|
+
title = {CMVK: Cross-Model Verification Kernel},
|
|
129
|
+
year = {2024},
|
|
130
|
+
publisher = {GitHub},
|
|
131
|
+
url = {https://github.com/imran-siddique/cross-model-verification-kernel},
|
|
132
|
+
note = {Part of the Agent OS ecosystem}
|
|
133
|
+
}
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
---
|
|
137
|
+
|
|
138
|
+
## 📄 License
|
|
139
|
+
|
|
140
|
+
MIT License - see [LICENSE](LICENSE) for details.
|
|
141
|
+
|
|
142
|
+
---
|
|
143
|
+
|
|
144
|
+
## 🔗 Links
|
|
145
|
+
|
|
146
|
+
- **Repository:** [github.com/imran-siddique/cross-model-verification-kernel](https://github.com/imran-siddique/cross-model-verification-kernel)
|
|
147
|
+
- **PyPI:** [cmvk](https://pypi.org/project/cmvk/)
|
|
148
|
+
- **Issues:** [GitHub Issues](https://github.com/imran-siddique/cross-model-verification-kernel/issues)
|
|
149
|
+
- **Changelog:** [CHANGELOG.md](https://github.com/imran-siddique/cross-model-verification-kernel/blob/main/CHANGELOG.md)
|
modules/cmvk/SECURITY.md
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# Security Policy
|
|
2
|
+
|
|
3
|
+
## Supported Versions
|
|
4
|
+
|
|
5
|
+
The following versions of CMVK are currently receiving security updates:
|
|
6
|
+
|
|
7
|
+
| Version | Supported |
|
|
8
|
+
| ------- | ------------------ |
|
|
9
|
+
| 1.x.x | :white_check_mark: |
|
|
10
|
+
| 0.x.x | :x: |
|
|
11
|
+
|
|
12
|
+
## Reporting a Vulnerability
|
|
13
|
+
|
|
14
|
+
We take the security of CMVK seriously. If you discover a security vulnerability, please report it responsibly.
|
|
15
|
+
|
|
16
|
+
### How to Report
|
|
17
|
+
|
|
18
|
+
**DO NOT** open a public GitHub issue for security vulnerabilities.
|
|
19
|
+
|
|
20
|
+
Instead, please email security concerns to: **imran.siddique@example.com**
|
|
21
|
+
|
|
22
|
+
Include the following information:
|
|
23
|
+
- Description of the vulnerability
|
|
24
|
+
- Steps to reproduce
|
|
25
|
+
- Potential impact
|
|
26
|
+
- Suggested fix (if any)
|
|
27
|
+
|
|
28
|
+
### What to Expect
|
|
29
|
+
|
|
30
|
+
- **Acknowledgment**: We will acknowledge receipt within 48 hours
|
|
31
|
+
- **Assessment**: We will assess the vulnerability and provide an estimated timeline within 7 days
|
|
32
|
+
- **Fix**: Critical vulnerabilities will be addressed within 30 days
|
|
33
|
+
- **Disclosure**: We will coordinate disclosure timing with you
|
|
34
|
+
|
|
35
|
+
### Security Considerations for CMVK
|
|
36
|
+
|
|
37
|
+
#### Sandbox Execution
|
|
38
|
+
|
|
39
|
+
The `SandboxExecutor` tool executes untrusted code. Security measures include:
|
|
40
|
+
|
|
41
|
+
1. **Isolated Execution**: Code runs in isolated subprocess with restricted permissions
|
|
42
|
+
2. **Timeout Limits**: Execution is time-limited to prevent resource exhaustion
|
|
43
|
+
3. **Resource Caps**: Memory and CPU limits are enforced
|
|
44
|
+
4. **No Network Access**: Sandboxed code cannot make network requests by default
|
|
45
|
+
|
|
46
|
+
**Warning**: The sandbox is designed for development/research use. For production deployments, consider using containerized execution (Docker) or dedicated sandboxing solutions.
|
|
47
|
+
|
|
48
|
+
#### API Key Security
|
|
49
|
+
|
|
50
|
+
CMVK uses API keys for LLM providers. Best practices:
|
|
51
|
+
|
|
52
|
+
1. **Never commit API keys** to version control
|
|
53
|
+
2. Use environment variables: `OPENAI_API_KEY`, `GOOGLE_API_KEY`, `ANTHROPIC_API_KEY`
|
|
54
|
+
3. Use `.env` files that are `.gitignore`d
|
|
55
|
+
4. Rotate keys if accidentally exposed
|
|
56
|
+
|
|
57
|
+
#### Data Privacy
|
|
58
|
+
|
|
59
|
+
- Experiment traces may contain sensitive prompts/outputs
|
|
60
|
+
- Do not upload private data to Hugging Face Hub without review
|
|
61
|
+
- Use the `private=True` flag for sensitive datasets
|
|
62
|
+
|
|
63
|
+
## Security Features
|
|
64
|
+
|
|
65
|
+
### Trace Logging
|
|
66
|
+
|
|
67
|
+
When `enable_trace_logging=True` is set, all kernel operations are logged for audit purposes:
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
kernel = VerificationKernel(
|
|
71
|
+
generator=generator,
|
|
72
|
+
verifier=verifier,
|
|
73
|
+
enable_trace_logging=True # Enable full audit trail
|
|
74
|
+
)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Traces can be reviewed in `logs/traces/` for security auditing.
|
|
78
|
+
|
|
79
|
+
### Model Diversity Enforcement
|
|
80
|
+
|
|
81
|
+
The kernel enforces that generator and verifier use different models, preventing single-point-of-failure scenarios:
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
# This raises ValueError - same model is rejected
|
|
85
|
+
kernel = VerificationKernel(
|
|
86
|
+
generator=OpenAIGenerator(model="gpt-4"),
|
|
87
|
+
verifier=OpenAIGenerator(model="gpt-4") # Error!
|
|
88
|
+
)
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Dependencies
|
|
92
|
+
|
|
93
|
+
We regularly audit dependencies for known vulnerabilities using:
|
|
94
|
+
- GitHub Dependabot
|
|
95
|
+
- `pip-audit` in CI pipeline
|
|
96
|
+
|
|
97
|
+
To check for vulnerable dependencies locally:
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
pip install pip-audit
|
|
101
|
+
pip-audit
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
## Responsible AI
|
|
105
|
+
|
|
106
|
+
CMVK is designed to improve AI safety through adversarial verification. However:
|
|
107
|
+
|
|
108
|
+
- Generated code should be reviewed before production use
|
|
109
|
+
- Verification is not a guarantee of correctness
|
|
110
|
+
- Human oversight remains essential for high-stakes applications
|
|
111
|
+
|
|
112
|
+
## Acknowledgments
|
|
113
|
+
|
|
114
|
+
We appreciate security researchers who help keep CMVK secure. Contributors who report valid vulnerabilities will be acknowledged in our security hall of fame (with permission).
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
You are an expert software architect and code generator with System 1 thinking capabilities.
|
|
2
|
+
|
|
3
|
+
Your role is to:
|
|
4
|
+
- Generate creative, efficient, and correct solutions to programming problems
|
|
5
|
+
- Think fast and produce high-quality code implementations
|
|
6
|
+
- Include comprehensive docstrings and comments
|
|
7
|
+
- Generate test cases for your own logic before execution
|
|
8
|
+
|
|
9
|
+
Guidelines:
|
|
10
|
+
1. Always write clean, readable, and well-documented code
|
|
11
|
+
2. Follow best practices for the programming language being used
|
|
12
|
+
3. Generate unit tests alongside your solutions
|
|
13
|
+
4. Think through edge cases and error handling
|
|
14
|
+
5. Be creative but pragmatic in your approach
|
|
15
|
+
|
|
16
|
+
Remember: Your solutions will be reviewed by a hostile critic. Write defensively and think about potential weaknesses in your approach.
|
|
17
|
+
|
|
18
|
+
Output Format:
|
|
19
|
+
Provide your solution in the following structure:
|
|
20
|
+
1. Solution Overview: Brief explanation of your approach
|
|
21
|
+
2. Implementation: The actual code
|
|
22
|
+
3. Test Cases: Unit tests to verify correctness
|
|
23
|
+
4. Edge Cases: Potential issues and how you handled them
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
You are a cynical, adversarial code reviewer with System 2 thinking capabilities.
|
|
2
|
+
|
|
3
|
+
Your role is to:
|
|
4
|
+
- Find flaws, bugs, and weaknesses in the provided solution
|
|
5
|
+
- Question assumptions and logic
|
|
6
|
+
- Think critically about edge cases that might break the code
|
|
7
|
+
- Verify that test cases are comprehensive
|
|
8
|
+
- Act as a hostile critic, not a cooperative helper
|
|
9
|
+
|
|
10
|
+
Guidelines:
|
|
11
|
+
1. Assume the solution is flawed until proven otherwise
|
|
12
|
+
2. Look for:
|
|
13
|
+
- Logic errors and incorrect algorithms
|
|
14
|
+
- Missing edge cases
|
|
15
|
+
- Performance issues
|
|
16
|
+
- Security vulnerabilities
|
|
17
|
+
- Incomplete test coverage
|
|
18
|
+
- Ambiguous or misleading documentation
|
|
19
|
+
3. Be specific in your criticism with concrete examples
|
|
20
|
+
4. Do NOT suggest fixes - only identify problems
|
|
21
|
+
5. Rate your confidence in each issue found (Low/Medium/High)
|
|
22
|
+
|
|
23
|
+
Remember: You are NOT here to be helpful. You are here to break the solution and expose its weaknesses. "Trust, but Verify" means you verify with extreme skepticism.
|
|
24
|
+
|
|
25
|
+
Output Format:
|
|
26
|
+
Provide your review in the following structure:
|
|
27
|
+
1. Overall Assessment: Pass/Fail with confidence level
|
|
28
|
+
2. Critical Issues: Bugs that will cause incorrect behavior
|
|
29
|
+
3. Logic Flaws: Questionable reasoning or algorithmic problems
|
|
30
|
+
4. Missing Edge Cases: Scenarios not handled by the solution
|
|
31
|
+
5. Test Coverage Gaps: Missing or inadequate test cases
|
|
32
|
+
6. Minor Issues: Style, performance, or documentation concerns
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# Cross-Model Verification Kernel Configuration
|
|
2
|
+
# Owner: Imran Siddique
|
|
3
|
+
|
|
4
|
+
# API Keys (Environment variables recommended)
|
|
5
|
+
api_keys:
|
|
6
|
+
openai_key: ${OPENAI_API_KEY}
|
|
7
|
+
google_key: ${GOOGLE_API_KEY}
|
|
8
|
+
anthropic_key: ${ANTHROPIC_API_KEY}
|
|
9
|
+
|
|
10
|
+
# Model Configuration
|
|
11
|
+
models:
|
|
12
|
+
generator:
|
|
13
|
+
provider: "openai"
|
|
14
|
+
model_name: "gpt-4o"
|
|
15
|
+
temperature: 0.7
|
|
16
|
+
max_tokens: 2000
|
|
17
|
+
|
|
18
|
+
verifier:
|
|
19
|
+
provider: "google"
|
|
20
|
+
model_name: "gemini-1.5-pro"
|
|
21
|
+
temperature: 0.3 # Lower temperature for more deterministic verification
|
|
22
|
+
max_tokens: 2000
|
|
23
|
+
|
|
24
|
+
# Kernel Configuration
|
|
25
|
+
kernel:
|
|
26
|
+
max_loops: 5 # Maximum Generator->Verifier cycles before declaring failure
|
|
27
|
+
confidence_threshold: 0.85 # Minimum confidence to accept a solution
|
|
28
|
+
enable_graph_memory: true
|
|
29
|
+
enable_runtime_testing: true
|
|
30
|
+
|
|
31
|
+
# Sandbox Configuration
|
|
32
|
+
sandbox:
|
|
33
|
+
timeout_seconds: 30
|
|
34
|
+
memory_limit_mb: 512
|
|
35
|
+
enable_docker: false # Set to true for production
|
|
36
|
+
|
|
37
|
+
# Logging
|
|
38
|
+
logging:
|
|
39
|
+
level: "INFO"
|
|
40
|
+
log_file: "logs/cmvk.log"
|