agent-os-kernel 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_os/__init__.py +66 -4
- agent_os/agents_compat.py +286 -0
- agent_os/base_agent.py +308 -0
- agent_os/cli.py +1079 -19
- agent_os/integrations/__init__.py +37 -2
- agent_os/integrations/openai_adapter.py +502 -0
- agent_os/integrations/semantic_kernel_adapter.py +569 -0
- agent_os/stateless.py +349 -0
- agent_os_kernel-1.2.0.dist-info/METADATA +676 -0
- agent_os_kernel-1.2.0.dist-info/RECORD +1053 -0
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.2.0.dist-info}/entry_points.txt +0 -1
- modules/amb/.github/workflows/ci.yml +102 -0
- modules/amb/.github/workflows/publish.yml +146 -0
- modules/amb/.gitignore +134 -0
- modules/amb/CHANGELOG.md +118 -0
- modules/amb/CONTRIBUTING.md +141 -0
- modules/amb/LICENSE +21 -0
- modules/amb/README.md +188 -0
- modules/amb/amb_core/__init__.py +175 -0
- modules/amb/amb_core/adapters/__init__.py +55 -0
- modules/amb/amb_core/adapters/aws_sqs_broker.py +374 -0
- modules/amb/amb_core/adapters/azure_servicebus_broker.py +338 -0
- modules/amb/amb_core/adapters/kafka_broker.py +258 -0
- modules/amb/amb_core/adapters/nats_broker.py +283 -0
- modules/amb/amb_core/adapters/rabbitmq_broker.py +233 -0
- modules/amb/amb_core/adapters/redis_broker.py +260 -0
- modules/amb/amb_core/broker.py +143 -0
- modules/amb/amb_core/bus.py +479 -0
- modules/amb/amb_core/cloudevents.py +507 -0
- modules/amb/amb_core/dlq.py +343 -0
- modules/amb/amb_core/hf_utils.py +534 -0
- modules/amb/amb_core/memory_broker.py +408 -0
- modules/amb/amb_core/models.py +139 -0
- modules/amb/amb_core/persistence.py +527 -0
- modules/amb/amb_core/schema.py +292 -0
- modules/amb/amb_core/tracing.py +356 -0
- modules/amb/examples/advanced_features.py +223 -0
- modules/amb/examples/backpressure_demo.py +225 -0
- modules/amb/examples/basic_usage.py +117 -0
- modules/amb/examples/tracing_demo.py +104 -0
- modules/amb/experiments/README.md +52 -0
- modules/amb/experiments/reproduce_results.py +467 -0
- modules/amb/experiments/results.json +324 -0
- modules/amb/paper/README.md +40 -0
- modules/amb/paper/paper.tex +365 -0
- modules/amb/paper/whitepaper.md +377 -0
- modules/amb/pyproject.toml +117 -0
- modules/amb/tests/__init__.py +1 -0
- modules/amb/tests/test_backpressure_priority.py +280 -0
- modules/amb/tests/test_bus.py +198 -0
- modules/amb/tests/test_cloudevents.py +443 -0
- modules/amb/tests/test_features.py +531 -0
- modules/amb/tests/test_models.py +74 -0
- modules/amb/tests/test_tracing.py +254 -0
- modules/atr/.github/workflows/ci.yml +101 -0
- modules/atr/.github/workflows/publish.yml +140 -0
- modules/atr/.gitignore +134 -0
- modules/atr/.pre-commit-config.yaml +37 -0
- modules/atr/CHANGELOG.md +39 -0
- modules/atr/CONTRIBUTING.md +96 -0
- modules/atr/IMPLEMENTATION_SUMMARY.md +143 -0
- modules/atr/README.md +180 -0
- modules/atr/atr/__init__.py +638 -0
- modules/atr/atr/access.py +346 -0
- modules/atr/atr/composition.py +643 -0
- modules/atr/atr/decorator.py +355 -0
- modules/atr/atr/executor.py +382 -0
- modules/atr/atr/health.py +555 -0
- modules/atr/atr/hf_utils.py +447 -0
- modules/atr/atr/injection.py +420 -0
- modules/atr/atr/metrics.py +438 -0
- modules/atr/atr/policies.py +401 -0
- modules/atr/atr/py.typed +2 -0
- modules/atr/atr/registry.py +450 -0
- modules/atr/atr/schema.py +478 -0
- modules/atr/atr/tools/safe/__init__.py +73 -0
- modules/atr/atr/tools/safe/calculator.py +380 -0
- modules/atr/atr/tools/safe/datetime_tool.py +441 -0
- modules/atr/atr/tools/safe/file_reader.py +400 -0
- modules/atr/atr/tools/safe/http_client.py +314 -0
- modules/atr/atr/tools/safe/json_parser.py +372 -0
- modules/atr/atr/tools/safe/text_tool.py +526 -0
- modules/atr/atr/tools/safe/toolkit.py +173 -0
- modules/atr/docs/PYPI_SETUP.md +113 -0
- modules/atr/examples/README.md +27 -0
- modules/atr/examples/demo.py +144 -0
- modules/atr/examples/sandbox_demo.py +218 -0
- modules/atr/experiments/README.md +69 -0
- modules/atr/experiments/reproduce_results.py +509 -0
- modules/atr/experiments/results/.gitkeep +0 -0
- modules/atr/experiments/results/results_20260123_140334.json +71 -0
- modules/atr/paper/README.md +36 -0
- modules/atr/paper/figures/.gitkeep +0 -0
- modules/atr/paper/references.bib +84 -0
- modules/atr/paper/structure.tex +293 -0
- modules/atr/paper/whitepaper.md +234 -0
- modules/atr/pyproject.toml +148 -0
- modules/atr/requirements.txt +1 -0
- modules/atr/setup.py +30 -0
- modules/atr/tests/__init__.py +1 -0
- modules/atr/tests/test_decorator.py +317 -0
- modules/atr/tests/test_executor.py +245 -0
- modules/atr/tests/test_integration_executor.py +184 -0
- modules/atr/tests/test_registry.py +312 -0
- modules/atr/tests/test_schema.py +182 -0
- modules/atr/tests/test_v2_features.py +708 -0
- modules/caas/.dockerignore +63 -0
- modules/caas/.github/ISSUE_TEMPLATE/bug_report.md +38 -0
- modules/caas/.github/ISSUE_TEMPLATE/custom.md +10 -0
- modules/caas/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
- modules/caas/.github/workflows/ci.yml +100 -0
- modules/caas/.github/workflows/lint.yml +39 -0
- modules/caas/.github/workflows/publish-pypi.yml +124 -0
- modules/caas/.gitignore +73 -0
- modules/caas/.pre-commit-config.yaml +33 -0
- modules/caas/CHANGELOG.md +58 -0
- modules/caas/CONTRIBUTING.md +346 -0
- modules/caas/Dockerfile +41 -0
- modules/caas/LICENSE +21 -0
- modules/caas/MANIFEST.in +11 -0
- modules/caas/README.md +158 -0
- modules/caas/benchmarks/README.md +255 -0
- modules/caas/benchmarks/create_hf_dataset.py +502 -0
- modules/caas/benchmarks/data/sample_corpus/README.md +86 -0
- modules/caas/benchmarks/data/sample_corpus/auth_module.py +211 -0
- modules/caas/benchmarks/data/sample_corpus/contribution_guide.md +185 -0
- modules/caas/benchmarks/data/sample_corpus/remote_work_policy.html +57 -0
- modules/caas/benchmarks/hf_dataset/README.md +214 -0
- modules/caas/benchmarks/hf_dataset/caas_benchmark_corpus.py +73 -0
- modules/caas/benchmarks/hf_dataset/corpus_preview.json +193 -0
- modules/caas/benchmarks/results/README.md +66 -0
- modules/caas/benchmarks/results/evaluation_2026-01-20.json +121 -0
- modules/caas/benchmarks/run_evaluation.py +561 -0
- modules/caas/benchmarks/statistical_tests.py +289 -0
- modules/caas/benchmarks/verify_sample_corpus.py +83 -0
- modules/caas/docker-compose.yml +38 -0
- modules/caas/docs/CONTEXT_TRIAD.md +462 -0
- modules/caas/docs/CONTRIBUTING.md +346 -0
- modules/caas/docs/ETHICS_AND_LIMITATIONS.md +336 -0
- modules/caas/docs/HEURISTIC_ROUTER.md +442 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY.md +363 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_CONTEXT_TRIAD.md +277 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_HEURISTIC_ROUTER.md +231 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_METADATA_INJECTION.md +258 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_PRAGMATIC_TRUTH.md +212 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_TRUST_GATEWAY.md +319 -0
- modules/caas/docs/LAYER_1_PRIMITIVE.md +202 -0
- modules/caas/docs/METADATA_INJECTION.md +404 -0
- modules/caas/docs/PRAGMATIC_TRUTH.md +431 -0
- modules/caas/docs/RELATED_WORK.md +312 -0
- modules/caas/docs/RELEASE_CHECKLIST.md +219 -0
- modules/caas/docs/RELEASE_GUIDE.md +285 -0
- modules/caas/docs/REPRODUCIBILITY.md +386 -0
- modules/caas/docs/SLIDING_WINDOW.md +387 -0
- modules/caas/docs/STRUCTURE_AWARE_INDEXING.md +158 -0
- modules/caas/docs/TESTING.md +259 -0
- modules/caas/docs/THREAT_MODEL.md +247 -0
- modules/caas/docs/TRUST_GATEWAY.md +575 -0
- modules/caas/docs/VFS.md +298 -0
- modules/caas/examples/agents/enterprise_security_agent.py +414 -0
- modules/caas/examples/agents/intelligent_document_analyzer.py +380 -0
- modules/caas/examples/demos/demo.py +309 -0
- modules/caas/examples/demos/demo_context_triad.py +225 -0
- modules/caas/examples/demos/demo_conversation_manager.py +285 -0
- modules/caas/examples/demos/demo_heuristic_router.py +133 -0
- modules/caas/examples/demos/demo_metadata_injection.py +198 -0
- modules/caas/examples/demos/demo_pragmatic_truth.py +303 -0
- modules/caas/examples/demos/demo_structure_aware.py +140 -0
- modules/caas/examples/demos/demo_time_decay.py +247 -0
- modules/caas/examples/demos/demo_trust_gateway.py +383 -0
- modules/caas/examples/multi_agent/README.md +159 -0
- modules/caas/examples/multi_agent/research_team.py +369 -0
- modules/caas/examples/multi_agent/vfs_collaboration.py +393 -0
- modules/caas/examples/usage/auth_module.py +142 -0
- modules/caas/examples/usage/usage_example.py +173 -0
- modules/caas/experiments/README.md +42 -0
- modules/caas/experiments/reproduce_results.py +462 -0
- modules/caas/paper/ARXIV_METADATA.md +145 -0
- modules/caas/paper/ARXIV_README.md +47 -0
- modules/caas/paper/CHECKLIST.md +103 -0
- modules/caas/paper/GITHUB_RELEASE_NOTES.md +105 -0
- modules/caas/paper/README.md +71 -0
- modules/caas/paper/abstract.md +24 -0
- modules/caas/paper/arxiv_submission.tar +0 -0
- modules/caas/paper/arxiv_submission.zip +0 -0
- modules/caas/paper/build_pdf.py +355 -0
- modules/caas/paper/experiments.md +149 -0
- modules/caas/paper/figures/.gitkeep +0 -0
- modules/caas/paper/figures/README.md +237 -0
- modules/caas/paper/figures/fig1_system_architecture.png +0 -0
- modules/caas/paper/figures/fig1_system_architecture.svg +198 -0
- modules/caas/paper/figures/fig2_context_triad.png +0 -0
- modules/caas/paper/figures/fig2_context_triad.svg +105 -0
- modules/caas/paper/figures/fig3_ablation_results.png +0 -0
- modules/caas/paper/figures/fig3_ablation_results.svg +113 -0
- modules/caas/paper/figures/fig4_routing_latency.png +0 -0
- modules/caas/paper/figures/fig4_routing_latency.svg +97 -0
- modules/caas/paper/intro.md +103 -0
- modules/caas/paper/latex/figures/fig1_system_architecture.png +0 -0
- modules/caas/paper/latex/figures/fig2_context_triad.png +0 -0
- modules/caas/paper/latex/figures/fig3_ablation_results.png +0 -0
- modules/caas/paper/latex/figures/fig4_routing_latency.png +0 -0
- modules/caas/paper/latex/main.tex +468 -0
- modules/caas/paper/latex/references.bib +140 -0
- modules/caas/paper/method.md +350 -0
- modules/caas/paper/outline.md +123 -0
- modules/caas/paper/related_work.md +101 -0
- modules/caas/paper/tables/.gitkeep +0 -0
- modules/caas/paper/tables/results_tables.md +50 -0
- modules/caas/pyproject.toml +172 -0
- modules/caas/requirements.txt +11 -0
- modules/caas/src/caas/__init__.py +232 -0
- modules/caas/src/caas/api/__init__.py +7 -0
- modules/caas/src/caas/api/server.py +1326 -0
- modules/caas/src/caas/caching.py +832 -0
- modules/caas/src/caas/cli.py +208 -0
- modules/caas/src/caas/conversation.py +221 -0
- modules/caas/src/caas/decay.py +118 -0
- modules/caas/src/caas/detection/__init__.py +7 -0
- modules/caas/src/caas/detection/detector.py +236 -0
- modules/caas/src/caas/enrichment.py +127 -0
- modules/caas/src/caas/gateway/__init__.py +24 -0
- modules/caas/src/caas/gateway/trust_gateway.py +471 -0
- modules/caas/src/caas/hf_utils.py +477 -0
- modules/caas/src/caas/ingestion/__init__.py +21 -0
- modules/caas/src/caas/ingestion/processors.py +251 -0
- modules/caas/src/caas/ingestion/structure_parser.py +185 -0
- modules/caas/src/caas/models.py +354 -0
- modules/caas/src/caas/pragmatic_truth.py +441 -0
- modules/caas/src/caas/routing/__init__.py +8 -0
- modules/caas/src/caas/routing/heuristic_router.py +242 -0
- modules/caas/src/caas/storage/__init__.py +7 -0
- modules/caas/src/caas/storage/store.py +450 -0
- modules/caas/src/caas/triad.py +472 -0
- modules/caas/src/caas/tuning/__init__.py +7 -0
- modules/caas/src/caas/tuning/tuner.py +322 -0
- modules/caas/src/caas/vfs/__init__.py +12 -0
- modules/caas/src/caas/vfs/filesystem.py +450 -0
- modules/caas/tests/__init__.py +3 -0
- modules/caas/tests/conftest.py +8 -0
- modules/caas/tests/test_caching.py +628 -0
- modules/caas/tests/test_context_triad.py +385 -0
- modules/caas/tests/test_conversation_manager.py +289 -0
- modules/caas/tests/test_functionality.py +215 -0
- modules/caas/tests/test_heuristic_router.py +370 -0
- modules/caas/tests/test_metadata_injection.py +328 -0
- modules/caas/tests/test_pragmatic_truth.py +322 -0
- modules/caas/tests/test_structure_aware_indexing.py +283 -0
- modules/caas/tests/test_time_decay.py +268 -0
- modules/caas/tests/test_trust_gateway.py +445 -0
- modules/caas/tests/test_vfs.py +298 -0
- modules/cmvk/.github/FUNDING.yml +9 -0
- modules/cmvk/.github/dependabot.yml +54 -0
- modules/cmvk/.github/workflows/ci.yml +205 -0
- modules/cmvk/.github/workflows/publish.yml +143 -0
- modules/cmvk/.gitignore +147 -0
- modules/cmvk/.pre-commit-config.yaml +58 -0
- modules/cmvk/CHANGELOG.md +146 -0
- modules/cmvk/CITATION.cff +48 -0
- modules/cmvk/CONTRIBUTING.md +229 -0
- modules/cmvk/Dockerfile +87 -0
- modules/cmvk/HF_MODEL_CARD.md +185 -0
- modules/cmvk/LICENSE +21 -0
- modules/cmvk/README.md +149 -0
- modules/cmvk/SECURITY.md +114 -0
- modules/cmvk/config/prompts/generator_v1.txt +23 -0
- modules/cmvk/config/prompts/verifier_hostile.txt +32 -0
- modules/cmvk/config/settings.yaml +40 -0
- modules/cmvk/coverage_html/.gitignore +2 -0
- modules/cmvk/coverage_html/class_index.html +658 -0
- modules/cmvk/coverage_html/coverage_html_cb_188fc9a4.js +735 -0
- modules/cmvk/coverage_html/favicon_32_cb_c827f16f.png +0 -0
- modules/cmvk/coverage_html/function_index.html +1978 -0
- modules/cmvk/coverage_html/index.html +255 -0
- modules/cmvk/coverage_html/keybd_closed_cb_900cfef5.png +0 -0
- modules/cmvk/coverage_html/status.json +1 -0
- modules/cmvk/coverage_html/style_cb_5c747636.css +389 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38___init___py.html +315 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_audit_py.html +499 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_benchmarks_py.html +575 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_constitutional_py.html +1001 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_hf_utils_py.html +398 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_metrics_py.html +570 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_profiles_py.html +397 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_types_py.html +109 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_verification_py.html +1053 -0
- modules/cmvk/docs/DIAGRAMS.md +325 -0
- modules/cmvk/docs/architecture.md +345 -0
- modules/cmvk/docs/features.md +308 -0
- modules/cmvk/docs/getting_started.md +279 -0
- modules/cmvk/docs/innovation_layer.md +377 -0
- modules/cmvk/docs/safety.md +281 -0
- modules/cmvk/docs/traceability.md +150 -0
- modules/cmvk/examples/basic_example.py +62 -0
- modules/cmvk/examples/demo_complete_pipeline.py +209 -0
- modules/cmvk/examples/demo_innovation_layer.py +197 -0
- modules/cmvk/examples/example.py +112 -0
- modules/cmvk/examples/model_diversity_comparison.py +110 -0
- modules/cmvk/examples/real_api_integration.py +121 -0
- modules/cmvk/examples/test_full_pipeline.py +303 -0
- modules/cmvk/experiments/FEATURE_2_LATERAL_THINKING.md +187 -0
- modules/cmvk/experiments/README.md +216 -0
- modules/cmvk/experiments/ablation_runner.py +666 -0
- modules/cmvk/experiments/baseline_runner.py +158 -0
- modules/cmvk/experiments/blind_spot_benchmark.py +364 -0
- modules/cmvk/experiments/datasets/README.md +85 -0
- modules/cmvk/experiments/datasets/humaneval_50.json +352 -0
- modules/cmvk/experiments/datasets/humaneval_full.json +1150 -0
- modules/cmvk/experiments/datasets/humaneval_sample.json +32 -0
- modules/cmvk/experiments/datasets/sabotage.json +262 -0
- modules/cmvk/experiments/datasets/sample.json +40 -0
- modules/cmvk/experiments/demo_with_traces.py +110 -0
- modules/cmvk/experiments/efficiency_curve.py +259 -0
- modules/cmvk/experiments/experiment_runner.py +243 -0
- modules/cmvk/experiments/paper_data_generator.py +183 -0
- modules/cmvk/experiments/reproduce_results.py +407 -0
- modules/cmvk/experiments/reproducible_runner.py +352 -0
- modules/cmvk/experiments/sabotage_stress_test.py +311 -0
- modules/cmvk/experiments/test_lateral_thinking.py +116 -0
- modules/cmvk/experiments/test_prosecutor.py +41 -0
- modules/cmvk/experiments/visualize_results.py +735 -0
- modules/cmvk/logs/traces/demo_HumanEval_0_20260121-204900.json +36 -0
- modules/cmvk/notebooks/analysis.ipynb +124 -0
- modules/cmvk/paper/PAPER.md +561 -0
- modules/cmvk/paper/arxiv_checklist.md +230 -0
- modules/cmvk/paper/cmvk_neurips.aux +77 -0
- modules/cmvk/paper/cmvk_neurips.bbl +81 -0
- modules/cmvk/paper/cmvk_neurips.blg +48 -0
- modules/cmvk/paper/cmvk_neurips.out +16 -0
- modules/cmvk/paper/cmvk_neurips.pdf +0 -0
- modules/cmvk/paper/cmvk_neurips.tex +309 -0
- modules/cmvk/paper/figures/ablation.png +0 -0
- modules/cmvk/paper/figures/ablation.svg +39 -0
- modules/cmvk/paper/figures/architecture.png +0 -0
- modules/cmvk/paper/figures/architecture.svg +115 -0
- modules/cmvk/paper/figures/results_bar.png +0 -0
- modules/cmvk/paper/figures/results_bar.svg +70 -0
- modules/cmvk/paper/generate_figures.py +383 -0
- modules/cmvk/paper/neurips_2024.sty +101 -0
- modules/cmvk/paper/references.bib +98 -0
- modules/cmvk/paper/structure.tex +200 -0
- modules/cmvk/pyproject.toml +189 -0
- modules/cmvk/requirements-dev.txt +19 -0
- modules/cmvk/requirements.txt +14 -0
- modules/cmvk/src/cmvk/__init__.py +216 -0
- modules/cmvk/src/cmvk/audit.py +400 -0
- modules/cmvk/src/cmvk/benchmarks.py +476 -0
- modules/cmvk/src/cmvk/constitutional.py +902 -0
- modules/cmvk/src/cmvk/hf_utils.py +299 -0
- modules/cmvk/src/cmvk/metrics.py +471 -0
- modules/cmvk/src/cmvk/profiles.py +298 -0
- modules/cmvk/src/cmvk/py.typed +0 -0
- modules/cmvk/src/cmvk/types.py +10 -0
- modules/cmvk/src/cmvk/verification.py +954 -0
- modules/cmvk/src/cross_model_verification_kernel/__init__.py +91 -0
- modules/cmvk/src/cross_model_verification_kernel/__main__.py +10 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/__init__.py +16 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/base_agent.py +142 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/generator_openai.py +223 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/verifier_anthropic.py +448 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/verifier_gemini.py +481 -0
- modules/cmvk/src/cross_model_verification_kernel/cli.py +570 -0
- modules/cmvk/src/cross_model_verification_kernel/core/__init__.py +26 -0
- modules/cmvk/src/cross_model_verification_kernel/core/graph_memory.py +308 -0
- modules/cmvk/src/cross_model_verification_kernel/core/kernel.py +413 -0
- modules/cmvk/src/cross_model_verification_kernel/core/trace_logger.py +75 -0
- modules/cmvk/src/cross_model_verification_kernel/core/types.py +121 -0
- modules/cmvk/src/cross_model_verification_kernel/datasets/__init__.py +20 -0
- modules/cmvk/src/cross_model_verification_kernel/datasets/humaneval_loader.py +271 -0
- modules/cmvk/src/cross_model_verification_kernel/generator.py +118 -0
- modules/cmvk/src/cross_model_verification_kernel/kernel.py +292 -0
- modules/cmvk/src/cross_model_verification_kernel/models.py +111 -0
- modules/cmvk/src/cross_model_verification_kernel/py.typed +1 -0
- modules/cmvk/src/cross_model_verification_kernel/simple_kernel.py +185 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/__init__.py +94 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/huggingface_upload.py +394 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/sandbox.py +159 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/statistics.py +468 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/visualizer.py +312 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/web_search.py +86 -0
- modules/cmvk/src/cross_model_verification_kernel/verifier.py +257 -0
- modules/cmvk/tests/__init__.py +3 -0
- modules/cmvk/tests/conftest.py +61 -0
- modules/cmvk/tests/integration/__init__.py +1 -0
- modules/cmvk/tests/integration/test_anthropic_verifier.py +269 -0
- modules/cmvk/tests/integration/test_integration.py +53 -0
- modules/cmvk/tests/integration/test_lateral_thinking_integration.py +199 -0
- modules/cmvk/tests/integration/test_lateral_thinking_witness.py +208 -0
- modules/cmvk/tests/integration/test_prosecutor_mode.py +131 -0
- modules/cmvk/tests/test_constitutional.py +611 -0
- modules/cmvk/tests/test_enhanced_features.py +603 -0
- modules/cmvk/tests/test_verification.py +255 -0
- modules/cmvk/tests/unit/__init__.py +1 -0
- modules/cmvk/tests/unit/test_agents.py +64 -0
- modules/cmvk/tests/unit/test_cli.py +224 -0
- modules/cmvk/tests/unit/test_core.py +126 -0
- modules/cmvk/tests/unit/test_humaneval_loader.py +197 -0
- modules/cmvk/tests/unit/test_kernel.py +255 -0
- modules/cmvk/tests/unit/test_reproducibility.py +160 -0
- modules/cmvk/tests/unit/test_trace_logger.py +115 -0
- modules/cmvk/tests/unit/test_visualizer.py +218 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/bug_report.yml +82 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/config.yml +11 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/feature_request.yml +104 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/question.yml +70 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/security_vulnerability.yml +84 -0
- modules/control-plane/.github/discussions.yml +73 -0
- modules/control-plane/.github/pull_request_template.md +82 -0
- modules/control-plane/.github/workflows/publish.yml +146 -0
- modules/control-plane/.github/workflows/release.yml +39 -0
- modules/control-plane/.github/workflows/tests.yml +58 -0
- modules/control-plane/.gitignore +55 -0
- modules/control-plane/CHANGELOG.md +203 -0
- modules/control-plane/CONTRIBUTING.md +311 -0
- modules/control-plane/CONTRIBUTORS.md +88 -0
- modules/control-plane/Dockerfile +82 -0
- modules/control-plane/LICENSE +21 -0
- modules/control-plane/MANIFEST.in +17 -0
- modules/control-plane/README.md +1264 -0
- modules/control-plane/ROADMAP.md +228 -0
- modules/control-plane/SECURITY.md +210 -0
- modules/control-plane/SUPPORT.md +106 -0
- modules/control-plane/acp-cli.py +212 -0
- modules/control-plane/benchmark/README.md +257 -0
- modules/control-plane/benchmark/__init__.py +19 -0
- modules/control-plane/benchmark/red_team_dataset.py +517 -0
- modules/control-plane/benchmark.py +563 -0
- modules/control-plane/build_and_publish.sh +130 -0
- modules/control-plane/docker-compose.yml +74 -0
- modules/control-plane/docs/ABLATION_STUDIES.md +528 -0
- modules/control-plane/docs/ADAPTER_GUIDE.md +544 -0
- modules/control-plane/docs/ADVANCED_FEATURES.md +543 -0
- modules/control-plane/docs/AIOS_COMPARISON.md +296 -0
- modules/control-plane/docs/BIBLIOGRAPHY.md +367 -0
- modules/control-plane/docs/CASE_STUDIES.md +645 -0
- modules/control-plane/docs/DOCKER_DEPLOYMENT.md +184 -0
- modules/control-plane/docs/ECOSYSTEM_STATUS.md +98 -0
- modules/control-plane/docs/HF_MODEL_CARD.md +168 -0
- modules/control-plane/docs/KERNEL_V1_RELEASE.md +454 -0
- modules/control-plane/docs/LAYER3_FRAMEWORK.md +227 -0
- modules/control-plane/docs/LIMITATIONS.md +523 -0
- modules/control-plane/docs/PYPI_PUBLISHING.md +195 -0
- modules/control-plane/docs/README.md +58 -0
- modules/control-plane/docs/RELATED_WORK.md +319 -0
- modules/control-plane/docs/RELEASE_v1.1.0.md +252 -0
- modules/control-plane/docs/REPRODUCIBILITY.md +540 -0
- modules/control-plane/docs/RESEARCH_FOUNDATION.md +197 -0
- modules/control-plane/docs/api/CORE.md +270 -0
- modules/control-plane/docs/architecture/architecture.md +120 -0
- modules/control-plane/docs/community/ANNOUNCEMENT_TEMPLATES.md +52 -0
- modules/control-plane/docs/guides/IMPLEMENTATION.md +225 -0
- modules/control-plane/docs/guides/PHILOSOPHY.md +354 -0
- modules/control-plane/docs/guides/QUICKSTART.md +217 -0
- modules/control-plane/examples/README.md +138 -0
- modules/control-plane/examples/a2a_demo.py +410 -0
- modules/control-plane/examples/adapter_demo.py +347 -0
- modules/control-plane/examples/advanced_features.py +403 -0
- modules/control-plane/examples/basic_usage.py +261 -0
- modules/control-plane/examples/benchmark_demo.py +186 -0
- modules/control-plane/examples/compliance_demo.py +333 -0
- modules/control-plane/examples/configuration.py +265 -0
- modules/control-plane/examples/getting_started.py +178 -0
- modules/control-plane/examples/hibernation_and_time_travel_demo.py +406 -0
- modules/control-plane/examples/interactive_tutorial.ipynb +497 -0
- modules/control-plane/examples/kernel_interceptor_demo.py +202 -0
- modules/control-plane/examples/kernel_v1_demo.py +273 -0
- modules/control-plane/examples/langchain_demo.py +281 -0
- modules/control-plane/examples/lifecycle_demo.py +724 -0
- modules/control-plane/examples/mcp_demo.py +378 -0
- modules/control-plane/examples/ml_safety_demo.py +157 -0
- modules/control-plane/examples/multimodal_demo.py +347 -0
- modules/control-plane/examples/observability_demo.py +370 -0
- modules/control-plane/examples/use_cases.py +336 -0
- modules/control-plane/experiments/long_horizon_purge.py +235 -0
- modules/control-plane/experiments/multi_agent_rag.py +165 -0
- modules/control-plane/experiments/reproduce_results.py +667 -0
- modules/control-plane/paper/ARXIV_SUBMISSION_INFO.txt +122 -0
- modules/control-plane/paper/ETHICS_STATEMENT.md +248 -0
- modules/control-plane/paper/PAPER_CHECKLIST.md +72 -0
- modules/control-plane/paper/Paper.pdf +0 -0
- modules/control-plane/paper/README.md +71 -0
- modules/control-plane/paper/appendix.md +152 -0
- modules/control-plane/paper/architecture.md +15 -0
- modules/control-plane/paper/arxiv/figures/ablation_chart.png +0 -0
- modules/control-plane/paper/arxiv/figures/architecture.png +0 -0
- modules/control-plane/paper/arxiv/figures/constraint_graphs.png +0 -0
- modules/control-plane/paper/arxiv/figures/results_chart.png +0 -0
- modules/control-plane/paper/arxiv/main.aux +97 -0
- modules/control-plane/paper/arxiv/main.bbl +112 -0
- modules/control-plane/paper/arxiv/main.blg +48 -0
- modules/control-plane/paper/arxiv/main.out +33 -0
- modules/control-plane/paper/arxiv/main.pdf +0 -0
- modules/control-plane/paper/arxiv/main.tex +479 -0
- modules/control-plane/paper/arxiv/references.bib +234 -0
- modules/control-plane/paper/arxiv_submission.tar +0 -0
- modules/control-plane/paper/arxiv_submission.zip +0 -0
- modules/control-plane/paper/build.sh +68 -0
- modules/control-plane/paper/figures/README.md +47 -0
- modules/control-plane/paper/figures/ablation_chart.pdf +0 -0
- modules/control-plane/paper/figures/ablation_chart.png +0 -0
- modules/control-plane/paper/figures/architecture.pdf +0 -0
- modules/control-plane/paper/figures/architecture.png +0 -0
- modules/control-plane/paper/figures/constraint_graphs.pdf +0 -0
- modules/control-plane/paper/figures/constraint_graphs.png +0 -0
- modules/control-plane/paper/figures/generate_figures.py +252 -0
- modules/control-plane/paper/figures/results_chart.pdf +0 -0
- modules/control-plane/paper/figures/results_chart.png +0 -0
- modules/control-plane/paper/main.md +273 -0
- modules/control-plane/paper/main.tex +214 -0
- modules/control-plane/paper/main_arxiv.aux +53 -0
- modules/control-plane/paper/main_arxiv.out +17 -0
- modules/control-plane/paper/main_arxiv.pdf +0 -0
- modules/control-plane/paper/main_arxiv.tex +264 -0
- modules/control-plane/paper/references.bib +234 -0
- modules/control-plane/pyproject.toml +124 -0
- modules/control-plane/reproducibility/ABLATIONS.md +136 -0
- modules/control-plane/reproducibility/README.md +288 -0
- modules/control-plane/reproducibility/commands.md +467 -0
- modules/control-plane/reproducibility/docker_config/Dockerfile +39 -0
- modules/control-plane/reproducibility/experiment_configs/purge_config.json +46 -0
- modules/control-plane/reproducibility/experiment_configs/rag_config.json +36 -0
- modules/control-plane/reproducibility/hardware_specs.md +317 -0
- modules/control-plane/reproducibility/requirements_frozen.txt +0 -0
- modules/control-plane/reproducibility/run_all_experiments.sh +45 -0
- modules/control-plane/reproducibility/seeds.json +106 -0
- modules/control-plane/scripts/prepare_pypi.py +46 -0
- modules/control-plane/scripts/prepare_release.py +176 -0
- modules/control-plane/scripts/upload_dataset_to_hf.py +316 -0
- modules/control-plane/setup.py +69 -0
- modules/control-plane/src/agent_control_plane/__init__.py +639 -0
- modules/control-plane/src/agent_control_plane/a2a_adapter.py +541 -0
- modules/control-plane/src/agent_control_plane/adapter.py +415 -0
- modules/control-plane/src/agent_control_plane/agent_hibernation.py +364 -0
- modules/control-plane/src/agent_control_plane/agent_kernel.py +464 -0
- modules/control-plane/src/agent_control_plane/compliance.py +718 -0
- modules/control-plane/src/agent_control_plane/constraint_graphs.py +475 -0
- modules/control-plane/src/agent_control_plane/control_plane.py +848 -0
- modules/control-plane/src/agent_control_plane/example_executors.py +193 -0
- modules/control-plane/src/agent_control_plane/execution_engine.py +229 -0
- modules/control-plane/src/agent_control_plane/flight_recorder.py +600 -0
- modules/control-plane/src/agent_control_plane/governance_layer.py +432 -0
- modules/control-plane/src/agent_control_plane/hf_utils.py +561 -0
- modules/control-plane/src/agent_control_plane/interfaces/__init__.py +53 -0
- modules/control-plane/src/agent_control_plane/interfaces/kernel_interface.py +359 -0
- modules/control-plane/src/agent_control_plane/interfaces/plugin_interface.py +495 -0
- modules/control-plane/src/agent_control_plane/interfaces/protocol_interfaces.py +385 -0
- modules/control-plane/src/agent_control_plane/kernel_space.py +707 -0
- modules/control-plane/src/agent_control_plane/langchain_adapter.py +422 -0
- modules/control-plane/src/agent_control_plane/lifecycle.py +3111 -0
- modules/control-plane/src/agent_control_plane/mcp_adapter.py +517 -0
- modules/control-plane/src/agent_control_plane/ml_safety.py +560 -0
- modules/control-plane/src/agent_control_plane/multimodal.py +724 -0
- modules/control-plane/src/agent_control_plane/mute_agent.py +419 -0
- modules/control-plane/src/agent_control_plane/observability.py +785 -0
- modules/control-plane/src/agent_control_plane/orchestrator.py +480 -0
- modules/control-plane/src/agent_control_plane/plugin_registry.py +748 -0
- modules/control-plane/src/agent_control_plane/policy_engine.py +525 -0
- modules/control-plane/src/agent_control_plane/shadow_mode.py +307 -0
- modules/control-plane/src/agent_control_plane/signals.py +491 -0
- modules/control-plane/src/agent_control_plane/supervisor_agents.py +427 -0
- modules/control-plane/src/agent_control_plane/time_travel_debugger.py +554 -0
- modules/control-plane/src/agent_control_plane/tool_registry.py +350 -0
- modules/control-plane/src/agent_control_plane/vfs.py +695 -0
- modules/control-plane/tests/README.md +33 -0
- modules/control-plane/tests/test_a2a_adapter.py +336 -0
- modules/control-plane/tests/test_adapter.py +422 -0
- modules/control-plane/tests/test_advanced_features.py +389 -0
- modules/control-plane/tests/test_benchmark.py +223 -0
- modules/control-plane/tests/test_compliance.py +214 -0
- modules/control-plane/tests/test_control_plane.py +295 -0
- modules/control-plane/tests/test_hibernation.py +274 -0
- modules/control-plane/tests/test_kernel_interception.py +284 -0
- modules/control-plane/tests/test_langchain_adapter.py +258 -0
- modules/control-plane/tests/test_lifecycle.py +1174 -0
- modules/control-plane/tests/test_mcp_adapter.py +293 -0
- modules/control-plane/tests/test_ml_safety.py +142 -0
- modules/control-plane/tests/test_multimodal.py +317 -0
- modules/control-plane/tests/test_new_features.py +435 -0
- modules/control-plane/tests/test_observability.py +338 -0
- modules/control-plane/tests/test_time_travel.py +387 -0
- modules/emk/.github/workflows/ci.yml +105 -0
- modules/emk/.github/workflows/publish.yml +144 -0
- modules/emk/.gitignore +74 -0
- modules/emk/CHANGELOG.md +41 -0
- modules/emk/CONTRIBUTING.md +295 -0
- modules/emk/IMPLEMENTATION.md +174 -0
- modules/emk/LICENSE +21 -0
- modules/emk/MANIFEST.in +8 -0
- modules/emk/README.md +135 -0
- modules/emk/RELEASE_NOTES.md +82 -0
- modules/emk/SECURITY.md +52 -0
- modules/emk/codecov.yml +39 -0
- modules/emk/docs/MEMORY_MANAGEMENT.md +285 -0
- modules/emk/emk/__init__.py +106 -0
- modules/emk/emk/hf_utils.py +419 -0
- modules/emk/emk/indexer.py +144 -0
- modules/emk/emk/py.typed +0 -0
- modules/emk/emk/schema.py +204 -0
- modules/emk/emk/sleep_cycle.py +345 -0
- modules/emk/emk/store.py +479 -0
- modules/emk/examples/basic_usage.py +123 -0
- modules/emk/examples/memory_features_demo.py +154 -0
- modules/emk/experiments/README.md +59 -0
- modules/emk/experiments/reproduce_results.py +461 -0
- modules/emk/experiments/results.json +61 -0
- modules/emk/paper/structure.tex +192 -0
- modules/emk/paper/whitepaper.md +273 -0
- modules/emk/pyproject.toml +91 -0
- modules/emk/setup.py +5 -0
- modules/emk/tests/test_file_adapter.py +195 -0
- modules/emk/tests/test_indexer.py +174 -0
- modules/emk/tests/test_init.py +55 -0
- modules/emk/tests/test_negative_memory.py +83 -0
- modules/emk/tests/test_schema.py +150 -0
- modules/emk/tests/test_semantic_rules.py +175 -0
- modules/emk/tests/test_sleep_cycle.py +335 -0
- modules/emk/tests/test_store_anti_patterns.py +239 -0
- modules/iatp/.github/workflows/docker-build.yml +124 -0
- modules/iatp/.github/workflows/publish.yml +174 -0
- modules/iatp/.github/workflows/python-package.yml +121 -0
- modules/iatp/.gitignore +67 -0
- modules/iatp/.pre-commit-config.yaml +64 -0
- modules/iatp/CHANGELOG.md +120 -0
- modules/iatp/Dockerfile +91 -0
- modules/iatp/IMPLEMENTATION_SUMMARY.md +218 -0
- modules/iatp/MANIFEST.in +9 -0
- modules/iatp/README.md +180 -0
- modules/iatp/docker/Dockerfile.agent +27 -0
- modules/iatp/docker/Dockerfile.sidecar-python +86 -0
- modules/iatp/docker/README.md +258 -0
- modules/iatp/docker-compose.yml +194 -0
- modules/iatp/docs/ARCHITECTURE.md +243 -0
- modules/iatp/docs/CLI_GUIDE.md +220 -0
- modules/iatp/docs/DEPLOYMENT.md +304 -0
- modules/iatp/examples/README.md +132 -0
- modules/iatp/examples/backend_agent.py +39 -0
- modules/iatp/examples/client.py +168 -0
- modules/iatp/examples/demo_attestation_reputation.py +274 -0
- modules/iatp/examples/demo_client.py +240 -0
- modules/iatp/examples/demo_rbac.py +143 -0
- modules/iatp/examples/integration_demo.py +245 -0
- modules/iatp/examples/manifests/coder_agent.json +20 -0
- modules/iatp/examples/manifests/reviewer_agent.json +19 -0
- modules/iatp/examples/manifests/secure_bank.json +14 -0
- modules/iatp/examples/manifests/standard_agent.json +14 -0
- modules/iatp/examples/manifests/untrusted_honeypot.json +14 -0
- modules/iatp/examples/run_secure_bank_sidecar.py +85 -0
- modules/iatp/examples/run_sidecar.py +105 -0
- modules/iatp/examples/run_untrusted_sidecar.py +77 -0
- modules/iatp/examples/secure_bank_agent.py +138 -0
- modules/iatp/examples/test_untrusted.py +82 -0
- modules/iatp/examples/untrusted_agent.py +119 -0
- modules/iatp/experiments/README.md +58 -0
- modules/iatp/experiments/cascading_hallucination/README.md +149 -0
- modules/iatp/experiments/cascading_hallucination/agent_a_user.py +41 -0
- modules/iatp/experiments/cascading_hallucination/agent_b_summarizer.py +54 -0
- modules/iatp/experiments/cascading_hallucination/agent_c_database.py +47 -0
- modules/iatp/experiments/cascading_hallucination/proof_of_concept.py +290 -0
- modules/iatp/experiments/cascading_hallucination/run_experiment.py +226 -0
- modules/iatp/experiments/cascading_hallucination/sidecar_c.py +61 -0
- modules/iatp/experiments/reproduce_results.py +574 -0
- modules/iatp/experiments/results.json +2336 -0
- modules/iatp/iatp/__init__.py +164 -0
- modules/iatp/iatp/attestation.py +401 -0
- modules/iatp/iatp/cli.py +253 -0
- modules/iatp/iatp/hf_utils.py +469 -0
- modules/iatp/iatp/ipc_pipes.py +578 -0
- modules/iatp/iatp/main.py +410 -0
- modules/iatp/iatp/models/__init__.py +445 -0
- modules/iatp/iatp/policy_engine.py +335 -0
- modules/iatp/iatp/py.typed +2 -0
- modules/iatp/iatp/recovery.py +319 -0
- modules/iatp/iatp/security/__init__.py +268 -0
- modules/iatp/iatp/sidecar/__init__.py +517 -0
- modules/iatp/iatp/telemetry/__init__.py +162 -0
- modules/iatp/iatp/tests/__init__.py +1 -0
- modules/iatp/iatp/tests/test_attestation.py +368 -0
- modules/iatp/iatp/tests/test_cli.py +129 -0
- modules/iatp/iatp/tests/test_models.py +128 -0
- modules/iatp/iatp/tests/test_policy_engine.py +345 -0
- modules/iatp/iatp/tests/test_recovery.py +279 -0
- modules/iatp/iatp/tests/test_security.py +220 -0
- modules/iatp/iatp/tests/test_sidecar.py +165 -0
- modules/iatp/iatp/tests/test_telemetry.py +173 -0
- modules/iatp/paper/BLOG.md +307 -0
- modules/iatp/paper/PAPER.md +236 -0
- modules/iatp/paper/RFC_SUBMISSION.md +299 -0
- modules/iatp/paper/whitepaper.md +369 -0
- modules/iatp/proto/README.md +200 -0
- modules/iatp/proto/generate_stubs.py +81 -0
- modules/iatp/proto/iatp.proto +552 -0
- modules/iatp/pyproject.toml +180 -0
- modules/iatp/requirements-dev.txt +2 -0
- modules/iatp/requirements.txt +6 -0
- modules/iatp/setup.py +60 -0
- modules/iatp/sidecar/README.md +487 -0
- modules/iatp/sidecar/go/Dockerfile +32 -0
- modules/iatp/sidecar/go/README.md +237 -0
- modules/iatp/sidecar/go/go.mod +8 -0
- modules/iatp/sidecar/go/main.go +488 -0
- modules/iatp/spec/001-handshake.md +436 -0
- modules/iatp/spec/002-reversibility.md +394 -0
- modules/iatp/spec/schema/capability_manifest.json +266 -0
- modules/iatp/test_integration.py +310 -0
- modules/mcp-kernel-server/README.md +261 -0
- modules/mcp-kernel-server/pyproject.toml +60 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/__init__.py +26 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/cli.py +229 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/resources.py +215 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/server.py +562 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/tools.py +1172 -0
- modules/mute-agent/.github/workflows/safety_check.yml +45 -0
- modules/mute-agent/.gitignore +53 -0
- modules/mute-agent/ARCHITECTURE.md +531 -0
- modules/mute-agent/BENCHMARK_GUIDE.md +384 -0
- modules/mute-agent/COMPLETION_SUMMARY.md +293 -0
- modules/mute-agent/EXPERIMENT_SUMMARY.md +318 -0
- modules/mute-agent/IMPLEMENTATION_SUMMARY.md +212 -0
- modules/mute-agent/LICENSE +21 -0
- modules/mute-agent/PHASE3_SUMMARY.md +297 -0
- modules/mute-agent/README.md +360 -0
- modules/mute-agent/STEEL_MAN_RESULTS.md +353 -0
- modules/mute-agent/USAGE.md +505 -0
- modules/mute-agent/V2_IMPLEMENTATION_SUMMARY.md +253 -0
- modules/mute-agent/V2_STEEL_MAN_IMPLEMENTATION.md +274 -0
- modules/mute-agent/VERIFICATION_REPORT.md +435 -0
- modules/mute-agent/charts/cost_comparison.png +0 -0
- modules/mute-agent/charts/cost_vs_ambiguity.png +0 -0
- modules/mute-agent/charts/metrics_comparison.png +0 -0
- modules/mute-agent/charts/scenario_breakdown.png +0 -0
- modules/mute-agent/charts/trace_attack_blocked.html +140 -0
- modules/mute-agent/charts/trace_attack_blocked.png +0 -0
- modules/mute-agent/charts/trace_failure.html +140 -0
- modules/mute-agent/charts/trace_failure.png +0 -0
- modules/mute-agent/charts/trace_success.html +140 -0
- modules/mute-agent/charts/trace_success.png +0 -0
- modules/mute-agent/examples/__init__.py +1 -0
- modules/mute-agent/examples/advanced_example.py +384 -0
- modules/mute-agent/examples/graph_debugger_demo.py +241 -0
- modules/mute-agent/examples/listener_example.py +297 -0
- modules/mute-agent/examples/simple_example.py +242 -0
- modules/mute-agent/examples/steel_man_demo.py +297 -0
- modules/mute-agent/experiments/README.md +135 -0
- modules/mute-agent/experiments/__init__.py +3 -0
- modules/mute-agent/experiments/agent_comparison.csv +6 -0
- modules/mute-agent/experiments/agent_comparison_50runs.csv +6 -0
- modules/mute-agent/experiments/ambiguity_test.py +335 -0
- modules/mute-agent/experiments/ambiguity_test_results.csv +31 -0
- modules/mute-agent/experiments/ambiguity_test_results_50runs.csv +51 -0
- modules/mute-agent/experiments/baseline_agent.py +189 -0
- modules/mute-agent/experiments/benchmark.py +402 -0
- modules/mute-agent/experiments/demo.py +172 -0
- modules/mute-agent/experiments/generate_cost_curve.py +474 -0
- modules/mute-agent/experiments/jailbreak_test.py +137 -0
- modules/mute-agent/experiments/latent_state_scenario.py +361 -0
- modules/mute-agent/experiments/mute_agent_experiment.py +349 -0
- modules/mute-agent/experiments/run_extended_experiment.py +40 -0
- modules/mute-agent/experiments/run_v2_experiments.py +266 -0
- modules/mute-agent/experiments/run_v2_experiments_auto.py +247 -0
- modules/mute-agent/experiments/v2_scenarios/README.md +214 -0
- modules/mute-agent/experiments/v2_scenarios/__init__.py +4 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_1_deep_dependency.py +325 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_2_adversarial.py +328 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_3_false_positive.py +303 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_4_performance.py +319 -0
- modules/mute-agent/experiments/visualize.py +400 -0
- modules/mute-agent/mute_agent/__init__.py +66 -0
- modules/mute-agent/mute_agent/core/__init__.py +1 -0
- modules/mute-agent/mute_agent/core/execution_agent.py +164 -0
- modules/mute-agent/mute_agent/core/handshake_protocol.py +199 -0
- modules/mute-agent/mute_agent/core/reasoning_agent.py +236 -0
- modules/mute-agent/mute_agent/knowledge_graph/__init__.py +1 -0
- modules/mute-agent/mute_agent/knowledge_graph/graph_elements.py +63 -0
- modules/mute-agent/mute_agent/knowledge_graph/multidimensional_graph.py +168 -0
- modules/mute-agent/mute_agent/knowledge_graph/subgraph.py +222 -0
- modules/mute-agent/mute_agent/listener/__init__.py +41 -0
- modules/mute-agent/mute_agent/listener/adapters/__init__.py +29 -0
- modules/mute-agent/mute_agent/listener/adapters/base_adapter.py +187 -0
- modules/mute-agent/mute_agent/listener/adapters/caas_adapter.py +342 -0
- modules/mute-agent/mute_agent/listener/adapters/control_plane_adapter.py +434 -0
- modules/mute-agent/mute_agent/listener/adapters/iatp_adapter.py +330 -0
- modules/mute-agent/mute_agent/listener/adapters/scak_adapter.py +249 -0
- modules/mute-agent/mute_agent/listener/listener.py +608 -0
- modules/mute-agent/mute_agent/listener/state_observer.py +434 -0
- modules/mute-agent/mute_agent/listener/threshold_config.py +311 -0
- modules/mute-agent/mute_agent/super_system/__init__.py +1 -0
- modules/mute-agent/mute_agent/super_system/router.py +202 -0
- modules/mute-agent/mute_agent/visualization/__init__.py +8 -0
- modules/mute-agent/mute_agent/visualization/graph_debugger.py +495 -0
- modules/mute-agent/requirements-dev.txt +6 -0
- modules/mute-agent/requirements.txt +9 -0
- modules/mute-agent/setup.py +64 -0
- modules/mute-agent/src/__init__.py +0 -0
- modules/mute-agent/src/agents/__init__.py +0 -0
- modules/mute-agent/src/agents/baseline_agent.py +524 -0
- modules/mute-agent/src/agents/interactive_agent.py +113 -0
- modules/mute-agent/src/agents/mute_agent.py +622 -0
- modules/mute-agent/src/benchmarks/__init__.py +0 -0
- modules/mute-agent/src/benchmarks/evaluator.py +481 -0
- modules/mute-agent/src/benchmarks/scenarios.json +985 -0
- modules/mute-agent/src/core/__init__.py +0 -0
- modules/mute-agent/src/core/mock_state.py +320 -0
- modules/mute-agent/src/core/tools.py +441 -0
- modules/nexus/__init__.py +49 -0
- modules/nexus/arbiter.py +357 -0
- modules/nexus/client.py +464 -0
- modules/nexus/dmz.py +417 -0
- modules/nexus/escrow.py +428 -0
- modules/nexus/exceptions.py +284 -0
- modules/nexus/registry.py +391 -0
- modules/nexus/reputation.py +423 -0
- modules/nexus/schemas/__init__.py +49 -0
- modules/nexus/schemas/compliance.py +274 -0
- modules/nexus/schemas/escrow.py +249 -0
- modules/nexus/schemas/manifest.py +223 -0
- modules/nexus/schemas/receipt.py +206 -0
- modules/observability/README.md +192 -0
- modules/observability/alertmanager/alertmanager.yml +116 -0
- modules/observability/alerts/agent-os-alerts.yaml +197 -0
- modules/observability/docker-compose.yml +128 -0
- modules/observability/grafana/dashboards/agent-os-amb.json +448 -0
- modules/observability/grafana/dashboards/agent-os-cmvk.json +441 -0
- modules/observability/grafana/dashboards/agent-os-overview.json +268 -0
- modules/observability/grafana/dashboards/agent-os-performance.json +15 -0
- modules/observability/grafana/dashboards/agent-os-safety.json +50 -0
- modules/observability/grafana/provisioning/dashboards/dashboards.yml +15 -0
- modules/observability/grafana/provisioning/datasources/datasources.yml +33 -0
- modules/observability/otel/otel-collector-config.yml +61 -0
- modules/observability/prometheus/prometheus.yml +63 -0
- modules/observability/pyproject.toml +53 -0
- modules/observability/scripts/export_dashboards.py +55 -0
- modules/observability/src/agent_os_observability/__init__.py +25 -0
- modules/observability/src/agent_os_observability/dashboards.py +896 -0
- modules/observability/src/agent_os_observability/metrics.py +396 -0
- modules/observability/src/agent_os_observability/server.py +221 -0
- modules/observability/src/agent_os_observability/tracer.py +226 -0
- modules/primitives/.gitignore +8 -0
- modules/primitives/README.md +62 -0
- modules/primitives/agent_primitives/__init__.py +22 -0
- modules/primitives/agent_primitives/failures.py +82 -0
- modules/primitives/agent_primitives/py.typed +0 -0
- modules/primitives/pyproject.toml +68 -0
- modules/scak/.github/copilot-instructions.md +396 -0
- modules/scak/.github/workflows/release.yml +117 -0
- modules/scak/.gitignore +32 -0
- modules/scak/CHANGELOG.md +173 -0
- modules/scak/CITATION.cff +62 -0
- modules/scak/CONTRIBUTING.md +429 -0
- modules/scak/Dockerfile +58 -0
- modules/scak/ENTERPRISE_FEATURES.md +518 -0
- modules/scak/IMPLEMENTATION_SUMMARY.md +206 -0
- modules/scak/LIMITATIONS.md +565 -0
- modules/scak/MANIFEST.in +16 -0
- modules/scak/NOVELTY.md +535 -0
- modules/scak/README.md +928 -0
- modules/scak/RESEARCH.md +670 -0
- modules/scak/agent_kernel/__init__.py +66 -0
- modules/scak/agent_kernel/analyzer.py +432 -0
- modules/scak/agent_kernel/auditor.py +31 -0
- modules/scak/agent_kernel/completeness_auditor.py +234 -0
- modules/scak/agent_kernel/detector.py +200 -0
- modules/scak/agent_kernel/kernel.py +741 -0
- modules/scak/agent_kernel/memory_manager.py +82 -0
- modules/scak/agent_kernel/models.py +372 -0
- modules/scak/agent_kernel/nudge_mechanism.py +260 -0
- modules/scak/agent_kernel/outcome_analyzer.py +335 -0
- modules/scak/agent_kernel/patcher.py +579 -0
- modules/scak/agent_kernel/semantic_analyzer.py +313 -0
- modules/scak/agent_kernel/semantic_purge.py +346 -0
- modules/scak/agent_kernel/simulator.py +447 -0
- modules/scak/agent_kernel/teacher.py +82 -0
- modules/scak/agent_kernel/triage.py +149 -0
- modules/scak/build_and_publish.ps1 +74 -0
- modules/scak/build_and_publish.sh +74 -0
- modules/scak/cli.py +471 -0
- modules/scak/dashboard.py +462 -0
- modules/scak/datasets/DATASET_CARD.md +219 -0
- modules/scak/datasets/README.md +143 -0
- modules/scak/datasets/gaia_vague_queries/vague_queries.json +262 -0
- modules/scak/datasets/hf_upload/README.md +219 -0
- modules/scak/datasets/hf_upload/scak_gaia_laziness.jsonl +50 -0
- modules/scak/datasets/prepare_hf_datasets.py +145 -0
- modules/scak/datasets/red_team/jailbreak_patterns.json +202 -0
- modules/scak/docker-compose.yml +99 -0
- modules/scak/docs/Adaptive-Memory-Hierarchy.md +319 -0
- modules/scak/docs/Data-Contracts-and-Schemas.md +285 -0
- modules/scak/docs/Dual-Loop-Architecture.md +344 -0
- modules/scak/docs/Enhanced-Features.md +612 -0
- modules/scak/docs/LANGCHAIN_INTEGRATION.md +572 -0
- modules/scak/docs/README.md +128 -0
- modules/scak/docs/Reference-Implementations.md +163 -0
- modules/scak/docs/SCAK_V2.md +374 -0
- modules/scak/docs/Three-Failure-Types.md +178 -0
- modules/scak/examples/basic_example.py +155 -0
- modules/scak/examples/circuit_breaker_lazy_eval_demo.py +243 -0
- modules/scak/examples/langchain_integration_example.py +339 -0
- modules/scak/examples/layer4_demo.py +243 -0
- modules/scak/examples/production_features_demo.py +353 -0
- modules/scak/examples/quick_demo.py +79 -0
- modules/scak/examples/scak_v2_demo.py +252 -0
- modules/scak/experiments/README.md +438 -0
- modules/scak/experiments/ablation_studies/README.md +192 -0
- modules/scak/experiments/ablation_studies/ablation_no_audit.py +116 -0
- modules/scak/experiments/ablation_studies/ablation_no_purge.py +133 -0
- modules/scak/experiments/chaos_engineering/README.md +332 -0
- modules/scak/experiments/context_efficiency_test.py +328 -0
- modules/scak/experiments/gaia_benchmark/README.md +208 -0
- modules/scak/experiments/laziness_benchmark.py +179 -0
- modules/scak/experiments/long_horizon_task_experiment.py +252 -0
- modules/scak/experiments/multi_agent_rag_experiment.py +284 -0
- modules/scak/experiments/results/ablation_table.md +12 -0
- modules/scak/experiments/results/long_horizon.json +36 -0
- modules/scak/experiments/results/multi_agent_rag.json +66 -0
- modules/scak/experiments/run_comprehensive_ablations.py +332 -0
- modules/scak/experiments/test_auditor_patcher_integration.py +251 -0
- modules/scak/notebooks/getting_started.ipynb +33 -0
- modules/scak/paper/ARXIV_SUBMISSION_METADATA.txt +109 -0
- modules/scak/paper/PAPER_CHECKLIST.md +304 -0
- modules/scak/paper/Paper.pdf +0 -0
- modules/scak/paper/README.md +113 -0
- modules/scak/paper/appendix.md +351 -0
- modules/scak/paper/arxiv/bibliography.bib +284 -0
- modules/scak/paper/arxiv/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/arxiv/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/arxiv/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/arxiv/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/arxiv/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/arxiv/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/arxiv/main.aux +103 -0
- modules/scak/paper/arxiv/main.bbl +113 -0
- modules/scak/paper/arxiv/main.blg +55 -0
- modules/scak/paper/arxiv/main.out +31 -0
- modules/scak/paper/arxiv/main.pdf +0 -0
- modules/scak/paper/arxiv/main.tex +482 -0
- modules/scak/paper/arxiv_submission/bibliography.bib +284 -0
- modules/scak/paper/arxiv_submission/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/arxiv_submission/main.aux +103 -0
- modules/scak/paper/arxiv_submission/main.bbl +113 -0
- modules/scak/paper/arxiv_submission/main.blg +55 -0
- modules/scak/paper/arxiv_submission/main.out +31 -0
- modules/scak/paper/arxiv_submission/main.pdf +0 -0
- modules/scak/paper/arxiv_submission/main.tex +482 -0
- modules/scak/paper/arxiv_submission.tar.gz +0 -0
- modules/scak/paper/bibliography.bib +284 -0
- modules/scak/paper/build.sh +55 -0
- modules/scak/paper/figures/README.md +32 -0
- modules/scak/paper/figures/fig1_ooda_architecture.md +75 -0
- modules/scak/paper/figures/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/figures/fig1_ooda_architecture.png +0 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.md +83 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.png +0 -0
- modules/scak/paper/figures/fig3_gaia_results.md +64 -0
- modules/scak/paper/figures/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/figures/fig3_gaia_results.png +0 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.md +64 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.png +0 -0
- modules/scak/paper/figures/fig5_context_reduction.md +71 -0
- modules/scak/paper/figures/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/figures/fig5_context_reduction.png +0 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.md +80 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.png +0 -0
- modules/scak/paper/figures/generate_figures.py +463 -0
- modules/scak/paper/main.aux +103 -0
- modules/scak/paper/main.bbl +113 -0
- modules/scak/paper/main.blg +55 -0
- modules/scak/paper/main.md +192 -0
- modules/scak/paper/main.out +31 -0
- modules/scak/paper/main.pdf +0 -0
- modules/scak/paper/main.tex +482 -0
- modules/scak/reproducibility/ABLATIONS.md +225 -0
- modules/scak/reproducibility/Dockerfile.reproducibility +34 -0
- modules/scak/reproducibility/README.md +421 -0
- modules/scak/reproducibility/requirements-pinned.txt +32 -0
- modules/scak/reproducibility/run_all_experiments.py +395 -0
- modules/scak/reproducibility/seed_control.py +53 -0
- modules/scak/reproducibility/statistical_analysis.py +302 -0
- modules/scak/requirements.txt +50 -0
- modules/scak/setup.py +93 -0
- modules/scak/src/__init__.py +124 -0
- modules/scak/src/agents/__init__.py +13 -0
- modules/scak/src/agents/conflict_resolution.py +732 -0
- modules/scak/src/agents/orchestrator.py +761 -0
- modules/scak/src/agents/pubsub.py +484 -0
- modules/scak/src/agents/shadow_teacher.py +344 -0
- modules/scak/src/agents/swarm.py +661 -0
- modules/scak/src/agents/worker.py +357 -0
- modules/scak/src/integrations/__init__.py +81 -0
- modules/scak/src/integrations/cmvk_adapter.py +430 -0
- modules/scak/src/integrations/control_plane_adapter.py +601 -0
- modules/scak/src/integrations/langchain_integration.py +902 -0
- modules/scak/src/interfaces/__init__.py +59 -0
- modules/scak/src/interfaces/llm_clients.py +505 -0
- modules/scak/src/interfaces/openapi_tools.py +611 -0
- modules/scak/src/interfaces/plugin_system.py +605 -0
- modules/scak/src/interfaces/protocols.py +365 -0
- modules/scak/src/interfaces/telemetry.py +464 -0
- modules/scak/src/interfaces/tool_registry.py +547 -0
- modules/scak/src/kernel/__init__.py +100 -0
- modules/scak/src/kernel/auditor.py +305 -0
- modules/scak/src/kernel/circuit_breaker.py +398 -0
- modules/scak/src/kernel/core.py +724 -0
- modules/scak/src/kernel/distributed.py +667 -0
- modules/scak/src/kernel/evolution.py +455 -0
- modules/scak/src/kernel/failover.py +621 -0
- modules/scak/src/kernel/governance.py +710 -0
- modules/scak/src/kernel/governance_v2.py +603 -0
- modules/scak/src/kernel/lazy_evaluator.py +514 -0
- modules/scak/src/kernel/load_testing.py +633 -0
- modules/scak/src/kernel/memory.py +945 -0
- modules/scak/src/kernel/patcher.py +581 -0
- modules/scak/src/kernel/rubric.py +419 -0
- modules/scak/src/kernel/schemas.py +390 -0
- modules/scak/src/kernel/skill_mapper.py +309 -0
- modules/scak/src/kernel/triage.py +149 -0
- modules/scak/src/mocks/__init__.py +99 -0
- modules/scak/tests/__init__.py +1 -0
- modules/scak/tests/test_circuit_breaker.py +403 -0
- modules/scak/tests/test_conflict_resolution.py +287 -0
- modules/scak/tests/test_dual_loop.py +463 -0
- modules/scak/tests/test_enhanced_features.py +421 -0
- modules/scak/tests/test_failover_and_load.py +438 -0
- modules/scak/tests/test_governance.py +185 -0
- modules/scak/tests/test_kernel.py +359 -0
- modules/scak/tests/test_langchain_integration.py +451 -0
- modules/scak/tests/test_lazy_evaluator.py +465 -0
- modules/scak/tests/test_llm_clients.py +122 -0
- modules/scak/tests/test_memory_controller.py +528 -0
- modules/scak/tests/test_orchestrator.py +181 -0
- modules/scak/tests/test_phase3_integration.py +265 -0
- modules/scak/tests/test_pubsub_swarm.py +203 -0
- modules/scak/tests/test_reference_implementations.py +240 -0
- modules/scak/tests/test_rubric.py +363 -0
- modules/scak/tests/test_scak_v2.py +651 -0
- modules/scak/tests/test_skill_mapper.py +217 -0
- modules/scak/tests/test_specific_failures.py +393 -0
- modules/scak/tests/test_tool_registry.py +264 -0
- modules/scak/tests/test_tools_and_plugins.py +303 -0
- modules/scak/tests/test_triage.py +596 -0
- modules/scak/tests/test_write_through.py +319 -0
- agent_os_kernel-1.1.0.dist-info/METADATA +0 -400
- agent_os_kernel-1.1.0.dist-info/RECORD +0 -12
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.2.0.dist-info}/WHEEL +0 -0
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.2.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,1264 @@
|
|
|
1
|
+
# Agent Control Plane
|
|
2
|
+
|
|
3
|
+
> **Part of [Agent OS](https://github.com/imran-siddique/agent-os)** - Kernel-level governance for AI agents
|
|
4
|
+
|
|
5
|
+
[PyPI](https://pypi.org/project/agent-control-plane/)
|
|
6
|
+
[Python](https://www.python.org/downloads/)
|
|
7
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
8
|
+
[CI](https://github.com/imran-siddique/agent-control-plane/actions)
|
|
9
|
+
[Code style: black](https://github.com/psf/black)
|
|
10
|
+
[](https://pypi.org/project/agent-control-plane/)
|
|
11
|
+
[Hugging Face dataset](https://huggingface.co/datasets/imran-siddique/agent-control-redteam-60)
|
|
12
|
+
|
|
13
|
+
A governance and management layer for autonomous AI agents. The Agent Control Plane treats the LLM as a raw compute component and provides a kernel-like layer for safe, controlled execution.
|
|
14
|
+
|
|
15
|
+
> **🎯 Benchmark Results**: The Control Plane achieves **0% safety violations** vs 26.67% for prompt-based safety, with 98% fewer tokens. [See comparative study →](#benchmark-comparative-safety-study)
|
|
16
|
+
|
|
17
|
+
> **🎥 Demo Video**: [Watch 2-minute demo →](#demo-video) | [Full tutorial (12 min) →](#demo-video)
|
|
18
|
+
|
|
19
|
+
## Philosophy: Scale by Subtraction
|
|
20
|
+
|
|
21
|
+
**We need to stop treating the LLM as a magic box and start treating it as a raw compute component that requires a kernel.**
|
|
22
|
+
|
|
23
|
+
In distributed systems, we don't ask a microservice nicely to respect a rate limit—we enforce it at the gateway. We don't ask a database query nicely not to drop a table—we enforce it via permissions. With AI agents, we need the same deterministic enforcement.
|
|
24
|
+
|
|
25
|
+
## Overview
|
|
26
|
+
|
|
27
|
+
As we move from chatbots to autonomous agents—systems that can execute code, modify data, and trigger workflows—the biggest bottleneck isn't intelligence. It's **governance**. The Agent Control Plane solves this by providing:
|
|
28
|
+
|
|
29
|
+
### Core Features
|
|
30
|
+
- **Permission Management**: Fine-grained control over what agents can do
|
|
31
|
+
- **Policy Enforcement**: Governance rules and compliance constraints
|
|
32
|
+
- **Resource Management**: Quotas, rate limiting, and resource allocation
|
|
33
|
+
- **Safe Execution**: Sandboxed execution with rollback capability
|
|
34
|
+
- **Audit Logging**: Complete traceability for all agent actions (SQLite-based Flight Recorder)
|
|
35
|
+
- **Risk Assessment**: Automatic risk scoring and management
|
|
36
|
+
- **Multi-Framework Support**: Drop-in middleware for OpenAI SDK, LangChain, MCP, and A2A protocols
|
|
37
|
+
|
|
38
|
+
### Advanced Features
|
|
39
|
+
- **The Mute Agent**: Capability-based execution that returns NULL for out-of-scope requests instead of hallucinating
|
|
40
|
+
- **Shadow Mode**: Simulation environment where agents think they're executing but actions are intercepted for validation
|
|
41
|
+
- **Constraint Graphs**: Multi-dimensional context (Data, Policy, Temporal) acting as the "physics" of the agent's world
|
|
42
|
+
- **Supervisor Agents**: Recursive governance with agents watching agents, bound by a constitution of code
|
|
43
|
+
- **Reasoning Telemetry**: Complete trace of agent decision-making process
|
|
44
|
+
- **Red Team Dataset**: Comprehensive adversarial prompt testing with 60+ attack vectors
|
|
45
|
+
|
|
46
|
+
### New: Multi-Agent & Enterprise Features
|
|
47
|
+
- **Agent Orchestrator**: Multi-agent coordination with sequential, parallel, and graph-based workflows (inspired by LangGraph)
|
|
48
|
+
- **Tool Registry**: Dynamic tool registration and discovery for extensible agent capabilities
|
|
49
|
+
- **Governance Layer**: Ethical alignment, bias detection, and privacy-preserving computation
|
|
50
|
+
- **CLI Tool**: Command-line interface for agent management and operations
|
|
51
|
+
- **Docker Support**: Production-ready containerization with docker-compose
|
|
52
|
+
- **Interactive Notebooks**: Jupyter notebook tutorials for hands-on learning
|
|
53
|
+
|
|
54
|
+
### New: Advanced Safety & Compliance (v1.1)
|
|
55
|
+
- **ML-Based Safety**: Jailbreak detection with 60+ attack patterns, anomaly detection, behavioral analysis
|
|
56
|
+
- **Compliance Engine**: EU AI Act, SOC 2, GDPR, HIPAA support with automated checks
|
|
57
|
+
- **Constitutional AI**: Value alignment framework inspired by Anthropic's research
|
|
58
|
+
- **Multimodal Capabilities**: Vision (GPT-4o-style), audio processing, RAG with vector stores
|
|
59
|
+
- **Production Observability**: Prometheus metrics, distributed tracing, real-time alerting
|
|
60
|
+
|
|
61
|
+
## Key Concepts
|
|
62
|
+
|
|
63
|
+
### The Problem
|
|
64
|
+
|
|
65
|
+
Traditional LLM applications lack proper governance:
|
|
66
|
+
- Agents have unrestricted access to execute dangerous actions
|
|
67
|
+
- No deterministic enforcement of boundaries
|
|
68
|
+
- Agents try to be "helpful" by hallucinating when they should return NULL
|
|
69
|
+
- Limited visibility into agent reasoning and behavior
|
|
70
|
+
- Difficult to enforce compliance requirements
|
|
71
|
+
- Hard to debug and trace agent decisions
|
|
72
|
+
|
|
73
|
+
### The Solution
|
|
74
|
+
|
|
75
|
+
The Agent Control Plane sits between the LLM (raw compute) and the execution environment, providing:
|
|
76
|
+
|
|
77
|
+
1. **Agent Kernel**: Central coordinator that mediates all agent actions with OS-like rigor
|
|
78
|
+
2. **Policy Engine**: Enforces rules and constraints deterministically
|
|
79
|
+
3. **Execution Engine**: Safely executes actions in sandboxed environments
|
|
80
|
+
4. **Constraint Graphs**: Multi-dimensional context defining what's possible
|
|
81
|
+
5. **Shadow Mode**: Test and validate agent behavior without side effects
|
|
82
|
+
6. **Supervisor Network**: Agents watching agents for anomalies and violations
|
|
83
|
+
|
|
84
|
+
## Quick Start
|
|
85
|
+
|
|
86
|
+
### Installation
|
|
87
|
+
|
|
88
|
+
#### Option 1: Install from PyPI (Recommended)
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
# Install the latest stable version
|
|
92
|
+
pip install agent-control-plane
|
|
93
|
+
|
|
94
|
+
# Or install with development dependencies
|
|
95
|
+
pip install agent-control-plane[dev]
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
#### Option 2: Install from Source
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
# Clone the repository
|
|
102
|
+
git clone https://github.com/imran-siddique/agent-control-plane.git
|
|
103
|
+
cd agent-control-plane
|
|
104
|
+
|
|
105
|
+
# Install the package in editable mode
|
|
106
|
+
pip install -e .
|
|
107
|
+
|
|
108
|
+
# Or install with development dependencies
|
|
109
|
+
pip install -e ".[dev]"
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Project Structure
|
|
113
|
+
|
|
114
|
+
```
|
|
115
|
+
agent-control-plane/
|
|
116
|
+
├── src/
|
|
117
|
+
│   └── agent_control_plane/     # Main package source code
|
|
118
|
+
│       ├── agent_kernel.py      # Core kernel functionality
|
|
119
|
+
│       ├── control_plane.py     # Main control plane interface
|
|
120
|
+
│       ├── adapter.py           # OpenAI SDK adapter (drop-in middleware)
|
|
121
|
+
│       ├── policy_engine.py     # Policy enforcement
|
|
122
|
+
│       ├── execution_engine.py  # Safe execution
|
|
123
|
+
│       ├── constraint_graphs.py # Multi-dimensional context
|
|
124
|
+
│       ├── shadow_mode.py       # Simulation mode
|
|
125
|
+
│       ├── mute_agent.py        # Capability-based execution
|
|
126
|
+
│       ├── supervisor_agents.py # Recursive governance
|
|
127
|
+
│       └── flight_recorder.py   # Audit logging (SQLite)
|
|
128
|
+
├── tests/                       # Test suite
|
|
129
|
+
├── examples/                    # Example scripts
|
|
130
|
+
├── benchmark/                   # Red team safety benchmarks
|
|
131
|
+
├── docs/                        # Documentation
|
|
132
|
+
└── README.md                    # This file
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### Basic Usage
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
from agent_control_plane import AgentControlPlane, create_standard_agent
|
|
139
|
+
from agent_control_plane.agent_kernel import ActionType
|
|
140
|
+
|
|
141
|
+
# Create the control plane
|
|
142
|
+
control_plane = AgentControlPlane()
|
|
143
|
+
|
|
144
|
+
# Create an agent with standard permissions
|
|
145
|
+
agent = create_standard_agent(control_plane, "my-agent")
|
|
146
|
+
|
|
147
|
+
# Execute an action
|
|
148
|
+
result = control_plane.execute_action(
|
|
149
|
+
agent,
|
|
150
|
+
ActionType.FILE_READ,
|
|
151
|
+
{"path": "/data/myfile.txt"}
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
if result["success"]:
|
|
155
|
+
print(f"Result: {result['result']}")
|
|
156
|
+
else:
|
|
157
|
+
print(f"Error: {result['error']}")
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
### Drop-In Middleware for OpenAI SDK
|
|
161
|
+
|
|
162
|
+
**NEW: Zero-friction integration!** Wrap your OpenAI client to automatically govern LLM tool calls:
|
|
163
|
+
|
|
164
|
+
```python
|
|
165
|
+
from openai import OpenAI
|
|
166
|
+
from agent_control_plane import (
|
|
167
|
+
AgentControlPlane,
|
|
168
|
+
create_governed_client,
|
|
169
|
+
ActionType,
|
|
170
|
+
PermissionLevel
|
|
171
|
+
)
|
|
172
|
+
|
|
173
|
+
# Standard setup
|
|
174
|
+
control_plane = AgentControlPlane()
|
|
175
|
+
client = OpenAI(api_key="your-key")
|
|
176
|
+
|
|
177
|
+
# One line to create governed client
|
|
178
|
+
governed = create_governed_client(
|
|
179
|
+
control_plane=control_plane,
|
|
180
|
+
agent_id="my-agent",
|
|
181
|
+
openai_client=client,
|
|
182
|
+
permissions={
|
|
183
|
+
ActionType.DATABASE_QUERY: PermissionLevel.READ_ONLY,
|
|
184
|
+
ActionType.FILE_READ: PermissionLevel.READ_ONLY,
|
|
185
|
+
}
|
|
186
|
+
)
|
|
187
|
+
|
|
188
|
+
# Use exactly like normal OpenAI SDK!
|
|
189
|
+
response = governed.chat.completions.create(
|
|
190
|
+
model="gpt-4",
|
|
191
|
+
messages=[{"role": "user", "content": "Query database and save results"}],
|
|
192
|
+
tools=[...]
|
|
193
|
+
)
|
|
194
|
+
# Tool calls are automatically governed - unauthorized actions are blocked!
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
**📚 See the [OpenAI Adapter Guide](docs/ADAPTER_GUIDE.md) for comprehensive integration instructions.**
|
|
198
|
+
|
|
199
|
+
### Multi-Framework Support
|
|
200
|
+
|
|
201
|
+
The Agent Control Plane now supports multiple AI frameworks and protocols with the same governance approach:
|
|
202
|
+
|
|
203
|
+
#### LangChain Integration
|
|
204
|
+
|
|
205
|
+
```python
|
|
206
|
+
from langchain.chat_models import ChatOpenAI
|
|
207
|
+
from langchain.agents import initialize_agent, load_tools
|
|
208
|
+
from agent_control_plane import (
|
|
209
|
+
AgentControlPlane,
|
|
210
|
+
create_governed_langchain_client,
|
|
211
|
+
ActionType,
|
|
212
|
+
PermissionLevel
|
|
213
|
+
)
|
|
214
|
+
|
|
215
|
+
# Setup
|
|
216
|
+
control_plane = AgentControlPlane()
|
|
217
|
+
llm = ChatOpenAI(temperature=0)
|
|
218
|
+
|
|
219
|
+
# Create governed LangChain client
|
|
220
|
+
governed_llm = create_governed_langchain_client(
|
|
221
|
+
control_plane=control_plane,
|
|
222
|
+
agent_id="my-langchain-agent",
|
|
223
|
+
langchain_client=llm,
|
|
224
|
+
permissions={
|
|
225
|
+
ActionType.FILE_READ: PermissionLevel.READ_ONLY,
|
|
226
|
+
ActionType.DATABASE_QUERY: PermissionLevel.READ_ONLY,
|
|
227
|
+
}
|
|
228
|
+
)
|
|
229
|
+
|
|
230
|
+
# Use in LangChain agents - tool calls are automatically governed!
|
|
231
|
+
tools = load_tools(["python_repl", "requests"])
|
|
232
|
+
agent = initialize_agent(tools, governed_llm, agent="zero-shot-react-description")
|
|
233
|
+
agent.run("Your task here")
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
#### MCP (Model Context Protocol) Support
|
|
237
|
+
|
|
238
|
+
```python
|
|
239
|
+
from agent_control_plane import (
|
|
240
|
+
AgentControlPlane,
|
|
241
|
+
create_governed_mcp_server,
|
|
242
|
+
ActionType,
|
|
243
|
+
PermissionLevel
|
|
244
|
+
)
|
|
245
|
+
|
|
246
|
+
# Create governed MCP server
|
|
247
|
+
control_plane = AgentControlPlane()
|
|
248
|
+
mcp_server = create_governed_mcp_server(
|
|
249
|
+
control_plane=control_plane,
|
|
250
|
+
agent_id="mcp-agent",
|
|
251
|
+
server_name="file-server",
|
|
252
|
+
permissions={
|
|
253
|
+
ActionType.FILE_READ: PermissionLevel.READ_ONLY,
|
|
254
|
+
},
|
|
255
|
+
transport="stdio"
|
|
256
|
+
)
|
|
257
|
+
|
|
258
|
+
# Register tools - all calls are governed
|
|
259
|
+
mcp_server.register_tool("read_file", handle_read_file, "Read a file")
|
|
260
|
+
mcp_server.register_resource("file://", handle_file_resource, "File resources")
|
|
261
|
+
mcp_server.start()
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
#### A2A (Agent-to-Agent) Protocol Support
|
|
265
|
+
|
|
266
|
+
```python
|
|
267
|
+
from agent_control_plane import (
|
|
268
|
+
AgentControlPlane,
|
|
269
|
+
create_governed_a2a_agent,
|
|
270
|
+
ActionType,
|
|
271
|
+
PermissionLevel
|
|
272
|
+
)
|
|
273
|
+
|
|
274
|
+
# Create governed A2A agent
|
|
275
|
+
control_plane = AgentControlPlane()
|
|
276
|
+
a2a_agent = create_governed_a2a_agent(
|
|
277
|
+
control_plane=control_plane,
|
|
278
|
+
agent_id="my-a2a-agent",
|
|
279
|
+
agent_card={
|
|
280
|
+
"name": "Data Processor",
|
|
281
|
+
"description": "Processes and analyzes data",
|
|
282
|
+
"capabilities": ["data_processing", "analytics"]
|
|
283
|
+
},
|
|
284
|
+
permissions={
|
|
285
|
+
ActionType.DATABASE_QUERY: PermissionLevel.READ_ONLY,
|
|
286
|
+
ActionType.API_CALL: PermissionLevel.READ_WRITE,
|
|
287
|
+
}
|
|
288
|
+
)
|
|
289
|
+
|
|
290
|
+
# Register capabilities
|
|
291
|
+
a2a_agent.register_capability("data_processing", handle_data_processing)
|
|
292
|
+
|
|
293
|
+
# All inter-agent communications are governed!
|
|
294
|
+
a2a_agent.start()
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
### Permission Control
|
|
299
|
+
|
|
300
|
+
```python
|
|
301
|
+
from agent_control_plane.agent_kernel import ActionType, PermissionLevel
|
|
302
|
+
|
|
303
|
+
# Create custom permissions
|
|
304
|
+
permissions = {
|
|
305
|
+
ActionType.FILE_READ: PermissionLevel.READ_ONLY,
|
|
306
|
+
ActionType.API_CALL: PermissionLevel.READ_WRITE,
|
|
307
|
+
ActionType.CODE_EXECUTION: PermissionLevel.NONE,
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
agent = control_plane.create_agent("restricted-agent", permissions)
|
|
311
|
+
```
|
|
312
|
+
|
|
313
|
+
### Rate Limiting
|
|
314
|
+
|
|
315
|
+
```python
|
|
316
|
+
from agent_control_plane.policy_engine import ResourceQuota
|
|
317
|
+
|
|
318
|
+
# Set strict quotas
|
|
319
|
+
quota = ResourceQuota(
|
|
320
|
+
agent_id="rate-limited-agent",
|
|
321
|
+
max_requests_per_minute=10,
|
|
322
|
+
max_requests_per_hour=100,
|
|
323
|
+
max_concurrent_executions=2,
|
|
324
|
+
)
|
|
325
|
+
|
|
326
|
+
control_plane.policy_engine.set_quota("rate-limited-agent", quota)
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
### Custom Policies
|
|
330
|
+
|
|
331
|
+
```python
|
|
332
|
+
from agent_control_plane.agent_kernel import PolicyRule
|
|
333
|
+
import uuid
|
|
334
|
+
|
|
335
|
+
def validate_safe_path(request):
|
|
336
|
+
"""Only allow access to /data directory"""
|
|
337
|
+
path = request.parameters.get('path', '')
|
|
338
|
+
return path.startswith('/data/')
|
|
339
|
+
|
|
340
|
+
rule = PolicyRule(
|
|
341
|
+
rule_id=str(uuid.uuid4()),
|
|
342
|
+
name="safe_path_only",
|
|
343
|
+
description="Restrict file access to /data directory",
|
|
344
|
+
action_types=[ActionType.FILE_READ, ActionType.FILE_WRITE],
|
|
345
|
+
validator=validate_safe_path,
|
|
346
|
+
priority=10
|
|
347
|
+
)
|
|
348
|
+
|
|
349
|
+
control_plane.policy_engine.add_custom_rule(rule)
|
|
350
|
+
```
|
|
351
|
+
|
|
352
|
+
## Examples
|
|
353
|
+
|
|
354
|
+
Check out the `examples/` directory for more detailed examples:
|
|
355
|
+
|
|
356
|
+
- **`getting_started.py`** - Step-by-step tutorial for beginners
|
|
357
|
+
- **`basic_usage.py`** - Fundamental concepts and patterns
|
|
358
|
+
- **`advanced_features.py`** - Shadow Mode, Mute Agent, etc.
|
|
359
|
+
- **`use_cases.py`** - Real-world production scenarios
|
|
360
|
+
- **`configuration.py`** - Different agent configurations
|
|
361
|
+
|
|
362
|
+
#### The Mute Agent - Scale by Subtraction
|
|
363
|
+
|
|
364
|
+
Create agents that know when to shut up and return NULL instead of hallucinating:
|
|
365
|
+
|
|
366
|
+
```python
|
|
367
|
+
from agent_control_plane.mute_agent import create_mute_sql_agent
|
|
368
|
+
from agent_control_plane.agent_kernel import ActionType, PermissionLevel
|
|
369
|
+
|
|
370
|
+
# Create a SQL agent that ONLY executes SELECT queries
|
|
371
|
+
sql_config = create_mute_sql_agent("sql-bot")
|
|
372
|
+
permissions = {ActionType.DATABASE_QUERY: PermissionLevel.READ_ONLY}
|
|
373
|
+
agent = control_plane.create_agent("sql-bot", permissions)
|
|
374
|
+
control_plane.enable_mute_agent("sql-bot", sql_config)
|
|
375
|
+
|
|
376
|
+
# Valid: SELECT query
|
|
377
|
+
result = control_plane.execute_action(
|
|
378
|
+
agent,
|
|
379
|
+
ActionType.DATABASE_QUERY,
|
|
380
|
+
{"query": "SELECT * FROM users"}
|
|
381
|
+
)
|
|
382
|
+
# ✓ Success: True
|
|
383
|
+
|
|
384
|
+
# Invalid: Destructive operation
|
|
385
|
+
result = control_plane.execute_action(
|
|
386
|
+
agent,
|
|
387
|
+
ActionType.DATABASE_QUERY,
|
|
388
|
+
{"query": "DROP TABLE users"}
|
|
389
|
+
)
|
|
390
|
+
# ✗ Success: False, Error: "NULL"
|
|
391
|
+
# Agent returns NULL instead of trying to be "helpful"!
|
|
392
|
+
```
|
|
393
|
+
|
|
394
|
+
#### Shadow Mode - The Matrix for Agents
|
|
395
|
+
|
|
396
|
+
Test agent behavior without actual execution:
|
|
397
|
+
|
|
398
|
+
```python
|
|
399
|
+
# Enable shadow mode
|
|
400
|
+
control_plane = AgentControlPlane(enable_shadow_mode=True)
|
|
401
|
+
agent = create_standard_agent(control_plane, "test-agent")
|
|
402
|
+
|
|
403
|
+
# This looks like normal execution...
|
|
404
|
+
result = control_plane.execute_action(
|
|
405
|
+
agent,
|
|
406
|
+
ActionType.FILE_WRITE,
|
|
407
|
+
{"path": "/data/important.txt", "content": "test"}
|
|
408
|
+
)
|
|
409
|
+
|
|
410
|
+
# But it was SIMULATED! No actual file was written.
|
|
411
|
+
print(result["status"]) # "simulated"
|
|
412
|
+
print(result["note"]) # "This was executed in SHADOW MODE..."
|
|
413
|
+
|
|
414
|
+
# Get statistics
|
|
415
|
+
stats = control_plane.get_shadow_statistics()
|
|
416
|
+
print(f"Success rate: {stats['success_rate']:.1%}")
|
|
417
|
+
```
|
|
418
|
+
|
|
419
|
+
#### Constraint Graphs - Multi-Dimensional Context
|
|
420
|
+
|
|
421
|
+
Define what's possible using Data, Policy, and Temporal graphs:
|
|
422
|
+
|
|
423
|
+
```python
|
|
424
|
+
from datetime import time
|
|
425
|
+
|
|
426
|
+
# Create control plane with constraint graphs
|
|
427
|
+
control_plane = AgentControlPlane(enable_constraint_graphs=True)
|
|
428
|
+
|
|
429
|
+
# Data Graph: What data exists
|
|
430
|
+
control_plane.add_data_table("users", {"id": "int", "name": "string"})
|
|
431
|
+
control_plane.add_data_path("/data/")
|
|
432
|
+
|
|
433
|
+
# Policy Graph: What rules apply
|
|
434
|
+
control_plane.add_policy_constraint(
|
|
435
|
+
"pii_protection",
|
|
436
|
+
"No PII in output",
|
|
437
|
+
applies_to=["table:users"],
|
|
438
|
+
rule_type="deny"
|
|
439
|
+
)
|
|
440
|
+
|
|
441
|
+
# Temporal Graph: What's true RIGHT NOW
|
|
442
|
+
control_plane.add_maintenance_window(
|
|
443
|
+
"nightly_maintenance",
|
|
444
|
+
start_time=time(2, 0), # 2 AM
|
|
445
|
+
end_time=time(4, 0), # 4 AM
|
|
446
|
+
blocked_actions=[ActionType.DATABASE_WRITE]
|
|
447
|
+
)
|
|
448
|
+
|
|
449
|
+
# The graphs enforce deterministically
|
|
450
|
+
# If a table isn't in the Data Graph, access is blocked
|
|
451
|
+
# If during maintenance window, writes are blocked
|
|
452
|
+
# This is ENFORCEMENT, not advisory
|
|
453
|
+
```
|
|
454
|
+
|
|
455
|
+
#### Supervisor Agents - Recursive Governance
|
|
456
|
+
|
|
457
|
+
Agents watching agents:
|
|
458
|
+
|
|
459
|
+
```python
|
|
460
|
+
from agent_control_plane.supervisor_agents import create_default_supervisor
|
|
461
|
+
|
|
462
|
+
# Create worker agents
|
|
463
|
+
agent1 = create_standard_agent(control_plane, "worker-1")
|
|
464
|
+
agent2 = create_standard_agent(control_plane, "worker-2")
|
|
465
|
+
|
|
466
|
+
# Create supervisor to watch them
|
|
467
|
+
supervisor = create_default_supervisor(["worker-1", "worker-2"])
|
|
468
|
+
control_plane.add_supervisor(supervisor)
|
|
469
|
+
|
|
470
|
+
# Agents do their work...
|
|
471
|
+
# (execute various actions)
|
|
472
|
+
|
|
473
|
+
# Run supervision cycle
|
|
474
|
+
violations = control_plane.run_supervision()
|
|
475
|
+
|
|
476
|
+
# Supervisor detects: repeated failures, excessive risk,
|
|
477
|
+
# rate limit approaching, suspicious patterns, etc.
|
|
478
|
+
for supervisor_id, viols in violations.items():
|
|
479
|
+
for v in viols:
|
|
480
|
+
print(f"[{v.severity}] {v.description}")
|
|
481
|
+
```
|
|
482
|
+
|
|
483
|
+
#### Multi-Agent Orchestration - Coordinate Multiple Agents
|
|
484
|
+
|
|
485
|
+
Create workflows with multiple specialized agents:
|
|
486
|
+
|
|
487
|
+
```python
|
|
488
|
+
from agent_control_plane import (
|
|
489
|
+
AgentOrchestrator,
|
|
490
|
+
AgentRole,
|
|
491
|
+
OrchestrationType,
|
|
492
|
+
create_rag_pipeline
|
|
493
|
+
)
|
|
494
|
+
|
|
495
|
+
# Create orchestrator
|
|
496
|
+
orchestrator = AgentOrchestrator(control_plane)
|
|
497
|
+
|
|
498
|
+
# Register specialized agents
|
|
499
|
+
orchestrator.register_agent(
|
|
500
|
+
"retriever",
|
|
501
|
+
AgentRole.SPECIALIST,
|
|
502
|
+
capabilities=["document_search", "vector_search"]
|
|
503
|
+
)
|
|
504
|
+
|
|
505
|
+
orchestrator.register_agent(
|
|
506
|
+
"analyzer",
|
|
507
|
+
AgentRole.SPECIALIST,
|
|
508
|
+
capabilities=["data_analysis", "summarization"]
|
|
509
|
+
)
|
|
510
|
+
|
|
511
|
+
orchestrator.register_agent(
|
|
512
|
+
"supervisor",
|
|
513
|
+
AgentRole.SUPERVISOR,
|
|
514
|
+
capabilities=["quality_check", "safety_check"]
|
|
515
|
+
)
|
|
516
|
+
|
|
517
|
+
# Create a RAG pipeline workflow
|
|
518
|
+
workflow = orchestrator.create_workflow("rag_pipeline", OrchestrationType.SEQUENTIAL)
|
|
519
|
+
orchestrator.add_agent_to_workflow(workflow.workflow_id, "retriever")
|
|
520
|
+
orchestrator.add_agent_to_workflow(
|
|
521
|
+
workflow.workflow_id,
|
|
522
|
+
"analyzer",
|
|
523
|
+
dependencies={"retriever"} # Analyzer depends on retriever
|
|
524
|
+
)
|
|
525
|
+
|
|
526
|
+
# Add supervisor to watch all agents
|
|
527
|
+
orchestrator.add_supervisor("supervisor", ["retriever", "analyzer"])
|
|
528
|
+
|
|
529
|
+
# Execute workflow
|
|
530
|
+
import asyncio
|
|
531
|
+
result = asyncio.run(orchestrator.execute_workflow(
|
|
532
|
+
workflow.workflow_id,
|
|
533
|
+
{"query": "What are the key findings?"}
|
|
534
|
+
))
|
|
535
|
+
print(f"Workflow completed: {result['success']}")
|
|
536
|
+
```
|
|
537
|
+
|
|
538
|
+
#### Governance Layer - Ethical Alignment & Privacy
|
|
539
|
+
|
|
540
|
+
Advanced safety beyond basic policies:
|
|
541
|
+
|
|
542
|
+
```python
|
|
543
|
+
from agent_control_plane import (
|
|
544
|
+
GovernanceLayer,
|
|
545
|
+
AlignmentPrinciple,
|
|
546
|
+
create_default_governance
|
|
547
|
+
)
|
|
548
|
+
|
|
549
|
+
# Create governance layer with ethical rules
|
|
550
|
+
governance = create_default_governance()
|
|
551
|
+
|
|
552
|
+
# Check alignment before execution
|
|
553
|
+
context = {"content": "Analyze sales data"}
|
|
554
|
+
alignment = governance.check_alignment(context)
|
|
555
|
+
if not alignment["aligned"]:
|
|
556
|
+
print(f"Alignment violations: {alignment['violations']}")
|
|
557
|
+
|
|
558
|
+
# Detect bias in content
|
|
559
|
+
text = "All engineers should be..."
|
|
560
|
+
bias_result = governance.detect_bias(text)
|
|
561
|
+
if bias_result.has_bias:
|
|
562
|
+
print(f"Bias detected: {bias_result.bias_types}")
|
|
563
|
+
print(f"Recommendations: {bias_result.recommendations}")
|
|
564
|
+
|
|
565
|
+
# Analyze privacy and PII
|
|
566
|
+
data = {"email": "user@example.com", "phone": "555-1234"}
|
|
567
|
+
privacy = governance.analyze_privacy(data)
|
|
568
|
+
print(f"Privacy level: {privacy.privacy_level.value}")
|
|
569
|
+
print(f"Contains PII: {privacy.contains_pii}")
|
|
570
|
+
print(f"Risk score: {privacy.risk_score}")
|
|
571
|
+
```
|
|
572
|
+
|
|
573
|
+
#### Tool Registry - Dynamic Tool Management
|
|
574
|
+
|
|
575
|
+
Extend agent capabilities with dynamic tool registration:
|
|
576
|
+
|
|
577
|
+
```python
|
|
578
|
+
from agent_control_plane import ToolRegistry, ToolType
|
|
579
|
+
|
|
580
|
+
registry = ToolRegistry()
|
|
581
|
+
|
|
582
|
+
# Register a custom tool
|
|
583
|
+
def web_search(query: str) -> dict:
|
|
584
|
+
"""Search the web for information"""
|
|
585
|
+
# Implementation here
|
|
586
|
+
return {"results": [...]}
|
|
587
|
+
|
|
588
|
+
tool_id = registry.register_tool(
|
|
589
|
+
name="web_search",
|
|
590
|
+
description="Search the web",
|
|
591
|
+
tool_type=ToolType.SEARCH,
|
|
592
|
+
handler=web_search,
|
|
593
|
+
risk_level=0.3
|
|
594
|
+
)
|
|
595
|
+
|
|
596
|
+
# Execute the tool
|
|
597
|
+
result = registry.execute_tool("web_search", {"query": "AI safety"})
|
|
598
|
+
print(f"Search results: {result['result']}")
|
|
599
|
+
|
|
600
|
+
# Discover tools by type
|
|
601
|
+
search_tools = registry.get_tools_by_type(ToolType.SEARCH)
|
|
602
|
+
print(f"Available search tools: {len(search_tools)}")
|
|
603
|
+
```
|
|
604
|
+
|
|
605
|
+
## Command Line Interface
|
|
606
|
+
|
|
607
|
+
Agent Control Plane includes a CLI for common operations:
|
|
608
|
+
|
|
609
|
+
```bash
|
|
610
|
+
# Create an agent
|
|
611
|
+
python acp-cli.py agent create my-agent --role worker
|
|
612
|
+
|
|
613
|
+
# List all agents
|
|
614
|
+
python acp-cli.py agent list
|
|
615
|
+
|
|
616
|
+
# Inspect an agent
|
|
617
|
+
python acp-cli.py agent inspect my-agent
|
|
618
|
+
|
|
619
|
+
# View audit logs
|
|
620
|
+
python acp-cli.py audit show --limit 10 --format json
|
|
621
|
+
|
|
622
|
+
# Run safety benchmark
|
|
623
|
+
python acp-cli.py benchmark run
|
|
624
|
+
```
|
|
625
|
+
|
|
626
|
+
## Docker Deployment
|
|
627
|
+
|
|
628
|
+
Deploy Agent Control Plane in containers for production:
|
|
629
|
+
|
|
630
|
+
```bash
|
|
631
|
+
# Build and start services
|
|
632
|
+
docker-compose up -d
|
|
633
|
+
|
|
634
|
+
# View logs
|
|
635
|
+
docker-compose logs -f
|
|
636
|
+
|
|
637
|
+
# Development environment with Jupyter
|
|
638
|
+
docker-compose --profile dev up -d acp-dev
|
|
639
|
+
|
|
640
|
+
# Distributed setup with Redis
|
|
641
|
+
docker-compose --profile distributed up -d
|
|
642
|
+
```
|
|
643
|
+
|
|
644
|
+
See [Docker Deployment Guide](docs/DOCKER_DEPLOYMENT.md) for complete instructions.
|
|
645
|
+
|
|
646
|
+
## Interactive Tutorial
|
|
647
|
+
|
|
648
|
+
Explore features hands-on with our Jupyter notebook:
|
|
649
|
+
|
|
650
|
+
```bash
|
|
651
|
+
# Install Jupyter
|
|
652
|
+
pip install jupyter
|
|
653
|
+
|
|
654
|
+
# Launch the tutorial
|
|
655
|
+
jupyter notebook examples/interactive_tutorial.ipynb
|
|
656
|
+
```
|
|
657
|
+
|
|
658
|
+
The tutorial covers:
|
|
659
|
+
- Agent creation and permissions
|
|
660
|
+
- Shadow Mode testing
|
|
661
|
+
- Multi-agent orchestration
|
|
662
|
+
- Ethical alignment and bias detection
|
|
663
|
+
- Privacy analysis
|
|
664
|
+
- Tool registry usage
|
|
665
|
+
|
|
666
|
+
## Architecture
|
|
667
|
+
|
|
668
|
+
```
|
|
669
|
+
┌─────────────────────────────────────────────────────────────┐
|
|
670
|
+
│                      Application Layer                      │
|
|
671
|
+
│                   (Chat, Workflow, Tools)                   │
|
|
672
|
+
└─────────────────────────┬───────────────────────────────────┘
|
|
673
|
+
                          │
|
|
674
|
+
┌─────────────────────────▼───────────────────────────────────┐
|
|
675
|
+
│                     Agent Control Plane                     │
|
|
676
|
+
│  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐       │
|
|
677
|
+
│  │   Agent      │  │   Policy     │  │   Audit      │       │
|
|
678
|
+
│  │   Kernel     │◄─┤   Engine     │◄─┤   Logger     │       │
|
|
679
|
+
│  └──────┬───────┘  └──────────────┘  └──────────────┘       │
|
|
680
|
+
│         │                                                   │
|
|
681
|
+
│  ┌──────▼───────┐  ┌──────────────┐                         │
|
|
682
|
+
│  │   Resource   │  │   Execution  │                         │
|
|
683
|
+
│  │   Manager    │◄─┤   Engine     │                         │
|
|
684
|
+
│  └──────────────┘  └──────────────┘                         │
|
|
685
|
+
└─────────────────────────┬───────────────────────────────────┘
|
|
686
|
+
                          │
|
|
687
|
+
┌─────────────────────────▼───────────────────────────────────┐
|
|
688
|
+
│                      LLM (Raw Compute)                      │
|
|
689
|
+
│                (GPT-4, Claude, Llama, etc.)                 │
|
|
690
|
+
└─────────────────────────────────────────────────────────────┘
|
|
691
|
+
                          │
|
|
692
|
+
┌─────────────────────────▼───────────────────────────────────┐
|
|
693
|
+
│                    Execution Environment                    │
|
|
694
|
+
│            (Code, Databases, APIs, File System)             │
|
|
695
|
+
└─────────────────────────────────────────────────────────────┘
|
|
696
|
+
```
|
|
697
|
+
|
|
698
|
+
## Components
|
|
699
|
+
|
|
700
|
+
### Core Components
|
|
701
|
+
|
|
702
|
+
#### Agent Kernel
|
|
703
|
+
The kernel mediates all interactions between the LLM and execution environment:
|
|
704
|
+
- Permission checking with OS-like rigor
|
|
705
|
+
- Request validation
|
|
706
|
+
- Risk assessment
|
|
707
|
+
- Audit logging
|
|
708
|
+
- Session management
|
|
709
|
+
|
|
710
|
+
#### Policy Engine
|
|
711
|
+
Enforces governance rules deterministically:
|
|
712
|
+
- Rate limiting and quotas
|
|
713
|
+
- Custom policy rules
|
|
714
|
+
- Risk management
|
|
715
|
+
- Access control
|
|
716
|
+
- Compliance enforcement
|
|
717
|
+
|
|
718
|
+
#### Execution Engine
|
|
719
|
+
Safely executes agent actions:
|
|
720
|
+
- Sandboxed environments (4 levels: NONE, BASIC, STRICT, ISOLATED)
|
|
721
|
+
- Timeout enforcement
|
|
722
|
+
- Resource monitoring
|
|
723
|
+
- Error handling
|
|
724
|
+
- Transaction support
|
|
725
|
+
|
|
726
|
+
### Advanced Components
|
|
727
|
+
|
|
728
|
+
#### The Mute Agent
|
|
729
|
+
Implements "Scale by Subtraction" philosophy:
|
|
730
|
+
- Capability-based execution
|
|
731
|
+
- Returns NULL for out-of-scope requests instead of hallucinating
|
|
732
|
+
- No creativity, only precision
|
|
733
|
+
- Example: SQL agent that only executes SELECT queries
|
|
734
|
+
|
|
735
|
+
#### Shadow Mode
|
|
736
|
+
The "Matrix" for agents - simulation without execution:
|
|
737
|
+
- Intercepts all actions before execution
|
|
738
|
+
- Validates against policies without side effects
|
|
739
|
+
- Logs reasoning chains
|
|
740
|
+
- Analyzes potential impact
|
|
741
|
+
- Perfect for testing before production
|
|
742
|
+
|
|
743
|
+
#### Constraint Graphs
|
|
744
|
+
Multi-dimensional context defining the "physics" of the agent's world:
|
|
745
|
+
- **Data Graph**: What data resources exist and are accessible
|
|
746
|
+
- **Policy Graph**: What corporate rules and compliance constraints apply
|
|
747
|
+
- **Temporal Graph**: What is true RIGHT NOW (maintenance windows, business hours, freeze periods)
|
|
748
|
+
- Deterministic enforcement: LLM can think anything, but can only ACT on what graphs permit
|
|
749
|
+
|
|
750
|
+
#### Supervisor Agents
|
|
751
|
+
Recursive governance - agents watching agents:
|
|
752
|
+
- Specialized, highly constrained monitoring agents
|
|
753
|
+
- Detect violations, anomalies, and suspicious patterns
|
|
754
|
+
- Flag issues to humans
|
|
755
|
+
- Optional auto-remediation
|
|
756
|
+
- Hierarchical supervision (supervisors watching supervisors)
|
|
757
|
+
|
|
758
|
+
## Examples
|
|
759
|
+
|
|
760
|
+
### Basic Examples
|
|
761
|
+
|
|
762
|
+
Run the basic examples:
|
|
763
|
+
|
|
764
|
+
```bash
|
|
765
|
+
python3 examples.py
|
|
766
|
+
```
|
|
767
|
+
|
|
768
|
+
This demonstrates:
|
|
769
|
+
- Basic usage
|
|
770
|
+
- Permission control
|
|
771
|
+
- Rate limiting
|
|
772
|
+
- Policy enforcement
|
|
773
|
+
- Audit logging
|
|
774
|
+
- Risk management
|
|
775
|
+
|
|
776
|
+
### Advanced Examples
|
|
777
|
+
|
|
778
|
+
Run the advanced feature examples:
|
|
779
|
+
|
|
780
|
+
```bash
|
|
781
|
+
python3 advanced_examples.py
|
|
782
|
+
```
|
|
783
|
+
|
|
784
|
+
This demonstrates:
|
|
785
|
+
- The Mute Agent (capability-based execution)
|
|
786
|
+
- Shadow Mode (simulation)
|
|
787
|
+
- Constraint Graphs (multi-dimensional context)
|
|
788
|
+
- Supervisor Agents (recursive governance)
|
|
789
|
+
- Integrated workflows
|
|
790
|
+
|
|
791
|
+
## Testing
|
|
792
|
+
|
|
793
|
+
Run the complete test suite:
|
|
794
|
+
|
|
795
|
+
```bash
|
|
796
|
+
# Basic features
|
|
797
|
+
python3 test_control_plane.py
|
|
798
|
+
|
|
799
|
+
# Advanced features
|
|
800
|
+
python3 test_advanced_features.py
|
|
801
|
+
|
|
802
|
+
# Or run all tests
|
|
803
|
+
python3 test_control_plane.py && python3 test_advanced_features.py
|
|
804
|
+
```
|
|
805
|
+
|
|
806
|
+
Total: 31 tests covering all features.
|
|
807
|
+
|
|
808
|
+
## Benchmark: Comparative Safety Study
|
|
809
|
+
|
|
810
|
+
A comprehensive benchmark demonstrates the effectiveness of the Control Plane approach compared to traditional prompt-based safety.
|
|
811
|
+
|
|
812
|
+
### Running the Benchmark
|
|
813
|
+
|
|
814
|
+
```bash
|
|
815
|
+
python3 benchmark.py
|
|
816
|
+
```
|
|
817
|
+
|
|
818
|
+
### Key Results
|
|
819
|
+
|
|
820
|
+
The benchmark compares **Prompt-Based Safety (Baseline)** vs **Control Plane Governance (Experimental)** using 60 red team prompts:
|
|
821
|
+
|
|
822
|
+
| Metric | Baseline (Prompts) | Control Plane | Improvement |
|
|
823
|
+
|--------|-------------------|---------------|-------------|
|
|
824
|
+
| **Safety Violation Rate** | 26.67% | **0.00%** | ✅ 100% enforcement |
|
|
825
|
+
| **False Positive Rate** | 0.00% | **0.00%** | ✅ Perfect precision |
|
|
826
|
+
| **Avg Output Tokens** | 26.1 | **0.5** | ✅ 98.1% reduction |
|
|
827
|
+
|
|
828
|
+
**Key Findings:**
|
|
829
|
+
- ✅ **100% Safety Enforcement**: Control Plane blocked all 45 malicious prompts (0% SVR)
|
|
830
|
+
- ✅ **Zero False Positives**: All 15 valid requests were correctly allowed
|
|
831
|
+
- ✅ **Scale by Subtraction**: 98.1% fewer tokens (returns "NULL" instead of verbose refusals)
|
|
832
|
+
- ✅ **Jailbreak Immunity**: Deterministic enforcement catches prompt injection attacks that bypass prompt-based safety
|
|
833
|
+
|
|
834
|
+
The benchmark includes:
|
|
835
|
+
- **15 Direct Violations**: SQL injection, system commands
|
|
836
|
+
- **15 Prompt Injections**: Jailbreaks and instruction overrides
|
|
837
|
+
- **15 Contextual Confusion**: Social engineering attempts
|
|
838
|
+
- **15 Valid Requests**: Legitimate operations (false positive testing)
|
|
839
|
+
|
|
840
|
+
See [`benchmark/README.md`](benchmark/README.md) for detailed methodology and results.
|
|
841
|
+
|
|
842
|
+
## Demo Video
|
|
843
|
+
|
|
844
|
+
### Quick Start (2-3 minutes)
|
|
845
|
+
|
|
846
|
+
🎥 **Coming Soon**: A short video walkthrough showing:
|
|
847
|
+
1. Installing Agent Control Plane
|
|
848
|
+
2. Creating a governed agent
|
|
849
|
+
3. Testing safety with red team prompts
|
|
850
|
+
4. Viewing audit logs
|
|
851
|
+
|
|
852
|
+
*Video will be published to YouTube and embedded here*
|
|
853
|
+
|
|
854
|
+
### Full Tutorial (12 minutes)
|
|
855
|
+
|
|
856
|
+
📹 **Coming Soon**: Complete tutorial covering:
|
|
857
|
+
1. **Introduction** (2 min): What is Agent Control Plane?
|
|
858
|
+
2. **Installation** (2 min): Setup and dependencies
|
|
859
|
+
3. **Basic Usage** (3 min): Create your first governed agent
|
|
860
|
+
4. **Safety Demo** (3 min): Test against adversarial prompts
|
|
861
|
+
5. **Multi-Agent** (2 min): Coordinate multiple agents safely
|
|
862
|
+
|
|
863
|
+
**Planned Release**: Q1 2026
|
|
864
|
+
|
|
865
|
+
### Interactive Demos
|
|
866
|
+
|
|
867
|
+
Try these live demos in your browser:
|
|
868
|
+
|
|
869
|
+
- **[Google Colab Notebook](https://colab.research.google.com/github/imran-siddique/agent-control-plane)**: Interactive tutorial (coming soon)
|
|
870
|
+
- **[Jupyter Notebook](examples/)**: Local demos in `examples/` directory
|
|
871
|
+
- **[Streamlit App](https://huggingface.co/spaces/imran-siddique/agent-control-demo)**: Web UI demo (coming soon)
|
|
872
|
+
|
|
873
|
+
### Community Videos
|
|
874
|
+
|
|
875
|
+
Have you created a tutorial or demo? [Submit it here](https://github.com/imran-siddique/agent-control-plane/issues/new?template=community-video.md) to be featured!
|
|
876
|
+
|
|
877
|
+
---
|
|
878
|
+
|
|
879
|
+
## Use Cases
|
|
880
|
+
|
|
881
|
+
### Enterprise AI Agents
|
|
882
|
+
Deploy agents with strict governance for enterprise environments:
|
|
883
|
+
- Compliance with security policies through Constraint Graphs
|
|
884
|
+
- Complete audit trails for regulatory requirements
|
|
885
|
+
- Resource quotas to control costs
|
|
886
|
+
- Shadow Mode testing before production deployment
|
|
887
|
+
|
|
888
|
+
### SQL-Generating Agents
|
|
889
|
+
Build precise, non-creative agents:
|
|
890
|
+
- Mute Agent configuration for SQL-only operations
|
|
891
|
+
- Returns NULL for out-of-scope requests
|
|
892
|
+
- No hallucination or conversational pivots
|
|
893
|
+
- Example: Finance team data access agent
|
|
894
|
+
|
|
895
|
+
### Multi-tenant AI Platforms
|
|
896
|
+
Safely run multiple agents with isolation:
|
|
897
|
+
- Per-tenant quotas and policies
|
|
898
|
+
- Isolated execution environments
|
|
899
|
+
- Fair resource allocation
|
|
900
|
+
- Supervisor Agents monitoring all tenants
|
|
901
|
+
|
|
902
|
+
### Development & Testing
|
|
903
|
+
Experiment safely with agent capabilities:
|
|
904
|
+
- Shadow Mode for risk-free testing
|
|
905
|
+
- Sandboxed execution
|
|
906
|
+
- Complete reasoning telemetry
|
|
907
|
+
- Comprehensive logging
|
|
908
|
+
|
|
909
|
+
### Production Workflows
|
|
910
|
+
Run reliable, auditable agent workflows:
|
|
911
|
+
- Error handling and recovery
|
|
912
|
+
- Performance monitoring
|
|
913
|
+
- Traceability for debugging
|
|
914
|
+
|
|
915
|
+
## API Reference
|
|
916
|
+
|
|
917
|
+
See [architecture.md](architecture.md) for detailed architecture documentation.
|
|
918
|
+
|
|
919
|
+
### Core Classes
|
|
920
|
+
|
|
921
|
+
- `AgentControlPlane`: Main control plane interface
|
|
922
|
+
- `AgentKernel`: Core kernel component
|
|
923
|
+
- `PolicyEngine`: Policy enforcement
|
|
924
|
+
- `ExecutionEngine`: Safe execution
|
|
925
|
+
- `AgentContext`: Agent session context
|
|
926
|
+
- `ExecutionRequest`: Action request
|
|
927
|
+
- `ExecutionResult`: Action result
|
|
928
|
+
|
|
929
|
+
### New Multi-Agent & Governance Classes
|
|
930
|
+
|
|
931
|
+
- `AgentOrchestrator`: Multi-agent coordination and workflows
|
|
932
|
+
- `ToolRegistry`: Dynamic tool management and discovery
|
|
933
|
+
- `GovernanceLayer`: Ethical alignment and advanced safety
|
|
934
|
+
- `AgentNode`: Agent representation in orchestration graphs
|
|
935
|
+
- `Tool`: Tool definition with schemas and handlers
|
|
936
|
+
|
|
937
|
+
### Action Types
|
|
938
|
+
|
|
939
|
+
- `FILE_READ`: Read file operations
|
|
940
|
+
- `FILE_WRITE`: Write file operations
|
|
941
|
+
- `CODE_EXECUTION`: Execute code
|
|
942
|
+
- `API_CALL`: Make API calls
|
|
943
|
+
- `DATABASE_QUERY`: Query databases
|
|
944
|
+
- `DATABASE_WRITE`: Write to databases
|
|
945
|
+
- `WORKFLOW_TRIGGER`: Trigger workflows
|
|
946
|
+
|
|
947
|
+
### Permission Levels
|
|
948
|
+
|
|
949
|
+
- `NONE`: No access
|
|
950
|
+
- `READ_ONLY`: Read-only access
|
|
951
|
+
- `READ_WRITE`: Read and write access
|
|
952
|
+
- `ADMIN`: Full administrative access
|
|
953
|
+
|
|
954
|
+
## Best Practices
|
|
955
|
+
|
|
956
|
+
1. **Start with minimal permissions**: Grant only what's needed
|
|
957
|
+
2. **Use rate limits**: Prevent runaway agents
|
|
958
|
+
3. **Enable audit logging**: Track all agent actions
|
|
959
|
+
4. **Test policies**: Validate governance rules work as expected
|
|
960
|
+
5. **Monitor resource usage**: Watch for anomalies
|
|
961
|
+
6. **Regular policy reviews**: Keep policies up to date
|
|
962
|
+
|
|
963
|
+
## Security Considerations
|
|
964
|
+
|
|
965
|
+
- Default policies block system file access
|
|
966
|
+
- Credentials should never be in parameters
|
|
967
|
+
- High-risk actions require elevated permissions
|
|
968
|
+
- All actions are audited
|
|
969
|
+
- Sandboxed execution by default
|
|
970
|
+
|
|
971
|
+
## Future Enhancements
|
|
972
|
+
|
|
973
|
+
Recent additions (2025):
|
|
974
|
+
- [x] Multi-agent orchestration with workflows
|
|
975
|
+
- [x] Dynamic tool registry with auto-discovery
|
|
976
|
+
- [x] Governance layer with ethical alignment
|
|
977
|
+
- [x] Bias detection and privacy analysis
|
|
978
|
+
- [x] Docker deployment with docker-compose
|
|
979
|
+
- [x] Command-line interface (CLI)
|
|
980
|
+
- [x] Interactive Jupyter notebooks
|
|
981
|
+
|
|
982
|
+
Planned enhancements:
|
|
983
|
+
- [ ] Kubernetes deployment manifests and Helm charts
|
|
984
|
+
- [ ] Integration with external policy engines (OPA, Cedar)
|
|
985
|
+
- [ ] Real-time monitoring dashboard with metrics
|
|
986
|
+
- [ ] Machine learning-based anomaly detection
|
|
987
|
+
- [ ] Automatic policy generation from past behavior
|
|
988
|
+
- [ ] Integration with secrets management systems (Vault, AWS Secrets Manager)
|
|
989
|
+
- [ ] Enhanced container-based sandboxing with gVisor
|
|
990
|
+
- [ ] Transaction rollback for database operations
|
|
991
|
+
- [ ] Federated learning support for privacy-preserving models
|
|
992
|
+
- [ ] Integration with AdvBench and WildGuard datasets
|
|
993
|
+
|
|
994
|
+
## Reproducibility
|
|
995
|
+
|
|
996
|
+
All experiments and results in this repository are fully reproducible. We provide:
|
|
997
|
+
|
|
998
|
+
### 📦 Dataset
|
|
999
|
+
- **Red Team Dataset (60 prompts)**: [HuggingFace Hub →](https://huggingface.co/datasets/imran-siddique/agent-control-redteam-60)
|
|
1000
|
+
- Categories: Direct violations, prompt injections, social engineering, valid requests
|
|
1001
|
+
- Use to benchmark your own agent safety systems
|
|
1002
|
+
|
|
1003
|
+
### 🔬 Reproducibility Package
|
|
1004
|
+
Complete materials in [`reproducibility/`](reproducibility/) directory:
|
|
1005
|
+
- **Hardware specs**: Exact hardware and software environment
|
|
1006
|
+
- **Seeds**: All random seeds used (primary: 42)
|
|
1007
|
+
- **Commands**: Exact commands for every experiment
|
|
1008
|
+
- **Docker**: Containerized environment for consistent results
|
|
1009
|
+
- **Frozen dependencies**: 109 packages with exact versions
|
|
1010
|
+
|
|
1011
|
+
### 🧪 Experiments
|
|
1012
|
+
- **Comparative Study**: Baseline vs Control Plane safety (benchmark.py)
|
|
1013
|
+
- **Ablation Studies**: Component removal analysis (7 configurations × 5 seeds)
|
|
1014
|
+
- **Multi-Agent RAG**: Governed retrieval-augmented generation chain
|
|
1015
|
+
- **Long-Horizon Purge**: State management over 100+ steps
|
|
1016
|
+
|
|
1017
|
+
### 📊 Statistical Analysis
|
|
1018
|
+
- **Mean ± Std Dev** for all metrics
|
|
1019
|
+
- **P-values** with Bonferroni correction
|
|
1020
|
+
- **Effect sizes** (Cohen's d)
|
|
1021
|
+
- **95% confidence intervals**
|
|
1022
|
+
- **Power analysis** confirming sufficient sample size
|
|
1023
|
+
|
|
1024
|
+
### 🚀 Quick Reproduce
|
|
1025
|
+
```bash
|
|
1026
|
+
# Using Docker (recommended)
|
|
1027
|
+
cd reproducibility/docker_config
|
|
1028
|
+
docker build -t acp-reproducibility:v1.1.0 .
|
|
1029
|
+
docker run -it acp-reproducibility:v1.1.0 bash
|
|
1030
|
+
bash reproducibility/run_all_experiments.sh
|
|
1031
|
+
|
|
1032
|
+
# Or locally
|
|
1033
|
+
pip install -r reproducibility/requirements_frozen.txt
|
|
1034
|
+
python benchmark.py --seed 42
|
|
1035
|
+
python experiments/multi_agent_rag.py --seed 42
|
|
1036
|
+
python experiments/long_horizon_purge.py --seed 42
|
|
1037
|
+
```
|
|
1038
|
+
|
|
1039
|
+
**See [`reproducibility/README.md`](reproducibility/README.md) for complete guide.**
|
|
1040
|
+
|
|
1041
|
+
## How This Differs from Other Approaches
|
|
1042
|
+
|
|
1043
|
+
### vs. "Manager" Models (e.g., Gas Town)
|
|
1044
|
+
|
|
1045
|
+
Projects like Steve Yegge's Gas Town use a "City" metaphor where a "Mayor" agent orchestrates "Worker" agents to maximize coding throughput. This is brilliant for velocity.
|
|
1046
|
+
|
|
1047
|
+
**The Difference:**
|
|
1048
|
+
- **Gas Town solves for COORDINATION** (getting things done fast)
|
|
1049
|
+
- **Agent Control Plane solves for CONTAINMENT** (ensuring things are safe)
|
|
1050
|
+
- In an enterprise, you don't just need a Manager; you need a Compliance Officer who can pull the plug
|
|
1051
|
+
|
|
1052
|
+
The Agent Control Plane complements coordination systems by providing the safety layer.
|
|
1053
|
+
|
|
1054
|
+
### vs. "Guardrails" Models (e.g., NeMo, LlamaGuard)
|
|
1055
|
+
|
|
1056
|
+
Most current safety tools operate as "sidecars" that check input/output for toxicity, PII, or harmful content. They are largely text-based and probabilistic.
|
|
1057
|
+
|
|
1058
|
+
**The Difference:**
|
|
1059
|
+
- **Guardrails are ADVISORY or REACTIVE** (sanitizing output after generation)
|
|
1060
|
+
- **Agent Control Plane is ARCHITECTURAL** (preventing action at the kernel level)
|
|
1061
|
+
- A guardrail scrubs a bad SQL query; a Control Plane ensures the agent never had the connection string to begin with
|
|
1062
|
+
- Guardrails work on content; Control Plane works on capabilities and execution
|
|
1063
|
+
|
|
1064
|
+
### vs. "Tool Directory" Models
|
|
1065
|
+
|
|
1066
|
+
Recent academic papers propose "Control Planes" that act as a phonebook, helping agents find the right tools.
|
|
1067
|
+
|
|
1068
|
+
**The Difference:**
|
|
1069
|
+
- **Tool Directory is SERVICE DISCOVERY** (finding what's available)
|
|
1070
|
+
- **Agent Control Plane is a KERNEL** (strict enforcement of boundaries)
|
|
1071
|
+
- The Linux Kernel doesn't just "help" processes find memory; it strictly enforces that Process A cannot touch Process B's memory
|
|
1072
|
+
- We need that same hardness for Agents
|
|
1073
|
+
|
|
1074
|
+
### The Agent Control Plane Approach
|
|
1075
|
+
|
|
1076
|
+
**Deterministic Enforcement, Not Advisory Hints:**
|
|
1077
|
+
- LLM can "think" whatever it wants
|
|
1078
|
+
- But it can only ACT on what the Control Plane permits
|
|
1079
|
+
- Constraint Graphs define the "physics" of the agent's world
|
|
1080
|
+
- Shadow Mode lets you test everything before production
|
|
1081
|
+
- Supervisor Agents provide recursive oversight
|
|
1082
|
+
|
|
1083
|
+
This is **systems engineering** for AI, not prompt engineering.
|
|
1084
|
+
|
|
1085
|
+
## Research & Academic Grounding
|
|
1086
|
+
|
|
1087
|
+
The Agent Control Plane is built on peer-reviewed research and industry best practices, not just intuition.
|
|
1088
|
+
|
|
1089
|
+
### Key Research Foundations
|
|
1090
|
+
|
|
1091
|
+
Our design is informed by academic research across multiple domains:
|
|
1092
|
+
|
|
1093
|
+
1. **Agent Safety**: "A Safety Framework for Real-World Agentic Systems" (arXiv:2511.21990) - contextual risk management
|
|
1094
|
+
2. **Multi-Agent Systems**: "Multi-Agent Systems: A Survey" (arXiv:2308.05391) - hierarchical control, cascade failure prevention
|
|
1095
|
+
3. **Security**: "MAESTRO: A Threat Modeling Framework for Agentic AI" (CSA, 2025) - threat vectors and defenses
|
|
1096
|
+
4. **Governance**: "Practices for Governing Agentic AI Systems" (OpenAI, 2023) - pre/post-deployment practices
|
|
1097
|
+
5. **Privacy**: "Privacy in Agentic Systems" (arXiv:2409.1087) - differential privacy, secure computation
|
|
1098
|
+
6. **Evaluation**: "Evaluating Agentic AI" (WEF, 2025) - standardized metrics and benchmarks
|
|
1099
|
+
|
|
1100
|
+
### Research-Backed Design Decisions
|
|
1101
|
+
|
|
1102
|
+
- **Deterministic enforcement** over probabilistic filtering (OS security model)
|
|
1103
|
+
- **Layered defense** architecture (defense-in-depth from security research)
|
|
1104
|
+
- **Capability-based security** (principle of least privilege)
|
|
1105
|
+
- **Simulation before execution** (risk-free pre-deployment testing)
|
|
1106
|
+
- **Multi-dimensional context** (ABAC - NIST SP 800-162)
|
|
1107
|
+
|
|
1108
|
+
### Benchmark Methodology
|
|
1109
|
+
|
|
1110
|
+
Our comparative safety study follows research standards:
|
|
1111
|
+
- Dataset based on "Red-Teaming Agentic AI" taxonomy (60 prompts across 4 categories)
|
|
1112
|
+
- Metrics follow "Evaluating Agentic AI" frameworks (SVR, FPR, token efficiency)
|
|
1113
|
+
- Baseline comparison with industry-standard prompt-based safety
|
|
1114
|
+
|
|
1115
|
+
### Academic Resources
|
|
1116
|
+
|
|
1117
|
+
- **[Research Foundation](docs/RESEARCH_FOUNDATION.md)** - Detailed research citations and applications
|
|
1118
|
+
- **[Bibliography](docs/BIBLIOGRAPHY.md)** - Complete list of 26+ research papers and reports
|
|
1119
|
+
- **[Benchmark Methodology](benchmark/README.md)** - Research-backed evaluation approach
|
|
1120
|
+
|
|
1121
|
+
### Citation
|
|
1122
|
+
|
|
1123
|
+
If you use Agent Control Plane in research:
|
|
1124
|
+
|
|
1125
|
+
```bibtex
|
|
1126
|
+
@software{agent_control_plane,
|
|
1127
|
+
title = {Agent Control Plane: A Governance Layer for Autonomous AI Agents},
|
|
1128
|
+
author = {Agent Control Plane Contributors},
|
|
1129
|
+
year = {2025},
|
|
1130
|
+
url = {https://github.com/imran-siddique/agent-control-plane},
|
|
1131
|
+
note = {MIT License}
|
|
1132
|
+
}
|
|
1133
|
+
```
|
|
1134
|
+
|
|
1135
|
+
## Community & Support
|
|
1136
|
+
|
|
1137
|
+
### Get Involved
|
|
1138
|
+
|
|
1139
|
+
Join our growing community:
|
|
1140
|
+
|
|
1141
|
+
- 🌟 **[Star the repository](https://github.com/imran-siddique/agent-control-plane)** to show support
|
|
1142
|
+
- 💬 **[GitHub Discussions](https://github.com/imran-siddique/agent-control-plane/discussions)** - Ask questions, share ideas, showcase projects
|
|
1143
|
+
- 🐛 **[Issue Tracker](https://github.com/imran-siddique/agent-control-plane/issues)** - Report bugs or request features
|
|
1144
|
+
- 📚 **[Documentation](docs/)** - Comprehensive guides and API reference
|
|
1145
|
+
- 🗺️ **[Roadmap](ROADMAP.md)** - See what's planned for 2026
|
|
1146
|
+
- 💡 **[Support Guide](SUPPORT.md)** - How to get help
|
|
1147
|
+
|
|
1148
|
+
### Roadmap Highlights
|
|
1149
|
+
|
|
1150
|
+
See [ROADMAP.md](ROADMAP.md) for the complete 2026 roadmap. Key upcoming features:
|
|
1151
|
+
|
|
1152
|
+
**Q1 2026** (Current)
|
|
1153
|
+
- ✅ PyPI distribution (`pip install agent-control-plane`)
|
|
1154
|
+
- ✅ GitHub releases and versioning
|
|
1155
|
+
- ✅ Community forum (GitHub Discussions)
|
|
1156
|
+
- 🚧 Documentation portal (ReadTheDocs/GitHub Pages)
|
|
1157
|
+
|
|
1158
|
+
**Q2 2026** - Advanced Intelligence
|
|
1159
|
+
- ML-based intent classification
|
|
1160
|
+
- Constitutional fine-tuning hooks (RLHF/LoRA)
|
|
1161
|
+
- Multi-turn red-teaming datasets
|
|
1162
|
+
- Privacy enhancements (differential privacy, federated learning)
|
|
1163
|
+
|
|
1164
|
+
**Q3 2026** - Multimodal & Ecosystem
|
|
1165
|
+
- Production-ready vision and audio governance
|
|
1166
|
+
- Vector store integrations (Pinecone, Weaviate, Qdrant)
|
|
1167
|
+
- Advanced RAG patterns with fact verification
|
|
1168
|
+
|
|
1169
|
+
**Q4 2026** - Scale & Operations
|
|
1170
|
+
- Grafana/Prometheus dashboard templates
|
|
1171
|
+
- Streamlit interactive governance UI
|
|
1172
|
+
- Tool marketplace (200+ governed tools)
|
|
1173
|
+
|
|
1174
|
+
### Success Metrics & Goals
|
|
1175
|
+
|
|
1176
|
+
**2026 Targets:**
|
|
1177
|
+
- 1,000+ GitHub stars
|
|
1178
|
+
- 10,000+ monthly PyPI downloads
|
|
1179
|
+
- 50+ production deployments
|
|
1180
|
+
- 20+ active contributors
|
|
1181
|
+
- 10+ academic citations
|
|
1182
|
+
|
|
1183
|
+
### Stay Connected
|
|
1184
|
+
|
|
1185
|
+
- Watch the repository for updates and releases
|
|
1186
|
+
- Follow project announcements in Discussions
|
|
1187
|
+
- Contribute to help us reach our goals!
|
|
1188
|
+
|
|
1189
|
+
## Contributing
|
|
1190
|
+
|
|
1191
|
+
We welcome contributions! The Agent Control Plane is designed to be production-ready and contributor-friendly.
|
|
1192
|
+
|
|
1193
|
+
### Getting Started
|
|
1194
|
+
|
|
1195
|
+
1. Fork the repository
|
|
1196
|
+
2. Clone your fork: `git clone https://github.com/YOUR-USERNAME/agent-control-plane.git`
|
|
1197
|
+
3. Install in development mode: `pip install -e ".[dev]"`
|
|
1198
|
+
4. Create a branch: `git checkout -b feature/your-feature-name`
|
|
1199
|
+
|
|
1200
|
+
### Running Tests
|
|
1201
|
+
|
|
1202
|
+
```bash
|
|
1203
|
+
# Run all tests
|
|
1204
|
+
python -m unittest discover -s tests -p 'test_*.py' -v
|
|
1205
|
+
|
|
1206
|
+
# Run specific test file
|
|
1207
|
+
python -m unittest tests/test_control_plane.py
|
|
1208
|
+
|
|
1209
|
+
# Run specific test
|
|
1210
|
+
python -m unittest tests.test_control_plane.TestAgentKernel.test_create_agent_session
|
|
1211
|
+
```
|
|
1212
|
+
|
|
1213
|
+
### Project Structure
|
|
1214
|
+
|
|
1215
|
+
- `src/agent_control_plane/` - Main package source code
|
|
1216
|
+
- `tests/` - Test suite (unittest framework)
|
|
1217
|
+
- `examples/` - Example scripts and use cases
|
|
1218
|
+
- `docs/` - Documentation and guides
|
|
1219
|
+
- `.github/workflows/` - CI/CD configuration
|
|
1220
|
+
|
|
1221
|
+
### Guidelines
|
|
1222
|
+
|
|
1223
|
+
- Follow existing code style and patterns
|
|
1224
|
+
- Add tests for new features
|
|
1225
|
+
- Update documentation as needed
|
|
1226
|
+
- Keep changes focused and minimal
|
|
1227
|
+
- Write clear commit messages
|
|
1228
|
+
|
|
1229
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for detailed guidelines.
|
|
1230
|
+
|
|
1231
|
+
## Testing
|
|
1232
|
+
|
|
1233
|
+
The project uses Python's built-in `unittest` framework. All tests are located in the `tests/` directory.
|
|
1234
|
+
|
|
1235
|
+
### Test Coverage
|
|
1236
|
+
|
|
1237
|
+
- **Core functionality tests**: `test_control_plane.py`
|
|
1238
|
+
- **Advanced features tests**: `test_advanced_features.py`
|
|
1239
|
+
|
|
1240
|
+
Current test coverage: **31 tests** covering:
|
|
1241
|
+
- Agent creation and lifecycle
|
|
1242
|
+
- Permission management
|
|
1243
|
+
- Policy enforcement
|
|
1244
|
+
- Rate limiting
|
|
1245
|
+
- Shadow mode simulation
|
|
1246
|
+
- Mute agent capabilities
|
|
1247
|
+
- Constraint graphs
|
|
1248
|
+
- Supervisor agents
|
|
1249
|
+
- Audit logging
|
|
1250
|
+
|
|
1251
|
+
## Documentation
|
|
1252
|
+
|
|
1253
|
+
Comprehensive documentation is available in the `docs/` directory:
|
|
1254
|
+
|
|
1255
|
+
- **[Quick Start Guide](docs/guides/QUICKSTART.md)** - Get up and running quickly
|
|
1256
|
+
- **[Implementation Guide](docs/guides/IMPLEMENTATION.md)** - Detailed implementation details
|
|
1257
|
+
- **[Philosophy](docs/guides/PHILOSOPHY.md)** - Core principles and design philosophy
|
|
1258
|
+
- **[Architecture](docs/architecture/architecture.md)** - System architecture overview
|
|
1259
|
+
- **[Research Foundation](docs/RESEARCH_FOUNDATION.md)** - Academic grounding and citations
|
|
1260
|
+
- **[Bibliography](docs/BIBLIOGRAPHY.md)** - Complete list of research references
|
|
1261
|
+
|
|
1262
|
+
## License
|
|
1263
|
+
|
|
1264
|
+
MIT License - See LICENSE file for details
|