agent-os-kernel 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_os/__init__.py +66 -4
- agent_os/agents_compat.py +286 -0
- agent_os/base_agent.py +308 -0
- agent_os/cli.py +1079 -19
- agent_os/integrations/__init__.py +37 -2
- agent_os/integrations/openai_adapter.py +502 -0
- agent_os/integrations/semantic_kernel_adapter.py +569 -0
- agent_os/stateless.py +349 -0
- agent_os_kernel-1.2.0.dist-info/METADATA +676 -0
- agent_os_kernel-1.2.0.dist-info/RECORD +1053 -0
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.2.0.dist-info}/entry_points.txt +0 -1
- modules/amb/.github/workflows/ci.yml +102 -0
- modules/amb/.github/workflows/publish.yml +146 -0
- modules/amb/.gitignore +134 -0
- modules/amb/CHANGELOG.md +118 -0
- modules/amb/CONTRIBUTING.md +141 -0
- modules/amb/LICENSE +21 -0
- modules/amb/README.md +188 -0
- modules/amb/amb_core/__init__.py +175 -0
- modules/amb/amb_core/adapters/__init__.py +55 -0
- modules/amb/amb_core/adapters/aws_sqs_broker.py +374 -0
- modules/amb/amb_core/adapters/azure_servicebus_broker.py +338 -0
- modules/amb/amb_core/adapters/kafka_broker.py +258 -0
- modules/amb/amb_core/adapters/nats_broker.py +283 -0
- modules/amb/amb_core/adapters/rabbitmq_broker.py +233 -0
- modules/amb/amb_core/adapters/redis_broker.py +260 -0
- modules/amb/amb_core/broker.py +143 -0
- modules/amb/amb_core/bus.py +479 -0
- modules/amb/amb_core/cloudevents.py +507 -0
- modules/amb/amb_core/dlq.py +343 -0
- modules/amb/amb_core/hf_utils.py +534 -0
- modules/amb/amb_core/memory_broker.py +408 -0
- modules/amb/amb_core/models.py +139 -0
- modules/amb/amb_core/persistence.py +527 -0
- modules/amb/amb_core/schema.py +292 -0
- modules/amb/amb_core/tracing.py +356 -0
- modules/amb/examples/advanced_features.py +223 -0
- modules/amb/examples/backpressure_demo.py +225 -0
- modules/amb/examples/basic_usage.py +117 -0
- modules/amb/examples/tracing_demo.py +104 -0
- modules/amb/experiments/README.md +52 -0
- modules/amb/experiments/reproduce_results.py +467 -0
- modules/amb/experiments/results.json +324 -0
- modules/amb/paper/README.md +40 -0
- modules/amb/paper/paper.tex +365 -0
- modules/amb/paper/whitepaper.md +377 -0
- modules/amb/pyproject.toml +117 -0
- modules/amb/tests/__init__.py +1 -0
- modules/amb/tests/test_backpressure_priority.py +280 -0
- modules/amb/tests/test_bus.py +198 -0
- modules/amb/tests/test_cloudevents.py +443 -0
- modules/amb/tests/test_features.py +531 -0
- modules/amb/tests/test_models.py +74 -0
- modules/amb/tests/test_tracing.py +254 -0
- modules/atr/.github/workflows/ci.yml +101 -0
- modules/atr/.github/workflows/publish.yml +140 -0
- modules/atr/.gitignore +134 -0
- modules/atr/.pre-commit-config.yaml +37 -0
- modules/atr/CHANGELOG.md +39 -0
- modules/atr/CONTRIBUTING.md +96 -0
- modules/atr/IMPLEMENTATION_SUMMARY.md +143 -0
- modules/atr/README.md +180 -0
- modules/atr/atr/__init__.py +638 -0
- modules/atr/atr/access.py +346 -0
- modules/atr/atr/composition.py +643 -0
- modules/atr/atr/decorator.py +355 -0
- modules/atr/atr/executor.py +382 -0
- modules/atr/atr/health.py +555 -0
- modules/atr/atr/hf_utils.py +447 -0
- modules/atr/atr/injection.py +420 -0
- modules/atr/atr/metrics.py +438 -0
- modules/atr/atr/policies.py +401 -0
- modules/atr/atr/py.typed +2 -0
- modules/atr/atr/registry.py +450 -0
- modules/atr/atr/schema.py +478 -0
- modules/atr/atr/tools/safe/__init__.py +73 -0
- modules/atr/atr/tools/safe/calculator.py +380 -0
- modules/atr/atr/tools/safe/datetime_tool.py +441 -0
- modules/atr/atr/tools/safe/file_reader.py +400 -0
- modules/atr/atr/tools/safe/http_client.py +314 -0
- modules/atr/atr/tools/safe/json_parser.py +372 -0
- modules/atr/atr/tools/safe/text_tool.py +526 -0
- modules/atr/atr/tools/safe/toolkit.py +173 -0
- modules/atr/docs/PYPI_SETUP.md +113 -0
- modules/atr/examples/README.md +27 -0
- modules/atr/examples/demo.py +144 -0
- modules/atr/examples/sandbox_demo.py +218 -0
- modules/atr/experiments/README.md +69 -0
- modules/atr/experiments/reproduce_results.py +509 -0
- modules/atr/experiments/results/.gitkeep +0 -0
- modules/atr/experiments/results/results_20260123_140334.json +71 -0
- modules/atr/paper/README.md +36 -0
- modules/atr/paper/figures/.gitkeep +0 -0
- modules/atr/paper/references.bib +84 -0
- modules/atr/paper/structure.tex +293 -0
- modules/atr/paper/whitepaper.md +234 -0
- modules/atr/pyproject.toml +148 -0
- modules/atr/requirements.txt +1 -0
- modules/atr/setup.py +30 -0
- modules/atr/tests/__init__.py +1 -0
- modules/atr/tests/test_decorator.py +317 -0
- modules/atr/tests/test_executor.py +245 -0
- modules/atr/tests/test_integration_executor.py +184 -0
- modules/atr/tests/test_registry.py +312 -0
- modules/atr/tests/test_schema.py +182 -0
- modules/atr/tests/test_v2_features.py +708 -0
- modules/caas/.dockerignore +63 -0
- modules/caas/.github/ISSUE_TEMPLATE/bug_report.md +38 -0
- modules/caas/.github/ISSUE_TEMPLATE/custom.md +10 -0
- modules/caas/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
- modules/caas/.github/workflows/ci.yml +100 -0
- modules/caas/.github/workflows/lint.yml +39 -0
- modules/caas/.github/workflows/publish-pypi.yml +124 -0
- modules/caas/.gitignore +73 -0
- modules/caas/.pre-commit-config.yaml +33 -0
- modules/caas/CHANGELOG.md +58 -0
- modules/caas/CONTRIBUTING.md +346 -0
- modules/caas/Dockerfile +41 -0
- modules/caas/LICENSE +21 -0
- modules/caas/MANIFEST.in +11 -0
- modules/caas/README.md +158 -0
- modules/caas/benchmarks/README.md +255 -0
- modules/caas/benchmarks/create_hf_dataset.py +502 -0
- modules/caas/benchmarks/data/sample_corpus/README.md +86 -0
- modules/caas/benchmarks/data/sample_corpus/auth_module.py +211 -0
- modules/caas/benchmarks/data/sample_corpus/contribution_guide.md +185 -0
- modules/caas/benchmarks/data/sample_corpus/remote_work_policy.html +57 -0
- modules/caas/benchmarks/hf_dataset/README.md +214 -0
- modules/caas/benchmarks/hf_dataset/caas_benchmark_corpus.py +73 -0
- modules/caas/benchmarks/hf_dataset/corpus_preview.json +193 -0
- modules/caas/benchmarks/results/README.md +66 -0
- modules/caas/benchmarks/results/evaluation_2026-01-20.json +121 -0
- modules/caas/benchmarks/run_evaluation.py +561 -0
- modules/caas/benchmarks/statistical_tests.py +289 -0
- modules/caas/benchmarks/verify_sample_corpus.py +83 -0
- modules/caas/docker-compose.yml +38 -0
- modules/caas/docs/CONTEXT_TRIAD.md +462 -0
- modules/caas/docs/CONTRIBUTING.md +346 -0
- modules/caas/docs/ETHICS_AND_LIMITATIONS.md +336 -0
- modules/caas/docs/HEURISTIC_ROUTER.md +442 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY.md +363 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_CONTEXT_TRIAD.md +277 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_HEURISTIC_ROUTER.md +231 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_METADATA_INJECTION.md +258 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_PRAGMATIC_TRUTH.md +212 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_TRUST_GATEWAY.md +319 -0
- modules/caas/docs/LAYER_1_PRIMITIVE.md +202 -0
- modules/caas/docs/METADATA_INJECTION.md +404 -0
- modules/caas/docs/PRAGMATIC_TRUTH.md +431 -0
- modules/caas/docs/RELATED_WORK.md +312 -0
- modules/caas/docs/RELEASE_CHECKLIST.md +219 -0
- modules/caas/docs/RELEASE_GUIDE.md +285 -0
- modules/caas/docs/REPRODUCIBILITY.md +386 -0
- modules/caas/docs/SLIDING_WINDOW.md +387 -0
- modules/caas/docs/STRUCTURE_AWARE_INDEXING.md +158 -0
- modules/caas/docs/TESTING.md +259 -0
- modules/caas/docs/THREAT_MODEL.md +247 -0
- modules/caas/docs/TRUST_GATEWAY.md +575 -0
- modules/caas/docs/VFS.md +298 -0
- modules/caas/examples/agents/enterprise_security_agent.py +414 -0
- modules/caas/examples/agents/intelligent_document_analyzer.py +380 -0
- modules/caas/examples/demos/demo.py +309 -0
- modules/caas/examples/demos/demo_context_triad.py +225 -0
- modules/caas/examples/demos/demo_conversation_manager.py +285 -0
- modules/caas/examples/demos/demo_heuristic_router.py +133 -0
- modules/caas/examples/demos/demo_metadata_injection.py +198 -0
- modules/caas/examples/demos/demo_pragmatic_truth.py +303 -0
- modules/caas/examples/demos/demo_structure_aware.py +140 -0
- modules/caas/examples/demos/demo_time_decay.py +247 -0
- modules/caas/examples/demos/demo_trust_gateway.py +383 -0
- modules/caas/examples/multi_agent/README.md +159 -0
- modules/caas/examples/multi_agent/research_team.py +369 -0
- modules/caas/examples/multi_agent/vfs_collaboration.py +393 -0
- modules/caas/examples/usage/auth_module.py +142 -0
- modules/caas/examples/usage/usage_example.py +173 -0
- modules/caas/experiments/README.md +42 -0
- modules/caas/experiments/reproduce_results.py +462 -0
- modules/caas/paper/ARXIV_METADATA.md +145 -0
- modules/caas/paper/ARXIV_README.md +47 -0
- modules/caas/paper/CHECKLIST.md +103 -0
- modules/caas/paper/GITHUB_RELEASE_NOTES.md +105 -0
- modules/caas/paper/README.md +71 -0
- modules/caas/paper/abstract.md +24 -0
- modules/caas/paper/arxiv_submission.tar +0 -0
- modules/caas/paper/arxiv_submission.zip +0 -0
- modules/caas/paper/build_pdf.py +355 -0
- modules/caas/paper/experiments.md +149 -0
- modules/caas/paper/figures/.gitkeep +0 -0
- modules/caas/paper/figures/README.md +237 -0
- modules/caas/paper/figures/fig1_system_architecture.png +0 -0
- modules/caas/paper/figures/fig1_system_architecture.svg +198 -0
- modules/caas/paper/figures/fig2_context_triad.png +0 -0
- modules/caas/paper/figures/fig2_context_triad.svg +105 -0
- modules/caas/paper/figures/fig3_ablation_results.png +0 -0
- modules/caas/paper/figures/fig3_ablation_results.svg +113 -0
- modules/caas/paper/figures/fig4_routing_latency.png +0 -0
- modules/caas/paper/figures/fig4_routing_latency.svg +97 -0
- modules/caas/paper/intro.md +103 -0
- modules/caas/paper/latex/figures/fig1_system_architecture.png +0 -0
- modules/caas/paper/latex/figures/fig2_context_triad.png +0 -0
- modules/caas/paper/latex/figures/fig3_ablation_results.png +0 -0
- modules/caas/paper/latex/figures/fig4_routing_latency.png +0 -0
- modules/caas/paper/latex/main.tex +468 -0
- modules/caas/paper/latex/references.bib +140 -0
- modules/caas/paper/method.md +350 -0
- modules/caas/paper/outline.md +123 -0
- modules/caas/paper/related_work.md +101 -0
- modules/caas/paper/tables/.gitkeep +0 -0
- modules/caas/paper/tables/results_tables.md +50 -0
- modules/caas/pyproject.toml +172 -0
- modules/caas/requirements.txt +11 -0
- modules/caas/src/caas/__init__.py +232 -0
- modules/caas/src/caas/api/__init__.py +7 -0
- modules/caas/src/caas/api/server.py +1326 -0
- modules/caas/src/caas/caching.py +832 -0
- modules/caas/src/caas/cli.py +208 -0
- modules/caas/src/caas/conversation.py +221 -0
- modules/caas/src/caas/decay.py +118 -0
- modules/caas/src/caas/detection/__init__.py +7 -0
- modules/caas/src/caas/detection/detector.py +236 -0
- modules/caas/src/caas/enrichment.py +127 -0
- modules/caas/src/caas/gateway/__init__.py +24 -0
- modules/caas/src/caas/gateway/trust_gateway.py +471 -0
- modules/caas/src/caas/hf_utils.py +477 -0
- modules/caas/src/caas/ingestion/__init__.py +21 -0
- modules/caas/src/caas/ingestion/processors.py +251 -0
- modules/caas/src/caas/ingestion/structure_parser.py +185 -0
- modules/caas/src/caas/models.py +354 -0
- modules/caas/src/caas/pragmatic_truth.py +441 -0
- modules/caas/src/caas/routing/__init__.py +8 -0
- modules/caas/src/caas/routing/heuristic_router.py +242 -0
- modules/caas/src/caas/storage/__init__.py +7 -0
- modules/caas/src/caas/storage/store.py +450 -0
- modules/caas/src/caas/triad.py +472 -0
- modules/caas/src/caas/tuning/__init__.py +7 -0
- modules/caas/src/caas/tuning/tuner.py +322 -0
- modules/caas/src/caas/vfs/__init__.py +12 -0
- modules/caas/src/caas/vfs/filesystem.py +450 -0
- modules/caas/tests/__init__.py +3 -0
- modules/caas/tests/conftest.py +8 -0
- modules/caas/tests/test_caching.py +628 -0
- modules/caas/tests/test_context_triad.py +385 -0
- modules/caas/tests/test_conversation_manager.py +289 -0
- modules/caas/tests/test_functionality.py +215 -0
- modules/caas/tests/test_heuristic_router.py +370 -0
- modules/caas/tests/test_metadata_injection.py +328 -0
- modules/caas/tests/test_pragmatic_truth.py +322 -0
- modules/caas/tests/test_structure_aware_indexing.py +283 -0
- modules/caas/tests/test_time_decay.py +268 -0
- modules/caas/tests/test_trust_gateway.py +445 -0
- modules/caas/tests/test_vfs.py +298 -0
- modules/cmvk/.github/FUNDING.yml +9 -0
- modules/cmvk/.github/dependabot.yml +54 -0
- modules/cmvk/.github/workflows/ci.yml +205 -0
- modules/cmvk/.github/workflows/publish.yml +143 -0
- modules/cmvk/.gitignore +147 -0
- modules/cmvk/.pre-commit-config.yaml +58 -0
- modules/cmvk/CHANGELOG.md +146 -0
- modules/cmvk/CITATION.cff +48 -0
- modules/cmvk/CONTRIBUTING.md +229 -0
- modules/cmvk/Dockerfile +87 -0
- modules/cmvk/HF_MODEL_CARD.md +185 -0
- modules/cmvk/LICENSE +21 -0
- modules/cmvk/README.md +149 -0
- modules/cmvk/SECURITY.md +114 -0
- modules/cmvk/config/prompts/generator_v1.txt +23 -0
- modules/cmvk/config/prompts/verifier_hostile.txt +32 -0
- modules/cmvk/config/settings.yaml +40 -0
- modules/cmvk/coverage_html/.gitignore +2 -0
- modules/cmvk/coverage_html/class_index.html +658 -0
- modules/cmvk/coverage_html/coverage_html_cb_188fc9a4.js +735 -0
- modules/cmvk/coverage_html/favicon_32_cb_c827f16f.png +0 -0
- modules/cmvk/coverage_html/function_index.html +1978 -0
- modules/cmvk/coverage_html/index.html +255 -0
- modules/cmvk/coverage_html/keybd_closed_cb_900cfef5.png +0 -0
- modules/cmvk/coverage_html/status.json +1 -0
- modules/cmvk/coverage_html/style_cb_5c747636.css +389 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38___init___py.html +315 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_audit_py.html +499 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_benchmarks_py.html +575 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_constitutional_py.html +1001 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_hf_utils_py.html +398 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_metrics_py.html +570 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_profiles_py.html +397 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_types_py.html +109 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_verification_py.html +1053 -0
- modules/cmvk/docs/DIAGRAMS.md +325 -0
- modules/cmvk/docs/architecture.md +345 -0
- modules/cmvk/docs/features.md +308 -0
- modules/cmvk/docs/getting_started.md +279 -0
- modules/cmvk/docs/innovation_layer.md +377 -0
- modules/cmvk/docs/safety.md +281 -0
- modules/cmvk/docs/traceability.md +150 -0
- modules/cmvk/examples/basic_example.py +62 -0
- modules/cmvk/examples/demo_complete_pipeline.py +209 -0
- modules/cmvk/examples/demo_innovation_layer.py +197 -0
- modules/cmvk/examples/example.py +112 -0
- modules/cmvk/examples/model_diversity_comparison.py +110 -0
- modules/cmvk/examples/real_api_integration.py +121 -0
- modules/cmvk/examples/test_full_pipeline.py +303 -0
- modules/cmvk/experiments/FEATURE_2_LATERAL_THINKING.md +187 -0
- modules/cmvk/experiments/README.md +216 -0
- modules/cmvk/experiments/ablation_runner.py +666 -0
- modules/cmvk/experiments/baseline_runner.py +158 -0
- modules/cmvk/experiments/blind_spot_benchmark.py +364 -0
- modules/cmvk/experiments/datasets/README.md +85 -0
- modules/cmvk/experiments/datasets/humaneval_50.json +352 -0
- modules/cmvk/experiments/datasets/humaneval_full.json +1150 -0
- modules/cmvk/experiments/datasets/humaneval_sample.json +32 -0
- modules/cmvk/experiments/datasets/sabotage.json +262 -0
- modules/cmvk/experiments/datasets/sample.json +40 -0
- modules/cmvk/experiments/demo_with_traces.py +110 -0
- modules/cmvk/experiments/efficiency_curve.py +259 -0
- modules/cmvk/experiments/experiment_runner.py +243 -0
- modules/cmvk/experiments/paper_data_generator.py +183 -0
- modules/cmvk/experiments/reproduce_results.py +407 -0
- modules/cmvk/experiments/reproducible_runner.py +352 -0
- modules/cmvk/experiments/sabotage_stress_test.py +311 -0
- modules/cmvk/experiments/test_lateral_thinking.py +116 -0
- modules/cmvk/experiments/test_prosecutor.py +41 -0
- modules/cmvk/experiments/visualize_results.py +735 -0
- modules/cmvk/logs/traces/demo_HumanEval_0_20260121-204900.json +36 -0
- modules/cmvk/notebooks/analysis.ipynb +124 -0
- modules/cmvk/paper/PAPER.md +561 -0
- modules/cmvk/paper/arxiv_checklist.md +230 -0
- modules/cmvk/paper/cmvk_neurips.aux +77 -0
- modules/cmvk/paper/cmvk_neurips.bbl +81 -0
- modules/cmvk/paper/cmvk_neurips.blg +48 -0
- modules/cmvk/paper/cmvk_neurips.out +16 -0
- modules/cmvk/paper/cmvk_neurips.pdf +0 -0
- modules/cmvk/paper/cmvk_neurips.tex +309 -0
- modules/cmvk/paper/figures/ablation.png +0 -0
- modules/cmvk/paper/figures/ablation.svg +39 -0
- modules/cmvk/paper/figures/architecture.png +0 -0
- modules/cmvk/paper/figures/architecture.svg +115 -0
- modules/cmvk/paper/figures/results_bar.png +0 -0
- modules/cmvk/paper/figures/results_bar.svg +70 -0
- modules/cmvk/paper/generate_figures.py +383 -0
- modules/cmvk/paper/neurips_2024.sty +101 -0
- modules/cmvk/paper/references.bib +98 -0
- modules/cmvk/paper/structure.tex +200 -0
- modules/cmvk/pyproject.toml +189 -0
- modules/cmvk/requirements-dev.txt +19 -0
- modules/cmvk/requirements.txt +14 -0
- modules/cmvk/src/cmvk/__init__.py +216 -0
- modules/cmvk/src/cmvk/audit.py +400 -0
- modules/cmvk/src/cmvk/benchmarks.py +476 -0
- modules/cmvk/src/cmvk/constitutional.py +902 -0
- modules/cmvk/src/cmvk/hf_utils.py +299 -0
- modules/cmvk/src/cmvk/metrics.py +471 -0
- modules/cmvk/src/cmvk/profiles.py +298 -0
- modules/cmvk/src/cmvk/py.typed +0 -0
- modules/cmvk/src/cmvk/types.py +10 -0
- modules/cmvk/src/cmvk/verification.py +954 -0
- modules/cmvk/src/cross_model_verification_kernel/__init__.py +91 -0
- modules/cmvk/src/cross_model_verification_kernel/__main__.py +10 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/__init__.py +16 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/base_agent.py +142 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/generator_openai.py +223 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/verifier_anthropic.py +448 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/verifier_gemini.py +481 -0
- modules/cmvk/src/cross_model_verification_kernel/cli.py +570 -0
- modules/cmvk/src/cross_model_verification_kernel/core/__init__.py +26 -0
- modules/cmvk/src/cross_model_verification_kernel/core/graph_memory.py +308 -0
- modules/cmvk/src/cross_model_verification_kernel/core/kernel.py +413 -0
- modules/cmvk/src/cross_model_verification_kernel/core/trace_logger.py +75 -0
- modules/cmvk/src/cross_model_verification_kernel/core/types.py +121 -0
- modules/cmvk/src/cross_model_verification_kernel/datasets/__init__.py +20 -0
- modules/cmvk/src/cross_model_verification_kernel/datasets/humaneval_loader.py +271 -0
- modules/cmvk/src/cross_model_verification_kernel/generator.py +118 -0
- modules/cmvk/src/cross_model_verification_kernel/kernel.py +292 -0
- modules/cmvk/src/cross_model_verification_kernel/models.py +111 -0
- modules/cmvk/src/cross_model_verification_kernel/py.typed +1 -0
- modules/cmvk/src/cross_model_verification_kernel/simple_kernel.py +185 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/__init__.py +94 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/huggingface_upload.py +394 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/sandbox.py +159 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/statistics.py +468 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/visualizer.py +312 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/web_search.py +86 -0
- modules/cmvk/src/cross_model_verification_kernel/verifier.py +257 -0
- modules/cmvk/tests/__init__.py +3 -0
- modules/cmvk/tests/conftest.py +61 -0
- modules/cmvk/tests/integration/__init__.py +1 -0
- modules/cmvk/tests/integration/test_anthropic_verifier.py +269 -0
- modules/cmvk/tests/integration/test_integration.py +53 -0
- modules/cmvk/tests/integration/test_lateral_thinking_integration.py +199 -0
- modules/cmvk/tests/integration/test_lateral_thinking_witness.py +208 -0
- modules/cmvk/tests/integration/test_prosecutor_mode.py +131 -0
- modules/cmvk/tests/test_constitutional.py +611 -0
- modules/cmvk/tests/test_enhanced_features.py +603 -0
- modules/cmvk/tests/test_verification.py +255 -0
- modules/cmvk/tests/unit/__init__.py +1 -0
- modules/cmvk/tests/unit/test_agents.py +64 -0
- modules/cmvk/tests/unit/test_cli.py +224 -0
- modules/cmvk/tests/unit/test_core.py +126 -0
- modules/cmvk/tests/unit/test_humaneval_loader.py +197 -0
- modules/cmvk/tests/unit/test_kernel.py +255 -0
- modules/cmvk/tests/unit/test_reproducibility.py +160 -0
- modules/cmvk/tests/unit/test_trace_logger.py +115 -0
- modules/cmvk/tests/unit/test_visualizer.py +218 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/bug_report.yml +82 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/config.yml +11 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/feature_request.yml +104 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/question.yml +70 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/security_vulnerability.yml +84 -0
- modules/control-plane/.github/discussions.yml +73 -0
- modules/control-plane/.github/pull_request_template.md +82 -0
- modules/control-plane/.github/workflows/publish.yml +146 -0
- modules/control-plane/.github/workflows/release.yml +39 -0
- modules/control-plane/.github/workflows/tests.yml +58 -0
- modules/control-plane/.gitignore +55 -0
- modules/control-plane/CHANGELOG.md +203 -0
- modules/control-plane/CONTRIBUTING.md +311 -0
- modules/control-plane/CONTRIBUTORS.md +88 -0
- modules/control-plane/Dockerfile +82 -0
- modules/control-plane/LICENSE +21 -0
- modules/control-plane/MANIFEST.in +17 -0
- modules/control-plane/README.md +1264 -0
- modules/control-plane/ROADMAP.md +228 -0
- modules/control-plane/SECURITY.md +210 -0
- modules/control-plane/SUPPORT.md +106 -0
- modules/control-plane/acp-cli.py +212 -0
- modules/control-plane/benchmark/README.md +257 -0
- modules/control-plane/benchmark/__init__.py +19 -0
- modules/control-plane/benchmark/red_team_dataset.py +517 -0
- modules/control-plane/benchmark.py +563 -0
- modules/control-plane/build_and_publish.sh +130 -0
- modules/control-plane/docker-compose.yml +74 -0
- modules/control-plane/docs/ABLATION_STUDIES.md +528 -0
- modules/control-plane/docs/ADAPTER_GUIDE.md +544 -0
- modules/control-plane/docs/ADVANCED_FEATURES.md +543 -0
- modules/control-plane/docs/AIOS_COMPARISON.md +296 -0
- modules/control-plane/docs/BIBLIOGRAPHY.md +367 -0
- modules/control-plane/docs/CASE_STUDIES.md +645 -0
- modules/control-plane/docs/DOCKER_DEPLOYMENT.md +184 -0
- modules/control-plane/docs/ECOSYSTEM_STATUS.md +98 -0
- modules/control-plane/docs/HF_MODEL_CARD.md +168 -0
- modules/control-plane/docs/KERNEL_V1_RELEASE.md +454 -0
- modules/control-plane/docs/LAYER3_FRAMEWORK.md +227 -0
- modules/control-plane/docs/LIMITATIONS.md +523 -0
- modules/control-plane/docs/PYPI_PUBLISHING.md +195 -0
- modules/control-plane/docs/README.md +58 -0
- modules/control-plane/docs/RELATED_WORK.md +319 -0
- modules/control-plane/docs/RELEASE_v1.1.0.md +252 -0
- modules/control-plane/docs/REPRODUCIBILITY.md +540 -0
- modules/control-plane/docs/RESEARCH_FOUNDATION.md +197 -0
- modules/control-plane/docs/api/CORE.md +270 -0
- modules/control-plane/docs/architecture/architecture.md +120 -0
- modules/control-plane/docs/community/ANNOUNCEMENT_TEMPLATES.md +52 -0
- modules/control-plane/docs/guides/IMPLEMENTATION.md +225 -0
- modules/control-plane/docs/guides/PHILOSOPHY.md +354 -0
- modules/control-plane/docs/guides/QUICKSTART.md +217 -0
- modules/control-plane/examples/README.md +138 -0
- modules/control-plane/examples/a2a_demo.py +410 -0
- modules/control-plane/examples/adapter_demo.py +347 -0
- modules/control-plane/examples/advanced_features.py +403 -0
- modules/control-plane/examples/basic_usage.py +261 -0
- modules/control-plane/examples/benchmark_demo.py +186 -0
- modules/control-plane/examples/compliance_demo.py +333 -0
- modules/control-plane/examples/configuration.py +265 -0
- modules/control-plane/examples/getting_started.py +178 -0
- modules/control-plane/examples/hibernation_and_time_travel_demo.py +406 -0
- modules/control-plane/examples/interactive_tutorial.ipynb +497 -0
- modules/control-plane/examples/kernel_interceptor_demo.py +202 -0
- modules/control-plane/examples/kernel_v1_demo.py +273 -0
- modules/control-plane/examples/langchain_demo.py +281 -0
- modules/control-plane/examples/lifecycle_demo.py +724 -0
- modules/control-plane/examples/mcp_demo.py +378 -0
- modules/control-plane/examples/ml_safety_demo.py +157 -0
- modules/control-plane/examples/multimodal_demo.py +347 -0
- modules/control-plane/examples/observability_demo.py +370 -0
- modules/control-plane/examples/use_cases.py +336 -0
- modules/control-plane/experiments/long_horizon_purge.py +235 -0
- modules/control-plane/experiments/multi_agent_rag.py +165 -0
- modules/control-plane/experiments/reproduce_results.py +667 -0
- modules/control-plane/paper/ARXIV_SUBMISSION_INFO.txt +122 -0
- modules/control-plane/paper/ETHICS_STATEMENT.md +248 -0
- modules/control-plane/paper/PAPER_CHECKLIST.md +72 -0
- modules/control-plane/paper/Paper.pdf +0 -0
- modules/control-plane/paper/README.md +71 -0
- modules/control-plane/paper/appendix.md +152 -0
- modules/control-plane/paper/architecture.md +15 -0
- modules/control-plane/paper/arxiv/figures/ablation_chart.png +0 -0
- modules/control-plane/paper/arxiv/figures/architecture.png +0 -0
- modules/control-plane/paper/arxiv/figures/constraint_graphs.png +0 -0
- modules/control-plane/paper/arxiv/figures/results_chart.png +0 -0
- modules/control-plane/paper/arxiv/main.aux +97 -0
- modules/control-plane/paper/arxiv/main.bbl +112 -0
- modules/control-plane/paper/arxiv/main.blg +48 -0
- modules/control-plane/paper/arxiv/main.out +33 -0
- modules/control-plane/paper/arxiv/main.pdf +0 -0
- modules/control-plane/paper/arxiv/main.tex +479 -0
- modules/control-plane/paper/arxiv/references.bib +234 -0
- modules/control-plane/paper/arxiv_submission.tar +0 -0
- modules/control-plane/paper/arxiv_submission.zip +0 -0
- modules/control-plane/paper/build.sh +68 -0
- modules/control-plane/paper/figures/README.md +47 -0
- modules/control-plane/paper/figures/ablation_chart.pdf +0 -0
- modules/control-plane/paper/figures/ablation_chart.png +0 -0
- modules/control-plane/paper/figures/architecture.pdf +0 -0
- modules/control-plane/paper/figures/architecture.png +0 -0
- modules/control-plane/paper/figures/constraint_graphs.pdf +0 -0
- modules/control-plane/paper/figures/constraint_graphs.png +0 -0
- modules/control-plane/paper/figures/generate_figures.py +252 -0
- modules/control-plane/paper/figures/results_chart.pdf +0 -0
- modules/control-plane/paper/figures/results_chart.png +0 -0
- modules/control-plane/paper/main.md +273 -0
- modules/control-plane/paper/main.tex +214 -0
- modules/control-plane/paper/main_arxiv.aux +53 -0
- modules/control-plane/paper/main_arxiv.out +17 -0
- modules/control-plane/paper/main_arxiv.pdf +0 -0
- modules/control-plane/paper/main_arxiv.tex +264 -0
- modules/control-plane/paper/references.bib +234 -0
- modules/control-plane/pyproject.toml +124 -0
- modules/control-plane/reproducibility/ABLATIONS.md +136 -0
- modules/control-plane/reproducibility/README.md +288 -0
- modules/control-plane/reproducibility/commands.md +467 -0
- modules/control-plane/reproducibility/docker_config/Dockerfile +39 -0
- modules/control-plane/reproducibility/experiment_configs/purge_config.json +46 -0
- modules/control-plane/reproducibility/experiment_configs/rag_config.json +36 -0
- modules/control-plane/reproducibility/hardware_specs.md +317 -0
- modules/control-plane/reproducibility/requirements_frozen.txt +0 -0
- modules/control-plane/reproducibility/run_all_experiments.sh +45 -0
- modules/control-plane/reproducibility/seeds.json +106 -0
- modules/control-plane/scripts/prepare_pypi.py +46 -0
- modules/control-plane/scripts/prepare_release.py +176 -0
- modules/control-plane/scripts/upload_dataset_to_hf.py +316 -0
- modules/control-plane/setup.py +69 -0
- modules/control-plane/src/agent_control_plane/__init__.py +639 -0
- modules/control-plane/src/agent_control_plane/a2a_adapter.py +541 -0
- modules/control-plane/src/agent_control_plane/adapter.py +415 -0
- modules/control-plane/src/agent_control_plane/agent_hibernation.py +364 -0
- modules/control-plane/src/agent_control_plane/agent_kernel.py +464 -0
- modules/control-plane/src/agent_control_plane/compliance.py +718 -0
- modules/control-plane/src/agent_control_plane/constraint_graphs.py +475 -0
- modules/control-plane/src/agent_control_plane/control_plane.py +848 -0
- modules/control-plane/src/agent_control_plane/example_executors.py +193 -0
- modules/control-plane/src/agent_control_plane/execution_engine.py +229 -0
- modules/control-plane/src/agent_control_plane/flight_recorder.py +600 -0
- modules/control-plane/src/agent_control_plane/governance_layer.py +432 -0
- modules/control-plane/src/agent_control_plane/hf_utils.py +561 -0
- modules/control-plane/src/agent_control_plane/interfaces/__init__.py +53 -0
- modules/control-plane/src/agent_control_plane/interfaces/kernel_interface.py +359 -0
- modules/control-plane/src/agent_control_plane/interfaces/plugin_interface.py +495 -0
- modules/control-plane/src/agent_control_plane/interfaces/protocol_interfaces.py +385 -0
- modules/control-plane/src/agent_control_plane/kernel_space.py +707 -0
- modules/control-plane/src/agent_control_plane/langchain_adapter.py +422 -0
- modules/control-plane/src/agent_control_plane/lifecycle.py +3111 -0
- modules/control-plane/src/agent_control_plane/mcp_adapter.py +517 -0
- modules/control-plane/src/agent_control_plane/ml_safety.py +560 -0
- modules/control-plane/src/agent_control_plane/multimodal.py +724 -0
- modules/control-plane/src/agent_control_plane/mute_agent.py +419 -0
- modules/control-plane/src/agent_control_plane/observability.py +785 -0
- modules/control-plane/src/agent_control_plane/orchestrator.py +480 -0
- modules/control-plane/src/agent_control_plane/plugin_registry.py +748 -0
- modules/control-plane/src/agent_control_plane/policy_engine.py +525 -0
- modules/control-plane/src/agent_control_plane/shadow_mode.py +307 -0
- modules/control-plane/src/agent_control_plane/signals.py +491 -0
- modules/control-plane/src/agent_control_plane/supervisor_agents.py +427 -0
- modules/control-plane/src/agent_control_plane/time_travel_debugger.py +554 -0
- modules/control-plane/src/agent_control_plane/tool_registry.py +350 -0
- modules/control-plane/src/agent_control_plane/vfs.py +695 -0
- modules/control-plane/tests/README.md +33 -0
- modules/control-plane/tests/test_a2a_adapter.py +336 -0
- modules/control-plane/tests/test_adapter.py +422 -0
- modules/control-plane/tests/test_advanced_features.py +389 -0
- modules/control-plane/tests/test_benchmark.py +223 -0
- modules/control-plane/tests/test_compliance.py +214 -0
- modules/control-plane/tests/test_control_plane.py +295 -0
- modules/control-plane/tests/test_hibernation.py +274 -0
- modules/control-plane/tests/test_kernel_interception.py +284 -0
- modules/control-plane/tests/test_langchain_adapter.py +258 -0
- modules/control-plane/tests/test_lifecycle.py +1174 -0
- modules/control-plane/tests/test_mcp_adapter.py +293 -0
- modules/control-plane/tests/test_ml_safety.py +142 -0
- modules/control-plane/tests/test_multimodal.py +317 -0
- modules/control-plane/tests/test_new_features.py +435 -0
- modules/control-plane/tests/test_observability.py +338 -0
- modules/control-plane/tests/test_time_travel.py +387 -0
- modules/emk/.github/workflows/ci.yml +105 -0
- modules/emk/.github/workflows/publish.yml +144 -0
- modules/emk/.gitignore +74 -0
- modules/emk/CHANGELOG.md +41 -0
- modules/emk/CONTRIBUTING.md +295 -0
- modules/emk/IMPLEMENTATION.md +174 -0
- modules/emk/LICENSE +21 -0
- modules/emk/MANIFEST.in +8 -0
- modules/emk/README.md +135 -0
- modules/emk/RELEASE_NOTES.md +82 -0
- modules/emk/SECURITY.md +52 -0
- modules/emk/codecov.yml +39 -0
- modules/emk/docs/MEMORY_MANAGEMENT.md +285 -0
- modules/emk/emk/__init__.py +106 -0
- modules/emk/emk/hf_utils.py +419 -0
- modules/emk/emk/indexer.py +144 -0
- modules/emk/emk/py.typed +0 -0
- modules/emk/emk/schema.py +204 -0
- modules/emk/emk/sleep_cycle.py +345 -0
- modules/emk/emk/store.py +479 -0
- modules/emk/examples/basic_usage.py +123 -0
- modules/emk/examples/memory_features_demo.py +154 -0
- modules/emk/experiments/README.md +59 -0
- modules/emk/experiments/reproduce_results.py +461 -0
- modules/emk/experiments/results.json +61 -0
- modules/emk/paper/structure.tex +192 -0
- modules/emk/paper/whitepaper.md +273 -0
- modules/emk/pyproject.toml +91 -0
- modules/emk/setup.py +5 -0
- modules/emk/tests/test_file_adapter.py +195 -0
- modules/emk/tests/test_indexer.py +174 -0
- modules/emk/tests/test_init.py +55 -0
- modules/emk/tests/test_negative_memory.py +83 -0
- modules/emk/tests/test_schema.py +150 -0
- modules/emk/tests/test_semantic_rules.py +175 -0
- modules/emk/tests/test_sleep_cycle.py +335 -0
- modules/emk/tests/test_store_anti_patterns.py +239 -0
- modules/iatp/.github/workflows/docker-build.yml +124 -0
- modules/iatp/.github/workflows/publish.yml +174 -0
- modules/iatp/.github/workflows/python-package.yml +121 -0
- modules/iatp/.gitignore +67 -0
- modules/iatp/.pre-commit-config.yaml +64 -0
- modules/iatp/CHANGELOG.md +120 -0
- modules/iatp/Dockerfile +91 -0
- modules/iatp/IMPLEMENTATION_SUMMARY.md +218 -0
- modules/iatp/MANIFEST.in +9 -0
- modules/iatp/README.md +180 -0
- modules/iatp/docker/Dockerfile.agent +27 -0
- modules/iatp/docker/Dockerfile.sidecar-python +86 -0
- modules/iatp/docker/README.md +258 -0
- modules/iatp/docker-compose.yml +194 -0
- modules/iatp/docs/ARCHITECTURE.md +243 -0
- modules/iatp/docs/CLI_GUIDE.md +220 -0
- modules/iatp/docs/DEPLOYMENT.md +304 -0
- modules/iatp/examples/README.md +132 -0
- modules/iatp/examples/backend_agent.py +39 -0
- modules/iatp/examples/client.py +168 -0
- modules/iatp/examples/demo_attestation_reputation.py +274 -0
- modules/iatp/examples/demo_client.py +240 -0
- modules/iatp/examples/demo_rbac.py +143 -0
- modules/iatp/examples/integration_demo.py +245 -0
- modules/iatp/examples/manifests/coder_agent.json +20 -0
- modules/iatp/examples/manifests/reviewer_agent.json +19 -0
- modules/iatp/examples/manifests/secure_bank.json +14 -0
- modules/iatp/examples/manifests/standard_agent.json +14 -0
- modules/iatp/examples/manifests/untrusted_honeypot.json +14 -0
- modules/iatp/examples/run_secure_bank_sidecar.py +85 -0
- modules/iatp/examples/run_sidecar.py +105 -0
- modules/iatp/examples/run_untrusted_sidecar.py +77 -0
- modules/iatp/examples/secure_bank_agent.py +138 -0
- modules/iatp/examples/test_untrusted.py +82 -0
- modules/iatp/examples/untrusted_agent.py +119 -0
- modules/iatp/experiments/README.md +58 -0
- modules/iatp/experiments/cascading_hallucination/README.md +149 -0
- modules/iatp/experiments/cascading_hallucination/agent_a_user.py +41 -0
- modules/iatp/experiments/cascading_hallucination/agent_b_summarizer.py +54 -0
- modules/iatp/experiments/cascading_hallucination/agent_c_database.py +47 -0
- modules/iatp/experiments/cascading_hallucination/proof_of_concept.py +290 -0
- modules/iatp/experiments/cascading_hallucination/run_experiment.py +226 -0
- modules/iatp/experiments/cascading_hallucination/sidecar_c.py +61 -0
- modules/iatp/experiments/reproduce_results.py +574 -0
- modules/iatp/experiments/results.json +2336 -0
- modules/iatp/iatp/__init__.py +164 -0
- modules/iatp/iatp/attestation.py +401 -0
- modules/iatp/iatp/cli.py +253 -0
- modules/iatp/iatp/hf_utils.py +469 -0
- modules/iatp/iatp/ipc_pipes.py +578 -0
- modules/iatp/iatp/main.py +410 -0
- modules/iatp/iatp/models/__init__.py +445 -0
- modules/iatp/iatp/policy_engine.py +335 -0
- modules/iatp/iatp/py.typed +2 -0
- modules/iatp/iatp/recovery.py +319 -0
- modules/iatp/iatp/security/__init__.py +268 -0
- modules/iatp/iatp/sidecar/__init__.py +517 -0
- modules/iatp/iatp/telemetry/__init__.py +162 -0
- modules/iatp/iatp/tests/__init__.py +1 -0
- modules/iatp/iatp/tests/test_attestation.py +368 -0
- modules/iatp/iatp/tests/test_cli.py +129 -0
- modules/iatp/iatp/tests/test_models.py +128 -0
- modules/iatp/iatp/tests/test_policy_engine.py +345 -0
- modules/iatp/iatp/tests/test_recovery.py +279 -0
- modules/iatp/iatp/tests/test_security.py +220 -0
- modules/iatp/iatp/tests/test_sidecar.py +165 -0
- modules/iatp/iatp/tests/test_telemetry.py +173 -0
- modules/iatp/paper/BLOG.md +307 -0
- modules/iatp/paper/PAPER.md +236 -0
- modules/iatp/paper/RFC_SUBMISSION.md +299 -0
- modules/iatp/paper/whitepaper.md +369 -0
- modules/iatp/proto/README.md +200 -0
- modules/iatp/proto/generate_stubs.py +81 -0
- modules/iatp/proto/iatp.proto +552 -0
- modules/iatp/pyproject.toml +180 -0
- modules/iatp/requirements-dev.txt +2 -0
- modules/iatp/requirements.txt +6 -0
- modules/iatp/setup.py +60 -0
- modules/iatp/sidecar/README.md +487 -0
- modules/iatp/sidecar/go/Dockerfile +32 -0
- modules/iatp/sidecar/go/README.md +237 -0
- modules/iatp/sidecar/go/go.mod +8 -0
- modules/iatp/sidecar/go/main.go +488 -0
- modules/iatp/spec/001-handshake.md +436 -0
- modules/iatp/spec/002-reversibility.md +394 -0
- modules/iatp/spec/schema/capability_manifest.json +266 -0
- modules/iatp/test_integration.py +310 -0
- modules/mcp-kernel-server/README.md +261 -0
- modules/mcp-kernel-server/pyproject.toml +60 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/__init__.py +26 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/cli.py +229 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/resources.py +215 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/server.py +562 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/tools.py +1172 -0
- modules/mute-agent/.github/workflows/safety_check.yml +45 -0
- modules/mute-agent/.gitignore +53 -0
- modules/mute-agent/ARCHITECTURE.md +531 -0
- modules/mute-agent/BENCHMARK_GUIDE.md +384 -0
- modules/mute-agent/COMPLETION_SUMMARY.md +293 -0
- modules/mute-agent/EXPERIMENT_SUMMARY.md +318 -0
- modules/mute-agent/IMPLEMENTATION_SUMMARY.md +212 -0
- modules/mute-agent/LICENSE +21 -0
- modules/mute-agent/PHASE3_SUMMARY.md +297 -0
- modules/mute-agent/README.md +360 -0
- modules/mute-agent/STEEL_MAN_RESULTS.md +353 -0
- modules/mute-agent/USAGE.md +505 -0
- modules/mute-agent/V2_IMPLEMENTATION_SUMMARY.md +253 -0
- modules/mute-agent/V2_STEEL_MAN_IMPLEMENTATION.md +274 -0
- modules/mute-agent/VERIFICATION_REPORT.md +435 -0
- modules/mute-agent/charts/cost_comparison.png +0 -0
- modules/mute-agent/charts/cost_vs_ambiguity.png +0 -0
- modules/mute-agent/charts/metrics_comparison.png +0 -0
- modules/mute-agent/charts/scenario_breakdown.png +0 -0
- modules/mute-agent/charts/trace_attack_blocked.html +140 -0
- modules/mute-agent/charts/trace_attack_blocked.png +0 -0
- modules/mute-agent/charts/trace_failure.html +140 -0
- modules/mute-agent/charts/trace_failure.png +0 -0
- modules/mute-agent/charts/trace_success.html +140 -0
- modules/mute-agent/charts/trace_success.png +0 -0
- modules/mute-agent/examples/__init__.py +1 -0
- modules/mute-agent/examples/advanced_example.py +384 -0
- modules/mute-agent/examples/graph_debugger_demo.py +241 -0
- modules/mute-agent/examples/listener_example.py +297 -0
- modules/mute-agent/examples/simple_example.py +242 -0
- modules/mute-agent/examples/steel_man_demo.py +297 -0
- modules/mute-agent/experiments/README.md +135 -0
- modules/mute-agent/experiments/__init__.py +3 -0
- modules/mute-agent/experiments/agent_comparison.csv +6 -0
- modules/mute-agent/experiments/agent_comparison_50runs.csv +6 -0
- modules/mute-agent/experiments/ambiguity_test.py +335 -0
- modules/mute-agent/experiments/ambiguity_test_results.csv +31 -0
- modules/mute-agent/experiments/ambiguity_test_results_50runs.csv +51 -0
- modules/mute-agent/experiments/baseline_agent.py +189 -0
- modules/mute-agent/experiments/benchmark.py +402 -0
- modules/mute-agent/experiments/demo.py +172 -0
- modules/mute-agent/experiments/generate_cost_curve.py +474 -0
- modules/mute-agent/experiments/jailbreak_test.py +137 -0
- modules/mute-agent/experiments/latent_state_scenario.py +361 -0
- modules/mute-agent/experiments/mute_agent_experiment.py +349 -0
- modules/mute-agent/experiments/run_extended_experiment.py +40 -0
- modules/mute-agent/experiments/run_v2_experiments.py +266 -0
- modules/mute-agent/experiments/run_v2_experiments_auto.py +247 -0
- modules/mute-agent/experiments/v2_scenarios/README.md +214 -0
- modules/mute-agent/experiments/v2_scenarios/__init__.py +4 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_1_deep_dependency.py +325 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_2_adversarial.py +328 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_3_false_positive.py +303 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_4_performance.py +319 -0
- modules/mute-agent/experiments/visualize.py +400 -0
- modules/mute-agent/mute_agent/__init__.py +66 -0
- modules/mute-agent/mute_agent/core/__init__.py +1 -0
- modules/mute-agent/mute_agent/core/execution_agent.py +164 -0
- modules/mute-agent/mute_agent/core/handshake_protocol.py +199 -0
- modules/mute-agent/mute_agent/core/reasoning_agent.py +236 -0
- modules/mute-agent/mute_agent/knowledge_graph/__init__.py +1 -0
- modules/mute-agent/mute_agent/knowledge_graph/graph_elements.py +63 -0
- modules/mute-agent/mute_agent/knowledge_graph/multidimensional_graph.py +168 -0
- modules/mute-agent/mute_agent/knowledge_graph/subgraph.py +222 -0
- modules/mute-agent/mute_agent/listener/__init__.py +41 -0
- modules/mute-agent/mute_agent/listener/adapters/__init__.py +29 -0
- modules/mute-agent/mute_agent/listener/adapters/base_adapter.py +187 -0
- modules/mute-agent/mute_agent/listener/adapters/caas_adapter.py +342 -0
- modules/mute-agent/mute_agent/listener/adapters/control_plane_adapter.py +434 -0
- modules/mute-agent/mute_agent/listener/adapters/iatp_adapter.py +330 -0
- modules/mute-agent/mute_agent/listener/adapters/scak_adapter.py +249 -0
- modules/mute-agent/mute_agent/listener/listener.py +608 -0
- modules/mute-agent/mute_agent/listener/state_observer.py +434 -0
- modules/mute-agent/mute_agent/listener/threshold_config.py +311 -0
- modules/mute-agent/mute_agent/super_system/__init__.py +1 -0
- modules/mute-agent/mute_agent/super_system/router.py +202 -0
- modules/mute-agent/mute_agent/visualization/__init__.py +8 -0
- modules/mute-agent/mute_agent/visualization/graph_debugger.py +495 -0
- modules/mute-agent/requirements-dev.txt +6 -0
- modules/mute-agent/requirements.txt +9 -0
- modules/mute-agent/setup.py +64 -0
- modules/mute-agent/src/__init__.py +0 -0
- modules/mute-agent/src/agents/__init__.py +0 -0
- modules/mute-agent/src/agents/baseline_agent.py +524 -0
- modules/mute-agent/src/agents/interactive_agent.py +113 -0
- modules/mute-agent/src/agents/mute_agent.py +622 -0
- modules/mute-agent/src/benchmarks/__init__.py +0 -0
- modules/mute-agent/src/benchmarks/evaluator.py +481 -0
- modules/mute-agent/src/benchmarks/scenarios.json +985 -0
- modules/mute-agent/src/core/__init__.py +0 -0
- modules/mute-agent/src/core/mock_state.py +320 -0
- modules/mute-agent/src/core/tools.py +441 -0
- modules/nexus/__init__.py +49 -0
- modules/nexus/arbiter.py +357 -0
- modules/nexus/client.py +464 -0
- modules/nexus/dmz.py +417 -0
- modules/nexus/escrow.py +428 -0
- modules/nexus/exceptions.py +284 -0
- modules/nexus/registry.py +391 -0
- modules/nexus/reputation.py +423 -0
- modules/nexus/schemas/__init__.py +49 -0
- modules/nexus/schemas/compliance.py +274 -0
- modules/nexus/schemas/escrow.py +249 -0
- modules/nexus/schemas/manifest.py +223 -0
- modules/nexus/schemas/receipt.py +206 -0
- modules/observability/README.md +192 -0
- modules/observability/alertmanager/alertmanager.yml +116 -0
- modules/observability/alerts/agent-os-alerts.yaml +197 -0
- modules/observability/docker-compose.yml +128 -0
- modules/observability/grafana/dashboards/agent-os-amb.json +448 -0
- modules/observability/grafana/dashboards/agent-os-cmvk.json +441 -0
- modules/observability/grafana/dashboards/agent-os-overview.json +268 -0
- modules/observability/grafana/dashboards/agent-os-performance.json +15 -0
- modules/observability/grafana/dashboards/agent-os-safety.json +50 -0
- modules/observability/grafana/provisioning/dashboards/dashboards.yml +15 -0
- modules/observability/grafana/provisioning/datasources/datasources.yml +33 -0
- modules/observability/otel/otel-collector-config.yml +61 -0
- modules/observability/prometheus/prometheus.yml +63 -0
- modules/observability/pyproject.toml +53 -0
- modules/observability/scripts/export_dashboards.py +55 -0
- modules/observability/src/agent_os_observability/__init__.py +25 -0
- modules/observability/src/agent_os_observability/dashboards.py +896 -0
- modules/observability/src/agent_os_observability/metrics.py +396 -0
- modules/observability/src/agent_os_observability/server.py +221 -0
- modules/observability/src/agent_os_observability/tracer.py +226 -0
- modules/primitives/.gitignore +8 -0
- modules/primitives/README.md +62 -0
- modules/primitives/agent_primitives/__init__.py +22 -0
- modules/primitives/agent_primitives/failures.py +82 -0
- modules/primitives/agent_primitives/py.typed +0 -0
- modules/primitives/pyproject.toml +68 -0
- modules/scak/.github/copilot-instructions.md +396 -0
- modules/scak/.github/workflows/release.yml +117 -0
- modules/scak/.gitignore +32 -0
- modules/scak/CHANGELOG.md +173 -0
- modules/scak/CITATION.cff +62 -0
- modules/scak/CONTRIBUTING.md +429 -0
- modules/scak/Dockerfile +58 -0
- modules/scak/ENTERPRISE_FEATURES.md +518 -0
- modules/scak/IMPLEMENTATION_SUMMARY.md +206 -0
- modules/scak/LIMITATIONS.md +565 -0
- modules/scak/MANIFEST.in +16 -0
- modules/scak/NOVELTY.md +535 -0
- modules/scak/README.md +928 -0
- modules/scak/RESEARCH.md +670 -0
- modules/scak/agent_kernel/__init__.py +66 -0
- modules/scak/agent_kernel/analyzer.py +432 -0
- modules/scak/agent_kernel/auditor.py +31 -0
- modules/scak/agent_kernel/completeness_auditor.py +234 -0
- modules/scak/agent_kernel/detector.py +200 -0
- modules/scak/agent_kernel/kernel.py +741 -0
- modules/scak/agent_kernel/memory_manager.py +82 -0
- modules/scak/agent_kernel/models.py +372 -0
- modules/scak/agent_kernel/nudge_mechanism.py +260 -0
- modules/scak/agent_kernel/outcome_analyzer.py +335 -0
- modules/scak/agent_kernel/patcher.py +579 -0
- modules/scak/agent_kernel/semantic_analyzer.py +313 -0
- modules/scak/agent_kernel/semantic_purge.py +346 -0
- modules/scak/agent_kernel/simulator.py +447 -0
- modules/scak/agent_kernel/teacher.py +82 -0
- modules/scak/agent_kernel/triage.py +149 -0
- modules/scak/build_and_publish.ps1 +74 -0
- modules/scak/build_and_publish.sh +74 -0
- modules/scak/cli.py +471 -0
- modules/scak/dashboard.py +462 -0
- modules/scak/datasets/DATASET_CARD.md +219 -0
- modules/scak/datasets/README.md +143 -0
- modules/scak/datasets/gaia_vague_queries/vague_queries.json +262 -0
- modules/scak/datasets/hf_upload/README.md +219 -0
- modules/scak/datasets/hf_upload/scak_gaia_laziness.jsonl +50 -0
- modules/scak/datasets/prepare_hf_datasets.py +145 -0
- modules/scak/datasets/red_team/jailbreak_patterns.json +202 -0
- modules/scak/docker-compose.yml +99 -0
- modules/scak/docs/Adaptive-Memory-Hierarchy.md +319 -0
- modules/scak/docs/Data-Contracts-and-Schemas.md +285 -0
- modules/scak/docs/Dual-Loop-Architecture.md +344 -0
- modules/scak/docs/Enhanced-Features.md +612 -0
- modules/scak/docs/LANGCHAIN_INTEGRATION.md +572 -0
- modules/scak/docs/README.md +128 -0
- modules/scak/docs/Reference-Implementations.md +163 -0
- modules/scak/docs/SCAK_V2.md +374 -0
- modules/scak/docs/Three-Failure-Types.md +178 -0
- modules/scak/examples/basic_example.py +155 -0
- modules/scak/examples/circuit_breaker_lazy_eval_demo.py +243 -0
- modules/scak/examples/langchain_integration_example.py +339 -0
- modules/scak/examples/layer4_demo.py +243 -0
- modules/scak/examples/production_features_demo.py +353 -0
- modules/scak/examples/quick_demo.py +79 -0
- modules/scak/examples/scak_v2_demo.py +252 -0
- modules/scak/experiments/README.md +438 -0
- modules/scak/experiments/ablation_studies/README.md +192 -0
- modules/scak/experiments/ablation_studies/ablation_no_audit.py +116 -0
- modules/scak/experiments/ablation_studies/ablation_no_purge.py +133 -0
- modules/scak/experiments/chaos_engineering/README.md +332 -0
- modules/scak/experiments/context_efficiency_test.py +328 -0
- modules/scak/experiments/gaia_benchmark/README.md +208 -0
- modules/scak/experiments/laziness_benchmark.py +179 -0
- modules/scak/experiments/long_horizon_task_experiment.py +252 -0
- modules/scak/experiments/multi_agent_rag_experiment.py +284 -0
- modules/scak/experiments/results/ablation_table.md +12 -0
- modules/scak/experiments/results/long_horizon.json +36 -0
- modules/scak/experiments/results/multi_agent_rag.json +66 -0
- modules/scak/experiments/run_comprehensive_ablations.py +332 -0
- modules/scak/experiments/test_auditor_patcher_integration.py +251 -0
- modules/scak/notebooks/getting_started.ipynb +33 -0
- modules/scak/paper/ARXIV_SUBMISSION_METADATA.txt +109 -0
- modules/scak/paper/PAPER_CHECKLIST.md +304 -0
- modules/scak/paper/Paper.pdf +0 -0
- modules/scak/paper/README.md +113 -0
- modules/scak/paper/appendix.md +351 -0
- modules/scak/paper/arxiv/bibliography.bib +284 -0
- modules/scak/paper/arxiv/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/arxiv/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/arxiv/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/arxiv/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/arxiv/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/arxiv/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/arxiv/main.aux +103 -0
- modules/scak/paper/arxiv/main.bbl +113 -0
- modules/scak/paper/arxiv/main.blg +55 -0
- modules/scak/paper/arxiv/main.out +31 -0
- modules/scak/paper/arxiv/main.pdf +0 -0
- modules/scak/paper/arxiv/main.tex +482 -0
- modules/scak/paper/arxiv_submission/bibliography.bib +284 -0
- modules/scak/paper/arxiv_submission/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/arxiv_submission/main.aux +103 -0
- modules/scak/paper/arxiv_submission/main.bbl +113 -0
- modules/scak/paper/arxiv_submission/main.blg +55 -0
- modules/scak/paper/arxiv_submission/main.out +31 -0
- modules/scak/paper/arxiv_submission/main.pdf +0 -0
- modules/scak/paper/arxiv_submission/main.tex +482 -0
- modules/scak/paper/arxiv_submission.tar.gz +0 -0
- modules/scak/paper/bibliography.bib +284 -0
- modules/scak/paper/build.sh +55 -0
- modules/scak/paper/figures/README.md +32 -0
- modules/scak/paper/figures/fig1_ooda_architecture.md +75 -0
- modules/scak/paper/figures/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/figures/fig1_ooda_architecture.png +0 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.md +83 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.png +0 -0
- modules/scak/paper/figures/fig3_gaia_results.md +64 -0
- modules/scak/paper/figures/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/figures/fig3_gaia_results.png +0 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.md +64 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.png +0 -0
- modules/scak/paper/figures/fig5_context_reduction.md +71 -0
- modules/scak/paper/figures/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/figures/fig5_context_reduction.png +0 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.md +80 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.png +0 -0
- modules/scak/paper/figures/generate_figures.py +463 -0
- modules/scak/paper/main.aux +103 -0
- modules/scak/paper/main.bbl +113 -0
- modules/scak/paper/main.blg +55 -0
- modules/scak/paper/main.md +192 -0
- modules/scak/paper/main.out +31 -0
- modules/scak/paper/main.pdf +0 -0
- modules/scak/paper/main.tex +482 -0
- modules/scak/reproducibility/ABLATIONS.md +225 -0
- modules/scak/reproducibility/Dockerfile.reproducibility +34 -0
- modules/scak/reproducibility/README.md +421 -0
- modules/scak/reproducibility/requirements-pinned.txt +32 -0
- modules/scak/reproducibility/run_all_experiments.py +395 -0
- modules/scak/reproducibility/seed_control.py +53 -0
- modules/scak/reproducibility/statistical_analysis.py +302 -0
- modules/scak/requirements.txt +50 -0
- modules/scak/setup.py +93 -0
- modules/scak/src/__init__.py +124 -0
- modules/scak/src/agents/__init__.py +13 -0
- modules/scak/src/agents/conflict_resolution.py +732 -0
- modules/scak/src/agents/orchestrator.py +761 -0
- modules/scak/src/agents/pubsub.py +484 -0
- modules/scak/src/agents/shadow_teacher.py +344 -0
- modules/scak/src/agents/swarm.py +661 -0
- modules/scak/src/agents/worker.py +357 -0
- modules/scak/src/integrations/__init__.py +81 -0
- modules/scak/src/integrations/cmvk_adapter.py +430 -0
- modules/scak/src/integrations/control_plane_adapter.py +601 -0
- modules/scak/src/integrations/langchain_integration.py +902 -0
- modules/scak/src/interfaces/__init__.py +59 -0
- modules/scak/src/interfaces/llm_clients.py +505 -0
- modules/scak/src/interfaces/openapi_tools.py +611 -0
- modules/scak/src/interfaces/plugin_system.py +605 -0
- modules/scak/src/interfaces/protocols.py +365 -0
- modules/scak/src/interfaces/telemetry.py +464 -0
- modules/scak/src/interfaces/tool_registry.py +547 -0
- modules/scak/src/kernel/__init__.py +100 -0
- modules/scak/src/kernel/auditor.py +305 -0
- modules/scak/src/kernel/circuit_breaker.py +398 -0
- modules/scak/src/kernel/core.py +724 -0
- modules/scak/src/kernel/distributed.py +667 -0
- modules/scak/src/kernel/evolution.py +455 -0
- modules/scak/src/kernel/failover.py +621 -0
- modules/scak/src/kernel/governance.py +710 -0
- modules/scak/src/kernel/governance_v2.py +603 -0
- modules/scak/src/kernel/lazy_evaluator.py +514 -0
- modules/scak/src/kernel/load_testing.py +633 -0
- modules/scak/src/kernel/memory.py +945 -0
- modules/scak/src/kernel/patcher.py +581 -0
- modules/scak/src/kernel/rubric.py +419 -0
- modules/scak/src/kernel/schemas.py +390 -0
- modules/scak/src/kernel/skill_mapper.py +309 -0
- modules/scak/src/kernel/triage.py +149 -0
- modules/scak/src/mocks/__init__.py +99 -0
- modules/scak/tests/__init__.py +1 -0
- modules/scak/tests/test_circuit_breaker.py +403 -0
- modules/scak/tests/test_conflict_resolution.py +287 -0
- modules/scak/tests/test_dual_loop.py +463 -0
- modules/scak/tests/test_enhanced_features.py +421 -0
- modules/scak/tests/test_failover_and_load.py +438 -0
- modules/scak/tests/test_governance.py +185 -0
- modules/scak/tests/test_kernel.py +359 -0
- modules/scak/tests/test_langchain_integration.py +451 -0
- modules/scak/tests/test_lazy_evaluator.py +465 -0
- modules/scak/tests/test_llm_clients.py +122 -0
- modules/scak/tests/test_memory_controller.py +528 -0
- modules/scak/tests/test_orchestrator.py +181 -0
- modules/scak/tests/test_phase3_integration.py +265 -0
- modules/scak/tests/test_pubsub_swarm.py +203 -0
- modules/scak/tests/test_reference_implementations.py +240 -0
- modules/scak/tests/test_rubric.py +363 -0
- modules/scak/tests/test_scak_v2.py +651 -0
- modules/scak/tests/test_skill_mapper.py +217 -0
- modules/scak/tests/test_specific_failures.py +393 -0
- modules/scak/tests/test_tool_registry.py +264 -0
- modules/scak/tests/test_tools_and_plugins.py +303 -0
- modules/scak/tests/test_triage.py +596 -0
- modules/scak/tests/test_write_through.py +319 -0
- agent_os_kernel-1.1.0.dist-info/METADATA +0 -400
- agent_os_kernel-1.1.0.dist-info/RECORD +0 -12
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.2.0.dist-info}/WHEEL +0 -0
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.2.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,524 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Baseline Agent - The "Steel Man" Reflective Tool-User
|
|
3
|
+
|
|
4
|
+
This represents a state-of-the-art reflective agent that:
|
|
5
|
+
1. Has access to system state (like `kubectl get all`)
|
|
6
|
+
2. Can reflect on failures and retry (up to 3 turns)
|
|
7
|
+
3. Can ask for clarification when parameters are missing
|
|
8
|
+
4. Uses reasoning to infer context from available information
|
|
9
|
+
|
|
10
|
+
This is the "fair fight" baseline - not a strawman, but a competent agent
|
|
11
|
+
that represents current industry best practices (e.g., ReAct with reflection).
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from typing import Dict, Any, List, Optional, Tuple
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from datetime import datetime
|
|
17
|
+
import sys
|
|
18
|
+
import os
|
|
19
|
+
|
|
20
|
+
# Import the shared infrastructure API
|
|
21
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../..'))
|
|
22
|
+
from src.core.tools import (
|
|
23
|
+
MockInfrastructureAPI,
|
|
24
|
+
SessionContext,
|
|
25
|
+
User,
|
|
26
|
+
UserRole,
|
|
27
|
+
Environment,
|
|
28
|
+
ResourceState,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class ReflectionStep:
    """One recorded entry of the agent's reflection loop.

    Attributes:
        turn: Number of the turn this entry belongs to.
        thought: Reasoning text captured for the entry.
        action: Action name associated with the entry, or None.
        parameters: Parameters associated with the action, or None.
        result: Result payload associated with the action, or None.
        error: Error text associated with the entry, or None.
        timestamp: Filled in with ``datetime.now()`` at construction time
            unless the caller supplies a value.
    """

    # Required fields: none of these has a default, so their order defines
    # the positional constructor signature.
    turn: int
    thought: str
    action: Optional[str]
    parameters: Optional[Dict[str, Any]]
    result: Optional[Dict[str, Any]]
    error: Optional[str]

    # Evaluated per instance via default_factory, not once at import time.
    timestamp: datetime = field(default_factory=datetime.now)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass
class BaselineAgentResult:
    """Outcome record produced by one baseline-agent request.

    All fields are required (no defaults), so the declaration order below is
    also the positional constructor order.
    """

    # --- Outcome of the request ---
    success: bool
    action_taken: Optional[str]
    parameters_used: Optional[Dict[str, Any]]
    final_result: Optional[Dict[str, Any]]

    # --- Failure analysis flags ---
    hallucinated: bool
    hallucination_details: Optional[str]
    safety_violation: bool
    state_misalignment: bool

    # --- Performance metrics ---
    token_count: int
    reflection_steps: List[ReflectionStep]
    turns_used: int
    latency_ms: float

    # --- Clarification handling ---
    needed_clarification: bool
    clarification_question: Optional[str]
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class BaselineAgent:
|
|
70
|
+
"""
|
|
71
|
+
The Steel Man Baseline Agent - Reflective Tool-User
|
|
72
|
+
|
|
73
|
+
Architecture:
|
|
74
|
+
- Maintains context by querying system state
|
|
75
|
+
- Uses reasoning to infer missing parameters
|
|
76
|
+
- Reflects on failures and retries (up to 3 turns)
|
|
77
|
+
- Can ask user for clarification
|
|
78
|
+
|
|
79
|
+
This represents a "good" baseline that doesn't just guess blindly,
|
|
80
|
+
but uses available tools and reasoning to make informed decisions.
|
|
81
|
+
"""
|
|
82
|
+
|
|
83
|
+
# Token costs (simulated)
|
|
84
|
+
BASE_SYSTEM_PROMPT_TOKENS = 800 # Includes reflection instructions
|
|
85
|
+
TOOL_DEFINITIONS_TOKENS = 500 # All tool schemas in context
|
|
86
|
+
REASONING_TOKENS = 300 # Per reasoning step
|
|
87
|
+
REFLECTION_TOKENS = 400 # Per reflection turn
|
|
88
|
+
|
|
89
|
+
MAX_REFLECTION_TURNS = 3
|
|
90
|
+
|
|
91
|
+
def __init__(self, api: MockInfrastructureAPI):
    """Create an agent bound to ``api`` with an empty execution history."""
    # Results are accumulated here as requests are executed.
    self.execution_history: List[BaselineAgentResult] = []
    # Handle used for every infrastructure query and action.
    self.api = api
|
|
95
|
+
|
|
96
|
+
def execute_request(
    self,
    user_command: str,
    context: SessionContext,
    allow_clarification: bool = True
) -> BaselineAgentResult:
    """
    Execute a user request using reflection and tool access.

    One initial attempt is made. While the attempt neither succeeds nor
    asks for clarification, the agent reflects on the failure and retries,
    for at most MAX_REFLECTION_TURNS turns in total.

    Every result, including one that stops on the first turn to ask for
    clarification, is finalized the same way and appended to
    ``self.execution_history``.

    Args:
        user_command: Natural language command from user
        context: Session context with user info and history
        allow_clarification: Whether agent can ask user for clarification

    Returns:
        BaselineAgentResult with token_count, latency_ms, turns_used and
        reflection_steps filled in.
    """
    # Local import keeps this block self-contained; perf_counter is
    # monotonic, so the measured latency cannot go negative or jump when
    # the system clock is adjusted.
    import time

    started = time.perf_counter()
    reflection_steps: List[ReflectionStep] = []
    # Fixed prompt overhead is charged once per request.
    token_count = self.BASE_SYSTEM_PROMPT_TOKENS + self.TOOL_DEFINITIONS_TOKENS

    # Turn 1: initial attempt with reasoning.
    result = self._attempt_execution(
        user_command, context, reflection_steps, allow_clarification
    )
    token_count += self.REASONING_TOKENS

    # Reflection loop: stop on success, on a clarification request, or
    # when the turn budget is exhausted.
    turn = 1
    while (
        not result.success
        and not result.needed_clarification
        and turn < self.MAX_REFLECTION_TURNS
    ):
        turn += 1
        token_count += self.REFLECTION_TOKENS

        # Reflect on why the previous attempt failed.
        reflection_steps.append(self._reflect_on_failure(result, context))

        # Retry with the updated understanding.
        result = self._attempt_execution(
            user_command, context, reflection_steps, allow_clarification
        )
        token_count += self.REASONING_TOKENS

    # Single finalization path for all outcomes.
    result.token_count = token_count
    result.latency_ms = (time.perf_counter() - started) * 1000
    result.turns_used = turn
    result.reflection_steps = reflection_steps

    self.execution_history.append(result)
    return result
|
|
159
|
+
|
|
160
|
+
def _attempt_execution(
    self,
    user_command: str,
    context: SessionContext,
    reflection_steps: List[ReflectionStep],
    allow_clarification: bool
) -> BaselineAgentResult:
    """
    Make one attempt at carrying out the user command.

    Steps, in order:
    1. Fetch the current system state from the API
    2. Derive the intent from the command text
    3. Resolve the action and its parameters
    4. Either return a clarification request or execute the action

    A clarification result is returned only when parameter resolution
    reports that it is needed AND ``allow_clarification`` is true;
    otherwise the resolved action is executed.
    """
    # Step 1: snapshot of system state used for this attempt.
    state_snapshot = self.api.get_system_state(context)

    # Step 2: intent from the raw command text.
    parsed_intent = self._parse_command(user_command)

    # Step 3: resolve action + parameters.
    resolved = self._resolve_parameters(
        parsed_intent, user_command, context, state_snapshot, reflection_steps
    )

    # Step 4 (normal path): execute unless a permitted clarification is pending.
    if not (resolved["needs_clarification"] and allow_clarification):
        chosen_action = resolved["action"]
        chosen_params = resolved["parameters"]
        return self._execute_action(
            chosen_action, chosen_params, context, state_snapshot
        )

    # Clarification path: nothing was executed, so all outcome and failure
    # fields are empty/False; metrics are placeholders set by the caller.
    return BaselineAgentResult(
        success=False,
        action_taken=None,
        parameters_used=None,
        final_result=None,
        hallucinated=False,
        hallucination_details=None,
        safety_violation=False,
        state_misalignment=False,
        token_count=0,  # Will be set later
        reflection_steps=[],
        turns_used=0,
        latency_ms=0,
        needed_clarification=True,
        clarification_question=resolved["clarification_question"]
    )
|
|
210
|
+
|
|
211
|
+
def _parse_command(self, command: str) -> str:
    """
    Map a free-text user command to an intent name.

    Matching is a case-insensitive substring search over an ordered
    keyword table; the first row with a hit wins, so row order matters
    ("restart" must be tested before "start"). A real agent would use
    an LLM here.

    Returns:
        The intent name, or "unknown" when no keyword is present.
    """
    text = command.lower()

    # (keywords, intent) rows, checked top to bottom.
    keyword_table = (
        (("restart",), "restart_service"),
        (("scale",), "scale_service"),
        (("rollback",), "rollback_deployment"),
        (("force delete", "force-delete", "delete", "remove", "clean"), "force_delete"),
        (("start",), "start_service"),
        (("stop",), "stop_service"),
        (("fix",), "restart_service"),  # Assume restart is the fix
    )

    for keywords, intent in keyword_table:
        if any(keyword in text for keyword in keywords):
            return intent

    return "unknown"
|
|
237
|
+
|
|
238
|
+
def _resolve_parameters(
    self,
    intent: str,
    command: str,
    context: SessionContext,
    system_state: Dict[str, Any],
    reflection_steps: List[ReflectionStep]
) -> Dict[str, Any]:
    """
    Try to resolve parameters for the action.

    The Baseline Agent's Strategy for service-level intents:
    1. Check if service is explicitly named in command
    2. If not, use context.last_service_accessed (STALE STATE RISK!)
    3. If still missing, ask for clarification

    Scale requests additionally need a replica count parsed from the
    command; rollback requests take a deployment ID from system state.

    This is where the baseline agent is vulnerable to the "Stale State" scenario.

    Args:
        intent: Intent name produced by command parsing.
        command: The raw user command.
        context: Session context; only last_service_accessed is read.
        system_state: System state; its "services" entry is searched.
        reflection_steps: Accepted for interface compatibility; the
            current implementation does not consult it.

    Returns:
        A dict with the keys "action", "parameters",
        "needs_clarification", "clarification_question" and
        "resolution_method". "parameters" is None when nothing could be
        resolved, and may be incomplete when "needs_clarification" is True.
    """
    services = system_state.get("services", {})
    service_intents = (
        "restart_service",
        "scale_service",
        "start_service",
        "stop_service",
        "force_delete",
    )

    # For actions that need a service
    if intent in service_intents:
        # Try to find service in command
        service_id = self._find_service_in_command(command, services)

        if service_id:
            # Found explicitly in command
            result = {
                "action": intent,
                "parameters": {"service_id": service_id},
                "needs_clarification": False,
                "clarification_question": None,
                "resolution_method": "explicit"
            }
        elif context.last_service_accessed:
            # VULNERABILITY: Use last accessed service (might be stale!)
            result = {
                "action": intent,
                "parameters": {"service_id": context.last_service_accessed},
                "needs_clarification": False,
                "clarification_question": None,
                "resolution_method": "stale_context"  # This is the problem!
            }
        else:
            # Need clarification
            result = {
                "action": intent,
                "parameters": None,
                "needs_clarification": True,
                "clarification_question": f"Which service would you like to {intent.replace('_', ' ')}?",
                "resolution_method": "clarification"
            }

        # Add replicas for scale operations
        if intent == "scale_service" and not result["needs_clarification"]:
            replicas = self._extract_number(command)
            # Compare against None, not truthiness: a parsed count of 0
            # is a real value, not a missing one.
            if replicas is not None:
                result["parameters"]["replicas"] = replicas
            else:
                result["needs_clarification"] = True
                result["clarification_question"] = "How many replicas would you like?"

        return result

    if intent == "rollback_deployment":
        # Try to find deployment ID
        deployment_id = self._find_deployment_in_state(system_state)

        if deployment_id:
            return {
                "action": intent,
                "parameters": {"deployment_id": deployment_id},
                "needs_clarification": False,
                "clarification_question": None,
                "resolution_method": "inferred"
            }

        return {
            "action": intent,
            "parameters": None,
            "needs_clarification": True,
            "clarification_question": "Which deployment would you like to rollback?",
            "resolution_method": "clarification"
        }

    # Unknown intent
    return {
        "action": "unknown",
        "parameters": None,
        "needs_clarification": True,
        "clarification_question": "I'm not sure what you want to do. Can you clarify?",
        "resolution_method": "clarification"
    }
|
|
334
|
+
|
|
335
|
+
def _find_service_in_command(self, command: str, services: Dict[str, Any]) -> Optional[str]:
    """
    Try to find a service ID from the command text.

    A service is a candidate when its name appears (case-insensitively)
    as a substring of the command. A candidate is chosen when, in order:
    1. its environment string also appears in the command, or
    2. it is the only service carrying that name, or
    3. the command and the environment share one of the hints
       "prod", "dev" or "staging".

    Services with an empty or missing name are ignored, and an empty or
    missing environment never counts as "mentioned": the empty string is
    a substring of every command and would otherwise match anything.

    Args:
        command: The raw user command.
        services: Mapping of service ID to a dict that may carry
            "name" and "environment" entries.

    Returns:
        The matching service ID, or None when the command does not
        identify a service (pronouns such as "it" are deliberately left
        unresolved so the caller can fall back to context).
    """
    command_lower = command.lower()
    env_hints = ("prod", "dev", "staging")

    # Check each service
    for service_id, service_data in services.items():
        # `or ""` also covers an explicit None value, not only a missing key.
        service_name = (service_data.get("name") or "").lower()
        env = (service_data.get("environment") or "").lower()

        # Skip services that are unnamed or not mentioned in the command.
        if not service_name or service_name not in command_lower:
            continue

        # If environment is also mentioned, use that to disambiguate
        if env and env in command_lower:
            return service_id

        # If only one service with this name, use it
        matching_services = [
            sid for sid, sdata in services.items()
            if (sdata.get("name") or "").lower() == service_name
        ]
        if len(matching_services) == 1:
            return matching_services[0]

        # Multiple services with same name, check environment hints
        if any(hint in command_lower and hint in env for hint in env_hints):
            return service_id

    return None
|
|
374
|
+
|
|
375
|
+
def _extract_number(self, text: str) -> Optional[int]:
    """
    Return the first run of digits in ``text`` as an int.

    Used for replica counts and similar values. Returns None when the
    text contains no digits at all.
    """
    import re

    first_run = re.search(r'\d+', text)
    if first_run is None:
        return None
    return int(first_run.group())
|
|
380
|
+
|
|
381
|
+
def _find_deployment_in_state(self, system_state: Dict[str, Any]) -> Optional[str]:
    """
    Pick a deployment ID out of the system state.

    Reads the "deployments" mapping and returns its first key, or None
    when the mapping is absent or empty. No attempt is made to choose
    the *right* deployment - the first one simply wins.
    """
    deployments = system_state.get("deployments", {})
    if not deployments:
        return None

    # First key in the mapping (might not be the right one!)
    return next(iter(deployments.keys()))
|
|
388
|
+
|
|
389
|
+
def _execute_action(
    self,
    action: str,
    parameters: Optional[Dict[str, Any]],
    context: SessionContext,
    system_state: Dict[str, Any]
) -> BaselineAgentResult:
    """
    Execute the determined action against the infrastructure API.

    Args:
        action: Action name; "restart_service", "scale_service",
            "rollback_deployment" and "force_delete" are dispatched to
            ``self.api``. Anything else is reported as "Unknown action".
        parameters: Resolved parameters, or None when resolution failed.
        context: Session context, forwarded to the API call and used
            for the stale-context check.
        system_state: Current system state (not consulted here).

    Returns:
        A BaselineAgentResult. On success ``final_result`` is the API
        result. On a failure that carries an error message
        ``final_result`` is also the result dict, so that
        ``_reflect_on_failure`` can read the actual error; it is None
        only when the failure has no error message. Bookkeeping fields
        (token_count, reflection_steps, turns_used, latency_ms) are
        placeholders to be set by the caller.
    """
    # Check if parameters is None (couldn't resolve)
    if parameters is None:
        return BaselineAgentResult(
            success=False,
            action_taken=action,
            parameters_used=None,
            final_result={"error": "Could not resolve parameters"},
            hallucinated=False,
            hallucination_details=None,
            safety_violation=False,
            state_misalignment=False,
            token_count=0,
            reflection_steps=[],
            turns_used=0,
            latency_ms=0,
            needed_clarification=False,
            clarification_question=None
        )

    # Keys each dispatched action reads from `parameters`. Checking them
    # up front turns an incomplete parameter set into a reported failure
    # instead of a KeyError.
    required_keys = {
        "restart_service": ("service_id",),
        "scale_service": ("service_id", "replicas"),
        "rollback_deployment": ("deployment_id",),
        "force_delete": ("service_id",),
    }
    missing = [
        key for key in required_keys.get(action, ())
        if parameters.get(key) is None
    ]

    # Map action to API call
    if missing:
        result = {
            "error": f"Missing required parameter(s): {', '.join(missing)}",
            "safety_violation": False,
        }
    elif action == "restart_service":
        result = self.api.restart_service(parameters["service_id"], context)
    elif action == "scale_service":
        result = self.api.scale_service(
            parameters["service_id"],
            parameters["replicas"],
            context
        )
    elif action == "rollback_deployment":
        result = self.api.rollback_deployment(parameters["deployment_id"], context)
    elif action == "force_delete":
        result = self.api.force_delete(parameters["service_id"], context)
    else:
        # NOTE(review): command parsing can yield "start_service" and
        # "stop_service", which have no API mapping here and therefore
        # end up in this branch - confirm whether that is intended.
        result = {"error": "Unknown action", "safety_violation": False}

    # Analyze result
    success = result.get("success", False)
    error = result.get("error")
    safety_violation = result.get("safety_violation", False)

    # Detect hallucination (using stale context)
    hallucinated = False
    hallucination_details = None
    state_misalignment = False

    # Check if we used stale context (didn't match current focus).
    # Only a real service_id can be stale: without the None guard an
    # action with no service_id (e.g. a rollback) would compare
    # None == None whenever last_service_accessed is unset.
    service_id = parameters.get("service_id")
    if (
        context.current_focus
        and context.current_focus != context.last_service_accessed
        and service_id is not None
        and service_id == context.last_service_accessed
    ):
        hallucinated = True
        hallucination_details = f"Used stale context: {context.last_service_accessed} instead of current focus: {context.current_focus}"
        state_misalignment = True

    # Keep the result on failure when it explains what went wrong;
    # dropping it would leave reflection with nothing but "Unknown error".
    keep_result = success or error is not None

    return BaselineAgentResult(
        success=success,
        action_taken=action,
        parameters_used=parameters,
        final_result=result if keep_result else None,
        hallucinated=hallucinated,
        hallucination_details=hallucination_details,
        safety_violation=safety_violation,
        state_misalignment=state_misalignment,
        token_count=0,  # Will be set by caller
        reflection_steps=[],
        turns_used=0,
        latency_ms=0,
        needed_clarification=False,
        clarification_question=None
    )
|
|
465
|
+
|
|
466
|
+
def _reflect_on_failure(
    self,
    previous_result: BaselineAgentResult,
    context: SessionContext
) -> ReflectionStep:
    """
    Build a reflection step describing why the previous attempt failed.

    The error is read from ``previous_result.final_result["error"]``
    ("Unknown error" when there is no final result) and matched against
    a few known phrases to produce the agent's "thought" about what
    went wrong and how to adjust.

    Args:
        previous_result: Result of the failed attempt.
        context: Session context (not consulted here).

    Returns:
        A ReflectionStep echoing the failed action, its parameters and
        result, together with the derived thought and the error.
    """
    outcome = previous_result.final_result
    error = outcome.get("error") if outcome else "Unknown error"
    error_text = str(error)

    # (phrase to look for, resulting thought), checked in order.
    known_failures = (
        ("Permission denied", "The user doesn't have permission. Should check user role before attempting."),
        ("partial state", "Service is in partial/zombie state. Should use force_delete instead."),
        ("not found", "Resource doesn't exist. Should verify existence first."),
    )

    for phrase, canned_thought in known_failures:
        if phrase in error_text:
            thought = canned_thought
            break
    else:
        thought = f"Operation failed: {error}. Need to reconsider approach."

    return ReflectionStep(
        turn=len(previous_result.reflection_steps) + 1,
        thought=thought,
        action=previous_result.action_taken,
        parameters=previous_result.parameters_used,
        result=outcome,
        error=error,
    )
|
|
497
|
+
|
|
498
|
+
def get_statistics(self) -> Dict[str, Any]:
    """
    Summarize performance across every recorded execution.

    Returns:
        An empty dict when nothing has been executed yet; otherwise a
        dict with the execution count, the fraction of executions for
        which each outcome flag was set, and the mean token count,
        latency and turns used.
    """
    history = self.execution_history
    if not history:
        return {}

    total = len(history)

    def rate(flag: str) -> float:
        # Fraction of executions whose `flag` attribute is truthy.
        return sum(1 for record in history if getattr(record, flag)) / total

    def mean(metric: str) -> float:
        # Arithmetic mean of a numeric attribute over all executions.
        return sum(getattr(record, metric) for record in history) / total

    return {
        "total_executions": total,
        "success_rate": rate("success"),
        "hallucination_rate": rate("hallucinated"),
        "safety_violation_rate": rate("safety_violation"),
        "state_misalignment_rate": rate("state_misalignment"),
        "clarification_rate": rate("needed_clarification"),
        "avg_tokens": mean("token_count"),
        "avg_latency_ms": mean("latency_ms"),
        "avg_turns": mean("turns_used"),
    }
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
"""
|
|
2
|
+
InteractiveAgent - The "Steel Man" / SOTA Baseline
|
|
3
|
+
|
|
4
|
+
This represents the State-of-the-Art (SOTA) approach to building agents,
|
|
5
|
+
based on modern frameworks like LangGraph and AutoGen.
|
|
6
|
+
|
|
7
|
+
Key Capabilities (The "Steel Man" Features):
|
|
8
|
+
1. Reflection: If a tool fails, reads the error and retries (Max 3 attempts)
|
|
9
|
+
2. Human-in-the-Loop: Can pause and ask user for clarification
|
|
10
|
+
3. System State Access: Can query infrastructure state
|
|
11
|
+
4. Context Reasoning: Uses available information to infer intent
|
|
12
|
+
|
|
13
|
+
This is implemented as an alias/wrapper for BaselineAgent to clearly
|
|
14
|
+
document what we're comparing against in the benchmarks.
|
|
15
|
+
|
|
16
|
+
Why this matters:
|
|
17
|
+
We admit that this baseline CAN solve problems, but we argue it solves
|
|
18
|
+
them INEFFICIENTLY due to:
|
|
19
|
+
- High token costs from reflection loops
|
|
20
|
+
- Latency from clarification requests
|
|
21
|
+
- User interruption from Human-in-the-Loop
|
|
22
|
+
|
|
23
|
+
The Mute Agent wins on EFFICIENCY, not just correctness.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from typing import Dict, Any, List, Optional
|
|
27
|
+
from dataclasses import dataclass
|
|
28
|
+
import sys
|
|
29
|
+
import os
|
|
30
|
+
|
|
31
|
+
# Import the BaselineAgent implementation
|
|
32
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../..'))
|
|
33
|
+
from src.agents.baseline_agent import (
|
|
34
|
+
BaselineAgent,
|
|
35
|
+
BaselineAgentResult,
|
|
36
|
+
ReflectionStep,
|
|
37
|
+
)
|
|
38
|
+
from src.core.tools import MockInfrastructureAPI, SessionContext
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class InteractiveAgent(BaselineAgent):
    """
    InteractiveAgent - the "Steel Man" SOTA baseline.

    Functionally this is BaselineAgent under a name that makes explicit
    what the benchmarks compare against: the State-of-the-Art approach
    in the style of LangGraph and AutoGen.

    Architecture:
    - Keeps context by querying system state
    - Reasons about the request to infer missing parameters
    - Reflects on failures and retries (up to 3 turns)
    - May ask the user for clarification (Human-in-the-Loop)

    It is meant as a "fair fight" baseline: a competent agent that
    reflects current industry best practice, not a strawman.

    The Thesis:
    "Clarification is a bug, not a feature, in autonomous systems."

    In high-throughput production systems:
    - Clarification kills latency (waiting for a human response)
    - Reflection kills efficiency (multiple LLM calls)
    - State queries kill simplicity (complex context management)

    The Mute Agent argues that graph constraints provide:
    - Zero clarification needed (deterministic from graph)
    - Zero reflection needed (fail fast on constraints)
    - Zero state queries needed (context encoded in graph)
    """

    def __init__(self, api: MockInfrastructureAPI):
        """
        Create the agent.

        Args:
            api: MockInfrastructureAPI used for infrastructure operations
        """
        # No state of its own: everything is set up by BaselineAgent.
        # The subclass exists for documentation and clarity.
        super().__init__(api)

    def execute_request(
        self,
        user_command: str,
        context: SessionContext,
        allow_clarification: bool = True
    ) -> BaselineAgentResult:
        """
        Handle a user request via reflection and interactive clarification.

        The "SOTA" flow:
        1. Attempt execution with the context that is available
        2. Reflect on failures (up to 3 turns)
        3. Possibly ask the user for clarification (Human-in-the-Loop)

        What this approach costs:
        - Multiple LLM calls for reflection
        - Latency while waiting for the user's answer
        - High token usage from tool definitions

        Args:
            user_command: Natural language command from user
            context: Session context with user info and history
            allow_clarification: Whether to ask user for clarification (default: True)

        Returns:
            BaselineAgentResult with execution details
        """
        # Pure delegation: the behavior lives in BaselineAgent.
        outcome = super().execute_request(user_command, context, allow_clarification)
        return outcome
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
# Export the same result types for convenience
|
|
113
|
+
__all__ = ['InteractiveAgent', 'BaselineAgentResult', 'ReflectionStep']
|