agent-os-kernel 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_os/__init__.py +66 -4
- agent_os/agents_compat.py +286 -0
- agent_os/base_agent.py +308 -0
- agent_os/cli.py +1079 -19
- agent_os/integrations/__init__.py +37 -2
- agent_os/integrations/openai_adapter.py +502 -0
- agent_os/integrations/semantic_kernel_adapter.py +569 -0
- agent_os/stateless.py +349 -0
- agent_os_kernel-1.2.0.dist-info/METADATA +676 -0
- agent_os_kernel-1.2.0.dist-info/RECORD +1053 -0
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.2.0.dist-info}/entry_points.txt +0 -1
- modules/amb/.github/workflows/ci.yml +102 -0
- modules/amb/.github/workflows/publish.yml +146 -0
- modules/amb/.gitignore +134 -0
- modules/amb/CHANGELOG.md +118 -0
- modules/amb/CONTRIBUTING.md +141 -0
- modules/amb/LICENSE +21 -0
- modules/amb/README.md +188 -0
- modules/amb/amb_core/__init__.py +175 -0
- modules/amb/amb_core/adapters/__init__.py +55 -0
- modules/amb/amb_core/adapters/aws_sqs_broker.py +374 -0
- modules/amb/amb_core/adapters/azure_servicebus_broker.py +338 -0
- modules/amb/amb_core/adapters/kafka_broker.py +258 -0
- modules/amb/amb_core/adapters/nats_broker.py +283 -0
- modules/amb/amb_core/adapters/rabbitmq_broker.py +233 -0
- modules/amb/amb_core/adapters/redis_broker.py +260 -0
- modules/amb/amb_core/broker.py +143 -0
- modules/amb/amb_core/bus.py +479 -0
- modules/amb/amb_core/cloudevents.py +507 -0
- modules/amb/amb_core/dlq.py +343 -0
- modules/amb/amb_core/hf_utils.py +534 -0
- modules/amb/amb_core/memory_broker.py +408 -0
- modules/amb/amb_core/models.py +139 -0
- modules/amb/amb_core/persistence.py +527 -0
- modules/amb/amb_core/schema.py +292 -0
- modules/amb/amb_core/tracing.py +356 -0
- modules/amb/examples/advanced_features.py +223 -0
- modules/amb/examples/backpressure_demo.py +225 -0
- modules/amb/examples/basic_usage.py +117 -0
- modules/amb/examples/tracing_demo.py +104 -0
- modules/amb/experiments/README.md +52 -0
- modules/amb/experiments/reproduce_results.py +467 -0
- modules/amb/experiments/results.json +324 -0
- modules/amb/paper/README.md +40 -0
- modules/amb/paper/paper.tex +365 -0
- modules/amb/paper/whitepaper.md +377 -0
- modules/amb/pyproject.toml +117 -0
- modules/amb/tests/__init__.py +1 -0
- modules/amb/tests/test_backpressure_priority.py +280 -0
- modules/amb/tests/test_bus.py +198 -0
- modules/amb/tests/test_cloudevents.py +443 -0
- modules/amb/tests/test_features.py +531 -0
- modules/amb/tests/test_models.py +74 -0
- modules/amb/tests/test_tracing.py +254 -0
- modules/atr/.github/workflows/ci.yml +101 -0
- modules/atr/.github/workflows/publish.yml +140 -0
- modules/atr/.gitignore +134 -0
- modules/atr/.pre-commit-config.yaml +37 -0
- modules/atr/CHANGELOG.md +39 -0
- modules/atr/CONTRIBUTING.md +96 -0
- modules/atr/IMPLEMENTATION_SUMMARY.md +143 -0
- modules/atr/README.md +180 -0
- modules/atr/atr/__init__.py +638 -0
- modules/atr/atr/access.py +346 -0
- modules/atr/atr/composition.py +643 -0
- modules/atr/atr/decorator.py +355 -0
- modules/atr/atr/executor.py +382 -0
- modules/atr/atr/health.py +555 -0
- modules/atr/atr/hf_utils.py +447 -0
- modules/atr/atr/injection.py +420 -0
- modules/atr/atr/metrics.py +438 -0
- modules/atr/atr/policies.py +401 -0
- modules/atr/atr/py.typed +2 -0
- modules/atr/atr/registry.py +450 -0
- modules/atr/atr/schema.py +478 -0
- modules/atr/atr/tools/safe/__init__.py +73 -0
- modules/atr/atr/tools/safe/calculator.py +380 -0
- modules/atr/atr/tools/safe/datetime_tool.py +441 -0
- modules/atr/atr/tools/safe/file_reader.py +400 -0
- modules/atr/atr/tools/safe/http_client.py +314 -0
- modules/atr/atr/tools/safe/json_parser.py +372 -0
- modules/atr/atr/tools/safe/text_tool.py +526 -0
- modules/atr/atr/tools/safe/toolkit.py +173 -0
- modules/atr/docs/PYPI_SETUP.md +113 -0
- modules/atr/examples/README.md +27 -0
- modules/atr/examples/demo.py +144 -0
- modules/atr/examples/sandbox_demo.py +218 -0
- modules/atr/experiments/README.md +69 -0
- modules/atr/experiments/reproduce_results.py +509 -0
- modules/atr/experiments/results/.gitkeep +0 -0
- modules/atr/experiments/results/results_20260123_140334.json +71 -0
- modules/atr/paper/README.md +36 -0
- modules/atr/paper/figures/.gitkeep +0 -0
- modules/atr/paper/references.bib +84 -0
- modules/atr/paper/structure.tex +293 -0
- modules/atr/paper/whitepaper.md +234 -0
- modules/atr/pyproject.toml +148 -0
- modules/atr/requirements.txt +1 -0
- modules/atr/setup.py +30 -0
- modules/atr/tests/__init__.py +1 -0
- modules/atr/tests/test_decorator.py +317 -0
- modules/atr/tests/test_executor.py +245 -0
- modules/atr/tests/test_integration_executor.py +184 -0
- modules/atr/tests/test_registry.py +312 -0
- modules/atr/tests/test_schema.py +182 -0
- modules/atr/tests/test_v2_features.py +708 -0
- modules/caas/.dockerignore +63 -0
- modules/caas/.github/ISSUE_TEMPLATE/bug_report.md +38 -0
- modules/caas/.github/ISSUE_TEMPLATE/custom.md +10 -0
- modules/caas/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
- modules/caas/.github/workflows/ci.yml +100 -0
- modules/caas/.github/workflows/lint.yml +39 -0
- modules/caas/.github/workflows/publish-pypi.yml +124 -0
- modules/caas/.gitignore +73 -0
- modules/caas/.pre-commit-config.yaml +33 -0
- modules/caas/CHANGELOG.md +58 -0
- modules/caas/CONTRIBUTING.md +346 -0
- modules/caas/Dockerfile +41 -0
- modules/caas/LICENSE +21 -0
- modules/caas/MANIFEST.in +11 -0
- modules/caas/README.md +158 -0
- modules/caas/benchmarks/README.md +255 -0
- modules/caas/benchmarks/create_hf_dataset.py +502 -0
- modules/caas/benchmarks/data/sample_corpus/README.md +86 -0
- modules/caas/benchmarks/data/sample_corpus/auth_module.py +211 -0
- modules/caas/benchmarks/data/sample_corpus/contribution_guide.md +185 -0
- modules/caas/benchmarks/data/sample_corpus/remote_work_policy.html +57 -0
- modules/caas/benchmarks/hf_dataset/README.md +214 -0
- modules/caas/benchmarks/hf_dataset/caas_benchmark_corpus.py +73 -0
- modules/caas/benchmarks/hf_dataset/corpus_preview.json +193 -0
- modules/caas/benchmarks/results/README.md +66 -0
- modules/caas/benchmarks/results/evaluation_2026-01-20.json +121 -0
- modules/caas/benchmarks/run_evaluation.py +561 -0
- modules/caas/benchmarks/statistical_tests.py +289 -0
- modules/caas/benchmarks/verify_sample_corpus.py +83 -0
- modules/caas/docker-compose.yml +38 -0
- modules/caas/docs/CONTEXT_TRIAD.md +462 -0
- modules/caas/docs/CONTRIBUTING.md +346 -0
- modules/caas/docs/ETHICS_AND_LIMITATIONS.md +336 -0
- modules/caas/docs/HEURISTIC_ROUTER.md +442 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY.md +363 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_CONTEXT_TRIAD.md +277 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_HEURISTIC_ROUTER.md +231 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_METADATA_INJECTION.md +258 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_PRAGMATIC_TRUTH.md +212 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_TRUST_GATEWAY.md +319 -0
- modules/caas/docs/LAYER_1_PRIMITIVE.md +202 -0
- modules/caas/docs/METADATA_INJECTION.md +404 -0
- modules/caas/docs/PRAGMATIC_TRUTH.md +431 -0
- modules/caas/docs/RELATED_WORK.md +312 -0
- modules/caas/docs/RELEASE_CHECKLIST.md +219 -0
- modules/caas/docs/RELEASE_GUIDE.md +285 -0
- modules/caas/docs/REPRODUCIBILITY.md +386 -0
- modules/caas/docs/SLIDING_WINDOW.md +387 -0
- modules/caas/docs/STRUCTURE_AWARE_INDEXING.md +158 -0
- modules/caas/docs/TESTING.md +259 -0
- modules/caas/docs/THREAT_MODEL.md +247 -0
- modules/caas/docs/TRUST_GATEWAY.md +575 -0
- modules/caas/docs/VFS.md +298 -0
- modules/caas/examples/agents/enterprise_security_agent.py +414 -0
- modules/caas/examples/agents/intelligent_document_analyzer.py +380 -0
- modules/caas/examples/demos/demo.py +309 -0
- modules/caas/examples/demos/demo_context_triad.py +225 -0
- modules/caas/examples/demos/demo_conversation_manager.py +285 -0
- modules/caas/examples/demos/demo_heuristic_router.py +133 -0
- modules/caas/examples/demos/demo_metadata_injection.py +198 -0
- modules/caas/examples/demos/demo_pragmatic_truth.py +303 -0
- modules/caas/examples/demos/demo_structure_aware.py +140 -0
- modules/caas/examples/demos/demo_time_decay.py +247 -0
- modules/caas/examples/demos/demo_trust_gateway.py +383 -0
- modules/caas/examples/multi_agent/README.md +159 -0
- modules/caas/examples/multi_agent/research_team.py +369 -0
- modules/caas/examples/multi_agent/vfs_collaboration.py +393 -0
- modules/caas/examples/usage/auth_module.py +142 -0
- modules/caas/examples/usage/usage_example.py +173 -0
- modules/caas/experiments/README.md +42 -0
- modules/caas/experiments/reproduce_results.py +462 -0
- modules/caas/paper/ARXIV_METADATA.md +145 -0
- modules/caas/paper/ARXIV_README.md +47 -0
- modules/caas/paper/CHECKLIST.md +103 -0
- modules/caas/paper/GITHUB_RELEASE_NOTES.md +105 -0
- modules/caas/paper/README.md +71 -0
- modules/caas/paper/abstract.md +24 -0
- modules/caas/paper/arxiv_submission.tar +0 -0
- modules/caas/paper/arxiv_submission.zip +0 -0
- modules/caas/paper/build_pdf.py +355 -0
- modules/caas/paper/experiments.md +149 -0
- modules/caas/paper/figures/.gitkeep +0 -0
- modules/caas/paper/figures/README.md +237 -0
- modules/caas/paper/figures/fig1_system_architecture.png +0 -0
- modules/caas/paper/figures/fig1_system_architecture.svg +198 -0
- modules/caas/paper/figures/fig2_context_triad.png +0 -0
- modules/caas/paper/figures/fig2_context_triad.svg +105 -0
- modules/caas/paper/figures/fig3_ablation_results.png +0 -0
- modules/caas/paper/figures/fig3_ablation_results.svg +113 -0
- modules/caas/paper/figures/fig4_routing_latency.png +0 -0
- modules/caas/paper/figures/fig4_routing_latency.svg +97 -0
- modules/caas/paper/intro.md +103 -0
- modules/caas/paper/latex/figures/fig1_system_architecture.png +0 -0
- modules/caas/paper/latex/figures/fig2_context_triad.png +0 -0
- modules/caas/paper/latex/figures/fig3_ablation_results.png +0 -0
- modules/caas/paper/latex/figures/fig4_routing_latency.png +0 -0
- modules/caas/paper/latex/main.tex +468 -0
- modules/caas/paper/latex/references.bib +140 -0
- modules/caas/paper/method.md +350 -0
- modules/caas/paper/outline.md +123 -0
- modules/caas/paper/related_work.md +101 -0
- modules/caas/paper/tables/.gitkeep +0 -0
- modules/caas/paper/tables/results_tables.md +50 -0
- modules/caas/pyproject.toml +172 -0
- modules/caas/requirements.txt +11 -0
- modules/caas/src/caas/__init__.py +232 -0
- modules/caas/src/caas/api/__init__.py +7 -0
- modules/caas/src/caas/api/server.py +1326 -0
- modules/caas/src/caas/caching.py +832 -0
- modules/caas/src/caas/cli.py +208 -0
- modules/caas/src/caas/conversation.py +221 -0
- modules/caas/src/caas/decay.py +118 -0
- modules/caas/src/caas/detection/__init__.py +7 -0
- modules/caas/src/caas/detection/detector.py +236 -0
- modules/caas/src/caas/enrichment.py +127 -0
- modules/caas/src/caas/gateway/__init__.py +24 -0
- modules/caas/src/caas/gateway/trust_gateway.py +471 -0
- modules/caas/src/caas/hf_utils.py +477 -0
- modules/caas/src/caas/ingestion/__init__.py +21 -0
- modules/caas/src/caas/ingestion/processors.py +251 -0
- modules/caas/src/caas/ingestion/structure_parser.py +185 -0
- modules/caas/src/caas/models.py +354 -0
- modules/caas/src/caas/pragmatic_truth.py +441 -0
- modules/caas/src/caas/routing/__init__.py +8 -0
- modules/caas/src/caas/routing/heuristic_router.py +242 -0
- modules/caas/src/caas/storage/__init__.py +7 -0
- modules/caas/src/caas/storage/store.py +450 -0
- modules/caas/src/caas/triad.py +472 -0
- modules/caas/src/caas/tuning/__init__.py +7 -0
- modules/caas/src/caas/tuning/tuner.py +322 -0
- modules/caas/src/caas/vfs/__init__.py +12 -0
- modules/caas/src/caas/vfs/filesystem.py +450 -0
- modules/caas/tests/__init__.py +3 -0
- modules/caas/tests/conftest.py +8 -0
- modules/caas/tests/test_caching.py +628 -0
- modules/caas/tests/test_context_triad.py +385 -0
- modules/caas/tests/test_conversation_manager.py +289 -0
- modules/caas/tests/test_functionality.py +215 -0
- modules/caas/tests/test_heuristic_router.py +370 -0
- modules/caas/tests/test_metadata_injection.py +328 -0
- modules/caas/tests/test_pragmatic_truth.py +322 -0
- modules/caas/tests/test_structure_aware_indexing.py +283 -0
- modules/caas/tests/test_time_decay.py +268 -0
- modules/caas/tests/test_trust_gateway.py +445 -0
- modules/caas/tests/test_vfs.py +298 -0
- modules/cmvk/.github/FUNDING.yml +9 -0
- modules/cmvk/.github/dependabot.yml +54 -0
- modules/cmvk/.github/workflows/ci.yml +205 -0
- modules/cmvk/.github/workflows/publish.yml +143 -0
- modules/cmvk/.gitignore +147 -0
- modules/cmvk/.pre-commit-config.yaml +58 -0
- modules/cmvk/CHANGELOG.md +146 -0
- modules/cmvk/CITATION.cff +48 -0
- modules/cmvk/CONTRIBUTING.md +229 -0
- modules/cmvk/Dockerfile +87 -0
- modules/cmvk/HF_MODEL_CARD.md +185 -0
- modules/cmvk/LICENSE +21 -0
- modules/cmvk/README.md +149 -0
- modules/cmvk/SECURITY.md +114 -0
- modules/cmvk/config/prompts/generator_v1.txt +23 -0
- modules/cmvk/config/prompts/verifier_hostile.txt +32 -0
- modules/cmvk/config/settings.yaml +40 -0
- modules/cmvk/coverage_html/.gitignore +2 -0
- modules/cmvk/coverage_html/class_index.html +658 -0
- modules/cmvk/coverage_html/coverage_html_cb_188fc9a4.js +735 -0
- modules/cmvk/coverage_html/favicon_32_cb_c827f16f.png +0 -0
- modules/cmvk/coverage_html/function_index.html +1978 -0
- modules/cmvk/coverage_html/index.html +255 -0
- modules/cmvk/coverage_html/keybd_closed_cb_900cfef5.png +0 -0
- modules/cmvk/coverage_html/status.json +1 -0
- modules/cmvk/coverage_html/style_cb_5c747636.css +389 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38___init___py.html +315 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_audit_py.html +499 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_benchmarks_py.html +575 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_constitutional_py.html +1001 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_hf_utils_py.html +398 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_metrics_py.html +570 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_profiles_py.html +397 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_types_py.html +109 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_verification_py.html +1053 -0
- modules/cmvk/docs/DIAGRAMS.md +325 -0
- modules/cmvk/docs/architecture.md +345 -0
- modules/cmvk/docs/features.md +308 -0
- modules/cmvk/docs/getting_started.md +279 -0
- modules/cmvk/docs/innovation_layer.md +377 -0
- modules/cmvk/docs/safety.md +281 -0
- modules/cmvk/docs/traceability.md +150 -0
- modules/cmvk/examples/basic_example.py +62 -0
- modules/cmvk/examples/demo_complete_pipeline.py +209 -0
- modules/cmvk/examples/demo_innovation_layer.py +197 -0
- modules/cmvk/examples/example.py +112 -0
- modules/cmvk/examples/model_diversity_comparison.py +110 -0
- modules/cmvk/examples/real_api_integration.py +121 -0
- modules/cmvk/examples/test_full_pipeline.py +303 -0
- modules/cmvk/experiments/FEATURE_2_LATERAL_THINKING.md +187 -0
- modules/cmvk/experiments/README.md +216 -0
- modules/cmvk/experiments/ablation_runner.py +666 -0
- modules/cmvk/experiments/baseline_runner.py +158 -0
- modules/cmvk/experiments/blind_spot_benchmark.py +364 -0
- modules/cmvk/experiments/datasets/README.md +85 -0
- modules/cmvk/experiments/datasets/humaneval_50.json +352 -0
- modules/cmvk/experiments/datasets/humaneval_full.json +1150 -0
- modules/cmvk/experiments/datasets/humaneval_sample.json +32 -0
- modules/cmvk/experiments/datasets/sabotage.json +262 -0
- modules/cmvk/experiments/datasets/sample.json +40 -0
- modules/cmvk/experiments/demo_with_traces.py +110 -0
- modules/cmvk/experiments/efficiency_curve.py +259 -0
- modules/cmvk/experiments/experiment_runner.py +243 -0
- modules/cmvk/experiments/paper_data_generator.py +183 -0
- modules/cmvk/experiments/reproduce_results.py +407 -0
- modules/cmvk/experiments/reproducible_runner.py +352 -0
- modules/cmvk/experiments/sabotage_stress_test.py +311 -0
- modules/cmvk/experiments/test_lateral_thinking.py +116 -0
- modules/cmvk/experiments/test_prosecutor.py +41 -0
- modules/cmvk/experiments/visualize_results.py +735 -0
- modules/cmvk/logs/traces/demo_HumanEval_0_20260121-204900.json +36 -0
- modules/cmvk/notebooks/analysis.ipynb +124 -0
- modules/cmvk/paper/PAPER.md +561 -0
- modules/cmvk/paper/arxiv_checklist.md +230 -0
- modules/cmvk/paper/cmvk_neurips.aux +77 -0
- modules/cmvk/paper/cmvk_neurips.bbl +81 -0
- modules/cmvk/paper/cmvk_neurips.blg +48 -0
- modules/cmvk/paper/cmvk_neurips.out +16 -0
- modules/cmvk/paper/cmvk_neurips.pdf +0 -0
- modules/cmvk/paper/cmvk_neurips.tex +309 -0
- modules/cmvk/paper/figures/ablation.png +0 -0
- modules/cmvk/paper/figures/ablation.svg +39 -0
- modules/cmvk/paper/figures/architecture.png +0 -0
- modules/cmvk/paper/figures/architecture.svg +115 -0
- modules/cmvk/paper/figures/results_bar.png +0 -0
- modules/cmvk/paper/figures/results_bar.svg +70 -0
- modules/cmvk/paper/generate_figures.py +383 -0
- modules/cmvk/paper/neurips_2024.sty +101 -0
- modules/cmvk/paper/references.bib +98 -0
- modules/cmvk/paper/structure.tex +200 -0
- modules/cmvk/pyproject.toml +189 -0
- modules/cmvk/requirements-dev.txt +19 -0
- modules/cmvk/requirements.txt +14 -0
- modules/cmvk/src/cmvk/__init__.py +216 -0
- modules/cmvk/src/cmvk/audit.py +400 -0
- modules/cmvk/src/cmvk/benchmarks.py +476 -0
- modules/cmvk/src/cmvk/constitutional.py +902 -0
- modules/cmvk/src/cmvk/hf_utils.py +299 -0
- modules/cmvk/src/cmvk/metrics.py +471 -0
- modules/cmvk/src/cmvk/profiles.py +298 -0
- modules/cmvk/src/cmvk/py.typed +0 -0
- modules/cmvk/src/cmvk/types.py +10 -0
- modules/cmvk/src/cmvk/verification.py +954 -0
- modules/cmvk/src/cross_model_verification_kernel/__init__.py +91 -0
- modules/cmvk/src/cross_model_verification_kernel/__main__.py +10 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/__init__.py +16 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/base_agent.py +142 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/generator_openai.py +223 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/verifier_anthropic.py +448 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/verifier_gemini.py +481 -0
- modules/cmvk/src/cross_model_verification_kernel/cli.py +570 -0
- modules/cmvk/src/cross_model_verification_kernel/core/__init__.py +26 -0
- modules/cmvk/src/cross_model_verification_kernel/core/graph_memory.py +308 -0
- modules/cmvk/src/cross_model_verification_kernel/core/kernel.py +413 -0
- modules/cmvk/src/cross_model_verification_kernel/core/trace_logger.py +75 -0
- modules/cmvk/src/cross_model_verification_kernel/core/types.py +121 -0
- modules/cmvk/src/cross_model_verification_kernel/datasets/__init__.py +20 -0
- modules/cmvk/src/cross_model_verification_kernel/datasets/humaneval_loader.py +271 -0
- modules/cmvk/src/cross_model_verification_kernel/generator.py +118 -0
- modules/cmvk/src/cross_model_verification_kernel/kernel.py +292 -0
- modules/cmvk/src/cross_model_verification_kernel/models.py +111 -0
- modules/cmvk/src/cross_model_verification_kernel/py.typed +1 -0
- modules/cmvk/src/cross_model_verification_kernel/simple_kernel.py +185 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/__init__.py +94 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/huggingface_upload.py +394 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/sandbox.py +159 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/statistics.py +468 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/visualizer.py +312 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/web_search.py +86 -0
- modules/cmvk/src/cross_model_verification_kernel/verifier.py +257 -0
- modules/cmvk/tests/__init__.py +3 -0
- modules/cmvk/tests/conftest.py +61 -0
- modules/cmvk/tests/integration/__init__.py +1 -0
- modules/cmvk/tests/integration/test_anthropic_verifier.py +269 -0
- modules/cmvk/tests/integration/test_integration.py +53 -0
- modules/cmvk/tests/integration/test_lateral_thinking_integration.py +199 -0
- modules/cmvk/tests/integration/test_lateral_thinking_witness.py +208 -0
- modules/cmvk/tests/integration/test_prosecutor_mode.py +131 -0
- modules/cmvk/tests/test_constitutional.py +611 -0
- modules/cmvk/tests/test_enhanced_features.py +603 -0
- modules/cmvk/tests/test_verification.py +255 -0
- modules/cmvk/tests/unit/__init__.py +1 -0
- modules/cmvk/tests/unit/test_agents.py +64 -0
- modules/cmvk/tests/unit/test_cli.py +224 -0
- modules/cmvk/tests/unit/test_core.py +126 -0
- modules/cmvk/tests/unit/test_humaneval_loader.py +197 -0
- modules/cmvk/tests/unit/test_kernel.py +255 -0
- modules/cmvk/tests/unit/test_reproducibility.py +160 -0
- modules/cmvk/tests/unit/test_trace_logger.py +115 -0
- modules/cmvk/tests/unit/test_visualizer.py +218 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/bug_report.yml +82 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/config.yml +11 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/feature_request.yml +104 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/question.yml +70 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/security_vulnerability.yml +84 -0
- modules/control-plane/.github/discussions.yml +73 -0
- modules/control-plane/.github/pull_request_template.md +82 -0
- modules/control-plane/.github/workflows/publish.yml +146 -0
- modules/control-plane/.github/workflows/release.yml +39 -0
- modules/control-plane/.github/workflows/tests.yml +58 -0
- modules/control-plane/.gitignore +55 -0
- modules/control-plane/CHANGELOG.md +203 -0
- modules/control-plane/CONTRIBUTING.md +311 -0
- modules/control-plane/CONTRIBUTORS.md +88 -0
- modules/control-plane/Dockerfile +82 -0
- modules/control-plane/LICENSE +21 -0
- modules/control-plane/MANIFEST.in +17 -0
- modules/control-plane/README.md +1264 -0
- modules/control-plane/ROADMAP.md +228 -0
- modules/control-plane/SECURITY.md +210 -0
- modules/control-plane/SUPPORT.md +106 -0
- modules/control-plane/acp-cli.py +212 -0
- modules/control-plane/benchmark/README.md +257 -0
- modules/control-plane/benchmark/__init__.py +19 -0
- modules/control-plane/benchmark/red_team_dataset.py +517 -0
- modules/control-plane/benchmark.py +563 -0
- modules/control-plane/build_and_publish.sh +130 -0
- modules/control-plane/docker-compose.yml +74 -0
- modules/control-plane/docs/ABLATION_STUDIES.md +528 -0
- modules/control-plane/docs/ADAPTER_GUIDE.md +544 -0
- modules/control-plane/docs/ADVANCED_FEATURES.md +543 -0
- modules/control-plane/docs/AIOS_COMPARISON.md +296 -0
- modules/control-plane/docs/BIBLIOGRAPHY.md +367 -0
- modules/control-plane/docs/CASE_STUDIES.md +645 -0
- modules/control-plane/docs/DOCKER_DEPLOYMENT.md +184 -0
- modules/control-plane/docs/ECOSYSTEM_STATUS.md +98 -0
- modules/control-plane/docs/HF_MODEL_CARD.md +168 -0
- modules/control-plane/docs/KERNEL_V1_RELEASE.md +454 -0
- modules/control-plane/docs/LAYER3_FRAMEWORK.md +227 -0
- modules/control-plane/docs/LIMITATIONS.md +523 -0
- modules/control-plane/docs/PYPI_PUBLISHING.md +195 -0
- modules/control-plane/docs/README.md +58 -0
- modules/control-plane/docs/RELATED_WORK.md +319 -0
- modules/control-plane/docs/RELEASE_v1.1.0.md +252 -0
- modules/control-plane/docs/REPRODUCIBILITY.md +540 -0
- modules/control-plane/docs/RESEARCH_FOUNDATION.md +197 -0
- modules/control-plane/docs/api/CORE.md +270 -0
- modules/control-plane/docs/architecture/architecture.md +120 -0
- modules/control-plane/docs/community/ANNOUNCEMENT_TEMPLATES.md +52 -0
- modules/control-plane/docs/guides/IMPLEMENTATION.md +225 -0
- modules/control-plane/docs/guides/PHILOSOPHY.md +354 -0
- modules/control-plane/docs/guides/QUICKSTART.md +217 -0
- modules/control-plane/examples/README.md +138 -0
- modules/control-plane/examples/a2a_demo.py +410 -0
- modules/control-plane/examples/adapter_demo.py +347 -0
- modules/control-plane/examples/advanced_features.py +403 -0
- modules/control-plane/examples/basic_usage.py +261 -0
- modules/control-plane/examples/benchmark_demo.py +186 -0
- modules/control-plane/examples/compliance_demo.py +333 -0
- modules/control-plane/examples/configuration.py +265 -0
- modules/control-plane/examples/getting_started.py +178 -0
- modules/control-plane/examples/hibernation_and_time_travel_demo.py +406 -0
- modules/control-plane/examples/interactive_tutorial.ipynb +497 -0
- modules/control-plane/examples/kernel_interceptor_demo.py +202 -0
- modules/control-plane/examples/kernel_v1_demo.py +273 -0
- modules/control-plane/examples/langchain_demo.py +281 -0
- modules/control-plane/examples/lifecycle_demo.py +724 -0
- modules/control-plane/examples/mcp_demo.py +378 -0
- modules/control-plane/examples/ml_safety_demo.py +157 -0
- modules/control-plane/examples/multimodal_demo.py +347 -0
- modules/control-plane/examples/observability_demo.py +370 -0
- modules/control-plane/examples/use_cases.py +336 -0
- modules/control-plane/experiments/long_horizon_purge.py +235 -0
- modules/control-plane/experiments/multi_agent_rag.py +165 -0
- modules/control-plane/experiments/reproduce_results.py +667 -0
- modules/control-plane/paper/ARXIV_SUBMISSION_INFO.txt +122 -0
- modules/control-plane/paper/ETHICS_STATEMENT.md +248 -0
- modules/control-plane/paper/PAPER_CHECKLIST.md +72 -0
- modules/control-plane/paper/Paper.pdf +0 -0
- modules/control-plane/paper/README.md +71 -0
- modules/control-plane/paper/appendix.md +152 -0
- modules/control-plane/paper/architecture.md +15 -0
- modules/control-plane/paper/arxiv/figures/ablation_chart.png +0 -0
- modules/control-plane/paper/arxiv/figures/architecture.png +0 -0
- modules/control-plane/paper/arxiv/figures/constraint_graphs.png +0 -0
- modules/control-plane/paper/arxiv/figures/results_chart.png +0 -0
- modules/control-plane/paper/arxiv/main.aux +97 -0
- modules/control-plane/paper/arxiv/main.bbl +112 -0
- modules/control-plane/paper/arxiv/main.blg +48 -0
- modules/control-plane/paper/arxiv/main.out +33 -0
- modules/control-plane/paper/arxiv/main.pdf +0 -0
- modules/control-plane/paper/arxiv/main.tex +479 -0
- modules/control-plane/paper/arxiv/references.bib +234 -0
- modules/control-plane/paper/arxiv_submission.tar +0 -0
- modules/control-plane/paper/arxiv_submission.zip +0 -0
- modules/control-plane/paper/build.sh +68 -0
- modules/control-plane/paper/figures/README.md +47 -0
- modules/control-plane/paper/figures/ablation_chart.pdf +0 -0
- modules/control-plane/paper/figures/ablation_chart.png +0 -0
- modules/control-plane/paper/figures/architecture.pdf +0 -0
- modules/control-plane/paper/figures/architecture.png +0 -0
- modules/control-plane/paper/figures/constraint_graphs.pdf +0 -0
- modules/control-plane/paper/figures/constraint_graphs.png +0 -0
- modules/control-plane/paper/figures/generate_figures.py +252 -0
- modules/control-plane/paper/figures/results_chart.pdf +0 -0
- modules/control-plane/paper/figures/results_chart.png +0 -0
- modules/control-plane/paper/main.md +273 -0
- modules/control-plane/paper/main.tex +214 -0
- modules/control-plane/paper/main_arxiv.aux +53 -0
- modules/control-plane/paper/main_arxiv.out +17 -0
- modules/control-plane/paper/main_arxiv.pdf +0 -0
- modules/control-plane/paper/main_arxiv.tex +264 -0
- modules/control-plane/paper/references.bib +234 -0
- modules/control-plane/pyproject.toml +124 -0
- modules/control-plane/reproducibility/ABLATIONS.md +136 -0
- modules/control-plane/reproducibility/README.md +288 -0
- modules/control-plane/reproducibility/commands.md +467 -0
- modules/control-plane/reproducibility/docker_config/Dockerfile +39 -0
- modules/control-plane/reproducibility/experiment_configs/purge_config.json +46 -0
- modules/control-plane/reproducibility/experiment_configs/rag_config.json +36 -0
- modules/control-plane/reproducibility/hardware_specs.md +317 -0
- modules/control-plane/reproducibility/requirements_frozen.txt +0 -0
- modules/control-plane/reproducibility/run_all_experiments.sh +45 -0
- modules/control-plane/reproducibility/seeds.json +106 -0
- modules/control-plane/scripts/prepare_pypi.py +46 -0
- modules/control-plane/scripts/prepare_release.py +176 -0
- modules/control-plane/scripts/upload_dataset_to_hf.py +316 -0
- modules/control-plane/setup.py +69 -0
- modules/control-plane/src/agent_control_plane/__init__.py +639 -0
- modules/control-plane/src/agent_control_plane/a2a_adapter.py +541 -0
- modules/control-plane/src/agent_control_plane/adapter.py +415 -0
- modules/control-plane/src/agent_control_plane/agent_hibernation.py +364 -0
- modules/control-plane/src/agent_control_plane/agent_kernel.py +464 -0
- modules/control-plane/src/agent_control_plane/compliance.py +718 -0
- modules/control-plane/src/agent_control_plane/constraint_graphs.py +475 -0
- modules/control-plane/src/agent_control_plane/control_plane.py +848 -0
- modules/control-plane/src/agent_control_plane/example_executors.py +193 -0
- modules/control-plane/src/agent_control_plane/execution_engine.py +229 -0
- modules/control-plane/src/agent_control_plane/flight_recorder.py +600 -0
- modules/control-plane/src/agent_control_plane/governance_layer.py +432 -0
- modules/control-plane/src/agent_control_plane/hf_utils.py +561 -0
- modules/control-plane/src/agent_control_plane/interfaces/__init__.py +53 -0
- modules/control-plane/src/agent_control_plane/interfaces/kernel_interface.py +359 -0
- modules/control-plane/src/agent_control_plane/interfaces/plugin_interface.py +495 -0
- modules/control-plane/src/agent_control_plane/interfaces/protocol_interfaces.py +385 -0
- modules/control-plane/src/agent_control_plane/kernel_space.py +707 -0
- modules/control-plane/src/agent_control_plane/langchain_adapter.py +422 -0
- modules/control-plane/src/agent_control_plane/lifecycle.py +3111 -0
- modules/control-plane/src/agent_control_plane/mcp_adapter.py +517 -0
- modules/control-plane/src/agent_control_plane/ml_safety.py +560 -0
- modules/control-plane/src/agent_control_plane/multimodal.py +724 -0
- modules/control-plane/src/agent_control_plane/mute_agent.py +419 -0
- modules/control-plane/src/agent_control_plane/observability.py +785 -0
- modules/control-plane/src/agent_control_plane/orchestrator.py +480 -0
- modules/control-plane/src/agent_control_plane/plugin_registry.py +748 -0
- modules/control-plane/src/agent_control_plane/policy_engine.py +525 -0
- modules/control-plane/src/agent_control_plane/shadow_mode.py +307 -0
- modules/control-plane/src/agent_control_plane/signals.py +491 -0
- modules/control-plane/src/agent_control_plane/supervisor_agents.py +427 -0
- modules/control-plane/src/agent_control_plane/time_travel_debugger.py +554 -0
- modules/control-plane/src/agent_control_plane/tool_registry.py +350 -0
- modules/control-plane/src/agent_control_plane/vfs.py +695 -0
- modules/control-plane/tests/README.md +33 -0
- modules/control-plane/tests/test_a2a_adapter.py +336 -0
- modules/control-plane/tests/test_adapter.py +422 -0
- modules/control-plane/tests/test_advanced_features.py +389 -0
- modules/control-plane/tests/test_benchmark.py +223 -0
- modules/control-plane/tests/test_compliance.py +214 -0
- modules/control-plane/tests/test_control_plane.py +295 -0
- modules/control-plane/tests/test_hibernation.py +274 -0
- modules/control-plane/tests/test_kernel_interception.py +284 -0
- modules/control-plane/tests/test_langchain_adapter.py +258 -0
- modules/control-plane/tests/test_lifecycle.py +1174 -0
- modules/control-plane/tests/test_mcp_adapter.py +293 -0
- modules/control-plane/tests/test_ml_safety.py +142 -0
- modules/control-plane/tests/test_multimodal.py +317 -0
- modules/control-plane/tests/test_new_features.py +435 -0
- modules/control-plane/tests/test_observability.py +338 -0
- modules/control-plane/tests/test_time_travel.py +387 -0
- modules/emk/.github/workflows/ci.yml +105 -0
- modules/emk/.github/workflows/publish.yml +144 -0
- modules/emk/.gitignore +74 -0
- modules/emk/CHANGELOG.md +41 -0
- modules/emk/CONTRIBUTING.md +295 -0
- modules/emk/IMPLEMENTATION.md +174 -0
- modules/emk/LICENSE +21 -0
- modules/emk/MANIFEST.in +8 -0
- modules/emk/README.md +135 -0
- modules/emk/RELEASE_NOTES.md +82 -0
- modules/emk/SECURITY.md +52 -0
- modules/emk/codecov.yml +39 -0
- modules/emk/docs/MEMORY_MANAGEMENT.md +285 -0
- modules/emk/emk/__init__.py +106 -0
- modules/emk/emk/hf_utils.py +419 -0
- modules/emk/emk/indexer.py +144 -0
- modules/emk/emk/py.typed +0 -0
- modules/emk/emk/schema.py +204 -0
- modules/emk/emk/sleep_cycle.py +345 -0
- modules/emk/emk/store.py +479 -0
- modules/emk/examples/basic_usage.py +123 -0
- modules/emk/examples/memory_features_demo.py +154 -0
- modules/emk/experiments/README.md +59 -0
- modules/emk/experiments/reproduce_results.py +461 -0
- modules/emk/experiments/results.json +61 -0
- modules/emk/paper/structure.tex +192 -0
- modules/emk/paper/whitepaper.md +273 -0
- modules/emk/pyproject.toml +91 -0
- modules/emk/setup.py +5 -0
- modules/emk/tests/test_file_adapter.py +195 -0
- modules/emk/tests/test_indexer.py +174 -0
- modules/emk/tests/test_init.py +55 -0
- modules/emk/tests/test_negative_memory.py +83 -0
- modules/emk/tests/test_schema.py +150 -0
- modules/emk/tests/test_semantic_rules.py +175 -0
- modules/emk/tests/test_sleep_cycle.py +335 -0
- modules/emk/tests/test_store_anti_patterns.py +239 -0
- modules/iatp/.github/workflows/docker-build.yml +124 -0
- modules/iatp/.github/workflows/publish.yml +174 -0
- modules/iatp/.github/workflows/python-package.yml +121 -0
- modules/iatp/.gitignore +67 -0
- modules/iatp/.pre-commit-config.yaml +64 -0
- modules/iatp/CHANGELOG.md +120 -0
- modules/iatp/Dockerfile +91 -0
- modules/iatp/IMPLEMENTATION_SUMMARY.md +218 -0
- modules/iatp/MANIFEST.in +9 -0
- modules/iatp/README.md +180 -0
- modules/iatp/docker/Dockerfile.agent +27 -0
- modules/iatp/docker/Dockerfile.sidecar-python +86 -0
- modules/iatp/docker/README.md +258 -0
- modules/iatp/docker-compose.yml +194 -0
- modules/iatp/docs/ARCHITECTURE.md +243 -0
- modules/iatp/docs/CLI_GUIDE.md +220 -0
- modules/iatp/docs/DEPLOYMENT.md +304 -0
- modules/iatp/examples/README.md +132 -0
- modules/iatp/examples/backend_agent.py +39 -0
- modules/iatp/examples/client.py +168 -0
- modules/iatp/examples/demo_attestation_reputation.py +274 -0
- modules/iatp/examples/demo_client.py +240 -0
- modules/iatp/examples/demo_rbac.py +143 -0
- modules/iatp/examples/integration_demo.py +245 -0
- modules/iatp/examples/manifests/coder_agent.json +20 -0
- modules/iatp/examples/manifests/reviewer_agent.json +19 -0
- modules/iatp/examples/manifests/secure_bank.json +14 -0
- modules/iatp/examples/manifests/standard_agent.json +14 -0
- modules/iatp/examples/manifests/untrusted_honeypot.json +14 -0
- modules/iatp/examples/run_secure_bank_sidecar.py +85 -0
- modules/iatp/examples/run_sidecar.py +105 -0
- modules/iatp/examples/run_untrusted_sidecar.py +77 -0
- modules/iatp/examples/secure_bank_agent.py +138 -0
- modules/iatp/examples/test_untrusted.py +82 -0
- modules/iatp/examples/untrusted_agent.py +119 -0
- modules/iatp/experiments/README.md +58 -0
- modules/iatp/experiments/cascading_hallucination/README.md +149 -0
- modules/iatp/experiments/cascading_hallucination/agent_a_user.py +41 -0
- modules/iatp/experiments/cascading_hallucination/agent_b_summarizer.py +54 -0
- modules/iatp/experiments/cascading_hallucination/agent_c_database.py +47 -0
- modules/iatp/experiments/cascading_hallucination/proof_of_concept.py +290 -0
- modules/iatp/experiments/cascading_hallucination/run_experiment.py +226 -0
- modules/iatp/experiments/cascading_hallucination/sidecar_c.py +61 -0
- modules/iatp/experiments/reproduce_results.py +574 -0
- modules/iatp/experiments/results.json +2336 -0
- modules/iatp/iatp/__init__.py +164 -0
- modules/iatp/iatp/attestation.py +401 -0
- modules/iatp/iatp/cli.py +253 -0
- modules/iatp/iatp/hf_utils.py +469 -0
- modules/iatp/iatp/ipc_pipes.py +578 -0
- modules/iatp/iatp/main.py +410 -0
- modules/iatp/iatp/models/__init__.py +445 -0
- modules/iatp/iatp/policy_engine.py +335 -0
- modules/iatp/iatp/py.typed +2 -0
- modules/iatp/iatp/recovery.py +319 -0
- modules/iatp/iatp/security/__init__.py +268 -0
- modules/iatp/iatp/sidecar/__init__.py +517 -0
- modules/iatp/iatp/telemetry/__init__.py +162 -0
- modules/iatp/iatp/tests/__init__.py +1 -0
- modules/iatp/iatp/tests/test_attestation.py +368 -0
- modules/iatp/iatp/tests/test_cli.py +129 -0
- modules/iatp/iatp/tests/test_models.py +128 -0
- modules/iatp/iatp/tests/test_policy_engine.py +345 -0
- modules/iatp/iatp/tests/test_recovery.py +279 -0
- modules/iatp/iatp/tests/test_security.py +220 -0
- modules/iatp/iatp/tests/test_sidecar.py +165 -0
- modules/iatp/iatp/tests/test_telemetry.py +173 -0
- modules/iatp/paper/BLOG.md +307 -0
- modules/iatp/paper/PAPER.md +236 -0
- modules/iatp/paper/RFC_SUBMISSION.md +299 -0
- modules/iatp/paper/whitepaper.md +369 -0
- modules/iatp/proto/README.md +200 -0
- modules/iatp/proto/generate_stubs.py +81 -0
- modules/iatp/proto/iatp.proto +552 -0
- modules/iatp/pyproject.toml +180 -0
- modules/iatp/requirements-dev.txt +2 -0
- modules/iatp/requirements.txt +6 -0
- modules/iatp/setup.py +60 -0
- modules/iatp/sidecar/README.md +487 -0
- modules/iatp/sidecar/go/Dockerfile +32 -0
- modules/iatp/sidecar/go/README.md +237 -0
- modules/iatp/sidecar/go/go.mod +8 -0
- modules/iatp/sidecar/go/main.go +488 -0
- modules/iatp/spec/001-handshake.md +436 -0
- modules/iatp/spec/002-reversibility.md +394 -0
- modules/iatp/spec/schema/capability_manifest.json +266 -0
- modules/iatp/test_integration.py +310 -0
- modules/mcp-kernel-server/README.md +261 -0
- modules/mcp-kernel-server/pyproject.toml +60 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/__init__.py +26 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/cli.py +229 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/resources.py +215 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/server.py +562 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/tools.py +1172 -0
- modules/mute-agent/.github/workflows/safety_check.yml +45 -0
- modules/mute-agent/.gitignore +53 -0
- modules/mute-agent/ARCHITECTURE.md +531 -0
- modules/mute-agent/BENCHMARK_GUIDE.md +384 -0
- modules/mute-agent/COMPLETION_SUMMARY.md +293 -0
- modules/mute-agent/EXPERIMENT_SUMMARY.md +318 -0
- modules/mute-agent/IMPLEMENTATION_SUMMARY.md +212 -0
- modules/mute-agent/LICENSE +21 -0
- modules/mute-agent/PHASE3_SUMMARY.md +297 -0
- modules/mute-agent/README.md +360 -0
- modules/mute-agent/STEEL_MAN_RESULTS.md +353 -0
- modules/mute-agent/USAGE.md +505 -0
- modules/mute-agent/V2_IMPLEMENTATION_SUMMARY.md +253 -0
- modules/mute-agent/V2_STEEL_MAN_IMPLEMENTATION.md +274 -0
- modules/mute-agent/VERIFICATION_REPORT.md +435 -0
- modules/mute-agent/charts/cost_comparison.png +0 -0
- modules/mute-agent/charts/cost_vs_ambiguity.png +0 -0
- modules/mute-agent/charts/metrics_comparison.png +0 -0
- modules/mute-agent/charts/scenario_breakdown.png +0 -0
- modules/mute-agent/charts/trace_attack_blocked.html +140 -0
- modules/mute-agent/charts/trace_attack_blocked.png +0 -0
- modules/mute-agent/charts/trace_failure.html +140 -0
- modules/mute-agent/charts/trace_failure.png +0 -0
- modules/mute-agent/charts/trace_success.html +140 -0
- modules/mute-agent/charts/trace_success.png +0 -0
- modules/mute-agent/examples/__init__.py +1 -0
- modules/mute-agent/examples/advanced_example.py +384 -0
- modules/mute-agent/examples/graph_debugger_demo.py +241 -0
- modules/mute-agent/examples/listener_example.py +297 -0
- modules/mute-agent/examples/simple_example.py +242 -0
- modules/mute-agent/examples/steel_man_demo.py +297 -0
- modules/mute-agent/experiments/README.md +135 -0
- modules/mute-agent/experiments/__init__.py +3 -0
- modules/mute-agent/experiments/agent_comparison.csv +6 -0
- modules/mute-agent/experiments/agent_comparison_50runs.csv +6 -0
- modules/mute-agent/experiments/ambiguity_test.py +335 -0
- modules/mute-agent/experiments/ambiguity_test_results.csv +31 -0
- modules/mute-agent/experiments/ambiguity_test_results_50runs.csv +51 -0
- modules/mute-agent/experiments/baseline_agent.py +189 -0
- modules/mute-agent/experiments/benchmark.py +402 -0
- modules/mute-agent/experiments/demo.py +172 -0
- modules/mute-agent/experiments/generate_cost_curve.py +474 -0
- modules/mute-agent/experiments/jailbreak_test.py +137 -0
- modules/mute-agent/experiments/latent_state_scenario.py +361 -0
- modules/mute-agent/experiments/mute_agent_experiment.py +349 -0
- modules/mute-agent/experiments/run_extended_experiment.py +40 -0
- modules/mute-agent/experiments/run_v2_experiments.py +266 -0
- modules/mute-agent/experiments/run_v2_experiments_auto.py +247 -0
- modules/mute-agent/experiments/v2_scenarios/README.md +214 -0
- modules/mute-agent/experiments/v2_scenarios/__init__.py +4 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_1_deep_dependency.py +325 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_2_adversarial.py +328 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_3_false_positive.py +303 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_4_performance.py +319 -0
- modules/mute-agent/experiments/visualize.py +400 -0
- modules/mute-agent/mute_agent/__init__.py +66 -0
- modules/mute-agent/mute_agent/core/__init__.py +1 -0
- modules/mute-agent/mute_agent/core/execution_agent.py +164 -0
- modules/mute-agent/mute_agent/core/handshake_protocol.py +199 -0
- modules/mute-agent/mute_agent/core/reasoning_agent.py +236 -0
- modules/mute-agent/mute_agent/knowledge_graph/__init__.py +1 -0
- modules/mute-agent/mute_agent/knowledge_graph/graph_elements.py +63 -0
- modules/mute-agent/mute_agent/knowledge_graph/multidimensional_graph.py +168 -0
- modules/mute-agent/mute_agent/knowledge_graph/subgraph.py +222 -0
- modules/mute-agent/mute_agent/listener/__init__.py +41 -0
- modules/mute-agent/mute_agent/listener/adapters/__init__.py +29 -0
- modules/mute-agent/mute_agent/listener/adapters/base_adapter.py +187 -0
- modules/mute-agent/mute_agent/listener/adapters/caas_adapter.py +342 -0
- modules/mute-agent/mute_agent/listener/adapters/control_plane_adapter.py +434 -0
- modules/mute-agent/mute_agent/listener/adapters/iatp_adapter.py +330 -0
- modules/mute-agent/mute_agent/listener/adapters/scak_adapter.py +249 -0
- modules/mute-agent/mute_agent/listener/listener.py +608 -0
- modules/mute-agent/mute_agent/listener/state_observer.py +434 -0
- modules/mute-agent/mute_agent/listener/threshold_config.py +311 -0
- modules/mute-agent/mute_agent/super_system/__init__.py +1 -0
- modules/mute-agent/mute_agent/super_system/router.py +202 -0
- modules/mute-agent/mute_agent/visualization/__init__.py +8 -0
- modules/mute-agent/mute_agent/visualization/graph_debugger.py +495 -0
- modules/mute-agent/requirements-dev.txt +6 -0
- modules/mute-agent/requirements.txt +9 -0
- modules/mute-agent/setup.py +64 -0
- modules/mute-agent/src/__init__.py +0 -0
- modules/mute-agent/src/agents/__init__.py +0 -0
- modules/mute-agent/src/agents/baseline_agent.py +524 -0
- modules/mute-agent/src/agents/interactive_agent.py +113 -0
- modules/mute-agent/src/agents/mute_agent.py +622 -0
- modules/mute-agent/src/benchmarks/__init__.py +0 -0
- modules/mute-agent/src/benchmarks/evaluator.py +481 -0
- modules/mute-agent/src/benchmarks/scenarios.json +985 -0
- modules/mute-agent/src/core/__init__.py +0 -0
- modules/mute-agent/src/core/mock_state.py +320 -0
- modules/mute-agent/src/core/tools.py +441 -0
- modules/nexus/__init__.py +49 -0
- modules/nexus/arbiter.py +357 -0
- modules/nexus/client.py +464 -0
- modules/nexus/dmz.py +417 -0
- modules/nexus/escrow.py +428 -0
- modules/nexus/exceptions.py +284 -0
- modules/nexus/registry.py +391 -0
- modules/nexus/reputation.py +423 -0
- modules/nexus/schemas/__init__.py +49 -0
- modules/nexus/schemas/compliance.py +274 -0
- modules/nexus/schemas/escrow.py +249 -0
- modules/nexus/schemas/manifest.py +223 -0
- modules/nexus/schemas/receipt.py +206 -0
- modules/observability/README.md +192 -0
- modules/observability/alertmanager/alertmanager.yml +116 -0
- modules/observability/alerts/agent-os-alerts.yaml +197 -0
- modules/observability/docker-compose.yml +128 -0
- modules/observability/grafana/dashboards/agent-os-amb.json +448 -0
- modules/observability/grafana/dashboards/agent-os-cmvk.json +441 -0
- modules/observability/grafana/dashboards/agent-os-overview.json +268 -0
- modules/observability/grafana/dashboards/agent-os-performance.json +15 -0
- modules/observability/grafana/dashboards/agent-os-safety.json +50 -0
- modules/observability/grafana/provisioning/dashboards/dashboards.yml +15 -0
- modules/observability/grafana/provisioning/datasources/datasources.yml +33 -0
- modules/observability/otel/otel-collector-config.yml +61 -0
- modules/observability/prometheus/prometheus.yml +63 -0
- modules/observability/pyproject.toml +53 -0
- modules/observability/scripts/export_dashboards.py +55 -0
- modules/observability/src/agent_os_observability/__init__.py +25 -0
- modules/observability/src/agent_os_observability/dashboards.py +896 -0
- modules/observability/src/agent_os_observability/metrics.py +396 -0
- modules/observability/src/agent_os_observability/server.py +221 -0
- modules/observability/src/agent_os_observability/tracer.py +226 -0
- modules/primitives/.gitignore +8 -0
- modules/primitives/README.md +62 -0
- modules/primitives/agent_primitives/__init__.py +22 -0
- modules/primitives/agent_primitives/failures.py +82 -0
- modules/primitives/agent_primitives/py.typed +0 -0
- modules/primitives/pyproject.toml +68 -0
- modules/scak/.github/copilot-instructions.md +396 -0
- modules/scak/.github/workflows/release.yml +117 -0
- modules/scak/.gitignore +32 -0
- modules/scak/CHANGELOG.md +173 -0
- modules/scak/CITATION.cff +62 -0
- modules/scak/CONTRIBUTING.md +429 -0
- modules/scak/Dockerfile +58 -0
- modules/scak/ENTERPRISE_FEATURES.md +518 -0
- modules/scak/IMPLEMENTATION_SUMMARY.md +206 -0
- modules/scak/LIMITATIONS.md +565 -0
- modules/scak/MANIFEST.in +16 -0
- modules/scak/NOVELTY.md +535 -0
- modules/scak/README.md +928 -0
- modules/scak/RESEARCH.md +670 -0
- modules/scak/agent_kernel/__init__.py +66 -0
- modules/scak/agent_kernel/analyzer.py +432 -0
- modules/scak/agent_kernel/auditor.py +31 -0
- modules/scak/agent_kernel/completeness_auditor.py +234 -0
- modules/scak/agent_kernel/detector.py +200 -0
- modules/scak/agent_kernel/kernel.py +741 -0
- modules/scak/agent_kernel/memory_manager.py +82 -0
- modules/scak/agent_kernel/models.py +372 -0
- modules/scak/agent_kernel/nudge_mechanism.py +260 -0
- modules/scak/agent_kernel/outcome_analyzer.py +335 -0
- modules/scak/agent_kernel/patcher.py +579 -0
- modules/scak/agent_kernel/semantic_analyzer.py +313 -0
- modules/scak/agent_kernel/semantic_purge.py +346 -0
- modules/scak/agent_kernel/simulator.py +447 -0
- modules/scak/agent_kernel/teacher.py +82 -0
- modules/scak/agent_kernel/triage.py +149 -0
- modules/scak/build_and_publish.ps1 +74 -0
- modules/scak/build_and_publish.sh +74 -0
- modules/scak/cli.py +471 -0
- modules/scak/dashboard.py +462 -0
- modules/scak/datasets/DATASET_CARD.md +219 -0
- modules/scak/datasets/README.md +143 -0
- modules/scak/datasets/gaia_vague_queries/vague_queries.json +262 -0
- modules/scak/datasets/hf_upload/README.md +219 -0
- modules/scak/datasets/hf_upload/scak_gaia_laziness.jsonl +50 -0
- modules/scak/datasets/prepare_hf_datasets.py +145 -0
- modules/scak/datasets/red_team/jailbreak_patterns.json +202 -0
- modules/scak/docker-compose.yml +99 -0
- modules/scak/docs/Adaptive-Memory-Hierarchy.md +319 -0
- modules/scak/docs/Data-Contracts-and-Schemas.md +285 -0
- modules/scak/docs/Dual-Loop-Architecture.md +344 -0
- modules/scak/docs/Enhanced-Features.md +612 -0
- modules/scak/docs/LANGCHAIN_INTEGRATION.md +572 -0
- modules/scak/docs/README.md +128 -0
- modules/scak/docs/Reference-Implementations.md +163 -0
- modules/scak/docs/SCAK_V2.md +374 -0
- modules/scak/docs/Three-Failure-Types.md +178 -0
- modules/scak/examples/basic_example.py +155 -0
- modules/scak/examples/circuit_breaker_lazy_eval_demo.py +243 -0
- modules/scak/examples/langchain_integration_example.py +339 -0
- modules/scak/examples/layer4_demo.py +243 -0
- modules/scak/examples/production_features_demo.py +353 -0
- modules/scak/examples/quick_demo.py +79 -0
- modules/scak/examples/scak_v2_demo.py +252 -0
- modules/scak/experiments/README.md +438 -0
- modules/scak/experiments/ablation_studies/README.md +192 -0
- modules/scak/experiments/ablation_studies/ablation_no_audit.py +116 -0
- modules/scak/experiments/ablation_studies/ablation_no_purge.py +133 -0
- modules/scak/experiments/chaos_engineering/README.md +332 -0
- modules/scak/experiments/context_efficiency_test.py +328 -0
- modules/scak/experiments/gaia_benchmark/README.md +208 -0
- modules/scak/experiments/laziness_benchmark.py +179 -0
- modules/scak/experiments/long_horizon_task_experiment.py +252 -0
- modules/scak/experiments/multi_agent_rag_experiment.py +284 -0
- modules/scak/experiments/results/ablation_table.md +12 -0
- modules/scak/experiments/results/long_horizon.json +36 -0
- modules/scak/experiments/results/multi_agent_rag.json +66 -0
- modules/scak/experiments/run_comprehensive_ablations.py +332 -0
- modules/scak/experiments/test_auditor_patcher_integration.py +251 -0
- modules/scak/notebooks/getting_started.ipynb +33 -0
- modules/scak/paper/ARXIV_SUBMISSION_METADATA.txt +109 -0
- modules/scak/paper/PAPER_CHECKLIST.md +304 -0
- modules/scak/paper/Paper.pdf +0 -0
- modules/scak/paper/README.md +113 -0
- modules/scak/paper/appendix.md +351 -0
- modules/scak/paper/arxiv/bibliography.bib +284 -0
- modules/scak/paper/arxiv/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/arxiv/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/arxiv/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/arxiv/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/arxiv/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/arxiv/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/arxiv/main.aux +103 -0
- modules/scak/paper/arxiv/main.bbl +113 -0
- modules/scak/paper/arxiv/main.blg +55 -0
- modules/scak/paper/arxiv/main.out +31 -0
- modules/scak/paper/arxiv/main.pdf +0 -0
- modules/scak/paper/arxiv/main.tex +482 -0
- modules/scak/paper/arxiv_submission/bibliography.bib +284 -0
- modules/scak/paper/arxiv_submission/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/arxiv_submission/main.aux +103 -0
- modules/scak/paper/arxiv_submission/main.bbl +113 -0
- modules/scak/paper/arxiv_submission/main.blg +55 -0
- modules/scak/paper/arxiv_submission/main.out +31 -0
- modules/scak/paper/arxiv_submission/main.pdf +0 -0
- modules/scak/paper/arxiv_submission/main.tex +482 -0
- modules/scak/paper/arxiv_submission.tar.gz +0 -0
- modules/scak/paper/bibliography.bib +284 -0
- modules/scak/paper/build.sh +55 -0
- modules/scak/paper/figures/README.md +32 -0
- modules/scak/paper/figures/fig1_ooda_architecture.md +75 -0
- modules/scak/paper/figures/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/figures/fig1_ooda_architecture.png +0 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.md +83 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.png +0 -0
- modules/scak/paper/figures/fig3_gaia_results.md +64 -0
- modules/scak/paper/figures/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/figures/fig3_gaia_results.png +0 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.md +64 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.png +0 -0
- modules/scak/paper/figures/fig5_context_reduction.md +71 -0
- modules/scak/paper/figures/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/figures/fig5_context_reduction.png +0 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.md +80 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.png +0 -0
- modules/scak/paper/figures/generate_figures.py +463 -0
- modules/scak/paper/main.aux +103 -0
- modules/scak/paper/main.bbl +113 -0
- modules/scak/paper/main.blg +55 -0
- modules/scak/paper/main.md +192 -0
- modules/scak/paper/main.out +31 -0
- modules/scak/paper/main.pdf +0 -0
- modules/scak/paper/main.tex +482 -0
- modules/scak/reproducibility/ABLATIONS.md +225 -0
- modules/scak/reproducibility/Dockerfile.reproducibility +34 -0
- modules/scak/reproducibility/README.md +421 -0
- modules/scak/reproducibility/requirements-pinned.txt +32 -0
- modules/scak/reproducibility/run_all_experiments.py +395 -0
- modules/scak/reproducibility/seed_control.py +53 -0
- modules/scak/reproducibility/statistical_analysis.py +302 -0
- modules/scak/requirements.txt +50 -0
- modules/scak/setup.py +93 -0
- modules/scak/src/__init__.py +124 -0
- modules/scak/src/agents/__init__.py +13 -0
- modules/scak/src/agents/conflict_resolution.py +732 -0
- modules/scak/src/agents/orchestrator.py +761 -0
- modules/scak/src/agents/pubsub.py +484 -0
- modules/scak/src/agents/shadow_teacher.py +344 -0
- modules/scak/src/agents/swarm.py +661 -0
- modules/scak/src/agents/worker.py +357 -0
- modules/scak/src/integrations/__init__.py +81 -0
- modules/scak/src/integrations/cmvk_adapter.py +430 -0
- modules/scak/src/integrations/control_plane_adapter.py +601 -0
- modules/scak/src/integrations/langchain_integration.py +902 -0
- modules/scak/src/interfaces/__init__.py +59 -0
- modules/scak/src/interfaces/llm_clients.py +505 -0
- modules/scak/src/interfaces/openapi_tools.py +611 -0
- modules/scak/src/interfaces/plugin_system.py +605 -0
- modules/scak/src/interfaces/protocols.py +365 -0
- modules/scak/src/interfaces/telemetry.py +464 -0
- modules/scak/src/interfaces/tool_registry.py +547 -0
- modules/scak/src/kernel/__init__.py +100 -0
- modules/scak/src/kernel/auditor.py +305 -0
- modules/scak/src/kernel/circuit_breaker.py +398 -0
- modules/scak/src/kernel/core.py +724 -0
- modules/scak/src/kernel/distributed.py +667 -0
- modules/scak/src/kernel/evolution.py +455 -0
- modules/scak/src/kernel/failover.py +621 -0
- modules/scak/src/kernel/governance.py +710 -0
- modules/scak/src/kernel/governance_v2.py +603 -0
- modules/scak/src/kernel/lazy_evaluator.py +514 -0
- modules/scak/src/kernel/load_testing.py +633 -0
- modules/scak/src/kernel/memory.py +945 -0
- modules/scak/src/kernel/patcher.py +581 -0
- modules/scak/src/kernel/rubric.py +419 -0
- modules/scak/src/kernel/schemas.py +390 -0
- modules/scak/src/kernel/skill_mapper.py +309 -0
- modules/scak/src/kernel/triage.py +149 -0
- modules/scak/src/mocks/__init__.py +99 -0
- modules/scak/tests/__init__.py +1 -0
- modules/scak/tests/test_circuit_breaker.py +403 -0
- modules/scak/tests/test_conflict_resolution.py +287 -0
- modules/scak/tests/test_dual_loop.py +463 -0
- modules/scak/tests/test_enhanced_features.py +421 -0
- modules/scak/tests/test_failover_and_load.py +438 -0
- modules/scak/tests/test_governance.py +185 -0
- modules/scak/tests/test_kernel.py +359 -0
- modules/scak/tests/test_langchain_integration.py +451 -0
- modules/scak/tests/test_lazy_evaluator.py +465 -0
- modules/scak/tests/test_llm_clients.py +122 -0
- modules/scak/tests/test_memory_controller.py +528 -0
- modules/scak/tests/test_orchestrator.py +181 -0
- modules/scak/tests/test_phase3_integration.py +265 -0
- modules/scak/tests/test_pubsub_swarm.py +203 -0
- modules/scak/tests/test_reference_implementations.py +240 -0
- modules/scak/tests/test_rubric.py +363 -0
- modules/scak/tests/test_scak_v2.py +651 -0
- modules/scak/tests/test_skill_mapper.py +217 -0
- modules/scak/tests/test_specific_failures.py +393 -0
- modules/scak/tests/test_tool_registry.py +264 -0
- modules/scak/tests/test_tools_and_plugins.py +303 -0
- modules/scak/tests/test_triage.py +596 -0
- modules/scak/tests/test_write_through.py +319 -0
- agent_os_kernel-1.1.0.dist-info/METADATA +0 -400
- agent_os_kernel-1.1.0.dist-info/RECORD +0 -12
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.2.0.dist-info}/WHEEL +0 -0
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.2.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,741 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Self-Correcting Agent Kernel - Main orchestrator.
|
|
3
|
+
|
|
4
|
+
Implements the Dual-Loop Architecture:
|
|
5
|
+
- Loop 1 (Runtime): Constraint Engine (Safety)
|
|
6
|
+
- Loop 2 (Offline): Alignment Engine (Quality & Efficiency)
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
from typing import Optional, Dict, Any, List
|
|
11
|
+
from datetime import datetime
|
|
12
|
+
|
|
13
|
+
from .models import (
|
|
14
|
+
AgentFailure, FailureAnalysis, SimulationResult, CorrectionPatch, AgentState,
|
|
15
|
+
AgentOutcome, CompletenessAudit, ClassifiedPatch,
|
|
16
|
+
ToolExecutionTelemetry, NudgeResult
|
|
17
|
+
)
|
|
18
|
+
from .detector import FailureDetector
|
|
19
|
+
from .analyzer import FailureAnalyzer
|
|
20
|
+
from .simulator import PathSimulator
|
|
21
|
+
from .patcher import AgentPatcher
|
|
22
|
+
from .outcome_analyzer import OutcomeAnalyzer
|
|
23
|
+
from .completeness_auditor import CompletenessAuditor
|
|
24
|
+
from .semantic_purge import SemanticPurge
|
|
25
|
+
from .triage import FailureTriage, FixStrategy
|
|
26
|
+
from .nudge_mechanism import NudgeMechanism
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger(__name__)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class SelfCorrectingAgentKernel:
|
|
32
|
+
"""
|
|
33
|
+
Main kernel implementing the Dual-Loop Architecture.
|
|
34
|
+
|
|
35
|
+
LOOP 1 (Runtime): The Constraint Engine filters for Safety
|
|
36
|
+
LOOP 2 (Offline): The Alignment Engine filters for Quality & Efficiency:
|
|
37
|
+
- Completeness Auditor: Detects "laziness" (give-up signals)
|
|
38
|
+
- Semantic Purge: Manages patch lifecycle (scale by subtraction)
|
|
39
|
+
|
|
40
|
+
When an agent fails OR gives up:
|
|
41
|
+
1. Detects and classifies the outcome
|
|
42
|
+
2. Analyzes for safety (Loop 1) and competence (Loop 2)
|
|
43
|
+
3. Simulates alternative paths
|
|
44
|
+
4. Patches the agent with classified, lifecycle-managed fixes
|
|
45
|
+
"""
|
|
46
|
+
|
|
47
|
+
def __init__(self, config: Optional[Dict[str, Any]] = None):
    """
    Build the kernel and wire up both loops of the Dual-Loop Architecture.

    Args:
        config: Optional configuration dictionary. Keys read here are
            "use_semantic_analysis" (default True), "teacher_model"
            (default "o1-preview"), "triage_config" (default {}) and
            "model_version" (default "gpt-4o"). "log_level" is read by
            _setup_logging(). A missing or falsy config is replaced by
            an empty dict.
    """
    self.config = config or {}
    settings = self.config

    # LOOP 1: runtime safety components
    self.detector = FailureDetector()
    self.analyzer = FailureAnalyzer()
    self.simulator = PathSimulator()
    self.patcher = AgentPatcher()

    # LOOP 2: offline alignment components
    semantic_enabled = settings.get("use_semantic_analysis", True)
    self.outcome_analyzer = OutcomeAnalyzer(use_semantic_analysis=semantic_enabled)
    teacher = settings.get("teacher_model", "o1-preview")
    self.completeness_auditor = CompletenessAuditor(teacher_model=teacher)
    self.semantic_purge = SemanticPurge()
    self.nudge_mechanism = NudgeMechanism()

    # Triage engine: chooses sync (JIT) vs async (batch) correction
    self.triage = FailureTriage(config=settings.get("triage_config", {}))

    # Background queue for async failures (placeholder for production implementation)
    self.async_failure_queue = []

    # Model version is tracked for the semantic purge
    self.current_model_version = settings.get("model_version", "gpt-4o")

    # Logging is configured only after all components exist, as in the
    # original statement order.
    self._setup_logging()

    # Startup banner, emitted line by line so that each value is read at
    # the moment its line is logged.
    emit = logger.info
    rule = "=" * 80
    emit(rule)
    emit("Self-Correcting Agent Kernel initialized (Dual-Loop Architecture)")
    emit(" Loop 1 (Runtime): Constraint Engine (Safety)")
    emit(" Loop 2 (Offline): Alignment Engine (Quality & Efficiency)")
    emit(f" - Completeness Auditor: {self.completeness_auditor.teacher_model}")
    emit(" - Semantic Purge: Active")
    emit(" - Failure Triage: Active (Sync/Async routing)")
    emit(f" - Semantic Analysis: {semantic_enabled}")
    emit(" - Nudge Mechanism: Active")
    emit(f" Model Version: {self.current_model_version}")
    emit(rule)
|
|
94
|
+
|
|
95
|
+
def _setup_logging(self):
|
|
96
|
+
"""Setup logging configuration."""
|
|
97
|
+
log_level = self.config.get("log_level", "INFO")
|
|
98
|
+
logging.basicConfig(
|
|
99
|
+
level=getattr(logging, log_level),
|
|
100
|
+
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
def handle_failure(
    self,
    agent_id: str,
    error_message: str,
    context: Optional[Dict[str, Any]] = None,
    stack_trace: Optional[str] = None,
    auto_patch: bool = True,
    user_prompt: Optional[str] = None,
    chain_of_thought: Optional[List[str]] = None,
    failed_action: Optional[Dict[str, Any]] = None,
    user_metadata: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """
    Run a reported agent failure through the self-correction pipeline.

    When a ``user_prompt`` is supplied, the triage engine first decides
    whether the failure is fixed now or deferred. Deferred failures are
    appended to ``self.async_failure_queue`` and the call returns at once.
    Otherwise (or when no prompt is given, which skips triage entirely) the
    failure is detected, analyzed, simulated, optionally verified by the
    shadow agent, and turned into a patch.

    Args:
        agent_id: Identifier of the failed agent
        error_message: Error message from the failure
        context: Additional context about the failure; its ``"action"``
            entry, if present, is passed to triage as the tool name
        stack_trace: Optional stack trace
        auto_patch: Whether to apply the created patch (default: True)
        user_prompt: Original user prompt; also enables the triage step
        chain_of_thought: Agent's reasoning steps
        failed_action: The specific action that failed
        user_metadata: User metadata forwarded to the triage decision

    Returns:
        One of three dictionaries:
        - queued: ``success`` False, ``queued`` True, plus ``strategy``,
          ``message`` and ``error``
        - no viable path: ``success`` False, ``patch`` None, plus the
          intermediate ``failure``/``analysis``/``diagnosis``/
          ``simulation``/``shadow_result`` objects
        - patched: ``success`` True with the same intermediate objects and
          ``patch``, ``classified_patch``, ``patch_applied``, ``message``
    """
    rule = "=" * 80
    logger.info(rule)
    logger.info("AGENT FAILURE DETECTED - Starting enhanced self-correction process")
    logger.info(f"Agent ID: {agent_id}")
    logger.info(f"Error: {error_message}")
    logger.info(rule)

    # Step 0: Triage - only possible when the original prompt is known.
    if user_prompt:
        if context:
            tool_name = context.get("action")
            triage_context = dict(context)  # copy so the caller's dict is untouched
        else:
            tool_name = None
            triage_context = {}

        if failed_action:
            triage_context["failed_action"] = failed_action
        if chain_of_thought:
            triage_context["chain_of_thought"] = chain_of_thought

        strategy = self.triage.decide_strategy(
            prompt=user_prompt,
            tool_name=tool_name,
            user_metadata=user_metadata,
            context=triage_context
        )
        logger.info(f"[TRIAGE] Decision: {strategy.value}")

        if strategy == FixStrategy.ASYNC_BATCH:
            logger.info(">> Non-Critical Failure. Queuing for async optimization.")
            logger.info(">> Returning error to user immediately (low latency).")

            # Keep everything needed to replay the failure later.
            queued_entry = {
                "agent_id": agent_id,
                "error_message": error_message,
                "context": context,
                "stack_trace": stack_trace,
                "user_prompt": user_prompt,
                "chain_of_thought": chain_of_thought,
                "failed_action": failed_action,
                "timestamp": datetime.utcnow()
            }
            self.async_failure_queue.append(queued_entry)

            deferred = {
                "success": False,
                "strategy": strategy,
                "message": "Non-critical failure queued for async correction",
                "queued": True,
                "error": error_message
            }
            return deferred

        logger.info(">> Critical Failure Detected. Entering Self-Correction Mode (User Waiting)...")
        logger.info(">> High latency path - fixing immediately for reliability.")

    # Step 1: Detect and classify the failure, handing over the full trace.
    logger.info("[1/5] Detecting and classifying failure (capturing full trace)...")
    failure = self.detector.detect_failure(
        agent_id=agent_id,
        error_message=error_message,
        context=context,
        stack_trace=stack_trace,
        user_prompt=user_prompt,
        chain_of_thought=chain_of_thought,
        failed_action=failed_action
    )

    # Step 2: Analyze against this agent's earlier failures.
    logger.info("[2/5] Analyzing failure (identifying cognitive glitches)...")
    history = self.detector.get_failure_history(agent_id=agent_id)
    related = self.analyzer.find_similar_failures(failure, history)
    analysis = self.analyzer.analyze(failure, related)

    # A cognitive diagnosis is only attempted when a trace was captured.
    diagnosis = None
    if failure.failure_trace:
        logger.info(" → Performing deep cognitive analysis...")
        diagnosis = self.analyzer.diagnose_cognitive_glitch(failure)
        logger.info(f" → Cognitive glitch: {diagnosis.cognitive_glitch.value}")

    # Step 3: Simulate an alternative path.
    logger.info("[3/5] Simulating alternative path...")
    simulation = self.simulator.simulate(analysis)

    # Step 4: Counterfactual check with the shadow agent (needs diagnosis + trace).
    shadow_result = None
    if not diagnosis or not failure.failure_trace:
        logger.info("[4/5] Skipping Shadow Agent (no trace available)")
    else:
        logger.info("[4/5] Running counterfactual simulation (Shadow Agent)...")
        shadow_result = self.simulator.simulate_counterfactual(diagnosis, failure)
        logger.info(f" → Shadow agent verified: {shadow_result.verified}")

    # Either a successful simulation or a verified shadow run is enough.
    viable = simulation.success or (shadow_result and shadow_result.verified)
    if not viable:
        logger.warning("Simulation did not produce a viable alternative path")
        unresolved = {
            "success": False,
            "failure": failure,
            "analysis": analysis,
            "diagnosis": diagnosis,
            "simulation": simulation,
            "shadow_result": shadow_result,
            "patch": None,
            "message": "Could not find a viable alternative path"
        }
        return unresolved

    # Step 5: Build the patch and register it for lifecycle management.
    logger.info("[5/5] Creating correction patch (The Optimizer)...")
    patch = self.patcher.create_patch(
        agent_id, analysis, simulation, diagnosis, shadow_result
    )

    classified_patch = self.semantic_purge.register_patch(
        patch=patch,
        current_model_version=self.current_model_version
    )
    logger.info(f" → Patch classified as: {classified_patch.decay_type.value}")

    if auto_patch:
        logger.info("Auto-patching enabled, applying patch...")
        patch_applied = self.patcher.apply_patch(patch)
    else:
        patch_applied = False
        logger.info("Auto-patching disabled, patch created but not applied")

    logger.info(rule)
    logger.info("SELF-CORRECTION COMPLETE")
    logger.info(f"Patch ID: {patch.patch_id}")
    logger.info(f"Patch Type: {patch.patch_type}")
    logger.info(f"Decay Type: {classified_patch.decay_type.value}")
    logger.info(f"Purge on Upgrade: {classified_patch.should_purge_on_upgrade}")
    if diagnosis:
        logger.info(f"Cognitive Glitch: {diagnosis.cognitive_glitch.value}")
    logger.info(f"Patch Applied: {patch_applied}")
    logger.info(f"Expected Success Rate: {simulation.estimated_success_rate:.2%}")
    logger.info(rule)

    if patch_applied:
        message = "Agent successfully patched"
    else:
        message = "Patch created, awaiting manual approval"

    return {
        "success": True,
        "failure": failure,
        "analysis": analysis,
        "diagnosis": diagnosis,
        "simulation": simulation,
        "shadow_result": shadow_result,
        "patch": patch,
        "classified_patch": classified_patch,
        "patch_applied": patch_applied,
        "message": message
    }
|
|
284
|
+
|
|
285
|
+
def get_agent_status(self, agent_id: str) -> AgentState:
    """
    Look up the state the patcher currently holds for an agent.

    Args:
        agent_id: ID of the agent

    Returns:
        Whatever ``self.patcher.get_agent_state`` returns for that ID
    """
    state = self.patcher.get_agent_state(agent_id)
    return state
|
|
296
|
+
|
|
297
|
+
def rollback_patch(self, patch_id: str) -> bool:
    """
    Ask the patcher to roll back a patch.

    Args:
        patch_id: ID of the patch to rollback

    Returns:
        The result reported by ``self.patcher.rollback_patch``
    """
    rolled_back = self.patcher.rollback_patch(patch_id)
    return rolled_back
|
|
308
|
+
|
|
309
|
+
def get_failure_history(self, agent_id: Optional[str] = None, limit: int = 100) -> List[AgentFailure]:
    """
    Fetch recorded failures from the detector.

    Args:
        agent_id: Optional filter by agent ID
        limit: Maximum number of failures to return

    Returns:
        The detector's failure history for the given filter and limit
    """
    failures = self.detector.get_failure_history(agent_id, limit)
    return failures
|
|
321
|
+
|
|
322
|
+
def get_patch_history(self, agent_id: Optional[str] = None) -> List[CorrectionPatch]:
    """
    Fetch recorded patches from the patcher.

    Args:
        agent_id: Optional filter by agent ID

    Returns:
        The patcher's patch history for the given filter
    """
    patches = self.patcher.get_patch_history(agent_id)
    return patches
|
|
333
|
+
|
|
334
|
+
def wake_up_and_fix(self, agent_id: str, error_message: str, context: Optional[Dict[str, Any]] = None):
    """
    One-call shortcut around ``handle_failure`` with auto-patching enabled.

    Delegates to ``handle_failure`` without a user prompt, then logs whether
    the agent ended up both fixed and patched.

    Args:
        agent_id: ID of the failed agent
        error_message: Error message from the failure
        context: Additional context

    Returns:
        The unmodified result dictionary from ``handle_failure``
    """
    logger.info("🚀 Kernel waking up to fix agent failure...")
    outcome = self.handle_failure(agent_id, error_message, context, auto_patch=True)

    # "patch_applied" is only consulted when "success" is truthy.
    fully_fixed = outcome["success"] and outcome["patch_applied"]
    if not fully_fixed:
        logger.warning("⚠️ Agent fix incomplete, manual intervention may be required")
    else:
        logger.info("✅ Agent fixed and patched successfully!")

    return outcome
|
|
355
|
+
|
|
356
|
+
# ============================================================================
|
|
357
|
+
# DUAL-LOOP ARCHITECTURE: Loop 2 (Alignment Engine) Methods
|
|
358
|
+
# ============================================================================
|
|
359
|
+
|
|
360
|
+
def handle_outcome(
    self,
    agent_id: str,
    user_prompt: str,
    agent_response: str,
    context: Optional[Dict[str, Any]] = None,
    tool_telemetry: Optional[List[ToolExecutionTelemetry]] = None,
    auto_nudge: bool = True
) -> Dict[str, Any]:
    """
    Handle an agent outcome through the Alignment Engine (Loop 2).

    The outcome is analyzed first. If the analyzer flags it for audit (a
    "give-up" signal), a nudge prompt is optionally generated, the
    completeness auditor is run, and - when the audit reports that the
    teacher found data - a competence patch is created, registered with the
    semantic purge, and applied if the ``auto_patch`` config entry allows.

    Args:
        agent_id: ID of the agent
        user_prompt: Original user request
        agent_response: Agent's response
        context: Additional context
        tool_telemetry: Optional tool execution telemetry
        auto_nudge: Whether to generate a nudge on give-up (default: True)

    Returns:
        Dictionary with the keys ``outcome``, ``audit``, ``patch``,
        ``classified_patch``, ``nudge_result`` and ``patch_applied``.
        ``nudge_result`` is never filled in by this method; a generated
        nudge is returned under the extra key ``nudge_prompt`` so the
        caller can re-invoke the agent. ``patch_applied`` is True only
        when a competence patch was created and the patcher reported that
        it was applied.
    """
    logger.info(f"🔄 Loop 2 (Alignment Engine): Analyzing outcome for {agent_id}")

    # Step 1: Analyze the outcome with enhanced telemetry
    outcome = self.outcome_analyzer.analyze_outcome(
        agent_id=agent_id,
        user_prompt=user_prompt,
        agent_response=agent_response,
        context=context,
        tool_telemetry=tool_telemetry
    )

    result = {
        "outcome": outcome,
        "audit": None,
        "patch": None,
        "classified_patch": None,
        "nudge_result": None,
        "patch_applied": False
    }

    # Step 2: Check if this triggers Completeness Audit (Give-Up Signal)
    if not self.outcome_analyzer.should_trigger_audit(outcome):
        logger.info("✓ No give-up signal detected - agent performing well")
        return result

    logger.info("🔍 Give-Up Signal detected! Triggering Completeness Auditor...")

    # Step 2a: Auto-nudge if enabled
    if auto_nudge and self.nudge_mechanism.should_nudge(outcome):
        logger.info("💡 Auto-nudge enabled - attempting nudge...")
        nudge_prompt = self.nudge_mechanism.generate_nudge(outcome)
        logger.info(f"Nudge prompt: {nudge_prompt[:100]}...")

        # The agent is not re-invoked here; the prompt is handed back so
        # the caller can retry with it.
        result["nudge_prompt"] = nudge_prompt
        logger.info("✓ Nudge prompt generated (agent should be re-invoked)")

    # Step 3: Run Completeness Audit (Differential Auditing)
    audit = self.completeness_auditor.audit_give_up(outcome)
    result["audit"] = audit

    # Step 4: If teacher found data (laziness detected), create competence patch
    if not audit.teacher_found_data:
        return result

    logger.info("⚠️ LAZINESS DETECTED: Creating competence patch...")

    patch = self._create_competence_patch(agent_id, audit)
    result["patch"] = patch

    # Step 5: Classify patch for lifecycle management (Semantic Purge)
    classified_patch = self.semantic_purge.register_patch(
        patch=patch,
        current_model_version=self.current_model_version
    )
    result["classified_patch"] = classified_patch

    self.semantic_purge.register_completeness_audit(
        audit=audit,
        current_model_version=self.current_model_version
    )

    # Apply patch, and report what the patcher says happened rather than
    # assuming success (handle_failure records the same flag).
    if self.config.get("auto_patch", True):
        applied = bool(self.patcher.apply_patch(patch))
        result["patch_applied"] = applied
        if applied:
            logger.info("✓ Competence patch applied")
        else:
            logger.warning("✗ Competence patch was not applied")

    return result
|
|
460
|
+
|
|
461
|
+
def _create_competence_patch(
    self,
    agent_id: str,
    audit: CompletenessAudit
) -> CorrectionPatch:
    """
    Create a patch from a completeness audit.

    The audit is wrapped in a synthetic failure, analysis and simulation so
    it can be carried by a regular ``CorrectionPatch``. The patch is
    returned unapplied (``applied=False``).

    Args:
        agent_id: ID of the agent the patch is for
        audit: Completeness audit; its ``agent_outcome``, ``gap_analysis``,
            ``competence_patch``, ``confidence``, ``audit_id`` and
            ``teacher_model`` attributes are read

    Returns:
        A ``system_prompt`` patch whose content carries the audit's
        competence rule, audit ID and teacher model
    """
    import uuid
    from .models import FailureAnalysis, SimulationResult, AgentFailure, FailureType, FailureSeverity

    # Create a synthetic failure for the audit
    failure = AgentFailure(
        agent_id=agent_id,
        failure_type=FailureType.LOGIC_ERROR,
        severity=FailureSeverity.MEDIUM,
        error_message=f"Agent gave up: {audit.agent_outcome.agent_response}",
        context=audit.agent_outcome.context
    )

    # Create analysis
    analysis = FailureAnalysis(
        failure=failure,
        root_cause="Agent gave up too early without exhaustive search",
        contributing_factors=[audit.gap_analysis],
        suggested_fixes=[audit.competence_patch],
        confidence_score=audit.confidence,
        similar_failures=[]
    )

    # Create simulation (fixed success, risk and success-rate values)
    simulation = SimulationResult(
        simulation_id=f"sim-{uuid.uuid4().hex[:8]}",
        success=True,
        alternative_path=[
            {
                "step": 1,
                "action": "exhaustive_search",
                "description": "Check all data sources before reporting 'not found'"
            },
            {
                "step": 2,
                "action": "apply_competence_lesson",
                "description": audit.competence_patch
            }
        ],
        expected_outcome="Agent will search exhaustively before giving up",
        risk_score=0.1,
        estimated_success_rate=0.9
    )

    # Create patch
    patch_id = f"competence-patch-{uuid.uuid4().hex[:8]}"

    patch = CorrectionPatch(
        patch_id=patch_id,
        agent_id=agent_id,
        failure_analysis=analysis,
        simulation_result=simulation,
        patch_type="system_prompt",
        patch_content={
            "type": "competence_rule",
            "rule": audit.competence_patch,
            "from_audit": audit.audit_id,
            "teacher_model": audit.teacher_model
        },
        applied=False
    )

    return patch
|
|
534
|
+
|
|
535
|
+
def upgrade_model(self, new_model_version: str) -> Dict[str, Any]:
    """
    Switch the kernel to a new model version and run the semantic purge.

    The purge is invoked with the old and new version, then the kernel's
    ``current_model_version`` and the ``model_version`` of every agent
    state held by the patcher are updated.

    Args:
        new_model_version: New model version (e.g., "gpt-5")

    Returns:
        The purge result; its ``stats`` entry must provide
        ``purged_count``, ``retained_count`` and ``tokens_reclaimed``
    """
    rule = "=" * 80
    logger.info(rule)
    logger.info(f"MODEL UPGRADE: {self.current_model_version} → {new_model_version}")
    logger.info(rule)

    previous_version = self.current_model_version

    # Purge first, while the kernel still reports the old version.
    purge_result = self.semantic_purge.purge_on_upgrade(
        old_model_version=previous_version,
        new_model_version=new_model_version
    )

    self.current_model_version = new_model_version

    # Propagate the new version to every tracked agent.
    for state in self.patcher.agent_states.values():
        state.model_version = new_model_version

    logger.info(rule)
    logger.info("MODEL UPGRADE COMPLETE")
    stats = purge_result['stats']
    logger.info(f" Purged: {stats['purged_count']} Type A patches")
    logger.info(f" Retained: {stats['retained_count']} Type B patches")
    logger.info(f" Tokens Reclaimed: {stats['tokens_reclaimed']}")
    logger.info(rule)

    return purge_result
|
|
575
|
+
|
|
576
|
+
def get_alignment_stats(self) -> Dict[str, Any]:
    """
    Collect the Alignment Engine (Loop 2) statistics into one dictionary.

    Returns:
        Dictionary with five sections:
        - ``completeness_auditor``: the auditor's audit stats
        - ``semantic_purge``: the purge stats
        - ``outcome_analyzer``: outcome count and give-up rate
        - ``nudge_mechanism``: the nudge stats
        - ``value_delivery``: the computed value delivery metrics
    """
    auditor_section = self.completeness_auditor.get_audit_stats()
    purge_section = self.semantic_purge.get_purge_stats()

    analyzer = self.outcome_analyzer
    analyzer_section = {
        "total_outcomes": len(analyzer.outcome_history),
        "give_up_rate": analyzer.get_give_up_rate()
    }

    nudge_section = self.nudge_mechanism.get_nudge_stats()
    value_section = self._calculate_value_delivery_metrics()

    return {
        "completeness_auditor": auditor_section,
        "semantic_purge": purge_section,
        "outcome_analyzer": analyzer_section,
        "nudge_mechanism": nudge_section,
        "value_delivery": value_section
    }
|
|
599
|
+
|
|
600
|
+
def _calculate_value_delivery_metrics(self) -> Dict[str, Any]:
    """
    Derive competence-oriented metrics from the Loop 2 components.

    A competence score starts at 100 and is adjusted by three rates:
    - give-up rate: subtracts up to 30 points
    - laziness rate (from the audit stats): subtracts up to 40 points
    - nudge success rate (from the nudge stats): adds up to 20 points
    The score is then clamped to the 0-100 range. Rates missing from the
    stats dictionaries count as 0.0.

    Returns:
        Dictionary with the rounded score and rates, the audit totals, and
        a fixed ``focus`` description string
    """
    audit_stats = self.completeness_auditor.get_audit_stats()
    nudge_stats = self.nudge_mechanism.get_nudge_stats()
    give_up_rate = self.outcome_analyzer.get_give_up_rate()

    # Penalties are applied one after another, starting from a perfect score.
    after_give_up = 100.0 - (give_up_rate * 30)

    laziness_rate = audit_stats.get("laziness_rate", 0.0)
    after_laziness = after_give_up - (laziness_rate * 40)

    nudge_success_rate = nudge_stats.get("success_rate", 0.0)
    raw_score = after_laziness + (nudge_success_rate * 20)

    # Clamp: the nudge bonus can push the raw score above 100.
    capped = min(100, raw_score)
    competence_score = max(0, capped)

    metrics = {
        "competence_score": round(competence_score, 2),
        "give_up_rate": round(give_up_rate, 4),
        "laziness_detection_rate": round(laziness_rate, 4),
        "nudge_success_rate": round(nudge_success_rate, 4),
        "total_audits": audit_stats.get("total_audits", 0),
        "laziness_caught": audit_stats.get("laziness_detected", 0),
        "focus": "Competence & Value Delivery (differentiates from safety-only tools)"
    }
    return metrics
|
|
645
|
+
|
|
646
|
+
def get_classified_patches(self) -> Dict[str, List[ClassifiedPatch]]:
    """
    Group the semantic purge's registered patches by lifecycle class.

    Returns:
        Dictionary with the purge's purgeable patches under ``purgeable``
        and its permanent patches under ``permanent``
    """
    purge = self.semantic_purge
    purgeable = purge.get_purgeable_patches()
    permanent = purge.get_permanent_patches()
    return {"purgeable": purgeable, "permanent": permanent}
|
|
657
|
+
|
|
658
|
+
def process_async_queue(self, batch_size: int = 10) -> Dict[str, Any]:
    """
    Process failures from the async queue (background/nightly processing).

    Entries are taken from the front of ``self.async_failure_queue`` and
    replayed through ``handle_failure`` with auto-patching enabled. The
    replay deliberately passes ``user_prompt=None``: ``handle_failure`` only
    runs triage when a prompt is given, so this prevents the entry from
    being triaged (and possibly queued) a second time. The queued entry
    itself is left unmodified.

    An entry counts as succeeded only when the result reports both
    ``success`` and ``patch_applied``. Any exception raised while handling
    an entry is logged with its traceback and counted as failed, and the
    batch continues with the next entry.

    Args:
        batch_size: Maximum number of failures to process in this batch

    Returns:
        Dictionary with ``processed``, ``succeeded``, ``failed`` and
        ``remaining`` counts
    """
    rule = "=" * 80
    logger.info(rule)
    logger.info(f"ASYNC QUEUE PROCESSING - Processing up to {batch_size} failures")
    logger.info(f"Queue size: {len(self.async_failure_queue)}")
    logger.info(rule)

    processed = 0
    succeeded = 0
    failed = 0

    # Number of entries this run will actually handle, for progress logs.
    planned = min(batch_size, len(self.async_failure_queue))

    # Process up to batch_size items
    while self.async_failure_queue and processed < batch_size:
        failure_data = self.async_failure_queue.pop(0)
        processed += 1

        logger.info(f"Processing async failure {processed}/{planned}")

        try:
            logger.info(f" Agent: {failure_data['agent_id']}")
            logger.info(f" Error: {failure_data['error_message']}")

            result = self.handle_failure(
                agent_id=failure_data['agent_id'],
                error_message=failure_data['error_message'],
                context=failure_data.get('context'),
                stack_trace=failure_data.get('stack_trace'),
                auto_patch=True,
                user_prompt=None,  # Skip triage by not providing user_prompt
                chain_of_thought=failure_data.get('chain_of_thought'),
                failed_action=failure_data.get('failed_action')
            )
        except Exception as e:
            # Batch boundary: one bad entry must not stop the others.
            failed += 1
            logger.error(f" ✗ Error processing: {str(e)}", exc_info=True)
            continue

        if result.get('success') and result.get('patch_applied'):
            succeeded += 1
            logger.info(" ✓ Fixed successfully")
        else:
            failed += 1
            logger.info(" ✗ Fix failed")

    logger.info(rule)
    logger.info("ASYNC QUEUE PROCESSING COMPLETE")
    logger.info(f" Processed: {processed}")
    logger.info(f" Succeeded: {succeeded}")
    logger.info(f" Failed: {failed}")
    logger.info(f" Remaining in queue: {len(self.async_failure_queue)}")
    logger.info(rule)

    return {
        "processed": processed,
        "succeeded": succeeded,
        "failed": failed,
        "remaining": len(self.async_failure_queue)
    }
|
|
729
|
+
|
|
730
|
+
def get_triage_stats(self) -> Dict[str, Any]:
    """
    Report size counters for the triage setup.

    Returns:
        Dictionary with the current length of the async failure queue and
        the number of critical tools and high-effort keywords held by the
        triage engine
    """
    triage = self.triage
    queue_size = len(self.async_failure_queue)
    tool_count = len(triage.critical_tools)
    keyword_count = len(triage.high_effort_keywords)
    return {
        "async_queue_size": queue_size,
        "critical_tools": tool_count,
        "high_effort_keywords": keyword_count
    }
|