agent-os-kernel 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_os/__init__.py +66 -4
- agent_os/agents_compat.py +286 -0
- agent_os/base_agent.py +308 -0
- agent_os/cli.py +1079 -19
- agent_os/integrations/__init__.py +37 -2
- agent_os/integrations/openai_adapter.py +502 -0
- agent_os/integrations/semantic_kernel_adapter.py +569 -0
- agent_os/stateless.py +349 -0
- agent_os_kernel-1.2.0.dist-info/METADATA +676 -0
- agent_os_kernel-1.2.0.dist-info/RECORD +1053 -0
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.2.0.dist-info}/entry_points.txt +0 -1
- modules/amb/.github/workflows/ci.yml +102 -0
- modules/amb/.github/workflows/publish.yml +146 -0
- modules/amb/.gitignore +134 -0
- modules/amb/CHANGELOG.md +118 -0
- modules/amb/CONTRIBUTING.md +141 -0
- modules/amb/LICENSE +21 -0
- modules/amb/README.md +188 -0
- modules/amb/amb_core/__init__.py +175 -0
- modules/amb/amb_core/adapters/__init__.py +55 -0
- modules/amb/amb_core/adapters/aws_sqs_broker.py +374 -0
- modules/amb/amb_core/adapters/azure_servicebus_broker.py +338 -0
- modules/amb/amb_core/adapters/kafka_broker.py +258 -0
- modules/amb/amb_core/adapters/nats_broker.py +283 -0
- modules/amb/amb_core/adapters/rabbitmq_broker.py +233 -0
- modules/amb/amb_core/adapters/redis_broker.py +260 -0
- modules/amb/amb_core/broker.py +143 -0
- modules/amb/amb_core/bus.py +479 -0
- modules/amb/amb_core/cloudevents.py +507 -0
- modules/amb/amb_core/dlq.py +343 -0
- modules/amb/amb_core/hf_utils.py +534 -0
- modules/amb/amb_core/memory_broker.py +408 -0
- modules/amb/amb_core/models.py +139 -0
- modules/amb/amb_core/persistence.py +527 -0
- modules/amb/amb_core/schema.py +292 -0
- modules/amb/amb_core/tracing.py +356 -0
- modules/amb/examples/advanced_features.py +223 -0
- modules/amb/examples/backpressure_demo.py +225 -0
- modules/amb/examples/basic_usage.py +117 -0
- modules/amb/examples/tracing_demo.py +104 -0
- modules/amb/experiments/README.md +52 -0
- modules/amb/experiments/reproduce_results.py +467 -0
- modules/amb/experiments/results.json +324 -0
- modules/amb/paper/README.md +40 -0
- modules/amb/paper/paper.tex +365 -0
- modules/amb/paper/whitepaper.md +377 -0
- modules/amb/pyproject.toml +117 -0
- modules/amb/tests/__init__.py +1 -0
- modules/amb/tests/test_backpressure_priority.py +280 -0
- modules/amb/tests/test_bus.py +198 -0
- modules/amb/tests/test_cloudevents.py +443 -0
- modules/amb/tests/test_features.py +531 -0
- modules/amb/tests/test_models.py +74 -0
- modules/amb/tests/test_tracing.py +254 -0
- modules/atr/.github/workflows/ci.yml +101 -0
- modules/atr/.github/workflows/publish.yml +140 -0
- modules/atr/.gitignore +134 -0
- modules/atr/.pre-commit-config.yaml +37 -0
- modules/atr/CHANGELOG.md +39 -0
- modules/atr/CONTRIBUTING.md +96 -0
- modules/atr/IMPLEMENTATION_SUMMARY.md +143 -0
- modules/atr/README.md +180 -0
- modules/atr/atr/__init__.py +638 -0
- modules/atr/atr/access.py +346 -0
- modules/atr/atr/composition.py +643 -0
- modules/atr/atr/decorator.py +355 -0
- modules/atr/atr/executor.py +382 -0
- modules/atr/atr/health.py +555 -0
- modules/atr/atr/hf_utils.py +447 -0
- modules/atr/atr/injection.py +420 -0
- modules/atr/atr/metrics.py +438 -0
- modules/atr/atr/policies.py +401 -0
- modules/atr/atr/py.typed +2 -0
- modules/atr/atr/registry.py +450 -0
- modules/atr/atr/schema.py +478 -0
- modules/atr/atr/tools/safe/__init__.py +73 -0
- modules/atr/atr/tools/safe/calculator.py +380 -0
- modules/atr/atr/tools/safe/datetime_tool.py +441 -0
- modules/atr/atr/tools/safe/file_reader.py +400 -0
- modules/atr/atr/tools/safe/http_client.py +314 -0
- modules/atr/atr/tools/safe/json_parser.py +372 -0
- modules/atr/atr/tools/safe/text_tool.py +526 -0
- modules/atr/atr/tools/safe/toolkit.py +173 -0
- modules/atr/docs/PYPI_SETUP.md +113 -0
- modules/atr/examples/README.md +27 -0
- modules/atr/examples/demo.py +144 -0
- modules/atr/examples/sandbox_demo.py +218 -0
- modules/atr/experiments/README.md +69 -0
- modules/atr/experiments/reproduce_results.py +509 -0
- modules/atr/experiments/results/.gitkeep +0 -0
- modules/atr/experiments/results/results_20260123_140334.json +71 -0
- modules/atr/paper/README.md +36 -0
- modules/atr/paper/figures/.gitkeep +0 -0
- modules/atr/paper/references.bib +84 -0
- modules/atr/paper/structure.tex +293 -0
- modules/atr/paper/whitepaper.md +234 -0
- modules/atr/pyproject.toml +148 -0
- modules/atr/requirements.txt +1 -0
- modules/atr/setup.py +30 -0
- modules/atr/tests/__init__.py +1 -0
- modules/atr/tests/test_decorator.py +317 -0
- modules/atr/tests/test_executor.py +245 -0
- modules/atr/tests/test_integration_executor.py +184 -0
- modules/atr/tests/test_registry.py +312 -0
- modules/atr/tests/test_schema.py +182 -0
- modules/atr/tests/test_v2_features.py +708 -0
- modules/caas/.dockerignore +63 -0
- modules/caas/.github/ISSUE_TEMPLATE/bug_report.md +38 -0
- modules/caas/.github/ISSUE_TEMPLATE/custom.md +10 -0
- modules/caas/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
- modules/caas/.github/workflows/ci.yml +100 -0
- modules/caas/.github/workflows/lint.yml +39 -0
- modules/caas/.github/workflows/publish-pypi.yml +124 -0
- modules/caas/.gitignore +73 -0
- modules/caas/.pre-commit-config.yaml +33 -0
- modules/caas/CHANGELOG.md +58 -0
- modules/caas/CONTRIBUTING.md +346 -0
- modules/caas/Dockerfile +41 -0
- modules/caas/LICENSE +21 -0
- modules/caas/MANIFEST.in +11 -0
- modules/caas/README.md +158 -0
- modules/caas/benchmarks/README.md +255 -0
- modules/caas/benchmarks/create_hf_dataset.py +502 -0
- modules/caas/benchmarks/data/sample_corpus/README.md +86 -0
- modules/caas/benchmarks/data/sample_corpus/auth_module.py +211 -0
- modules/caas/benchmarks/data/sample_corpus/contribution_guide.md +185 -0
- modules/caas/benchmarks/data/sample_corpus/remote_work_policy.html +57 -0
- modules/caas/benchmarks/hf_dataset/README.md +214 -0
- modules/caas/benchmarks/hf_dataset/caas_benchmark_corpus.py +73 -0
- modules/caas/benchmarks/hf_dataset/corpus_preview.json +193 -0
- modules/caas/benchmarks/results/README.md +66 -0
- modules/caas/benchmarks/results/evaluation_2026-01-20.json +121 -0
- modules/caas/benchmarks/run_evaluation.py +561 -0
- modules/caas/benchmarks/statistical_tests.py +289 -0
- modules/caas/benchmarks/verify_sample_corpus.py +83 -0
- modules/caas/docker-compose.yml +38 -0
- modules/caas/docs/CONTEXT_TRIAD.md +462 -0
- modules/caas/docs/CONTRIBUTING.md +346 -0
- modules/caas/docs/ETHICS_AND_LIMITATIONS.md +336 -0
- modules/caas/docs/HEURISTIC_ROUTER.md +442 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY.md +363 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_CONTEXT_TRIAD.md +277 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_HEURISTIC_ROUTER.md +231 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_METADATA_INJECTION.md +258 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_PRAGMATIC_TRUTH.md +212 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_TRUST_GATEWAY.md +319 -0
- modules/caas/docs/LAYER_1_PRIMITIVE.md +202 -0
- modules/caas/docs/METADATA_INJECTION.md +404 -0
- modules/caas/docs/PRAGMATIC_TRUTH.md +431 -0
- modules/caas/docs/RELATED_WORK.md +312 -0
- modules/caas/docs/RELEASE_CHECKLIST.md +219 -0
- modules/caas/docs/RELEASE_GUIDE.md +285 -0
- modules/caas/docs/REPRODUCIBILITY.md +386 -0
- modules/caas/docs/SLIDING_WINDOW.md +387 -0
- modules/caas/docs/STRUCTURE_AWARE_INDEXING.md +158 -0
- modules/caas/docs/TESTING.md +259 -0
- modules/caas/docs/THREAT_MODEL.md +247 -0
- modules/caas/docs/TRUST_GATEWAY.md +575 -0
- modules/caas/docs/VFS.md +298 -0
- modules/caas/examples/agents/enterprise_security_agent.py +414 -0
- modules/caas/examples/agents/intelligent_document_analyzer.py +380 -0
- modules/caas/examples/demos/demo.py +309 -0
- modules/caas/examples/demos/demo_context_triad.py +225 -0
- modules/caas/examples/demos/demo_conversation_manager.py +285 -0
- modules/caas/examples/demos/demo_heuristic_router.py +133 -0
- modules/caas/examples/demos/demo_metadata_injection.py +198 -0
- modules/caas/examples/demos/demo_pragmatic_truth.py +303 -0
- modules/caas/examples/demos/demo_structure_aware.py +140 -0
- modules/caas/examples/demos/demo_time_decay.py +247 -0
- modules/caas/examples/demos/demo_trust_gateway.py +383 -0
- modules/caas/examples/multi_agent/README.md +159 -0
- modules/caas/examples/multi_agent/research_team.py +369 -0
- modules/caas/examples/multi_agent/vfs_collaboration.py +393 -0
- modules/caas/examples/usage/auth_module.py +142 -0
- modules/caas/examples/usage/usage_example.py +173 -0
- modules/caas/experiments/README.md +42 -0
- modules/caas/experiments/reproduce_results.py +462 -0
- modules/caas/paper/ARXIV_METADATA.md +145 -0
- modules/caas/paper/ARXIV_README.md +47 -0
- modules/caas/paper/CHECKLIST.md +103 -0
- modules/caas/paper/GITHUB_RELEASE_NOTES.md +105 -0
- modules/caas/paper/README.md +71 -0
- modules/caas/paper/abstract.md +24 -0
- modules/caas/paper/arxiv_submission.tar +0 -0
- modules/caas/paper/arxiv_submission.zip +0 -0
- modules/caas/paper/build_pdf.py +355 -0
- modules/caas/paper/experiments.md +149 -0
- modules/caas/paper/figures/.gitkeep +0 -0
- modules/caas/paper/figures/README.md +237 -0
- modules/caas/paper/figures/fig1_system_architecture.png +0 -0
- modules/caas/paper/figures/fig1_system_architecture.svg +198 -0
- modules/caas/paper/figures/fig2_context_triad.png +0 -0
- modules/caas/paper/figures/fig2_context_triad.svg +105 -0
- modules/caas/paper/figures/fig3_ablation_results.png +0 -0
- modules/caas/paper/figures/fig3_ablation_results.svg +113 -0
- modules/caas/paper/figures/fig4_routing_latency.png +0 -0
- modules/caas/paper/figures/fig4_routing_latency.svg +97 -0
- modules/caas/paper/intro.md +103 -0
- modules/caas/paper/latex/figures/fig1_system_architecture.png +0 -0
- modules/caas/paper/latex/figures/fig2_context_triad.png +0 -0
- modules/caas/paper/latex/figures/fig3_ablation_results.png +0 -0
- modules/caas/paper/latex/figures/fig4_routing_latency.png +0 -0
- modules/caas/paper/latex/main.tex +468 -0
- modules/caas/paper/latex/references.bib +140 -0
- modules/caas/paper/method.md +350 -0
- modules/caas/paper/outline.md +123 -0
- modules/caas/paper/related_work.md +101 -0
- modules/caas/paper/tables/.gitkeep +0 -0
- modules/caas/paper/tables/results_tables.md +50 -0
- modules/caas/pyproject.toml +172 -0
- modules/caas/requirements.txt +11 -0
- modules/caas/src/caas/__init__.py +232 -0
- modules/caas/src/caas/api/__init__.py +7 -0
- modules/caas/src/caas/api/server.py +1326 -0
- modules/caas/src/caas/caching.py +832 -0
- modules/caas/src/caas/cli.py +208 -0
- modules/caas/src/caas/conversation.py +221 -0
- modules/caas/src/caas/decay.py +118 -0
- modules/caas/src/caas/detection/__init__.py +7 -0
- modules/caas/src/caas/detection/detector.py +236 -0
- modules/caas/src/caas/enrichment.py +127 -0
- modules/caas/src/caas/gateway/__init__.py +24 -0
- modules/caas/src/caas/gateway/trust_gateway.py +471 -0
- modules/caas/src/caas/hf_utils.py +477 -0
- modules/caas/src/caas/ingestion/__init__.py +21 -0
- modules/caas/src/caas/ingestion/processors.py +251 -0
- modules/caas/src/caas/ingestion/structure_parser.py +185 -0
- modules/caas/src/caas/models.py +354 -0
- modules/caas/src/caas/pragmatic_truth.py +441 -0
- modules/caas/src/caas/routing/__init__.py +8 -0
- modules/caas/src/caas/routing/heuristic_router.py +242 -0
- modules/caas/src/caas/storage/__init__.py +7 -0
- modules/caas/src/caas/storage/store.py +450 -0
- modules/caas/src/caas/triad.py +472 -0
- modules/caas/src/caas/tuning/__init__.py +7 -0
- modules/caas/src/caas/tuning/tuner.py +322 -0
- modules/caas/src/caas/vfs/__init__.py +12 -0
- modules/caas/src/caas/vfs/filesystem.py +450 -0
- modules/caas/tests/__init__.py +3 -0
- modules/caas/tests/conftest.py +8 -0
- modules/caas/tests/test_caching.py +628 -0
- modules/caas/tests/test_context_triad.py +385 -0
- modules/caas/tests/test_conversation_manager.py +289 -0
- modules/caas/tests/test_functionality.py +215 -0
- modules/caas/tests/test_heuristic_router.py +370 -0
- modules/caas/tests/test_metadata_injection.py +328 -0
- modules/caas/tests/test_pragmatic_truth.py +322 -0
- modules/caas/tests/test_structure_aware_indexing.py +283 -0
- modules/caas/tests/test_time_decay.py +268 -0
- modules/caas/tests/test_trust_gateway.py +445 -0
- modules/caas/tests/test_vfs.py +298 -0
- modules/cmvk/.github/FUNDING.yml +9 -0
- modules/cmvk/.github/dependabot.yml +54 -0
- modules/cmvk/.github/workflows/ci.yml +205 -0
- modules/cmvk/.github/workflows/publish.yml +143 -0
- modules/cmvk/.gitignore +147 -0
- modules/cmvk/.pre-commit-config.yaml +58 -0
- modules/cmvk/CHANGELOG.md +146 -0
- modules/cmvk/CITATION.cff +48 -0
- modules/cmvk/CONTRIBUTING.md +229 -0
- modules/cmvk/Dockerfile +87 -0
- modules/cmvk/HF_MODEL_CARD.md +185 -0
- modules/cmvk/LICENSE +21 -0
- modules/cmvk/README.md +149 -0
- modules/cmvk/SECURITY.md +114 -0
- modules/cmvk/config/prompts/generator_v1.txt +23 -0
- modules/cmvk/config/prompts/verifier_hostile.txt +32 -0
- modules/cmvk/config/settings.yaml +40 -0
- modules/cmvk/coverage_html/.gitignore +2 -0
- modules/cmvk/coverage_html/class_index.html +658 -0
- modules/cmvk/coverage_html/coverage_html_cb_188fc9a4.js +735 -0
- modules/cmvk/coverage_html/favicon_32_cb_c827f16f.png +0 -0
- modules/cmvk/coverage_html/function_index.html +1978 -0
- modules/cmvk/coverage_html/index.html +255 -0
- modules/cmvk/coverage_html/keybd_closed_cb_900cfef5.png +0 -0
- modules/cmvk/coverage_html/status.json +1 -0
- modules/cmvk/coverage_html/style_cb_5c747636.css +389 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38___init___py.html +315 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_audit_py.html +499 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_benchmarks_py.html +575 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_constitutional_py.html +1001 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_hf_utils_py.html +398 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_metrics_py.html +570 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_profiles_py.html +397 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_types_py.html +109 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_verification_py.html +1053 -0
- modules/cmvk/docs/DIAGRAMS.md +325 -0
- modules/cmvk/docs/architecture.md +345 -0
- modules/cmvk/docs/features.md +308 -0
- modules/cmvk/docs/getting_started.md +279 -0
- modules/cmvk/docs/innovation_layer.md +377 -0
- modules/cmvk/docs/safety.md +281 -0
- modules/cmvk/docs/traceability.md +150 -0
- modules/cmvk/examples/basic_example.py +62 -0
- modules/cmvk/examples/demo_complete_pipeline.py +209 -0
- modules/cmvk/examples/demo_innovation_layer.py +197 -0
- modules/cmvk/examples/example.py +112 -0
- modules/cmvk/examples/model_diversity_comparison.py +110 -0
- modules/cmvk/examples/real_api_integration.py +121 -0
- modules/cmvk/examples/test_full_pipeline.py +303 -0
- modules/cmvk/experiments/FEATURE_2_LATERAL_THINKING.md +187 -0
- modules/cmvk/experiments/README.md +216 -0
- modules/cmvk/experiments/ablation_runner.py +666 -0
- modules/cmvk/experiments/baseline_runner.py +158 -0
- modules/cmvk/experiments/blind_spot_benchmark.py +364 -0
- modules/cmvk/experiments/datasets/README.md +85 -0
- modules/cmvk/experiments/datasets/humaneval_50.json +352 -0
- modules/cmvk/experiments/datasets/humaneval_full.json +1150 -0
- modules/cmvk/experiments/datasets/humaneval_sample.json +32 -0
- modules/cmvk/experiments/datasets/sabotage.json +262 -0
- modules/cmvk/experiments/datasets/sample.json +40 -0
- modules/cmvk/experiments/demo_with_traces.py +110 -0
- modules/cmvk/experiments/efficiency_curve.py +259 -0
- modules/cmvk/experiments/experiment_runner.py +243 -0
- modules/cmvk/experiments/paper_data_generator.py +183 -0
- modules/cmvk/experiments/reproduce_results.py +407 -0
- modules/cmvk/experiments/reproducible_runner.py +352 -0
- modules/cmvk/experiments/sabotage_stress_test.py +311 -0
- modules/cmvk/experiments/test_lateral_thinking.py +116 -0
- modules/cmvk/experiments/test_prosecutor.py +41 -0
- modules/cmvk/experiments/visualize_results.py +735 -0
- modules/cmvk/logs/traces/demo_HumanEval_0_20260121-204900.json +36 -0
- modules/cmvk/notebooks/analysis.ipynb +124 -0
- modules/cmvk/paper/PAPER.md +561 -0
- modules/cmvk/paper/arxiv_checklist.md +230 -0
- modules/cmvk/paper/cmvk_neurips.aux +77 -0
- modules/cmvk/paper/cmvk_neurips.bbl +81 -0
- modules/cmvk/paper/cmvk_neurips.blg +48 -0
- modules/cmvk/paper/cmvk_neurips.out +16 -0
- modules/cmvk/paper/cmvk_neurips.pdf +0 -0
- modules/cmvk/paper/cmvk_neurips.tex +309 -0
- modules/cmvk/paper/figures/ablation.png +0 -0
- modules/cmvk/paper/figures/ablation.svg +39 -0
- modules/cmvk/paper/figures/architecture.png +0 -0
- modules/cmvk/paper/figures/architecture.svg +115 -0
- modules/cmvk/paper/figures/results_bar.png +0 -0
- modules/cmvk/paper/figures/results_bar.svg +70 -0
- modules/cmvk/paper/generate_figures.py +383 -0
- modules/cmvk/paper/neurips_2024.sty +101 -0
- modules/cmvk/paper/references.bib +98 -0
- modules/cmvk/paper/structure.tex +200 -0
- modules/cmvk/pyproject.toml +189 -0
- modules/cmvk/requirements-dev.txt +19 -0
- modules/cmvk/requirements.txt +14 -0
- modules/cmvk/src/cmvk/__init__.py +216 -0
- modules/cmvk/src/cmvk/audit.py +400 -0
- modules/cmvk/src/cmvk/benchmarks.py +476 -0
- modules/cmvk/src/cmvk/constitutional.py +902 -0
- modules/cmvk/src/cmvk/hf_utils.py +299 -0
- modules/cmvk/src/cmvk/metrics.py +471 -0
- modules/cmvk/src/cmvk/profiles.py +298 -0
- modules/cmvk/src/cmvk/py.typed +0 -0
- modules/cmvk/src/cmvk/types.py +10 -0
- modules/cmvk/src/cmvk/verification.py +954 -0
- modules/cmvk/src/cross_model_verification_kernel/__init__.py +91 -0
- modules/cmvk/src/cross_model_verification_kernel/__main__.py +10 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/__init__.py +16 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/base_agent.py +142 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/generator_openai.py +223 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/verifier_anthropic.py +448 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/verifier_gemini.py +481 -0
- modules/cmvk/src/cross_model_verification_kernel/cli.py +570 -0
- modules/cmvk/src/cross_model_verification_kernel/core/__init__.py +26 -0
- modules/cmvk/src/cross_model_verification_kernel/core/graph_memory.py +308 -0
- modules/cmvk/src/cross_model_verification_kernel/core/kernel.py +413 -0
- modules/cmvk/src/cross_model_verification_kernel/core/trace_logger.py +75 -0
- modules/cmvk/src/cross_model_verification_kernel/core/types.py +121 -0
- modules/cmvk/src/cross_model_verification_kernel/datasets/__init__.py +20 -0
- modules/cmvk/src/cross_model_verification_kernel/datasets/humaneval_loader.py +271 -0
- modules/cmvk/src/cross_model_verification_kernel/generator.py +118 -0
- modules/cmvk/src/cross_model_verification_kernel/kernel.py +292 -0
- modules/cmvk/src/cross_model_verification_kernel/models.py +111 -0
- modules/cmvk/src/cross_model_verification_kernel/py.typed +1 -0
- modules/cmvk/src/cross_model_verification_kernel/simple_kernel.py +185 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/__init__.py +94 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/huggingface_upload.py +394 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/sandbox.py +159 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/statistics.py +468 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/visualizer.py +312 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/web_search.py +86 -0
- modules/cmvk/src/cross_model_verification_kernel/verifier.py +257 -0
- modules/cmvk/tests/__init__.py +3 -0
- modules/cmvk/tests/conftest.py +61 -0
- modules/cmvk/tests/integration/__init__.py +1 -0
- modules/cmvk/tests/integration/test_anthropic_verifier.py +269 -0
- modules/cmvk/tests/integration/test_integration.py +53 -0
- modules/cmvk/tests/integration/test_lateral_thinking_integration.py +199 -0
- modules/cmvk/tests/integration/test_lateral_thinking_witness.py +208 -0
- modules/cmvk/tests/integration/test_prosecutor_mode.py +131 -0
- modules/cmvk/tests/test_constitutional.py +611 -0
- modules/cmvk/tests/test_enhanced_features.py +603 -0
- modules/cmvk/tests/test_verification.py +255 -0
- modules/cmvk/tests/unit/__init__.py +1 -0
- modules/cmvk/tests/unit/test_agents.py +64 -0
- modules/cmvk/tests/unit/test_cli.py +224 -0
- modules/cmvk/tests/unit/test_core.py +126 -0
- modules/cmvk/tests/unit/test_humaneval_loader.py +197 -0
- modules/cmvk/tests/unit/test_kernel.py +255 -0
- modules/cmvk/tests/unit/test_reproducibility.py +160 -0
- modules/cmvk/tests/unit/test_trace_logger.py +115 -0
- modules/cmvk/tests/unit/test_visualizer.py +218 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/bug_report.yml +82 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/config.yml +11 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/feature_request.yml +104 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/question.yml +70 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/security_vulnerability.yml +84 -0
- modules/control-plane/.github/discussions.yml +73 -0
- modules/control-plane/.github/pull_request_template.md +82 -0
- modules/control-plane/.github/workflows/publish.yml +146 -0
- modules/control-plane/.github/workflows/release.yml +39 -0
- modules/control-plane/.github/workflows/tests.yml +58 -0
- modules/control-plane/.gitignore +55 -0
- modules/control-plane/CHANGELOG.md +203 -0
- modules/control-plane/CONTRIBUTING.md +311 -0
- modules/control-plane/CONTRIBUTORS.md +88 -0
- modules/control-plane/Dockerfile +82 -0
- modules/control-plane/LICENSE +21 -0
- modules/control-plane/MANIFEST.in +17 -0
- modules/control-plane/README.md +1264 -0
- modules/control-plane/ROADMAP.md +228 -0
- modules/control-plane/SECURITY.md +210 -0
- modules/control-plane/SUPPORT.md +106 -0
- modules/control-plane/acp-cli.py +212 -0
- modules/control-plane/benchmark/README.md +257 -0
- modules/control-plane/benchmark/__init__.py +19 -0
- modules/control-plane/benchmark/red_team_dataset.py +517 -0
- modules/control-plane/benchmark.py +563 -0
- modules/control-plane/build_and_publish.sh +130 -0
- modules/control-plane/docker-compose.yml +74 -0
- modules/control-plane/docs/ABLATION_STUDIES.md +528 -0
- modules/control-plane/docs/ADAPTER_GUIDE.md +544 -0
- modules/control-plane/docs/ADVANCED_FEATURES.md +543 -0
- modules/control-plane/docs/AIOS_COMPARISON.md +296 -0
- modules/control-plane/docs/BIBLIOGRAPHY.md +367 -0
- modules/control-plane/docs/CASE_STUDIES.md +645 -0
- modules/control-plane/docs/DOCKER_DEPLOYMENT.md +184 -0
- modules/control-plane/docs/ECOSYSTEM_STATUS.md +98 -0
- modules/control-plane/docs/HF_MODEL_CARD.md +168 -0
- modules/control-plane/docs/KERNEL_V1_RELEASE.md +454 -0
- modules/control-plane/docs/LAYER3_FRAMEWORK.md +227 -0
- modules/control-plane/docs/LIMITATIONS.md +523 -0
- modules/control-plane/docs/PYPI_PUBLISHING.md +195 -0
- modules/control-plane/docs/README.md +58 -0
- modules/control-plane/docs/RELATED_WORK.md +319 -0
- modules/control-plane/docs/RELEASE_v1.1.0.md +252 -0
- modules/control-plane/docs/REPRODUCIBILITY.md +540 -0
- modules/control-plane/docs/RESEARCH_FOUNDATION.md +197 -0
- modules/control-plane/docs/api/CORE.md +270 -0
- modules/control-plane/docs/architecture/architecture.md +120 -0
- modules/control-plane/docs/community/ANNOUNCEMENT_TEMPLATES.md +52 -0
- modules/control-plane/docs/guides/IMPLEMENTATION.md +225 -0
- modules/control-plane/docs/guides/PHILOSOPHY.md +354 -0
- modules/control-plane/docs/guides/QUICKSTART.md +217 -0
- modules/control-plane/examples/README.md +138 -0
- modules/control-plane/examples/a2a_demo.py +410 -0
- modules/control-plane/examples/adapter_demo.py +347 -0
- modules/control-plane/examples/advanced_features.py +403 -0
- modules/control-plane/examples/basic_usage.py +261 -0
- modules/control-plane/examples/benchmark_demo.py +186 -0
- modules/control-plane/examples/compliance_demo.py +333 -0
- modules/control-plane/examples/configuration.py +265 -0
- modules/control-plane/examples/getting_started.py +178 -0
- modules/control-plane/examples/hibernation_and_time_travel_demo.py +406 -0
- modules/control-plane/examples/interactive_tutorial.ipynb +497 -0
- modules/control-plane/examples/kernel_interceptor_demo.py +202 -0
- modules/control-plane/examples/kernel_v1_demo.py +273 -0
- modules/control-plane/examples/langchain_demo.py +281 -0
- modules/control-plane/examples/lifecycle_demo.py +724 -0
- modules/control-plane/examples/mcp_demo.py +378 -0
- modules/control-plane/examples/ml_safety_demo.py +157 -0
- modules/control-plane/examples/multimodal_demo.py +347 -0
- modules/control-plane/examples/observability_demo.py +370 -0
- modules/control-plane/examples/use_cases.py +336 -0
- modules/control-plane/experiments/long_horizon_purge.py +235 -0
- modules/control-plane/experiments/multi_agent_rag.py +165 -0
- modules/control-plane/experiments/reproduce_results.py +667 -0
- modules/control-plane/paper/ARXIV_SUBMISSION_INFO.txt +122 -0
- modules/control-plane/paper/ETHICS_STATEMENT.md +248 -0
- modules/control-plane/paper/PAPER_CHECKLIST.md +72 -0
- modules/control-plane/paper/Paper.pdf +0 -0
- modules/control-plane/paper/README.md +71 -0
- modules/control-plane/paper/appendix.md +152 -0
- modules/control-plane/paper/architecture.md +15 -0
- modules/control-plane/paper/arxiv/figures/ablation_chart.png +0 -0
- modules/control-plane/paper/arxiv/figures/architecture.png +0 -0
- modules/control-plane/paper/arxiv/figures/constraint_graphs.png +0 -0
- modules/control-plane/paper/arxiv/figures/results_chart.png +0 -0
- modules/control-plane/paper/arxiv/main.aux +97 -0
- modules/control-plane/paper/arxiv/main.bbl +112 -0
- modules/control-plane/paper/arxiv/main.blg +48 -0
- modules/control-plane/paper/arxiv/main.out +33 -0
- modules/control-plane/paper/arxiv/main.pdf +0 -0
- modules/control-plane/paper/arxiv/main.tex +479 -0
- modules/control-plane/paper/arxiv/references.bib +234 -0
- modules/control-plane/paper/arxiv_submission.tar +0 -0
- modules/control-plane/paper/arxiv_submission.zip +0 -0
- modules/control-plane/paper/build.sh +68 -0
- modules/control-plane/paper/figures/README.md +47 -0
- modules/control-plane/paper/figures/ablation_chart.pdf +0 -0
- modules/control-plane/paper/figures/ablation_chart.png +0 -0
- modules/control-plane/paper/figures/architecture.pdf +0 -0
- modules/control-plane/paper/figures/architecture.png +0 -0
- modules/control-plane/paper/figures/constraint_graphs.pdf +0 -0
- modules/control-plane/paper/figures/constraint_graphs.png +0 -0
- modules/control-plane/paper/figures/generate_figures.py +252 -0
- modules/control-plane/paper/figures/results_chart.pdf +0 -0
- modules/control-plane/paper/figures/results_chart.png +0 -0
- modules/control-plane/paper/main.md +273 -0
- modules/control-plane/paper/main.tex +214 -0
- modules/control-plane/paper/main_arxiv.aux +53 -0
- modules/control-plane/paper/main_arxiv.out +17 -0
- modules/control-plane/paper/main_arxiv.pdf +0 -0
- modules/control-plane/paper/main_arxiv.tex +264 -0
- modules/control-plane/paper/references.bib +234 -0
- modules/control-plane/pyproject.toml +124 -0
- modules/control-plane/reproducibility/ABLATIONS.md +136 -0
- modules/control-plane/reproducibility/README.md +288 -0
- modules/control-plane/reproducibility/commands.md +467 -0
- modules/control-plane/reproducibility/docker_config/Dockerfile +39 -0
- modules/control-plane/reproducibility/experiment_configs/purge_config.json +46 -0
- modules/control-plane/reproducibility/experiment_configs/rag_config.json +36 -0
- modules/control-plane/reproducibility/hardware_specs.md +317 -0
- modules/control-plane/reproducibility/requirements_frozen.txt +0 -0
- modules/control-plane/reproducibility/run_all_experiments.sh +45 -0
- modules/control-plane/reproducibility/seeds.json +106 -0
- modules/control-plane/scripts/prepare_pypi.py +46 -0
- modules/control-plane/scripts/prepare_release.py +176 -0
- modules/control-plane/scripts/upload_dataset_to_hf.py +316 -0
- modules/control-plane/setup.py +69 -0
- modules/control-plane/src/agent_control_plane/__init__.py +639 -0
- modules/control-plane/src/agent_control_plane/a2a_adapter.py +541 -0
- modules/control-plane/src/agent_control_plane/adapter.py +415 -0
- modules/control-plane/src/agent_control_plane/agent_hibernation.py +364 -0
- modules/control-plane/src/agent_control_plane/agent_kernel.py +464 -0
- modules/control-plane/src/agent_control_plane/compliance.py +718 -0
- modules/control-plane/src/agent_control_plane/constraint_graphs.py +475 -0
- modules/control-plane/src/agent_control_plane/control_plane.py +848 -0
- modules/control-plane/src/agent_control_plane/example_executors.py +193 -0
- modules/control-plane/src/agent_control_plane/execution_engine.py +229 -0
- modules/control-plane/src/agent_control_plane/flight_recorder.py +600 -0
- modules/control-plane/src/agent_control_plane/governance_layer.py +432 -0
- modules/control-plane/src/agent_control_plane/hf_utils.py +561 -0
- modules/control-plane/src/agent_control_plane/interfaces/__init__.py +53 -0
- modules/control-plane/src/agent_control_plane/interfaces/kernel_interface.py +359 -0
- modules/control-plane/src/agent_control_plane/interfaces/plugin_interface.py +495 -0
- modules/control-plane/src/agent_control_plane/interfaces/protocol_interfaces.py +385 -0
- modules/control-plane/src/agent_control_plane/kernel_space.py +707 -0
- modules/control-plane/src/agent_control_plane/langchain_adapter.py +422 -0
- modules/control-plane/src/agent_control_plane/lifecycle.py +3111 -0
- modules/control-plane/src/agent_control_plane/mcp_adapter.py +517 -0
- modules/control-plane/src/agent_control_plane/ml_safety.py +560 -0
- modules/control-plane/src/agent_control_plane/multimodal.py +724 -0
- modules/control-plane/src/agent_control_plane/mute_agent.py +419 -0
- modules/control-plane/src/agent_control_plane/observability.py +785 -0
- modules/control-plane/src/agent_control_plane/orchestrator.py +480 -0
- modules/control-plane/src/agent_control_plane/plugin_registry.py +748 -0
- modules/control-plane/src/agent_control_plane/policy_engine.py +525 -0
- modules/control-plane/src/agent_control_plane/shadow_mode.py +307 -0
- modules/control-plane/src/agent_control_plane/signals.py +491 -0
- modules/control-plane/src/agent_control_plane/supervisor_agents.py +427 -0
- modules/control-plane/src/agent_control_plane/time_travel_debugger.py +554 -0
- modules/control-plane/src/agent_control_plane/tool_registry.py +350 -0
- modules/control-plane/src/agent_control_plane/vfs.py +695 -0
- modules/control-plane/tests/README.md +33 -0
- modules/control-plane/tests/test_a2a_adapter.py +336 -0
- modules/control-plane/tests/test_adapter.py +422 -0
- modules/control-plane/tests/test_advanced_features.py +389 -0
- modules/control-plane/tests/test_benchmark.py +223 -0
- modules/control-plane/tests/test_compliance.py +214 -0
- modules/control-plane/tests/test_control_plane.py +295 -0
- modules/control-plane/tests/test_hibernation.py +274 -0
- modules/control-plane/tests/test_kernel_interception.py +284 -0
- modules/control-plane/tests/test_langchain_adapter.py +258 -0
- modules/control-plane/tests/test_lifecycle.py +1174 -0
- modules/control-plane/tests/test_mcp_adapter.py +293 -0
- modules/control-plane/tests/test_ml_safety.py +142 -0
- modules/control-plane/tests/test_multimodal.py +317 -0
- modules/control-plane/tests/test_new_features.py +435 -0
- modules/control-plane/tests/test_observability.py +338 -0
- modules/control-plane/tests/test_time_travel.py +387 -0
- modules/emk/.github/workflows/ci.yml +105 -0
- modules/emk/.github/workflows/publish.yml +144 -0
- modules/emk/.gitignore +74 -0
- modules/emk/CHANGELOG.md +41 -0
- modules/emk/CONTRIBUTING.md +295 -0
- modules/emk/IMPLEMENTATION.md +174 -0
- modules/emk/LICENSE +21 -0
- modules/emk/MANIFEST.in +8 -0
- modules/emk/README.md +135 -0
- modules/emk/RELEASE_NOTES.md +82 -0
- modules/emk/SECURITY.md +52 -0
- modules/emk/codecov.yml +39 -0
- modules/emk/docs/MEMORY_MANAGEMENT.md +285 -0
- modules/emk/emk/__init__.py +106 -0
- modules/emk/emk/hf_utils.py +419 -0
- modules/emk/emk/indexer.py +144 -0
- modules/emk/emk/py.typed +0 -0
- modules/emk/emk/schema.py +204 -0
- modules/emk/emk/sleep_cycle.py +345 -0
- modules/emk/emk/store.py +479 -0
- modules/emk/examples/basic_usage.py +123 -0
- modules/emk/examples/memory_features_demo.py +154 -0
- modules/emk/experiments/README.md +59 -0
- modules/emk/experiments/reproduce_results.py +461 -0
- modules/emk/experiments/results.json +61 -0
- modules/emk/paper/structure.tex +192 -0
- modules/emk/paper/whitepaper.md +273 -0
- modules/emk/pyproject.toml +91 -0
- modules/emk/setup.py +5 -0
- modules/emk/tests/test_file_adapter.py +195 -0
- modules/emk/tests/test_indexer.py +174 -0
- modules/emk/tests/test_init.py +55 -0
- modules/emk/tests/test_negative_memory.py +83 -0
- modules/emk/tests/test_schema.py +150 -0
- modules/emk/tests/test_semantic_rules.py +175 -0
- modules/emk/tests/test_sleep_cycle.py +335 -0
- modules/emk/tests/test_store_anti_patterns.py +239 -0
- modules/iatp/.github/workflows/docker-build.yml +124 -0
- modules/iatp/.github/workflows/publish.yml +174 -0
- modules/iatp/.github/workflows/python-package.yml +121 -0
- modules/iatp/.gitignore +67 -0
- modules/iatp/.pre-commit-config.yaml +64 -0
- modules/iatp/CHANGELOG.md +120 -0
- modules/iatp/Dockerfile +91 -0
- modules/iatp/IMPLEMENTATION_SUMMARY.md +218 -0
- modules/iatp/MANIFEST.in +9 -0
- modules/iatp/README.md +180 -0
- modules/iatp/docker/Dockerfile.agent +27 -0
- modules/iatp/docker/Dockerfile.sidecar-python +86 -0
- modules/iatp/docker/README.md +258 -0
- modules/iatp/docker-compose.yml +194 -0
- modules/iatp/docs/ARCHITECTURE.md +243 -0
- modules/iatp/docs/CLI_GUIDE.md +220 -0
- modules/iatp/docs/DEPLOYMENT.md +304 -0
- modules/iatp/examples/README.md +132 -0
- modules/iatp/examples/backend_agent.py +39 -0
- modules/iatp/examples/client.py +168 -0
- modules/iatp/examples/demo_attestation_reputation.py +274 -0
- modules/iatp/examples/demo_client.py +240 -0
- modules/iatp/examples/demo_rbac.py +143 -0
- modules/iatp/examples/integration_demo.py +245 -0
- modules/iatp/examples/manifests/coder_agent.json +20 -0
- modules/iatp/examples/manifests/reviewer_agent.json +19 -0
- modules/iatp/examples/manifests/secure_bank.json +14 -0
- modules/iatp/examples/manifests/standard_agent.json +14 -0
- modules/iatp/examples/manifests/untrusted_honeypot.json +14 -0
- modules/iatp/examples/run_secure_bank_sidecar.py +85 -0
- modules/iatp/examples/run_sidecar.py +105 -0
- modules/iatp/examples/run_untrusted_sidecar.py +77 -0
- modules/iatp/examples/secure_bank_agent.py +138 -0
- modules/iatp/examples/test_untrusted.py +82 -0
- modules/iatp/examples/untrusted_agent.py +119 -0
- modules/iatp/experiments/README.md +58 -0
- modules/iatp/experiments/cascading_hallucination/README.md +149 -0
- modules/iatp/experiments/cascading_hallucination/agent_a_user.py +41 -0
- modules/iatp/experiments/cascading_hallucination/agent_b_summarizer.py +54 -0
- modules/iatp/experiments/cascading_hallucination/agent_c_database.py +47 -0
- modules/iatp/experiments/cascading_hallucination/proof_of_concept.py +290 -0
- modules/iatp/experiments/cascading_hallucination/run_experiment.py +226 -0
- modules/iatp/experiments/cascading_hallucination/sidecar_c.py +61 -0
- modules/iatp/experiments/reproduce_results.py +574 -0
- modules/iatp/experiments/results.json +2336 -0
- modules/iatp/iatp/__init__.py +164 -0
- modules/iatp/iatp/attestation.py +401 -0
- modules/iatp/iatp/cli.py +253 -0
- modules/iatp/iatp/hf_utils.py +469 -0
- modules/iatp/iatp/ipc_pipes.py +578 -0
- modules/iatp/iatp/main.py +410 -0
- modules/iatp/iatp/models/__init__.py +445 -0
- modules/iatp/iatp/policy_engine.py +335 -0
- modules/iatp/iatp/py.typed +2 -0
- modules/iatp/iatp/recovery.py +319 -0
- modules/iatp/iatp/security/__init__.py +268 -0
- modules/iatp/iatp/sidecar/__init__.py +517 -0
- modules/iatp/iatp/telemetry/__init__.py +162 -0
- modules/iatp/iatp/tests/__init__.py +1 -0
- modules/iatp/iatp/tests/test_attestation.py +368 -0
- modules/iatp/iatp/tests/test_cli.py +129 -0
- modules/iatp/iatp/tests/test_models.py +128 -0
- modules/iatp/iatp/tests/test_policy_engine.py +345 -0
- modules/iatp/iatp/tests/test_recovery.py +279 -0
- modules/iatp/iatp/tests/test_security.py +220 -0
- modules/iatp/iatp/tests/test_sidecar.py +165 -0
- modules/iatp/iatp/tests/test_telemetry.py +173 -0
- modules/iatp/paper/BLOG.md +307 -0
- modules/iatp/paper/PAPER.md +236 -0
- modules/iatp/paper/RFC_SUBMISSION.md +299 -0
- modules/iatp/paper/whitepaper.md +369 -0
- modules/iatp/proto/README.md +200 -0
- modules/iatp/proto/generate_stubs.py +81 -0
- modules/iatp/proto/iatp.proto +552 -0
- modules/iatp/pyproject.toml +180 -0
- modules/iatp/requirements-dev.txt +2 -0
- modules/iatp/requirements.txt +6 -0
- modules/iatp/setup.py +60 -0
- modules/iatp/sidecar/README.md +487 -0
- modules/iatp/sidecar/go/Dockerfile +32 -0
- modules/iatp/sidecar/go/README.md +237 -0
- modules/iatp/sidecar/go/go.mod +8 -0
- modules/iatp/sidecar/go/main.go +488 -0
- modules/iatp/spec/001-handshake.md +436 -0
- modules/iatp/spec/002-reversibility.md +394 -0
- modules/iatp/spec/schema/capability_manifest.json +266 -0
- modules/iatp/test_integration.py +310 -0
- modules/mcp-kernel-server/README.md +261 -0
- modules/mcp-kernel-server/pyproject.toml +60 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/__init__.py +26 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/cli.py +229 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/resources.py +215 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/server.py +562 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/tools.py +1172 -0
- modules/mute-agent/.github/workflows/safety_check.yml +45 -0
- modules/mute-agent/.gitignore +53 -0
- modules/mute-agent/ARCHITECTURE.md +531 -0
- modules/mute-agent/BENCHMARK_GUIDE.md +384 -0
- modules/mute-agent/COMPLETION_SUMMARY.md +293 -0
- modules/mute-agent/EXPERIMENT_SUMMARY.md +318 -0
- modules/mute-agent/IMPLEMENTATION_SUMMARY.md +212 -0
- modules/mute-agent/LICENSE +21 -0
- modules/mute-agent/PHASE3_SUMMARY.md +297 -0
- modules/mute-agent/README.md +360 -0
- modules/mute-agent/STEEL_MAN_RESULTS.md +353 -0
- modules/mute-agent/USAGE.md +505 -0
- modules/mute-agent/V2_IMPLEMENTATION_SUMMARY.md +253 -0
- modules/mute-agent/V2_STEEL_MAN_IMPLEMENTATION.md +274 -0
- modules/mute-agent/VERIFICATION_REPORT.md +435 -0
- modules/mute-agent/charts/cost_comparison.png +0 -0
- modules/mute-agent/charts/cost_vs_ambiguity.png +0 -0
- modules/mute-agent/charts/metrics_comparison.png +0 -0
- modules/mute-agent/charts/scenario_breakdown.png +0 -0
- modules/mute-agent/charts/trace_attack_blocked.html +140 -0
- modules/mute-agent/charts/trace_attack_blocked.png +0 -0
- modules/mute-agent/charts/trace_failure.html +140 -0
- modules/mute-agent/charts/trace_failure.png +0 -0
- modules/mute-agent/charts/trace_success.html +140 -0
- modules/mute-agent/charts/trace_success.png +0 -0
- modules/mute-agent/examples/__init__.py +1 -0
- modules/mute-agent/examples/advanced_example.py +384 -0
- modules/mute-agent/examples/graph_debugger_demo.py +241 -0
- modules/mute-agent/examples/listener_example.py +297 -0
- modules/mute-agent/examples/simple_example.py +242 -0
- modules/mute-agent/examples/steel_man_demo.py +297 -0
- modules/mute-agent/experiments/README.md +135 -0
- modules/mute-agent/experiments/__init__.py +3 -0
- modules/mute-agent/experiments/agent_comparison.csv +6 -0
- modules/mute-agent/experiments/agent_comparison_50runs.csv +6 -0
- modules/mute-agent/experiments/ambiguity_test.py +335 -0
- modules/mute-agent/experiments/ambiguity_test_results.csv +31 -0
- modules/mute-agent/experiments/ambiguity_test_results_50runs.csv +51 -0
- modules/mute-agent/experiments/baseline_agent.py +189 -0
- modules/mute-agent/experiments/benchmark.py +402 -0
- modules/mute-agent/experiments/demo.py +172 -0
- modules/mute-agent/experiments/generate_cost_curve.py +474 -0
- modules/mute-agent/experiments/jailbreak_test.py +137 -0
- modules/mute-agent/experiments/latent_state_scenario.py +361 -0
- modules/mute-agent/experiments/mute_agent_experiment.py +349 -0
- modules/mute-agent/experiments/run_extended_experiment.py +40 -0
- modules/mute-agent/experiments/run_v2_experiments.py +266 -0
- modules/mute-agent/experiments/run_v2_experiments_auto.py +247 -0
- modules/mute-agent/experiments/v2_scenarios/README.md +214 -0
- modules/mute-agent/experiments/v2_scenarios/__init__.py +4 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_1_deep_dependency.py +325 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_2_adversarial.py +328 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_3_false_positive.py +303 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_4_performance.py +319 -0
- modules/mute-agent/experiments/visualize.py +400 -0
- modules/mute-agent/mute_agent/__init__.py +66 -0
- modules/mute-agent/mute_agent/core/__init__.py +1 -0
- modules/mute-agent/mute_agent/core/execution_agent.py +164 -0
- modules/mute-agent/mute_agent/core/handshake_protocol.py +199 -0
- modules/mute-agent/mute_agent/core/reasoning_agent.py +236 -0
- modules/mute-agent/mute_agent/knowledge_graph/__init__.py +1 -0
- modules/mute-agent/mute_agent/knowledge_graph/graph_elements.py +63 -0
- modules/mute-agent/mute_agent/knowledge_graph/multidimensional_graph.py +168 -0
- modules/mute-agent/mute_agent/knowledge_graph/subgraph.py +222 -0
- modules/mute-agent/mute_agent/listener/__init__.py +41 -0
- modules/mute-agent/mute_agent/listener/adapters/__init__.py +29 -0
- modules/mute-agent/mute_agent/listener/adapters/base_adapter.py +187 -0
- modules/mute-agent/mute_agent/listener/adapters/caas_adapter.py +342 -0
- modules/mute-agent/mute_agent/listener/adapters/control_plane_adapter.py +434 -0
- modules/mute-agent/mute_agent/listener/adapters/iatp_adapter.py +330 -0
- modules/mute-agent/mute_agent/listener/adapters/scak_adapter.py +249 -0
- modules/mute-agent/mute_agent/listener/listener.py +608 -0
- modules/mute-agent/mute_agent/listener/state_observer.py +434 -0
- modules/mute-agent/mute_agent/listener/threshold_config.py +311 -0
- modules/mute-agent/mute_agent/super_system/__init__.py +1 -0
- modules/mute-agent/mute_agent/super_system/router.py +202 -0
- modules/mute-agent/mute_agent/visualization/__init__.py +8 -0
- modules/mute-agent/mute_agent/visualization/graph_debugger.py +495 -0
- modules/mute-agent/requirements-dev.txt +6 -0
- modules/mute-agent/requirements.txt +9 -0
- modules/mute-agent/setup.py +64 -0
- modules/mute-agent/src/__init__.py +0 -0
- modules/mute-agent/src/agents/__init__.py +0 -0
- modules/mute-agent/src/agents/baseline_agent.py +524 -0
- modules/mute-agent/src/agents/interactive_agent.py +113 -0
- modules/mute-agent/src/agents/mute_agent.py +622 -0
- modules/mute-agent/src/benchmarks/__init__.py +0 -0
- modules/mute-agent/src/benchmarks/evaluator.py +481 -0
- modules/mute-agent/src/benchmarks/scenarios.json +985 -0
- modules/mute-agent/src/core/__init__.py +0 -0
- modules/mute-agent/src/core/mock_state.py +320 -0
- modules/mute-agent/src/core/tools.py +441 -0
- modules/nexus/__init__.py +49 -0
- modules/nexus/arbiter.py +357 -0
- modules/nexus/client.py +464 -0
- modules/nexus/dmz.py +417 -0
- modules/nexus/escrow.py +428 -0
- modules/nexus/exceptions.py +284 -0
- modules/nexus/registry.py +391 -0
- modules/nexus/reputation.py +423 -0
- modules/nexus/schemas/__init__.py +49 -0
- modules/nexus/schemas/compliance.py +274 -0
- modules/nexus/schemas/escrow.py +249 -0
- modules/nexus/schemas/manifest.py +223 -0
- modules/nexus/schemas/receipt.py +206 -0
- modules/observability/README.md +192 -0
- modules/observability/alertmanager/alertmanager.yml +116 -0
- modules/observability/alerts/agent-os-alerts.yaml +197 -0
- modules/observability/docker-compose.yml +128 -0
- modules/observability/grafana/dashboards/agent-os-amb.json +448 -0
- modules/observability/grafana/dashboards/agent-os-cmvk.json +441 -0
- modules/observability/grafana/dashboards/agent-os-overview.json +268 -0
- modules/observability/grafana/dashboards/agent-os-performance.json +15 -0
- modules/observability/grafana/dashboards/agent-os-safety.json +50 -0
- modules/observability/grafana/provisioning/dashboards/dashboards.yml +15 -0
- modules/observability/grafana/provisioning/datasources/datasources.yml +33 -0
- modules/observability/otel/otel-collector-config.yml +61 -0
- modules/observability/prometheus/prometheus.yml +63 -0
- modules/observability/pyproject.toml +53 -0
- modules/observability/scripts/export_dashboards.py +55 -0
- modules/observability/src/agent_os_observability/__init__.py +25 -0
- modules/observability/src/agent_os_observability/dashboards.py +896 -0
- modules/observability/src/agent_os_observability/metrics.py +396 -0
- modules/observability/src/agent_os_observability/server.py +221 -0
- modules/observability/src/agent_os_observability/tracer.py +226 -0
- modules/primitives/.gitignore +8 -0
- modules/primitives/README.md +62 -0
- modules/primitives/agent_primitives/__init__.py +22 -0
- modules/primitives/agent_primitives/failures.py +82 -0
- modules/primitives/agent_primitives/py.typed +0 -0
- modules/primitives/pyproject.toml +68 -0
- modules/scak/.github/copilot-instructions.md +396 -0
- modules/scak/.github/workflows/release.yml +117 -0
- modules/scak/.gitignore +32 -0
- modules/scak/CHANGELOG.md +173 -0
- modules/scak/CITATION.cff +62 -0
- modules/scak/CONTRIBUTING.md +429 -0
- modules/scak/Dockerfile +58 -0
- modules/scak/ENTERPRISE_FEATURES.md +518 -0
- modules/scak/IMPLEMENTATION_SUMMARY.md +206 -0
- modules/scak/LIMITATIONS.md +565 -0
- modules/scak/MANIFEST.in +16 -0
- modules/scak/NOVELTY.md +535 -0
- modules/scak/README.md +928 -0
- modules/scak/RESEARCH.md +670 -0
- modules/scak/agent_kernel/__init__.py +66 -0
- modules/scak/agent_kernel/analyzer.py +432 -0
- modules/scak/agent_kernel/auditor.py +31 -0
- modules/scak/agent_kernel/completeness_auditor.py +234 -0
- modules/scak/agent_kernel/detector.py +200 -0
- modules/scak/agent_kernel/kernel.py +741 -0
- modules/scak/agent_kernel/memory_manager.py +82 -0
- modules/scak/agent_kernel/models.py +372 -0
- modules/scak/agent_kernel/nudge_mechanism.py +260 -0
- modules/scak/agent_kernel/outcome_analyzer.py +335 -0
- modules/scak/agent_kernel/patcher.py +579 -0
- modules/scak/agent_kernel/semantic_analyzer.py +313 -0
- modules/scak/agent_kernel/semantic_purge.py +346 -0
- modules/scak/agent_kernel/simulator.py +447 -0
- modules/scak/agent_kernel/teacher.py +82 -0
- modules/scak/agent_kernel/triage.py +149 -0
- modules/scak/build_and_publish.ps1 +74 -0
- modules/scak/build_and_publish.sh +74 -0
- modules/scak/cli.py +471 -0
- modules/scak/dashboard.py +462 -0
- modules/scak/datasets/DATASET_CARD.md +219 -0
- modules/scak/datasets/README.md +143 -0
- modules/scak/datasets/gaia_vague_queries/vague_queries.json +262 -0
- modules/scak/datasets/hf_upload/README.md +219 -0
- modules/scak/datasets/hf_upload/scak_gaia_laziness.jsonl +50 -0
- modules/scak/datasets/prepare_hf_datasets.py +145 -0
- modules/scak/datasets/red_team/jailbreak_patterns.json +202 -0
- modules/scak/docker-compose.yml +99 -0
- modules/scak/docs/Adaptive-Memory-Hierarchy.md +319 -0
- modules/scak/docs/Data-Contracts-and-Schemas.md +285 -0
- modules/scak/docs/Dual-Loop-Architecture.md +344 -0
- modules/scak/docs/Enhanced-Features.md +612 -0
- modules/scak/docs/LANGCHAIN_INTEGRATION.md +572 -0
- modules/scak/docs/README.md +128 -0
- modules/scak/docs/Reference-Implementations.md +163 -0
- modules/scak/docs/SCAK_V2.md +374 -0
- modules/scak/docs/Three-Failure-Types.md +178 -0
- modules/scak/examples/basic_example.py +155 -0
- modules/scak/examples/circuit_breaker_lazy_eval_demo.py +243 -0
- modules/scak/examples/langchain_integration_example.py +339 -0
- modules/scak/examples/layer4_demo.py +243 -0
- modules/scak/examples/production_features_demo.py +353 -0
- modules/scak/examples/quick_demo.py +79 -0
- modules/scak/examples/scak_v2_demo.py +252 -0
- modules/scak/experiments/README.md +438 -0
- modules/scak/experiments/ablation_studies/README.md +192 -0
- modules/scak/experiments/ablation_studies/ablation_no_audit.py +116 -0
- modules/scak/experiments/ablation_studies/ablation_no_purge.py +133 -0
- modules/scak/experiments/chaos_engineering/README.md +332 -0
- modules/scak/experiments/context_efficiency_test.py +328 -0
- modules/scak/experiments/gaia_benchmark/README.md +208 -0
- modules/scak/experiments/laziness_benchmark.py +179 -0
- modules/scak/experiments/long_horizon_task_experiment.py +252 -0
- modules/scak/experiments/multi_agent_rag_experiment.py +284 -0
- modules/scak/experiments/results/ablation_table.md +12 -0
- modules/scak/experiments/results/long_horizon.json +36 -0
- modules/scak/experiments/results/multi_agent_rag.json +66 -0
- modules/scak/experiments/run_comprehensive_ablations.py +332 -0
- modules/scak/experiments/test_auditor_patcher_integration.py +251 -0
- modules/scak/notebooks/getting_started.ipynb +33 -0
- modules/scak/paper/ARXIV_SUBMISSION_METADATA.txt +109 -0
- modules/scak/paper/PAPER_CHECKLIST.md +304 -0
- modules/scak/paper/Paper.pdf +0 -0
- modules/scak/paper/README.md +113 -0
- modules/scak/paper/appendix.md +351 -0
- modules/scak/paper/arxiv/bibliography.bib +284 -0
- modules/scak/paper/arxiv/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/arxiv/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/arxiv/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/arxiv/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/arxiv/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/arxiv/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/arxiv/main.aux +103 -0
- modules/scak/paper/arxiv/main.bbl +113 -0
- modules/scak/paper/arxiv/main.blg +55 -0
- modules/scak/paper/arxiv/main.out +31 -0
- modules/scak/paper/arxiv/main.pdf +0 -0
- modules/scak/paper/arxiv/main.tex +482 -0
- modules/scak/paper/arxiv_submission/bibliography.bib +284 -0
- modules/scak/paper/arxiv_submission/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/arxiv_submission/main.aux +103 -0
- modules/scak/paper/arxiv_submission/main.bbl +113 -0
- modules/scak/paper/arxiv_submission/main.blg +55 -0
- modules/scak/paper/arxiv_submission/main.out +31 -0
- modules/scak/paper/arxiv_submission/main.pdf +0 -0
- modules/scak/paper/arxiv_submission/main.tex +482 -0
- modules/scak/paper/arxiv_submission.tar.gz +0 -0
- modules/scak/paper/bibliography.bib +284 -0
- modules/scak/paper/build.sh +55 -0
- modules/scak/paper/figures/README.md +32 -0
- modules/scak/paper/figures/fig1_ooda_architecture.md +75 -0
- modules/scak/paper/figures/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/figures/fig1_ooda_architecture.png +0 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.md +83 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.png +0 -0
- modules/scak/paper/figures/fig3_gaia_results.md +64 -0
- modules/scak/paper/figures/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/figures/fig3_gaia_results.png +0 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.md +64 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.png +0 -0
- modules/scak/paper/figures/fig5_context_reduction.md +71 -0
- modules/scak/paper/figures/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/figures/fig5_context_reduction.png +0 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.md +80 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.png +0 -0
- modules/scak/paper/figures/generate_figures.py +463 -0
- modules/scak/paper/main.aux +103 -0
- modules/scak/paper/main.bbl +113 -0
- modules/scak/paper/main.blg +55 -0
- modules/scak/paper/main.md +192 -0
- modules/scak/paper/main.out +31 -0
- modules/scak/paper/main.pdf +0 -0
- modules/scak/paper/main.tex +482 -0
- modules/scak/reproducibility/ABLATIONS.md +225 -0
- modules/scak/reproducibility/Dockerfile.reproducibility +34 -0
- modules/scak/reproducibility/README.md +421 -0
- modules/scak/reproducibility/requirements-pinned.txt +32 -0
- modules/scak/reproducibility/run_all_experiments.py +395 -0
- modules/scak/reproducibility/seed_control.py +53 -0
- modules/scak/reproducibility/statistical_analysis.py +302 -0
- modules/scak/requirements.txt +50 -0
- modules/scak/setup.py +93 -0
- modules/scak/src/__init__.py +124 -0
- modules/scak/src/agents/__init__.py +13 -0
- modules/scak/src/agents/conflict_resolution.py +732 -0
- modules/scak/src/agents/orchestrator.py +761 -0
- modules/scak/src/agents/pubsub.py +484 -0
- modules/scak/src/agents/shadow_teacher.py +344 -0
- modules/scak/src/agents/swarm.py +661 -0
- modules/scak/src/agents/worker.py +357 -0
- modules/scak/src/integrations/__init__.py +81 -0
- modules/scak/src/integrations/cmvk_adapter.py +430 -0
- modules/scak/src/integrations/control_plane_adapter.py +601 -0
- modules/scak/src/integrations/langchain_integration.py +902 -0
- modules/scak/src/interfaces/__init__.py +59 -0
- modules/scak/src/interfaces/llm_clients.py +505 -0
- modules/scak/src/interfaces/openapi_tools.py +611 -0
- modules/scak/src/interfaces/plugin_system.py +605 -0
- modules/scak/src/interfaces/protocols.py +365 -0
- modules/scak/src/interfaces/telemetry.py +464 -0
- modules/scak/src/interfaces/tool_registry.py +547 -0
- modules/scak/src/kernel/__init__.py +100 -0
- modules/scak/src/kernel/auditor.py +305 -0
- modules/scak/src/kernel/circuit_breaker.py +398 -0
- modules/scak/src/kernel/core.py +724 -0
- modules/scak/src/kernel/distributed.py +667 -0
- modules/scak/src/kernel/evolution.py +455 -0
- modules/scak/src/kernel/failover.py +621 -0
- modules/scak/src/kernel/governance.py +710 -0
- modules/scak/src/kernel/governance_v2.py +603 -0
- modules/scak/src/kernel/lazy_evaluator.py +514 -0
- modules/scak/src/kernel/load_testing.py +633 -0
- modules/scak/src/kernel/memory.py +945 -0
- modules/scak/src/kernel/patcher.py +581 -0
- modules/scak/src/kernel/rubric.py +419 -0
- modules/scak/src/kernel/schemas.py +390 -0
- modules/scak/src/kernel/skill_mapper.py +309 -0
- modules/scak/src/kernel/triage.py +149 -0
- modules/scak/src/mocks/__init__.py +99 -0
- modules/scak/tests/__init__.py +1 -0
- modules/scak/tests/test_circuit_breaker.py +403 -0
- modules/scak/tests/test_conflict_resolution.py +287 -0
- modules/scak/tests/test_dual_loop.py +463 -0
- modules/scak/tests/test_enhanced_features.py +421 -0
- modules/scak/tests/test_failover_and_load.py +438 -0
- modules/scak/tests/test_governance.py +185 -0
- modules/scak/tests/test_kernel.py +359 -0
- modules/scak/tests/test_langchain_integration.py +451 -0
- modules/scak/tests/test_lazy_evaluator.py +465 -0
- modules/scak/tests/test_llm_clients.py +122 -0
- modules/scak/tests/test_memory_controller.py +528 -0
- modules/scak/tests/test_orchestrator.py +181 -0
- modules/scak/tests/test_phase3_integration.py +265 -0
- modules/scak/tests/test_pubsub_swarm.py +203 -0
- modules/scak/tests/test_reference_implementations.py +240 -0
- modules/scak/tests/test_rubric.py +363 -0
- modules/scak/tests/test_scak_v2.py +651 -0
- modules/scak/tests/test_skill_mapper.py +217 -0
- modules/scak/tests/test_specific_failures.py +393 -0
- modules/scak/tests/test_tool_registry.py +264 -0
- modules/scak/tests/test_tools_and_plugins.py +303 -0
- modules/scak/tests/test_triage.py +596 -0
- modules/scak/tests/test_write_through.py +319 -0
- agent_os_kernel-1.1.0.dist-info/METADATA +0 -400
- agent_os_kernel-1.1.0.dist-info/RECORD +0 -12
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.2.0.dist-info}/WHEEL +0 -0
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.2.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Self-Correcting Agent Kernel
|
|
3
|
+
|
|
4
|
+
A Dual-Loop Architecture for Enterprise Agents:
|
|
5
|
+
- Loop 1 (Runtime): Constraint Engine (Safety)
|
|
6
|
+
- Loop 2 (Offline): Alignment Engine (Quality & Efficiency)
|
|
7
|
+
- Completeness Auditor (detects laziness)
|
|
8
|
+
- Semantic Purge (scales by subtraction)
|
|
9
|
+
|
|
10
|
+
Reference Implementations:
|
|
11
|
+
- auditor.py: Simplified soft failure detection
|
|
12
|
+
- teacher.py: Shadow Teacher diagnosis
|
|
13
|
+
- memory_manager.py: Lesson lifecycle management
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
__version__ = "0.2.0"
|
|
17
|
+
|
|
18
|
+
from .kernel import SelfCorrectingAgentKernel
|
|
19
|
+
from .models import (
|
|
20
|
+
AgentFailure, FailureAnalysis, CorrectionPatch,
|
|
21
|
+
AgentOutcome, CompletenessAudit, ClassifiedPatch,
|
|
22
|
+
OutcomeType, GiveUpSignal, PatchDecayType,
|
|
23
|
+
ToolExecutionTelemetry, ToolExecutionStatus,
|
|
24
|
+
SemanticAnalysis, NudgeResult
|
|
25
|
+
)
|
|
26
|
+
from .outcome_analyzer import OutcomeAnalyzer
|
|
27
|
+
from .completeness_auditor import CompletenessAuditor
|
|
28
|
+
from .semantic_purge import SemanticPurge, PatchClassifier
|
|
29
|
+
from .triage import FailureTriage, FixStrategy
|
|
30
|
+
from .semantic_analyzer import SemanticAnalyzer
|
|
31
|
+
from .nudge_mechanism import NudgeMechanism
|
|
32
|
+
|
|
33
|
+
# Reference implementations (simplified examples)
|
|
34
|
+
from .auditor import CompletenessAuditor as SimpleCompletenessAuditor
|
|
35
|
+
from .teacher import diagnose_failure
|
|
36
|
+
from .memory_manager import MemoryManager, LessonType
|
|
37
|
+
|
|
38
|
+
__all__ = [
|
|
39
|
+
"SelfCorrectingAgentKernel",
|
|
40
|
+
"AgentFailure",
|
|
41
|
+
"FailureAnalysis",
|
|
42
|
+
"CorrectionPatch",
|
|
43
|
+
"AgentOutcome",
|
|
44
|
+
"CompletenessAudit",
|
|
45
|
+
"ClassifiedPatch",
|
|
46
|
+
"OutcomeType",
|
|
47
|
+
"GiveUpSignal",
|
|
48
|
+
"PatchDecayType",
|
|
49
|
+
"ToolExecutionTelemetry",
|
|
50
|
+
"ToolExecutionStatus",
|
|
51
|
+
"SemanticAnalysis",
|
|
52
|
+
"NudgeResult",
|
|
53
|
+
"OutcomeAnalyzer",
|
|
54
|
+
"CompletenessAuditor",
|
|
55
|
+
"SemanticPurge",
|
|
56
|
+
"PatchClassifier",
|
|
57
|
+
"FailureTriage",
|
|
58
|
+
"FixStrategy",
|
|
59
|
+
"SemanticAnalyzer",
|
|
60
|
+
"NudgeMechanism",
|
|
61
|
+
# Reference implementations
|
|
62
|
+
"SimpleCompletenessAuditor",
|
|
63
|
+
"diagnose_failure",
|
|
64
|
+
"MemoryManager",
|
|
65
|
+
"LessonType",
|
|
66
|
+
]
|
|
@@ -0,0 +1,432 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Failure analysis system that diagnoses root causes.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from typing import List, Optional, Dict
|
|
7
|
+
from collections import Counter
|
|
8
|
+
|
|
9
|
+
from .models import AgentFailure, FailureAnalysis, FailureType, DiagnosisJSON, CognitiveGlitch
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class FailureAnalyzer:
|
|
15
|
+
"""Analyzes failures to identify root causes and suggest fixes."""
|
|
16
|
+
|
|
17
|
+
def __init__(self):
    """Create an analyzer with an empty history and the built-in pattern table."""
    # Every FailureAnalysis produced by analyze() is appended to this list.
    self.analysis_history: List[FailureAnalysis] = []
    # Lookup table built once per instance by _load_known_patterns().
    self.known_patterns: Dict[str, dict] = self._load_known_patterns()
|
|
20
|
+
|
|
21
|
+
def _load_known_patterns(self) -> Dict[str, dict]:
    """Build the table of known failure patterns and their remedies.

    Returns:
        A newly built dict keyed by ``FailureType`` member. Each value is a
        dict with a "root_causes" list and a "fixes" list of
        human-readable strings.
    """
    # One (failure type, root causes, fixes) row per known pattern.
    catalog = (
        (
            FailureType.BLOCKED_BY_CONTROL_PLANE,
            (
                "Missing permission validation",
                "Attempting unauthorized resource access",
                "Policy violation",
                "Security constraint violation",
            ),
            (
                "Add permission checks before actions",
                "Implement resource access validation",
                "Use safe alternatives for restricted operations",
                "Request proper authorization before attempting action",
            ),
        ),
        (
            FailureType.TIMEOUT,
            (
                "Operation taking too long",
                "Infinite loop or deadlock",
                "Network latency",
                "Resource contention",
            ),
            (
                "Implement operation timeout handling",
                "Add progress monitoring",
                "Optimize algorithm efficiency",
                "Add async/parallel processing",
            ),
        ),
        (
            FailureType.INVALID_ACTION,
            (
                "Invalid input parameters",
                "Action not supported in current state",
                "Precondition not met",
            ),
            (
                "Add input validation",
                "Check state before action",
                "Verify preconditions",
            ),
        ),
        (
            FailureType.RESOURCE_EXHAUSTED,
            (
                "Memory leak",
                "Unbounded resource allocation",
                "Missing cleanup",
            ),
            (
                "Implement resource cleanup",
                "Add resource limits",
                "Use resource pooling",
            ),
        ),
        (
            FailureType.LOGIC_ERROR,
            (
                "Incorrect algorithm",
                "Edge case not handled",
                "Type mismatch",
            ),
            (
                "Fix algorithm logic",
                "Add edge case handling",
                "Add type checking",
            ),
        ),
    )

    # Materialize fresh lists on every call so each returned table owns
    # its own mutable entries.
    return {
        failure_type: {"root_causes": list(causes), "fixes": list(remedies)}
        for failure_type, causes, remedies in catalog
    }
|
|
89
|
+
|
|
90
|
+
def analyze(self, failure: AgentFailure, similar_failures: Optional[List[AgentFailure]] = None) -> FailureAnalysis:
    """
    Diagnose a single failure and record the resulting analysis.

    Args:
        failure: The failure to analyze
        similar_failures: Optional list of similar past failures; at most
            the first five contribute identifiers to the result

    Returns:
        FailureAnalysis holding the root cause, contributing factors,
        suggested fixes, confidence score and similar-failure identifiers.
        The same object is also appended to ``self.analysis_history``.
    """
    logger.info(f"Analyzing failure for agent {failure.agent_id}")

    # A failure type without a known-pattern entry falls back to an empty dict.
    patterns = self.known_patterns.get(failure.failure_type, {})

    # Helpers run in this fixed order: cause, factors, fixes, confidence.
    root_cause = self._identify_root_cause(failure, patterns)
    contributing_factors = self._identify_contributing_factors(failure, patterns)
    suggested_fixes = self._generate_fixes(failure, patterns)
    confidence_score = self._calculate_confidence(failure, similar_failures)

    # Identifier format is "<agent_id>_<timestamp>", capped at five entries.
    similar_failure_ids = (
        [past.agent_id + "_" + str(past.timestamp) for past in similar_failures[:5]]
        if similar_failures
        else []
    )

    analysis = FailureAnalysis(
        failure=failure,
        root_cause=root_cause,
        contributing_factors=contributing_factors,
        suggested_fixes=suggested_fixes,
        confidence_score=confidence_score,
        similar_failures=similar_failure_ids,
    )
    self.analysis_history.append(analysis)

    logger.info(f"Analysis complete. Root cause: {root_cause} (confidence: {confidence_score:.2f})")
    return analysis
|
|
136
|
+
|
|
137
|
+
def _identify_root_cause(self, failure: AgentFailure, patterns: dict) -> str:
    """Identify the root cause of the failure.

    Args:
        failure: The failure being analyzed.
        patterns: Known-pattern entry for the failure type; its optional
            "root_causes" list supplies the default answer.

    Returns:
        For control plane blocks whose error message mentions "permission"
        or "policy", a message-specific cause. Otherwise the first entry of
        ``patterns["root_causes"]``, or "Unknown root cause" when that list
        is missing or empty.
    """
    # `or` (rather than a .get default) also covers an entry that is present
    # but empty/None, which would otherwise fail on the [0] lookup below.
    root_causes = patterns.get("root_causes") or ["Unknown root cause"]

    # For control plane blocks, the error message can name a more specific cause.
    if failure.failure_type == FailureType.BLOCKED_BY_CONTROL_PLANE:
        message = failure.error_message.lower()
        if "permission" in message:
            return "Missing or insufficient permissions for requested operation"
        if "policy" in message:
            return "Action violates control plane policy"

    # Default: the first registered root cause.
    return root_causes[0]
|
|
153
|
+
|
|
154
|
+
def _identify_contributing_factors(self, failure: AgentFailure, patterns: dict) -> List[str]:
    """Collect human-readable notes about circumstances surrounding the failure.

    Args:
        failure: The failure being analyzed; its severity, stack trace and
            context are inspected.
        patterns: Known-pattern entry for the failure type (not used here).

    Returns:
        A list with one note per condition that holds: high/critical
        severity, a stack trace being present, and context being present.
        The list is empty when none apply.
    """
    notes: List[str] = []

    severity_label = failure.severity.value
    if severity_label in ["high", "critical"]:
        notes.append("High severity failure requiring immediate attention")

    if failure.stack_trace:
        notes.append("Stack trace available for detailed debugging")

    if failure.context:
        # List the context keys so the reader knows what extra data exists.
        context_keys = ', '.join(failure.context.keys())
        notes.append(f"Additional context available: {context_keys}")

    return notes
|
|
169
|
+
|
|
170
|
+
def _generate_fixes(self, failure: AgentFailure, patterns: dict) -> List[str]:
    """Generate suggested fixes for the failure.

    Starts from the fixes registered in ``patterns`` (or a generic "manual
    investigation" entry when none are registered). For control plane blocks
    it adds suggestions derived from the "file" and "action" context entries.

    Args:
        failure: The failure being analyzed.
        patterns: Known-pattern entry for the failure type; its optional
            "fixes" list is read but never modified.

    Returns:
        At most the first three suggested fixes.
    """
    # Work on a copy: `patterns` is the entry held in self.known_patterns, so
    # appending to its "fixes" list in place would grow that shared list on
    # every call.
    fixes = list(patterns.get("fixes") or ["Manual investigation required"])

    # Add specific fixes based on failure type
    if failure.failure_type == FailureType.BLOCKED_BY_CONTROL_PLANE:
        # A missing/empty context simply contributes no extra suggestions.
        context = failure.context or {}
        if "file" in context:
            fixes.append(f"Validate access permissions for: {context['file']}")
        if "action" in context:
            fixes.append(f"Check if action '{context['action']}' is allowed by policy")

    return fixes[:3]  # Return top 3 fixes
|
|
182
|
+
|
|
183
|
+
def _calculate_confidence(self, failure: AgentFailure, similar_failures: Optional[List[AgentFailure]]) -> float:
    """Score how much trust to place in the pattern-based analysis.

    The score starts at 0.5 and gains:
      * 0.2 when the failure type has an entry in ``self.known_patterns``;
      * 0.05 per similar failure, capped at 0.2;
      * 0.1 when the failure carries a non-empty context.

    Args:
        failure: The failure being analyzed.
        similar_failures: Similar past failures, or None.

    Returns:
        The confidence score, never above 1.0.
    """
    pattern_bonus = 0.2 if failure.failure_type in self.known_patterns else 0.0
    history_bonus = min(0.2, len(similar_failures) * 0.05) if similar_failures else 0.0
    context_bonus = 0.1 if failure.context else 0.0

    # Summed left to right, starting from the 0.5 base confidence.
    return min(1.0, 0.5 + pattern_bonus + history_bonus + context_bonus)
|
|
200
|
+
|
|
201
|
+
def diagnose_cognitive_glitch(self, failure: AgentFailure) -> DiagnosisJSON:
    """
    Diagnose the reasoning flaw ("cognitive glitch") behind a failure.

    This is "The Analyst": it inspects the reasoning trace that led to the
    error rather than only the error itself. When the failure carries no
    trace, the result of ``_basic_diagnosis`` is returned instead.

    Args:
        failure: AgentFailure with full trace

    Returns:
        DiagnosisJSON with cognitive glitch identification
    """
    logger.info(f"Diagnosing cognitive glitch for agent {failure.agent_id}")

    trace = failure.failure_trace
    if not trace:
        # No trace available: fall back to the basic diagnosis.
        return self._basic_diagnosis(failure)

    # Classify first; the later steps are all driven by the glitch type.
    glitch = self._identify_cognitive_glitch(failure, trace)
    deep_problem = self._analyze_deep_problem(failure, trace, glitch)
    evidence = self._collect_evidence(failure, trace, glitch)

    # The hint is what gets injected for the counterfactual simulation.
    hint = self._generate_hint(failure, trace, glitch)
    expected_fix = self._describe_expected_fix(glitch, hint)
    confidence = self._calculate_diagnosis_confidence(failure, trace, evidence)

    diagnosis = DiagnosisJSON(
        cognitive_glitch=glitch,
        deep_problem=deep_problem,
        evidence=evidence,
        hint=hint,
        expected_fix=expected_fix,
        confidence=confidence
    )

    logger.info(f"Diagnosis complete: {glitch.value} (confidence: {confidence:.2f})")
    return diagnosis
|
|
251
|
+
|
|
252
|
+
def _identify_cognitive_glitch(self, failure: AgentFailure, trace) -> CognitiveGlitch:
    """Identify the type of cognitive glitch.

    Runs an ordered series of keyword checks against the lowercased error
    message, the stringified failed action, the user prompt and the chain of
    thought. The first check that matches decides the result, so the order
    of the checks below is significant.

    Args:
        failure: The failure whose error message is inspected.
        trace: The failure trace; ``failed_action``, ``user_prompt`` and
            ``chain_of_thought`` are read from it.

    Returns:
        The first matching CognitiveGlitch, or ``LOGIC_ERROR`` when no check
        matches.
    """
    error_lower = failure.error_message.lower()

    # Check for tool misuse (wrong parameter types) - high priority
    if any(keyword in error_lower for keyword in ["type error", "invalid type", "expected uuid", "wrong parameter type", "parameter type mismatch"]):
        return CognitiveGlitch.TOOL_MISUSE
    # NOTE(review): `"id" in error_lower` is a substring test, so it is also
    # true for words such as "invalid". Because "invalid" is one of the
    # companion keywords, this branch is taken for every message containing
    # "invalid" - including "invalid reference", which the hallucination
    # check further down lists as one of its own keywords.
    if "uuid" in error_lower or ("id" in error_lower and any(kw in error_lower for kw in ["invalid", "malformed", "not a valid", "format"])):
        # Check if this looks like a tool misuse scenario
        if trace.failed_action:
            action_str = str(trace.failed_action).lower()
            if any(kw in action_str for kw in ["name", "username", "email", "params", "id"]):
                return CognitiveGlitch.TOOL_MISUSE
        # NOTE(review): every path through this branch returns TOOL_MISUSE,
        # so the inspection of failed_action above does not change the
        # result. Confirm the intended nesting of this return.
        return CognitiveGlitch.TOOL_MISUSE

    # Check for policy violations (e.g., medical advice, legal advice) - high priority
    if any(keyword in error_lower for keyword in ["policy violation", "violates policy", "not allowed to", "cannot advise", "cannot provide"]):
        return CognitiveGlitch.POLICY_VIOLATION
    # Check for specific policy domains in combination with blocking
    if trace.user_prompt:
        prompt_lower = trace.user_prompt.lower()
        # Both conditions must hold: the prompt names a restricted domain AND
        # the error message reads like a block/refusal.
        if any(domain in prompt_lower for domain in ["medical", "health", "diagnosis", "treatment", "medicine", "legal", "attorney", "sue", "investment", "stock"]):
            if any(keyword in error_lower for keyword in ["blocked", "violation", "not permitted", "cannot", "policy"]):
                return CognitiveGlitch.POLICY_VIOLATION

    # Check for hallucination (inventing facts) - check early before context gap
    if any(keyword in error_lower for keyword in ["not found", "does not exist", "unknown", "deprecated", "invalid reference", "no such"]):
        return CognitiveGlitch.HALLUCINATION

    # Check for schema mismatch
    if trace.failed_action:
        action_str = str(trace.failed_action).lower()
        # NOTE(review): "mismatch" is looked up in the stringified action,
        # not in the error message - confirm this is intended.
        if "schema" in error_lower and "mismatch" in action_str:
            return CognitiveGlitch.SCHEMA_MISMATCH

    # Check for logic error (misunderstanding)
    if trace.chain_of_thought:
        # presumably chain_of_thought is a list of strings; verify against caller
        cot_text = " ".join(trace.chain_of_thought).lower()
        # Hedging phrases in the reasoning are treated as a sign of guessing.
        if any(keyword in cot_text for keyword in ["i think", "probably", "assume", "guess"]):
            return CognitiveGlitch.LOGIC_ERROR

    # Check for context gap (missing information) - lower priority
    if not trace.chain_of_thought or len(trace.chain_of_thought) < 2:
        # Don't default to context gap if we have other clear signals
        # NOTE(review): messages containing "uuid" already returned in the
        # tool-misuse branch above, so only the substring test for "id" can
        # still match here (it also matches words like "provided").
        if trace.failed_action and ("uuid" in error_lower or "id" in error_lower):
            return CognitiveGlitch.TOOL_MISUSE
        # NOTE(review): unreachable as written - both keywords already
        # return HALLUCINATION in the earlier hallucination check.
        if any(keyword in error_lower for keyword in ["not found", "does not exist"]):
            return CognitiveGlitch.HALLUCINATION
        return CognitiveGlitch.CONTEXT_GAP

    # Check for permission errors
    # Only reached when the chain of thought has at least two entries; shorter
    # traces have already returned from the context-gap block above.
    if any(keyword in error_lower for keyword in ["permission", "unauthorized", "forbidden"]):
        # Distinguish from policy violations
        if "policy" not in error_lower and "violates" not in error_lower:
            return CognitiveGlitch.PERMISSION_ERROR

    return CognitiveGlitch.LOGIC_ERROR  # Default
|
|
310
|
+
|
|
311
|
+
def _analyze_deep_problem(self, failure: AgentFailure, trace, glitch: CognitiveGlitch) -> str:
    """Describe the underlying problem for the identified glitch type.

    Args:
        failure: The failure being diagnosed (not used here).
        trace: The failure trace; ``failed_action`` or ``user_prompt`` is
            quoted in some of the descriptions.
        glitch: The glitch type chosen by the classifier.

    Returns:
        A one-sentence description for the glitch type, or
        "Unknown deep problem" for any type without a specific description.
    """
    if glitch == CognitiveGlitch.HALLUCINATION:
        return f"Agent invented non-existent entities in action: {trace.failed_action}"

    if glitch == CognitiveGlitch.LOGIC_ERROR:
        return f"Agent misunderstood user intent in prompt: '{trace.user_prompt}'"

    if glitch == CognitiveGlitch.CONTEXT_GAP:
        return "Agent lacked necessary context (schema/permissions) to safely execute action"

    if glitch == CognitiveGlitch.PERMISSION_ERROR:
        return "Agent attempted unauthorized action without checking permissions first"

    if glitch == CognitiveGlitch.SCHEMA_MISMATCH:
        return "Agent referenced incorrect schema elements in action"

    if glitch == CognitiveGlitch.TOOL_MISUSE:
        return f"Agent used tool with wrong parameter type or value: {trace.failed_action}"

    if glitch == CognitiveGlitch.POLICY_VIOLATION:
        return f"Agent violated policy boundaries by attempting: '{trace.user_prompt}'"

    return "Unknown deep problem"
|
|
328
|
+
|
|
329
|
+
def _collect_evidence(self, failure: AgentFailure, trace, glitch: CognitiveGlitch) -> List[str]:
    """Assemble the evidence lines that back the diagnosis.

    Args:
        failure: The failure being diagnosed; its error message is quoted.
        trace: The failure trace; the user prompt, failed action and chain
            of thought are quoted.
        glitch: The identified glitch type (not used here).

    Returns:
        Three lines (prompt, failed action, error) plus, when the trace has
        a non-empty chain of thought, its step count and its last entry.
    """
    items = [
        f"User prompt: '{trace.user_prompt}'",
        f"Failed action: {trace.failed_action}",
        f"Error: {failure.error_message}",
    ]

    thoughts = trace.chain_of_thought
    if thoughts:
        items.extend((
            f"Reasoning steps: {len(thoughts)} steps",
            f"Last thought: '{thoughts[-1]}'",
        ))

    return items
|
|
343
|
+
|
|
344
|
+
def _generate_hint(self, failure: AgentFailure, trace, glitch: CognitiveGlitch) -> str:
    """Produce the hint text to inject for counterfactual simulation.

    Args:
        failure: The failure being diagnosed (not used here).
        trace: The failure trace; its user prompt is quoted in the
            logic-error hint.
        glitch: The identified glitch type, which selects the hint.

    Returns:
        A "HINT: ..." string for the glitch type, or a generic cautionary
        hint for any type without a specific one.
    """
    if glitch == CognitiveGlitch.HALLUCINATION:
        return "HINT: Always verify entity names against the provided schema before using them. Available tables/resources must be explicitly listed."

    if glitch == CognitiveGlitch.LOGIC_ERROR:
        # The only hint that quotes the original prompt.
        return f"HINT: When interpreting '{trace.user_prompt}', be precise about terms like 'recent', 'delete', 'modify'. Ask for clarification if ambiguous."

    if glitch == CognitiveGlitch.CONTEXT_GAP:
        return "HINT: Before executing actions, ensure you have: 1) Complete schema information, 2) Permission requirements, 3) Clear action scope."

    if glitch == CognitiveGlitch.PERMISSION_ERROR:
        return "HINT: Always check permissions before attempting actions. Use validate_permissions() first."

    if glitch == CognitiveGlitch.SCHEMA_MISMATCH:
        return "HINT: Available schema elements must be verified before use. Do not assume table/column names."

    if glitch == CognitiveGlitch.TOOL_MISUSE:
        return "HINT: Always verify parameter types match the tool schema. For example, use UUIDs where required, not names or strings."

    if glitch == CognitiveGlitch.POLICY_VIOLATION:
        return "HINT: Some topics are outside your policy boundaries. Refuse requests for medical advice, legal advice, or other restricted domains."

    return "HINT: Proceed with caution and verify all assumptions."
|
|
361
|
+
|
|
362
|
+
def _describe_expected_fix(self, glitch: CognitiveGlitch, hint: str) -> str:
    """Describe the expected outcome of applying the hint.

    Covers the same glitch types as ``_generate_hint`` and
    ``_analyze_deep_problem``, including ``SCHEMA_MISMATCH``.

    Args:
        glitch: The identified glitch type, which selects the description.
        hint: The generated hint text (accepted for interface
            compatibility; not used here).

    Returns:
        A sentence describing the agent's expected behaviour once the hint
        is applied, or a generic sentence for any type without a specific
        description.
    """
    # Pairs are compared with `==`, in order, mirroring an if/elif chain.
    outcomes = (
        (CognitiveGlitch.HALLUCINATION,
         "Agent will verify schema before action and use only existing entities"),
        (CognitiveGlitch.LOGIC_ERROR,
         "Agent will correctly interpret user intent and clarify ambiguous terms"),
        (CognitiveGlitch.CONTEXT_GAP,
         "Agent will request necessary context before proceeding with action"),
        (CognitiveGlitch.PERMISSION_ERROR,
         "Agent will validate permissions before attempting action"),
        (CognitiveGlitch.SCHEMA_MISMATCH,
         "Agent will verify schema elements before referencing them in the action"),
        (CognitiveGlitch.TOOL_MISUSE,
         "Agent will use correct parameter types according to tool schema"),
        (CognitiveGlitch.POLICY_VIOLATION,
         "Agent will refuse to provide advice in restricted domains"),
    )
    for kind, description in outcomes:
        if glitch == kind:
            return description

    return "Agent will handle the situation correctly"
|
|
377
|
+
|
|
378
|
+
def _calculate_diagnosis_confidence(self, failure: AgentFailure, trace, evidence: List[str]) -> float:
    """Score how much trust to place in the cognitive-glitch diagnosis.

    The score starts at 0.5 and gains:
      * 0.2 when the chain of thought has more than two entries;
      * 0.15 when a non-empty failed action is recorded;
      * 0.15 when at least four evidence lines were collected.

    Args:
        failure: The failure being diagnosed (not used here).
        trace: The failure trace; ``chain_of_thought`` and ``failed_action``
            are inspected.
        evidence: Evidence lines collected for the diagnosis.

    Returns:
        The confidence score, never above 1.0.
    """
    thoughts = trace.chain_of_thought
    action = trace.failed_action

    # (condition, bonus) pairs, applied in this order on top of the 0.5 base.
    bonuses = (
        (thoughts and len(thoughts) > 2, 0.2),
        (action and len(action) > 0, 0.15),
        (len(evidence) >= 4, 0.15),
    )

    score = 0.5
    for applies, bonus in bonuses:
        if applies:
            score += bonus

    return min(1.0, score)
|
|
395
|
+
|
|
396
|
+
def _basic_diagnosis(self, failure: AgentFailure) -> DiagnosisJSON:
    """Build the fallback diagnosis used when the failure has no trace.

    Args:
        failure: The failure being diagnosed; only its error message is used.

    Returns:
        A DiagnosisJSON with glitch ``NONE``, the error message as its only
        evidence, a generic validation hint and a fixed confidence of 0.5.
    """
    message = failure.error_message
    return DiagnosisJSON(
        cognitive_glitch=CognitiveGlitch.NONE,
        deep_problem=f"No trace available. Basic error: {message}",
        evidence=[f"Error message: {message}"],
        hint="HINT: Ensure proper validation before actions.",
        expected_fix="Action will be validated before execution",
        confidence=0.5,
    )
|
|
406
|
+
|
|
407
|
+
def find_similar_failures(self, failure: AgentFailure, history: List[AgentFailure]) -> List[AgentFailure]:
    """Select past failures that resemble the given one.

    A past failure matches when it has the same failure type and its error
    message has a word-set similarity above 0.6 with the given failure's
    message.

    Args:
        failure: The failure to compare against.
        history: Past failures to search.

    Returns:
        Up to ten matching failures, in the order they appear in
        ``history`` (matches are not ranked by similarity).
    """
    matches = [
        candidate
        for candidate in history
        if candidate.failure_type == failure.failure_type
        and self._calculate_similarity(failure.error_message, candidate.error_message) > 0.6
    ]
    return matches[:10]
|
|
419
|
+
|
|
420
|
+
def _calculate_similarity(self, msg1: str, msg2: str) -> float:
|
|
421
|
+
"""Calculate similarity between two error messages."""
|
|
422
|
+
# Simple word-based similarity
|
|
423
|
+
words1 = set(msg1.lower().split())
|
|
424
|
+
words2 = set(msg2.lower().split())
|
|
425
|
+
|
|
426
|
+
if not words1 or not words2:
|
|
427
|
+
return 0.0
|
|
428
|
+
|
|
429
|
+
intersection = words1.intersection(words2)
|
|
430
|
+
union = words1.union(words2)
|
|
431
|
+
|
|
432
|
+
return len(intersection) / len(union) if union else 0.0
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Auditor - Simplified reference implementation for soft failure detection.
|
|
3
|
+
|
|
4
|
+
This is a reference implementation showing the core concept of detecting
|
|
5
|
+
"soft failures" (laziness) where agents give up without trying hard enough.
|
|
6
|
+
|
|
7
|
+
The production implementation is in completeness_auditor.py, which includes
|
|
8
|
+
more sophisticated features like differential auditing and teacher model integration.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
class CompletenessAuditor:
    """Detect "soft failures" where an agent gives up instead of finishing.

    A response is flagged when it contains a resignation phrase, or when a
    tool produced output that is empty or nearly empty.
    """

    # Tool outputs shorter than this (per len()) count as an "empty success".
    MIN_TOOL_OUTPUT_LENGTH = 10

    def __init__(self):
        # Trigger phrases (lowercase) that suggest the agent gave up
        self.lazy_signals = [
            "i cannot", "i'm sorry", "no data found",
            "unable to access", "context does not contain"
        ]

    def audit_response(self, agent_response, tool_output):
        """Decide whether a response needs intervention.

        Args:
            agent_response: The agent's reply text; None is treated as an
                empty reply.
            tool_output: The tool's output (anything supporting len()), or
                None when there is no tool output to judge.

        Returns:
            needs_intervention (bool): True when the reply contains a
            resignation phrase or the tool output is shorter than
            MIN_TOOL_OUTPUT_LENGTH; False otherwise.
        """
        # 1. Check for verbal resignation
        response_text = (agent_response or "").lower()
        if any(sig in response_text for sig in self.lazy_signals):
            return True

        # 2. Check for "Empty Success" (Tool worked, but returned nothing)
        # Compare against None rather than relying on truthiness: "" and []
        # are falsy, yet they are exactly the empty results this check targets.
        if tool_output is not None and len(tool_output) < self.MIN_TOOL_OUTPUT_LENGTH:  # e.g. "[]" or ""
            return True

        return False
|