agent-os-kernel 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_os/__init__.py +66 -4
- agent_os/agents_compat.py +286 -0
- agent_os/base_agent.py +308 -0
- agent_os/cli.py +1079 -19
- agent_os/integrations/__init__.py +37 -2
- agent_os/integrations/openai_adapter.py +502 -0
- agent_os/integrations/semantic_kernel_adapter.py +569 -0
- agent_os/stateless.py +349 -0
- agent_os_kernel-1.2.0.dist-info/METADATA +676 -0
- agent_os_kernel-1.2.0.dist-info/RECORD +1053 -0
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.2.0.dist-info}/entry_points.txt +0 -1
- modules/amb/.github/workflows/ci.yml +102 -0
- modules/amb/.github/workflows/publish.yml +146 -0
- modules/amb/.gitignore +134 -0
- modules/amb/CHANGELOG.md +118 -0
- modules/amb/CONTRIBUTING.md +141 -0
- modules/amb/LICENSE +21 -0
- modules/amb/README.md +188 -0
- modules/amb/amb_core/__init__.py +175 -0
- modules/amb/amb_core/adapters/__init__.py +55 -0
- modules/amb/amb_core/adapters/aws_sqs_broker.py +374 -0
- modules/amb/amb_core/adapters/azure_servicebus_broker.py +338 -0
- modules/amb/amb_core/adapters/kafka_broker.py +258 -0
- modules/amb/amb_core/adapters/nats_broker.py +283 -0
- modules/amb/amb_core/adapters/rabbitmq_broker.py +233 -0
- modules/amb/amb_core/adapters/redis_broker.py +260 -0
- modules/amb/amb_core/broker.py +143 -0
- modules/amb/amb_core/bus.py +479 -0
- modules/amb/amb_core/cloudevents.py +507 -0
- modules/amb/amb_core/dlq.py +343 -0
- modules/amb/amb_core/hf_utils.py +534 -0
- modules/amb/amb_core/memory_broker.py +408 -0
- modules/amb/amb_core/models.py +139 -0
- modules/amb/amb_core/persistence.py +527 -0
- modules/amb/amb_core/schema.py +292 -0
- modules/amb/amb_core/tracing.py +356 -0
- modules/amb/examples/advanced_features.py +223 -0
- modules/amb/examples/backpressure_demo.py +225 -0
- modules/amb/examples/basic_usage.py +117 -0
- modules/amb/examples/tracing_demo.py +104 -0
- modules/amb/experiments/README.md +52 -0
- modules/amb/experiments/reproduce_results.py +467 -0
- modules/amb/experiments/results.json +324 -0
- modules/amb/paper/README.md +40 -0
- modules/amb/paper/paper.tex +365 -0
- modules/amb/paper/whitepaper.md +377 -0
- modules/amb/pyproject.toml +117 -0
- modules/amb/tests/__init__.py +1 -0
- modules/amb/tests/test_backpressure_priority.py +280 -0
- modules/amb/tests/test_bus.py +198 -0
- modules/amb/tests/test_cloudevents.py +443 -0
- modules/amb/tests/test_features.py +531 -0
- modules/amb/tests/test_models.py +74 -0
- modules/amb/tests/test_tracing.py +254 -0
- modules/atr/.github/workflows/ci.yml +101 -0
- modules/atr/.github/workflows/publish.yml +140 -0
- modules/atr/.gitignore +134 -0
- modules/atr/.pre-commit-config.yaml +37 -0
- modules/atr/CHANGELOG.md +39 -0
- modules/atr/CONTRIBUTING.md +96 -0
- modules/atr/IMPLEMENTATION_SUMMARY.md +143 -0
- modules/atr/README.md +180 -0
- modules/atr/atr/__init__.py +638 -0
- modules/atr/atr/access.py +346 -0
- modules/atr/atr/composition.py +643 -0
- modules/atr/atr/decorator.py +355 -0
- modules/atr/atr/executor.py +382 -0
- modules/atr/atr/health.py +555 -0
- modules/atr/atr/hf_utils.py +447 -0
- modules/atr/atr/injection.py +420 -0
- modules/atr/atr/metrics.py +438 -0
- modules/atr/atr/policies.py +401 -0
- modules/atr/atr/py.typed +2 -0
- modules/atr/atr/registry.py +450 -0
- modules/atr/atr/schema.py +478 -0
- modules/atr/atr/tools/safe/__init__.py +73 -0
- modules/atr/atr/tools/safe/calculator.py +380 -0
- modules/atr/atr/tools/safe/datetime_tool.py +441 -0
- modules/atr/atr/tools/safe/file_reader.py +400 -0
- modules/atr/atr/tools/safe/http_client.py +314 -0
- modules/atr/atr/tools/safe/json_parser.py +372 -0
- modules/atr/atr/tools/safe/text_tool.py +526 -0
- modules/atr/atr/tools/safe/toolkit.py +173 -0
- modules/atr/docs/PYPI_SETUP.md +113 -0
- modules/atr/examples/README.md +27 -0
- modules/atr/examples/demo.py +144 -0
- modules/atr/examples/sandbox_demo.py +218 -0
- modules/atr/experiments/README.md +69 -0
- modules/atr/experiments/reproduce_results.py +509 -0
- modules/atr/experiments/results/.gitkeep +0 -0
- modules/atr/experiments/results/results_20260123_140334.json +71 -0
- modules/atr/paper/README.md +36 -0
- modules/atr/paper/figures/.gitkeep +0 -0
- modules/atr/paper/references.bib +84 -0
- modules/atr/paper/structure.tex +293 -0
- modules/atr/paper/whitepaper.md +234 -0
- modules/atr/pyproject.toml +148 -0
- modules/atr/requirements.txt +1 -0
- modules/atr/setup.py +30 -0
- modules/atr/tests/__init__.py +1 -0
- modules/atr/tests/test_decorator.py +317 -0
- modules/atr/tests/test_executor.py +245 -0
- modules/atr/tests/test_integration_executor.py +184 -0
- modules/atr/tests/test_registry.py +312 -0
- modules/atr/tests/test_schema.py +182 -0
- modules/atr/tests/test_v2_features.py +708 -0
- modules/caas/.dockerignore +63 -0
- modules/caas/.github/ISSUE_TEMPLATE/bug_report.md +38 -0
- modules/caas/.github/ISSUE_TEMPLATE/custom.md +10 -0
- modules/caas/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
- modules/caas/.github/workflows/ci.yml +100 -0
- modules/caas/.github/workflows/lint.yml +39 -0
- modules/caas/.github/workflows/publish-pypi.yml +124 -0
- modules/caas/.gitignore +73 -0
- modules/caas/.pre-commit-config.yaml +33 -0
- modules/caas/CHANGELOG.md +58 -0
- modules/caas/CONTRIBUTING.md +346 -0
- modules/caas/Dockerfile +41 -0
- modules/caas/LICENSE +21 -0
- modules/caas/MANIFEST.in +11 -0
- modules/caas/README.md +158 -0
- modules/caas/benchmarks/README.md +255 -0
- modules/caas/benchmarks/create_hf_dataset.py +502 -0
- modules/caas/benchmarks/data/sample_corpus/README.md +86 -0
- modules/caas/benchmarks/data/sample_corpus/auth_module.py +211 -0
- modules/caas/benchmarks/data/sample_corpus/contribution_guide.md +185 -0
- modules/caas/benchmarks/data/sample_corpus/remote_work_policy.html +57 -0
- modules/caas/benchmarks/hf_dataset/README.md +214 -0
- modules/caas/benchmarks/hf_dataset/caas_benchmark_corpus.py +73 -0
- modules/caas/benchmarks/hf_dataset/corpus_preview.json +193 -0
- modules/caas/benchmarks/results/README.md +66 -0
- modules/caas/benchmarks/results/evaluation_2026-01-20.json +121 -0
- modules/caas/benchmarks/run_evaluation.py +561 -0
- modules/caas/benchmarks/statistical_tests.py +289 -0
- modules/caas/benchmarks/verify_sample_corpus.py +83 -0
- modules/caas/docker-compose.yml +38 -0
- modules/caas/docs/CONTEXT_TRIAD.md +462 -0
- modules/caas/docs/CONTRIBUTING.md +346 -0
- modules/caas/docs/ETHICS_AND_LIMITATIONS.md +336 -0
- modules/caas/docs/HEURISTIC_ROUTER.md +442 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY.md +363 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_CONTEXT_TRIAD.md +277 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_HEURISTIC_ROUTER.md +231 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_METADATA_INJECTION.md +258 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_PRAGMATIC_TRUTH.md +212 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_TRUST_GATEWAY.md +319 -0
- modules/caas/docs/LAYER_1_PRIMITIVE.md +202 -0
- modules/caas/docs/METADATA_INJECTION.md +404 -0
- modules/caas/docs/PRAGMATIC_TRUTH.md +431 -0
- modules/caas/docs/RELATED_WORK.md +312 -0
- modules/caas/docs/RELEASE_CHECKLIST.md +219 -0
- modules/caas/docs/RELEASE_GUIDE.md +285 -0
- modules/caas/docs/REPRODUCIBILITY.md +386 -0
- modules/caas/docs/SLIDING_WINDOW.md +387 -0
- modules/caas/docs/STRUCTURE_AWARE_INDEXING.md +158 -0
- modules/caas/docs/TESTING.md +259 -0
- modules/caas/docs/THREAT_MODEL.md +247 -0
- modules/caas/docs/TRUST_GATEWAY.md +575 -0
- modules/caas/docs/VFS.md +298 -0
- modules/caas/examples/agents/enterprise_security_agent.py +414 -0
- modules/caas/examples/agents/intelligent_document_analyzer.py +380 -0
- modules/caas/examples/demos/demo.py +309 -0
- modules/caas/examples/demos/demo_context_triad.py +225 -0
- modules/caas/examples/demos/demo_conversation_manager.py +285 -0
- modules/caas/examples/demos/demo_heuristic_router.py +133 -0
- modules/caas/examples/demos/demo_metadata_injection.py +198 -0
- modules/caas/examples/demos/demo_pragmatic_truth.py +303 -0
- modules/caas/examples/demos/demo_structure_aware.py +140 -0
- modules/caas/examples/demos/demo_time_decay.py +247 -0
- modules/caas/examples/demos/demo_trust_gateway.py +383 -0
- modules/caas/examples/multi_agent/README.md +159 -0
- modules/caas/examples/multi_agent/research_team.py +369 -0
- modules/caas/examples/multi_agent/vfs_collaboration.py +393 -0
- modules/caas/examples/usage/auth_module.py +142 -0
- modules/caas/examples/usage/usage_example.py +173 -0
- modules/caas/experiments/README.md +42 -0
- modules/caas/experiments/reproduce_results.py +462 -0
- modules/caas/paper/ARXIV_METADATA.md +145 -0
- modules/caas/paper/ARXIV_README.md +47 -0
- modules/caas/paper/CHECKLIST.md +103 -0
- modules/caas/paper/GITHUB_RELEASE_NOTES.md +105 -0
- modules/caas/paper/README.md +71 -0
- modules/caas/paper/abstract.md +24 -0
- modules/caas/paper/arxiv_submission.tar +0 -0
- modules/caas/paper/arxiv_submission.zip +0 -0
- modules/caas/paper/build_pdf.py +355 -0
- modules/caas/paper/experiments.md +149 -0
- modules/caas/paper/figures/.gitkeep +0 -0
- modules/caas/paper/figures/README.md +237 -0
- modules/caas/paper/figures/fig1_system_architecture.png +0 -0
- modules/caas/paper/figures/fig1_system_architecture.svg +198 -0
- modules/caas/paper/figures/fig2_context_triad.png +0 -0
- modules/caas/paper/figures/fig2_context_triad.svg +105 -0
- modules/caas/paper/figures/fig3_ablation_results.png +0 -0
- modules/caas/paper/figures/fig3_ablation_results.svg +113 -0
- modules/caas/paper/figures/fig4_routing_latency.png +0 -0
- modules/caas/paper/figures/fig4_routing_latency.svg +97 -0
- modules/caas/paper/intro.md +103 -0
- modules/caas/paper/latex/figures/fig1_system_architecture.png +0 -0
- modules/caas/paper/latex/figures/fig2_context_triad.png +0 -0
- modules/caas/paper/latex/figures/fig3_ablation_results.png +0 -0
- modules/caas/paper/latex/figures/fig4_routing_latency.png +0 -0
- modules/caas/paper/latex/main.tex +468 -0
- modules/caas/paper/latex/references.bib +140 -0
- modules/caas/paper/method.md +350 -0
- modules/caas/paper/outline.md +123 -0
- modules/caas/paper/related_work.md +101 -0
- modules/caas/paper/tables/.gitkeep +0 -0
- modules/caas/paper/tables/results_tables.md +50 -0
- modules/caas/pyproject.toml +172 -0
- modules/caas/requirements.txt +11 -0
- modules/caas/src/caas/__init__.py +232 -0
- modules/caas/src/caas/api/__init__.py +7 -0
- modules/caas/src/caas/api/server.py +1326 -0
- modules/caas/src/caas/caching.py +832 -0
- modules/caas/src/caas/cli.py +208 -0
- modules/caas/src/caas/conversation.py +221 -0
- modules/caas/src/caas/decay.py +118 -0
- modules/caas/src/caas/detection/__init__.py +7 -0
- modules/caas/src/caas/detection/detector.py +236 -0
- modules/caas/src/caas/enrichment.py +127 -0
- modules/caas/src/caas/gateway/__init__.py +24 -0
- modules/caas/src/caas/gateway/trust_gateway.py +471 -0
- modules/caas/src/caas/hf_utils.py +477 -0
- modules/caas/src/caas/ingestion/__init__.py +21 -0
- modules/caas/src/caas/ingestion/processors.py +251 -0
- modules/caas/src/caas/ingestion/structure_parser.py +185 -0
- modules/caas/src/caas/models.py +354 -0
- modules/caas/src/caas/pragmatic_truth.py +441 -0
- modules/caas/src/caas/routing/__init__.py +8 -0
- modules/caas/src/caas/routing/heuristic_router.py +242 -0
- modules/caas/src/caas/storage/__init__.py +7 -0
- modules/caas/src/caas/storage/store.py +450 -0
- modules/caas/src/caas/triad.py +472 -0
- modules/caas/src/caas/tuning/__init__.py +7 -0
- modules/caas/src/caas/tuning/tuner.py +322 -0
- modules/caas/src/caas/vfs/__init__.py +12 -0
- modules/caas/src/caas/vfs/filesystem.py +450 -0
- modules/caas/tests/__init__.py +3 -0
- modules/caas/tests/conftest.py +8 -0
- modules/caas/tests/test_caching.py +628 -0
- modules/caas/tests/test_context_triad.py +385 -0
- modules/caas/tests/test_conversation_manager.py +289 -0
- modules/caas/tests/test_functionality.py +215 -0
- modules/caas/tests/test_heuristic_router.py +370 -0
- modules/caas/tests/test_metadata_injection.py +328 -0
- modules/caas/tests/test_pragmatic_truth.py +322 -0
- modules/caas/tests/test_structure_aware_indexing.py +283 -0
- modules/caas/tests/test_time_decay.py +268 -0
- modules/caas/tests/test_trust_gateway.py +445 -0
- modules/caas/tests/test_vfs.py +298 -0
- modules/cmvk/.github/FUNDING.yml +9 -0
- modules/cmvk/.github/dependabot.yml +54 -0
- modules/cmvk/.github/workflows/ci.yml +205 -0
- modules/cmvk/.github/workflows/publish.yml +143 -0
- modules/cmvk/.gitignore +147 -0
- modules/cmvk/.pre-commit-config.yaml +58 -0
- modules/cmvk/CHANGELOG.md +146 -0
- modules/cmvk/CITATION.cff +48 -0
- modules/cmvk/CONTRIBUTING.md +229 -0
- modules/cmvk/Dockerfile +87 -0
- modules/cmvk/HF_MODEL_CARD.md +185 -0
- modules/cmvk/LICENSE +21 -0
- modules/cmvk/README.md +149 -0
- modules/cmvk/SECURITY.md +114 -0
- modules/cmvk/config/prompts/generator_v1.txt +23 -0
- modules/cmvk/config/prompts/verifier_hostile.txt +32 -0
- modules/cmvk/config/settings.yaml +40 -0
- modules/cmvk/coverage_html/.gitignore +2 -0
- modules/cmvk/coverage_html/class_index.html +658 -0
- modules/cmvk/coverage_html/coverage_html_cb_188fc9a4.js +735 -0
- modules/cmvk/coverage_html/favicon_32_cb_c827f16f.png +0 -0
- modules/cmvk/coverage_html/function_index.html +1978 -0
- modules/cmvk/coverage_html/index.html +255 -0
- modules/cmvk/coverage_html/keybd_closed_cb_900cfef5.png +0 -0
- modules/cmvk/coverage_html/status.json +1 -0
- modules/cmvk/coverage_html/style_cb_5c747636.css +389 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38___init___py.html +315 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_audit_py.html +499 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_benchmarks_py.html +575 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_constitutional_py.html +1001 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_hf_utils_py.html +398 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_metrics_py.html +570 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_profiles_py.html +397 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_types_py.html +109 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_verification_py.html +1053 -0
- modules/cmvk/docs/DIAGRAMS.md +325 -0
- modules/cmvk/docs/architecture.md +345 -0
- modules/cmvk/docs/features.md +308 -0
- modules/cmvk/docs/getting_started.md +279 -0
- modules/cmvk/docs/innovation_layer.md +377 -0
- modules/cmvk/docs/safety.md +281 -0
- modules/cmvk/docs/traceability.md +150 -0
- modules/cmvk/examples/basic_example.py +62 -0
- modules/cmvk/examples/demo_complete_pipeline.py +209 -0
- modules/cmvk/examples/demo_innovation_layer.py +197 -0
- modules/cmvk/examples/example.py +112 -0
- modules/cmvk/examples/model_diversity_comparison.py +110 -0
- modules/cmvk/examples/real_api_integration.py +121 -0
- modules/cmvk/examples/test_full_pipeline.py +303 -0
- modules/cmvk/experiments/FEATURE_2_LATERAL_THINKING.md +187 -0
- modules/cmvk/experiments/README.md +216 -0
- modules/cmvk/experiments/ablation_runner.py +666 -0
- modules/cmvk/experiments/baseline_runner.py +158 -0
- modules/cmvk/experiments/blind_spot_benchmark.py +364 -0
- modules/cmvk/experiments/datasets/README.md +85 -0
- modules/cmvk/experiments/datasets/humaneval_50.json +352 -0
- modules/cmvk/experiments/datasets/humaneval_full.json +1150 -0
- modules/cmvk/experiments/datasets/humaneval_sample.json +32 -0
- modules/cmvk/experiments/datasets/sabotage.json +262 -0
- modules/cmvk/experiments/datasets/sample.json +40 -0
- modules/cmvk/experiments/demo_with_traces.py +110 -0
- modules/cmvk/experiments/efficiency_curve.py +259 -0
- modules/cmvk/experiments/experiment_runner.py +243 -0
- modules/cmvk/experiments/paper_data_generator.py +183 -0
- modules/cmvk/experiments/reproduce_results.py +407 -0
- modules/cmvk/experiments/reproducible_runner.py +352 -0
- modules/cmvk/experiments/sabotage_stress_test.py +311 -0
- modules/cmvk/experiments/test_lateral_thinking.py +116 -0
- modules/cmvk/experiments/test_prosecutor.py +41 -0
- modules/cmvk/experiments/visualize_results.py +735 -0
- modules/cmvk/logs/traces/demo_HumanEval_0_20260121-204900.json +36 -0
- modules/cmvk/notebooks/analysis.ipynb +124 -0
- modules/cmvk/paper/PAPER.md +561 -0
- modules/cmvk/paper/arxiv_checklist.md +230 -0
- modules/cmvk/paper/cmvk_neurips.aux +77 -0
- modules/cmvk/paper/cmvk_neurips.bbl +81 -0
- modules/cmvk/paper/cmvk_neurips.blg +48 -0
- modules/cmvk/paper/cmvk_neurips.out +16 -0
- modules/cmvk/paper/cmvk_neurips.pdf +0 -0
- modules/cmvk/paper/cmvk_neurips.tex +309 -0
- modules/cmvk/paper/figures/ablation.png +0 -0
- modules/cmvk/paper/figures/ablation.svg +39 -0
- modules/cmvk/paper/figures/architecture.png +0 -0
- modules/cmvk/paper/figures/architecture.svg +115 -0
- modules/cmvk/paper/figures/results_bar.png +0 -0
- modules/cmvk/paper/figures/results_bar.svg +70 -0
- modules/cmvk/paper/generate_figures.py +383 -0
- modules/cmvk/paper/neurips_2024.sty +101 -0
- modules/cmvk/paper/references.bib +98 -0
- modules/cmvk/paper/structure.tex +200 -0
- modules/cmvk/pyproject.toml +189 -0
- modules/cmvk/requirements-dev.txt +19 -0
- modules/cmvk/requirements.txt +14 -0
- modules/cmvk/src/cmvk/__init__.py +216 -0
- modules/cmvk/src/cmvk/audit.py +400 -0
- modules/cmvk/src/cmvk/benchmarks.py +476 -0
- modules/cmvk/src/cmvk/constitutional.py +902 -0
- modules/cmvk/src/cmvk/hf_utils.py +299 -0
- modules/cmvk/src/cmvk/metrics.py +471 -0
- modules/cmvk/src/cmvk/profiles.py +298 -0
- modules/cmvk/src/cmvk/py.typed +0 -0
- modules/cmvk/src/cmvk/types.py +10 -0
- modules/cmvk/src/cmvk/verification.py +954 -0
- modules/cmvk/src/cross_model_verification_kernel/__init__.py +91 -0
- modules/cmvk/src/cross_model_verification_kernel/__main__.py +10 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/__init__.py +16 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/base_agent.py +142 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/generator_openai.py +223 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/verifier_anthropic.py +448 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/verifier_gemini.py +481 -0
- modules/cmvk/src/cross_model_verification_kernel/cli.py +570 -0
- modules/cmvk/src/cross_model_verification_kernel/core/__init__.py +26 -0
- modules/cmvk/src/cross_model_verification_kernel/core/graph_memory.py +308 -0
- modules/cmvk/src/cross_model_verification_kernel/core/kernel.py +413 -0
- modules/cmvk/src/cross_model_verification_kernel/core/trace_logger.py +75 -0
- modules/cmvk/src/cross_model_verification_kernel/core/types.py +121 -0
- modules/cmvk/src/cross_model_verification_kernel/datasets/__init__.py +20 -0
- modules/cmvk/src/cross_model_verification_kernel/datasets/humaneval_loader.py +271 -0
- modules/cmvk/src/cross_model_verification_kernel/generator.py +118 -0
- modules/cmvk/src/cross_model_verification_kernel/kernel.py +292 -0
- modules/cmvk/src/cross_model_verification_kernel/models.py +111 -0
- modules/cmvk/src/cross_model_verification_kernel/py.typed +1 -0
- modules/cmvk/src/cross_model_verification_kernel/simple_kernel.py +185 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/__init__.py +94 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/huggingface_upload.py +394 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/sandbox.py +159 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/statistics.py +468 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/visualizer.py +312 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/web_search.py +86 -0
- modules/cmvk/src/cross_model_verification_kernel/verifier.py +257 -0
- modules/cmvk/tests/__init__.py +3 -0
- modules/cmvk/tests/conftest.py +61 -0
- modules/cmvk/tests/integration/__init__.py +1 -0
- modules/cmvk/tests/integration/test_anthropic_verifier.py +269 -0
- modules/cmvk/tests/integration/test_integration.py +53 -0
- modules/cmvk/tests/integration/test_lateral_thinking_integration.py +199 -0
- modules/cmvk/tests/integration/test_lateral_thinking_witness.py +208 -0
- modules/cmvk/tests/integration/test_prosecutor_mode.py +131 -0
- modules/cmvk/tests/test_constitutional.py +611 -0
- modules/cmvk/tests/test_enhanced_features.py +603 -0
- modules/cmvk/tests/test_verification.py +255 -0
- modules/cmvk/tests/unit/__init__.py +1 -0
- modules/cmvk/tests/unit/test_agents.py +64 -0
- modules/cmvk/tests/unit/test_cli.py +224 -0
- modules/cmvk/tests/unit/test_core.py +126 -0
- modules/cmvk/tests/unit/test_humaneval_loader.py +197 -0
- modules/cmvk/tests/unit/test_kernel.py +255 -0
- modules/cmvk/tests/unit/test_reproducibility.py +160 -0
- modules/cmvk/tests/unit/test_trace_logger.py +115 -0
- modules/cmvk/tests/unit/test_visualizer.py +218 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/bug_report.yml +82 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/config.yml +11 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/feature_request.yml +104 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/question.yml +70 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/security_vulnerability.yml +84 -0
- modules/control-plane/.github/discussions.yml +73 -0
- modules/control-plane/.github/pull_request_template.md +82 -0
- modules/control-plane/.github/workflows/publish.yml +146 -0
- modules/control-plane/.github/workflows/release.yml +39 -0
- modules/control-plane/.github/workflows/tests.yml +58 -0
- modules/control-plane/.gitignore +55 -0
- modules/control-plane/CHANGELOG.md +203 -0
- modules/control-plane/CONTRIBUTING.md +311 -0
- modules/control-plane/CONTRIBUTORS.md +88 -0
- modules/control-plane/Dockerfile +82 -0
- modules/control-plane/LICENSE +21 -0
- modules/control-plane/MANIFEST.in +17 -0
- modules/control-plane/README.md +1264 -0
- modules/control-plane/ROADMAP.md +228 -0
- modules/control-plane/SECURITY.md +210 -0
- modules/control-plane/SUPPORT.md +106 -0
- modules/control-plane/acp-cli.py +212 -0
- modules/control-plane/benchmark/README.md +257 -0
- modules/control-plane/benchmark/__init__.py +19 -0
- modules/control-plane/benchmark/red_team_dataset.py +517 -0
- modules/control-plane/benchmark.py +563 -0
- modules/control-plane/build_and_publish.sh +130 -0
- modules/control-plane/docker-compose.yml +74 -0
- modules/control-plane/docs/ABLATION_STUDIES.md +528 -0
- modules/control-plane/docs/ADAPTER_GUIDE.md +544 -0
- modules/control-plane/docs/ADVANCED_FEATURES.md +543 -0
- modules/control-plane/docs/AIOS_COMPARISON.md +296 -0
- modules/control-plane/docs/BIBLIOGRAPHY.md +367 -0
- modules/control-plane/docs/CASE_STUDIES.md +645 -0
- modules/control-plane/docs/DOCKER_DEPLOYMENT.md +184 -0
- modules/control-plane/docs/ECOSYSTEM_STATUS.md +98 -0
- modules/control-plane/docs/HF_MODEL_CARD.md +168 -0
- modules/control-plane/docs/KERNEL_V1_RELEASE.md +454 -0
- modules/control-plane/docs/LAYER3_FRAMEWORK.md +227 -0
- modules/control-plane/docs/LIMITATIONS.md +523 -0
- modules/control-plane/docs/PYPI_PUBLISHING.md +195 -0
- modules/control-plane/docs/README.md +58 -0
- modules/control-plane/docs/RELATED_WORK.md +319 -0
- modules/control-plane/docs/RELEASE_v1.1.0.md +252 -0
- modules/control-plane/docs/REPRODUCIBILITY.md +540 -0
- modules/control-plane/docs/RESEARCH_FOUNDATION.md +197 -0
- modules/control-plane/docs/api/CORE.md +270 -0
- modules/control-plane/docs/architecture/architecture.md +120 -0
- modules/control-plane/docs/community/ANNOUNCEMENT_TEMPLATES.md +52 -0
- modules/control-plane/docs/guides/IMPLEMENTATION.md +225 -0
- modules/control-plane/docs/guides/PHILOSOPHY.md +354 -0
- modules/control-plane/docs/guides/QUICKSTART.md +217 -0
- modules/control-plane/examples/README.md +138 -0
- modules/control-plane/examples/a2a_demo.py +410 -0
- modules/control-plane/examples/adapter_demo.py +347 -0
- modules/control-plane/examples/advanced_features.py +403 -0
- modules/control-plane/examples/basic_usage.py +261 -0
- modules/control-plane/examples/benchmark_demo.py +186 -0
- modules/control-plane/examples/compliance_demo.py +333 -0
- modules/control-plane/examples/configuration.py +265 -0
- modules/control-plane/examples/getting_started.py +178 -0
- modules/control-plane/examples/hibernation_and_time_travel_demo.py +406 -0
- modules/control-plane/examples/interactive_tutorial.ipynb +497 -0
- modules/control-plane/examples/kernel_interceptor_demo.py +202 -0
- modules/control-plane/examples/kernel_v1_demo.py +273 -0
- modules/control-plane/examples/langchain_demo.py +281 -0
- modules/control-plane/examples/lifecycle_demo.py +724 -0
- modules/control-plane/examples/mcp_demo.py +378 -0
- modules/control-plane/examples/ml_safety_demo.py +157 -0
- modules/control-plane/examples/multimodal_demo.py +347 -0
- modules/control-plane/examples/observability_demo.py +370 -0
- modules/control-plane/examples/use_cases.py +336 -0
- modules/control-plane/experiments/long_horizon_purge.py +235 -0
- modules/control-plane/experiments/multi_agent_rag.py +165 -0
- modules/control-plane/experiments/reproduce_results.py +667 -0
- modules/control-plane/paper/ARXIV_SUBMISSION_INFO.txt +122 -0
- modules/control-plane/paper/ETHICS_STATEMENT.md +248 -0
- modules/control-plane/paper/PAPER_CHECKLIST.md +72 -0
- modules/control-plane/paper/Paper.pdf +0 -0
- modules/control-plane/paper/README.md +71 -0
- modules/control-plane/paper/appendix.md +152 -0
- modules/control-plane/paper/architecture.md +15 -0
- modules/control-plane/paper/arxiv/figures/ablation_chart.png +0 -0
- modules/control-plane/paper/arxiv/figures/architecture.png +0 -0
- modules/control-plane/paper/arxiv/figures/constraint_graphs.png +0 -0
- modules/control-plane/paper/arxiv/figures/results_chart.png +0 -0
- modules/control-plane/paper/arxiv/main.aux +97 -0
- modules/control-plane/paper/arxiv/main.bbl +112 -0
- modules/control-plane/paper/arxiv/main.blg +48 -0
- modules/control-plane/paper/arxiv/main.out +33 -0
- modules/control-plane/paper/arxiv/main.pdf +0 -0
- modules/control-plane/paper/arxiv/main.tex +479 -0
- modules/control-plane/paper/arxiv/references.bib +234 -0
- modules/control-plane/paper/arxiv_submission.tar +0 -0
- modules/control-plane/paper/arxiv_submission.zip +0 -0
- modules/control-plane/paper/build.sh +68 -0
- modules/control-plane/paper/figures/README.md +47 -0
- modules/control-plane/paper/figures/ablation_chart.pdf +0 -0
- modules/control-plane/paper/figures/ablation_chart.png +0 -0
- modules/control-plane/paper/figures/architecture.pdf +0 -0
- modules/control-plane/paper/figures/architecture.png +0 -0
- modules/control-plane/paper/figures/constraint_graphs.pdf +0 -0
- modules/control-plane/paper/figures/constraint_graphs.png +0 -0
- modules/control-plane/paper/figures/generate_figures.py +252 -0
- modules/control-plane/paper/figures/results_chart.pdf +0 -0
- modules/control-plane/paper/figures/results_chart.png +0 -0
- modules/control-plane/paper/main.md +273 -0
- modules/control-plane/paper/main.tex +214 -0
- modules/control-plane/paper/main_arxiv.aux +53 -0
- modules/control-plane/paper/main_arxiv.out +17 -0
- modules/control-plane/paper/main_arxiv.pdf +0 -0
- modules/control-plane/paper/main_arxiv.tex +264 -0
- modules/control-plane/paper/references.bib +234 -0
- modules/control-plane/pyproject.toml +124 -0
- modules/control-plane/reproducibility/ABLATIONS.md +136 -0
- modules/control-plane/reproducibility/README.md +288 -0
- modules/control-plane/reproducibility/commands.md +467 -0
- modules/control-plane/reproducibility/docker_config/Dockerfile +39 -0
- modules/control-plane/reproducibility/experiment_configs/purge_config.json +46 -0
- modules/control-plane/reproducibility/experiment_configs/rag_config.json +36 -0
- modules/control-plane/reproducibility/hardware_specs.md +317 -0
- modules/control-plane/reproducibility/requirements_frozen.txt +0 -0
- modules/control-plane/reproducibility/run_all_experiments.sh +45 -0
- modules/control-plane/reproducibility/seeds.json +106 -0
- modules/control-plane/scripts/prepare_pypi.py +46 -0
- modules/control-plane/scripts/prepare_release.py +176 -0
- modules/control-plane/scripts/upload_dataset_to_hf.py +316 -0
- modules/control-plane/setup.py +69 -0
- modules/control-plane/src/agent_control_plane/__init__.py +639 -0
- modules/control-plane/src/agent_control_plane/a2a_adapter.py +541 -0
- modules/control-plane/src/agent_control_plane/adapter.py +415 -0
- modules/control-plane/src/agent_control_plane/agent_hibernation.py +364 -0
- modules/control-plane/src/agent_control_plane/agent_kernel.py +464 -0
- modules/control-plane/src/agent_control_plane/compliance.py +718 -0
- modules/control-plane/src/agent_control_plane/constraint_graphs.py +475 -0
- modules/control-plane/src/agent_control_plane/control_plane.py +848 -0
- modules/control-plane/src/agent_control_plane/example_executors.py +193 -0
- modules/control-plane/src/agent_control_plane/execution_engine.py +229 -0
- modules/control-plane/src/agent_control_plane/flight_recorder.py +600 -0
- modules/control-plane/src/agent_control_plane/governance_layer.py +432 -0
- modules/control-plane/src/agent_control_plane/hf_utils.py +561 -0
- modules/control-plane/src/agent_control_plane/interfaces/__init__.py +53 -0
- modules/control-plane/src/agent_control_plane/interfaces/kernel_interface.py +359 -0
- modules/control-plane/src/agent_control_plane/interfaces/plugin_interface.py +495 -0
- modules/control-plane/src/agent_control_plane/interfaces/protocol_interfaces.py +385 -0
- modules/control-plane/src/agent_control_plane/kernel_space.py +707 -0
- modules/control-plane/src/agent_control_plane/langchain_adapter.py +422 -0
- modules/control-plane/src/agent_control_plane/lifecycle.py +3111 -0
- modules/control-plane/src/agent_control_plane/mcp_adapter.py +517 -0
- modules/control-plane/src/agent_control_plane/ml_safety.py +560 -0
- modules/control-plane/src/agent_control_plane/multimodal.py +724 -0
- modules/control-plane/src/agent_control_plane/mute_agent.py +419 -0
- modules/control-plane/src/agent_control_plane/observability.py +785 -0
- modules/control-plane/src/agent_control_plane/orchestrator.py +480 -0
- modules/control-plane/src/agent_control_plane/plugin_registry.py +748 -0
- modules/control-plane/src/agent_control_plane/policy_engine.py +525 -0
- modules/control-plane/src/agent_control_plane/shadow_mode.py +307 -0
- modules/control-plane/src/agent_control_plane/signals.py +491 -0
- modules/control-plane/src/agent_control_plane/supervisor_agents.py +427 -0
- modules/control-plane/src/agent_control_plane/time_travel_debugger.py +554 -0
- modules/control-plane/src/agent_control_plane/tool_registry.py +350 -0
- modules/control-plane/src/agent_control_plane/vfs.py +695 -0
- modules/control-plane/tests/README.md +33 -0
- modules/control-plane/tests/test_a2a_adapter.py +336 -0
- modules/control-plane/tests/test_adapter.py +422 -0
- modules/control-plane/tests/test_advanced_features.py +389 -0
- modules/control-plane/tests/test_benchmark.py +223 -0
- modules/control-plane/tests/test_compliance.py +214 -0
- modules/control-plane/tests/test_control_plane.py +295 -0
- modules/control-plane/tests/test_hibernation.py +274 -0
- modules/control-plane/tests/test_kernel_interception.py +284 -0
- modules/control-plane/tests/test_langchain_adapter.py +258 -0
- modules/control-plane/tests/test_lifecycle.py +1174 -0
- modules/control-plane/tests/test_mcp_adapter.py +293 -0
- modules/control-plane/tests/test_ml_safety.py +142 -0
- modules/control-plane/tests/test_multimodal.py +317 -0
- modules/control-plane/tests/test_new_features.py +435 -0
- modules/control-plane/tests/test_observability.py +338 -0
- modules/control-plane/tests/test_time_travel.py +387 -0
- modules/emk/.github/workflows/ci.yml +105 -0
- modules/emk/.github/workflows/publish.yml +144 -0
- modules/emk/.gitignore +74 -0
- modules/emk/CHANGELOG.md +41 -0
- modules/emk/CONTRIBUTING.md +295 -0
- modules/emk/IMPLEMENTATION.md +174 -0
- modules/emk/LICENSE +21 -0
- modules/emk/MANIFEST.in +8 -0
- modules/emk/README.md +135 -0
- modules/emk/RELEASE_NOTES.md +82 -0
- modules/emk/SECURITY.md +52 -0
- modules/emk/codecov.yml +39 -0
- modules/emk/docs/MEMORY_MANAGEMENT.md +285 -0
- modules/emk/emk/__init__.py +106 -0
- modules/emk/emk/hf_utils.py +419 -0
- modules/emk/emk/indexer.py +144 -0
- modules/emk/emk/py.typed +0 -0
- modules/emk/emk/schema.py +204 -0
- modules/emk/emk/sleep_cycle.py +345 -0
- modules/emk/emk/store.py +479 -0
- modules/emk/examples/basic_usage.py +123 -0
- modules/emk/examples/memory_features_demo.py +154 -0
- modules/emk/experiments/README.md +59 -0
- modules/emk/experiments/reproduce_results.py +461 -0
- modules/emk/experiments/results.json +61 -0
- modules/emk/paper/structure.tex +192 -0
- modules/emk/paper/whitepaper.md +273 -0
- modules/emk/pyproject.toml +91 -0
- modules/emk/setup.py +5 -0
- modules/emk/tests/test_file_adapter.py +195 -0
- modules/emk/tests/test_indexer.py +174 -0
- modules/emk/tests/test_init.py +55 -0
- modules/emk/tests/test_negative_memory.py +83 -0
- modules/emk/tests/test_schema.py +150 -0
- modules/emk/tests/test_semantic_rules.py +175 -0
- modules/emk/tests/test_sleep_cycle.py +335 -0
- modules/emk/tests/test_store_anti_patterns.py +239 -0
- modules/iatp/.github/workflows/docker-build.yml +124 -0
- modules/iatp/.github/workflows/publish.yml +174 -0
- modules/iatp/.github/workflows/python-package.yml +121 -0
- modules/iatp/.gitignore +67 -0
- modules/iatp/.pre-commit-config.yaml +64 -0
- modules/iatp/CHANGELOG.md +120 -0
- modules/iatp/Dockerfile +91 -0
- modules/iatp/IMPLEMENTATION_SUMMARY.md +218 -0
- modules/iatp/MANIFEST.in +9 -0
- modules/iatp/README.md +180 -0
- modules/iatp/docker/Dockerfile.agent +27 -0
- modules/iatp/docker/Dockerfile.sidecar-python +86 -0
- modules/iatp/docker/README.md +258 -0
- modules/iatp/docker-compose.yml +194 -0
- modules/iatp/docs/ARCHITECTURE.md +243 -0
- modules/iatp/docs/CLI_GUIDE.md +220 -0
- modules/iatp/docs/DEPLOYMENT.md +304 -0
- modules/iatp/examples/README.md +132 -0
- modules/iatp/examples/backend_agent.py +39 -0
- modules/iatp/examples/client.py +168 -0
- modules/iatp/examples/demo_attestation_reputation.py +274 -0
- modules/iatp/examples/demo_client.py +240 -0
- modules/iatp/examples/demo_rbac.py +143 -0
- modules/iatp/examples/integration_demo.py +245 -0
- modules/iatp/examples/manifests/coder_agent.json +20 -0
- modules/iatp/examples/manifests/reviewer_agent.json +19 -0
- modules/iatp/examples/manifests/secure_bank.json +14 -0
- modules/iatp/examples/manifests/standard_agent.json +14 -0
- modules/iatp/examples/manifests/untrusted_honeypot.json +14 -0
- modules/iatp/examples/run_secure_bank_sidecar.py +85 -0
- modules/iatp/examples/run_sidecar.py +105 -0
- modules/iatp/examples/run_untrusted_sidecar.py +77 -0
- modules/iatp/examples/secure_bank_agent.py +138 -0
- modules/iatp/examples/test_untrusted.py +82 -0
- modules/iatp/examples/untrusted_agent.py +119 -0
- modules/iatp/experiments/README.md +58 -0
- modules/iatp/experiments/cascading_hallucination/README.md +149 -0
- modules/iatp/experiments/cascading_hallucination/agent_a_user.py +41 -0
- modules/iatp/experiments/cascading_hallucination/agent_b_summarizer.py +54 -0
- modules/iatp/experiments/cascading_hallucination/agent_c_database.py +47 -0
- modules/iatp/experiments/cascading_hallucination/proof_of_concept.py +290 -0
- modules/iatp/experiments/cascading_hallucination/run_experiment.py +226 -0
- modules/iatp/experiments/cascading_hallucination/sidecar_c.py +61 -0
- modules/iatp/experiments/reproduce_results.py +574 -0
- modules/iatp/experiments/results.json +2336 -0
- modules/iatp/iatp/__init__.py +164 -0
- modules/iatp/iatp/attestation.py +401 -0
- modules/iatp/iatp/cli.py +253 -0
- modules/iatp/iatp/hf_utils.py +469 -0
- modules/iatp/iatp/ipc_pipes.py +578 -0
- modules/iatp/iatp/main.py +410 -0
- modules/iatp/iatp/models/__init__.py +445 -0
- modules/iatp/iatp/policy_engine.py +335 -0
- modules/iatp/iatp/py.typed +2 -0
- modules/iatp/iatp/recovery.py +319 -0
- modules/iatp/iatp/security/__init__.py +268 -0
- modules/iatp/iatp/sidecar/__init__.py +517 -0
- modules/iatp/iatp/telemetry/__init__.py +162 -0
- modules/iatp/iatp/tests/__init__.py +1 -0
- modules/iatp/iatp/tests/test_attestation.py +368 -0
- modules/iatp/iatp/tests/test_cli.py +129 -0
- modules/iatp/iatp/tests/test_models.py +128 -0
- modules/iatp/iatp/tests/test_policy_engine.py +345 -0
- modules/iatp/iatp/tests/test_recovery.py +279 -0
- modules/iatp/iatp/tests/test_security.py +220 -0
- modules/iatp/iatp/tests/test_sidecar.py +165 -0
- modules/iatp/iatp/tests/test_telemetry.py +173 -0
- modules/iatp/paper/BLOG.md +307 -0
- modules/iatp/paper/PAPER.md +236 -0
- modules/iatp/paper/RFC_SUBMISSION.md +299 -0
- modules/iatp/paper/whitepaper.md +369 -0
- modules/iatp/proto/README.md +200 -0
- modules/iatp/proto/generate_stubs.py +81 -0
- modules/iatp/proto/iatp.proto +552 -0
- modules/iatp/pyproject.toml +180 -0
- modules/iatp/requirements-dev.txt +2 -0
- modules/iatp/requirements.txt +6 -0
- modules/iatp/setup.py +60 -0
- modules/iatp/sidecar/README.md +487 -0
- modules/iatp/sidecar/go/Dockerfile +32 -0
- modules/iatp/sidecar/go/README.md +237 -0
- modules/iatp/sidecar/go/go.mod +8 -0
- modules/iatp/sidecar/go/main.go +488 -0
- modules/iatp/spec/001-handshake.md +436 -0
- modules/iatp/spec/002-reversibility.md +394 -0
- modules/iatp/spec/schema/capability_manifest.json +266 -0
- modules/iatp/test_integration.py +310 -0
- modules/mcp-kernel-server/README.md +261 -0
- modules/mcp-kernel-server/pyproject.toml +60 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/__init__.py +26 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/cli.py +229 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/resources.py +215 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/server.py +562 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/tools.py +1172 -0
- modules/mute-agent/.github/workflows/safety_check.yml +45 -0
- modules/mute-agent/.gitignore +53 -0
- modules/mute-agent/ARCHITECTURE.md +531 -0
- modules/mute-agent/BENCHMARK_GUIDE.md +384 -0
- modules/mute-agent/COMPLETION_SUMMARY.md +293 -0
- modules/mute-agent/EXPERIMENT_SUMMARY.md +318 -0
- modules/mute-agent/IMPLEMENTATION_SUMMARY.md +212 -0
- modules/mute-agent/LICENSE +21 -0
- modules/mute-agent/PHASE3_SUMMARY.md +297 -0
- modules/mute-agent/README.md +360 -0
- modules/mute-agent/STEEL_MAN_RESULTS.md +353 -0
- modules/mute-agent/USAGE.md +505 -0
- modules/mute-agent/V2_IMPLEMENTATION_SUMMARY.md +253 -0
- modules/mute-agent/V2_STEEL_MAN_IMPLEMENTATION.md +274 -0
- modules/mute-agent/VERIFICATION_REPORT.md +435 -0
- modules/mute-agent/charts/cost_comparison.png +0 -0
- modules/mute-agent/charts/cost_vs_ambiguity.png +0 -0
- modules/mute-agent/charts/metrics_comparison.png +0 -0
- modules/mute-agent/charts/scenario_breakdown.png +0 -0
- modules/mute-agent/charts/trace_attack_blocked.html +140 -0
- modules/mute-agent/charts/trace_attack_blocked.png +0 -0
- modules/mute-agent/charts/trace_failure.html +140 -0
- modules/mute-agent/charts/trace_failure.png +0 -0
- modules/mute-agent/charts/trace_success.html +140 -0
- modules/mute-agent/charts/trace_success.png +0 -0
- modules/mute-agent/examples/__init__.py +1 -0
- modules/mute-agent/examples/advanced_example.py +384 -0
- modules/mute-agent/examples/graph_debugger_demo.py +241 -0
- modules/mute-agent/examples/listener_example.py +297 -0
- modules/mute-agent/examples/simple_example.py +242 -0
- modules/mute-agent/examples/steel_man_demo.py +297 -0
- modules/mute-agent/experiments/README.md +135 -0
- modules/mute-agent/experiments/__init__.py +3 -0
- modules/mute-agent/experiments/agent_comparison.csv +6 -0
- modules/mute-agent/experiments/agent_comparison_50runs.csv +6 -0
- modules/mute-agent/experiments/ambiguity_test.py +335 -0
- modules/mute-agent/experiments/ambiguity_test_results.csv +31 -0
- modules/mute-agent/experiments/ambiguity_test_results_50runs.csv +51 -0
- modules/mute-agent/experiments/baseline_agent.py +189 -0
- modules/mute-agent/experiments/benchmark.py +402 -0
- modules/mute-agent/experiments/demo.py +172 -0
- modules/mute-agent/experiments/generate_cost_curve.py +474 -0
- modules/mute-agent/experiments/jailbreak_test.py +137 -0
- modules/mute-agent/experiments/latent_state_scenario.py +361 -0
- modules/mute-agent/experiments/mute_agent_experiment.py +349 -0
- modules/mute-agent/experiments/run_extended_experiment.py +40 -0
- modules/mute-agent/experiments/run_v2_experiments.py +266 -0
- modules/mute-agent/experiments/run_v2_experiments_auto.py +247 -0
- modules/mute-agent/experiments/v2_scenarios/README.md +214 -0
- modules/mute-agent/experiments/v2_scenarios/__init__.py +4 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_1_deep_dependency.py +325 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_2_adversarial.py +328 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_3_false_positive.py +303 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_4_performance.py +319 -0
- modules/mute-agent/experiments/visualize.py +400 -0
- modules/mute-agent/mute_agent/__init__.py +66 -0
- modules/mute-agent/mute_agent/core/__init__.py +1 -0
- modules/mute-agent/mute_agent/core/execution_agent.py +164 -0
- modules/mute-agent/mute_agent/core/handshake_protocol.py +199 -0
- modules/mute-agent/mute_agent/core/reasoning_agent.py +236 -0
- modules/mute-agent/mute_agent/knowledge_graph/__init__.py +1 -0
- modules/mute-agent/mute_agent/knowledge_graph/graph_elements.py +63 -0
- modules/mute-agent/mute_agent/knowledge_graph/multidimensional_graph.py +168 -0
- modules/mute-agent/mute_agent/knowledge_graph/subgraph.py +222 -0
- modules/mute-agent/mute_agent/listener/__init__.py +41 -0
- modules/mute-agent/mute_agent/listener/adapters/__init__.py +29 -0
- modules/mute-agent/mute_agent/listener/adapters/base_adapter.py +187 -0
- modules/mute-agent/mute_agent/listener/adapters/caas_adapter.py +342 -0
- modules/mute-agent/mute_agent/listener/adapters/control_plane_adapter.py +434 -0
- modules/mute-agent/mute_agent/listener/adapters/iatp_adapter.py +330 -0
- modules/mute-agent/mute_agent/listener/adapters/scak_adapter.py +249 -0
- modules/mute-agent/mute_agent/listener/listener.py +608 -0
- modules/mute-agent/mute_agent/listener/state_observer.py +434 -0
- modules/mute-agent/mute_agent/listener/threshold_config.py +311 -0
- modules/mute-agent/mute_agent/super_system/__init__.py +1 -0
- modules/mute-agent/mute_agent/super_system/router.py +202 -0
- modules/mute-agent/mute_agent/visualization/__init__.py +8 -0
- modules/mute-agent/mute_agent/visualization/graph_debugger.py +495 -0
- modules/mute-agent/requirements-dev.txt +6 -0
- modules/mute-agent/requirements.txt +9 -0
- modules/mute-agent/setup.py +64 -0
- modules/mute-agent/src/__init__.py +0 -0
- modules/mute-agent/src/agents/__init__.py +0 -0
- modules/mute-agent/src/agents/baseline_agent.py +524 -0
- modules/mute-agent/src/agents/interactive_agent.py +113 -0
- modules/mute-agent/src/agents/mute_agent.py +622 -0
- modules/mute-agent/src/benchmarks/__init__.py +0 -0
- modules/mute-agent/src/benchmarks/evaluator.py +481 -0
- modules/mute-agent/src/benchmarks/scenarios.json +985 -0
- modules/mute-agent/src/core/__init__.py +0 -0
- modules/mute-agent/src/core/mock_state.py +320 -0
- modules/mute-agent/src/core/tools.py +441 -0
- modules/nexus/__init__.py +49 -0
- modules/nexus/arbiter.py +357 -0
- modules/nexus/client.py +464 -0
- modules/nexus/dmz.py +417 -0
- modules/nexus/escrow.py +428 -0
- modules/nexus/exceptions.py +284 -0
- modules/nexus/registry.py +391 -0
- modules/nexus/reputation.py +423 -0
- modules/nexus/schemas/__init__.py +49 -0
- modules/nexus/schemas/compliance.py +274 -0
- modules/nexus/schemas/escrow.py +249 -0
- modules/nexus/schemas/manifest.py +223 -0
- modules/nexus/schemas/receipt.py +206 -0
- modules/observability/README.md +192 -0
- modules/observability/alertmanager/alertmanager.yml +116 -0
- modules/observability/alerts/agent-os-alerts.yaml +197 -0
- modules/observability/docker-compose.yml +128 -0
- modules/observability/grafana/dashboards/agent-os-amb.json +448 -0
- modules/observability/grafana/dashboards/agent-os-cmvk.json +441 -0
- modules/observability/grafana/dashboards/agent-os-overview.json +268 -0
- modules/observability/grafana/dashboards/agent-os-performance.json +15 -0
- modules/observability/grafana/dashboards/agent-os-safety.json +50 -0
- modules/observability/grafana/provisioning/dashboards/dashboards.yml +15 -0
- modules/observability/grafana/provisioning/datasources/datasources.yml +33 -0
- modules/observability/otel/otel-collector-config.yml +61 -0
- modules/observability/prometheus/prometheus.yml +63 -0
- modules/observability/pyproject.toml +53 -0
- modules/observability/scripts/export_dashboards.py +55 -0
- modules/observability/src/agent_os_observability/__init__.py +25 -0
- modules/observability/src/agent_os_observability/dashboards.py +896 -0
- modules/observability/src/agent_os_observability/metrics.py +396 -0
- modules/observability/src/agent_os_observability/server.py +221 -0
- modules/observability/src/agent_os_observability/tracer.py +226 -0
- modules/primitives/.gitignore +8 -0
- modules/primitives/README.md +62 -0
- modules/primitives/agent_primitives/__init__.py +22 -0
- modules/primitives/agent_primitives/failures.py +82 -0
- modules/primitives/agent_primitives/py.typed +0 -0
- modules/primitives/pyproject.toml +68 -0
- modules/scak/.github/copilot-instructions.md +396 -0
- modules/scak/.github/workflows/release.yml +117 -0
- modules/scak/.gitignore +32 -0
- modules/scak/CHANGELOG.md +173 -0
- modules/scak/CITATION.cff +62 -0
- modules/scak/CONTRIBUTING.md +429 -0
- modules/scak/Dockerfile +58 -0
- modules/scak/ENTERPRISE_FEATURES.md +518 -0
- modules/scak/IMPLEMENTATION_SUMMARY.md +206 -0
- modules/scak/LIMITATIONS.md +565 -0
- modules/scak/MANIFEST.in +16 -0
- modules/scak/NOVELTY.md +535 -0
- modules/scak/README.md +928 -0
- modules/scak/RESEARCH.md +670 -0
- modules/scak/agent_kernel/__init__.py +66 -0
- modules/scak/agent_kernel/analyzer.py +432 -0
- modules/scak/agent_kernel/auditor.py +31 -0
- modules/scak/agent_kernel/completeness_auditor.py +234 -0
- modules/scak/agent_kernel/detector.py +200 -0
- modules/scak/agent_kernel/kernel.py +741 -0
- modules/scak/agent_kernel/memory_manager.py +82 -0
- modules/scak/agent_kernel/models.py +372 -0
- modules/scak/agent_kernel/nudge_mechanism.py +260 -0
- modules/scak/agent_kernel/outcome_analyzer.py +335 -0
- modules/scak/agent_kernel/patcher.py +579 -0
- modules/scak/agent_kernel/semantic_analyzer.py +313 -0
- modules/scak/agent_kernel/semantic_purge.py +346 -0
- modules/scak/agent_kernel/simulator.py +447 -0
- modules/scak/agent_kernel/teacher.py +82 -0
- modules/scak/agent_kernel/triage.py +149 -0
- modules/scak/build_and_publish.ps1 +74 -0
- modules/scak/build_and_publish.sh +74 -0
- modules/scak/cli.py +471 -0
- modules/scak/dashboard.py +462 -0
- modules/scak/datasets/DATASET_CARD.md +219 -0
- modules/scak/datasets/README.md +143 -0
- modules/scak/datasets/gaia_vague_queries/vague_queries.json +262 -0
- modules/scak/datasets/hf_upload/README.md +219 -0
- modules/scak/datasets/hf_upload/scak_gaia_laziness.jsonl +50 -0
- modules/scak/datasets/prepare_hf_datasets.py +145 -0
- modules/scak/datasets/red_team/jailbreak_patterns.json +202 -0
- modules/scak/docker-compose.yml +99 -0
- modules/scak/docs/Adaptive-Memory-Hierarchy.md +319 -0
- modules/scak/docs/Data-Contracts-and-Schemas.md +285 -0
- modules/scak/docs/Dual-Loop-Architecture.md +344 -0
- modules/scak/docs/Enhanced-Features.md +612 -0
- modules/scak/docs/LANGCHAIN_INTEGRATION.md +572 -0
- modules/scak/docs/README.md +128 -0
- modules/scak/docs/Reference-Implementations.md +163 -0
- modules/scak/docs/SCAK_V2.md +374 -0
- modules/scak/docs/Three-Failure-Types.md +178 -0
- modules/scak/examples/basic_example.py +155 -0
- modules/scak/examples/circuit_breaker_lazy_eval_demo.py +243 -0
- modules/scak/examples/langchain_integration_example.py +339 -0
- modules/scak/examples/layer4_demo.py +243 -0
- modules/scak/examples/production_features_demo.py +353 -0
- modules/scak/examples/quick_demo.py +79 -0
- modules/scak/examples/scak_v2_demo.py +252 -0
- modules/scak/experiments/README.md +438 -0
- modules/scak/experiments/ablation_studies/README.md +192 -0
- modules/scak/experiments/ablation_studies/ablation_no_audit.py +116 -0
- modules/scak/experiments/ablation_studies/ablation_no_purge.py +133 -0
- modules/scak/experiments/chaos_engineering/README.md +332 -0
- modules/scak/experiments/context_efficiency_test.py +328 -0
- modules/scak/experiments/gaia_benchmark/README.md +208 -0
- modules/scak/experiments/laziness_benchmark.py +179 -0
- modules/scak/experiments/long_horizon_task_experiment.py +252 -0
- modules/scak/experiments/multi_agent_rag_experiment.py +284 -0
- modules/scak/experiments/results/ablation_table.md +12 -0
- modules/scak/experiments/results/long_horizon.json +36 -0
- modules/scak/experiments/results/multi_agent_rag.json +66 -0
- modules/scak/experiments/run_comprehensive_ablations.py +332 -0
- modules/scak/experiments/test_auditor_patcher_integration.py +251 -0
- modules/scak/notebooks/getting_started.ipynb +33 -0
- modules/scak/paper/ARXIV_SUBMISSION_METADATA.txt +109 -0
- modules/scak/paper/PAPER_CHECKLIST.md +304 -0
- modules/scak/paper/Paper.pdf +0 -0
- modules/scak/paper/README.md +113 -0
- modules/scak/paper/appendix.md +351 -0
- modules/scak/paper/arxiv/bibliography.bib +284 -0
- modules/scak/paper/arxiv/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/arxiv/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/arxiv/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/arxiv/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/arxiv/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/arxiv/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/arxiv/main.aux +103 -0
- modules/scak/paper/arxiv/main.bbl +113 -0
- modules/scak/paper/arxiv/main.blg +55 -0
- modules/scak/paper/arxiv/main.out +31 -0
- modules/scak/paper/arxiv/main.pdf +0 -0
- modules/scak/paper/arxiv/main.tex +482 -0
- modules/scak/paper/arxiv_submission/bibliography.bib +284 -0
- modules/scak/paper/arxiv_submission/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/arxiv_submission/main.aux +103 -0
- modules/scak/paper/arxiv_submission/main.bbl +113 -0
- modules/scak/paper/arxiv_submission/main.blg +55 -0
- modules/scak/paper/arxiv_submission/main.out +31 -0
- modules/scak/paper/arxiv_submission/main.pdf +0 -0
- modules/scak/paper/arxiv_submission/main.tex +482 -0
- modules/scak/paper/arxiv_submission.tar.gz +0 -0
- modules/scak/paper/bibliography.bib +284 -0
- modules/scak/paper/build.sh +55 -0
- modules/scak/paper/figures/README.md +32 -0
- modules/scak/paper/figures/fig1_ooda_architecture.md +75 -0
- modules/scak/paper/figures/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/figures/fig1_ooda_architecture.png +0 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.md +83 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.png +0 -0
- modules/scak/paper/figures/fig3_gaia_results.md +64 -0
- modules/scak/paper/figures/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/figures/fig3_gaia_results.png +0 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.md +64 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.png +0 -0
- modules/scak/paper/figures/fig5_context_reduction.md +71 -0
- modules/scak/paper/figures/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/figures/fig5_context_reduction.png +0 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.md +80 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.png +0 -0
- modules/scak/paper/figures/generate_figures.py +463 -0
- modules/scak/paper/main.aux +103 -0
- modules/scak/paper/main.bbl +113 -0
- modules/scak/paper/main.blg +55 -0
- modules/scak/paper/main.md +192 -0
- modules/scak/paper/main.out +31 -0
- modules/scak/paper/main.pdf +0 -0
- modules/scak/paper/main.tex +482 -0
- modules/scak/reproducibility/ABLATIONS.md +225 -0
- modules/scak/reproducibility/Dockerfile.reproducibility +34 -0
- modules/scak/reproducibility/README.md +421 -0
- modules/scak/reproducibility/requirements-pinned.txt +32 -0
- modules/scak/reproducibility/run_all_experiments.py +395 -0
- modules/scak/reproducibility/seed_control.py +53 -0
- modules/scak/reproducibility/statistical_analysis.py +302 -0
- modules/scak/requirements.txt +50 -0
- modules/scak/setup.py +93 -0
- modules/scak/src/__init__.py +124 -0
- modules/scak/src/agents/__init__.py +13 -0
- modules/scak/src/agents/conflict_resolution.py +732 -0
- modules/scak/src/agents/orchestrator.py +761 -0
- modules/scak/src/agents/pubsub.py +484 -0
- modules/scak/src/agents/shadow_teacher.py +344 -0
- modules/scak/src/agents/swarm.py +661 -0
- modules/scak/src/agents/worker.py +357 -0
- modules/scak/src/integrations/__init__.py +81 -0
- modules/scak/src/integrations/cmvk_adapter.py +430 -0
- modules/scak/src/integrations/control_plane_adapter.py +601 -0
- modules/scak/src/integrations/langchain_integration.py +902 -0
- modules/scak/src/interfaces/__init__.py +59 -0
- modules/scak/src/interfaces/llm_clients.py +505 -0
- modules/scak/src/interfaces/openapi_tools.py +611 -0
- modules/scak/src/interfaces/plugin_system.py +605 -0
- modules/scak/src/interfaces/protocols.py +365 -0
- modules/scak/src/interfaces/telemetry.py +464 -0
- modules/scak/src/interfaces/tool_registry.py +547 -0
- modules/scak/src/kernel/__init__.py +100 -0
- modules/scak/src/kernel/auditor.py +305 -0
- modules/scak/src/kernel/circuit_breaker.py +398 -0
- modules/scak/src/kernel/core.py +724 -0
- modules/scak/src/kernel/distributed.py +667 -0
- modules/scak/src/kernel/evolution.py +455 -0
- modules/scak/src/kernel/failover.py +621 -0
- modules/scak/src/kernel/governance.py +710 -0
- modules/scak/src/kernel/governance_v2.py +603 -0
- modules/scak/src/kernel/lazy_evaluator.py +514 -0
- modules/scak/src/kernel/load_testing.py +633 -0
- modules/scak/src/kernel/memory.py +945 -0
- modules/scak/src/kernel/patcher.py +581 -0
- modules/scak/src/kernel/rubric.py +419 -0
- modules/scak/src/kernel/schemas.py +390 -0
- modules/scak/src/kernel/skill_mapper.py +309 -0
- modules/scak/src/kernel/triage.py +149 -0
- modules/scak/src/mocks/__init__.py +99 -0
- modules/scak/tests/__init__.py +1 -0
- modules/scak/tests/test_circuit_breaker.py +403 -0
- modules/scak/tests/test_conflict_resolution.py +287 -0
- modules/scak/tests/test_dual_loop.py +463 -0
- modules/scak/tests/test_enhanced_features.py +421 -0
- modules/scak/tests/test_failover_and_load.py +438 -0
- modules/scak/tests/test_governance.py +185 -0
- modules/scak/tests/test_kernel.py +359 -0
- modules/scak/tests/test_langchain_integration.py +451 -0
- modules/scak/tests/test_lazy_evaluator.py +465 -0
- modules/scak/tests/test_llm_clients.py +122 -0
- modules/scak/tests/test_memory_controller.py +528 -0
- modules/scak/tests/test_orchestrator.py +181 -0
- modules/scak/tests/test_phase3_integration.py +265 -0
- modules/scak/tests/test_pubsub_swarm.py +203 -0
- modules/scak/tests/test_reference_implementations.py +240 -0
- modules/scak/tests/test_rubric.py +363 -0
- modules/scak/tests/test_scak_v2.py +651 -0
- modules/scak/tests/test_skill_mapper.py +217 -0
- modules/scak/tests/test_specific_failures.py +393 -0
- modules/scak/tests/test_tool_registry.py +264 -0
- modules/scak/tests/test_tools_and_plugins.py +303 -0
- modules/scak/tests/test_triage.py +596 -0
- modules/scak/tests/test_write_through.py +319 -0
- agent_os_kernel-1.1.0.dist-info/METADATA +0 -400
- agent_os_kernel-1.1.0.dist-info/RECORD +0 -12
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.2.0.dist-info}/WHEEL +0 -0
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.2.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,447 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Path simulation system to test alternative solutions.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import uuid
|
|
7
|
+
from typing import List, Dict, Any, Optional
|
|
8
|
+
|
|
9
|
+
from .models import FailureAnalysis, SimulationResult, DiagnosisJSON, ShadowAgentResult, AgentFailure, CognitiveGlitch
|
|
10
|
+
|
|
11
|
+
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ShadowAgent:
    """
    Shadow Agent used for counterfactual replay of a failed request.

    The failed user prompt is replayed with a corrective hint appended, and
    the outcome is checked to see whether the hint flips the result. This is
    "The Scientist" that verifies if the hint actually fixes the problem.

    Every replay is recorded in ``execution_history``.
    """

    def __init__(self):
        # Chronological record of every replay performed by this instance.
        self.execution_history: List[ShadowAgentResult] = []

    def replay_with_hint(
        self,
        original_prompt: str,
        hint: str,
        diagnosis: DiagnosisJSON,
        failure: AgentFailure
    ) -> ShadowAgentResult:
        """
        Replay ``original_prompt`` with ``hint`` appended and record the result.

        Args:
            original_prompt: User prompt that led to the failure.
            hint: Text appended to the prompt (separated by a blank line).
            diagnosis: Diagnosis forwarded to the simulated execution.
            failure: Original failure, forwarded to simulation and verification.

        Returns:
            The ShadowAgentResult describing this replay; the same object is
            also appended to ``execution_history``.
        """
        run_id = f"shadow-{uuid.uuid4().hex[:8]}"
        logger.info(f"Shadow agent {run_id} replaying with hint")

        # The hint is injected by appending it after a blank line.
        hinted_prompt = f"{original_prompt}\n\n{hint}"

        # Stand-in for running a real agent in a sandbox.
        outcome = self._simulate_execution(hinted_prompt, diagnosis, failure)
        succeeded, produced_output, steps, chosen_action = outcome

        # Decide whether the simulated run counts as a confirmed fix.
        fix_confirmed = self._verify_fix(succeeded, chosen_action, failure)

        record = ShadowAgentResult(
            shadow_id=run_id,
            original_prompt=original_prompt,
            injected_hint=hint,
            modified_prompt=hinted_prompt,
            execution_success=succeeded,
            output=produced_output,
            reasoning_chain=steps,
            action_taken=chosen_action,
            verified=fix_confirmed
        )
        self.execution_history.append(record)

        logger.info(f"Shadow execution complete. Success: {succeeded}, Verified: {fix_confirmed}")
        return record

    def _simulate_execution(
        self,
        prompt: str,
        diagnosis: DiagnosisJSON,
        failure: AgentFailure
    ) -> tuple:
        """
        Produce a simulated execution outcome for the hinted prompt.

        No agent is actually run here: the reasoning chain and action are
        fabricated from ``diagnosis`` alone. A real implementation would
        instead spin up a sandboxed agent, inject the hint, execute the
        original request, capture reasoning and action, and check the action
        against control plane rules.

        NOTE(review): ``prompt`` and ``failure`` are accepted but never read,
        and the reasoning step quotes ``diagnosis.hint`` rather than the hint
        embedded in ``prompt`` - confirm this is intended.

        Returns:
            A ``(success, output, reasoning, action)`` tuple.
        """
        # Only the first 50 characters of the diagnosis hint are echoed.
        steps = [
            "Parse user request",
            f"Consider hint: {diagnosis.hint[:50]}...",
            "Validate assumptions against provided context",
            "Construct safe action"
        ]

        if diagnosis.cognitive_glitch == CognitiveGlitch.HALLUCINATION:
            # Hallucination: the corrected action validates the schema first.
            validated_action = {
                "action": "execute_with_validation",
                "validation": "schema_check_passed",
                "safe_mode": True
            }
            return True, "Action validated and executed successfully", steps, validated_action

        if diagnosis.cognitive_glitch == CognitiveGlitch.PERMISSION_ERROR:
            # Permission error: the corrected action checks permissions first.
            permission_action = {
                "action": "check_permissions_then_execute",
                "permission_validation": True
            }
            return True, "Permissions validated, action executed", steps, permission_action

        # Any other glitch: generic safe action whose success is decided
        # deterministically by the diagnosis confidence.
        fallback_action = {
            "action": "safe_execute",
            "hint_applied": True
        }
        confident = diagnosis.confidence > 0.7
        if confident:
            message = "Action executed with safety checks"
        else:
            message = "Action still failed"
        return confident, message, steps, fallback_action

    def _verify_fix(self, success: bool, action: Optional[Dict], failure: AgentFailure) -> bool:
        """
        Decide whether a simulated run counts as a verified fix.

        A failed run is never verified. A successful run with a non-empty
        action is verified only when the action contains at least one of the
        recognised safety keys; with no action, ``success`` is returned as-is.

        NOTE(review): ``failure`` is not consulted. Also, the generic
        "safe_execute" action built by ``_simulate_execution`` carries none of
        the keys checked below, so it is never verified even when it
        succeeds - confirm this is intended.
        """
        if not success:
            return False

        if not action:
            # Nothing to inspect; fall back to the raw success flag.
            return success

        # Presence of any of these keys marks a safety mechanism in the action.
        safety_keys = ("validation", "permission_validation", "schema_check", "safe_mode")
        for name in safety_keys:
            if name in action:
                return True
        return False
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
class PathSimulator:
|
|
154
|
+
"""Simulates alternative paths to avoid failures."""
|
|
155
|
+
|
|
156
|
+
def __init__(self):
    """Create a simulator with empty history and its own ShadowAgent."""
    # MCTS search iterations
    self.mcts_iterations = 5
    # Shadow agent instance owned by this simulator.
    self.shadow_agent = ShadowAgent()
    # Every SimulationResult produced by simulate() is appended here.
    self.simulation_history: List[SimulationResult] = []
|
|
160
|
+
|
|
161
|
+
def simulate(self, analysis: FailureAnalysis) -> SimulationResult:
    """
    Evaluate an alternative path derived from a failure analysis.

    The path, its expected outcome, its risk score and its estimated success
    rate are obtained from this class's private helpers. The simulation is
    marked successful only when the risk is below 0.5 and the estimated
    success rate is above 0.7.

    Args:
        analysis: Failure analysis to build the alternative path from.

    Returns:
        The SimulationResult for this run; it is also appended to
        ``simulation_history``.
    """
    logger.info(f"Simulating alternative path for agent {analysis.failure.agent_id}")

    # Fresh identifier for this simulation run.
    run_id = str(uuid.uuid4())

    # Derive the candidate path, then score it. The success-rate estimate
    # takes the risk score as input, so risk must be computed first.
    path = self._build_alternative_path(analysis)
    outcome = self._predict_outcome(analysis, path)
    risk = self._calculate_risk(analysis, path)
    success_rate = self._estimate_success_rate(analysis, risk)

    # Accept only paths that are both low-risk and likely to succeed.
    is_viable = risk < 0.5 and success_rate > 0.7

    result = SimulationResult(
        simulation_id=run_id,
        success=is_viable,
        alternative_path=path,
        expected_outcome=outcome,
        risk_score=risk,
        estimated_success_rate=success_rate
    )
    self.simulation_history.append(result)

    # Failed simulations are logged at warning level, successes at info.
    if not is_viable:
        logger.warning(f"Simulation failed. Success rate: {success_rate:.2f}, Risk: {risk:.2f}")
    else:
        logger.info(f"Simulation successful. Success rate: {success_rate:.2f}, Risk: {risk:.2f}")

    return result
|
|
208
|
+
|
|
209
|
+
def simulate_counterfactual(
    self,
    diagnosis: DiagnosisJSON,
    failure: AgentFailure
) -> ShadowAgentResult:
    """
    Replay the failed prompt through the Shadow Agent with a hint injected.

    When the failure carries a trace, the trace's user prompt is handed to
    the MCTS-style hint search together with the diagnosis, and the best
    replay found is returned. Without a trace nothing can be replayed, so
    a placeholder result marked as unsuccessful and unverified is returned.

    Args:
        diagnosis: Cognitive glitch diagnosis with hint
        failure: Original failure with trace

    Returns:
        ShadowAgentResult showing if hint fixes the problem
    """
    logger.info("Starting counterfactual simulation with Shadow Agent")

    trace = failure.failure_trace
    if trace:
        # Normal path: try several phrasings of the hint and keep the best.
        return self._mcts_search_minimal_hint(
            trace.user_prompt,
            diagnosis,
            failure
        )

    logger.warning("No failure trace available for counterfactual simulation")
    # Nothing to replay: hand back a clearly-marked dummy result.
    return ShadowAgentResult(
        shadow_id="shadow-no-trace",
        original_prompt="No prompt available",
        injected_hint=diagnosis.hint,
        modified_prompt="No prompt available",
        execution_success=False,
        output="No trace available for simulation",
        reasoning_chain=[],
        action_taken=None,
        verified=False
    )
|
|
253
|
+
|
|
254
|
+
def _mcts_search_minimal_hint(
    self,
    prompt: str,
    diagnosis: DiagnosisJSON,
    failure: AgentFailure
) -> ShadowAgentResult:
    """
    MCTS-inspired search for a hint that fixes the problem.

    This is a simplified stand-in for a real tree search: it replays the
    prompt once per hint variation (at most ``self.mcts_iterations``
    variations), stops early on the first replay that is both verified and
    successful, and then returns the highest-ranked replay.

    Ranking prefers, in order: verified results, successful executions,
    and longer reasoning chains.

    Args:
        prompt: The user prompt to replay.
        diagnosis: Diagnosis whose ``hint`` seeds the variations.
        failure: The original failure, forwarded to the shadow agent.

    Returns:
        The best ShadowAgentResult among the replays that were run. At
        least one replay (with the unmodified hint) is always performed.
    """
    logger.info(f"MCTS search across {self.mcts_iterations} iterations")

    hint_variations = self._generate_hint_variations(diagnosis.hint)

    # A budget below 1 would leave nothing to rank and make max() raise
    # ValueError on an empty sequence, so clamp it to a single replay.
    budget = self.mcts_iterations
    if budget is not None and budget < 1:
        logger.warning(
            f"mcts_iterations={budget} is below 1; replaying the base hint once"
        )
        budget = 1

    # Fall back to the raw hint if no variation survived the slice.
    candidates = hint_variations[:budget] or [diagnosis.hint]
    results = []

    for i, hint in enumerate(candidates):
        logger.debug(f"MCTS iteration {i+1}: Testing hint variation")
        result = self.shadow_agent.replay_with_hint(
            prompt, hint, diagnosis, failure
        )
        results.append(result)

        # Early exit if we find a verified solution
        if result.verified and result.execution_success:
            logger.info(f"Found verified solution at iteration {i+1}")
            break

    # Select best result (verified + successful, then longest reasoning chain)
    best = max(
        results,
        key=lambda r: (r.verified, r.execution_success, len(r.reasoning_chain))
    )

    logger.info(f"MCTS search complete. Best result verified: {best.verified}")
    return best
|
|
292
|
+
|
|
293
|
+
def _generate_hint_variations(self, base_hint: str) -> List[str]:
|
|
294
|
+
"""
|
|
295
|
+
Generate variations of the hint for MCTS exploration.
|
|
296
|
+
|
|
297
|
+
This finds different ways to provide the same guidance,
|
|
298
|
+
searching for the minimal effective intervention.
|
|
299
|
+
"""
|
|
300
|
+
variations = [base_hint] # Original hint
|
|
301
|
+
|
|
302
|
+
# Variation 1: More concise
|
|
303
|
+
if len(base_hint) > 50:
|
|
304
|
+
concise = base_hint.split(".")[0] + "."
|
|
305
|
+
variations.append(concise)
|
|
306
|
+
|
|
307
|
+
# Variation 2: More explicit
|
|
308
|
+
explicit = base_hint + " Double-check all assumptions."
|
|
309
|
+
variations.append(explicit)
|
|
310
|
+
|
|
311
|
+
# Variation 3: Focus on specific action
|
|
312
|
+
if "validate" in base_hint.lower():
|
|
313
|
+
variations.append("VALIDATION REQUIRED: " + base_hint)
|
|
314
|
+
|
|
315
|
+
# Variation 4: Minimal version
|
|
316
|
+
if "HINT:" in base_hint:
|
|
317
|
+
minimal = base_hint.replace("HINT: ", "")
|
|
318
|
+
variations.append(minimal)
|
|
319
|
+
|
|
320
|
+
return variations
|
|
321
|
+
|
|
322
|
+
def _build_alternative_path(self, analysis: FailureAnalysis) -> List[Dict[str, Any]]:
    """
    Build an alternative execution path for the analysed failure.

    Three shapes of path are produced, chosen by the failure type's value:

    * ``"blocked_by_control_plane"``: validate permissions, request
      authorization, then execute with safety checks. The resource and
      action names come from the failure context ("unknown" when absent).
    * ``"timeout"``: set a 30 second timeout, add progress monitoring
      every 5 seconds, then execute allowing partial results.
    * anything else: one ``apply_fix`` step for each of the first three
      suggested fixes.

    Returns:
        A list of step dicts with ``step``, ``action``, ``description``
        and ``params`` keys; empty when a generic failure has no
        suggested fixes.
    """
    failure = analysis.failure
    kind = failure.failure_type.value

    # Control plane blocks: check and obtain permission before acting.
    if kind == "blocked_by_control_plane":
        ctx = failure.context
        return [
            {
                "step": 1,
                "action": "validate_permissions",
                "description": "Check permissions before attempting action",
                "params": {
                    "resource": ctx.get("resource", "unknown"),
                    "action": ctx.get("action", "unknown")
                }
            },
            {
                "step": 2,
                "action": "request_authorization",
                "description": "Request proper authorization if needed",
                "params": {
                    "required_permission": "resource_access"
                }
            },
            {
                "step": 3,
                "action": "safe_execute",
                "description": "Execute action with safety checks",
                "params": {
                    "original_action": ctx.get("action", "unknown"),
                    "safety_mode": "enabled"
                }
            },
        ]

    # Timeouts: bound the run, watch progress, accept partial output.
    if kind == "timeout":
        return [
            {
                "step": 1,
                "action": "set_timeout",
                "description": "Configure appropriate timeout",
                "params": {"timeout_seconds": 30}
            },
            {
                "step": 2,
                "action": "add_progress_monitoring",
                "description": "Add progress monitoring",
                "params": {"check_interval_seconds": 5}
            },
            {
                "step": 3,
                "action": "execute_with_timeout",
                "description": "Execute with timeout handling",
                "params": {"allow_partial_results": True}
            },
        ]

    # Everything else: turn the top three suggested fixes into steps.
    return [
        {
            "step": number,
            "action": "apply_fix",
            "description": fix,
            "params": {"fix": fix}
        }
        for number, fix in enumerate(analysis.suggested_fixes[:3], 1)
    ]
|
|
392
|
+
|
|
393
|
+
def _predict_outcome(self, analysis: FailureAnalysis, alternative_path: List[Dict[str, Any]]) -> str:
    """
    Describe the expected result of running the alternative path.

    The text depends only on the failure type's value; ``alternative_path``
    is accepted but not inspected.

    Returns:
        A fixed sentence for control-plane blocks and timeouts, otherwise
        a sentence naming the failure type.
    """
    kind = analysis.failure.failure_type.value

    # Failure types with a dedicated, canned prediction.
    canned_predictions = (
        ("blocked_by_control_plane",
         "Action will be executed with proper authorization and safety checks"),
        ("timeout",
         "Operation will complete within timeout with progress monitoring"),
    )
    for known_kind, prediction in canned_predictions:
        if kind == known_kind:
            return prediction

    return f"Failure {kind} will be prevented by applying suggested fixes"
|
|
403
|
+
|
|
404
|
+
def _calculate_risk(self, analysis: FailureAnalysis, alternative_path: List[Dict[str, Any]]) -> float:
    """
    Score how risky the alternative path is, clamped to [0.0, 1.0].

    Starts from a base of 0.3, subtracts 0.2 times the analysis
    confidence, subtracts 0.1 when the path has three or more steps, and
    adds 0.2 when the failure type's value is ``"unknown"``.
    """
    # Base risk, reduced in proportion to how confident the analysis is.
    score = 0.3 - (analysis.confidence_score * 0.2)

    # A path with several steps is treated as more thorough, hence safer.
    if len(alternative_path) >= 3:
        score = score - 0.1

    # Unclassified failures carry extra uncertainty.
    if analysis.failure.failure_type.value == "unknown":
        score = score + 0.2

    return max(0.0, min(1.0, score))
|
|
420
|
+
|
|
421
|
+
def _estimate_success_rate(self, analysis: FailureAnalysis, risk_score: float) -> float:
    """
    Estimate the chance that the alternative path succeeds, in [0.0, 1.0].

    The analysis confidence is scaled down by half of the risk score, and
    0.1 is added when the analysis lists at least one similar failure.
    """
    # Confidence is the baseline; risk discounts it by up to 50%.
    estimate = analysis.confidence_score * (1.0 - risk_score * 0.5)

    # Having seen comparable failures before earns a small bonus.
    if len(analysis.similar_failures):
        estimate = estimate + 0.1

    return max(0.0, min(1.0, estimate))
|
|
434
|
+
|
|
435
|
+
def get_best_simulation(self, simulations: List[SimulationResult]) -> SimulationResult:
    """
    Pick the most promising simulation from a list.

    Simulations are ranked by estimated success rate (higher first) and,
    for equal rates, by risk score (lower first).

    Raises:
        ValueError: If ``simulations`` is empty.
    """
    if not simulations:
        raise ValueError("No simulations provided")

    def _rank(sim):
        # Negating the risk lets one descending sort favour low risk.
        return (sim.estimated_success_rate, -sim.risk_score)

    ranked = list(simulations)
    ranked.sort(key=_rank, reverse=True)
    return ranked[0]
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Teacher - Simplified reference implementation for the Shadow Teacher.
|
|
3
|
+
|
|
4
|
+
This is a reference implementation showing the core concept of using a
|
|
5
|
+
"Teacher Model" (stronger reasoning model) to diagnose why an agent failed.
|
|
6
|
+
|
|
7
|
+
The production implementation is integrated throughout the analyzer.py and
|
|
8
|
+
completeness_auditor.py modules with full trace capture and cognitive diagnosis.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _sanitize_input(text: str, max_length: int = 1000) -> str:
|
|
13
|
+
"""
|
|
14
|
+
Sanitize input to prevent prompt injection.
|
|
15
|
+
|
|
16
|
+
Args:
|
|
17
|
+
text: Input text to sanitize
|
|
18
|
+
max_length: Maximum allowed length
|
|
19
|
+
|
|
20
|
+
Returns:
|
|
21
|
+
Sanitized text
|
|
22
|
+
"""
|
|
23
|
+
if not text:
|
|
24
|
+
return ""
|
|
25
|
+
|
|
26
|
+
# Truncate to max length
|
|
27
|
+
text = str(text)[:max_length]
|
|
28
|
+
|
|
29
|
+
# Remove potential prompt injection patterns
|
|
30
|
+
# In production, use more sophisticated sanitization
|
|
31
|
+
dangerous_patterns = ["ignore previous", "ignore all", "disregard", "new instructions"]
|
|
32
|
+
text_lower = text.lower()
|
|
33
|
+
for pattern in dangerous_patterns:
|
|
34
|
+
if pattern in text_lower:
|
|
35
|
+
text = text.replace(pattern, "[FILTERED]")
|
|
36
|
+
|
|
37
|
+
return text
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
async def diagnose_failure(prompt, failed_response, tool_trace):
    """
    Ask a 'Reasoning Model' (e.g., o1 or Claude 3.5 Sonnet) for the root
    cause of an agent failure.

    In this reference implementation the teacher prompt is assembled but
    never sent to a model; a fixed, simulated diagnosis is returned.

    Args:
        prompt: The original task/prompt that failed
        failed_response: The agent's failed response
        tool_trace: Trace of tools/actions the agent attempted

    Returns:
        dict: Diagnosis with "cause" and "lesson_patch" keys
    """
    # Every caller-supplied value is sanitized before it reaches the prompt.
    safe_prompt, safe_response, safe_trace = (
        _sanitize_input(raw) for raw in (prompt, failed_response, tool_trace)
    )

    # NOTE(review): the whitespace inside this template was not recoverable
    # from the reviewed text; the string is currently unused at runtime.
    teacher_prompt = f"""
    The Agent failed to complete this task: '{safe_prompt}'.

    Agent Output: {safe_response}
    Tool Trace: {safe_trace}

    Task:
    1. Did the agent try hard enough? (Laziness)
    2. Did the agent hallucinate a tool parameter? (Skill Issue)
    3. Write a 1-sentence 'Lesson' that fixes this specific error.

    Output Format: JSON {{ "cause": "...", "lesson_patch": "..." }}
    """

    # In production, this would call the "Expensive" Model only on failure:
    # diagnosis = await llm_client.generate(model="o1-preview", prompt=teacher_prompt)

    # Simulated diagnosis for reference
    return {
        "cause": "Agent gave up without exhaustive search",
        "lesson_patch": "Before reporting 'not found', check all data sources including archived partitions"
    }
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Failure Triage Engine - Decides sync (JIT) vs async (batch) correction strategy.
|
|
3
|
+
|
|
4
|
+
This is the missing component that sits between Failure Detection and Correction.
|
|
5
|
+
It analyzes the context to determine if a failure should be fixed immediately
|
|
6
|
+
(blocking the user) or asynchronously (returning error quickly, fixing later).
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from enum import Enum
|
|
10
|
+
from typing import Dict, Any, Optional
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class FixStrategy(Enum):
    """Strategy for fixing agent failures."""

    # High latency, high reliability: fix NOW and make the caller wait.
    SYNC_JIT = "jit_retry"
    # Low latency, eventual consistency: return quickly and fix LATER.
    ASYNC_BATCH = "async_patch"


class FailureTriage:
    """
    Decision engine for routing failures to sync (JIT) or async (batch) correction.

    The triage engine applies these rules, in priority order:
    1. Cognitive failures with a full trace → SYNC_JIT (deep diagnosis needed)
    2. Safety/Write Operations → SYNC_JIT (must fix immediately)
    3. High Effort Prompts → SYNC_JIT (user expects deep thinking)
    4. VIP Users → SYNC_JIT
    5. Everything else (Read/Query) → ASYNC_BATCH (save user time)

    This enables "thinking fast" (async) for trivial failures and
    "thinking slow" (sync) for critical failures.
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """
        Initialize the triage engine.

        Args:
            config: Optional configuration. The keys "critical_tools" and
                "high_effort_keywords" replace the built-in defaults.
        """
        self.config = config or {}

        # Critical tools that require synchronous fixing (can be customized via config)
        self.critical_tools = self.config.get("critical_tools", [
            "delete_resource",
            "update_db",
            "execute_payment",
            "drop_table",
            "refund_user",
            "delete_file",
            "execute_sql",
            "write_file",
            "modify_permissions",
            "delete_user"  # User deletion is critical
        ])

        # Keywords indicating high-effort prompts requiring deep thinking
        self.high_effort_keywords = self.config.get("high_effort_keywords", [
            "carefully",
            "critical",
            "important",
            "urgent",
            "must",
            "required",
            "ensure"
        ])

    def decide_strategy(
        self,
        prompt: str,
        tool_name: Optional[str] = None,
        user_metadata: Optional[Dict[str, Any]] = None,
        context: Optional[Dict[str, Any]] = None
    ) -> FixStrategy:
        """
        Decide whether to fix this failure sync (JIT) or async (batch).

        Decision Rules (in priority order):
        1. Cognitive failures with full trace → SYNC_JIT (deep diagnosis needed)
        2. Safety/Write Operations → SYNC_JIT
        3. High Effort Prompts → SYNC_JIT
        4. VIP Users → SYNC_JIT
        5. Default (Read/Query) → ASYNC_BATCH

        Args:
            prompt: The user prompt that led to the failure. A missing
                (None or empty) prompt is treated as containing no
                high-effort keywords.
            tool_name: Name of the tool that failed (if available)
            user_metadata: Metadata about the user (e.g., VIP status)
            context: Additional context about the failure

        Returns:
            FixStrategy indicating sync (JIT) or async (batch) correction
        """
        # Rule 1: Cognitive failures with full trace (chain_of_thought + failed_action)
        # These warrant immediate deep analysis with Shadow Teacher
        if context:
            has_chain = context.get("chain_of_thought") is not None
            has_failed_action = context.get("failed_action") is not None
            if has_chain and has_failed_action:
                return FixStrategy.SYNC_JIT

        # Rule 2: Safety/Write Operations are always Critical
        if tool_name and tool_name in self.critical_tools:
            return FixStrategy.SYNC_JIT

        # Check context for critical actions (fallback if tool_name not provided)
        if context:
            action = context.get("action", "")
            if action in self.critical_tools:
                return FixStrategy.SYNC_JIT

            # Also check failed_action if present
            failed_action = context.get("failed_action")
            if failed_action and isinstance(failed_action, dict):
                failed_action_name = failed_action.get("action", "")
                if failed_action_name in self.critical_tools:
                    return FixStrategy.SYNC_JIT

        # Rule 3: "High Effort" prompts request deep thinking.
        # Failures may arrive without a captured prompt; fall back to ""
        # so triage continues to the remaining rules instead of raising.
        prompt_lower = (prompt or "").lower()
        if any(keyword in prompt_lower for keyword in self.high_effort_keywords):
            return FixStrategy.SYNC_JIT

        # Rule 4: VIP users get priority treatment (optional)
        if user_metadata and user_metadata.get("is_vip", False):
            return FixStrategy.SYNC_JIT

        # Rule 5: Default to Async for "Read/Query" failures to save user time
        return FixStrategy.ASYNC_BATCH

    def is_critical(
        self,
        prompt: str,
        tool_name: Optional[str] = None,
        user_metadata: Optional[Dict[str, Any]] = None,
        context: Optional[Dict[str, Any]] = None
    ) -> bool:
        """
        Convenience method to check if a failure is critical (needs SYNC_JIT).

        Args:
            prompt: The user prompt
            tool_name: Name of the tool that failed
            user_metadata: User metadata
            context: Additional context

        Returns:
            True if critical (SYNC_JIT), False if non-critical (ASYNC_BATCH)
        """
        strategy = self.decide_strategy(prompt, tool_name, user_metadata, context)
        return strategy == FixStrategy.SYNC_JIT
|