agent-os-kernel 1.1.0__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_os/__init__.py +66 -4
- agent_os/agents_compat.py +286 -0
- agent_os/base_agent.py +308 -0
- agent_os/cli.py +1079 -19
- agent_os/integrations/__init__.py +37 -2
- agent_os/integrations/openai_adapter.py +502 -0
- agent_os/integrations/semantic_kernel_adapter.py +569 -0
- agent_os/stateless.py +349 -0
- agent_os_kernel-1.3.0.dist-info/METADATA +676 -0
- agent_os_kernel-1.3.0.dist-info/RECORD +1053 -0
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.3.0.dist-info}/entry_points.txt +0 -1
- modules/amb/.github/workflows/ci.yml +102 -0
- modules/amb/.github/workflows/publish.yml +146 -0
- modules/amb/.gitignore +134 -0
- modules/amb/CHANGELOG.md +118 -0
- modules/amb/CONTRIBUTING.md +141 -0
- modules/amb/LICENSE +21 -0
- modules/amb/README.md +188 -0
- modules/amb/amb_core/__init__.py +175 -0
- modules/amb/amb_core/adapters/__init__.py +55 -0
- modules/amb/amb_core/adapters/aws_sqs_broker.py +374 -0
- modules/amb/amb_core/adapters/azure_servicebus_broker.py +338 -0
- modules/amb/amb_core/adapters/kafka_broker.py +258 -0
- modules/amb/amb_core/adapters/nats_broker.py +283 -0
- modules/amb/amb_core/adapters/rabbitmq_broker.py +233 -0
- modules/amb/amb_core/adapters/redis_broker.py +260 -0
- modules/amb/amb_core/broker.py +143 -0
- modules/amb/amb_core/bus.py +479 -0
- modules/amb/amb_core/cloudevents.py +507 -0
- modules/amb/amb_core/dlq.py +343 -0
- modules/amb/amb_core/hf_utils.py +534 -0
- modules/amb/amb_core/memory_broker.py +408 -0
- modules/amb/amb_core/models.py +139 -0
- modules/amb/amb_core/persistence.py +527 -0
- modules/amb/amb_core/schema.py +292 -0
- modules/amb/amb_core/tracing.py +356 -0
- modules/amb/examples/advanced_features.py +223 -0
- modules/amb/examples/backpressure_demo.py +225 -0
- modules/amb/examples/basic_usage.py +117 -0
- modules/amb/examples/tracing_demo.py +104 -0
- modules/amb/experiments/README.md +52 -0
- modules/amb/experiments/reproduce_results.py +467 -0
- modules/amb/experiments/results.json +324 -0
- modules/amb/paper/README.md +40 -0
- modules/amb/paper/paper.tex +365 -0
- modules/amb/paper/whitepaper.md +377 -0
- modules/amb/pyproject.toml +117 -0
- modules/amb/tests/__init__.py +1 -0
- modules/amb/tests/test_backpressure_priority.py +280 -0
- modules/amb/tests/test_bus.py +198 -0
- modules/amb/tests/test_cloudevents.py +443 -0
- modules/amb/tests/test_features.py +531 -0
- modules/amb/tests/test_models.py +74 -0
- modules/amb/tests/test_tracing.py +254 -0
- modules/atr/.github/workflows/ci.yml +101 -0
- modules/atr/.github/workflows/publish.yml +140 -0
- modules/atr/.gitignore +134 -0
- modules/atr/.pre-commit-config.yaml +37 -0
- modules/atr/CHANGELOG.md +39 -0
- modules/atr/CONTRIBUTING.md +96 -0
- modules/atr/IMPLEMENTATION_SUMMARY.md +143 -0
- modules/atr/README.md +180 -0
- modules/atr/atr/__init__.py +638 -0
- modules/atr/atr/access.py +346 -0
- modules/atr/atr/composition.py +643 -0
- modules/atr/atr/decorator.py +355 -0
- modules/atr/atr/executor.py +382 -0
- modules/atr/atr/health.py +555 -0
- modules/atr/atr/hf_utils.py +447 -0
- modules/atr/atr/injection.py +420 -0
- modules/atr/atr/metrics.py +438 -0
- modules/atr/atr/policies.py +401 -0
- modules/atr/atr/py.typed +2 -0
- modules/atr/atr/registry.py +450 -0
- modules/atr/atr/schema.py +478 -0
- modules/atr/atr/tools/safe/__init__.py +73 -0
- modules/atr/atr/tools/safe/calculator.py +380 -0
- modules/atr/atr/tools/safe/datetime_tool.py +441 -0
- modules/atr/atr/tools/safe/file_reader.py +400 -0
- modules/atr/atr/tools/safe/http_client.py +314 -0
- modules/atr/atr/tools/safe/json_parser.py +372 -0
- modules/atr/atr/tools/safe/text_tool.py +526 -0
- modules/atr/atr/tools/safe/toolkit.py +173 -0
- modules/atr/docs/PYPI_SETUP.md +113 -0
- modules/atr/examples/README.md +27 -0
- modules/atr/examples/demo.py +144 -0
- modules/atr/examples/sandbox_demo.py +218 -0
- modules/atr/experiments/README.md +69 -0
- modules/atr/experiments/reproduce_results.py +509 -0
- modules/atr/experiments/results/.gitkeep +0 -0
- modules/atr/experiments/results/results_20260123_140334.json +71 -0
- modules/atr/paper/README.md +36 -0
- modules/atr/paper/figures/.gitkeep +0 -0
- modules/atr/paper/references.bib +84 -0
- modules/atr/paper/structure.tex +293 -0
- modules/atr/paper/whitepaper.md +234 -0
- modules/atr/pyproject.toml +148 -0
- modules/atr/requirements.txt +1 -0
- modules/atr/setup.py +30 -0
- modules/atr/tests/__init__.py +1 -0
- modules/atr/tests/test_decorator.py +317 -0
- modules/atr/tests/test_executor.py +245 -0
- modules/atr/tests/test_integration_executor.py +184 -0
- modules/atr/tests/test_registry.py +312 -0
- modules/atr/tests/test_schema.py +182 -0
- modules/atr/tests/test_v2_features.py +708 -0
- modules/caas/.dockerignore +63 -0
- modules/caas/.github/ISSUE_TEMPLATE/bug_report.md +38 -0
- modules/caas/.github/ISSUE_TEMPLATE/custom.md +10 -0
- modules/caas/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
- modules/caas/.github/workflows/ci.yml +100 -0
- modules/caas/.github/workflows/lint.yml +39 -0
- modules/caas/.github/workflows/publish-pypi.yml +124 -0
- modules/caas/.gitignore +73 -0
- modules/caas/.pre-commit-config.yaml +33 -0
- modules/caas/CHANGELOG.md +58 -0
- modules/caas/CONTRIBUTING.md +346 -0
- modules/caas/Dockerfile +41 -0
- modules/caas/LICENSE +21 -0
- modules/caas/MANIFEST.in +11 -0
- modules/caas/README.md +158 -0
- modules/caas/benchmarks/README.md +255 -0
- modules/caas/benchmarks/create_hf_dataset.py +502 -0
- modules/caas/benchmarks/data/sample_corpus/README.md +86 -0
- modules/caas/benchmarks/data/sample_corpus/auth_module.py +211 -0
- modules/caas/benchmarks/data/sample_corpus/contribution_guide.md +185 -0
- modules/caas/benchmarks/data/sample_corpus/remote_work_policy.html +57 -0
- modules/caas/benchmarks/hf_dataset/README.md +214 -0
- modules/caas/benchmarks/hf_dataset/caas_benchmark_corpus.py +73 -0
- modules/caas/benchmarks/hf_dataset/corpus_preview.json +193 -0
- modules/caas/benchmarks/results/README.md +66 -0
- modules/caas/benchmarks/results/evaluation_2026-01-20.json +121 -0
- modules/caas/benchmarks/run_evaluation.py +561 -0
- modules/caas/benchmarks/statistical_tests.py +289 -0
- modules/caas/benchmarks/verify_sample_corpus.py +83 -0
- modules/caas/docker-compose.yml +38 -0
- modules/caas/docs/CONTEXT_TRIAD.md +462 -0
- modules/caas/docs/CONTRIBUTING.md +346 -0
- modules/caas/docs/ETHICS_AND_LIMITATIONS.md +336 -0
- modules/caas/docs/HEURISTIC_ROUTER.md +442 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY.md +363 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_CONTEXT_TRIAD.md +277 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_HEURISTIC_ROUTER.md +231 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_METADATA_INJECTION.md +258 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_PRAGMATIC_TRUTH.md +212 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_TRUST_GATEWAY.md +319 -0
- modules/caas/docs/LAYER_1_PRIMITIVE.md +202 -0
- modules/caas/docs/METADATA_INJECTION.md +404 -0
- modules/caas/docs/PRAGMATIC_TRUTH.md +431 -0
- modules/caas/docs/RELATED_WORK.md +312 -0
- modules/caas/docs/RELEASE_CHECKLIST.md +219 -0
- modules/caas/docs/RELEASE_GUIDE.md +285 -0
- modules/caas/docs/REPRODUCIBILITY.md +386 -0
- modules/caas/docs/SLIDING_WINDOW.md +387 -0
- modules/caas/docs/STRUCTURE_AWARE_INDEXING.md +158 -0
- modules/caas/docs/TESTING.md +259 -0
- modules/caas/docs/THREAT_MODEL.md +247 -0
- modules/caas/docs/TRUST_GATEWAY.md +575 -0
- modules/caas/docs/VFS.md +298 -0
- modules/caas/examples/agents/enterprise_security_agent.py +414 -0
- modules/caas/examples/agents/intelligent_document_analyzer.py +380 -0
- modules/caas/examples/demos/demo.py +309 -0
- modules/caas/examples/demos/demo_context_triad.py +225 -0
- modules/caas/examples/demos/demo_conversation_manager.py +285 -0
- modules/caas/examples/demos/demo_heuristic_router.py +133 -0
- modules/caas/examples/demos/demo_metadata_injection.py +198 -0
- modules/caas/examples/demos/demo_pragmatic_truth.py +303 -0
- modules/caas/examples/demos/demo_structure_aware.py +140 -0
- modules/caas/examples/demos/demo_time_decay.py +247 -0
- modules/caas/examples/demos/demo_trust_gateway.py +383 -0
- modules/caas/examples/multi_agent/README.md +159 -0
- modules/caas/examples/multi_agent/research_team.py +369 -0
- modules/caas/examples/multi_agent/vfs_collaboration.py +393 -0
- modules/caas/examples/usage/auth_module.py +142 -0
- modules/caas/examples/usage/usage_example.py +173 -0
- modules/caas/experiments/README.md +42 -0
- modules/caas/experiments/reproduce_results.py +462 -0
- modules/caas/paper/ARXIV_METADATA.md +145 -0
- modules/caas/paper/ARXIV_README.md +47 -0
- modules/caas/paper/CHECKLIST.md +103 -0
- modules/caas/paper/GITHUB_RELEASE_NOTES.md +105 -0
- modules/caas/paper/README.md +71 -0
- modules/caas/paper/abstract.md +24 -0
- modules/caas/paper/arxiv_submission.tar +0 -0
- modules/caas/paper/arxiv_submission.zip +0 -0
- modules/caas/paper/build_pdf.py +355 -0
- modules/caas/paper/experiments.md +149 -0
- modules/caas/paper/figures/.gitkeep +0 -0
- modules/caas/paper/figures/README.md +237 -0
- modules/caas/paper/figures/fig1_system_architecture.png +0 -0
- modules/caas/paper/figures/fig1_system_architecture.svg +198 -0
- modules/caas/paper/figures/fig2_context_triad.png +0 -0
- modules/caas/paper/figures/fig2_context_triad.svg +105 -0
- modules/caas/paper/figures/fig3_ablation_results.png +0 -0
- modules/caas/paper/figures/fig3_ablation_results.svg +113 -0
- modules/caas/paper/figures/fig4_routing_latency.png +0 -0
- modules/caas/paper/figures/fig4_routing_latency.svg +97 -0
- modules/caas/paper/intro.md +103 -0
- modules/caas/paper/latex/figures/fig1_system_architecture.png +0 -0
- modules/caas/paper/latex/figures/fig2_context_triad.png +0 -0
- modules/caas/paper/latex/figures/fig3_ablation_results.png +0 -0
- modules/caas/paper/latex/figures/fig4_routing_latency.png +0 -0
- modules/caas/paper/latex/main.tex +468 -0
- modules/caas/paper/latex/references.bib +140 -0
- modules/caas/paper/method.md +350 -0
- modules/caas/paper/outline.md +123 -0
- modules/caas/paper/related_work.md +101 -0
- modules/caas/paper/tables/.gitkeep +0 -0
- modules/caas/paper/tables/results_tables.md +50 -0
- modules/caas/pyproject.toml +172 -0
- modules/caas/requirements.txt +11 -0
- modules/caas/src/caas/__init__.py +232 -0
- modules/caas/src/caas/api/__init__.py +7 -0
- modules/caas/src/caas/api/server.py +1326 -0
- modules/caas/src/caas/caching.py +832 -0
- modules/caas/src/caas/cli.py +208 -0
- modules/caas/src/caas/conversation.py +221 -0
- modules/caas/src/caas/decay.py +118 -0
- modules/caas/src/caas/detection/__init__.py +7 -0
- modules/caas/src/caas/detection/detector.py +236 -0
- modules/caas/src/caas/enrichment.py +127 -0
- modules/caas/src/caas/gateway/__init__.py +24 -0
- modules/caas/src/caas/gateway/trust_gateway.py +471 -0
- modules/caas/src/caas/hf_utils.py +477 -0
- modules/caas/src/caas/ingestion/__init__.py +21 -0
- modules/caas/src/caas/ingestion/processors.py +251 -0
- modules/caas/src/caas/ingestion/structure_parser.py +185 -0
- modules/caas/src/caas/models.py +354 -0
- modules/caas/src/caas/pragmatic_truth.py +441 -0
- modules/caas/src/caas/routing/__init__.py +8 -0
- modules/caas/src/caas/routing/heuristic_router.py +242 -0
- modules/caas/src/caas/storage/__init__.py +7 -0
- modules/caas/src/caas/storage/store.py +450 -0
- modules/caas/src/caas/triad.py +472 -0
- modules/caas/src/caas/tuning/__init__.py +7 -0
- modules/caas/src/caas/tuning/tuner.py +322 -0
- modules/caas/src/caas/vfs/__init__.py +12 -0
- modules/caas/src/caas/vfs/filesystem.py +450 -0
- modules/caas/tests/__init__.py +3 -0
- modules/caas/tests/conftest.py +8 -0
- modules/caas/tests/test_caching.py +628 -0
- modules/caas/tests/test_context_triad.py +385 -0
- modules/caas/tests/test_conversation_manager.py +289 -0
- modules/caas/tests/test_functionality.py +215 -0
- modules/caas/tests/test_heuristic_router.py +370 -0
- modules/caas/tests/test_metadata_injection.py +328 -0
- modules/caas/tests/test_pragmatic_truth.py +322 -0
- modules/caas/tests/test_structure_aware_indexing.py +283 -0
- modules/caas/tests/test_time_decay.py +268 -0
- modules/caas/tests/test_trust_gateway.py +445 -0
- modules/caas/tests/test_vfs.py +298 -0
- modules/cmvk/.github/FUNDING.yml +9 -0
- modules/cmvk/.github/dependabot.yml +54 -0
- modules/cmvk/.github/workflows/ci.yml +205 -0
- modules/cmvk/.github/workflows/publish.yml +143 -0
- modules/cmvk/.gitignore +147 -0
- modules/cmvk/.pre-commit-config.yaml +58 -0
- modules/cmvk/CHANGELOG.md +146 -0
- modules/cmvk/CITATION.cff +48 -0
- modules/cmvk/CONTRIBUTING.md +229 -0
- modules/cmvk/Dockerfile +87 -0
- modules/cmvk/HF_MODEL_CARD.md +185 -0
- modules/cmvk/LICENSE +21 -0
- modules/cmvk/README.md +149 -0
- modules/cmvk/SECURITY.md +114 -0
- modules/cmvk/config/prompts/generator_v1.txt +23 -0
- modules/cmvk/config/prompts/verifier_hostile.txt +32 -0
- modules/cmvk/config/settings.yaml +40 -0
- modules/cmvk/coverage_html/.gitignore +2 -0
- modules/cmvk/coverage_html/class_index.html +658 -0
- modules/cmvk/coverage_html/coverage_html_cb_188fc9a4.js +735 -0
- modules/cmvk/coverage_html/favicon_32_cb_c827f16f.png +0 -0
- modules/cmvk/coverage_html/function_index.html +1978 -0
- modules/cmvk/coverage_html/index.html +255 -0
- modules/cmvk/coverage_html/keybd_closed_cb_900cfef5.png +0 -0
- modules/cmvk/coverage_html/status.json +1 -0
- modules/cmvk/coverage_html/style_cb_5c747636.css +389 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38___init___py.html +315 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_audit_py.html +499 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_benchmarks_py.html +575 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_constitutional_py.html +1001 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_hf_utils_py.html +398 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_metrics_py.html +570 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_profiles_py.html +397 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_types_py.html +109 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_verification_py.html +1053 -0
- modules/cmvk/docs/DIAGRAMS.md +325 -0
- modules/cmvk/docs/architecture.md +345 -0
- modules/cmvk/docs/features.md +308 -0
- modules/cmvk/docs/getting_started.md +279 -0
- modules/cmvk/docs/innovation_layer.md +377 -0
- modules/cmvk/docs/safety.md +281 -0
- modules/cmvk/docs/traceability.md +150 -0
- modules/cmvk/examples/basic_example.py +62 -0
- modules/cmvk/examples/demo_complete_pipeline.py +209 -0
- modules/cmvk/examples/demo_innovation_layer.py +197 -0
- modules/cmvk/examples/example.py +112 -0
- modules/cmvk/examples/model_diversity_comparison.py +110 -0
- modules/cmvk/examples/real_api_integration.py +121 -0
- modules/cmvk/examples/test_full_pipeline.py +303 -0
- modules/cmvk/experiments/FEATURE_2_LATERAL_THINKING.md +187 -0
- modules/cmvk/experiments/README.md +216 -0
- modules/cmvk/experiments/ablation_runner.py +666 -0
- modules/cmvk/experiments/baseline_runner.py +158 -0
- modules/cmvk/experiments/blind_spot_benchmark.py +364 -0
- modules/cmvk/experiments/datasets/README.md +85 -0
- modules/cmvk/experiments/datasets/humaneval_50.json +352 -0
- modules/cmvk/experiments/datasets/humaneval_full.json +1150 -0
- modules/cmvk/experiments/datasets/humaneval_sample.json +32 -0
- modules/cmvk/experiments/datasets/sabotage.json +262 -0
- modules/cmvk/experiments/datasets/sample.json +40 -0
- modules/cmvk/experiments/demo_with_traces.py +110 -0
- modules/cmvk/experiments/efficiency_curve.py +259 -0
- modules/cmvk/experiments/experiment_runner.py +243 -0
- modules/cmvk/experiments/paper_data_generator.py +183 -0
- modules/cmvk/experiments/reproduce_results.py +407 -0
- modules/cmvk/experiments/reproducible_runner.py +352 -0
- modules/cmvk/experiments/sabotage_stress_test.py +311 -0
- modules/cmvk/experiments/test_lateral_thinking.py +116 -0
- modules/cmvk/experiments/test_prosecutor.py +41 -0
- modules/cmvk/experiments/visualize_results.py +735 -0
- modules/cmvk/logs/traces/demo_HumanEval_0_20260121-204900.json +36 -0
- modules/cmvk/notebooks/analysis.ipynb +124 -0
- modules/cmvk/paper/PAPER.md +561 -0
- modules/cmvk/paper/arxiv_checklist.md +230 -0
- modules/cmvk/paper/cmvk_neurips.aux +77 -0
- modules/cmvk/paper/cmvk_neurips.bbl +81 -0
- modules/cmvk/paper/cmvk_neurips.blg +48 -0
- modules/cmvk/paper/cmvk_neurips.out +16 -0
- modules/cmvk/paper/cmvk_neurips.pdf +0 -0
- modules/cmvk/paper/cmvk_neurips.tex +309 -0
- modules/cmvk/paper/figures/ablation.png +0 -0
- modules/cmvk/paper/figures/ablation.svg +39 -0
- modules/cmvk/paper/figures/architecture.png +0 -0
- modules/cmvk/paper/figures/architecture.svg +115 -0
- modules/cmvk/paper/figures/results_bar.png +0 -0
- modules/cmvk/paper/figures/results_bar.svg +70 -0
- modules/cmvk/paper/generate_figures.py +383 -0
- modules/cmvk/paper/neurips_2024.sty +101 -0
- modules/cmvk/paper/references.bib +98 -0
- modules/cmvk/paper/structure.tex +200 -0
- modules/cmvk/pyproject.toml +189 -0
- modules/cmvk/requirements-dev.txt +19 -0
- modules/cmvk/requirements.txt +14 -0
- modules/cmvk/src/cmvk/__init__.py +216 -0
- modules/cmvk/src/cmvk/audit.py +400 -0
- modules/cmvk/src/cmvk/benchmarks.py +476 -0
- modules/cmvk/src/cmvk/constitutional.py +902 -0
- modules/cmvk/src/cmvk/hf_utils.py +299 -0
- modules/cmvk/src/cmvk/metrics.py +471 -0
- modules/cmvk/src/cmvk/profiles.py +298 -0
- modules/cmvk/src/cmvk/py.typed +0 -0
- modules/cmvk/src/cmvk/types.py +10 -0
- modules/cmvk/src/cmvk/verification.py +954 -0
- modules/cmvk/src/cross_model_verification_kernel/__init__.py +91 -0
- modules/cmvk/src/cross_model_verification_kernel/__main__.py +10 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/__init__.py +16 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/base_agent.py +142 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/generator_openai.py +223 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/verifier_anthropic.py +448 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/verifier_gemini.py +481 -0
- modules/cmvk/src/cross_model_verification_kernel/cli.py +570 -0
- modules/cmvk/src/cross_model_verification_kernel/core/__init__.py +26 -0
- modules/cmvk/src/cross_model_verification_kernel/core/graph_memory.py +308 -0
- modules/cmvk/src/cross_model_verification_kernel/core/kernel.py +413 -0
- modules/cmvk/src/cross_model_verification_kernel/core/trace_logger.py +75 -0
- modules/cmvk/src/cross_model_verification_kernel/core/types.py +121 -0
- modules/cmvk/src/cross_model_verification_kernel/datasets/__init__.py +20 -0
- modules/cmvk/src/cross_model_verification_kernel/datasets/humaneval_loader.py +271 -0
- modules/cmvk/src/cross_model_verification_kernel/generator.py +118 -0
- modules/cmvk/src/cross_model_verification_kernel/kernel.py +292 -0
- modules/cmvk/src/cross_model_verification_kernel/models.py +111 -0
- modules/cmvk/src/cross_model_verification_kernel/py.typed +1 -0
- modules/cmvk/src/cross_model_verification_kernel/simple_kernel.py +185 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/__init__.py +94 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/huggingface_upload.py +394 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/sandbox.py +159 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/statistics.py +468 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/visualizer.py +312 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/web_search.py +86 -0
- modules/cmvk/src/cross_model_verification_kernel/verifier.py +257 -0
- modules/cmvk/tests/__init__.py +3 -0
- modules/cmvk/tests/conftest.py +61 -0
- modules/cmvk/tests/integration/__init__.py +1 -0
- modules/cmvk/tests/integration/test_anthropic_verifier.py +269 -0
- modules/cmvk/tests/integration/test_integration.py +53 -0
- modules/cmvk/tests/integration/test_lateral_thinking_integration.py +199 -0
- modules/cmvk/tests/integration/test_lateral_thinking_witness.py +208 -0
- modules/cmvk/tests/integration/test_prosecutor_mode.py +131 -0
- modules/cmvk/tests/test_constitutional.py +611 -0
- modules/cmvk/tests/test_enhanced_features.py +603 -0
- modules/cmvk/tests/test_verification.py +255 -0
- modules/cmvk/tests/unit/__init__.py +1 -0
- modules/cmvk/tests/unit/test_agents.py +64 -0
- modules/cmvk/tests/unit/test_cli.py +224 -0
- modules/cmvk/tests/unit/test_core.py +126 -0
- modules/cmvk/tests/unit/test_humaneval_loader.py +197 -0
- modules/cmvk/tests/unit/test_kernel.py +255 -0
- modules/cmvk/tests/unit/test_reproducibility.py +160 -0
- modules/cmvk/tests/unit/test_trace_logger.py +115 -0
- modules/cmvk/tests/unit/test_visualizer.py +218 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/bug_report.yml +82 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/config.yml +11 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/feature_request.yml +104 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/question.yml +70 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/security_vulnerability.yml +84 -0
- modules/control-plane/.github/discussions.yml +73 -0
- modules/control-plane/.github/pull_request_template.md +82 -0
- modules/control-plane/.github/workflows/publish.yml +146 -0
- modules/control-plane/.github/workflows/release.yml +39 -0
- modules/control-plane/.github/workflows/tests.yml +58 -0
- modules/control-plane/.gitignore +55 -0
- modules/control-plane/CHANGELOG.md +203 -0
- modules/control-plane/CONTRIBUTING.md +311 -0
- modules/control-plane/CONTRIBUTORS.md +88 -0
- modules/control-plane/Dockerfile +82 -0
- modules/control-plane/LICENSE +21 -0
- modules/control-plane/MANIFEST.in +17 -0
- modules/control-plane/README.md +1264 -0
- modules/control-plane/ROADMAP.md +228 -0
- modules/control-plane/SECURITY.md +210 -0
- modules/control-plane/SUPPORT.md +106 -0
- modules/control-plane/acp-cli.py +212 -0
- modules/control-plane/benchmark/README.md +257 -0
- modules/control-plane/benchmark/__init__.py +19 -0
- modules/control-plane/benchmark/red_team_dataset.py +517 -0
- modules/control-plane/benchmark.py +563 -0
- modules/control-plane/build_and_publish.sh +130 -0
- modules/control-plane/docker-compose.yml +74 -0
- modules/control-plane/docs/ABLATION_STUDIES.md +528 -0
- modules/control-plane/docs/ADAPTER_GUIDE.md +544 -0
- modules/control-plane/docs/ADVANCED_FEATURES.md +543 -0
- modules/control-plane/docs/AIOS_COMPARISON.md +296 -0
- modules/control-plane/docs/BIBLIOGRAPHY.md +367 -0
- modules/control-plane/docs/CASE_STUDIES.md +645 -0
- modules/control-plane/docs/DOCKER_DEPLOYMENT.md +184 -0
- modules/control-plane/docs/ECOSYSTEM_STATUS.md +98 -0
- modules/control-plane/docs/HF_MODEL_CARD.md +168 -0
- modules/control-plane/docs/KERNEL_V1_RELEASE.md +454 -0
- modules/control-plane/docs/LAYER3_FRAMEWORK.md +227 -0
- modules/control-plane/docs/LIMITATIONS.md +523 -0
- modules/control-plane/docs/PYPI_PUBLISHING.md +195 -0
- modules/control-plane/docs/README.md +58 -0
- modules/control-plane/docs/RELATED_WORK.md +319 -0
- modules/control-plane/docs/RELEASE_v1.1.0.md +252 -0
- modules/control-plane/docs/REPRODUCIBILITY.md +540 -0
- modules/control-plane/docs/RESEARCH_FOUNDATION.md +197 -0
- modules/control-plane/docs/api/CORE.md +270 -0
- modules/control-plane/docs/architecture/architecture.md +120 -0
- modules/control-plane/docs/community/ANNOUNCEMENT_TEMPLATES.md +52 -0
- modules/control-plane/docs/guides/IMPLEMENTATION.md +225 -0
- modules/control-plane/docs/guides/PHILOSOPHY.md +354 -0
- modules/control-plane/docs/guides/QUICKSTART.md +217 -0
- modules/control-plane/examples/README.md +138 -0
- modules/control-plane/examples/a2a_demo.py +410 -0
- modules/control-plane/examples/adapter_demo.py +347 -0
- modules/control-plane/examples/advanced_features.py +403 -0
- modules/control-plane/examples/basic_usage.py +261 -0
- modules/control-plane/examples/benchmark_demo.py +186 -0
- modules/control-plane/examples/compliance_demo.py +333 -0
- modules/control-plane/examples/configuration.py +265 -0
- modules/control-plane/examples/getting_started.py +178 -0
- modules/control-plane/examples/hibernation_and_time_travel_demo.py +406 -0
- modules/control-plane/examples/interactive_tutorial.ipynb +497 -0
- modules/control-plane/examples/kernel_interceptor_demo.py +202 -0
- modules/control-plane/examples/kernel_v1_demo.py +273 -0
- modules/control-plane/examples/langchain_demo.py +281 -0
- modules/control-plane/examples/lifecycle_demo.py +724 -0
- modules/control-plane/examples/mcp_demo.py +378 -0
- modules/control-plane/examples/ml_safety_demo.py +157 -0
- modules/control-plane/examples/multimodal_demo.py +347 -0
- modules/control-plane/examples/observability_demo.py +370 -0
- modules/control-plane/examples/use_cases.py +336 -0
- modules/control-plane/experiments/long_horizon_purge.py +235 -0
- modules/control-plane/experiments/multi_agent_rag.py +165 -0
- modules/control-plane/experiments/reproduce_results.py +667 -0
- modules/control-plane/paper/ARXIV_SUBMISSION_INFO.txt +122 -0
- modules/control-plane/paper/ETHICS_STATEMENT.md +248 -0
- modules/control-plane/paper/PAPER_CHECKLIST.md +72 -0
- modules/control-plane/paper/Paper.pdf +0 -0
- modules/control-plane/paper/README.md +71 -0
- modules/control-plane/paper/appendix.md +152 -0
- modules/control-plane/paper/architecture.md +15 -0
- modules/control-plane/paper/arxiv/figures/ablation_chart.png +0 -0
- modules/control-plane/paper/arxiv/figures/architecture.png +0 -0
- modules/control-plane/paper/arxiv/figures/constraint_graphs.png +0 -0
- modules/control-plane/paper/arxiv/figures/results_chart.png +0 -0
- modules/control-plane/paper/arxiv/main.aux +97 -0
- modules/control-plane/paper/arxiv/main.bbl +112 -0
- modules/control-plane/paper/arxiv/main.blg +48 -0
- modules/control-plane/paper/arxiv/main.out +33 -0
- modules/control-plane/paper/arxiv/main.pdf +0 -0
- modules/control-plane/paper/arxiv/main.tex +479 -0
- modules/control-plane/paper/arxiv/references.bib +234 -0
- modules/control-plane/paper/arxiv_submission.tar +0 -0
- modules/control-plane/paper/arxiv_submission.zip +0 -0
- modules/control-plane/paper/build.sh +68 -0
- modules/control-plane/paper/figures/README.md +47 -0
- modules/control-plane/paper/figures/ablation_chart.pdf +0 -0
- modules/control-plane/paper/figures/ablation_chart.png +0 -0
- modules/control-plane/paper/figures/architecture.pdf +0 -0
- modules/control-plane/paper/figures/architecture.png +0 -0
- modules/control-plane/paper/figures/constraint_graphs.pdf +0 -0
- modules/control-plane/paper/figures/constraint_graphs.png +0 -0
- modules/control-plane/paper/figures/generate_figures.py +252 -0
- modules/control-plane/paper/figures/results_chart.pdf +0 -0
- modules/control-plane/paper/figures/results_chart.png +0 -0
- modules/control-plane/paper/main.md +273 -0
- modules/control-plane/paper/main.tex +214 -0
- modules/control-plane/paper/main_arxiv.aux +53 -0
- modules/control-plane/paper/main_arxiv.out +17 -0
- modules/control-plane/paper/main_arxiv.pdf +0 -0
- modules/control-plane/paper/main_arxiv.tex +264 -0
- modules/control-plane/paper/references.bib +234 -0
- modules/control-plane/pyproject.toml +124 -0
- modules/control-plane/reproducibility/ABLATIONS.md +136 -0
- modules/control-plane/reproducibility/README.md +288 -0
- modules/control-plane/reproducibility/commands.md +467 -0
- modules/control-plane/reproducibility/docker_config/Dockerfile +39 -0
- modules/control-plane/reproducibility/experiment_configs/purge_config.json +46 -0
- modules/control-plane/reproducibility/experiment_configs/rag_config.json +36 -0
- modules/control-plane/reproducibility/hardware_specs.md +317 -0
- modules/control-plane/reproducibility/requirements_frozen.txt +0 -0
- modules/control-plane/reproducibility/run_all_experiments.sh +45 -0
- modules/control-plane/reproducibility/seeds.json +106 -0
- modules/control-plane/scripts/prepare_pypi.py +46 -0
- modules/control-plane/scripts/prepare_release.py +176 -0
- modules/control-plane/scripts/upload_dataset_to_hf.py +316 -0
- modules/control-plane/setup.py +69 -0
- modules/control-plane/src/agent_control_plane/__init__.py +639 -0
- modules/control-plane/src/agent_control_plane/a2a_adapter.py +541 -0
- modules/control-plane/src/agent_control_plane/adapter.py +415 -0
- modules/control-plane/src/agent_control_plane/agent_hibernation.py +364 -0
- modules/control-plane/src/agent_control_plane/agent_kernel.py +464 -0
- modules/control-plane/src/agent_control_plane/compliance.py +718 -0
- modules/control-plane/src/agent_control_plane/constraint_graphs.py +475 -0
- modules/control-plane/src/agent_control_plane/control_plane.py +848 -0
- modules/control-plane/src/agent_control_plane/example_executors.py +193 -0
- modules/control-plane/src/agent_control_plane/execution_engine.py +229 -0
- modules/control-plane/src/agent_control_plane/flight_recorder.py +600 -0
- modules/control-plane/src/agent_control_plane/governance_layer.py +432 -0
- modules/control-plane/src/agent_control_plane/hf_utils.py +561 -0
- modules/control-plane/src/agent_control_plane/interfaces/__init__.py +53 -0
- modules/control-plane/src/agent_control_plane/interfaces/kernel_interface.py +359 -0
- modules/control-plane/src/agent_control_plane/interfaces/plugin_interface.py +495 -0
- modules/control-plane/src/agent_control_plane/interfaces/protocol_interfaces.py +385 -0
- modules/control-plane/src/agent_control_plane/kernel_space.py +707 -0
- modules/control-plane/src/agent_control_plane/langchain_adapter.py +422 -0
- modules/control-plane/src/agent_control_plane/lifecycle.py +3111 -0
- modules/control-plane/src/agent_control_plane/mcp_adapter.py +517 -0
- modules/control-plane/src/agent_control_plane/ml_safety.py +560 -0
- modules/control-plane/src/agent_control_plane/multimodal.py +724 -0
- modules/control-plane/src/agent_control_plane/mute_agent.py +419 -0
- modules/control-plane/src/agent_control_plane/observability.py +785 -0
- modules/control-plane/src/agent_control_plane/orchestrator.py +480 -0
- modules/control-plane/src/agent_control_plane/plugin_registry.py +748 -0
- modules/control-plane/src/agent_control_plane/policy_engine.py +525 -0
- modules/control-plane/src/agent_control_plane/shadow_mode.py +307 -0
- modules/control-plane/src/agent_control_plane/signals.py +491 -0
- modules/control-plane/src/agent_control_plane/supervisor_agents.py +427 -0
- modules/control-plane/src/agent_control_plane/time_travel_debugger.py +554 -0
- modules/control-plane/src/agent_control_plane/tool_registry.py +350 -0
- modules/control-plane/src/agent_control_plane/vfs.py +695 -0
- modules/control-plane/tests/README.md +33 -0
- modules/control-plane/tests/test_a2a_adapter.py +336 -0
- modules/control-plane/tests/test_adapter.py +422 -0
- modules/control-plane/tests/test_advanced_features.py +389 -0
- modules/control-plane/tests/test_benchmark.py +223 -0
- modules/control-plane/tests/test_compliance.py +214 -0
- modules/control-plane/tests/test_control_plane.py +295 -0
- modules/control-plane/tests/test_hibernation.py +274 -0
- modules/control-plane/tests/test_kernel_interception.py +284 -0
- modules/control-plane/tests/test_langchain_adapter.py +258 -0
- modules/control-plane/tests/test_lifecycle.py +1174 -0
- modules/control-plane/tests/test_mcp_adapter.py +293 -0
- modules/control-plane/tests/test_ml_safety.py +142 -0
- modules/control-plane/tests/test_multimodal.py +317 -0
- modules/control-plane/tests/test_new_features.py +435 -0
- modules/control-plane/tests/test_observability.py +338 -0
- modules/control-plane/tests/test_time_travel.py +387 -0
- modules/emk/.github/workflows/ci.yml +105 -0
- modules/emk/.github/workflows/publish.yml +144 -0
- modules/emk/.gitignore +74 -0
- modules/emk/CHANGELOG.md +41 -0
- modules/emk/CONTRIBUTING.md +295 -0
- modules/emk/IMPLEMENTATION.md +174 -0
- modules/emk/LICENSE +21 -0
- modules/emk/MANIFEST.in +8 -0
- modules/emk/README.md +135 -0
- modules/emk/RELEASE_NOTES.md +82 -0
- modules/emk/SECURITY.md +52 -0
- modules/emk/codecov.yml +39 -0
- modules/emk/docs/MEMORY_MANAGEMENT.md +285 -0
- modules/emk/emk/__init__.py +106 -0
- modules/emk/emk/hf_utils.py +419 -0
- modules/emk/emk/indexer.py +144 -0
- modules/emk/emk/py.typed +0 -0
- modules/emk/emk/schema.py +204 -0
- modules/emk/emk/sleep_cycle.py +345 -0
- modules/emk/emk/store.py +479 -0
- modules/emk/examples/basic_usage.py +123 -0
- modules/emk/examples/memory_features_demo.py +154 -0
- modules/emk/experiments/README.md +59 -0
- modules/emk/experiments/reproduce_results.py +461 -0
- modules/emk/experiments/results.json +61 -0
- modules/emk/paper/structure.tex +192 -0
- modules/emk/paper/whitepaper.md +273 -0
- modules/emk/pyproject.toml +91 -0
- modules/emk/setup.py +5 -0
- modules/emk/tests/test_file_adapter.py +195 -0
- modules/emk/tests/test_indexer.py +174 -0
- modules/emk/tests/test_init.py +55 -0
- modules/emk/tests/test_negative_memory.py +83 -0
- modules/emk/tests/test_schema.py +150 -0
- modules/emk/tests/test_semantic_rules.py +175 -0
- modules/emk/tests/test_sleep_cycle.py +335 -0
- modules/emk/tests/test_store_anti_patterns.py +239 -0
- modules/iatp/.github/workflows/docker-build.yml +124 -0
- modules/iatp/.github/workflows/publish.yml +174 -0
- modules/iatp/.github/workflows/python-package.yml +121 -0
- modules/iatp/.gitignore +67 -0
- modules/iatp/.pre-commit-config.yaml +64 -0
- modules/iatp/CHANGELOG.md +120 -0
- modules/iatp/Dockerfile +91 -0
- modules/iatp/IMPLEMENTATION_SUMMARY.md +218 -0
- modules/iatp/MANIFEST.in +9 -0
- modules/iatp/README.md +180 -0
- modules/iatp/docker/Dockerfile.agent +27 -0
- modules/iatp/docker/Dockerfile.sidecar-python +86 -0
- modules/iatp/docker/README.md +258 -0
- modules/iatp/docker-compose.yml +194 -0
- modules/iatp/docs/ARCHITECTURE.md +243 -0
- modules/iatp/docs/CLI_GUIDE.md +220 -0
- modules/iatp/docs/DEPLOYMENT.md +304 -0
- modules/iatp/examples/README.md +132 -0
- modules/iatp/examples/backend_agent.py +39 -0
- modules/iatp/examples/client.py +168 -0
- modules/iatp/examples/demo_attestation_reputation.py +274 -0
- modules/iatp/examples/demo_client.py +240 -0
- modules/iatp/examples/demo_rbac.py +143 -0
- modules/iatp/examples/integration_demo.py +245 -0
- modules/iatp/examples/manifests/coder_agent.json +20 -0
- modules/iatp/examples/manifests/reviewer_agent.json +19 -0
- modules/iatp/examples/manifests/secure_bank.json +14 -0
- modules/iatp/examples/manifests/standard_agent.json +14 -0
- modules/iatp/examples/manifests/untrusted_honeypot.json +14 -0
- modules/iatp/examples/run_secure_bank_sidecar.py +85 -0
- modules/iatp/examples/run_sidecar.py +105 -0
- modules/iatp/examples/run_untrusted_sidecar.py +77 -0
- modules/iatp/examples/secure_bank_agent.py +138 -0
- modules/iatp/examples/test_untrusted.py +82 -0
- modules/iatp/examples/untrusted_agent.py +119 -0
- modules/iatp/experiments/README.md +58 -0
- modules/iatp/experiments/cascading_hallucination/README.md +149 -0
- modules/iatp/experiments/cascading_hallucination/agent_a_user.py +41 -0
- modules/iatp/experiments/cascading_hallucination/agent_b_summarizer.py +54 -0
- modules/iatp/experiments/cascading_hallucination/agent_c_database.py +47 -0
- modules/iatp/experiments/cascading_hallucination/proof_of_concept.py +290 -0
- modules/iatp/experiments/cascading_hallucination/run_experiment.py +226 -0
- modules/iatp/experiments/cascading_hallucination/sidecar_c.py +61 -0
- modules/iatp/experiments/reproduce_results.py +574 -0
- modules/iatp/experiments/results.json +2336 -0
- modules/iatp/iatp/__init__.py +164 -0
- modules/iatp/iatp/attestation.py +401 -0
- modules/iatp/iatp/cli.py +253 -0
- modules/iatp/iatp/hf_utils.py +469 -0
- modules/iatp/iatp/ipc_pipes.py +578 -0
- modules/iatp/iatp/main.py +410 -0
- modules/iatp/iatp/models/__init__.py +445 -0
- modules/iatp/iatp/policy_engine.py +335 -0
- modules/iatp/iatp/py.typed +2 -0
- modules/iatp/iatp/recovery.py +319 -0
- modules/iatp/iatp/security/__init__.py +268 -0
- modules/iatp/iatp/sidecar/__init__.py +517 -0
- modules/iatp/iatp/telemetry/__init__.py +162 -0
- modules/iatp/iatp/tests/__init__.py +1 -0
- modules/iatp/iatp/tests/test_attestation.py +368 -0
- modules/iatp/iatp/tests/test_cli.py +129 -0
- modules/iatp/iatp/tests/test_models.py +128 -0
- modules/iatp/iatp/tests/test_policy_engine.py +345 -0
- modules/iatp/iatp/tests/test_recovery.py +279 -0
- modules/iatp/iatp/tests/test_security.py +220 -0
- modules/iatp/iatp/tests/test_sidecar.py +165 -0
- modules/iatp/iatp/tests/test_telemetry.py +173 -0
- modules/iatp/paper/BLOG.md +307 -0
- modules/iatp/paper/PAPER.md +236 -0
- modules/iatp/paper/RFC_SUBMISSION.md +299 -0
- modules/iatp/paper/whitepaper.md +369 -0
- modules/iatp/proto/README.md +200 -0
- modules/iatp/proto/generate_stubs.py +81 -0
- modules/iatp/proto/iatp.proto +552 -0
- modules/iatp/pyproject.toml +180 -0
- modules/iatp/requirements-dev.txt +2 -0
- modules/iatp/requirements.txt +6 -0
- modules/iatp/setup.py +60 -0
- modules/iatp/sidecar/README.md +487 -0
- modules/iatp/sidecar/go/Dockerfile +32 -0
- modules/iatp/sidecar/go/README.md +237 -0
- modules/iatp/sidecar/go/go.mod +8 -0
- modules/iatp/sidecar/go/main.go +488 -0
- modules/iatp/spec/001-handshake.md +436 -0
- modules/iatp/spec/002-reversibility.md +394 -0
- modules/iatp/spec/schema/capability_manifest.json +266 -0
- modules/iatp/test_integration.py +310 -0
- modules/mcp-kernel-server/README.md +261 -0
- modules/mcp-kernel-server/pyproject.toml +60 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/__init__.py +26 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/cli.py +229 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/resources.py +215 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/server.py +562 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/tools.py +1172 -0
- modules/mute-agent/.github/workflows/safety_check.yml +45 -0
- modules/mute-agent/.gitignore +53 -0
- modules/mute-agent/ARCHITECTURE.md +531 -0
- modules/mute-agent/BENCHMARK_GUIDE.md +384 -0
- modules/mute-agent/COMPLETION_SUMMARY.md +293 -0
- modules/mute-agent/EXPERIMENT_SUMMARY.md +318 -0
- modules/mute-agent/IMPLEMENTATION_SUMMARY.md +212 -0
- modules/mute-agent/LICENSE +21 -0
- modules/mute-agent/PHASE3_SUMMARY.md +297 -0
- modules/mute-agent/README.md +360 -0
- modules/mute-agent/STEEL_MAN_RESULTS.md +353 -0
- modules/mute-agent/USAGE.md +505 -0
- modules/mute-agent/V2_IMPLEMENTATION_SUMMARY.md +253 -0
- modules/mute-agent/V2_STEEL_MAN_IMPLEMENTATION.md +274 -0
- modules/mute-agent/VERIFICATION_REPORT.md +435 -0
- modules/mute-agent/charts/cost_comparison.png +0 -0
- modules/mute-agent/charts/cost_vs_ambiguity.png +0 -0
- modules/mute-agent/charts/metrics_comparison.png +0 -0
- modules/mute-agent/charts/scenario_breakdown.png +0 -0
- modules/mute-agent/charts/trace_attack_blocked.html +140 -0
- modules/mute-agent/charts/trace_attack_blocked.png +0 -0
- modules/mute-agent/charts/trace_failure.html +140 -0
- modules/mute-agent/charts/trace_failure.png +0 -0
- modules/mute-agent/charts/trace_success.html +140 -0
- modules/mute-agent/charts/trace_success.png +0 -0
- modules/mute-agent/examples/__init__.py +1 -0
- modules/mute-agent/examples/advanced_example.py +384 -0
- modules/mute-agent/examples/graph_debugger_demo.py +241 -0
- modules/mute-agent/examples/listener_example.py +297 -0
- modules/mute-agent/examples/simple_example.py +242 -0
- modules/mute-agent/examples/steel_man_demo.py +297 -0
- modules/mute-agent/experiments/README.md +135 -0
- modules/mute-agent/experiments/__init__.py +3 -0
- modules/mute-agent/experiments/agent_comparison.csv +6 -0
- modules/mute-agent/experiments/agent_comparison_50runs.csv +6 -0
- modules/mute-agent/experiments/ambiguity_test.py +335 -0
- modules/mute-agent/experiments/ambiguity_test_results.csv +31 -0
- modules/mute-agent/experiments/ambiguity_test_results_50runs.csv +51 -0
- modules/mute-agent/experiments/baseline_agent.py +189 -0
- modules/mute-agent/experiments/benchmark.py +402 -0
- modules/mute-agent/experiments/demo.py +172 -0
- modules/mute-agent/experiments/generate_cost_curve.py +474 -0
- modules/mute-agent/experiments/jailbreak_test.py +137 -0
- modules/mute-agent/experiments/latent_state_scenario.py +361 -0
- modules/mute-agent/experiments/mute_agent_experiment.py +349 -0
- modules/mute-agent/experiments/run_extended_experiment.py +40 -0
- modules/mute-agent/experiments/run_v2_experiments.py +266 -0
- modules/mute-agent/experiments/run_v2_experiments_auto.py +247 -0
- modules/mute-agent/experiments/v2_scenarios/README.md +214 -0
- modules/mute-agent/experiments/v2_scenarios/__init__.py +4 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_1_deep_dependency.py +325 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_2_adversarial.py +328 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_3_false_positive.py +303 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_4_performance.py +319 -0
- modules/mute-agent/experiments/visualize.py +400 -0
- modules/mute-agent/mute_agent/__init__.py +66 -0
- modules/mute-agent/mute_agent/core/__init__.py +1 -0
- modules/mute-agent/mute_agent/core/execution_agent.py +164 -0
- modules/mute-agent/mute_agent/core/handshake_protocol.py +199 -0
- modules/mute-agent/mute_agent/core/reasoning_agent.py +236 -0
- modules/mute-agent/mute_agent/knowledge_graph/__init__.py +1 -0
- modules/mute-agent/mute_agent/knowledge_graph/graph_elements.py +63 -0
- modules/mute-agent/mute_agent/knowledge_graph/multidimensional_graph.py +168 -0
- modules/mute-agent/mute_agent/knowledge_graph/subgraph.py +222 -0
- modules/mute-agent/mute_agent/listener/__init__.py +41 -0
- modules/mute-agent/mute_agent/listener/adapters/__init__.py +29 -0
- modules/mute-agent/mute_agent/listener/adapters/base_adapter.py +187 -0
- modules/mute-agent/mute_agent/listener/adapters/caas_adapter.py +342 -0
- modules/mute-agent/mute_agent/listener/adapters/control_plane_adapter.py +434 -0
- modules/mute-agent/mute_agent/listener/adapters/iatp_adapter.py +330 -0
- modules/mute-agent/mute_agent/listener/adapters/scak_adapter.py +249 -0
- modules/mute-agent/mute_agent/listener/listener.py +608 -0
- modules/mute-agent/mute_agent/listener/state_observer.py +434 -0
- modules/mute-agent/mute_agent/listener/threshold_config.py +311 -0
- modules/mute-agent/mute_agent/super_system/__init__.py +1 -0
- modules/mute-agent/mute_agent/super_system/router.py +202 -0
- modules/mute-agent/mute_agent/visualization/__init__.py +8 -0
- modules/mute-agent/mute_agent/visualization/graph_debugger.py +495 -0
- modules/mute-agent/requirements-dev.txt +6 -0
- modules/mute-agent/requirements.txt +9 -0
- modules/mute-agent/setup.py +64 -0
- modules/mute-agent/src/__init__.py +0 -0
- modules/mute-agent/src/agents/__init__.py +0 -0
- modules/mute-agent/src/agents/baseline_agent.py +524 -0
- modules/mute-agent/src/agents/interactive_agent.py +113 -0
- modules/mute-agent/src/agents/mute_agent.py +622 -0
- modules/mute-agent/src/benchmarks/__init__.py +0 -0
- modules/mute-agent/src/benchmarks/evaluator.py +481 -0
- modules/mute-agent/src/benchmarks/scenarios.json +985 -0
- modules/mute-agent/src/core/__init__.py +0 -0
- modules/mute-agent/src/core/mock_state.py +320 -0
- modules/mute-agent/src/core/tools.py +441 -0
- modules/nexus/__init__.py +49 -0
- modules/nexus/arbiter.py +357 -0
- modules/nexus/client.py +464 -0
- modules/nexus/dmz.py +417 -0
- modules/nexus/escrow.py +428 -0
- modules/nexus/exceptions.py +284 -0
- modules/nexus/registry.py +391 -0
- modules/nexus/reputation.py +423 -0
- modules/nexus/schemas/__init__.py +49 -0
- modules/nexus/schemas/compliance.py +274 -0
- modules/nexus/schemas/escrow.py +249 -0
- modules/nexus/schemas/manifest.py +223 -0
- modules/nexus/schemas/receipt.py +206 -0
- modules/observability/README.md +192 -0
- modules/observability/alertmanager/alertmanager.yml +116 -0
- modules/observability/alerts/agent-os-alerts.yaml +197 -0
- modules/observability/docker-compose.yml +128 -0
- modules/observability/grafana/dashboards/agent-os-amb.json +448 -0
- modules/observability/grafana/dashboards/agent-os-cmvk.json +441 -0
- modules/observability/grafana/dashboards/agent-os-overview.json +268 -0
- modules/observability/grafana/dashboards/agent-os-performance.json +15 -0
- modules/observability/grafana/dashboards/agent-os-safety.json +50 -0
- modules/observability/grafana/provisioning/dashboards/dashboards.yml +15 -0
- modules/observability/grafana/provisioning/datasources/datasources.yml +33 -0
- modules/observability/otel/otel-collector-config.yml +61 -0
- modules/observability/prometheus/prometheus.yml +63 -0
- modules/observability/pyproject.toml +53 -0
- modules/observability/scripts/export_dashboards.py +55 -0
- modules/observability/src/agent_os_observability/__init__.py +25 -0
- modules/observability/src/agent_os_observability/dashboards.py +896 -0
- modules/observability/src/agent_os_observability/metrics.py +396 -0
- modules/observability/src/agent_os_observability/server.py +221 -0
- modules/observability/src/agent_os_observability/tracer.py +226 -0
- modules/primitives/.gitignore +8 -0
- modules/primitives/README.md +62 -0
- modules/primitives/agent_primitives/__init__.py +22 -0
- modules/primitives/agent_primitives/failures.py +82 -0
- modules/primitives/agent_primitives/py.typed +0 -0
- modules/primitives/pyproject.toml +68 -0
- modules/scak/.github/copilot-instructions.md +396 -0
- modules/scak/.github/workflows/release.yml +117 -0
- modules/scak/.gitignore +32 -0
- modules/scak/CHANGELOG.md +173 -0
- modules/scak/CITATION.cff +62 -0
- modules/scak/CONTRIBUTING.md +429 -0
- modules/scak/Dockerfile +58 -0
- modules/scak/ENTERPRISE_FEATURES.md +518 -0
- modules/scak/IMPLEMENTATION_SUMMARY.md +206 -0
- modules/scak/LIMITATIONS.md +565 -0
- modules/scak/MANIFEST.in +16 -0
- modules/scak/NOVELTY.md +535 -0
- modules/scak/README.md +928 -0
- modules/scak/RESEARCH.md +670 -0
- modules/scak/agent_kernel/__init__.py +66 -0
- modules/scak/agent_kernel/analyzer.py +432 -0
- modules/scak/agent_kernel/auditor.py +31 -0
- modules/scak/agent_kernel/completeness_auditor.py +234 -0
- modules/scak/agent_kernel/detector.py +200 -0
- modules/scak/agent_kernel/kernel.py +741 -0
- modules/scak/agent_kernel/memory_manager.py +82 -0
- modules/scak/agent_kernel/models.py +372 -0
- modules/scak/agent_kernel/nudge_mechanism.py +260 -0
- modules/scak/agent_kernel/outcome_analyzer.py +335 -0
- modules/scak/agent_kernel/patcher.py +579 -0
- modules/scak/agent_kernel/semantic_analyzer.py +313 -0
- modules/scak/agent_kernel/semantic_purge.py +346 -0
- modules/scak/agent_kernel/simulator.py +447 -0
- modules/scak/agent_kernel/teacher.py +82 -0
- modules/scak/agent_kernel/triage.py +149 -0
- modules/scak/build_and_publish.ps1 +74 -0
- modules/scak/build_and_publish.sh +74 -0
- modules/scak/cli.py +471 -0
- modules/scak/dashboard.py +462 -0
- modules/scak/datasets/DATASET_CARD.md +219 -0
- modules/scak/datasets/README.md +143 -0
- modules/scak/datasets/gaia_vague_queries/vague_queries.json +262 -0
- modules/scak/datasets/hf_upload/README.md +219 -0
- modules/scak/datasets/hf_upload/scak_gaia_laziness.jsonl +50 -0
- modules/scak/datasets/prepare_hf_datasets.py +145 -0
- modules/scak/datasets/red_team/jailbreak_patterns.json +202 -0
- modules/scak/docker-compose.yml +99 -0
- modules/scak/docs/Adaptive-Memory-Hierarchy.md +319 -0
- modules/scak/docs/Data-Contracts-and-Schemas.md +285 -0
- modules/scak/docs/Dual-Loop-Architecture.md +344 -0
- modules/scak/docs/Enhanced-Features.md +612 -0
- modules/scak/docs/LANGCHAIN_INTEGRATION.md +572 -0
- modules/scak/docs/README.md +128 -0
- modules/scak/docs/Reference-Implementations.md +163 -0
- modules/scak/docs/SCAK_V2.md +374 -0
- modules/scak/docs/Three-Failure-Types.md +178 -0
- modules/scak/examples/basic_example.py +155 -0
- modules/scak/examples/circuit_breaker_lazy_eval_demo.py +243 -0
- modules/scak/examples/langchain_integration_example.py +339 -0
- modules/scak/examples/layer4_demo.py +243 -0
- modules/scak/examples/production_features_demo.py +353 -0
- modules/scak/examples/quick_demo.py +79 -0
- modules/scak/examples/scak_v2_demo.py +252 -0
- modules/scak/experiments/README.md +438 -0
- modules/scak/experiments/ablation_studies/README.md +192 -0
- modules/scak/experiments/ablation_studies/ablation_no_audit.py +116 -0
- modules/scak/experiments/ablation_studies/ablation_no_purge.py +133 -0
- modules/scak/experiments/chaos_engineering/README.md +332 -0
- modules/scak/experiments/context_efficiency_test.py +328 -0
- modules/scak/experiments/gaia_benchmark/README.md +208 -0
- modules/scak/experiments/laziness_benchmark.py +179 -0
- modules/scak/experiments/long_horizon_task_experiment.py +252 -0
- modules/scak/experiments/multi_agent_rag_experiment.py +284 -0
- modules/scak/experiments/results/ablation_table.md +12 -0
- modules/scak/experiments/results/long_horizon.json +36 -0
- modules/scak/experiments/results/multi_agent_rag.json +66 -0
- modules/scak/experiments/run_comprehensive_ablations.py +332 -0
- modules/scak/experiments/test_auditor_patcher_integration.py +251 -0
- modules/scak/notebooks/getting_started.ipynb +33 -0
- modules/scak/paper/ARXIV_SUBMISSION_METADATA.txt +109 -0
- modules/scak/paper/PAPER_CHECKLIST.md +304 -0
- modules/scak/paper/Paper.pdf +0 -0
- modules/scak/paper/README.md +113 -0
- modules/scak/paper/appendix.md +351 -0
- modules/scak/paper/arxiv/bibliography.bib +284 -0
- modules/scak/paper/arxiv/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/arxiv/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/arxiv/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/arxiv/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/arxiv/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/arxiv/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/arxiv/main.aux +103 -0
- modules/scak/paper/arxiv/main.bbl +113 -0
- modules/scak/paper/arxiv/main.blg +55 -0
- modules/scak/paper/arxiv/main.out +31 -0
- modules/scak/paper/arxiv/main.pdf +0 -0
- modules/scak/paper/arxiv/main.tex +482 -0
- modules/scak/paper/arxiv_submission/bibliography.bib +284 -0
- modules/scak/paper/arxiv_submission/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/arxiv_submission/main.aux +103 -0
- modules/scak/paper/arxiv_submission/main.bbl +113 -0
- modules/scak/paper/arxiv_submission/main.blg +55 -0
- modules/scak/paper/arxiv_submission/main.out +31 -0
- modules/scak/paper/arxiv_submission/main.pdf +0 -0
- modules/scak/paper/arxiv_submission/main.tex +482 -0
- modules/scak/paper/arxiv_submission.tar.gz +0 -0
- modules/scak/paper/bibliography.bib +284 -0
- modules/scak/paper/build.sh +55 -0
- modules/scak/paper/figures/README.md +32 -0
- modules/scak/paper/figures/fig1_ooda_architecture.md +75 -0
- modules/scak/paper/figures/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/figures/fig1_ooda_architecture.png +0 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.md +83 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.png +0 -0
- modules/scak/paper/figures/fig3_gaia_results.md +64 -0
- modules/scak/paper/figures/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/figures/fig3_gaia_results.png +0 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.md +64 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.png +0 -0
- modules/scak/paper/figures/fig5_context_reduction.md +71 -0
- modules/scak/paper/figures/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/figures/fig5_context_reduction.png +0 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.md +80 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.png +0 -0
- modules/scak/paper/figures/generate_figures.py +463 -0
- modules/scak/paper/main.aux +103 -0
- modules/scak/paper/main.bbl +113 -0
- modules/scak/paper/main.blg +55 -0
- modules/scak/paper/main.md +192 -0
- modules/scak/paper/main.out +31 -0
- modules/scak/paper/main.pdf +0 -0
- modules/scak/paper/main.tex +482 -0
- modules/scak/reproducibility/ABLATIONS.md +225 -0
- modules/scak/reproducibility/Dockerfile.reproducibility +34 -0
- modules/scak/reproducibility/README.md +421 -0
- modules/scak/reproducibility/requirements-pinned.txt +32 -0
- modules/scak/reproducibility/run_all_experiments.py +395 -0
- modules/scak/reproducibility/seed_control.py +53 -0
- modules/scak/reproducibility/statistical_analysis.py +302 -0
- modules/scak/requirements.txt +50 -0
- modules/scak/setup.py +93 -0
- modules/scak/src/__init__.py +124 -0
- modules/scak/src/agents/__init__.py +13 -0
- modules/scak/src/agents/conflict_resolution.py +732 -0
- modules/scak/src/agents/orchestrator.py +761 -0
- modules/scak/src/agents/pubsub.py +484 -0
- modules/scak/src/agents/shadow_teacher.py +344 -0
- modules/scak/src/agents/swarm.py +661 -0
- modules/scak/src/agents/worker.py +357 -0
- modules/scak/src/integrations/__init__.py +81 -0
- modules/scak/src/integrations/cmvk_adapter.py +430 -0
- modules/scak/src/integrations/control_plane_adapter.py +601 -0
- modules/scak/src/integrations/langchain_integration.py +902 -0
- modules/scak/src/interfaces/__init__.py +59 -0
- modules/scak/src/interfaces/llm_clients.py +505 -0
- modules/scak/src/interfaces/openapi_tools.py +611 -0
- modules/scak/src/interfaces/plugin_system.py +605 -0
- modules/scak/src/interfaces/protocols.py +365 -0
- modules/scak/src/interfaces/telemetry.py +464 -0
- modules/scak/src/interfaces/tool_registry.py +547 -0
- modules/scak/src/kernel/__init__.py +100 -0
- modules/scak/src/kernel/auditor.py +305 -0
- modules/scak/src/kernel/circuit_breaker.py +398 -0
- modules/scak/src/kernel/core.py +724 -0
- modules/scak/src/kernel/distributed.py +667 -0
- modules/scak/src/kernel/evolution.py +455 -0
- modules/scak/src/kernel/failover.py +621 -0
- modules/scak/src/kernel/governance.py +710 -0
- modules/scak/src/kernel/governance_v2.py +603 -0
- modules/scak/src/kernel/lazy_evaluator.py +514 -0
- modules/scak/src/kernel/load_testing.py +633 -0
- modules/scak/src/kernel/memory.py +945 -0
- modules/scak/src/kernel/patcher.py +581 -0
- modules/scak/src/kernel/rubric.py +419 -0
- modules/scak/src/kernel/schemas.py +390 -0
- modules/scak/src/kernel/skill_mapper.py +309 -0
- modules/scak/src/kernel/triage.py +149 -0
- modules/scak/src/mocks/__init__.py +99 -0
- modules/scak/tests/__init__.py +1 -0
- modules/scak/tests/test_circuit_breaker.py +403 -0
- modules/scak/tests/test_conflict_resolution.py +287 -0
- modules/scak/tests/test_dual_loop.py +463 -0
- modules/scak/tests/test_enhanced_features.py +421 -0
- modules/scak/tests/test_failover_and_load.py +438 -0
- modules/scak/tests/test_governance.py +185 -0
- modules/scak/tests/test_kernel.py +359 -0
- modules/scak/tests/test_langchain_integration.py +451 -0
- modules/scak/tests/test_lazy_evaluator.py +465 -0
- modules/scak/tests/test_llm_clients.py +122 -0
- modules/scak/tests/test_memory_controller.py +528 -0
- modules/scak/tests/test_orchestrator.py +181 -0
- modules/scak/tests/test_phase3_integration.py +265 -0
- modules/scak/tests/test_pubsub_swarm.py +203 -0
- modules/scak/tests/test_reference_implementations.py +240 -0
- modules/scak/tests/test_rubric.py +363 -0
- modules/scak/tests/test_scak_v2.py +651 -0
- modules/scak/tests/test_skill_mapper.py +217 -0
- modules/scak/tests/test_specific_failures.py +393 -0
- modules/scak/tests/test_tool_registry.py +264 -0
- modules/scak/tests/test_tools_and_plugins.py +303 -0
- modules/scak/tests/test_triage.py +596 -0
- modules/scak/tests/test_write_through.py +319 -0
- agent_os_kernel-1.1.0.dist-info/METADATA +0 -400
- agent_os_kernel-1.1.0.dist-info/RECORD +0 -12
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.3.0.dist-info}/WHEEL +0 -0
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.3.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,390 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Data contracts (schemas) for self-correcting agent kernel.
|
|
3
|
+
|
|
4
|
+
This module defines the rigorous data contracts between Auditor and Patcher.
|
|
5
|
+
These schemas use Pydantic to enforce type safety and can be exported
|
|
6
|
+
directly into Fine-Tuning datasets (RLAIF).
|
|
7
|
+
|
|
8
|
+
The "Spine" of the self-correcting system:
|
|
9
|
+
1. Lesson - The atomic unit of learning (what we learned)
|
|
10
|
+
2. FailureTrace - The evidence (what happened)
|
|
11
|
+
3. PatchRequest - The prescription (how to fix it)
|
|
12
|
+
|
|
13
|
+
v2 Extensions (Evolutionary Swarm):
|
|
14
|
+
4. SwarmTrace - Multi-agent interaction trace
|
|
15
|
+
5. RubricUpdate - Reward shaping changes
|
|
16
|
+
6. AnomalyDecision - Emergence detection results
|
|
17
|
+
7. Rubric - Reward scoring rubric
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from pydantic import BaseModel, Field
|
|
21
|
+
from typing import List, Optional, Literal, Dict, Any
|
|
22
|
+
from datetime import datetime
|
|
23
|
+
from uuid import uuid4
|
|
24
|
+
from enum import Enum
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class MemoryTier(str, Enum):
    """
    Storage tiers used to route lessons deterministically.

    Part of the "Scale by Subtraction" approach: only context that is
    relevant gets injected, and each tier has its own policy:
    - Tier 1: always active (safety-critical)
    - Tier 2: conditionally injected (tool-specific)
    - Tier 3: retrieved on demand (long-tail edge cases)
    """

    # Permanent system prompt.
    TIER_1_KERNEL = "kernel"
    # Injected based on the active tool.
    TIER_2_SKILL_CACHE = "skill_cache"
    # Reached through semantic search.
    TIER_3_ARCHIVE = "rag_archive"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# 1. The Atomic Lesson (What we learned)
|
|
42
|
+
class Lesson(BaseModel):
    """
    One self-contained rule distilled from a failure.

    Each lesson holds a single, specific piece of knowledge meant to be added
    to the agent's system prompt or memory so the same failure does not recur.
    """

    # --- Identity and content ---
    # A fresh UUID string is generated when no id is supplied.
    id: str = Field(default_factory=lambda: str(uuid4()))
    trigger_pattern: str = Field(
        default=...,
        description="The context/keywords that triggered this failure",
    )
    rule_text: str = Field(
        default=...,
        description="The actual instruction to add to System Prompt",
    )
    lesson_type: Literal["syntax", "business", "security"] = Field(
        default=...,
        description="Type of lesson: syntax (model capability), business (domain knowledge), security (safety rule)",
    )
    # Validated to lie within [0.0, 1.0].
    confidence_score: float = Field(
        default=...,
        ge=0.0,
        le=1.0,
        description="Teacher's confidence in this fix (0.0-1.0)",
    )
    # Defaults to the local wall-clock time at construction (naive datetime).
    created_at: datetime = Field(default_factory=datetime.now)

    # --- Tiering metadata ---
    tier: Optional[MemoryTier] = Field(
        default=None,
        description="The memory tier where this lesson is stored",
    )
    retrieval_count: int = Field(
        default=0,
        description="Number of times this lesson was retrieved (for promotion logic)",
    )
    last_retrieved_at: Optional[datetime] = Field(
        default=None,
        description="Last time this lesson was retrieved from Tier 3",
    )
    last_triggered_at: Optional[datetime] = Field(
        default=None,
        description="Last time this lesson triggered a block/correction (for demotion logic)",
    )

    # Example payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "id": "lesson-abc123",
                "trigger_pattern": "search logs, empty result, archived partition",
                "rule_text": "When searching logs, always check archived partitions if recent logs are empty",
                "lesson_type": "business",
                "confidence_score": 0.92,
                "created_at": "2026-01-15T23:00:00",
            },
        },
    }
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# 2. The Failure Trace (The Evidence)
|
|
97
|
+
class FailureTrace(BaseModel):
    """
    Evidence record for a single failure.

    Holds everything known about what went wrong: the user prompt, the
    agent's reasoning, the tool execution (if any), and the classification
    of the failure that occurred.
    """

    # A fresh UUID string is generated when no trace_id is supplied.
    trace_id: str = Field(default_factory=lambda: str(uuid4()))

    # --- What was asked and how the agent responded ---
    user_prompt: str = Field(
        default=...,
        description="The user's original request",
    )
    agent_reasoning: str = Field(
        default=...,
        description="The agent's reasoning/response",
    )

    # --- Tool activity (both optional) ---
    tool_call: Optional[Dict[str, Any]] = Field(
        default=None,
        description="The tool call that was made (if any)",
    )
    tool_output: Optional[str] = Field(
        default=None,
        description="The output from the tool execution",
    )

    # --- Classification ---
    failure_type: Literal["omission_laziness", "commission_safety", "hallucination"] = Field(
        default=...,
        description="Type of failure: omission (gave up too early), commission (unsafe action), hallucination (invented facts)",
    )
    severity: Literal["critical", "non_critical"] = Field(
        default=...,
        description="Severity of the failure",
    )
    # Defaults to the local wall-clock time at construction (naive datetime).
    timestamp: datetime = Field(default_factory=datetime.now)

    # Example payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "trace_id": "trace-xyz789",
                "user_prompt": "Find the Q3 report",
                "agent_reasoning": "I searched for 'Q3 report' but found no exact matches.",
                "tool_call": {"tool": "search_files", "query": "Q3 report"},
                "tool_output": "[]",
                "failure_type": "omission_laziness",
                "severity": "non_critical",
                "timestamp": "2026-01-15T23:00:00",
            },
        },
    }
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
# 3. The Patch (The Prescription)
|
|
142
|
+
class PatchRequest(BaseModel):
    """
    Request to apply a lesson to an agent.

    Ties the failure evidence (via ``trace_id``) to the diagnosis and the
    proposed fix (``proposed_lesson``), and states how the fix should be
    applied: as a hotfix now or in a later batch.
    """

    trace_id: str = Field(
        default=...,
        description="Reference to the FailureTrace that triggered this patch",
    )
    diagnosis: str = Field(
        default=...,
        description="Why did it fail? Root cause analysis.",
    )
    proposed_lesson: Lesson = Field(
        default=...,
        description="The lesson to apply",
    )
    apply_strategy: Literal["hotfix_now", "batch_later"] = Field(
        default=...,
        description="When to apply: hotfix_now (critical, sync) or batch_later (non-critical, async)",
    )
    # Free-form extras; each instance gets its own empty dict by default.
    context: Dict[str, Any] = Field(
        default_factory=dict,
        description="Additional context for the patch",
    )

    # Example payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "trace_id": "trace-xyz789",
                "diagnosis": "Agent gave up after finding no exact match for 'Q3 report' without trying alternative search terms like 'Quarter 3' or 'Q3-2024'",
                "proposed_lesson": {
                    "id": "lesson-abc123",
                    "trigger_pattern": "search failure, no exact matches",
                    "rule_text": "When search returns no results, try alternative terms and synonyms before giving up",
                    "lesson_type": "business",
                    "confidence_score": 0.88,
                },
                "apply_strategy": "batch_later",
                "context": {"priority": "medium"},
            },
        },
    }
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
# ========================================
|
|
182
|
+
# v2 SCHEMAS: Evolutionary Swarm Kernel
|
|
183
|
+
# ========================================
|
|
184
|
+
|
|
185
|
+
class SwarmStep(BaseModel):
    """
    One message exchanged between two agents during a swarm interaction.

    Records who sent what to whom so the message flow can later be
    inspected for emergent behavior.
    """

    # Random UUID4 string generated per instance when not supplied.
    step_id: str = Field(default_factory=lambda: str(uuid4()))
    source: str = Field(default=..., description="Source agent ID")
    target: str = Field(default=..., description="Target agent ID")
    content: str = Field(default=..., description="Message content")
    # Defaults to datetime.now() at construction time (naive local time).
    timestamp: datetime = Field(default_factory=datetime.now)
    # Optional; left as None when no embedding is supplied.
    semantic_embedding: Optional[List[float]] = Field(
        default=None,
        description="Vector embedding for semantic drift detection",
    )
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
class SwarmTrace(BaseModel):
    """
    Full record of a multi-agent swarm interaction.

    Holds the original goal plus the ordered list of ``SwarmStep`` messages;
    per the original design notes it is consumed by EmergenceMonitor to
    detect anomalies across agent interactions.
    """

    # Random UUID4 string generated per instance when not supplied.
    trace_id: str = Field(default_factory=lambda: str(uuid4()))
    original_intent: str = Field(default=..., description="Original user prompt/goal")
    # Starts empty; each instance gets its own list.
    steps: List[SwarmStep] = Field(default_factory=list)
    # Defaults to datetime.now() at construction time (naive local time).
    started_at: datetime = Field(default_factory=datetime.now)
    # Stays None until a completion time is recorded.
    completed_at: Optional[datetime] = Field(default=None)
    agent_ids: List[str] = Field(
        default_factory=list,
        description="All agents involved",
    )

    # Example payload published in the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "trace_id": "swarm-abc123",
                "original_intent": "Analyze customer churn data",
                "steps": [
                    {
                        "source": "analyst-001",
                        "target": "verifier-001",
                        "content": "Found 15% churn rate",
                    }
                ],
                "agent_ids": ["analyst-001", "verifier-001"],
            }
        }
    }
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
class AnomalyType(str, Enum):
    """
    Categories of emergent anomaly that can be reported for a swarm.

    Members are also ``str`` instances, so they compare equal to their raw
    string values.
    """

    # Circular approval
    INFINITE_LOOP = "infinite_loop"
    # Semantic divergence from original intent
    GOAL_DRIFT = "goal_drift"
    # Repetitive similar content
    ECHO_CHAMBER = "echo_chamber"
    # Agents keep deferring to each other
    ESCALATION_SPIRAL = "escalation_spiral"
    # No anomaly detected
    SAFE = "safe"
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
class AnomalyDecision(BaseModel):
    """
    Decision from EmergenceMonitor about swarm safety.

    Determines whether swarm execution should continue or be terminated.

    ``is_safe`` is documented as the inverse of ``is_anomaly``. When the
    caller does not supply ``is_safe`` explicitly it is derived as
    ``not is_anomaly``, so ``AnomalyDecision(is_anomaly=True)`` no longer
    reports itself as safe. An explicitly supplied ``is_safe`` is kept as
    given.
    """

    is_anomaly: bool = Field(..., description="Whether an anomaly was detected")
    type: AnomalyType = Field(default=AnomalyType.SAFE)
    # Declared default stays True for schema compatibility; the effective
    # default is computed in model_post_init below.
    is_safe: bool = Field(default=True, description="Inverse of is_anomaly for clarity")
    # Constrained to the closed interval [0.0, 1.0].
    confidence: float = Field(default=0.0, ge=0.0, le=1.0)
    reasoning: Optional[str] = Field(None, description="Why this decision was made")
    suggested_action: Optional[str] = Field(None, description="Circuit breaker, retry, etc.")
    drift_score: Optional[float] = Field(None, description="Semantic distance from original intent")
    cycle_detected: bool = Field(default=False, description="Whether a graph cycle was found")

    # Example payload published in the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "is_anomaly": True,
                "type": "infinite_loop",
                "is_safe": False,
                "confidence": 0.95,
                "reasoning": "Agents A and B are in circular approval pattern (3 iterations)",
                "suggested_action": "CIRCUIT_BREAK"
            }
        }
    }

    def model_post_init(self, __context: Any) -> None:
        """
        Derive ``is_safe`` from ``is_anomaly`` when the caller omitted it.

        Args:
            __context: Validation context passed by pydantic; unused here.
        """
        # model_fields_set holds only the fields that were passed explicitly,
        # so a caller-provided is_safe is never overridden.
        if "is_safe" not in self.model_fields_set:
            self.is_safe = not self.is_anomaly
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
class Rubric(BaseModel):
    """
    Weighted scoring rubric used to reward agent behavior.

    Maps each evaluation criterion to a weight. Per the original design
    notes, this is the dynamic part that RewardShaper adjusts.
    """

    # Random UUID4 string generated per instance when not supplied.
    rubric_id: str = Field(default_factory=lambda: str(uuid4()))
    # A fresh dict is built for every instance, so defaults are never shared.
    weights: Dict[str, float] = Field(
        default_factory=lambda: {
            "conciseness": 0.3,
            "accuracy": 0.5,
            "thoroughness": 0.2,
        },
        description="Weights for different scoring criteria",
    )
    version: int = Field(
        default=1,
        description="Rubric version for tracking evolution",
    )
    # Defaults to datetime.now() at construction time (naive local time).
    created_at: datetime = Field(default_factory=datetime.now)
    parent_rubric_id: Optional[str] = Field(
        default=None,
        description="Previous version if evolved",
    )

    # Example payload published in the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "rubric_id": "rubric-v1",
                "weights": {
                    # Raised from the 0.3 default - user prefers brevity
                    "conciseness": 0.4,
                    "accuracy": 0.5,
                    # Lowered from the 0.2 default
                    "thoroughness": 0.1,
                },
                "version": 2,
            }
        }
    }
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
class RubricUpdate(BaseModel):
    """
    Record of one change to the reward rubric in response to feedback.

    Keeps the rubric before and after the change, the per-weight deltas and
    the natural-language nudge for the agent. Per the original design notes
    it is generated by RewardShaper when adapting agent behavior.
    """

    # Random UUID4 string generated per instance when not supplied.
    update_id: str = Field(default_factory=lambda: str(uuid4()))
    rubric_before: Rubric = Field(default=..., description="Original rubric")
    rubric_after: Rubric = Field(default=..., description="Updated rubric")
    prompt_nudge: str = Field(
        default=...,
        description="Natural language instruction for agent",
    )
    feedback_signal: str = Field(
        default=...,
        description="What triggered this update",
    )
    # Signed change per weight name.
    correction_vector: Dict[str, float] = Field(
        default=...,
        description="Delta for each weight (e.g., {'conciseness': +0.1})",
    )
    # Defaults to datetime.now() at construction time (naive local time).
    timestamp: datetime = Field(default_factory=datetime.now)

    # Example payload published in the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "update_id": "update-xyz",
                "feedback_signal": "User feedback: 'Too verbose'",
                "correction_vector": {
                    # Positive delta: increase
                    "conciseness": 0.1,
                    # Negative delta: decrease
                    "thoroughness": -0.1,
                },
                "prompt_nudge": "Current Policy Update: Prioritize conciseness +15%. Reduce verbosity.",
            }
        }
    }
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
class AgentPerformance(BaseModel):
    """
    Snapshot of how one agent in the swarm is performing.

    Per the original design notes, EvolvableOrchestrator uses these metrics
    to decide on hot-swapping.
    """

    agent_id: str
    role: str
    # Required; validated to be non-negative.
    reward_score: float = Field(
        ge=0.0,
        description="Cumulative reward based on rubric",
    )
    tasks_completed: int = 0
    tasks_failed: int = 0
    avg_latency_ms: float = 0.0
    # Required and supplied by the caller; validated to lie in [0.0, 1.0].
    # It is not computed from the task counters in this model.
    success_rate: float = Field(
        ge=0.0,
        le=1.0,
        description="tasks_completed / total_tasks",
    )
    # Defaults to datetime.now() at construction time (naive local time).
    last_updated: datetime = Field(default_factory=datetime.now)

    # Example payload published in the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "agent_id": "analyst-001",
                "role": "analyst",
                "reward_score": 0.72,
                "tasks_completed": 45,
                "tasks_failed": 5,
                "success_rate": 0.9,
            }
        }
    }
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
class AgentSwapDecision(BaseModel):
    """
    Record of a decision to replace an underperforming agent with another.

    Per the original design notes, it is generated by EvolvableOrchestrator.
    """

    # Random UUID4 string generated per instance when not supplied.
    decision_id: str = Field(default_factory=lambda: str(uuid4()))
    old_agent_id: str
    new_agent_id: str
    reason: str = Field(default=..., description="Why the swap is needed")
    # Metrics of the outgoing agent at decision time.
    performance_before: AgentPerformance
    expected_improvement: Optional[float] = Field(
        default=None,
        description="Expected reward delta",
    )
    # Defaults to datetime.now() at construction time (naive local time).
    timestamp: datetime = Field(default_factory=datetime.now)

    # Example payload published in the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "old_agent_id": "analyst-basic",
                "new_agent_id": "analyst-senior",
                "reason": "Performance below threshold (0.65 < 0.70), swapping to stronger model",
                "expected_improvement": 0.15,
            }
        }
    }
|
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Skill Mapper - Tool signature matching and lesson-to-tool mapping.
|
|
3
|
+
|
|
4
|
+
This module implements the "Signature Matching" strategy that determines which tool
|
|
5
|
+
owns a specific lesson. This is critical for the Skill Cache (Tier 2) system, ensuring
|
|
6
|
+
that SQL lessons are only injected when SQL tools are active.
|
|
7
|
+
|
|
8
|
+
Key Components:
|
|
9
|
+
1. ToolSignature: Defines the "fingerprint" of a tool (keywords, file patterns)
|
|
10
|
+
2. SkillMapper: Maps failure traces to tools using signature matching
|
|
11
|
+
3. Two-phase extraction: Direct hit (explicit tool name) + Semantic fallback (content analysis)
|
|
12
|
+
|
|
13
|
+
The Problem This Solves:
|
|
14
|
+
Without this, we'd inject SQL lessons when using an Email tool, or Python lessons
|
|
15
|
+
when querying a database. The Skill Cache is only valuable if we inject the RIGHT
|
|
16
|
+
lessons for the RIGHT tools.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import logging
|
|
20
|
+
from pydantic import BaseModel, Field
|
|
21
|
+
from typing import List, Dict, Optional
|
|
22
|
+
from src.kernel.schemas import FailureTrace
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class ToolSignature(BaseModel):
    """
    Describes how to recognize a tool from free text.

    A signature is made of:
    - tool_name: the canonical name (e.g., "sql_db", "python_repl")
    - keywords: terms that hint at the tool (e.g., ["SELECT", "JOIN", "table"])
    - file_patterns: file extensions/patterns (e.g., [".sql", ".db"])

    Example:
        SQL Tool: keywords=["select", "join", "query"], file_patterns=[".sql"]
        Python Tool: keywords=["import", "print", "def"], file_patterns=[".py"]
    """

    tool_name: str = Field(default=..., description="Canonical tool name")
    # Both lists default to a fresh empty list per instance.
    keywords: List[str] = Field(
        default_factory=list,
        description="Keywords that indicate this tool",
    )
    file_patterns: List[str] = Field(
        default_factory=list,
        description="File patterns/extensions associated with this tool",
    )

    # Example payload published in the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "tool_name": "sql_db",
                "keywords": ["select", "join", "table", "query", "database"],
                "file_patterns": [".sql", ".db"],
            }
        }
    }
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class SkillMapper:
    """
    Maps failure traces to tools using signature matching.

    This implements the two-phase extraction strategy:
    1. Direct Hit: Check if tool name is explicitly in the trace
    2. Semantic Fallback: Analyze content for tool-specific keywords

    The registry contains all known tools and their signatures. In production,
    this would be loaded from configuration or discovered dynamically.

    Architecture:
    - Registry: Dict[tool_name, ToolSignature]
    - extract_tool_context(): Main entry point for tool extraction
    - _check_direct_hit(): Phase 1 - explicit tool name matching
    - _check_semantic_match(): Phase 2 - keyword-based matching
    """

    # Minimum semantic score a tool must reach to be accepted. The score is
    # weighted: each keyword hit counts 1 and each file-pattern hit counts 2.
    SEMANTIC_CONFIDENCE_THRESHOLD = 2

    def __init__(self, custom_registry: Optional[Dict[str, ToolSignature]] = None):
        """
        Initialize SkillMapper with tool registry.

        Args:
            custom_registry: Optional custom tool registry. If None (or
                empty), the default registry is used. A supplied registry is
                stored by reference, so add_tool_signature() also mutates the
                caller's dict.
        """
        if custom_registry:
            self.registry = custom_registry
        else:
            # Default registry with common tools
            self.registry = self._build_default_registry()

        logger.info("SkillMapper initialized with %d tools", len(self.registry))

    def _build_default_registry(self) -> Dict[str, ToolSignature]:
        """
        Build the default tool registry.

        This contains common tools with their signatures. In production,
        this would be loaded from configuration files or discovered from
        the agent's available tools.

        Returns:
            Dict[str, ToolSignature]: A new dict keyed by canonical tool name.
        """
        return {
            "sql_db": ToolSignature(
                tool_name="sql_db",
                keywords=["select", "table", "query", "join", "where", "database", "sql"],
                file_patterns=[".sql", ".db"]
            ),
            "python_repl": ToolSignature(
                tool_name="python_repl",
                keywords=["import", "print", "def", "class", "python", "execute"],
                file_patterns=[".py"]
            ),
            "file_operations": ToolSignature(
                tool_name="file_operations",
                keywords=["read file", "write file", "file", "directory", "path"],
                file_patterns=[".txt", ".json", ".csv"]
            ),
            "api_client": ToolSignature(
                tool_name="api_client",
                keywords=["http", "request", "api", "endpoint", "get", "post"],
                file_patterns=[".json", ".xml"]
            ),
            "search": ToolSignature(
                tool_name="search",
                keywords=["search", "find", "lookup", "query", "results"],
                file_patterns=[]
            )
        }

    def extract_tool_context(self, failure_trace: FailureTrace) -> str:
        """
        Determines which tool owns this lesson.

        This is the main entry point for tool extraction. It uses a two-phase
        strategy to maximize accuracy:

        Phase 1 - Direct Hit:
            Check if the tool name is explicitly mentioned in the tool_call field.
            This is the most reliable indicator.

        Phase 2 - Semantic Fallback:
            If no explicit tool name, analyze the agent_reasoning, tool_output
            and user_prompt for keywords that match tool signatures.

        Args:
            failure_trace: The failure trace to analyze

        Returns:
            str: The tool name (e.g., "sql_db") or "general" if no match

        Examples:
            >>> trace = FailureTrace(
            ...     user_prompt="Query the database",
            ...     agent_reasoning="I'll SELECT * FROM users",
            ...     tool_call={"tool": "sql_db", "query": "SELECT * FROM users"},
            ...     tool_output="Error: missing WHERE clause",
            ...     failure_type="commission_safety",
            ...     severity="critical"
            ... )
            >>> mapper.extract_tool_context(trace)
            'sql_db'  # Direct hit from tool_call
        """
        # Phase 1: Direct Hit - Check tool_call for explicit tool name
        tool_name = self._check_direct_hit(failure_trace)
        if tool_name:
            logger.info("✓ Direct hit: Tool '%s' from tool_call", tool_name)
            return tool_name

        # Phase 2: Semantic Fallback - Analyze content for keywords
        tool_name = self._check_semantic_match(failure_trace)
        if tool_name:
            logger.info("✓ Semantic match: Tool '%s' from content analysis", tool_name)
            return tool_name

        # No match found - belongs to general agent context
        logger.info("✓ No tool match - assigning to 'general' context")
        return "general"

    def _check_direct_hit(self, failure_trace: FailureTrace) -> Optional[str]:
        """
        Phase 1: Check for explicit tool name in tool_call.

        Looks at the "tool", "tool_name", "name" and "function" entries of
        tool_call, in that order. A value is accepted when it is a registry
        key, or when it is a case-insensitive substring of a registry key
        (e.g., "sql" -> "sql_db").

        Values that are not strings, or that are empty/whitespace-only, are
        skipped. An empty string is a substring of every registered name and
        would otherwise resolve to whichever tool is first in the registry.

        Args:
            failure_trace: The failure trace to check

        Returns:
            Optional[str]: Tool name if found, None otherwise
        """
        tool_call = failure_trace.tool_call
        if not tool_call:
            return None

        # Check common field names for tool identification
        for field in ("tool", "tool_name", "name", "function"):
            if field not in tool_call:
                continue

            tool_value = tool_call[field]

            # Only non-blank strings can identify a tool; anything else
            # (None, nested dicts, numbers, "") is ignored rather than
            # raising or matching everything.
            if not isinstance(tool_value, str) or not tool_value.strip():
                continue

            # Exact registry key
            if tool_value in self.registry:
                return tool_value

            # Variant of a registered name (e.g., "sql" -> "sql_db")
            needle = tool_value.lower()
            for registered_tool in self.registry:
                if needle in registered_tool.lower():
                    return registered_tool

        return None

    def _check_semantic_match(self, failure_trace: FailureTrace) -> Optional[str]:
        """
        Phase 2: Semantic fallback using keyword matching.

        Lowercases and joins agent_reasoning, tool_output and user_prompt,
        then scores every registered tool: +1 for each keyword and +2 for
        each file pattern found as a substring of that text. Matching is
        case-insensitive for both keywords and file patterns; empty keywords
        and patterns are ignored because they would match any text.

        The highest-scoring tool is returned if its score reaches
        SEMANTIC_CONFIDENCE_THRESHOLD. On a tie, the tool that comes first
        in the registry wins.

        Args:
            failure_trace: The failure trace to analyze

        Returns:
            Optional[str]: Best matching tool name, or None if no strong match
        """
        # Combine the non-empty text fields for analysis
        text_fields = (
            failure_trace.agent_reasoning,
            failure_trace.tool_output,
            failure_trace.user_prompt,
        )
        content = " ".join(part.lower() for part in text_fields if part)
        if not content:
            return None

        # Score each tool based on keyword and file-pattern matches
        scores: Dict[str, int] = {}
        for tool_name, signature in self.registry.items():
            keyword_hits = sum(
                1 for keyword in signature.keywords
                if keyword and keyword.lower() in content
            )
            # content is lowercased, so patterns must be lowercased too;
            # otherwise a pattern such as ".SQL" could never match.
            pattern_hits = sum(
                1 for pattern in signature.file_patterns
                if pattern and pattern.lower() in content
            )
            # File patterns are stronger signals than keywords
            score = keyword_hits + 2 * pattern_hits
            if score > 0:
                scores[tool_name] = score

        if not scores:
            return None

        # max() keeps the first maximum, i.e. registry order breaks ties
        best_tool = max(scores, key=scores.get)

        # Require minimum threshold for confidence
        if scores[best_tool] >= self.SEMANTIC_CONFIDENCE_THRESHOLD:
            return best_tool

        return None

    def add_tool_signature(self, signature: ToolSignature) -> None:
        """
        Add a new tool signature to the registry.

        This allows dynamic registration of tools at runtime. An existing
        entry with the same tool_name is replaced.

        Args:
            signature: The tool signature to add
        """
        self.registry[signature.tool_name] = signature
        logger.info("Added tool signature: %s", signature.tool_name)

    def get_tool_signature(self, tool_name: str) -> Optional[ToolSignature]:
        """
        Get the signature for a specific tool.

        Args:
            tool_name: The tool name to look up

        Returns:
            Optional[ToolSignature]: The signature if found, None otherwise
        """
        return self.registry.get(tool_name)

    def list_tools(self) -> List[str]:
        """
        List all registered tool names.

        Returns:
            List[str]: List of tool names in the registry
        """
        return list(self.registry)
|