agent-os-kernel 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_os/__init__.py +66 -4
- agent_os/agents_compat.py +286 -0
- agent_os/base_agent.py +308 -0
- agent_os/cli.py +1079 -19
- agent_os/integrations/__init__.py +37 -2
- agent_os/integrations/openai_adapter.py +502 -0
- agent_os/integrations/semantic_kernel_adapter.py +569 -0
- agent_os/stateless.py +349 -0
- agent_os_kernel-1.2.0.dist-info/METADATA +676 -0
- agent_os_kernel-1.2.0.dist-info/RECORD +1053 -0
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.2.0.dist-info}/entry_points.txt +0 -1
- modules/amb/.github/workflows/ci.yml +102 -0
- modules/amb/.github/workflows/publish.yml +146 -0
- modules/amb/.gitignore +134 -0
- modules/amb/CHANGELOG.md +118 -0
- modules/amb/CONTRIBUTING.md +141 -0
- modules/amb/LICENSE +21 -0
- modules/amb/README.md +188 -0
- modules/amb/amb_core/__init__.py +175 -0
- modules/amb/amb_core/adapters/__init__.py +55 -0
- modules/amb/amb_core/adapters/aws_sqs_broker.py +374 -0
- modules/amb/amb_core/adapters/azure_servicebus_broker.py +338 -0
- modules/amb/amb_core/adapters/kafka_broker.py +258 -0
- modules/amb/amb_core/adapters/nats_broker.py +283 -0
- modules/amb/amb_core/adapters/rabbitmq_broker.py +233 -0
- modules/amb/amb_core/adapters/redis_broker.py +260 -0
- modules/amb/amb_core/broker.py +143 -0
- modules/amb/amb_core/bus.py +479 -0
- modules/amb/amb_core/cloudevents.py +507 -0
- modules/amb/amb_core/dlq.py +343 -0
- modules/amb/amb_core/hf_utils.py +534 -0
- modules/amb/amb_core/memory_broker.py +408 -0
- modules/amb/amb_core/models.py +139 -0
- modules/amb/amb_core/persistence.py +527 -0
- modules/amb/amb_core/schema.py +292 -0
- modules/amb/amb_core/tracing.py +356 -0
- modules/amb/examples/advanced_features.py +223 -0
- modules/amb/examples/backpressure_demo.py +225 -0
- modules/amb/examples/basic_usage.py +117 -0
- modules/amb/examples/tracing_demo.py +104 -0
- modules/amb/experiments/README.md +52 -0
- modules/amb/experiments/reproduce_results.py +467 -0
- modules/amb/experiments/results.json +324 -0
- modules/amb/paper/README.md +40 -0
- modules/amb/paper/paper.tex +365 -0
- modules/amb/paper/whitepaper.md +377 -0
- modules/amb/pyproject.toml +117 -0
- modules/amb/tests/__init__.py +1 -0
- modules/amb/tests/test_backpressure_priority.py +280 -0
- modules/amb/tests/test_bus.py +198 -0
- modules/amb/tests/test_cloudevents.py +443 -0
- modules/amb/tests/test_features.py +531 -0
- modules/amb/tests/test_models.py +74 -0
- modules/amb/tests/test_tracing.py +254 -0
- modules/atr/.github/workflows/ci.yml +101 -0
- modules/atr/.github/workflows/publish.yml +140 -0
- modules/atr/.gitignore +134 -0
- modules/atr/.pre-commit-config.yaml +37 -0
- modules/atr/CHANGELOG.md +39 -0
- modules/atr/CONTRIBUTING.md +96 -0
- modules/atr/IMPLEMENTATION_SUMMARY.md +143 -0
- modules/atr/README.md +180 -0
- modules/atr/atr/__init__.py +638 -0
- modules/atr/atr/access.py +346 -0
- modules/atr/atr/composition.py +643 -0
- modules/atr/atr/decorator.py +355 -0
- modules/atr/atr/executor.py +382 -0
- modules/atr/atr/health.py +555 -0
- modules/atr/atr/hf_utils.py +447 -0
- modules/atr/atr/injection.py +420 -0
- modules/atr/atr/metrics.py +438 -0
- modules/atr/atr/policies.py +401 -0
- modules/atr/atr/py.typed +2 -0
- modules/atr/atr/registry.py +450 -0
- modules/atr/atr/schema.py +478 -0
- modules/atr/atr/tools/safe/__init__.py +73 -0
- modules/atr/atr/tools/safe/calculator.py +380 -0
- modules/atr/atr/tools/safe/datetime_tool.py +441 -0
- modules/atr/atr/tools/safe/file_reader.py +400 -0
- modules/atr/atr/tools/safe/http_client.py +314 -0
- modules/atr/atr/tools/safe/json_parser.py +372 -0
- modules/atr/atr/tools/safe/text_tool.py +526 -0
- modules/atr/atr/tools/safe/toolkit.py +173 -0
- modules/atr/docs/PYPI_SETUP.md +113 -0
- modules/atr/examples/README.md +27 -0
- modules/atr/examples/demo.py +144 -0
- modules/atr/examples/sandbox_demo.py +218 -0
- modules/atr/experiments/README.md +69 -0
- modules/atr/experiments/reproduce_results.py +509 -0
- modules/atr/experiments/results/.gitkeep +0 -0
- modules/atr/experiments/results/results_20260123_140334.json +71 -0
- modules/atr/paper/README.md +36 -0
- modules/atr/paper/figures/.gitkeep +0 -0
- modules/atr/paper/references.bib +84 -0
- modules/atr/paper/structure.tex +293 -0
- modules/atr/paper/whitepaper.md +234 -0
- modules/atr/pyproject.toml +148 -0
- modules/atr/requirements.txt +1 -0
- modules/atr/setup.py +30 -0
- modules/atr/tests/__init__.py +1 -0
- modules/atr/tests/test_decorator.py +317 -0
- modules/atr/tests/test_executor.py +245 -0
- modules/atr/tests/test_integration_executor.py +184 -0
- modules/atr/tests/test_registry.py +312 -0
- modules/atr/tests/test_schema.py +182 -0
- modules/atr/tests/test_v2_features.py +708 -0
- modules/caas/.dockerignore +63 -0
- modules/caas/.github/ISSUE_TEMPLATE/bug_report.md +38 -0
- modules/caas/.github/ISSUE_TEMPLATE/custom.md +10 -0
- modules/caas/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
- modules/caas/.github/workflows/ci.yml +100 -0
- modules/caas/.github/workflows/lint.yml +39 -0
- modules/caas/.github/workflows/publish-pypi.yml +124 -0
- modules/caas/.gitignore +73 -0
- modules/caas/.pre-commit-config.yaml +33 -0
- modules/caas/CHANGELOG.md +58 -0
- modules/caas/CONTRIBUTING.md +346 -0
- modules/caas/Dockerfile +41 -0
- modules/caas/LICENSE +21 -0
- modules/caas/MANIFEST.in +11 -0
- modules/caas/README.md +158 -0
- modules/caas/benchmarks/README.md +255 -0
- modules/caas/benchmarks/create_hf_dataset.py +502 -0
- modules/caas/benchmarks/data/sample_corpus/README.md +86 -0
- modules/caas/benchmarks/data/sample_corpus/auth_module.py +211 -0
- modules/caas/benchmarks/data/sample_corpus/contribution_guide.md +185 -0
- modules/caas/benchmarks/data/sample_corpus/remote_work_policy.html +57 -0
- modules/caas/benchmarks/hf_dataset/README.md +214 -0
- modules/caas/benchmarks/hf_dataset/caas_benchmark_corpus.py +73 -0
- modules/caas/benchmarks/hf_dataset/corpus_preview.json +193 -0
- modules/caas/benchmarks/results/README.md +66 -0
- modules/caas/benchmarks/results/evaluation_2026-01-20.json +121 -0
- modules/caas/benchmarks/run_evaluation.py +561 -0
- modules/caas/benchmarks/statistical_tests.py +289 -0
- modules/caas/benchmarks/verify_sample_corpus.py +83 -0
- modules/caas/docker-compose.yml +38 -0
- modules/caas/docs/CONTEXT_TRIAD.md +462 -0
- modules/caas/docs/CONTRIBUTING.md +346 -0
- modules/caas/docs/ETHICS_AND_LIMITATIONS.md +336 -0
- modules/caas/docs/HEURISTIC_ROUTER.md +442 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY.md +363 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_CONTEXT_TRIAD.md +277 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_HEURISTIC_ROUTER.md +231 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_METADATA_INJECTION.md +258 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_PRAGMATIC_TRUTH.md +212 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_TRUST_GATEWAY.md +319 -0
- modules/caas/docs/LAYER_1_PRIMITIVE.md +202 -0
- modules/caas/docs/METADATA_INJECTION.md +404 -0
- modules/caas/docs/PRAGMATIC_TRUTH.md +431 -0
- modules/caas/docs/RELATED_WORK.md +312 -0
- modules/caas/docs/RELEASE_CHECKLIST.md +219 -0
- modules/caas/docs/RELEASE_GUIDE.md +285 -0
- modules/caas/docs/REPRODUCIBILITY.md +386 -0
- modules/caas/docs/SLIDING_WINDOW.md +387 -0
- modules/caas/docs/STRUCTURE_AWARE_INDEXING.md +158 -0
- modules/caas/docs/TESTING.md +259 -0
- modules/caas/docs/THREAT_MODEL.md +247 -0
- modules/caas/docs/TRUST_GATEWAY.md +575 -0
- modules/caas/docs/VFS.md +298 -0
- modules/caas/examples/agents/enterprise_security_agent.py +414 -0
- modules/caas/examples/agents/intelligent_document_analyzer.py +380 -0
- modules/caas/examples/demos/demo.py +309 -0
- modules/caas/examples/demos/demo_context_triad.py +225 -0
- modules/caas/examples/demos/demo_conversation_manager.py +285 -0
- modules/caas/examples/demos/demo_heuristic_router.py +133 -0
- modules/caas/examples/demos/demo_metadata_injection.py +198 -0
- modules/caas/examples/demos/demo_pragmatic_truth.py +303 -0
- modules/caas/examples/demos/demo_structure_aware.py +140 -0
- modules/caas/examples/demos/demo_time_decay.py +247 -0
- modules/caas/examples/demos/demo_trust_gateway.py +383 -0
- modules/caas/examples/multi_agent/README.md +159 -0
- modules/caas/examples/multi_agent/research_team.py +369 -0
- modules/caas/examples/multi_agent/vfs_collaboration.py +393 -0
- modules/caas/examples/usage/auth_module.py +142 -0
- modules/caas/examples/usage/usage_example.py +173 -0
- modules/caas/experiments/README.md +42 -0
- modules/caas/experiments/reproduce_results.py +462 -0
- modules/caas/paper/ARXIV_METADATA.md +145 -0
- modules/caas/paper/ARXIV_README.md +47 -0
- modules/caas/paper/CHECKLIST.md +103 -0
- modules/caas/paper/GITHUB_RELEASE_NOTES.md +105 -0
- modules/caas/paper/README.md +71 -0
- modules/caas/paper/abstract.md +24 -0
- modules/caas/paper/arxiv_submission.tar +0 -0
- modules/caas/paper/arxiv_submission.zip +0 -0
- modules/caas/paper/build_pdf.py +355 -0
- modules/caas/paper/experiments.md +149 -0
- modules/caas/paper/figures/.gitkeep +0 -0
- modules/caas/paper/figures/README.md +237 -0
- modules/caas/paper/figures/fig1_system_architecture.png +0 -0
- modules/caas/paper/figures/fig1_system_architecture.svg +198 -0
- modules/caas/paper/figures/fig2_context_triad.png +0 -0
- modules/caas/paper/figures/fig2_context_triad.svg +105 -0
- modules/caas/paper/figures/fig3_ablation_results.png +0 -0
- modules/caas/paper/figures/fig3_ablation_results.svg +113 -0
- modules/caas/paper/figures/fig4_routing_latency.png +0 -0
- modules/caas/paper/figures/fig4_routing_latency.svg +97 -0
- modules/caas/paper/intro.md +103 -0
- modules/caas/paper/latex/figures/fig1_system_architecture.png +0 -0
- modules/caas/paper/latex/figures/fig2_context_triad.png +0 -0
- modules/caas/paper/latex/figures/fig3_ablation_results.png +0 -0
- modules/caas/paper/latex/figures/fig4_routing_latency.png +0 -0
- modules/caas/paper/latex/main.tex +468 -0
- modules/caas/paper/latex/references.bib +140 -0
- modules/caas/paper/method.md +350 -0
- modules/caas/paper/outline.md +123 -0
- modules/caas/paper/related_work.md +101 -0
- modules/caas/paper/tables/.gitkeep +0 -0
- modules/caas/paper/tables/results_tables.md +50 -0
- modules/caas/pyproject.toml +172 -0
- modules/caas/requirements.txt +11 -0
- modules/caas/src/caas/__init__.py +232 -0
- modules/caas/src/caas/api/__init__.py +7 -0
- modules/caas/src/caas/api/server.py +1326 -0
- modules/caas/src/caas/caching.py +832 -0
- modules/caas/src/caas/cli.py +208 -0
- modules/caas/src/caas/conversation.py +221 -0
- modules/caas/src/caas/decay.py +118 -0
- modules/caas/src/caas/detection/__init__.py +7 -0
- modules/caas/src/caas/detection/detector.py +236 -0
- modules/caas/src/caas/enrichment.py +127 -0
- modules/caas/src/caas/gateway/__init__.py +24 -0
- modules/caas/src/caas/gateway/trust_gateway.py +471 -0
- modules/caas/src/caas/hf_utils.py +477 -0
- modules/caas/src/caas/ingestion/__init__.py +21 -0
- modules/caas/src/caas/ingestion/processors.py +251 -0
- modules/caas/src/caas/ingestion/structure_parser.py +185 -0
- modules/caas/src/caas/models.py +354 -0
- modules/caas/src/caas/pragmatic_truth.py +441 -0
- modules/caas/src/caas/routing/__init__.py +8 -0
- modules/caas/src/caas/routing/heuristic_router.py +242 -0
- modules/caas/src/caas/storage/__init__.py +7 -0
- modules/caas/src/caas/storage/store.py +450 -0
- modules/caas/src/caas/triad.py +472 -0
- modules/caas/src/caas/tuning/__init__.py +7 -0
- modules/caas/src/caas/tuning/tuner.py +322 -0
- modules/caas/src/caas/vfs/__init__.py +12 -0
- modules/caas/src/caas/vfs/filesystem.py +450 -0
- modules/caas/tests/__init__.py +3 -0
- modules/caas/tests/conftest.py +8 -0
- modules/caas/tests/test_caching.py +628 -0
- modules/caas/tests/test_context_triad.py +385 -0
- modules/caas/tests/test_conversation_manager.py +289 -0
- modules/caas/tests/test_functionality.py +215 -0
- modules/caas/tests/test_heuristic_router.py +370 -0
- modules/caas/tests/test_metadata_injection.py +328 -0
- modules/caas/tests/test_pragmatic_truth.py +322 -0
- modules/caas/tests/test_structure_aware_indexing.py +283 -0
- modules/caas/tests/test_time_decay.py +268 -0
- modules/caas/tests/test_trust_gateway.py +445 -0
- modules/caas/tests/test_vfs.py +298 -0
- modules/cmvk/.github/FUNDING.yml +9 -0
- modules/cmvk/.github/dependabot.yml +54 -0
- modules/cmvk/.github/workflows/ci.yml +205 -0
- modules/cmvk/.github/workflows/publish.yml +143 -0
- modules/cmvk/.gitignore +147 -0
- modules/cmvk/.pre-commit-config.yaml +58 -0
- modules/cmvk/CHANGELOG.md +146 -0
- modules/cmvk/CITATION.cff +48 -0
- modules/cmvk/CONTRIBUTING.md +229 -0
- modules/cmvk/Dockerfile +87 -0
- modules/cmvk/HF_MODEL_CARD.md +185 -0
- modules/cmvk/LICENSE +21 -0
- modules/cmvk/README.md +149 -0
- modules/cmvk/SECURITY.md +114 -0
- modules/cmvk/config/prompts/generator_v1.txt +23 -0
- modules/cmvk/config/prompts/verifier_hostile.txt +32 -0
- modules/cmvk/config/settings.yaml +40 -0
- modules/cmvk/coverage_html/.gitignore +2 -0
- modules/cmvk/coverage_html/class_index.html +658 -0
- modules/cmvk/coverage_html/coverage_html_cb_188fc9a4.js +735 -0
- modules/cmvk/coverage_html/favicon_32_cb_c827f16f.png +0 -0
- modules/cmvk/coverage_html/function_index.html +1978 -0
- modules/cmvk/coverage_html/index.html +255 -0
- modules/cmvk/coverage_html/keybd_closed_cb_900cfef5.png +0 -0
- modules/cmvk/coverage_html/status.json +1 -0
- modules/cmvk/coverage_html/style_cb_5c747636.css +389 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38___init___py.html +315 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_audit_py.html +499 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_benchmarks_py.html +575 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_constitutional_py.html +1001 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_hf_utils_py.html +398 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_metrics_py.html +570 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_profiles_py.html +397 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_types_py.html +109 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_verification_py.html +1053 -0
- modules/cmvk/docs/DIAGRAMS.md +325 -0
- modules/cmvk/docs/architecture.md +345 -0
- modules/cmvk/docs/features.md +308 -0
- modules/cmvk/docs/getting_started.md +279 -0
- modules/cmvk/docs/innovation_layer.md +377 -0
- modules/cmvk/docs/safety.md +281 -0
- modules/cmvk/docs/traceability.md +150 -0
- modules/cmvk/examples/basic_example.py +62 -0
- modules/cmvk/examples/demo_complete_pipeline.py +209 -0
- modules/cmvk/examples/demo_innovation_layer.py +197 -0
- modules/cmvk/examples/example.py +112 -0
- modules/cmvk/examples/model_diversity_comparison.py +110 -0
- modules/cmvk/examples/real_api_integration.py +121 -0
- modules/cmvk/examples/test_full_pipeline.py +303 -0
- modules/cmvk/experiments/FEATURE_2_LATERAL_THINKING.md +187 -0
- modules/cmvk/experiments/README.md +216 -0
- modules/cmvk/experiments/ablation_runner.py +666 -0
- modules/cmvk/experiments/baseline_runner.py +158 -0
- modules/cmvk/experiments/blind_spot_benchmark.py +364 -0
- modules/cmvk/experiments/datasets/README.md +85 -0
- modules/cmvk/experiments/datasets/humaneval_50.json +352 -0
- modules/cmvk/experiments/datasets/humaneval_full.json +1150 -0
- modules/cmvk/experiments/datasets/humaneval_sample.json +32 -0
- modules/cmvk/experiments/datasets/sabotage.json +262 -0
- modules/cmvk/experiments/datasets/sample.json +40 -0
- modules/cmvk/experiments/demo_with_traces.py +110 -0
- modules/cmvk/experiments/efficiency_curve.py +259 -0
- modules/cmvk/experiments/experiment_runner.py +243 -0
- modules/cmvk/experiments/paper_data_generator.py +183 -0
- modules/cmvk/experiments/reproduce_results.py +407 -0
- modules/cmvk/experiments/reproducible_runner.py +352 -0
- modules/cmvk/experiments/sabotage_stress_test.py +311 -0
- modules/cmvk/experiments/test_lateral_thinking.py +116 -0
- modules/cmvk/experiments/test_prosecutor.py +41 -0
- modules/cmvk/experiments/visualize_results.py +735 -0
- modules/cmvk/logs/traces/demo_HumanEval_0_20260121-204900.json +36 -0
- modules/cmvk/notebooks/analysis.ipynb +124 -0
- modules/cmvk/paper/PAPER.md +561 -0
- modules/cmvk/paper/arxiv_checklist.md +230 -0
- modules/cmvk/paper/cmvk_neurips.aux +77 -0
- modules/cmvk/paper/cmvk_neurips.bbl +81 -0
- modules/cmvk/paper/cmvk_neurips.blg +48 -0
- modules/cmvk/paper/cmvk_neurips.out +16 -0
- modules/cmvk/paper/cmvk_neurips.pdf +0 -0
- modules/cmvk/paper/cmvk_neurips.tex +309 -0
- modules/cmvk/paper/figures/ablation.png +0 -0
- modules/cmvk/paper/figures/ablation.svg +39 -0
- modules/cmvk/paper/figures/architecture.png +0 -0
- modules/cmvk/paper/figures/architecture.svg +115 -0
- modules/cmvk/paper/figures/results_bar.png +0 -0
- modules/cmvk/paper/figures/results_bar.svg +70 -0
- modules/cmvk/paper/generate_figures.py +383 -0
- modules/cmvk/paper/neurips_2024.sty +101 -0
- modules/cmvk/paper/references.bib +98 -0
- modules/cmvk/paper/structure.tex +200 -0
- modules/cmvk/pyproject.toml +189 -0
- modules/cmvk/requirements-dev.txt +19 -0
- modules/cmvk/requirements.txt +14 -0
- modules/cmvk/src/cmvk/__init__.py +216 -0
- modules/cmvk/src/cmvk/audit.py +400 -0
- modules/cmvk/src/cmvk/benchmarks.py +476 -0
- modules/cmvk/src/cmvk/constitutional.py +902 -0
- modules/cmvk/src/cmvk/hf_utils.py +299 -0
- modules/cmvk/src/cmvk/metrics.py +471 -0
- modules/cmvk/src/cmvk/profiles.py +298 -0
- modules/cmvk/src/cmvk/py.typed +0 -0
- modules/cmvk/src/cmvk/types.py +10 -0
- modules/cmvk/src/cmvk/verification.py +954 -0
- modules/cmvk/src/cross_model_verification_kernel/__init__.py +91 -0
- modules/cmvk/src/cross_model_verification_kernel/__main__.py +10 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/__init__.py +16 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/base_agent.py +142 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/generator_openai.py +223 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/verifier_anthropic.py +448 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/verifier_gemini.py +481 -0
- modules/cmvk/src/cross_model_verification_kernel/cli.py +570 -0
- modules/cmvk/src/cross_model_verification_kernel/core/__init__.py +26 -0
- modules/cmvk/src/cross_model_verification_kernel/core/graph_memory.py +308 -0
- modules/cmvk/src/cross_model_verification_kernel/core/kernel.py +413 -0
- modules/cmvk/src/cross_model_verification_kernel/core/trace_logger.py +75 -0
- modules/cmvk/src/cross_model_verification_kernel/core/types.py +121 -0
- modules/cmvk/src/cross_model_verification_kernel/datasets/__init__.py +20 -0
- modules/cmvk/src/cross_model_verification_kernel/datasets/humaneval_loader.py +271 -0
- modules/cmvk/src/cross_model_verification_kernel/generator.py +118 -0
- modules/cmvk/src/cross_model_verification_kernel/kernel.py +292 -0
- modules/cmvk/src/cross_model_verification_kernel/models.py +111 -0
- modules/cmvk/src/cross_model_verification_kernel/py.typed +1 -0
- modules/cmvk/src/cross_model_verification_kernel/simple_kernel.py +185 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/__init__.py +94 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/huggingface_upload.py +394 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/sandbox.py +159 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/statistics.py +468 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/visualizer.py +312 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/web_search.py +86 -0
- modules/cmvk/src/cross_model_verification_kernel/verifier.py +257 -0
- modules/cmvk/tests/__init__.py +3 -0
- modules/cmvk/tests/conftest.py +61 -0
- modules/cmvk/tests/integration/__init__.py +1 -0
- modules/cmvk/tests/integration/test_anthropic_verifier.py +269 -0
- modules/cmvk/tests/integration/test_integration.py +53 -0
- modules/cmvk/tests/integration/test_lateral_thinking_integration.py +199 -0
- modules/cmvk/tests/integration/test_lateral_thinking_witness.py +208 -0
- modules/cmvk/tests/integration/test_prosecutor_mode.py +131 -0
- modules/cmvk/tests/test_constitutional.py +611 -0
- modules/cmvk/tests/test_enhanced_features.py +603 -0
- modules/cmvk/tests/test_verification.py +255 -0
- modules/cmvk/tests/unit/__init__.py +1 -0
- modules/cmvk/tests/unit/test_agents.py +64 -0
- modules/cmvk/tests/unit/test_cli.py +224 -0
- modules/cmvk/tests/unit/test_core.py +126 -0
- modules/cmvk/tests/unit/test_humaneval_loader.py +197 -0
- modules/cmvk/tests/unit/test_kernel.py +255 -0
- modules/cmvk/tests/unit/test_reproducibility.py +160 -0
- modules/cmvk/tests/unit/test_trace_logger.py +115 -0
- modules/cmvk/tests/unit/test_visualizer.py +218 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/bug_report.yml +82 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/config.yml +11 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/feature_request.yml +104 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/question.yml +70 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/security_vulnerability.yml +84 -0
- modules/control-plane/.github/discussions.yml +73 -0
- modules/control-plane/.github/pull_request_template.md +82 -0
- modules/control-plane/.github/workflows/publish.yml +146 -0
- modules/control-plane/.github/workflows/release.yml +39 -0
- modules/control-plane/.github/workflows/tests.yml +58 -0
- modules/control-plane/.gitignore +55 -0
- modules/control-plane/CHANGELOG.md +203 -0
- modules/control-plane/CONTRIBUTING.md +311 -0
- modules/control-plane/CONTRIBUTORS.md +88 -0
- modules/control-plane/Dockerfile +82 -0
- modules/control-plane/LICENSE +21 -0
- modules/control-plane/MANIFEST.in +17 -0
- modules/control-plane/README.md +1264 -0
- modules/control-plane/ROADMAP.md +228 -0
- modules/control-plane/SECURITY.md +210 -0
- modules/control-plane/SUPPORT.md +106 -0
- modules/control-plane/acp-cli.py +212 -0
- modules/control-plane/benchmark/README.md +257 -0
- modules/control-plane/benchmark/__init__.py +19 -0
- modules/control-plane/benchmark/red_team_dataset.py +517 -0
- modules/control-plane/benchmark.py +563 -0
- modules/control-plane/build_and_publish.sh +130 -0
- modules/control-plane/docker-compose.yml +74 -0
- modules/control-plane/docs/ABLATION_STUDIES.md +528 -0
- modules/control-plane/docs/ADAPTER_GUIDE.md +544 -0
- modules/control-plane/docs/ADVANCED_FEATURES.md +543 -0
- modules/control-plane/docs/AIOS_COMPARISON.md +296 -0
- modules/control-plane/docs/BIBLIOGRAPHY.md +367 -0
- modules/control-plane/docs/CASE_STUDIES.md +645 -0
- modules/control-plane/docs/DOCKER_DEPLOYMENT.md +184 -0
- modules/control-plane/docs/ECOSYSTEM_STATUS.md +98 -0
- modules/control-plane/docs/HF_MODEL_CARD.md +168 -0
- modules/control-plane/docs/KERNEL_V1_RELEASE.md +454 -0
- modules/control-plane/docs/LAYER3_FRAMEWORK.md +227 -0
- modules/control-plane/docs/LIMITATIONS.md +523 -0
- modules/control-plane/docs/PYPI_PUBLISHING.md +195 -0
- modules/control-plane/docs/README.md +58 -0
- modules/control-plane/docs/RELATED_WORK.md +319 -0
- modules/control-plane/docs/RELEASE_v1.1.0.md +252 -0
- modules/control-plane/docs/REPRODUCIBILITY.md +540 -0
- modules/control-plane/docs/RESEARCH_FOUNDATION.md +197 -0
- modules/control-plane/docs/api/CORE.md +270 -0
- modules/control-plane/docs/architecture/architecture.md +120 -0
- modules/control-plane/docs/community/ANNOUNCEMENT_TEMPLATES.md +52 -0
- modules/control-plane/docs/guides/IMPLEMENTATION.md +225 -0
- modules/control-plane/docs/guides/PHILOSOPHY.md +354 -0
- modules/control-plane/docs/guides/QUICKSTART.md +217 -0
- modules/control-plane/examples/README.md +138 -0
- modules/control-plane/examples/a2a_demo.py +410 -0
- modules/control-plane/examples/adapter_demo.py +347 -0
- modules/control-plane/examples/advanced_features.py +403 -0
- modules/control-plane/examples/basic_usage.py +261 -0
- modules/control-plane/examples/benchmark_demo.py +186 -0
- modules/control-plane/examples/compliance_demo.py +333 -0
- modules/control-plane/examples/configuration.py +265 -0
- modules/control-plane/examples/getting_started.py +178 -0
- modules/control-plane/examples/hibernation_and_time_travel_demo.py +406 -0
- modules/control-plane/examples/interactive_tutorial.ipynb +497 -0
- modules/control-plane/examples/kernel_interceptor_demo.py +202 -0
- modules/control-plane/examples/kernel_v1_demo.py +273 -0
- modules/control-plane/examples/langchain_demo.py +281 -0
- modules/control-plane/examples/lifecycle_demo.py +724 -0
- modules/control-plane/examples/mcp_demo.py +378 -0
- modules/control-plane/examples/ml_safety_demo.py +157 -0
- modules/control-plane/examples/multimodal_demo.py +347 -0
- modules/control-plane/examples/observability_demo.py +370 -0
- modules/control-plane/examples/use_cases.py +336 -0
- modules/control-plane/experiments/long_horizon_purge.py +235 -0
- modules/control-plane/experiments/multi_agent_rag.py +165 -0
- modules/control-plane/experiments/reproduce_results.py +667 -0
- modules/control-plane/paper/ARXIV_SUBMISSION_INFO.txt +122 -0
- modules/control-plane/paper/ETHICS_STATEMENT.md +248 -0
- modules/control-plane/paper/PAPER_CHECKLIST.md +72 -0
- modules/control-plane/paper/Paper.pdf +0 -0
- modules/control-plane/paper/README.md +71 -0
- modules/control-plane/paper/appendix.md +152 -0
- modules/control-plane/paper/architecture.md +15 -0
- modules/control-plane/paper/arxiv/figures/ablation_chart.png +0 -0
- modules/control-plane/paper/arxiv/figures/architecture.png +0 -0
- modules/control-plane/paper/arxiv/figures/constraint_graphs.png +0 -0
- modules/control-plane/paper/arxiv/figures/results_chart.png +0 -0
- modules/control-plane/paper/arxiv/main.aux +97 -0
- modules/control-plane/paper/arxiv/main.bbl +112 -0
- modules/control-plane/paper/arxiv/main.blg +48 -0
- modules/control-plane/paper/arxiv/main.out +33 -0
- modules/control-plane/paper/arxiv/main.pdf +0 -0
- modules/control-plane/paper/arxiv/main.tex +479 -0
- modules/control-plane/paper/arxiv/references.bib +234 -0
- modules/control-plane/paper/arxiv_submission.tar +0 -0
- modules/control-plane/paper/arxiv_submission.zip +0 -0
- modules/control-plane/paper/build.sh +68 -0
- modules/control-plane/paper/figures/README.md +47 -0
- modules/control-plane/paper/figures/ablation_chart.pdf +0 -0
- modules/control-plane/paper/figures/ablation_chart.png +0 -0
- modules/control-plane/paper/figures/architecture.pdf +0 -0
- modules/control-plane/paper/figures/architecture.png +0 -0
- modules/control-plane/paper/figures/constraint_graphs.pdf +0 -0
- modules/control-plane/paper/figures/constraint_graphs.png +0 -0
- modules/control-plane/paper/figures/generate_figures.py +252 -0
- modules/control-plane/paper/figures/results_chart.pdf +0 -0
- modules/control-plane/paper/figures/results_chart.png +0 -0
- modules/control-plane/paper/main.md +273 -0
- modules/control-plane/paper/main.tex +214 -0
- modules/control-plane/paper/main_arxiv.aux +53 -0
- modules/control-plane/paper/main_arxiv.out +17 -0
- modules/control-plane/paper/main_arxiv.pdf +0 -0
- modules/control-plane/paper/main_arxiv.tex +264 -0
- modules/control-plane/paper/references.bib +234 -0
- modules/control-plane/pyproject.toml +124 -0
- modules/control-plane/reproducibility/ABLATIONS.md +136 -0
- modules/control-plane/reproducibility/README.md +288 -0
- modules/control-plane/reproducibility/commands.md +467 -0
- modules/control-plane/reproducibility/docker_config/Dockerfile +39 -0
- modules/control-plane/reproducibility/experiment_configs/purge_config.json +46 -0
- modules/control-plane/reproducibility/experiment_configs/rag_config.json +36 -0
- modules/control-plane/reproducibility/hardware_specs.md +317 -0
- modules/control-plane/reproducibility/requirements_frozen.txt +0 -0
- modules/control-plane/reproducibility/run_all_experiments.sh +45 -0
- modules/control-plane/reproducibility/seeds.json +106 -0
- modules/control-plane/scripts/prepare_pypi.py +46 -0
- modules/control-plane/scripts/prepare_release.py +176 -0
- modules/control-plane/scripts/upload_dataset_to_hf.py +316 -0
- modules/control-plane/setup.py +69 -0
- modules/control-plane/src/agent_control_plane/__init__.py +639 -0
- modules/control-plane/src/agent_control_plane/a2a_adapter.py +541 -0
- modules/control-plane/src/agent_control_plane/adapter.py +415 -0
- modules/control-plane/src/agent_control_plane/agent_hibernation.py +364 -0
- modules/control-plane/src/agent_control_plane/agent_kernel.py +464 -0
- modules/control-plane/src/agent_control_plane/compliance.py +718 -0
- modules/control-plane/src/agent_control_plane/constraint_graphs.py +475 -0
- modules/control-plane/src/agent_control_plane/control_plane.py +848 -0
- modules/control-plane/src/agent_control_plane/example_executors.py +193 -0
- modules/control-plane/src/agent_control_plane/execution_engine.py +229 -0
- modules/control-plane/src/agent_control_plane/flight_recorder.py +600 -0
- modules/control-plane/src/agent_control_plane/governance_layer.py +432 -0
- modules/control-plane/src/agent_control_plane/hf_utils.py +561 -0
- modules/control-plane/src/agent_control_plane/interfaces/__init__.py +53 -0
- modules/control-plane/src/agent_control_plane/interfaces/kernel_interface.py +359 -0
- modules/control-plane/src/agent_control_plane/interfaces/plugin_interface.py +495 -0
- modules/control-plane/src/agent_control_plane/interfaces/protocol_interfaces.py +385 -0
- modules/control-plane/src/agent_control_plane/kernel_space.py +707 -0
- modules/control-plane/src/agent_control_plane/langchain_adapter.py +422 -0
- modules/control-plane/src/agent_control_plane/lifecycle.py +3111 -0
- modules/control-plane/src/agent_control_plane/mcp_adapter.py +517 -0
- modules/control-plane/src/agent_control_plane/ml_safety.py +560 -0
- modules/control-plane/src/agent_control_plane/multimodal.py +724 -0
- modules/control-plane/src/agent_control_plane/mute_agent.py +419 -0
- modules/control-plane/src/agent_control_plane/observability.py +785 -0
- modules/control-plane/src/agent_control_plane/orchestrator.py +480 -0
- modules/control-plane/src/agent_control_plane/plugin_registry.py +748 -0
- modules/control-plane/src/agent_control_plane/policy_engine.py +525 -0
- modules/control-plane/src/agent_control_plane/shadow_mode.py +307 -0
- modules/control-plane/src/agent_control_plane/signals.py +491 -0
- modules/control-plane/src/agent_control_plane/supervisor_agents.py +427 -0
- modules/control-plane/src/agent_control_plane/time_travel_debugger.py +554 -0
- modules/control-plane/src/agent_control_plane/tool_registry.py +350 -0
- modules/control-plane/src/agent_control_plane/vfs.py +695 -0
- modules/control-plane/tests/README.md +33 -0
- modules/control-plane/tests/test_a2a_adapter.py +336 -0
- modules/control-plane/tests/test_adapter.py +422 -0
- modules/control-plane/tests/test_advanced_features.py +389 -0
- modules/control-plane/tests/test_benchmark.py +223 -0
- modules/control-plane/tests/test_compliance.py +214 -0
- modules/control-plane/tests/test_control_plane.py +295 -0
- modules/control-plane/tests/test_hibernation.py +274 -0
- modules/control-plane/tests/test_kernel_interception.py +284 -0
- modules/control-plane/tests/test_langchain_adapter.py +258 -0
- modules/control-plane/tests/test_lifecycle.py +1174 -0
- modules/control-plane/tests/test_mcp_adapter.py +293 -0
- modules/control-plane/tests/test_ml_safety.py +142 -0
- modules/control-plane/tests/test_multimodal.py +317 -0
- modules/control-plane/tests/test_new_features.py +435 -0
- modules/control-plane/tests/test_observability.py +338 -0
- modules/control-plane/tests/test_time_travel.py +387 -0
- modules/emk/.github/workflows/ci.yml +105 -0
- modules/emk/.github/workflows/publish.yml +144 -0
- modules/emk/.gitignore +74 -0
- modules/emk/CHANGELOG.md +41 -0
- modules/emk/CONTRIBUTING.md +295 -0
- modules/emk/IMPLEMENTATION.md +174 -0
- modules/emk/LICENSE +21 -0
- modules/emk/MANIFEST.in +8 -0
- modules/emk/README.md +135 -0
- modules/emk/RELEASE_NOTES.md +82 -0
- modules/emk/SECURITY.md +52 -0
- modules/emk/codecov.yml +39 -0
- modules/emk/docs/MEMORY_MANAGEMENT.md +285 -0
- modules/emk/emk/__init__.py +106 -0
- modules/emk/emk/hf_utils.py +419 -0
- modules/emk/emk/indexer.py +144 -0
- modules/emk/emk/py.typed +0 -0
- modules/emk/emk/schema.py +204 -0
- modules/emk/emk/sleep_cycle.py +345 -0
- modules/emk/emk/store.py +479 -0
- modules/emk/examples/basic_usage.py +123 -0
- modules/emk/examples/memory_features_demo.py +154 -0
- modules/emk/experiments/README.md +59 -0
- modules/emk/experiments/reproduce_results.py +461 -0
- modules/emk/experiments/results.json +61 -0
- modules/emk/paper/structure.tex +192 -0
- modules/emk/paper/whitepaper.md +273 -0
- modules/emk/pyproject.toml +91 -0
- modules/emk/setup.py +5 -0
- modules/emk/tests/test_file_adapter.py +195 -0
- modules/emk/tests/test_indexer.py +174 -0
- modules/emk/tests/test_init.py +55 -0
- modules/emk/tests/test_negative_memory.py +83 -0
- modules/emk/tests/test_schema.py +150 -0
- modules/emk/tests/test_semantic_rules.py +175 -0
- modules/emk/tests/test_sleep_cycle.py +335 -0
- modules/emk/tests/test_store_anti_patterns.py +239 -0
- modules/iatp/.github/workflows/docker-build.yml +124 -0
- modules/iatp/.github/workflows/publish.yml +174 -0
- modules/iatp/.github/workflows/python-package.yml +121 -0
- modules/iatp/.gitignore +67 -0
- modules/iatp/.pre-commit-config.yaml +64 -0
- modules/iatp/CHANGELOG.md +120 -0
- modules/iatp/Dockerfile +91 -0
- modules/iatp/IMPLEMENTATION_SUMMARY.md +218 -0
- modules/iatp/MANIFEST.in +9 -0
- modules/iatp/README.md +180 -0
- modules/iatp/docker/Dockerfile.agent +27 -0
- modules/iatp/docker/Dockerfile.sidecar-python +86 -0
- modules/iatp/docker/README.md +258 -0
- modules/iatp/docker-compose.yml +194 -0
- modules/iatp/docs/ARCHITECTURE.md +243 -0
- modules/iatp/docs/CLI_GUIDE.md +220 -0
- modules/iatp/docs/DEPLOYMENT.md +304 -0
- modules/iatp/examples/README.md +132 -0
- modules/iatp/examples/backend_agent.py +39 -0
- modules/iatp/examples/client.py +168 -0
- modules/iatp/examples/demo_attestation_reputation.py +274 -0
- modules/iatp/examples/demo_client.py +240 -0
- modules/iatp/examples/demo_rbac.py +143 -0
- modules/iatp/examples/integration_demo.py +245 -0
- modules/iatp/examples/manifests/coder_agent.json +20 -0
- modules/iatp/examples/manifests/reviewer_agent.json +19 -0
- modules/iatp/examples/manifests/secure_bank.json +14 -0
- modules/iatp/examples/manifests/standard_agent.json +14 -0
- modules/iatp/examples/manifests/untrusted_honeypot.json +14 -0
- modules/iatp/examples/run_secure_bank_sidecar.py +85 -0
- modules/iatp/examples/run_sidecar.py +105 -0
- modules/iatp/examples/run_untrusted_sidecar.py +77 -0
- modules/iatp/examples/secure_bank_agent.py +138 -0
- modules/iatp/examples/test_untrusted.py +82 -0
- modules/iatp/examples/untrusted_agent.py +119 -0
- modules/iatp/experiments/README.md +58 -0
- modules/iatp/experiments/cascading_hallucination/README.md +149 -0
- modules/iatp/experiments/cascading_hallucination/agent_a_user.py +41 -0
- modules/iatp/experiments/cascading_hallucination/agent_b_summarizer.py +54 -0
- modules/iatp/experiments/cascading_hallucination/agent_c_database.py +47 -0
- modules/iatp/experiments/cascading_hallucination/proof_of_concept.py +290 -0
- modules/iatp/experiments/cascading_hallucination/run_experiment.py +226 -0
- modules/iatp/experiments/cascading_hallucination/sidecar_c.py +61 -0
- modules/iatp/experiments/reproduce_results.py +574 -0
- modules/iatp/experiments/results.json +2336 -0
- modules/iatp/iatp/__init__.py +164 -0
- modules/iatp/iatp/attestation.py +401 -0
- modules/iatp/iatp/cli.py +253 -0
- modules/iatp/iatp/hf_utils.py +469 -0
- modules/iatp/iatp/ipc_pipes.py +578 -0
- modules/iatp/iatp/main.py +410 -0
- modules/iatp/iatp/models/__init__.py +445 -0
- modules/iatp/iatp/policy_engine.py +335 -0
- modules/iatp/iatp/py.typed +2 -0
- modules/iatp/iatp/recovery.py +319 -0
- modules/iatp/iatp/security/__init__.py +268 -0
- modules/iatp/iatp/sidecar/__init__.py +517 -0
- modules/iatp/iatp/telemetry/__init__.py +162 -0
- modules/iatp/iatp/tests/__init__.py +1 -0
- modules/iatp/iatp/tests/test_attestation.py +368 -0
- modules/iatp/iatp/tests/test_cli.py +129 -0
- modules/iatp/iatp/tests/test_models.py +128 -0
- modules/iatp/iatp/tests/test_policy_engine.py +345 -0
- modules/iatp/iatp/tests/test_recovery.py +279 -0
- modules/iatp/iatp/tests/test_security.py +220 -0
- modules/iatp/iatp/tests/test_sidecar.py +165 -0
- modules/iatp/iatp/tests/test_telemetry.py +173 -0
- modules/iatp/paper/BLOG.md +307 -0
- modules/iatp/paper/PAPER.md +236 -0
- modules/iatp/paper/RFC_SUBMISSION.md +299 -0
- modules/iatp/paper/whitepaper.md +369 -0
- modules/iatp/proto/README.md +200 -0
- modules/iatp/proto/generate_stubs.py +81 -0
- modules/iatp/proto/iatp.proto +552 -0
- modules/iatp/pyproject.toml +180 -0
- modules/iatp/requirements-dev.txt +2 -0
- modules/iatp/requirements.txt +6 -0
- modules/iatp/setup.py +60 -0
- modules/iatp/sidecar/README.md +487 -0
- modules/iatp/sidecar/go/Dockerfile +32 -0
- modules/iatp/sidecar/go/README.md +237 -0
- modules/iatp/sidecar/go/go.mod +8 -0
- modules/iatp/sidecar/go/main.go +488 -0
- modules/iatp/spec/001-handshake.md +436 -0
- modules/iatp/spec/002-reversibility.md +394 -0
- modules/iatp/spec/schema/capability_manifest.json +266 -0
- modules/iatp/test_integration.py +310 -0
- modules/mcp-kernel-server/README.md +261 -0
- modules/mcp-kernel-server/pyproject.toml +60 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/__init__.py +26 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/cli.py +229 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/resources.py +215 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/server.py +562 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/tools.py +1172 -0
- modules/mute-agent/.github/workflows/safety_check.yml +45 -0
- modules/mute-agent/.gitignore +53 -0
- modules/mute-agent/ARCHITECTURE.md +531 -0
- modules/mute-agent/BENCHMARK_GUIDE.md +384 -0
- modules/mute-agent/COMPLETION_SUMMARY.md +293 -0
- modules/mute-agent/EXPERIMENT_SUMMARY.md +318 -0
- modules/mute-agent/IMPLEMENTATION_SUMMARY.md +212 -0
- modules/mute-agent/LICENSE +21 -0
- modules/mute-agent/PHASE3_SUMMARY.md +297 -0
- modules/mute-agent/README.md +360 -0
- modules/mute-agent/STEEL_MAN_RESULTS.md +353 -0
- modules/mute-agent/USAGE.md +505 -0
- modules/mute-agent/V2_IMPLEMENTATION_SUMMARY.md +253 -0
- modules/mute-agent/V2_STEEL_MAN_IMPLEMENTATION.md +274 -0
- modules/mute-agent/VERIFICATION_REPORT.md +435 -0
- modules/mute-agent/charts/cost_comparison.png +0 -0
- modules/mute-agent/charts/cost_vs_ambiguity.png +0 -0
- modules/mute-agent/charts/metrics_comparison.png +0 -0
- modules/mute-agent/charts/scenario_breakdown.png +0 -0
- modules/mute-agent/charts/trace_attack_blocked.html +140 -0
- modules/mute-agent/charts/trace_attack_blocked.png +0 -0
- modules/mute-agent/charts/trace_failure.html +140 -0
- modules/mute-agent/charts/trace_failure.png +0 -0
- modules/mute-agent/charts/trace_success.html +140 -0
- modules/mute-agent/charts/trace_success.png +0 -0
- modules/mute-agent/examples/__init__.py +1 -0
- modules/mute-agent/examples/advanced_example.py +384 -0
- modules/mute-agent/examples/graph_debugger_demo.py +241 -0
- modules/mute-agent/examples/listener_example.py +297 -0
- modules/mute-agent/examples/simple_example.py +242 -0
- modules/mute-agent/examples/steel_man_demo.py +297 -0
- modules/mute-agent/experiments/README.md +135 -0
- modules/mute-agent/experiments/__init__.py +3 -0
- modules/mute-agent/experiments/agent_comparison.csv +6 -0
- modules/mute-agent/experiments/agent_comparison_50runs.csv +6 -0
- modules/mute-agent/experiments/ambiguity_test.py +335 -0
- modules/mute-agent/experiments/ambiguity_test_results.csv +31 -0
- modules/mute-agent/experiments/ambiguity_test_results_50runs.csv +51 -0
- modules/mute-agent/experiments/baseline_agent.py +189 -0
- modules/mute-agent/experiments/benchmark.py +402 -0
- modules/mute-agent/experiments/demo.py +172 -0
- modules/mute-agent/experiments/generate_cost_curve.py +474 -0
- modules/mute-agent/experiments/jailbreak_test.py +137 -0
- modules/mute-agent/experiments/latent_state_scenario.py +361 -0
- modules/mute-agent/experiments/mute_agent_experiment.py +349 -0
- modules/mute-agent/experiments/run_extended_experiment.py +40 -0
- modules/mute-agent/experiments/run_v2_experiments.py +266 -0
- modules/mute-agent/experiments/run_v2_experiments_auto.py +247 -0
- modules/mute-agent/experiments/v2_scenarios/README.md +214 -0
- modules/mute-agent/experiments/v2_scenarios/__init__.py +4 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_1_deep_dependency.py +325 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_2_adversarial.py +328 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_3_false_positive.py +303 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_4_performance.py +319 -0
- modules/mute-agent/experiments/visualize.py +400 -0
- modules/mute-agent/mute_agent/__init__.py +66 -0
- modules/mute-agent/mute_agent/core/__init__.py +1 -0
- modules/mute-agent/mute_agent/core/execution_agent.py +164 -0
- modules/mute-agent/mute_agent/core/handshake_protocol.py +199 -0
- modules/mute-agent/mute_agent/core/reasoning_agent.py +236 -0
- modules/mute-agent/mute_agent/knowledge_graph/__init__.py +1 -0
- modules/mute-agent/mute_agent/knowledge_graph/graph_elements.py +63 -0
- modules/mute-agent/mute_agent/knowledge_graph/multidimensional_graph.py +168 -0
- modules/mute-agent/mute_agent/knowledge_graph/subgraph.py +222 -0
- modules/mute-agent/mute_agent/listener/__init__.py +41 -0
- modules/mute-agent/mute_agent/listener/adapters/__init__.py +29 -0
- modules/mute-agent/mute_agent/listener/adapters/base_adapter.py +187 -0
- modules/mute-agent/mute_agent/listener/adapters/caas_adapter.py +342 -0
- modules/mute-agent/mute_agent/listener/adapters/control_plane_adapter.py +434 -0
- modules/mute-agent/mute_agent/listener/adapters/iatp_adapter.py +330 -0
- modules/mute-agent/mute_agent/listener/adapters/scak_adapter.py +249 -0
- modules/mute-agent/mute_agent/listener/listener.py +608 -0
- modules/mute-agent/mute_agent/listener/state_observer.py +434 -0
- modules/mute-agent/mute_agent/listener/threshold_config.py +311 -0
- modules/mute-agent/mute_agent/super_system/__init__.py +1 -0
- modules/mute-agent/mute_agent/super_system/router.py +202 -0
- modules/mute-agent/mute_agent/visualization/__init__.py +8 -0
- modules/mute-agent/mute_agent/visualization/graph_debugger.py +495 -0
- modules/mute-agent/requirements-dev.txt +6 -0
- modules/mute-agent/requirements.txt +9 -0
- modules/mute-agent/setup.py +64 -0
- modules/mute-agent/src/__init__.py +0 -0
- modules/mute-agent/src/agents/__init__.py +0 -0
- modules/mute-agent/src/agents/baseline_agent.py +524 -0
- modules/mute-agent/src/agents/interactive_agent.py +113 -0
- modules/mute-agent/src/agents/mute_agent.py +622 -0
- modules/mute-agent/src/benchmarks/__init__.py +0 -0
- modules/mute-agent/src/benchmarks/evaluator.py +481 -0
- modules/mute-agent/src/benchmarks/scenarios.json +985 -0
- modules/mute-agent/src/core/__init__.py +0 -0
- modules/mute-agent/src/core/mock_state.py +320 -0
- modules/mute-agent/src/core/tools.py +441 -0
- modules/nexus/__init__.py +49 -0
- modules/nexus/arbiter.py +357 -0
- modules/nexus/client.py +464 -0
- modules/nexus/dmz.py +417 -0
- modules/nexus/escrow.py +428 -0
- modules/nexus/exceptions.py +284 -0
- modules/nexus/registry.py +391 -0
- modules/nexus/reputation.py +423 -0
- modules/nexus/schemas/__init__.py +49 -0
- modules/nexus/schemas/compliance.py +274 -0
- modules/nexus/schemas/escrow.py +249 -0
- modules/nexus/schemas/manifest.py +223 -0
- modules/nexus/schemas/receipt.py +206 -0
- modules/observability/README.md +192 -0
- modules/observability/alertmanager/alertmanager.yml +116 -0
- modules/observability/alerts/agent-os-alerts.yaml +197 -0
- modules/observability/docker-compose.yml +128 -0
- modules/observability/grafana/dashboards/agent-os-amb.json +448 -0
- modules/observability/grafana/dashboards/agent-os-cmvk.json +441 -0
- modules/observability/grafana/dashboards/agent-os-overview.json +268 -0
- modules/observability/grafana/dashboards/agent-os-performance.json +15 -0
- modules/observability/grafana/dashboards/agent-os-safety.json +50 -0
- modules/observability/grafana/provisioning/dashboards/dashboards.yml +15 -0
- modules/observability/grafana/provisioning/datasources/datasources.yml +33 -0
- modules/observability/otel/otel-collector-config.yml +61 -0
- modules/observability/prometheus/prometheus.yml +63 -0
- modules/observability/pyproject.toml +53 -0
- modules/observability/scripts/export_dashboards.py +55 -0
- modules/observability/src/agent_os_observability/__init__.py +25 -0
- modules/observability/src/agent_os_observability/dashboards.py +896 -0
- modules/observability/src/agent_os_observability/metrics.py +396 -0
- modules/observability/src/agent_os_observability/server.py +221 -0
- modules/observability/src/agent_os_observability/tracer.py +226 -0
- modules/primitives/.gitignore +8 -0
- modules/primitives/README.md +62 -0
- modules/primitives/agent_primitives/__init__.py +22 -0
- modules/primitives/agent_primitives/failures.py +82 -0
- modules/primitives/agent_primitives/py.typed +0 -0
- modules/primitives/pyproject.toml +68 -0
- modules/scak/.github/copilot-instructions.md +396 -0
- modules/scak/.github/workflows/release.yml +117 -0
- modules/scak/.gitignore +32 -0
- modules/scak/CHANGELOG.md +173 -0
- modules/scak/CITATION.cff +62 -0
- modules/scak/CONTRIBUTING.md +429 -0
- modules/scak/Dockerfile +58 -0
- modules/scak/ENTERPRISE_FEATURES.md +518 -0
- modules/scak/IMPLEMENTATION_SUMMARY.md +206 -0
- modules/scak/LIMITATIONS.md +565 -0
- modules/scak/MANIFEST.in +16 -0
- modules/scak/NOVELTY.md +535 -0
- modules/scak/README.md +928 -0
- modules/scak/RESEARCH.md +670 -0
- modules/scak/agent_kernel/__init__.py +66 -0
- modules/scak/agent_kernel/analyzer.py +432 -0
- modules/scak/agent_kernel/auditor.py +31 -0
- modules/scak/agent_kernel/completeness_auditor.py +234 -0
- modules/scak/agent_kernel/detector.py +200 -0
- modules/scak/agent_kernel/kernel.py +741 -0
- modules/scak/agent_kernel/memory_manager.py +82 -0
- modules/scak/agent_kernel/models.py +372 -0
- modules/scak/agent_kernel/nudge_mechanism.py +260 -0
- modules/scak/agent_kernel/outcome_analyzer.py +335 -0
- modules/scak/agent_kernel/patcher.py +579 -0
- modules/scak/agent_kernel/semantic_analyzer.py +313 -0
- modules/scak/agent_kernel/semantic_purge.py +346 -0
- modules/scak/agent_kernel/simulator.py +447 -0
- modules/scak/agent_kernel/teacher.py +82 -0
- modules/scak/agent_kernel/triage.py +149 -0
- modules/scak/build_and_publish.ps1 +74 -0
- modules/scak/build_and_publish.sh +74 -0
- modules/scak/cli.py +471 -0
- modules/scak/dashboard.py +462 -0
- modules/scak/datasets/DATASET_CARD.md +219 -0
- modules/scak/datasets/README.md +143 -0
- modules/scak/datasets/gaia_vague_queries/vague_queries.json +262 -0
- modules/scak/datasets/hf_upload/README.md +219 -0
- modules/scak/datasets/hf_upload/scak_gaia_laziness.jsonl +50 -0
- modules/scak/datasets/prepare_hf_datasets.py +145 -0
- modules/scak/datasets/red_team/jailbreak_patterns.json +202 -0
- modules/scak/docker-compose.yml +99 -0
- modules/scak/docs/Adaptive-Memory-Hierarchy.md +319 -0
- modules/scak/docs/Data-Contracts-and-Schemas.md +285 -0
- modules/scak/docs/Dual-Loop-Architecture.md +344 -0
- modules/scak/docs/Enhanced-Features.md +612 -0
- modules/scak/docs/LANGCHAIN_INTEGRATION.md +572 -0
- modules/scak/docs/README.md +128 -0
- modules/scak/docs/Reference-Implementations.md +163 -0
- modules/scak/docs/SCAK_V2.md +374 -0
- modules/scak/docs/Three-Failure-Types.md +178 -0
- modules/scak/examples/basic_example.py +155 -0
- modules/scak/examples/circuit_breaker_lazy_eval_demo.py +243 -0
- modules/scak/examples/langchain_integration_example.py +339 -0
- modules/scak/examples/layer4_demo.py +243 -0
- modules/scak/examples/production_features_demo.py +353 -0
- modules/scak/examples/quick_demo.py +79 -0
- modules/scak/examples/scak_v2_demo.py +252 -0
- modules/scak/experiments/README.md +438 -0
- modules/scak/experiments/ablation_studies/README.md +192 -0
- modules/scak/experiments/ablation_studies/ablation_no_audit.py +116 -0
- modules/scak/experiments/ablation_studies/ablation_no_purge.py +133 -0
- modules/scak/experiments/chaos_engineering/README.md +332 -0
- modules/scak/experiments/context_efficiency_test.py +328 -0
- modules/scak/experiments/gaia_benchmark/README.md +208 -0
- modules/scak/experiments/laziness_benchmark.py +179 -0
- modules/scak/experiments/long_horizon_task_experiment.py +252 -0
- modules/scak/experiments/multi_agent_rag_experiment.py +284 -0
- modules/scak/experiments/results/ablation_table.md +12 -0
- modules/scak/experiments/results/long_horizon.json +36 -0
- modules/scak/experiments/results/multi_agent_rag.json +66 -0
- modules/scak/experiments/run_comprehensive_ablations.py +332 -0
- modules/scak/experiments/test_auditor_patcher_integration.py +251 -0
- modules/scak/notebooks/getting_started.ipynb +33 -0
- modules/scak/paper/ARXIV_SUBMISSION_METADATA.txt +109 -0
- modules/scak/paper/PAPER_CHECKLIST.md +304 -0
- modules/scak/paper/Paper.pdf +0 -0
- modules/scak/paper/README.md +113 -0
- modules/scak/paper/appendix.md +351 -0
- modules/scak/paper/arxiv/bibliography.bib +284 -0
- modules/scak/paper/arxiv/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/arxiv/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/arxiv/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/arxiv/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/arxiv/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/arxiv/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/arxiv/main.aux +103 -0
- modules/scak/paper/arxiv/main.bbl +113 -0
- modules/scak/paper/arxiv/main.blg +55 -0
- modules/scak/paper/arxiv/main.out +31 -0
- modules/scak/paper/arxiv/main.pdf +0 -0
- modules/scak/paper/arxiv/main.tex +482 -0
- modules/scak/paper/arxiv_submission/bibliography.bib +284 -0
- modules/scak/paper/arxiv_submission/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/arxiv_submission/main.aux +103 -0
- modules/scak/paper/arxiv_submission/main.bbl +113 -0
- modules/scak/paper/arxiv_submission/main.blg +55 -0
- modules/scak/paper/arxiv_submission/main.out +31 -0
- modules/scak/paper/arxiv_submission/main.pdf +0 -0
- modules/scak/paper/arxiv_submission/main.tex +482 -0
- modules/scak/paper/arxiv_submission.tar.gz +0 -0
- modules/scak/paper/bibliography.bib +284 -0
- modules/scak/paper/build.sh +55 -0
- modules/scak/paper/figures/README.md +32 -0
- modules/scak/paper/figures/fig1_ooda_architecture.md +75 -0
- modules/scak/paper/figures/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/figures/fig1_ooda_architecture.png +0 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.md +83 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.png +0 -0
- modules/scak/paper/figures/fig3_gaia_results.md +64 -0
- modules/scak/paper/figures/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/figures/fig3_gaia_results.png +0 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.md +64 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.png +0 -0
- modules/scak/paper/figures/fig5_context_reduction.md +71 -0
- modules/scak/paper/figures/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/figures/fig5_context_reduction.png +0 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.md +80 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.png +0 -0
- modules/scak/paper/figures/generate_figures.py +463 -0
- modules/scak/paper/main.aux +103 -0
- modules/scak/paper/main.bbl +113 -0
- modules/scak/paper/main.blg +55 -0
- modules/scak/paper/main.md +192 -0
- modules/scak/paper/main.out +31 -0
- modules/scak/paper/main.pdf +0 -0
- modules/scak/paper/main.tex +482 -0
- modules/scak/reproducibility/ABLATIONS.md +225 -0
- modules/scak/reproducibility/Dockerfile.reproducibility +34 -0
- modules/scak/reproducibility/README.md +421 -0
- modules/scak/reproducibility/requirements-pinned.txt +32 -0
- modules/scak/reproducibility/run_all_experiments.py +395 -0
- modules/scak/reproducibility/seed_control.py +53 -0
- modules/scak/reproducibility/statistical_analysis.py +302 -0
- modules/scak/requirements.txt +50 -0
- modules/scak/setup.py +93 -0
- modules/scak/src/__init__.py +124 -0
- modules/scak/src/agents/__init__.py +13 -0
- modules/scak/src/agents/conflict_resolution.py +732 -0
- modules/scak/src/agents/orchestrator.py +761 -0
- modules/scak/src/agents/pubsub.py +484 -0
- modules/scak/src/agents/shadow_teacher.py +344 -0
- modules/scak/src/agents/swarm.py +661 -0
- modules/scak/src/agents/worker.py +357 -0
- modules/scak/src/integrations/__init__.py +81 -0
- modules/scak/src/integrations/cmvk_adapter.py +430 -0
- modules/scak/src/integrations/control_plane_adapter.py +601 -0
- modules/scak/src/integrations/langchain_integration.py +902 -0
- modules/scak/src/interfaces/__init__.py +59 -0
- modules/scak/src/interfaces/llm_clients.py +505 -0
- modules/scak/src/interfaces/openapi_tools.py +611 -0
- modules/scak/src/interfaces/plugin_system.py +605 -0
- modules/scak/src/interfaces/protocols.py +365 -0
- modules/scak/src/interfaces/telemetry.py +464 -0
- modules/scak/src/interfaces/tool_registry.py +547 -0
- modules/scak/src/kernel/__init__.py +100 -0
- modules/scak/src/kernel/auditor.py +305 -0
- modules/scak/src/kernel/circuit_breaker.py +398 -0
- modules/scak/src/kernel/core.py +724 -0
- modules/scak/src/kernel/distributed.py +667 -0
- modules/scak/src/kernel/evolution.py +455 -0
- modules/scak/src/kernel/failover.py +621 -0
- modules/scak/src/kernel/governance.py +710 -0
- modules/scak/src/kernel/governance_v2.py +603 -0
- modules/scak/src/kernel/lazy_evaluator.py +514 -0
- modules/scak/src/kernel/load_testing.py +633 -0
- modules/scak/src/kernel/memory.py +945 -0
- modules/scak/src/kernel/patcher.py +581 -0
- modules/scak/src/kernel/rubric.py +419 -0
- modules/scak/src/kernel/schemas.py +390 -0
- modules/scak/src/kernel/skill_mapper.py +309 -0
- modules/scak/src/kernel/triage.py +149 -0
- modules/scak/src/mocks/__init__.py +99 -0
- modules/scak/tests/__init__.py +1 -0
- modules/scak/tests/test_circuit_breaker.py +403 -0
- modules/scak/tests/test_conflict_resolution.py +287 -0
- modules/scak/tests/test_dual_loop.py +463 -0
- modules/scak/tests/test_enhanced_features.py +421 -0
- modules/scak/tests/test_failover_and_load.py +438 -0
- modules/scak/tests/test_governance.py +185 -0
- modules/scak/tests/test_kernel.py +359 -0
- modules/scak/tests/test_langchain_integration.py +451 -0
- modules/scak/tests/test_lazy_evaluator.py +465 -0
- modules/scak/tests/test_llm_clients.py +122 -0
- modules/scak/tests/test_memory_controller.py +528 -0
- modules/scak/tests/test_orchestrator.py +181 -0
- modules/scak/tests/test_phase3_integration.py +265 -0
- modules/scak/tests/test_pubsub_swarm.py +203 -0
- modules/scak/tests/test_reference_implementations.py +240 -0
- modules/scak/tests/test_rubric.py +363 -0
- modules/scak/tests/test_scak_v2.py +651 -0
- modules/scak/tests/test_skill_mapper.py +217 -0
- modules/scak/tests/test_specific_failures.py +393 -0
- modules/scak/tests/test_tool_registry.py +264 -0
- modules/scak/tests/test_tools_and_plugins.py +303 -0
- modules/scak/tests/test_triage.py +596 -0
- modules/scak/tests/test_write_through.py +319 -0
- agent_os_kernel-1.1.0.dist-info/METADATA +0 -400
- agent_os_kernel-1.1.0.dist-info/RECORD +0 -12
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.2.0.dist-info}/WHEEL +0 -0
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.2.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Nudge Mechanism - Automatic retry with targeted prompting.
|
|
3
|
+
|
|
4
|
+
When an agent gives up (GIVE_UP outcome), this module automatically
|
|
5
|
+
injects a "nudge" prompt to encourage the agent to try harder.
|
|
6
|
+
|
|
7
|
+
This implements "The Nudge" pattern from industry best practices:
|
|
8
|
+
- Automatic intervention without human involvement
|
|
9
|
+
- Targeted prompting based on the give-up type
|
|
10
|
+
- Tracking of nudge effectiveness
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import logging
|
|
14
|
+
import uuid
|
|
15
|
+
from typing import Optional, List
|
|
16
|
+
from datetime import datetime
|
|
17
|
+
|
|
18
|
+
from .models import AgentOutcome, NudgeResult, GiveUpSignal
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class NudgeMechanism:
|
|
24
|
+
"""
|
|
25
|
+
Implements automatic nudging when agents give up.
|
|
26
|
+
|
|
27
|
+
The "nudge" is a system prompt injection that asks the agent
|
|
28
|
+
to confirm it executed the task correctly and encourages
|
|
29
|
+
a more thorough attempt.
|
|
30
|
+
|
|
31
|
+
Example nudge:
|
|
32
|
+
"You claimed no data was found. Please confirm you executed the
|
|
33
|
+
search tool with the correct parameters and checked all data sources."
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
def __init__(self):
|
|
37
|
+
"""Initialize the nudge mechanism."""
|
|
38
|
+
self.nudge_history: List[NudgeResult] = []
|
|
39
|
+
self.nudge_templates = self._load_nudge_templates()
|
|
40
|
+
|
|
41
|
+
def _load_nudge_templates(self) -> dict:
|
|
42
|
+
"""Load nudge prompt templates for different give-up signals."""
|
|
43
|
+
return {
|
|
44
|
+
GiveUpSignal.NO_DATA_FOUND: (
|
|
45
|
+
"You claimed no data was found. Please confirm you:\n"
|
|
46
|
+
"1. Executed the search/query tool with the correct parameters\n"
|
|
47
|
+
"2. Checked all relevant data sources including archives\n"
|
|
48
|
+
"3. Used appropriate time ranges and filters\n"
|
|
49
|
+
"Please retry with a more comprehensive search strategy."
|
|
50
|
+
),
|
|
51
|
+
GiveUpSignal.CANNOT_ANSWER: (
|
|
52
|
+
"You indicated you cannot answer this question. Please confirm you:\n"
|
|
53
|
+
"1. Have access to all necessary tools and resources\n"
|
|
54
|
+
"2. Attempted to use available tools to gather information\n"
|
|
55
|
+
"3. Considered alternative approaches to the problem\n"
|
|
56
|
+
"Please retry with a different strategy."
|
|
57
|
+
),
|
|
58
|
+
GiveUpSignal.NO_RESULTS: (
|
|
59
|
+
"You reported no results. Please confirm you:\n"
|
|
60
|
+
"1. Used the correct query syntax and parameters\n"
|
|
61
|
+
"2. Checked for typos or incorrect field names\n"
|
|
62
|
+
"3. Tried alternative search terms or filters\n"
|
|
63
|
+
"Please retry with validated parameters."
|
|
64
|
+
),
|
|
65
|
+
GiveUpSignal.NOT_AVAILABLE: (
|
|
66
|
+
"You indicated the resource is not available. Please confirm you:\n"
|
|
67
|
+
"1. Checked the resource location and accessibility\n"
|
|
68
|
+
"2. Verified you have the correct permissions\n"
|
|
69
|
+
"3. Checked for alternative access methods\n"
|
|
70
|
+
"Please retry with proper access verification."
|
|
71
|
+
),
|
|
72
|
+
GiveUpSignal.INSUFFICIENT_INFO: (
|
|
73
|
+
"You claimed insufficient information. Please confirm you:\n"
|
|
74
|
+
"1. Attempted to gather additional context from available sources\n"
|
|
75
|
+
"2. Used all available tools to retrieve more information\n"
|
|
76
|
+
"3. Considered what information is actually required vs. nice-to-have\n"
|
|
77
|
+
"Please retry with available information."
|
|
78
|
+
),
|
|
79
|
+
GiveUpSignal.UNKNOWN: (
|
|
80
|
+
"Your response suggests you may have given up. Please:\n"
|
|
81
|
+
"1. Re-read the user's request carefully\n"
|
|
82
|
+
"2. Use all available tools to attempt the task\n"
|
|
83
|
+
"3. Provide a specific explanation if truly impossible\n"
|
|
84
|
+
"Please retry with full effort."
|
|
85
|
+
)
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
def generate_nudge(
|
|
89
|
+
self,
|
|
90
|
+
outcome: AgentOutcome,
|
|
91
|
+
include_tool_reminder: bool = True
|
|
92
|
+
) -> str:
|
|
93
|
+
"""
|
|
94
|
+
Generate a nudge prompt for the given outcome.
|
|
95
|
+
|
|
96
|
+
Args:
|
|
97
|
+
outcome: The agent outcome that triggered the nudge
|
|
98
|
+
include_tool_reminder: Whether to include tool usage reminder
|
|
99
|
+
|
|
100
|
+
Returns:
|
|
101
|
+
Nudge prompt string
|
|
102
|
+
"""
|
|
103
|
+
signal = outcome.give_up_signal or GiveUpSignal.UNKNOWN
|
|
104
|
+
template = self.nudge_templates.get(signal, self.nudge_templates[GiveUpSignal.UNKNOWN])
|
|
105
|
+
|
|
106
|
+
nudge_prompt = template
|
|
107
|
+
|
|
108
|
+
# Add context-specific enhancements
|
|
109
|
+
if include_tool_reminder and outcome.tool_telemetry:
|
|
110
|
+
called_tools = [t.tool_name for t in outcome.tool_telemetry]
|
|
111
|
+
if called_tools:
|
|
112
|
+
# Tools were called
|
|
113
|
+
nudge_prompt += f"\n\nNote: You previously used tools: {', '.join(called_tools)}. Consider using additional tools or different parameters."
|
|
114
|
+
else:
|
|
115
|
+
# Telemetry exists but no tools were called
|
|
116
|
+
nudge_prompt += "\n\nNote: It appears no tools were called. Please use available tools to complete the task."
|
|
117
|
+
elif include_tool_reminder:
|
|
118
|
+
# No telemetry at all
|
|
119
|
+
nudge_prompt += "\n\nNote: It appears no tools were called. Please use available tools to complete the task."
|
|
120
|
+
|
|
121
|
+
# Add original prompt reminder
|
|
122
|
+
nudge_prompt += f"\n\nOriginal request: {outcome.user_prompt}"
|
|
123
|
+
|
|
124
|
+
return nudge_prompt
|
|
125
|
+
|
|
126
|
+
def should_nudge(
|
|
127
|
+
self,
|
|
128
|
+
outcome: AgentOutcome,
|
|
129
|
+
max_nudges: int = 1
|
|
130
|
+
) -> bool:
|
|
131
|
+
"""
|
|
132
|
+
Determine if we should nudge for this outcome.
|
|
133
|
+
|
|
134
|
+
Args:
|
|
135
|
+
outcome: The agent outcome
|
|
136
|
+
max_nudges: Maximum number of nudges per agent/task
|
|
137
|
+
|
|
138
|
+
Returns:
|
|
139
|
+
True if nudge should be applied
|
|
140
|
+
"""
|
|
141
|
+
# Check if outcome is a give-up
|
|
142
|
+
from .models import OutcomeType
|
|
143
|
+
if outcome.outcome_type != OutcomeType.GIVE_UP:
|
|
144
|
+
return False
|
|
145
|
+
|
|
146
|
+
# Check if we've already nudged this agent recently
|
|
147
|
+
recent_nudges = [
|
|
148
|
+
n for n in self.nudge_history
|
|
149
|
+
if n.original_outcome.agent_id == outcome.agent_id
|
|
150
|
+
and (datetime.utcnow() - n.original_outcome.timestamp).total_seconds() < 300 # 5 min
|
|
151
|
+
]
|
|
152
|
+
|
|
153
|
+
if len(recent_nudges) >= max_nudges:
|
|
154
|
+
logger.info(f"Max nudges ({max_nudges}) reached for agent {outcome.agent_id}")
|
|
155
|
+
return False
|
|
156
|
+
|
|
157
|
+
return True
|
|
158
|
+
|
|
159
|
+
def record_nudge_result(
|
|
160
|
+
self,
|
|
161
|
+
outcome: AgentOutcome,
|
|
162
|
+
nudge_prompt: str,
|
|
163
|
+
retry_response: str,
|
|
164
|
+
retry_successful: bool
|
|
165
|
+
) -> NudgeResult:
|
|
166
|
+
"""
|
|
167
|
+
Record the result of a nudge attempt.
|
|
168
|
+
|
|
169
|
+
Args:
|
|
170
|
+
outcome: Original outcome that triggered nudge
|
|
171
|
+
nudge_prompt: The nudge prompt that was used
|
|
172
|
+
retry_response: Agent's response after nudge
|
|
173
|
+
retry_successful: Whether the retry was successful
|
|
174
|
+
|
|
175
|
+
Returns:
|
|
176
|
+
NudgeResult object
|
|
177
|
+
"""
|
|
178
|
+
nudge_id = f"nudge-{uuid.uuid4().hex[:8]}"
|
|
179
|
+
|
|
180
|
+
# Detect improvement
|
|
181
|
+
improvement = self._detect_improvement(
|
|
182
|
+
original_response=outcome.agent_response,
|
|
183
|
+
retry_response=retry_response
|
|
184
|
+
)
|
|
185
|
+
|
|
186
|
+
result = NudgeResult(
|
|
187
|
+
nudge_id=nudge_id,
|
|
188
|
+
original_outcome=outcome,
|
|
189
|
+
nudge_prompt=nudge_prompt,
|
|
190
|
+
retry_response=retry_response,
|
|
191
|
+
retry_successful=retry_successful,
|
|
192
|
+
improvement_detected=improvement
|
|
193
|
+
)
|
|
194
|
+
|
|
195
|
+
self.nudge_history.append(result)
|
|
196
|
+
|
|
197
|
+
logger.info(f"Nudge {nudge_id}: success={retry_successful}, improvement={improvement}")
|
|
198
|
+
|
|
199
|
+
return result
|
|
200
|
+
|
|
201
|
+
def _detect_improvement(
|
|
202
|
+
self,
|
|
203
|
+
original_response: str,
|
|
204
|
+
retry_response: str
|
|
205
|
+
) -> bool:
|
|
206
|
+
"""
|
|
207
|
+
Detect if retry response shows improvement over original.
|
|
208
|
+
|
|
209
|
+
Simple heuristic: longer response with less refusal language.
|
|
210
|
+
"""
|
|
211
|
+
# Length improvement
|
|
212
|
+
length_improved = len(retry_response) > len(original_response) * 1.2
|
|
213
|
+
|
|
214
|
+
# Refusal language reduction
|
|
215
|
+
refusal_words = ["no data", "cannot", "can't", "unable", "not found"]
|
|
216
|
+
original_refusals = sum(1 for word in refusal_words if word in original_response.lower())
|
|
217
|
+
retry_refusals = sum(1 for word in refusal_words if word in retry_response.lower())
|
|
218
|
+
refusal_reduced = retry_refusals < original_refusals
|
|
219
|
+
|
|
220
|
+
# Check for data/results mention
|
|
221
|
+
data_indicators = ["found", "results", "data shows", "here is", "here are"]
|
|
222
|
+
has_data_now = any(indicator in retry_response.lower() for indicator in data_indicators)
|
|
223
|
+
|
|
224
|
+
return length_improved or refusal_reduced or has_data_now
|
|
225
|
+
|
|
226
|
+
def get_nudge_stats(self) -> dict:
|
|
227
|
+
"""Get statistics about nudge effectiveness."""
|
|
228
|
+
if not self.nudge_history:
|
|
229
|
+
return {
|
|
230
|
+
"total_nudges": 0,
|
|
231
|
+
"successful_nudges": 0,
|
|
232
|
+
"success_rate": 0.0,
|
|
233
|
+
"improvements": 0,
|
|
234
|
+
"improvement_rate": 0.0
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
successful = sum(1 for n in self.nudge_history if n.retry_successful)
|
|
238
|
+
improvements = sum(1 for n in self.nudge_history if n.improvement_detected)
|
|
239
|
+
|
|
240
|
+
return {
|
|
241
|
+
"total_nudges": len(self.nudge_history),
|
|
242
|
+
"successful_nudges": successful,
|
|
243
|
+
"success_rate": successful / len(self.nudge_history),
|
|
244
|
+
"improvements": improvements,
|
|
245
|
+
"improvement_rate": improvements / len(self.nudge_history),
|
|
246
|
+
"recent_nudges": self.nudge_history[-10:] # Last 10 nudges
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
def get_nudge_history(
|
|
250
|
+
self,
|
|
251
|
+
agent_id: Optional[str] = None,
|
|
252
|
+
limit: int = 100
|
|
253
|
+
) -> List[NudgeResult]:
|
|
254
|
+
"""Get nudge history with optional filtering."""
|
|
255
|
+
history = self.nudge_history[-limit:]
|
|
256
|
+
|
|
257
|
+
if agent_id:
|
|
258
|
+
history = [n for n in history if n.original_outcome.agent_id == agent_id]
|
|
259
|
+
|
|
260
|
+
return history
|
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Outcome Analyzer - Filters agent outcomes for competence issues.
|
|
3
|
+
|
|
4
|
+
This is part of Loop 2 (Alignment Engine) that identifies when agents
|
|
5
|
+
"give up" with negative results instead of delivering value.
|
|
6
|
+
|
|
7
|
+
Enhanced with:
|
|
8
|
+
- Tool execution telemetry to distinguish valid empty results from laziness
|
|
9
|
+
- Semantic analysis for detecting subtle forms of refusal
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import logging
|
|
13
|
+
import re
|
|
14
|
+
from typing import Optional, List
|
|
15
|
+
from datetime import datetime
|
|
16
|
+
|
|
17
|
+
from .models import (
|
|
18
|
+
AgentOutcome,
|
|
19
|
+
OutcomeType,
|
|
20
|
+
GiveUpSignal,
|
|
21
|
+
ToolExecutionTelemetry,
|
|
22
|
+
ToolExecutionStatus
|
|
23
|
+
)
|
|
24
|
+
from .semantic_analyzer import SemanticAnalyzer
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class OutcomeAnalyzer:
|
|
30
|
+
"""
|
|
31
|
+
Analyzes agent outcomes to detect "Give-Up Signals" (Laziness).
|
|
32
|
+
|
|
33
|
+
This filters for competence issues - when agents comply with safety rules
|
|
34
|
+
but fail to deliver value (e.g., "No data found" is safe, but wrong if data exists).
|
|
35
|
+
|
|
36
|
+
Enhanced Features:
|
|
37
|
+
1. Tool Execution Telemetry - Correlates give-up signals with tool usage
|
|
38
|
+
2. Semantic Analysis - Goes beyond regex for subtle refusal detection
|
|
39
|
+
3. False Positive Prevention - Distinguishes valid empty results from laziness
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
def __init__(self, use_semantic_analysis: bool = True):
|
|
43
|
+
self.give_up_patterns = self._load_give_up_patterns()
|
|
44
|
+
self.outcome_history: List[AgentOutcome] = []
|
|
45
|
+
self.use_semantic_analysis = use_semantic_analysis
|
|
46
|
+
self.semantic_analyzer = SemanticAnalyzer() if use_semantic_analysis else None
|
|
47
|
+
|
|
48
|
+
def _load_give_up_patterns(self) -> dict:
|
|
49
|
+
"""Load patterns that indicate agent is giving up."""
|
|
50
|
+
return {
|
|
51
|
+
GiveUpSignal.NO_DATA_FOUND: [
|
|
52
|
+
r"no (?:data|results|logs|records|information) (?:found|available)",
|
|
53
|
+
r"could(?:n't| not) find (?:any |the )?(?:data|logs|records|information)",
|
|
54
|
+
r"(?:data|logs|records) (?:not found|unavailable|missing)",
|
|
55
|
+
r"no matching (?:data|logs|records|results)"
|
|
56
|
+
],
|
|
57
|
+
GiveUpSignal.CANNOT_ANSWER: [
|
|
58
|
+
r"(?:i )?cannot answer",
|
|
59
|
+
r"(?:i )?(?:can't|cannot) (?:help|assist|answer) (?:with|you)",
|
|
60
|
+
r"unable to (?:answer|respond|help)",
|
|
61
|
+
r"(?:i )?don't have (?:enough|sufficient) information"
|
|
62
|
+
],
|
|
63
|
+
GiveUpSignal.NO_RESULTS: [
|
|
64
|
+
r"no results",
|
|
65
|
+
r"0 results",
|
|
66
|
+
r"zero results",
|
|
67
|
+
r"empty result set",
|
|
68
|
+
r"query returned (?:no|zero) results"
|
|
69
|
+
],
|
|
70
|
+
GiveUpSignal.NOT_AVAILABLE: [
|
|
71
|
+
r"(?:not|isn't) (?:currently )?available",
|
|
72
|
+
r"(?:is|are) unavailable",
|
|
73
|
+
r"(?:service|resource|data) (?:not|isn't) available"
|
|
74
|
+
],
|
|
75
|
+
GiveUpSignal.INSUFFICIENT_INFO: [
|
|
76
|
+
r"insufficient (?:data|information)",
|
|
77
|
+
r"not enough (?:data|information|context)",
|
|
78
|
+
r"incomplete (?:data|information)",
|
|
79
|
+
r"missing (?:required|necessary) (?:data|information)"
|
|
80
|
+
]
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
def analyze_outcome(
|
|
84
|
+
self,
|
|
85
|
+
agent_id: str,
|
|
86
|
+
user_prompt: str,
|
|
87
|
+
agent_response: str,
|
|
88
|
+
context: Optional[dict] = None,
|
|
89
|
+
tool_telemetry: Optional[List[ToolExecutionTelemetry]] = None
|
|
90
|
+
) -> AgentOutcome:
|
|
91
|
+
"""
|
|
92
|
+
Analyze an agent's outcome to determine if it gave up.
|
|
93
|
+
|
|
94
|
+
Enhanced with tool telemetry correlation and semantic analysis.
|
|
95
|
+
|
|
96
|
+
Args:
|
|
97
|
+
agent_id: ID of the agent
|
|
98
|
+
user_prompt: Original user request
|
|
99
|
+
agent_response: Agent's response
|
|
100
|
+
context: Additional context
|
|
101
|
+
tool_telemetry: Tool execution telemetry data
|
|
102
|
+
|
|
103
|
+
Returns:
|
|
104
|
+
AgentOutcome with classification and analysis
|
|
105
|
+
"""
|
|
106
|
+
logger.info(f"Analyzing outcome for agent {agent_id}")
|
|
107
|
+
|
|
108
|
+
# Check if this is a give-up signal (regex-based)
|
|
109
|
+
give_up_signal = self._detect_give_up_signal(agent_response)
|
|
110
|
+
|
|
111
|
+
# Perform semantic analysis if enabled
|
|
112
|
+
semantic_analysis = None
|
|
113
|
+
if self.use_semantic_analysis:
|
|
114
|
+
semantic_analysis = self.semantic_analyzer.analyze(
|
|
115
|
+
agent_response=agent_response,
|
|
116
|
+
user_prompt=user_prompt,
|
|
117
|
+
tool_telemetry=tool_telemetry
|
|
118
|
+
)
|
|
119
|
+
logger.debug(f"Semantic analysis: {semantic_analysis.semantic_category} "
|
|
120
|
+
f"(confidence: {semantic_analysis.refusal_confidence:.2f})")
|
|
121
|
+
|
|
122
|
+
# Determine outcome type with enhanced logic
|
|
123
|
+
outcome_type = self._determine_outcome_type(
|
|
124
|
+
agent_response=agent_response,
|
|
125
|
+
give_up_signal=give_up_signal,
|
|
126
|
+
tool_telemetry=tool_telemetry,
|
|
127
|
+
semantic_analysis=semantic_analysis
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
if outcome_type == OutcomeType.GIVE_UP:
|
|
131
|
+
logger.warning(f"Give-up detected: signal={give_up_signal.value if give_up_signal else 'semantic'}")
|
|
132
|
+
|
|
133
|
+
outcome = AgentOutcome(
|
|
134
|
+
agent_id=agent_id,
|
|
135
|
+
outcome_type=outcome_type,
|
|
136
|
+
user_prompt=user_prompt,
|
|
137
|
+
agent_response=agent_response,
|
|
138
|
+
give_up_signal=give_up_signal,
|
|
139
|
+
context=context or {},
|
|
140
|
+
tool_telemetry=tool_telemetry or [],
|
|
141
|
+
semantic_analysis=semantic_analysis
|
|
142
|
+
)
|
|
143
|
+
|
|
144
|
+
self.outcome_history.append(outcome)
|
|
145
|
+
|
|
146
|
+
return outcome
|
|
147
|
+
|
|
148
|
+
def _detect_give_up_signal(self, response: str) -> Optional[GiveUpSignal]:
|
|
149
|
+
"""
|
|
150
|
+
Detect if the response contains a give-up signal.
|
|
151
|
+
|
|
152
|
+
These are "Negative Results" that trigger the Completeness Auditor.
|
|
153
|
+
"""
|
|
154
|
+
response_lower = response.lower()
|
|
155
|
+
|
|
156
|
+
# Check each pattern category
|
|
157
|
+
for signal_type, patterns in self.give_up_patterns.items():
|
|
158
|
+
for pattern in patterns:
|
|
159
|
+
if re.search(pattern, response_lower):
|
|
160
|
+
logger.debug(f"Matched pattern '{pattern}' for signal {signal_type.value}")
|
|
161
|
+
return signal_type
|
|
162
|
+
|
|
163
|
+
return None
|
|
164
|
+
|
|
165
|
+
def _determine_outcome_type(
|
|
166
|
+
self,
|
|
167
|
+
agent_response: str,
|
|
168
|
+
give_up_signal: Optional[GiveUpSignal],
|
|
169
|
+
tool_telemetry: Optional[List[ToolExecutionTelemetry]],
|
|
170
|
+
semantic_analysis: Optional[any]
|
|
171
|
+
) -> OutcomeType:
|
|
172
|
+
"""
|
|
173
|
+
Determine outcome type with enhanced logic.
|
|
174
|
+
|
|
175
|
+
Considers:
|
|
176
|
+
1. Regex-based give-up signal
|
|
177
|
+
2. Tool execution telemetry
|
|
178
|
+
3. Semantic analysis
|
|
179
|
+
|
|
180
|
+
Key Enhancement: Correlation with tool execution to avoid false positives
|
|
181
|
+
"""
|
|
182
|
+
# Check regex signal
|
|
183
|
+
has_regex_signal = give_up_signal is not None
|
|
184
|
+
|
|
185
|
+
# Check semantic signal
|
|
186
|
+
has_semantic_signal = (
|
|
187
|
+
semantic_analysis is not None and
|
|
188
|
+
semantic_analysis.is_refusal and
|
|
189
|
+
semantic_analysis.refusal_confidence > 0.6
|
|
190
|
+
)
|
|
191
|
+
|
|
192
|
+
# Analyze tool telemetry
|
|
193
|
+
tool_analysis = self._analyze_tool_execution(tool_telemetry)
|
|
194
|
+
|
|
195
|
+
# Decision logic with false positive prevention
|
|
196
|
+
if has_regex_signal or has_semantic_signal:
|
|
197
|
+
# Agent said "no data found" or similar
|
|
198
|
+
|
|
199
|
+
# Check if tools were actually called and returned empty
|
|
200
|
+
if tool_analysis["tools_called"] and tool_analysis["all_empty_results"]:
|
|
201
|
+
# Valid empty result: Tools called, returned empty -> SUCCESS
|
|
202
|
+
logger.info("Give-up signal present but tools returned empty results - valid empty set")
|
|
203
|
+
return OutcomeType.SUCCESS
|
|
204
|
+
|
|
205
|
+
elif tool_analysis["tools_called"] and tool_analysis["has_errors"]:
|
|
206
|
+
# Tools called but errored -> Potential laziness (didn't handle errors)
|
|
207
|
+
logger.warning("Give-up with tool errors - potential laziness or error handling issue")
|
|
208
|
+
return OutcomeType.GIVE_UP
|
|
209
|
+
|
|
210
|
+
elif not tool_analysis["tools_called"]:
|
|
211
|
+
# No tools called -> Clear laziness
|
|
212
|
+
logger.warning("Give-up signal without tool execution - clear laziness")
|
|
213
|
+
return OutcomeType.GIVE_UP
|
|
214
|
+
|
|
215
|
+
else:
|
|
216
|
+
# Mixed results or unclear -> Default to GIVE_UP for audit
|
|
217
|
+
logger.warning("Give-up signal with unclear tool usage - flagging for audit")
|
|
218
|
+
return OutcomeType.GIVE_UP
|
|
219
|
+
|
|
220
|
+
else:
|
|
221
|
+
# No give-up signal detected
|
|
222
|
+
if len(agent_response.strip()) < 20:
|
|
223
|
+
return OutcomeType.FAILURE
|
|
224
|
+
else:
|
|
225
|
+
return OutcomeType.SUCCESS
|
|
226
|
+
|
|
227
|
+
def _analyze_tool_execution(
|
|
228
|
+
self,
|
|
229
|
+
tool_telemetry: Optional[List[ToolExecutionTelemetry]]
|
|
230
|
+
) -> dict:
|
|
231
|
+
"""
|
|
232
|
+
Analyze tool execution telemetry.
|
|
233
|
+
|
|
234
|
+
Returns a dict with:
|
|
235
|
+
- tools_called: bool - Were any tools called?
|
|
236
|
+
- all_empty_results: bool - Did all tools return empty results?
|
|
237
|
+
- has_errors: bool - Did any tools error?
|
|
238
|
+
- tool_count: int - Number of tools called
|
|
239
|
+
"""
|
|
240
|
+
if not tool_telemetry:
|
|
241
|
+
return {
|
|
242
|
+
"tools_called": False,
|
|
243
|
+
"all_empty_results": False,
|
|
244
|
+
"has_errors": False,
|
|
245
|
+
"tool_count": 0
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
called_tools = [
|
|
249
|
+
t for t in tool_telemetry
|
|
250
|
+
if t.tool_status != ToolExecutionStatus.NOT_CALLED
|
|
251
|
+
]
|
|
252
|
+
|
|
253
|
+
if not called_tools:
|
|
254
|
+
return {
|
|
255
|
+
"tools_called": False,
|
|
256
|
+
"all_empty_results": False,
|
|
257
|
+
"has_errors": False,
|
|
258
|
+
"tool_count": 0
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
empty_results = [
|
|
262
|
+
t for t in called_tools
|
|
263
|
+
if t.tool_status == ToolExecutionStatus.EMPTY_RESULT
|
|
264
|
+
]
|
|
265
|
+
|
|
266
|
+
errored_tools = [
|
|
267
|
+
t for t in called_tools
|
|
268
|
+
if t.tool_status == ToolExecutionStatus.ERROR
|
|
269
|
+
]
|
|
270
|
+
|
|
271
|
+
return {
|
|
272
|
+
"tools_called": True,
|
|
273
|
+
"all_empty_results": len(empty_results) == len(called_tools),
|
|
274
|
+
"has_errors": len(errored_tools) > 0,
|
|
275
|
+
"tool_count": len(called_tools),
|
|
276
|
+
"empty_count": len(empty_results),
|
|
277
|
+
"error_count": len(errored_tools)
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
def should_trigger_audit(self, outcome: AgentOutcome) -> bool:
|
|
281
|
+
"""
|
|
282
|
+
Determine if this outcome should trigger a Completeness Audit.
|
|
283
|
+
|
|
284
|
+
The Completeness Auditor is only triggered on "Give-Up Signals"
|
|
285
|
+
to avoid expensive auditing of every interaction.
|
|
286
|
+
|
|
287
|
+
Args:
|
|
288
|
+
outcome: The agent outcome
|
|
289
|
+
|
|
290
|
+
Returns:
|
|
291
|
+
True if Completeness Auditor should be triggered
|
|
292
|
+
"""
|
|
293
|
+
return outcome.outcome_type == OutcomeType.GIVE_UP
|
|
294
|
+
|
|
295
|
+
def get_give_up_rate(self, agent_id: Optional[str] = None, recent_n: int = 100) -> float:
|
|
296
|
+
"""
|
|
297
|
+
Calculate the give-up rate for an agent.
|
|
298
|
+
|
|
299
|
+
This metric helps identify agents that are consistently lazy.
|
|
300
|
+
|
|
301
|
+
Args:
|
|
302
|
+
agent_id: Optional agent ID to filter by
|
|
303
|
+
recent_n: Number of recent outcomes to analyze
|
|
304
|
+
|
|
305
|
+
Returns:
|
|
306
|
+
Give-up rate as a float between 0 and 1
|
|
307
|
+
"""
|
|
308
|
+
outcomes = self.outcome_history[-recent_n:]
|
|
309
|
+
|
|
310
|
+
if agent_id:
|
|
311
|
+
outcomes = [o for o in outcomes if o.agent_id == agent_id]
|
|
312
|
+
|
|
313
|
+
if not outcomes:
|
|
314
|
+
return 0.0
|
|
315
|
+
|
|
316
|
+
give_ups = sum(1 for o in outcomes if o.outcome_type == OutcomeType.GIVE_UP)
|
|
317
|
+
|
|
318
|
+
return give_ups / len(outcomes)
|
|
319
|
+
|
|
320
|
+
def get_outcome_history(
|
|
321
|
+
self,
|
|
322
|
+
agent_id: Optional[str] = None,
|
|
323
|
+
outcome_type: Optional[OutcomeType] = None,
|
|
324
|
+
limit: int = 100
|
|
325
|
+
) -> List[AgentOutcome]:
|
|
326
|
+
"""Get outcome history with optional filters."""
|
|
327
|
+
outcomes = self.outcome_history[-limit:]
|
|
328
|
+
|
|
329
|
+
if agent_id:
|
|
330
|
+
outcomes = [o for o in outcomes if o.agent_id == agent_id]
|
|
331
|
+
|
|
332
|
+
if outcome_type:
|
|
333
|
+
outcomes = [o for o in outcomes if o.outcome_type == outcome_type]
|
|
334
|
+
|
|
335
|
+
return outcomes
|