agent-os-kernel 1.1.0__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_os/__init__.py +66 -4
- agent_os/agents_compat.py +286 -0
- agent_os/base_agent.py +308 -0
- agent_os/cli.py +1079 -19
- agent_os/integrations/__init__.py +37 -2
- agent_os/integrations/openai_adapter.py +502 -0
- agent_os/integrations/semantic_kernel_adapter.py +569 -0
- agent_os/stateless.py +349 -0
- agent_os_kernel-1.3.0.dist-info/METADATA +676 -0
- agent_os_kernel-1.3.0.dist-info/RECORD +1053 -0
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.3.0.dist-info}/entry_points.txt +0 -1
- modules/amb/.github/workflows/ci.yml +102 -0
- modules/amb/.github/workflows/publish.yml +146 -0
- modules/amb/.gitignore +134 -0
- modules/amb/CHANGELOG.md +118 -0
- modules/amb/CONTRIBUTING.md +141 -0
- modules/amb/LICENSE +21 -0
- modules/amb/README.md +188 -0
- modules/amb/amb_core/__init__.py +175 -0
- modules/amb/amb_core/adapters/__init__.py +55 -0
- modules/amb/amb_core/adapters/aws_sqs_broker.py +374 -0
- modules/amb/amb_core/adapters/azure_servicebus_broker.py +338 -0
- modules/amb/amb_core/adapters/kafka_broker.py +258 -0
- modules/amb/amb_core/adapters/nats_broker.py +283 -0
- modules/amb/amb_core/adapters/rabbitmq_broker.py +233 -0
- modules/amb/amb_core/adapters/redis_broker.py +260 -0
- modules/amb/amb_core/broker.py +143 -0
- modules/amb/amb_core/bus.py +479 -0
- modules/amb/amb_core/cloudevents.py +507 -0
- modules/amb/amb_core/dlq.py +343 -0
- modules/amb/amb_core/hf_utils.py +534 -0
- modules/amb/amb_core/memory_broker.py +408 -0
- modules/amb/amb_core/models.py +139 -0
- modules/amb/amb_core/persistence.py +527 -0
- modules/amb/amb_core/schema.py +292 -0
- modules/amb/amb_core/tracing.py +356 -0
- modules/amb/examples/advanced_features.py +223 -0
- modules/amb/examples/backpressure_demo.py +225 -0
- modules/amb/examples/basic_usage.py +117 -0
- modules/amb/examples/tracing_demo.py +104 -0
- modules/amb/experiments/README.md +52 -0
- modules/amb/experiments/reproduce_results.py +467 -0
- modules/amb/experiments/results.json +324 -0
- modules/amb/paper/README.md +40 -0
- modules/amb/paper/paper.tex +365 -0
- modules/amb/paper/whitepaper.md +377 -0
- modules/amb/pyproject.toml +117 -0
- modules/amb/tests/__init__.py +1 -0
- modules/amb/tests/test_backpressure_priority.py +280 -0
- modules/amb/tests/test_bus.py +198 -0
- modules/amb/tests/test_cloudevents.py +443 -0
- modules/amb/tests/test_features.py +531 -0
- modules/amb/tests/test_models.py +74 -0
- modules/amb/tests/test_tracing.py +254 -0
- modules/atr/.github/workflows/ci.yml +101 -0
- modules/atr/.github/workflows/publish.yml +140 -0
- modules/atr/.gitignore +134 -0
- modules/atr/.pre-commit-config.yaml +37 -0
- modules/atr/CHANGELOG.md +39 -0
- modules/atr/CONTRIBUTING.md +96 -0
- modules/atr/IMPLEMENTATION_SUMMARY.md +143 -0
- modules/atr/README.md +180 -0
- modules/atr/atr/__init__.py +638 -0
- modules/atr/atr/access.py +346 -0
- modules/atr/atr/composition.py +643 -0
- modules/atr/atr/decorator.py +355 -0
- modules/atr/atr/executor.py +382 -0
- modules/atr/atr/health.py +555 -0
- modules/atr/atr/hf_utils.py +447 -0
- modules/atr/atr/injection.py +420 -0
- modules/atr/atr/metrics.py +438 -0
- modules/atr/atr/policies.py +401 -0
- modules/atr/atr/py.typed +2 -0
- modules/atr/atr/registry.py +450 -0
- modules/atr/atr/schema.py +478 -0
- modules/atr/atr/tools/safe/__init__.py +73 -0
- modules/atr/atr/tools/safe/calculator.py +380 -0
- modules/atr/atr/tools/safe/datetime_tool.py +441 -0
- modules/atr/atr/tools/safe/file_reader.py +400 -0
- modules/atr/atr/tools/safe/http_client.py +314 -0
- modules/atr/atr/tools/safe/json_parser.py +372 -0
- modules/atr/atr/tools/safe/text_tool.py +526 -0
- modules/atr/atr/tools/safe/toolkit.py +173 -0
- modules/atr/docs/PYPI_SETUP.md +113 -0
- modules/atr/examples/README.md +27 -0
- modules/atr/examples/demo.py +144 -0
- modules/atr/examples/sandbox_demo.py +218 -0
- modules/atr/experiments/README.md +69 -0
- modules/atr/experiments/reproduce_results.py +509 -0
- modules/atr/experiments/results/.gitkeep +0 -0
- modules/atr/experiments/results/results_20260123_140334.json +71 -0
- modules/atr/paper/README.md +36 -0
- modules/atr/paper/figures/.gitkeep +0 -0
- modules/atr/paper/references.bib +84 -0
- modules/atr/paper/structure.tex +293 -0
- modules/atr/paper/whitepaper.md +234 -0
- modules/atr/pyproject.toml +148 -0
- modules/atr/requirements.txt +1 -0
- modules/atr/setup.py +30 -0
- modules/atr/tests/__init__.py +1 -0
- modules/atr/tests/test_decorator.py +317 -0
- modules/atr/tests/test_executor.py +245 -0
- modules/atr/tests/test_integration_executor.py +184 -0
- modules/atr/tests/test_registry.py +312 -0
- modules/atr/tests/test_schema.py +182 -0
- modules/atr/tests/test_v2_features.py +708 -0
- modules/caas/.dockerignore +63 -0
- modules/caas/.github/ISSUE_TEMPLATE/bug_report.md +38 -0
- modules/caas/.github/ISSUE_TEMPLATE/custom.md +10 -0
- modules/caas/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
- modules/caas/.github/workflows/ci.yml +100 -0
- modules/caas/.github/workflows/lint.yml +39 -0
- modules/caas/.github/workflows/publish-pypi.yml +124 -0
- modules/caas/.gitignore +73 -0
- modules/caas/.pre-commit-config.yaml +33 -0
- modules/caas/CHANGELOG.md +58 -0
- modules/caas/CONTRIBUTING.md +346 -0
- modules/caas/Dockerfile +41 -0
- modules/caas/LICENSE +21 -0
- modules/caas/MANIFEST.in +11 -0
- modules/caas/README.md +158 -0
- modules/caas/benchmarks/README.md +255 -0
- modules/caas/benchmarks/create_hf_dataset.py +502 -0
- modules/caas/benchmarks/data/sample_corpus/README.md +86 -0
- modules/caas/benchmarks/data/sample_corpus/auth_module.py +211 -0
- modules/caas/benchmarks/data/sample_corpus/contribution_guide.md +185 -0
- modules/caas/benchmarks/data/sample_corpus/remote_work_policy.html +57 -0
- modules/caas/benchmarks/hf_dataset/README.md +214 -0
- modules/caas/benchmarks/hf_dataset/caas_benchmark_corpus.py +73 -0
- modules/caas/benchmarks/hf_dataset/corpus_preview.json +193 -0
- modules/caas/benchmarks/results/README.md +66 -0
- modules/caas/benchmarks/results/evaluation_2026-01-20.json +121 -0
- modules/caas/benchmarks/run_evaluation.py +561 -0
- modules/caas/benchmarks/statistical_tests.py +289 -0
- modules/caas/benchmarks/verify_sample_corpus.py +83 -0
- modules/caas/docker-compose.yml +38 -0
- modules/caas/docs/CONTEXT_TRIAD.md +462 -0
- modules/caas/docs/CONTRIBUTING.md +346 -0
- modules/caas/docs/ETHICS_AND_LIMITATIONS.md +336 -0
- modules/caas/docs/HEURISTIC_ROUTER.md +442 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY.md +363 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_CONTEXT_TRIAD.md +277 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_HEURISTIC_ROUTER.md +231 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_METADATA_INJECTION.md +258 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_PRAGMATIC_TRUTH.md +212 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_TRUST_GATEWAY.md +319 -0
- modules/caas/docs/LAYER_1_PRIMITIVE.md +202 -0
- modules/caas/docs/METADATA_INJECTION.md +404 -0
- modules/caas/docs/PRAGMATIC_TRUTH.md +431 -0
- modules/caas/docs/RELATED_WORK.md +312 -0
- modules/caas/docs/RELEASE_CHECKLIST.md +219 -0
- modules/caas/docs/RELEASE_GUIDE.md +285 -0
- modules/caas/docs/REPRODUCIBILITY.md +386 -0
- modules/caas/docs/SLIDING_WINDOW.md +387 -0
- modules/caas/docs/STRUCTURE_AWARE_INDEXING.md +158 -0
- modules/caas/docs/TESTING.md +259 -0
- modules/caas/docs/THREAT_MODEL.md +247 -0
- modules/caas/docs/TRUST_GATEWAY.md +575 -0
- modules/caas/docs/VFS.md +298 -0
- modules/caas/examples/agents/enterprise_security_agent.py +414 -0
- modules/caas/examples/agents/intelligent_document_analyzer.py +380 -0
- modules/caas/examples/demos/demo.py +309 -0
- modules/caas/examples/demos/demo_context_triad.py +225 -0
- modules/caas/examples/demos/demo_conversation_manager.py +285 -0
- modules/caas/examples/demos/demo_heuristic_router.py +133 -0
- modules/caas/examples/demos/demo_metadata_injection.py +198 -0
- modules/caas/examples/demos/demo_pragmatic_truth.py +303 -0
- modules/caas/examples/demos/demo_structure_aware.py +140 -0
- modules/caas/examples/demos/demo_time_decay.py +247 -0
- modules/caas/examples/demos/demo_trust_gateway.py +383 -0
- modules/caas/examples/multi_agent/README.md +159 -0
- modules/caas/examples/multi_agent/research_team.py +369 -0
- modules/caas/examples/multi_agent/vfs_collaboration.py +393 -0
- modules/caas/examples/usage/auth_module.py +142 -0
- modules/caas/examples/usage/usage_example.py +173 -0
- modules/caas/experiments/README.md +42 -0
- modules/caas/experiments/reproduce_results.py +462 -0
- modules/caas/paper/ARXIV_METADATA.md +145 -0
- modules/caas/paper/ARXIV_README.md +47 -0
- modules/caas/paper/CHECKLIST.md +103 -0
- modules/caas/paper/GITHUB_RELEASE_NOTES.md +105 -0
- modules/caas/paper/README.md +71 -0
- modules/caas/paper/abstract.md +24 -0
- modules/caas/paper/arxiv_submission.tar +0 -0
- modules/caas/paper/arxiv_submission.zip +0 -0
- modules/caas/paper/build_pdf.py +355 -0
- modules/caas/paper/experiments.md +149 -0
- modules/caas/paper/figures/.gitkeep +0 -0
- modules/caas/paper/figures/README.md +237 -0
- modules/caas/paper/figures/fig1_system_architecture.png +0 -0
- modules/caas/paper/figures/fig1_system_architecture.svg +198 -0
- modules/caas/paper/figures/fig2_context_triad.png +0 -0
- modules/caas/paper/figures/fig2_context_triad.svg +105 -0
- modules/caas/paper/figures/fig3_ablation_results.png +0 -0
- modules/caas/paper/figures/fig3_ablation_results.svg +113 -0
- modules/caas/paper/figures/fig4_routing_latency.png +0 -0
- modules/caas/paper/figures/fig4_routing_latency.svg +97 -0
- modules/caas/paper/intro.md +103 -0
- modules/caas/paper/latex/figures/fig1_system_architecture.png +0 -0
- modules/caas/paper/latex/figures/fig2_context_triad.png +0 -0
- modules/caas/paper/latex/figures/fig3_ablation_results.png +0 -0
- modules/caas/paper/latex/figures/fig4_routing_latency.png +0 -0
- modules/caas/paper/latex/main.tex +468 -0
- modules/caas/paper/latex/references.bib +140 -0
- modules/caas/paper/method.md +350 -0
- modules/caas/paper/outline.md +123 -0
- modules/caas/paper/related_work.md +101 -0
- modules/caas/paper/tables/.gitkeep +0 -0
- modules/caas/paper/tables/results_tables.md +50 -0
- modules/caas/pyproject.toml +172 -0
- modules/caas/requirements.txt +11 -0
- modules/caas/src/caas/__init__.py +232 -0
- modules/caas/src/caas/api/__init__.py +7 -0
- modules/caas/src/caas/api/server.py +1326 -0
- modules/caas/src/caas/caching.py +832 -0
- modules/caas/src/caas/cli.py +208 -0
- modules/caas/src/caas/conversation.py +221 -0
- modules/caas/src/caas/decay.py +118 -0
- modules/caas/src/caas/detection/__init__.py +7 -0
- modules/caas/src/caas/detection/detector.py +236 -0
- modules/caas/src/caas/enrichment.py +127 -0
- modules/caas/src/caas/gateway/__init__.py +24 -0
- modules/caas/src/caas/gateway/trust_gateway.py +471 -0
- modules/caas/src/caas/hf_utils.py +477 -0
- modules/caas/src/caas/ingestion/__init__.py +21 -0
- modules/caas/src/caas/ingestion/processors.py +251 -0
- modules/caas/src/caas/ingestion/structure_parser.py +185 -0
- modules/caas/src/caas/models.py +354 -0
- modules/caas/src/caas/pragmatic_truth.py +441 -0
- modules/caas/src/caas/routing/__init__.py +8 -0
- modules/caas/src/caas/routing/heuristic_router.py +242 -0
- modules/caas/src/caas/storage/__init__.py +7 -0
- modules/caas/src/caas/storage/store.py +450 -0
- modules/caas/src/caas/triad.py +472 -0
- modules/caas/src/caas/tuning/__init__.py +7 -0
- modules/caas/src/caas/tuning/tuner.py +322 -0
- modules/caas/src/caas/vfs/__init__.py +12 -0
- modules/caas/src/caas/vfs/filesystem.py +450 -0
- modules/caas/tests/__init__.py +3 -0
- modules/caas/tests/conftest.py +8 -0
- modules/caas/tests/test_caching.py +628 -0
- modules/caas/tests/test_context_triad.py +385 -0
- modules/caas/tests/test_conversation_manager.py +289 -0
- modules/caas/tests/test_functionality.py +215 -0
- modules/caas/tests/test_heuristic_router.py +370 -0
- modules/caas/tests/test_metadata_injection.py +328 -0
- modules/caas/tests/test_pragmatic_truth.py +322 -0
- modules/caas/tests/test_structure_aware_indexing.py +283 -0
- modules/caas/tests/test_time_decay.py +268 -0
- modules/caas/tests/test_trust_gateway.py +445 -0
- modules/caas/tests/test_vfs.py +298 -0
- modules/cmvk/.github/FUNDING.yml +9 -0
- modules/cmvk/.github/dependabot.yml +54 -0
- modules/cmvk/.github/workflows/ci.yml +205 -0
- modules/cmvk/.github/workflows/publish.yml +143 -0
- modules/cmvk/.gitignore +147 -0
- modules/cmvk/.pre-commit-config.yaml +58 -0
- modules/cmvk/CHANGELOG.md +146 -0
- modules/cmvk/CITATION.cff +48 -0
- modules/cmvk/CONTRIBUTING.md +229 -0
- modules/cmvk/Dockerfile +87 -0
- modules/cmvk/HF_MODEL_CARD.md +185 -0
- modules/cmvk/LICENSE +21 -0
- modules/cmvk/README.md +149 -0
- modules/cmvk/SECURITY.md +114 -0
- modules/cmvk/config/prompts/generator_v1.txt +23 -0
- modules/cmvk/config/prompts/verifier_hostile.txt +32 -0
- modules/cmvk/config/settings.yaml +40 -0
- modules/cmvk/coverage_html/.gitignore +2 -0
- modules/cmvk/coverage_html/class_index.html +658 -0
- modules/cmvk/coverage_html/coverage_html_cb_188fc9a4.js +735 -0
- modules/cmvk/coverage_html/favicon_32_cb_c827f16f.png +0 -0
- modules/cmvk/coverage_html/function_index.html +1978 -0
- modules/cmvk/coverage_html/index.html +255 -0
- modules/cmvk/coverage_html/keybd_closed_cb_900cfef5.png +0 -0
- modules/cmvk/coverage_html/status.json +1 -0
- modules/cmvk/coverage_html/style_cb_5c747636.css +389 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38___init___py.html +315 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_audit_py.html +499 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_benchmarks_py.html +575 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_constitutional_py.html +1001 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_hf_utils_py.html +398 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_metrics_py.html +570 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_profiles_py.html +397 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_types_py.html +109 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_verification_py.html +1053 -0
- modules/cmvk/docs/DIAGRAMS.md +325 -0
- modules/cmvk/docs/architecture.md +345 -0
- modules/cmvk/docs/features.md +308 -0
- modules/cmvk/docs/getting_started.md +279 -0
- modules/cmvk/docs/innovation_layer.md +377 -0
- modules/cmvk/docs/safety.md +281 -0
- modules/cmvk/docs/traceability.md +150 -0
- modules/cmvk/examples/basic_example.py +62 -0
- modules/cmvk/examples/demo_complete_pipeline.py +209 -0
- modules/cmvk/examples/demo_innovation_layer.py +197 -0
- modules/cmvk/examples/example.py +112 -0
- modules/cmvk/examples/model_diversity_comparison.py +110 -0
- modules/cmvk/examples/real_api_integration.py +121 -0
- modules/cmvk/examples/test_full_pipeline.py +303 -0
- modules/cmvk/experiments/FEATURE_2_LATERAL_THINKING.md +187 -0
- modules/cmvk/experiments/README.md +216 -0
- modules/cmvk/experiments/ablation_runner.py +666 -0
- modules/cmvk/experiments/baseline_runner.py +158 -0
- modules/cmvk/experiments/blind_spot_benchmark.py +364 -0
- modules/cmvk/experiments/datasets/README.md +85 -0
- modules/cmvk/experiments/datasets/humaneval_50.json +352 -0
- modules/cmvk/experiments/datasets/humaneval_full.json +1150 -0
- modules/cmvk/experiments/datasets/humaneval_sample.json +32 -0
- modules/cmvk/experiments/datasets/sabotage.json +262 -0
- modules/cmvk/experiments/datasets/sample.json +40 -0
- modules/cmvk/experiments/demo_with_traces.py +110 -0
- modules/cmvk/experiments/efficiency_curve.py +259 -0
- modules/cmvk/experiments/experiment_runner.py +243 -0
- modules/cmvk/experiments/paper_data_generator.py +183 -0
- modules/cmvk/experiments/reproduce_results.py +407 -0
- modules/cmvk/experiments/reproducible_runner.py +352 -0
- modules/cmvk/experiments/sabotage_stress_test.py +311 -0
- modules/cmvk/experiments/test_lateral_thinking.py +116 -0
- modules/cmvk/experiments/test_prosecutor.py +41 -0
- modules/cmvk/experiments/visualize_results.py +735 -0
- modules/cmvk/logs/traces/demo_HumanEval_0_20260121-204900.json +36 -0
- modules/cmvk/notebooks/analysis.ipynb +124 -0
- modules/cmvk/paper/PAPER.md +561 -0
- modules/cmvk/paper/arxiv_checklist.md +230 -0
- modules/cmvk/paper/cmvk_neurips.aux +77 -0
- modules/cmvk/paper/cmvk_neurips.bbl +81 -0
- modules/cmvk/paper/cmvk_neurips.blg +48 -0
- modules/cmvk/paper/cmvk_neurips.out +16 -0
- modules/cmvk/paper/cmvk_neurips.pdf +0 -0
- modules/cmvk/paper/cmvk_neurips.tex +309 -0
- modules/cmvk/paper/figures/ablation.png +0 -0
- modules/cmvk/paper/figures/ablation.svg +39 -0
- modules/cmvk/paper/figures/architecture.png +0 -0
- modules/cmvk/paper/figures/architecture.svg +115 -0
- modules/cmvk/paper/figures/results_bar.png +0 -0
- modules/cmvk/paper/figures/results_bar.svg +70 -0
- modules/cmvk/paper/generate_figures.py +383 -0
- modules/cmvk/paper/neurips_2024.sty +101 -0
- modules/cmvk/paper/references.bib +98 -0
- modules/cmvk/paper/structure.tex +200 -0
- modules/cmvk/pyproject.toml +189 -0
- modules/cmvk/requirements-dev.txt +19 -0
- modules/cmvk/requirements.txt +14 -0
- modules/cmvk/src/cmvk/__init__.py +216 -0
- modules/cmvk/src/cmvk/audit.py +400 -0
- modules/cmvk/src/cmvk/benchmarks.py +476 -0
- modules/cmvk/src/cmvk/constitutional.py +902 -0
- modules/cmvk/src/cmvk/hf_utils.py +299 -0
- modules/cmvk/src/cmvk/metrics.py +471 -0
- modules/cmvk/src/cmvk/profiles.py +298 -0
- modules/cmvk/src/cmvk/py.typed +0 -0
- modules/cmvk/src/cmvk/types.py +10 -0
- modules/cmvk/src/cmvk/verification.py +954 -0
- modules/cmvk/src/cross_model_verification_kernel/__init__.py +91 -0
- modules/cmvk/src/cross_model_verification_kernel/__main__.py +10 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/__init__.py +16 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/base_agent.py +142 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/generator_openai.py +223 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/verifier_anthropic.py +448 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/verifier_gemini.py +481 -0
- modules/cmvk/src/cross_model_verification_kernel/cli.py +570 -0
- modules/cmvk/src/cross_model_verification_kernel/core/__init__.py +26 -0
- modules/cmvk/src/cross_model_verification_kernel/core/graph_memory.py +308 -0
- modules/cmvk/src/cross_model_verification_kernel/core/kernel.py +413 -0
- modules/cmvk/src/cross_model_verification_kernel/core/trace_logger.py +75 -0
- modules/cmvk/src/cross_model_verification_kernel/core/types.py +121 -0
- modules/cmvk/src/cross_model_verification_kernel/datasets/__init__.py +20 -0
- modules/cmvk/src/cross_model_verification_kernel/datasets/humaneval_loader.py +271 -0
- modules/cmvk/src/cross_model_verification_kernel/generator.py +118 -0
- modules/cmvk/src/cross_model_verification_kernel/kernel.py +292 -0
- modules/cmvk/src/cross_model_verification_kernel/models.py +111 -0
- modules/cmvk/src/cross_model_verification_kernel/py.typed +1 -0
- modules/cmvk/src/cross_model_verification_kernel/simple_kernel.py +185 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/__init__.py +94 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/huggingface_upload.py +394 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/sandbox.py +159 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/statistics.py +468 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/visualizer.py +312 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/web_search.py +86 -0
- modules/cmvk/src/cross_model_verification_kernel/verifier.py +257 -0
- modules/cmvk/tests/__init__.py +3 -0
- modules/cmvk/tests/conftest.py +61 -0
- modules/cmvk/tests/integration/__init__.py +1 -0
- modules/cmvk/tests/integration/test_anthropic_verifier.py +269 -0
- modules/cmvk/tests/integration/test_integration.py +53 -0
- modules/cmvk/tests/integration/test_lateral_thinking_integration.py +199 -0
- modules/cmvk/tests/integration/test_lateral_thinking_witness.py +208 -0
- modules/cmvk/tests/integration/test_prosecutor_mode.py +131 -0
- modules/cmvk/tests/test_constitutional.py +611 -0
- modules/cmvk/tests/test_enhanced_features.py +603 -0
- modules/cmvk/tests/test_verification.py +255 -0
- modules/cmvk/tests/unit/__init__.py +1 -0
- modules/cmvk/tests/unit/test_agents.py +64 -0
- modules/cmvk/tests/unit/test_cli.py +224 -0
- modules/cmvk/tests/unit/test_core.py +126 -0
- modules/cmvk/tests/unit/test_humaneval_loader.py +197 -0
- modules/cmvk/tests/unit/test_kernel.py +255 -0
- modules/cmvk/tests/unit/test_reproducibility.py +160 -0
- modules/cmvk/tests/unit/test_trace_logger.py +115 -0
- modules/cmvk/tests/unit/test_visualizer.py +218 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/bug_report.yml +82 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/config.yml +11 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/feature_request.yml +104 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/question.yml +70 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/security_vulnerability.yml +84 -0
- modules/control-plane/.github/discussions.yml +73 -0
- modules/control-plane/.github/pull_request_template.md +82 -0
- modules/control-plane/.github/workflows/publish.yml +146 -0
- modules/control-plane/.github/workflows/release.yml +39 -0
- modules/control-plane/.github/workflows/tests.yml +58 -0
- modules/control-plane/.gitignore +55 -0
- modules/control-plane/CHANGELOG.md +203 -0
- modules/control-plane/CONTRIBUTING.md +311 -0
- modules/control-plane/CONTRIBUTORS.md +88 -0
- modules/control-plane/Dockerfile +82 -0
- modules/control-plane/LICENSE +21 -0
- modules/control-plane/MANIFEST.in +17 -0
- modules/control-plane/README.md +1264 -0
- modules/control-plane/ROADMAP.md +228 -0
- modules/control-plane/SECURITY.md +210 -0
- modules/control-plane/SUPPORT.md +106 -0
- modules/control-plane/acp-cli.py +212 -0
- modules/control-plane/benchmark/README.md +257 -0
- modules/control-plane/benchmark/__init__.py +19 -0
- modules/control-plane/benchmark/red_team_dataset.py +517 -0
- modules/control-plane/benchmark.py +563 -0
- modules/control-plane/build_and_publish.sh +130 -0
- modules/control-plane/docker-compose.yml +74 -0
- modules/control-plane/docs/ABLATION_STUDIES.md +528 -0
- modules/control-plane/docs/ADAPTER_GUIDE.md +544 -0
- modules/control-plane/docs/ADVANCED_FEATURES.md +543 -0
- modules/control-plane/docs/AIOS_COMPARISON.md +296 -0
- modules/control-plane/docs/BIBLIOGRAPHY.md +367 -0
- modules/control-plane/docs/CASE_STUDIES.md +645 -0
- modules/control-plane/docs/DOCKER_DEPLOYMENT.md +184 -0
- modules/control-plane/docs/ECOSYSTEM_STATUS.md +98 -0
- modules/control-plane/docs/HF_MODEL_CARD.md +168 -0
- modules/control-plane/docs/KERNEL_V1_RELEASE.md +454 -0
- modules/control-plane/docs/LAYER3_FRAMEWORK.md +227 -0
- modules/control-plane/docs/LIMITATIONS.md +523 -0
- modules/control-plane/docs/PYPI_PUBLISHING.md +195 -0
- modules/control-plane/docs/README.md +58 -0
- modules/control-plane/docs/RELATED_WORK.md +319 -0
- modules/control-plane/docs/RELEASE_v1.1.0.md +252 -0
- modules/control-plane/docs/REPRODUCIBILITY.md +540 -0
- modules/control-plane/docs/RESEARCH_FOUNDATION.md +197 -0
- modules/control-plane/docs/api/CORE.md +270 -0
- modules/control-plane/docs/architecture/architecture.md +120 -0
- modules/control-plane/docs/community/ANNOUNCEMENT_TEMPLATES.md +52 -0
- modules/control-plane/docs/guides/IMPLEMENTATION.md +225 -0
- modules/control-plane/docs/guides/PHILOSOPHY.md +354 -0
- modules/control-plane/docs/guides/QUICKSTART.md +217 -0
- modules/control-plane/examples/README.md +138 -0
- modules/control-plane/examples/a2a_demo.py +410 -0
- modules/control-plane/examples/adapter_demo.py +347 -0
- modules/control-plane/examples/advanced_features.py +403 -0
- modules/control-plane/examples/basic_usage.py +261 -0
- modules/control-plane/examples/benchmark_demo.py +186 -0
- modules/control-plane/examples/compliance_demo.py +333 -0
- modules/control-plane/examples/configuration.py +265 -0
- modules/control-plane/examples/getting_started.py +178 -0
- modules/control-plane/examples/hibernation_and_time_travel_demo.py +406 -0
- modules/control-plane/examples/interactive_tutorial.ipynb +497 -0
- modules/control-plane/examples/kernel_interceptor_demo.py +202 -0
- modules/control-plane/examples/kernel_v1_demo.py +273 -0
- modules/control-plane/examples/langchain_demo.py +281 -0
- modules/control-plane/examples/lifecycle_demo.py +724 -0
- modules/control-plane/examples/mcp_demo.py +378 -0
- modules/control-plane/examples/ml_safety_demo.py +157 -0
- modules/control-plane/examples/multimodal_demo.py +347 -0
- modules/control-plane/examples/observability_demo.py +370 -0
- modules/control-plane/examples/use_cases.py +336 -0
- modules/control-plane/experiments/long_horizon_purge.py +235 -0
- modules/control-plane/experiments/multi_agent_rag.py +165 -0
- modules/control-plane/experiments/reproduce_results.py +667 -0
- modules/control-plane/paper/ARXIV_SUBMISSION_INFO.txt +122 -0
- modules/control-plane/paper/ETHICS_STATEMENT.md +248 -0
- modules/control-plane/paper/PAPER_CHECKLIST.md +72 -0
- modules/control-plane/paper/Paper.pdf +0 -0
- modules/control-plane/paper/README.md +71 -0
- modules/control-plane/paper/appendix.md +152 -0
- modules/control-plane/paper/architecture.md +15 -0
- modules/control-plane/paper/arxiv/figures/ablation_chart.png +0 -0
- modules/control-plane/paper/arxiv/figures/architecture.png +0 -0
- modules/control-plane/paper/arxiv/figures/constraint_graphs.png +0 -0
- modules/control-plane/paper/arxiv/figures/results_chart.png +0 -0
- modules/control-plane/paper/arxiv/main.aux +97 -0
- modules/control-plane/paper/arxiv/main.bbl +112 -0
- modules/control-plane/paper/arxiv/main.blg +48 -0
- modules/control-plane/paper/arxiv/main.out +33 -0
- modules/control-plane/paper/arxiv/main.pdf +0 -0
- modules/control-plane/paper/arxiv/main.tex +479 -0
- modules/control-plane/paper/arxiv/references.bib +234 -0
- modules/control-plane/paper/arxiv_submission.tar +0 -0
- modules/control-plane/paper/arxiv_submission.zip +0 -0
- modules/control-plane/paper/build.sh +68 -0
- modules/control-plane/paper/figures/README.md +47 -0
- modules/control-plane/paper/figures/ablation_chart.pdf +0 -0
- modules/control-plane/paper/figures/ablation_chart.png +0 -0
- modules/control-plane/paper/figures/architecture.pdf +0 -0
- modules/control-plane/paper/figures/architecture.png +0 -0
- modules/control-plane/paper/figures/constraint_graphs.pdf +0 -0
- modules/control-plane/paper/figures/constraint_graphs.png +0 -0
- modules/control-plane/paper/figures/generate_figures.py +252 -0
- modules/control-plane/paper/figures/results_chart.pdf +0 -0
- modules/control-plane/paper/figures/results_chart.png +0 -0
- modules/control-plane/paper/main.md +273 -0
- modules/control-plane/paper/main.tex +214 -0
- modules/control-plane/paper/main_arxiv.aux +53 -0
- modules/control-plane/paper/main_arxiv.out +17 -0
- modules/control-plane/paper/main_arxiv.pdf +0 -0
- modules/control-plane/paper/main_arxiv.tex +264 -0
- modules/control-plane/paper/references.bib +234 -0
- modules/control-plane/pyproject.toml +124 -0
- modules/control-plane/reproducibility/ABLATIONS.md +136 -0
- modules/control-plane/reproducibility/README.md +288 -0
- modules/control-plane/reproducibility/commands.md +467 -0
- modules/control-plane/reproducibility/docker_config/Dockerfile +39 -0
- modules/control-plane/reproducibility/experiment_configs/purge_config.json +46 -0
- modules/control-plane/reproducibility/experiment_configs/rag_config.json +36 -0
- modules/control-plane/reproducibility/hardware_specs.md +317 -0
- modules/control-plane/reproducibility/requirements_frozen.txt +0 -0
- modules/control-plane/reproducibility/run_all_experiments.sh +45 -0
- modules/control-plane/reproducibility/seeds.json +106 -0
- modules/control-plane/scripts/prepare_pypi.py +46 -0
- modules/control-plane/scripts/prepare_release.py +176 -0
- modules/control-plane/scripts/upload_dataset_to_hf.py +316 -0
- modules/control-plane/setup.py +69 -0
- modules/control-plane/src/agent_control_plane/__init__.py +639 -0
- modules/control-plane/src/agent_control_plane/a2a_adapter.py +541 -0
- modules/control-plane/src/agent_control_plane/adapter.py +415 -0
- modules/control-plane/src/agent_control_plane/agent_hibernation.py +364 -0
- modules/control-plane/src/agent_control_plane/agent_kernel.py +464 -0
- modules/control-plane/src/agent_control_plane/compliance.py +718 -0
- modules/control-plane/src/agent_control_plane/constraint_graphs.py +475 -0
- modules/control-plane/src/agent_control_plane/control_plane.py +848 -0
- modules/control-plane/src/agent_control_plane/example_executors.py +193 -0
- modules/control-plane/src/agent_control_plane/execution_engine.py +229 -0
- modules/control-plane/src/agent_control_plane/flight_recorder.py +600 -0
- modules/control-plane/src/agent_control_plane/governance_layer.py +432 -0
- modules/control-plane/src/agent_control_plane/hf_utils.py +561 -0
- modules/control-plane/src/agent_control_plane/interfaces/__init__.py +53 -0
- modules/control-plane/src/agent_control_plane/interfaces/kernel_interface.py +359 -0
- modules/control-plane/src/agent_control_plane/interfaces/plugin_interface.py +495 -0
- modules/control-plane/src/agent_control_plane/interfaces/protocol_interfaces.py +385 -0
- modules/control-plane/src/agent_control_plane/kernel_space.py +707 -0
- modules/control-plane/src/agent_control_plane/langchain_adapter.py +422 -0
- modules/control-plane/src/agent_control_plane/lifecycle.py +3111 -0
- modules/control-plane/src/agent_control_plane/mcp_adapter.py +517 -0
- modules/control-plane/src/agent_control_plane/ml_safety.py +560 -0
- modules/control-plane/src/agent_control_plane/multimodal.py +724 -0
- modules/control-plane/src/agent_control_plane/mute_agent.py +419 -0
- modules/control-plane/src/agent_control_plane/observability.py +785 -0
- modules/control-plane/src/agent_control_plane/orchestrator.py +480 -0
- modules/control-plane/src/agent_control_plane/plugin_registry.py +748 -0
- modules/control-plane/src/agent_control_plane/policy_engine.py +525 -0
- modules/control-plane/src/agent_control_plane/shadow_mode.py +307 -0
- modules/control-plane/src/agent_control_plane/signals.py +491 -0
- modules/control-plane/src/agent_control_plane/supervisor_agents.py +427 -0
- modules/control-plane/src/agent_control_plane/time_travel_debugger.py +554 -0
- modules/control-plane/src/agent_control_plane/tool_registry.py +350 -0
- modules/control-plane/src/agent_control_plane/vfs.py +695 -0
- modules/control-plane/tests/README.md +33 -0
- modules/control-plane/tests/test_a2a_adapter.py +336 -0
- modules/control-plane/tests/test_adapter.py +422 -0
- modules/control-plane/tests/test_advanced_features.py +389 -0
- modules/control-plane/tests/test_benchmark.py +223 -0
- modules/control-plane/tests/test_compliance.py +214 -0
- modules/control-plane/tests/test_control_plane.py +295 -0
- modules/control-plane/tests/test_hibernation.py +274 -0
- modules/control-plane/tests/test_kernel_interception.py +284 -0
- modules/control-plane/tests/test_langchain_adapter.py +258 -0
- modules/control-plane/tests/test_lifecycle.py +1174 -0
- modules/control-plane/tests/test_mcp_adapter.py +293 -0
- modules/control-plane/tests/test_ml_safety.py +142 -0
- modules/control-plane/tests/test_multimodal.py +317 -0
- modules/control-plane/tests/test_new_features.py +435 -0
- modules/control-plane/tests/test_observability.py +338 -0
- modules/control-plane/tests/test_time_travel.py +387 -0
- modules/emk/.github/workflows/ci.yml +105 -0
- modules/emk/.github/workflows/publish.yml +144 -0
- modules/emk/.gitignore +74 -0
- modules/emk/CHANGELOG.md +41 -0
- modules/emk/CONTRIBUTING.md +295 -0
- modules/emk/IMPLEMENTATION.md +174 -0
- modules/emk/LICENSE +21 -0
- modules/emk/MANIFEST.in +8 -0
- modules/emk/README.md +135 -0
- modules/emk/RELEASE_NOTES.md +82 -0
- modules/emk/SECURITY.md +52 -0
- modules/emk/codecov.yml +39 -0
- modules/emk/docs/MEMORY_MANAGEMENT.md +285 -0
- modules/emk/emk/__init__.py +106 -0
- modules/emk/emk/hf_utils.py +419 -0
- modules/emk/emk/indexer.py +144 -0
- modules/emk/emk/py.typed +0 -0
- modules/emk/emk/schema.py +204 -0
- modules/emk/emk/sleep_cycle.py +345 -0
- modules/emk/emk/store.py +479 -0
- modules/emk/examples/basic_usage.py +123 -0
- modules/emk/examples/memory_features_demo.py +154 -0
- modules/emk/experiments/README.md +59 -0
- modules/emk/experiments/reproduce_results.py +461 -0
- modules/emk/experiments/results.json +61 -0
- modules/emk/paper/structure.tex +192 -0
- modules/emk/paper/whitepaper.md +273 -0
- modules/emk/pyproject.toml +91 -0
- modules/emk/setup.py +5 -0
- modules/emk/tests/test_file_adapter.py +195 -0
- modules/emk/tests/test_indexer.py +174 -0
- modules/emk/tests/test_init.py +55 -0
- modules/emk/tests/test_negative_memory.py +83 -0
- modules/emk/tests/test_schema.py +150 -0
- modules/emk/tests/test_semantic_rules.py +175 -0
- modules/emk/tests/test_sleep_cycle.py +335 -0
- modules/emk/tests/test_store_anti_patterns.py +239 -0
- modules/iatp/.github/workflows/docker-build.yml +124 -0
- modules/iatp/.github/workflows/publish.yml +174 -0
- modules/iatp/.github/workflows/python-package.yml +121 -0
- modules/iatp/.gitignore +67 -0
- modules/iatp/.pre-commit-config.yaml +64 -0
- modules/iatp/CHANGELOG.md +120 -0
- modules/iatp/Dockerfile +91 -0
- modules/iatp/IMPLEMENTATION_SUMMARY.md +218 -0
- modules/iatp/MANIFEST.in +9 -0
- modules/iatp/README.md +180 -0
- modules/iatp/docker/Dockerfile.agent +27 -0
- modules/iatp/docker/Dockerfile.sidecar-python +86 -0
- modules/iatp/docker/README.md +258 -0
- modules/iatp/docker-compose.yml +194 -0
- modules/iatp/docs/ARCHITECTURE.md +243 -0
- modules/iatp/docs/CLI_GUIDE.md +220 -0
- modules/iatp/docs/DEPLOYMENT.md +304 -0
- modules/iatp/examples/README.md +132 -0
- modules/iatp/examples/backend_agent.py +39 -0
- modules/iatp/examples/client.py +168 -0
- modules/iatp/examples/demo_attestation_reputation.py +274 -0
- modules/iatp/examples/demo_client.py +240 -0
- modules/iatp/examples/demo_rbac.py +143 -0
- modules/iatp/examples/integration_demo.py +245 -0
- modules/iatp/examples/manifests/coder_agent.json +20 -0
- modules/iatp/examples/manifests/reviewer_agent.json +19 -0
- modules/iatp/examples/manifests/secure_bank.json +14 -0
- modules/iatp/examples/manifests/standard_agent.json +14 -0
- modules/iatp/examples/manifests/untrusted_honeypot.json +14 -0
- modules/iatp/examples/run_secure_bank_sidecar.py +85 -0
- modules/iatp/examples/run_sidecar.py +105 -0
- modules/iatp/examples/run_untrusted_sidecar.py +77 -0
- modules/iatp/examples/secure_bank_agent.py +138 -0
- modules/iatp/examples/test_untrusted.py +82 -0
- modules/iatp/examples/untrusted_agent.py +119 -0
- modules/iatp/experiments/README.md +58 -0
- modules/iatp/experiments/cascading_hallucination/README.md +149 -0
- modules/iatp/experiments/cascading_hallucination/agent_a_user.py +41 -0
- modules/iatp/experiments/cascading_hallucination/agent_b_summarizer.py +54 -0
- modules/iatp/experiments/cascading_hallucination/agent_c_database.py +47 -0
- modules/iatp/experiments/cascading_hallucination/proof_of_concept.py +290 -0
- modules/iatp/experiments/cascading_hallucination/run_experiment.py +226 -0
- modules/iatp/experiments/cascading_hallucination/sidecar_c.py +61 -0
- modules/iatp/experiments/reproduce_results.py +574 -0
- modules/iatp/experiments/results.json +2336 -0
- modules/iatp/iatp/__init__.py +164 -0
- modules/iatp/iatp/attestation.py +401 -0
- modules/iatp/iatp/cli.py +253 -0
- modules/iatp/iatp/hf_utils.py +469 -0
- modules/iatp/iatp/ipc_pipes.py +578 -0
- modules/iatp/iatp/main.py +410 -0
- modules/iatp/iatp/models/__init__.py +445 -0
- modules/iatp/iatp/policy_engine.py +335 -0
- modules/iatp/iatp/py.typed +2 -0
- modules/iatp/iatp/recovery.py +319 -0
- modules/iatp/iatp/security/__init__.py +268 -0
- modules/iatp/iatp/sidecar/__init__.py +517 -0
- modules/iatp/iatp/telemetry/__init__.py +162 -0
- modules/iatp/iatp/tests/__init__.py +1 -0
- modules/iatp/iatp/tests/test_attestation.py +368 -0
- modules/iatp/iatp/tests/test_cli.py +129 -0
- modules/iatp/iatp/tests/test_models.py +128 -0
- modules/iatp/iatp/tests/test_policy_engine.py +345 -0
- modules/iatp/iatp/tests/test_recovery.py +279 -0
- modules/iatp/iatp/tests/test_security.py +220 -0
- modules/iatp/iatp/tests/test_sidecar.py +165 -0
- modules/iatp/iatp/tests/test_telemetry.py +173 -0
- modules/iatp/paper/BLOG.md +307 -0
- modules/iatp/paper/PAPER.md +236 -0
- modules/iatp/paper/RFC_SUBMISSION.md +299 -0
- modules/iatp/paper/whitepaper.md +369 -0
- modules/iatp/proto/README.md +200 -0
- modules/iatp/proto/generate_stubs.py +81 -0
- modules/iatp/proto/iatp.proto +552 -0
- modules/iatp/pyproject.toml +180 -0
- modules/iatp/requirements-dev.txt +2 -0
- modules/iatp/requirements.txt +6 -0
- modules/iatp/setup.py +60 -0
- modules/iatp/sidecar/README.md +487 -0
- modules/iatp/sidecar/go/Dockerfile +32 -0
- modules/iatp/sidecar/go/README.md +237 -0
- modules/iatp/sidecar/go/go.mod +8 -0
- modules/iatp/sidecar/go/main.go +488 -0
- modules/iatp/spec/001-handshake.md +436 -0
- modules/iatp/spec/002-reversibility.md +394 -0
- modules/iatp/spec/schema/capability_manifest.json +266 -0
- modules/iatp/test_integration.py +310 -0
- modules/mcp-kernel-server/README.md +261 -0
- modules/mcp-kernel-server/pyproject.toml +60 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/__init__.py +26 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/cli.py +229 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/resources.py +215 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/server.py +562 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/tools.py +1172 -0
- modules/mute-agent/.github/workflows/safety_check.yml +45 -0
- modules/mute-agent/.gitignore +53 -0
- modules/mute-agent/ARCHITECTURE.md +531 -0
- modules/mute-agent/BENCHMARK_GUIDE.md +384 -0
- modules/mute-agent/COMPLETION_SUMMARY.md +293 -0
- modules/mute-agent/EXPERIMENT_SUMMARY.md +318 -0
- modules/mute-agent/IMPLEMENTATION_SUMMARY.md +212 -0
- modules/mute-agent/LICENSE +21 -0
- modules/mute-agent/PHASE3_SUMMARY.md +297 -0
- modules/mute-agent/README.md +360 -0
- modules/mute-agent/STEEL_MAN_RESULTS.md +353 -0
- modules/mute-agent/USAGE.md +505 -0
- modules/mute-agent/V2_IMPLEMENTATION_SUMMARY.md +253 -0
- modules/mute-agent/V2_STEEL_MAN_IMPLEMENTATION.md +274 -0
- modules/mute-agent/VERIFICATION_REPORT.md +435 -0
- modules/mute-agent/charts/cost_comparison.png +0 -0
- modules/mute-agent/charts/cost_vs_ambiguity.png +0 -0
- modules/mute-agent/charts/metrics_comparison.png +0 -0
- modules/mute-agent/charts/scenario_breakdown.png +0 -0
- modules/mute-agent/charts/trace_attack_blocked.html +140 -0
- modules/mute-agent/charts/trace_attack_blocked.png +0 -0
- modules/mute-agent/charts/trace_failure.html +140 -0
- modules/mute-agent/charts/trace_failure.png +0 -0
- modules/mute-agent/charts/trace_success.html +140 -0
- modules/mute-agent/charts/trace_success.png +0 -0
- modules/mute-agent/examples/__init__.py +1 -0
- modules/mute-agent/examples/advanced_example.py +384 -0
- modules/mute-agent/examples/graph_debugger_demo.py +241 -0
- modules/mute-agent/examples/listener_example.py +297 -0
- modules/mute-agent/examples/simple_example.py +242 -0
- modules/mute-agent/examples/steel_man_demo.py +297 -0
- modules/mute-agent/experiments/README.md +135 -0
- modules/mute-agent/experiments/__init__.py +3 -0
- modules/mute-agent/experiments/agent_comparison.csv +6 -0
- modules/mute-agent/experiments/agent_comparison_50runs.csv +6 -0
- modules/mute-agent/experiments/ambiguity_test.py +335 -0
- modules/mute-agent/experiments/ambiguity_test_results.csv +31 -0
- modules/mute-agent/experiments/ambiguity_test_results_50runs.csv +51 -0
- modules/mute-agent/experiments/baseline_agent.py +189 -0
- modules/mute-agent/experiments/benchmark.py +402 -0
- modules/mute-agent/experiments/demo.py +172 -0
- modules/mute-agent/experiments/generate_cost_curve.py +474 -0
- modules/mute-agent/experiments/jailbreak_test.py +137 -0
- modules/mute-agent/experiments/latent_state_scenario.py +361 -0
- modules/mute-agent/experiments/mute_agent_experiment.py +349 -0
- modules/mute-agent/experiments/run_extended_experiment.py +40 -0
- modules/mute-agent/experiments/run_v2_experiments.py +266 -0
- modules/mute-agent/experiments/run_v2_experiments_auto.py +247 -0
- modules/mute-agent/experiments/v2_scenarios/README.md +214 -0
- modules/mute-agent/experiments/v2_scenarios/__init__.py +4 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_1_deep_dependency.py +325 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_2_adversarial.py +328 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_3_false_positive.py +303 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_4_performance.py +319 -0
- modules/mute-agent/experiments/visualize.py +400 -0
- modules/mute-agent/mute_agent/__init__.py +66 -0
- modules/mute-agent/mute_agent/core/__init__.py +1 -0
- modules/mute-agent/mute_agent/core/execution_agent.py +164 -0
- modules/mute-agent/mute_agent/core/handshake_protocol.py +199 -0
- modules/mute-agent/mute_agent/core/reasoning_agent.py +236 -0
- modules/mute-agent/mute_agent/knowledge_graph/__init__.py +1 -0
- modules/mute-agent/mute_agent/knowledge_graph/graph_elements.py +63 -0
- modules/mute-agent/mute_agent/knowledge_graph/multidimensional_graph.py +168 -0
- modules/mute-agent/mute_agent/knowledge_graph/subgraph.py +222 -0
- modules/mute-agent/mute_agent/listener/__init__.py +41 -0
- modules/mute-agent/mute_agent/listener/adapters/__init__.py +29 -0
- modules/mute-agent/mute_agent/listener/adapters/base_adapter.py +187 -0
- modules/mute-agent/mute_agent/listener/adapters/caas_adapter.py +342 -0
- modules/mute-agent/mute_agent/listener/adapters/control_plane_adapter.py +434 -0
- modules/mute-agent/mute_agent/listener/adapters/iatp_adapter.py +330 -0
- modules/mute-agent/mute_agent/listener/adapters/scak_adapter.py +249 -0
- modules/mute-agent/mute_agent/listener/listener.py +608 -0
- modules/mute-agent/mute_agent/listener/state_observer.py +434 -0
- modules/mute-agent/mute_agent/listener/threshold_config.py +311 -0
- modules/mute-agent/mute_agent/super_system/__init__.py +1 -0
- modules/mute-agent/mute_agent/super_system/router.py +202 -0
- modules/mute-agent/mute_agent/visualization/__init__.py +8 -0
- modules/mute-agent/mute_agent/visualization/graph_debugger.py +495 -0
- modules/mute-agent/requirements-dev.txt +6 -0
- modules/mute-agent/requirements.txt +9 -0
- modules/mute-agent/setup.py +64 -0
- modules/mute-agent/src/__init__.py +0 -0
- modules/mute-agent/src/agents/__init__.py +0 -0
- modules/mute-agent/src/agents/baseline_agent.py +524 -0
- modules/mute-agent/src/agents/interactive_agent.py +113 -0
- modules/mute-agent/src/agents/mute_agent.py +622 -0
- modules/mute-agent/src/benchmarks/__init__.py +0 -0
- modules/mute-agent/src/benchmarks/evaluator.py +481 -0
- modules/mute-agent/src/benchmarks/scenarios.json +985 -0
- modules/mute-agent/src/core/__init__.py +0 -0
- modules/mute-agent/src/core/mock_state.py +320 -0
- modules/mute-agent/src/core/tools.py +441 -0
- modules/nexus/__init__.py +49 -0
- modules/nexus/arbiter.py +357 -0
- modules/nexus/client.py +464 -0
- modules/nexus/dmz.py +417 -0
- modules/nexus/escrow.py +428 -0
- modules/nexus/exceptions.py +284 -0
- modules/nexus/registry.py +391 -0
- modules/nexus/reputation.py +423 -0
- modules/nexus/schemas/__init__.py +49 -0
- modules/nexus/schemas/compliance.py +274 -0
- modules/nexus/schemas/escrow.py +249 -0
- modules/nexus/schemas/manifest.py +223 -0
- modules/nexus/schemas/receipt.py +206 -0
- modules/observability/README.md +192 -0
- modules/observability/alertmanager/alertmanager.yml +116 -0
- modules/observability/alerts/agent-os-alerts.yaml +197 -0
- modules/observability/docker-compose.yml +128 -0
- modules/observability/grafana/dashboards/agent-os-amb.json +448 -0
- modules/observability/grafana/dashboards/agent-os-cmvk.json +441 -0
- modules/observability/grafana/dashboards/agent-os-overview.json +268 -0
- modules/observability/grafana/dashboards/agent-os-performance.json +15 -0
- modules/observability/grafana/dashboards/agent-os-safety.json +50 -0
- modules/observability/grafana/provisioning/dashboards/dashboards.yml +15 -0
- modules/observability/grafana/provisioning/datasources/datasources.yml +33 -0
- modules/observability/otel/otel-collector-config.yml +61 -0
- modules/observability/prometheus/prometheus.yml +63 -0
- modules/observability/pyproject.toml +53 -0
- modules/observability/scripts/export_dashboards.py +55 -0
- modules/observability/src/agent_os_observability/__init__.py +25 -0
- modules/observability/src/agent_os_observability/dashboards.py +896 -0
- modules/observability/src/agent_os_observability/metrics.py +396 -0
- modules/observability/src/agent_os_observability/server.py +221 -0
- modules/observability/src/agent_os_observability/tracer.py +226 -0
- modules/primitives/.gitignore +8 -0
- modules/primitives/README.md +62 -0
- modules/primitives/agent_primitives/__init__.py +22 -0
- modules/primitives/agent_primitives/failures.py +82 -0
- modules/primitives/agent_primitives/py.typed +0 -0
- modules/primitives/pyproject.toml +68 -0
- modules/scak/.github/copilot-instructions.md +396 -0
- modules/scak/.github/workflows/release.yml +117 -0
- modules/scak/.gitignore +32 -0
- modules/scak/CHANGELOG.md +173 -0
- modules/scak/CITATION.cff +62 -0
- modules/scak/CONTRIBUTING.md +429 -0
- modules/scak/Dockerfile +58 -0
- modules/scak/ENTERPRISE_FEATURES.md +518 -0
- modules/scak/IMPLEMENTATION_SUMMARY.md +206 -0
- modules/scak/LIMITATIONS.md +565 -0
- modules/scak/MANIFEST.in +16 -0
- modules/scak/NOVELTY.md +535 -0
- modules/scak/README.md +928 -0
- modules/scak/RESEARCH.md +670 -0
- modules/scak/agent_kernel/__init__.py +66 -0
- modules/scak/agent_kernel/analyzer.py +432 -0
- modules/scak/agent_kernel/auditor.py +31 -0
- modules/scak/agent_kernel/completeness_auditor.py +234 -0
- modules/scak/agent_kernel/detector.py +200 -0
- modules/scak/agent_kernel/kernel.py +741 -0
- modules/scak/agent_kernel/memory_manager.py +82 -0
- modules/scak/agent_kernel/models.py +372 -0
- modules/scak/agent_kernel/nudge_mechanism.py +260 -0
- modules/scak/agent_kernel/outcome_analyzer.py +335 -0
- modules/scak/agent_kernel/patcher.py +579 -0
- modules/scak/agent_kernel/semantic_analyzer.py +313 -0
- modules/scak/agent_kernel/semantic_purge.py +346 -0
- modules/scak/agent_kernel/simulator.py +447 -0
- modules/scak/agent_kernel/teacher.py +82 -0
- modules/scak/agent_kernel/triage.py +149 -0
- modules/scak/build_and_publish.ps1 +74 -0
- modules/scak/build_and_publish.sh +74 -0
- modules/scak/cli.py +471 -0
- modules/scak/dashboard.py +462 -0
- modules/scak/datasets/DATASET_CARD.md +219 -0
- modules/scak/datasets/README.md +143 -0
- modules/scak/datasets/gaia_vague_queries/vague_queries.json +262 -0
- modules/scak/datasets/hf_upload/README.md +219 -0
- modules/scak/datasets/hf_upload/scak_gaia_laziness.jsonl +50 -0
- modules/scak/datasets/prepare_hf_datasets.py +145 -0
- modules/scak/datasets/red_team/jailbreak_patterns.json +202 -0
- modules/scak/docker-compose.yml +99 -0
- modules/scak/docs/Adaptive-Memory-Hierarchy.md +319 -0
- modules/scak/docs/Data-Contracts-and-Schemas.md +285 -0
- modules/scak/docs/Dual-Loop-Architecture.md +344 -0
- modules/scak/docs/Enhanced-Features.md +612 -0
- modules/scak/docs/LANGCHAIN_INTEGRATION.md +572 -0
- modules/scak/docs/README.md +128 -0
- modules/scak/docs/Reference-Implementations.md +163 -0
- modules/scak/docs/SCAK_V2.md +374 -0
- modules/scak/docs/Three-Failure-Types.md +178 -0
- modules/scak/examples/basic_example.py +155 -0
- modules/scak/examples/circuit_breaker_lazy_eval_demo.py +243 -0
- modules/scak/examples/langchain_integration_example.py +339 -0
- modules/scak/examples/layer4_demo.py +243 -0
- modules/scak/examples/production_features_demo.py +353 -0
- modules/scak/examples/quick_demo.py +79 -0
- modules/scak/examples/scak_v2_demo.py +252 -0
- modules/scak/experiments/README.md +438 -0
- modules/scak/experiments/ablation_studies/README.md +192 -0
- modules/scak/experiments/ablation_studies/ablation_no_audit.py +116 -0
- modules/scak/experiments/ablation_studies/ablation_no_purge.py +133 -0
- modules/scak/experiments/chaos_engineering/README.md +332 -0
- modules/scak/experiments/context_efficiency_test.py +328 -0
- modules/scak/experiments/gaia_benchmark/README.md +208 -0
- modules/scak/experiments/laziness_benchmark.py +179 -0
- modules/scak/experiments/long_horizon_task_experiment.py +252 -0
- modules/scak/experiments/multi_agent_rag_experiment.py +284 -0
- modules/scak/experiments/results/ablation_table.md +12 -0
- modules/scak/experiments/results/long_horizon.json +36 -0
- modules/scak/experiments/results/multi_agent_rag.json +66 -0
- modules/scak/experiments/run_comprehensive_ablations.py +332 -0
- modules/scak/experiments/test_auditor_patcher_integration.py +251 -0
- modules/scak/notebooks/getting_started.ipynb +33 -0
- modules/scak/paper/ARXIV_SUBMISSION_METADATA.txt +109 -0
- modules/scak/paper/PAPER_CHECKLIST.md +304 -0
- modules/scak/paper/Paper.pdf +0 -0
- modules/scak/paper/README.md +113 -0
- modules/scak/paper/appendix.md +351 -0
- modules/scak/paper/arxiv/bibliography.bib +284 -0
- modules/scak/paper/arxiv/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/arxiv/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/arxiv/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/arxiv/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/arxiv/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/arxiv/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/arxiv/main.aux +103 -0
- modules/scak/paper/arxiv/main.bbl +113 -0
- modules/scak/paper/arxiv/main.blg +55 -0
- modules/scak/paper/arxiv/main.out +31 -0
- modules/scak/paper/arxiv/main.pdf +0 -0
- modules/scak/paper/arxiv/main.tex +482 -0
- modules/scak/paper/arxiv_submission/bibliography.bib +284 -0
- modules/scak/paper/arxiv_submission/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/arxiv_submission/main.aux +103 -0
- modules/scak/paper/arxiv_submission/main.bbl +113 -0
- modules/scak/paper/arxiv_submission/main.blg +55 -0
- modules/scak/paper/arxiv_submission/main.out +31 -0
- modules/scak/paper/arxiv_submission/main.pdf +0 -0
- modules/scak/paper/arxiv_submission/main.tex +482 -0
- modules/scak/paper/arxiv_submission.tar.gz +0 -0
- modules/scak/paper/bibliography.bib +284 -0
- modules/scak/paper/build.sh +55 -0
- modules/scak/paper/figures/README.md +32 -0
- modules/scak/paper/figures/fig1_ooda_architecture.md +75 -0
- modules/scak/paper/figures/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/figures/fig1_ooda_architecture.png +0 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.md +83 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.png +0 -0
- modules/scak/paper/figures/fig3_gaia_results.md +64 -0
- modules/scak/paper/figures/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/figures/fig3_gaia_results.png +0 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.md +64 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.png +0 -0
- modules/scak/paper/figures/fig5_context_reduction.md +71 -0
- modules/scak/paper/figures/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/figures/fig5_context_reduction.png +0 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.md +80 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.png +0 -0
- modules/scak/paper/figures/generate_figures.py +463 -0
- modules/scak/paper/main.aux +103 -0
- modules/scak/paper/main.bbl +113 -0
- modules/scak/paper/main.blg +55 -0
- modules/scak/paper/main.md +192 -0
- modules/scak/paper/main.out +31 -0
- modules/scak/paper/main.pdf +0 -0
- modules/scak/paper/main.tex +482 -0
- modules/scak/reproducibility/ABLATIONS.md +225 -0
- modules/scak/reproducibility/Dockerfile.reproducibility +34 -0
- modules/scak/reproducibility/README.md +421 -0
- modules/scak/reproducibility/requirements-pinned.txt +32 -0
- modules/scak/reproducibility/run_all_experiments.py +395 -0
- modules/scak/reproducibility/seed_control.py +53 -0
- modules/scak/reproducibility/statistical_analysis.py +302 -0
- modules/scak/requirements.txt +50 -0
- modules/scak/setup.py +93 -0
- modules/scak/src/__init__.py +124 -0
- modules/scak/src/agents/__init__.py +13 -0
- modules/scak/src/agents/conflict_resolution.py +732 -0
- modules/scak/src/agents/orchestrator.py +761 -0
- modules/scak/src/agents/pubsub.py +484 -0
- modules/scak/src/agents/shadow_teacher.py +344 -0
- modules/scak/src/agents/swarm.py +661 -0
- modules/scak/src/agents/worker.py +357 -0
- modules/scak/src/integrations/__init__.py +81 -0
- modules/scak/src/integrations/cmvk_adapter.py +430 -0
- modules/scak/src/integrations/control_plane_adapter.py +601 -0
- modules/scak/src/integrations/langchain_integration.py +902 -0
- modules/scak/src/interfaces/__init__.py +59 -0
- modules/scak/src/interfaces/llm_clients.py +505 -0
- modules/scak/src/interfaces/openapi_tools.py +611 -0
- modules/scak/src/interfaces/plugin_system.py +605 -0
- modules/scak/src/interfaces/protocols.py +365 -0
- modules/scak/src/interfaces/telemetry.py +464 -0
- modules/scak/src/interfaces/tool_registry.py +547 -0
- modules/scak/src/kernel/__init__.py +100 -0
- modules/scak/src/kernel/auditor.py +305 -0
- modules/scak/src/kernel/circuit_breaker.py +398 -0
- modules/scak/src/kernel/core.py +724 -0
- modules/scak/src/kernel/distributed.py +667 -0
- modules/scak/src/kernel/evolution.py +455 -0
- modules/scak/src/kernel/failover.py +621 -0
- modules/scak/src/kernel/governance.py +710 -0
- modules/scak/src/kernel/governance_v2.py +603 -0
- modules/scak/src/kernel/lazy_evaluator.py +514 -0
- modules/scak/src/kernel/load_testing.py +633 -0
- modules/scak/src/kernel/memory.py +945 -0
- modules/scak/src/kernel/patcher.py +581 -0
- modules/scak/src/kernel/rubric.py +419 -0
- modules/scak/src/kernel/schemas.py +390 -0
- modules/scak/src/kernel/skill_mapper.py +309 -0
- modules/scak/src/kernel/triage.py +149 -0
- modules/scak/src/mocks/__init__.py +99 -0
- modules/scak/tests/__init__.py +1 -0
- modules/scak/tests/test_circuit_breaker.py +403 -0
- modules/scak/tests/test_conflict_resolution.py +287 -0
- modules/scak/tests/test_dual_loop.py +463 -0
- modules/scak/tests/test_enhanced_features.py +421 -0
- modules/scak/tests/test_failover_and_load.py +438 -0
- modules/scak/tests/test_governance.py +185 -0
- modules/scak/tests/test_kernel.py +359 -0
- modules/scak/tests/test_langchain_integration.py +451 -0
- modules/scak/tests/test_lazy_evaluator.py +465 -0
- modules/scak/tests/test_llm_clients.py +122 -0
- modules/scak/tests/test_memory_controller.py +528 -0
- modules/scak/tests/test_orchestrator.py +181 -0
- modules/scak/tests/test_phase3_integration.py +265 -0
- modules/scak/tests/test_pubsub_swarm.py +203 -0
- modules/scak/tests/test_reference_implementations.py +240 -0
- modules/scak/tests/test_rubric.py +363 -0
- modules/scak/tests/test_scak_v2.py +651 -0
- modules/scak/tests/test_skill_mapper.py +217 -0
- modules/scak/tests/test_specific_failures.py +393 -0
- modules/scak/tests/test_tool_registry.py +264 -0
- modules/scak/tests/test_tools_and_plugins.py +303 -0
- modules/scak/tests/test_triage.py +596 -0
- modules/scak/tests/test_write_through.py +319 -0
- agent_os_kernel-1.1.0.dist-info/METADATA +0 -400
- agent_os_kernel-1.1.0.dist-info/RECORD +0 -12
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.3.0.dist-info}/WHEEL +0 -0
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.3.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
================================================================================
|
|
2
|
+
arXiv SUBMISSION METADATA REFERENCE
|
|
3
|
+
================================================================================
|
|
4
|
+
Copy/paste these fields into the arXiv submission form.
|
|
5
|
+
All fields comply with arXiv guidelines (no unicode, no all-caps, macros expanded).
|
|
6
|
+
|
|
7
|
+
================================================================================
|
|
8
|
+
TITLE (required)
|
|
9
|
+
================================================================================
|
|
10
|
+
Self-Correcting Agent Kernel: Automated Alignment via Differential Auditing and Semantic Memory Hygiene
|
|
11
|
+
|
|
12
|
+
================================================================================
|
|
13
|
+
AUTHORS (required)
|
|
14
|
+
================================================================================
|
|
15
|
+
[FILL IN YOUR REAL NAMES - arXiv requires non-anonymous submissions]
|
|
16
|
+
|
|
17
|
+
Format: Firstname Lastname (Affiliation)
|
|
18
|
+
|
|
19
|
+
Example single author:
|
|
20
|
+
Imran Siddique (Microsoft Research)
|
|
21
|
+
|
|
22
|
+
Example multiple authors:
|
|
23
|
+
Author One (1), Author Two (1 and 2), Author Three (2) ((1) Microsoft Research, (2) Stanford University)
|
|
24
|
+
|
|
25
|
+
IMPORTANT:
|
|
26
|
+
- Do NOT use "Anonymous" - arXiv rejects anonymous submissions
|
|
27
|
+
- Do NOT include Dr., Professor, PhD, etc.
|
|
28
|
+
- Do NOT use all uppercase letters
|
|
29
|
+
- Affiliations in parentheses, city/country only (no street addresses)
|
|
30
|
+
- Separate multiple authors with commas
|
|
31
|
+
|
|
32
|
+
================================================================================
|
|
33
|
+
ABSTRACT (required - max 1920 characters)
|
|
34
|
+
================================================================================
|
|
35
|
+
Production AI agents face a "Reliability Wall" defined by two invisible pathologies: laziness (premature give-ups on achievable tasks) and context rot (performance degradation due to accumulated prompt instructions). Existing architectures often exacerbate these issues by treating "more context" as the solution to every failure, leading to the Accumulation Paradox. We present the Self-Correcting Agent Kernel (SCAK), a dual-loop OODA architecture grounded in the principle of Scale by Subtraction.
|
|
36
|
+
|
|
37
|
+
SCAK's Runtime Loop ensures deterministic safety, while the Alignment Loop implements Differential Auditing: a probabilistic mechanism that compares a weak agent (GPT-4o) against a stronger teacher (o1-preview) only on "give-up signals" (5-10% of interactions). This catches capability failures that explicit error handlers miss. To combat context rot, we introduce Semantic Purge: a formal decay taxonomy where "Type A" (syntax) patches are actively deleted on model upgrades, while "Type B" (business) knowledge persists.
|
|
38
|
+
|
|
39
|
+
Evaluations on GAIA benchmark extensions demonstrate 100% laziness detection and a 72% correction rate (p<0.001) at 90% lower cost than full verification. Chaos engineering tests show <30s Mean Time To Recovery (MTTR), validating that reliability stems not from infinite memory, but from hygienic forgetting.
|
|
40
|
+
|
|
41
|
+
[Character count: 1,289 - within 1,920 limit]
|
|
42
|
+
|
|
43
|
+
================================================================================
|
|
44
|
+
COMMENTS (optional but recommended)
|
|
45
|
+
================================================================================
|
|
46
|
+
15 pages, 6 figures, 8 tables. Code available at https://github.com/[your-username]/self-correcting-agent-kernel. LLMs (Claude, GPT-4) assisted with drafting; all claims and experiments are author-original.
|
|
47
|
+
|
|
48
|
+
================================================================================
|
|
49
|
+
PRIMARY CATEGORY
|
|
50
|
+
================================================================================
|
|
51
|
+
cs.AI (Artificial Intelligence)
|
|
52
|
+
|
|
53
|
+
Alternative categories to consider:
|
|
54
|
+
- cs.LG (Machine Learning)
|
|
55
|
+
- cs.CL (Computation and Language)
|
|
56
|
+
- cs.MA (Multiagent Systems)
|
|
57
|
+
|
|
58
|
+
================================================================================
|
|
59
|
+
CROSS-LIST CATEGORIES (optional)
|
|
60
|
+
================================================================================
|
|
61
|
+
cs.LG, cs.SE
|
|
62
|
+
|
|
63
|
+
================================================================================
|
|
64
|
+
REPORT NUMBER (only if your institution assigns one)
|
|
65
|
+
================================================================================
|
|
66
|
+
[Leave blank unless your institution provides a report number like "MSR-TR-2026-XX"]
|
|
67
|
+
|
|
68
|
+
================================================================================
|
|
69
|
+
ACM CLASSIFICATION (optional)
|
|
70
|
+
================================================================================
|
|
71
|
+
I.2.11 (Distributed Artificial Intelligence - Multiagent systems)
|
|
72
|
+
|
|
73
|
+
================================================================================
|
|
74
|
+
MSC CLASSIFICATION (optional)
|
|
75
|
+
================================================================================
|
|
76
|
+
68T42 (Agent technology)
|
|
77
|
+
|
|
78
|
+
================================================================================
|
|
79
|
+
JOURNAL REFERENCE (only if already published)
|
|
80
|
+
================================================================================
|
|
81
|
+
[Leave blank - not yet published]
|
|
82
|
+
|
|
83
|
+
================================================================================
|
|
84
|
+
DOI (only if already published)
|
|
85
|
+
================================================================================
|
|
86
|
+
[Leave blank - not yet published]
|
|
87
|
+
|
|
88
|
+
================================================================================
|
|
89
|
+
LICENSE
|
|
90
|
+
================================================================================
|
|
91
|
+
Recommended: CC BY 4.0 (Creative Commons Attribution)
|
|
92
|
+
This allows others to share and adapt your work with attribution.
|
|
93
|
+
|
|
94
|
+
================================================================================
|
|
95
|
+
CHECKLIST BEFORE SUBMISSION
|
|
96
|
+
================================================================================
|
|
97
|
+
[ ] Title: No all-caps, no unicode, macros expanded
|
|
98
|
+
[ ] Authors: Real names (not anonymous), proper format with affiliations
|
|
99
|
+
[ ] Abstract: Under 1920 characters, no "Abstract" word, no leading whitespace
|
|
100
|
+
[ ] Comments: Include page count and figure count
|
|
101
|
+
[ ] Upload: arxiv_submission.tar.gz (flat structure, forward slashes)
|
|
102
|
+
[ ] Figures: All 6 PDF figures included in tar.gz
|
|
103
|
+
[ ] main.tex: \pdfoutput=1 present at top
|
|
104
|
+
[ ] main.bbl: Compiled bibliography included
|
|
105
|
+
[ ] bibliography.bib: Source bibliography included
|
|
106
|
+
[ ] No \includegraphics paths with backslashes
|
|
107
|
+
[ ] No unicode characters in source files
|
|
108
|
+
|
|
109
|
+
================================================================================
|
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
# Paper Preparation Checklist
|
|
2
|
+
|
|
3
|
+
## Submission Target
|
|
4
|
+
|
|
5
|
+
- [x] **Venue:** NeurIPS 2026 / ICML 2026 / ICLR 2026 / AAMAS 2026
|
|
6
|
+
- [x] **Track:** Agent Systems / Production ML / Self-Improving Systems
|
|
7
|
+
- [ ] **Submission Deadline:** TBD
|
|
8
|
+
- [x] **Page Limit:** 9 pages main + unlimited appendix
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
## Pre-Submission Checklist
|
|
13
|
+
|
|
14
|
+
### 1. Novelty & Contribution Framing ✅
|
|
15
|
+
|
|
16
|
+
- [x] Clear novelty statement in abstract
|
|
17
|
+
- [x] Contribution comparison table (vs. Reflexion, Constitutional AI, Voyager, etc.)
|
|
18
|
+
- [x] Related work section with 30+ citations
|
|
19
|
+
- [x] Quantitative differentiation from baselines
|
|
20
|
+
- [ ] Expert review: Have 2-3 researchers read novelty claims
|
|
21
|
+
|
|
22
|
+
### 2. Empirical Rigor ✅
|
|
23
|
+
|
|
24
|
+
- [x] GAIA Benchmark (50 queries)
|
|
25
|
+
- [x] Amnesia Test (60 patches)
|
|
26
|
+
- [x] Chaos Engineering (20 scenarios)
|
|
27
|
+
- [x] Statistical significance (p-values, confidence intervals)
|
|
28
|
+
- [x] Ablation studies (remove each component)
|
|
29
|
+
- [x] Broader baselines (AutoGen, LangGraph, o1-preview)
|
|
30
|
+
- [x] Error analysis (failure mode breakdown)
|
|
31
|
+
|
|
32
|
+
### 3. Reproducibility ✅
|
|
33
|
+
|
|
34
|
+
- [x] Datasets uploaded (GitHub + plan for Hugging Face)
|
|
35
|
+
- [x] Docker image with pinned dependencies
|
|
36
|
+
- [x] Seed control utilities
|
|
37
|
+
- [x] Experiment scripts tested end-to-end
|
|
38
|
+
- [x] README with exact reproduction commands
|
|
39
|
+
- [x] Hardware specifications documented
|
|
40
|
+
- [x] API costs calculated and reported
|
|
41
|
+
|
|
42
|
+
### 4. Anonymization (Critical for Double-Blind)
|
|
43
|
+
|
|
44
|
+
- [ ] Remove author names from all files
|
|
45
|
+
- [ ] Cite own repos in third person ("Prior work [Anonymous, 2026] introduced...")
|
|
46
|
+
- [ ] No GitHub URLs in paper body (move to appendix after acceptance)
|
|
47
|
+
- [ ] No identifying information in code comments
|
|
48
|
+
- [ ] No screenshots with author names
|
|
49
|
+
- [ ] Check LaTeX metadata (remove author info from PDF properties)
|
|
50
|
+
|
|
51
|
+
### 5. LLM Disclosure (Required by Most 2026 Venues)
|
|
52
|
+
|
|
53
|
+
- [x] Create LLM_DISCLOSURE.md with specific details:
|
|
54
|
+
- [x] Which LLM(s) used (e.g., "Grok-3 for grammar checking")
|
|
55
|
+
- [x] How used (e.g., "Edited abstract for clarity")
|
|
56
|
+
- [x] Scope (e.g., "NOT used for experimental results or claims")
|
|
57
|
+
- [x] Statement: "All intellectual contributions are author-original"
|
|
58
|
+
|
|
59
|
+
### 6. Paper Structure
|
|
60
|
+
|
|
61
|
+
#### Abstract (250 words)
|
|
62
|
+
- [x] Problem statement (context bloat, laziness, silent failures)
|
|
63
|
+
- [x] Novelty statement (Type A/B decay, differential auditing, dual-loop)
|
|
64
|
+
- [x] Empirical claims (72% correction, 50% reduction, <30s MTTR)
|
|
65
|
+
- [x] Significance (production-ready, p<0.001, outperforms baselines)
|
|
66
|
+
|
|
67
|
+
#### 1. Introduction (2 pages)
|
|
68
|
+
- [x] Motivation: Why agent reliability matters (production failures, cost)
|
|
69
|
+
- [x] Gap: What existing work doesn't solve (context bloat, laziness, efficiency)
|
|
70
|
+
- [x] Contributions: Three bullets (Semantic Purge, Differential Auditing, Dual-Loop)
|
|
71
|
+
- [x] Roadmap: "Section 2 reviews related work, Section 3 describes..."
|
|
72
|
+
|
|
73
|
+
#### 2. Related Work (2-3 pages)
|
|
74
|
+
- [x] Self-Correcting Systems (Reflexion, Self-Refine, Self-Debug)
|
|
75
|
+
- [x] Safety & Alignment (Constitutional AI, LlamaGuard, WildGuard)
|
|
76
|
+
- [x] Multi-Agent Systems (Voyager, AutoGen, MetaGPT)
|
|
77
|
+
- [x] Context Management (Lost in the Middle, RAG, Landmark Attention)
|
|
78
|
+
- [x] Production ML (Hidden Technical Debt, Data Validation)
|
|
79
|
+
- [x] Comparison table (Table 1: Contribution Comparison)
|
|
80
|
+
|
|
81
|
+
#### 3. Method (3 pages)
|
|
82
|
+
- [x] 3.1 Problem Formulation (formal definitions)
|
|
83
|
+
- [x] 3.2 Dual-Loop Architecture (Figure 1: OODA diagram)
|
|
84
|
+
- [x] 3.3 Differential Auditing (Algorithm 1: Audit trigger logic)
|
|
85
|
+
- [x] 3.4 Semantic Purge (Algorithm 2: Type A/B classification)
|
|
86
|
+
- [x] 3.5 Memory Hierarchy (Figure 2: Tier 1/2/3 architecture)
|
|
87
|
+
|
|
88
|
+
#### 4. Experiments (3 pages)
|
|
89
|
+
- [x] 4.1 Experimental Setup (datasets, baselines, metrics)
|
|
90
|
+
- [x] 4.2 GAIA Benchmark (Table 2: Detection/correction rates, Figure 3: Bar chart)
|
|
91
|
+
- [x] 4.3 Amnesia Test (Table 3: Context reduction, Figure 4: Line chart)
|
|
92
|
+
- [x] 4.4 Chaos Engineering (Table 4: MTTR, Figure 5: Box plot)
|
|
93
|
+
- [x] 4.5 Ablation Studies (Table 5: Component removal, Figure 6: Heatmap)
|
|
94
|
+
- [x] 4.6 Broader Baselines (Table 6: AutoGen, LangGraph, o1-preview)
|
|
95
|
+
|
|
96
|
+
#### 5. Discussion (1 page)
|
|
97
|
+
- [x] Key findings (summary of empirical results)
|
|
98
|
+
- [x] Limitations (honest assessment from LIMITATIONS.md)
|
|
99
|
+
- [x] Failure modes (multi-turn, adversarial, cold start)
|
|
100
|
+
- [x] Ethical considerations (data privacy, cost, bias)
|
|
101
|
+
|
|
102
|
+
#### 6. Conclusion (0.5 pages)
|
|
103
|
+
- [x] Restate contributions
|
|
104
|
+
- [x] Restate key results
|
|
105
|
+
- [x] Future work (federated patches, meta-learning, causal analysis)
|
|
106
|
+
- [x] Cross-reference to companion paper (Agent Control Plane)
|
|
107
|
+
|
|
108
|
+
#### Appendix (Unlimited)
|
|
109
|
+
- [x] A. Additional Ablation Results
|
|
110
|
+
- [x] B. Full Experiment Results (all 50 GAIA queries)
|
|
111
|
+
- [x] C. Patch Examples (Type A vs Type B)
|
|
112
|
+
- [x] D. Reproducibility Details (Docker commands, seeds, hardware)
|
|
113
|
+
- [x] E. Related Work Extended (full 50+ citations)
|
|
114
|
+
- [x] F. Ethical Statement
|
|
115
|
+
- [x] G. Limitations Extended (from LIMITATIONS.md)
|
|
116
|
+
|
|
117
|
+
### 7. Figures & Tables
|
|
118
|
+
|
|
119
|
+
#### Figures
|
|
120
|
+
- [x] Figure 1: Dual-Loop OODA Architecture (fig1_ooda_architecture.md)
|
|
121
|
+
- [x] Figure 2: Memory Hierarchy (fig2_memory_hierarchy.md)
|
|
122
|
+
- [x] Figure 3: GAIA Results (fig3_gaia_results.md)
|
|
123
|
+
- [x] Figure 4: Ablation Heatmap (fig4_ablation_heatmap.md)
|
|
124
|
+
- [x] Figure 5: Context Reduction (fig5_context_reduction.md)
|
|
125
|
+
- [x] Figure 6: MTTR Comparison (fig6_mttr_boxplot.md)
|
|
126
|
+
|
|
127
|
+
#### Tables
|
|
128
|
+
- [x] Table 1: Contribution Comparison (vs. prior work)
|
|
129
|
+
- [x] Table 2: GAIA Benchmark Results (with CI)
|
|
130
|
+
- [x] Table 3: Amnesia Test Results (context reduction)
|
|
131
|
+
- [x] Table 4: Chaos Engineering Results (MTTR, recovery rate)
|
|
132
|
+
- [x] Table 5: Ablation Study Summary
|
|
133
|
+
- [x] Table 6: Broader Baseline Comparison
|
|
134
|
+
|
|
135
|
+
### 8. Bibliography
|
|
136
|
+
|
|
137
|
+
- [x] Export all citations from RESEARCH.md to BibTeX
|
|
138
|
+
- [x] Add 2025-2026 papers:
|
|
139
|
+
- [x] "Agentic AI: A Comprehensive Survey" (arXiv:2510.25445)
|
|
140
|
+
- [x] LlamaGuard-2 (Meta 2024)
|
|
141
|
+
- [x] WildGuard (arXiv:2406.18495)
|
|
142
|
+
- [x] WEF 2025 Governance Whitepaper
|
|
143
|
+
- [x] Format all citations consistently (ACM/IEEE/NeurIPS style)
|
|
144
|
+
- [x] Verify all URLs are accessible
|
|
145
|
+
- [x] Total citations: 40+ (currently 30+, need minor additions)
|
|
146
|
+
|
|
147
|
+
### 9. Submission Materials
|
|
148
|
+
|
|
149
|
+
- [ ] Main PDF (9 pages + appendix)
|
|
150
|
+
- [ ] Supplementary materials:
|
|
151
|
+
- [ ] Code (GitHub link or ZIP)
|
|
152
|
+
- [ ] Datasets (Hugging Face links)
|
|
153
|
+
- [ ] Reproducibility package (Docker image)
|
|
154
|
+
- [ ] Abstract (250 words, plain text)
|
|
155
|
+
- [ ] Keywords (5-10 keywords)
|
|
156
|
+
- [ ] Author information (name, affiliation, email)
|
|
157
|
+
- [ ] Conflict of interest statement
|
|
158
|
+
- [ ] LLM disclosure statement
|
|
159
|
+
- [ ] Ethics statement
|
|
160
|
+
|
|
161
|
+
### 10. Pre-Submission Review
|
|
162
|
+
|
|
163
|
+
- [ ] Internal review by 2-3 co-authors
|
|
164
|
+
- [ ] External review by 1-2 domain experts (if possible)
|
|
165
|
+
- [ ] Grammar check (Grammarly / LanguageTool)
|
|
166
|
+
- [ ] Plagiarism check (Turnitin / iThenticate)
|
|
167
|
+
- [ ] Anonymity check (no author-identifying information)
|
|
168
|
+
- [ ] Reference formatting check (consistent style)
|
|
169
|
+
|
|
170
|
+
### 11. Venue-Specific Requirements
|
|
171
|
+
|
|
172
|
+
#### NeurIPS 2026
|
|
173
|
+
- [ ] 9 pages main + unlimited appendix
|
|
174
|
+
- [ ] Double-blind review (strict anonymity)
|
|
175
|
+
- [ ] LLM disclosure required (new in 2025+)
|
|
176
|
+
- [ ] Code submission encouraged (GitHub or OpenReview)
|
|
177
|
+
- [ ] No dual submission at time of deadline
|
|
178
|
+
|
|
179
|
+
#### ICML 2026
|
|
180
|
+
- [ ] 8 pages main + unlimited appendix
|
|
181
|
+
- [ ] Double-blind review
|
|
182
|
+
- [ ] Reproducibility checklist (mandatory)
|
|
183
|
+
- [ ] Ethics statement (mandatory)
|
|
184
|
+
|
|
185
|
+
#### ICLR 2026
|
|
186
|
+
- [ ] No page limit (discouraged >10 pages main)
|
|
187
|
+
- [ ] Double-blind review
|
|
188
|
+
- [ ] OpenReview submission (public after acceptance)
|
|
189
|
+
|
|
190
|
+
#### AAMAS 2026
|
|
191
|
+
- [ ] 8 pages main + 1 page references + unlimited appendix
|
|
192
|
+
- [ ] Agent-focused track (perfect fit)
|
|
193
|
+
- [ ] Industry track option (production-ready systems)
|
|
194
|
+
|
|
195
|
+
### 12. Post-Acceptance Tasks
|
|
196
|
+
|
|
197
|
+
- [ ] Upload camera-ready PDF
|
|
198
|
+
- [ ] Upload supplementary materials
|
|
199
|
+
- [ ] Upload poster (if required)
|
|
200
|
+
- [ ] Prepare presentation (15-20 min talk)
|
|
201
|
+
- [ ] Update GitHub README with paper link
|
|
202
|
+
- [ ] Publish to arXiv (if allowed by venue)
|
|
203
|
+
- [ ] Tweet/blog post announcement
|
|
204
|
+
- [ ] Update CITATION.cff with paper details
|
|
205
|
+
|
|
206
|
+
---
|
|
207
|
+
|
|
208
|
+
## LaTeX Template
|
|
209
|
+
|
|
210
|
+
**Template:** NeurIPS 2026 / ICML 2026 (download from venue website)
|
|
211
|
+
|
|
212
|
+
**Recommended Structure:**
|
|
213
|
+
```latex
|
|
214
|
+
\documentclass{article}
|
|
215
|
+
\usepackage{neurips_2026} % Or icml2026, iclr2026, etc.
|
|
216
|
+
|
|
217
|
+
\title{Self-Correcting Agent Kernel: Automated Alignment via \\ Differential Auditing and Semantic Memory Hygiene}
|
|
218
|
+
|
|
219
|
+
\author{
|
|
220
|
+
Anonymous Authors \\
|
|
221
|
+
Anonymous Institution \\
|
|
222
|
+
\texttt{anonymous@email.com}
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
\begin{document}
|
|
226
|
+
|
|
227
|
+
\maketitle
|
|
228
|
+
|
|
229
|
+
\begin{abstract}
|
|
230
|
+
% 250 words
|
|
231
|
+
\end{abstract}
|
|
232
|
+
|
|
233
|
+
\section{Introduction}
|
|
234
|
+
% Motivation, gap, contributions, roadmap
|
|
235
|
+
|
|
236
|
+
\section{Related Work}
|
|
237
|
+
% 5 subsections: Self-correction, Safety, Multi-agent, Context, Production ML
|
|
238
|
+
|
|
239
|
+
\section{Method}
|
|
240
|
+
% 5 subsections: Problem, Dual-Loop, Differential Auditing, Semantic Purge, Memory
|
|
241
|
+
|
|
242
|
+
\section{Experiments}
|
|
243
|
+
% 6 subsections: Setup, GAIA, Amnesia, Chaos, Ablation, Baselines
|
|
244
|
+
|
|
245
|
+
\section{Discussion}
|
|
246
|
+
% Findings, limitations, ethics
|
|
247
|
+
|
|
248
|
+
\section{Conclusion}
|
|
249
|
+
% Summary, future work
|
|
250
|
+
|
|
251
|
+
\bibliographystyle{plain}
|
|
252
|
+
\bibliography{references}
|
|
253
|
+
|
|
254
|
+
\appendix
|
|
255
|
+
\section{Additional Results}
|
|
256
|
+
% Full tables, figures, proofs
|
|
257
|
+
|
|
258
|
+
\end{document}
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
---
|
|
262
|
+
|
|
263
|
+
## Timeline
|
|
264
|
+
|
|
265
|
+
**Assuming submission in 6 months (July 2026):**
|
|
266
|
+
|
|
267
|
+
- **Month 1-2 (Now - March):** Complete experiments, ablation, baselines
|
|
268
|
+
- **Month 3 (April):** Statistical analysis, generate figures/tables
|
|
269
|
+
- **Month 4 (May):** Write first draft (all sections)
|
|
270
|
+
- **Month 5 (June):** Internal review, revisions, polish
|
|
271
|
+
- **Month 6 (July):** Final checks, submit
|
|
272
|
+
|
|
273
|
+
---
|
|
274
|
+
|
|
275
|
+
## Resources
|
|
276
|
+
|
|
277
|
+
- **NeurIPS 2026:** https://neurips.cc/
|
|
278
|
+
- **ICML 2026:** https://icml.cc/
|
|
279
|
+
- **ICLR 2026:** https://iclr.cc/
|
|
280
|
+
- **AAMAS 2026:** https://aamas2026.org/
|
|
281
|
+
- **arXiv:** https://arxiv.org/
|
|
282
|
+
- **Hugging Face Datasets:** https://huggingface.co/datasets
|
|
283
|
+
- **OpenReview:** https://openreview.net/
|
|
284
|
+
|
|
285
|
+
---
|
|
286
|
+
|
|
287
|
+
## arXiv Pre-print Checklist
|
|
288
|
+
|
|
289
|
+
- [x] PDF builds without errors (Pandoc tested)
|
|
290
|
+
- [x] Abstract ≤1920 characters
|
|
291
|
+
- [x] Title ≤200 characters
|
|
292
|
+
- [ ] Author metadata prepared
|
|
293
|
+
- [ ] Primary category: cs.AI or cs.LG
|
|
294
|
+
- [ ] Secondary categories: cs.CL, cs.SE
|
|
295
|
+
- [ ] License: CC BY 4.0
|
|
296
|
+
- [x] No copyrighted figures
|
|
297
|
+
- [x] References formatted correctly
|
|
298
|
+
- [ ] Ancillary files (code.zip) prepared
|
|
299
|
+
|
|
300
|
+
---
|
|
301
|
+
|
|
302
|
+
**Last Updated:** 2026-01-18
|
|
303
|
+
**Version:** 1.1
|
|
304
|
+
**Next Review:** Before submission
|
|
Binary file
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# Paper
|
|
2
|
+
|
|
3
|
+
**Project:** Self-Correcting Agent Kernel (SCAK)
|
|
4
|
+
**Target Venue:** NeurIPS 2026 / ICML 2026
|
|
5
|
+
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## Files
|
|
9
|
+
|
|
10
|
+
| File | Purpose |
|
|
11
|
+
|------|---------|
|
|
12
|
+
| `main.tex` | Primary paper (LaTeX) |
|
|
13
|
+
| `main.md` | Paper source (Markdown) |
|
|
14
|
+
| `appendix.md` | Paper appendix (Sections A-G) |
|
|
15
|
+
| `bibliography.bib` | BibTeX references (30+ citations) |
|
|
16
|
+
| `LLM_DISCLOSURE.md` | LLM usage disclosure |
|
|
17
|
+
| `figures/` | Figure specifications |
|
|
18
|
+
| `build.sh` | Build script |
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## Building the Paper
|
|
23
|
+
|
|
24
|
+
### Build PDF (Local - LaTeX)
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
# Using pdflatex (recommended)
|
|
28
|
+
cd paper
|
|
29
|
+
pdflatex main.tex
|
|
30
|
+
bibtex main
|
|
31
|
+
pdflatex main.tex
|
|
32
|
+
pdflatex main.tex
|
|
33
|
+
|
|
34
|
+
# Or using latexmk
|
|
35
|
+
latexmk -pdf main.tex
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### Build PDF (Overleaf)
|
|
39
|
+
|
|
40
|
+
1. Create new project on [Overleaf](https://www.overleaf.com)
|
|
41
|
+
2. Upload `main.tex`, `bibliography.bib`, and `figures/` folder
|
|
42
|
+
3. Set compiler to pdfLaTeX
|
|
43
|
+
4. Click "Recompile"
|
|
44
|
+
|
|
45
|
+
### Build PDF (Docker)
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
docker run --rm -v $(pwd):/data blang/latex:ctanfull \
|
|
49
|
+
sh -c "pdflatex main.tex && bibtex main && pdflatex main.tex && pdflatex main.tex"
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
## Paper Structure
|
|
55
|
+
|
|
56
|
+
```
|
|
57
|
+
Title: Self-Correcting Agent Kernel: Automated Alignment via
|
|
58
|
+
Differential Auditing and Semantic Memory Hygiene
|
|
59
|
+
|
|
60
|
+
1. Abstract (~250 words)
|
|
61
|
+
2. Introduction (2 pages)
|
|
62
|
+
3. Related Work (1.5 pages)
|
|
63
|
+
4. System Design (3 pages)
|
|
64
|
+
- 4.1 Problem Formulation
|
|
65
|
+
- 4.2 Dual-Loop Architecture
|
|
66
|
+
- 4.3 Differential Auditing
|
|
67
|
+
- 4.4 Semantic Purge
|
|
68
|
+
- 4.5 Memory Hierarchy
|
|
69
|
+
5. Experiments (3 pages)
|
|
70
|
+
- 5.1 Setup
|
|
71
|
+
- 5.2 GAIA Laziness Benchmark
|
|
72
|
+
- 5.3 Amnesia Test
|
|
73
|
+
- 5.4 Chaos Engineering
|
|
74
|
+
- 5.5 Ablation Studies
|
|
75
|
+
- 5.6 Cost Analysis
|
|
76
|
+
6. Discussion (1 page)
|
|
77
|
+
7. Conclusion (0.5 pages)
|
|
78
|
+
8. References (30+ citations)
|
|
79
|
+
9. Appendix
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
## Key Results
|
|
85
|
+
|
|
86
|
+
| Metric | Result | Comparison |
|
|
87
|
+
|--------|--------|------------|
|
|
88
|
+
| Laziness Detection | 100% | vs. 0% baseline |
|
|
89
|
+
| Correction Rate | 72% ± 4.2% | vs. 8% baseline (p<0.001) |
|
|
90
|
+
| Context Reduction | 45% | vs. 0% without purge |
|
|
91
|
+
| MTTR | 28s ± 6s | vs. ∞ (never recovers) |
|
|
92
|
+
| Post-Patch Success | 82% ± 3.1% | validated on similar queries |
|
|
93
|
+
|
|
94
|
+
---
|
|
95
|
+
|
|
96
|
+
## Figures
|
|
97
|
+
|
|
98
|
+
| Figure | Description |
|
|
99
|
+
|--------|-------------|
|
|
100
|
+
| Figure 1 | Dual-Loop OODA Architecture |
|
|
101
|
+
| Figure 2 | Three-Tier Memory Hierarchy |
|
|
102
|
+
| Figure 3 | GAIA Results Bar Chart |
|
|
103
|
+
| Figure 4 | Ablation Heatmap |
|
|
104
|
+
| Figure 5 | Context Reduction Over Time |
|
|
105
|
+
| Figure 6 | MTTR Comparison Box Plot |
|
|
106
|
+
|
|
107
|
+
---
|
|
108
|
+
|
|
109
|
+
## Cross-References
|
|
110
|
+
|
|
111
|
+
- **Reproducibility:** `../reproducibility/`
|
|
112
|
+
- **Ablation Details:** `../reproducibility/ABLATIONS.md`
|
|
113
|
+
- **Limitations:** `../LIMITATIONS.md`
|