agent-os-kernel 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_os/__init__.py +66 -4
- agent_os/agents_compat.py +286 -0
- agent_os/base_agent.py +308 -0
- agent_os/cli.py +1079 -19
- agent_os/integrations/__init__.py +37 -2
- agent_os/integrations/openai_adapter.py +502 -0
- agent_os/integrations/semantic_kernel_adapter.py +569 -0
- agent_os/stateless.py +349 -0
- agent_os_kernel-1.2.0.dist-info/METADATA +676 -0
- agent_os_kernel-1.2.0.dist-info/RECORD +1053 -0
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.2.0.dist-info}/entry_points.txt +0 -1
- modules/amb/.github/workflows/ci.yml +102 -0
- modules/amb/.github/workflows/publish.yml +146 -0
- modules/amb/.gitignore +134 -0
- modules/amb/CHANGELOG.md +118 -0
- modules/amb/CONTRIBUTING.md +141 -0
- modules/amb/LICENSE +21 -0
- modules/amb/README.md +188 -0
- modules/amb/amb_core/__init__.py +175 -0
- modules/amb/amb_core/adapters/__init__.py +55 -0
- modules/amb/amb_core/adapters/aws_sqs_broker.py +374 -0
- modules/amb/amb_core/adapters/azure_servicebus_broker.py +338 -0
- modules/amb/amb_core/adapters/kafka_broker.py +258 -0
- modules/amb/amb_core/adapters/nats_broker.py +283 -0
- modules/amb/amb_core/adapters/rabbitmq_broker.py +233 -0
- modules/amb/amb_core/adapters/redis_broker.py +260 -0
- modules/amb/amb_core/broker.py +143 -0
- modules/amb/amb_core/bus.py +479 -0
- modules/amb/amb_core/cloudevents.py +507 -0
- modules/amb/amb_core/dlq.py +343 -0
- modules/amb/amb_core/hf_utils.py +534 -0
- modules/amb/amb_core/memory_broker.py +408 -0
- modules/amb/amb_core/models.py +139 -0
- modules/amb/amb_core/persistence.py +527 -0
- modules/amb/amb_core/schema.py +292 -0
- modules/amb/amb_core/tracing.py +356 -0
- modules/amb/examples/advanced_features.py +223 -0
- modules/amb/examples/backpressure_demo.py +225 -0
- modules/amb/examples/basic_usage.py +117 -0
- modules/amb/examples/tracing_demo.py +104 -0
- modules/amb/experiments/README.md +52 -0
- modules/amb/experiments/reproduce_results.py +467 -0
- modules/amb/experiments/results.json +324 -0
- modules/amb/paper/README.md +40 -0
- modules/amb/paper/paper.tex +365 -0
- modules/amb/paper/whitepaper.md +377 -0
- modules/amb/pyproject.toml +117 -0
- modules/amb/tests/__init__.py +1 -0
- modules/amb/tests/test_backpressure_priority.py +280 -0
- modules/amb/tests/test_bus.py +198 -0
- modules/amb/tests/test_cloudevents.py +443 -0
- modules/amb/tests/test_features.py +531 -0
- modules/amb/tests/test_models.py +74 -0
- modules/amb/tests/test_tracing.py +254 -0
- modules/atr/.github/workflows/ci.yml +101 -0
- modules/atr/.github/workflows/publish.yml +140 -0
- modules/atr/.gitignore +134 -0
- modules/atr/.pre-commit-config.yaml +37 -0
- modules/atr/CHANGELOG.md +39 -0
- modules/atr/CONTRIBUTING.md +96 -0
- modules/atr/IMPLEMENTATION_SUMMARY.md +143 -0
- modules/atr/README.md +180 -0
- modules/atr/atr/__init__.py +638 -0
- modules/atr/atr/access.py +346 -0
- modules/atr/atr/composition.py +643 -0
- modules/atr/atr/decorator.py +355 -0
- modules/atr/atr/executor.py +382 -0
- modules/atr/atr/health.py +555 -0
- modules/atr/atr/hf_utils.py +447 -0
- modules/atr/atr/injection.py +420 -0
- modules/atr/atr/metrics.py +438 -0
- modules/atr/atr/policies.py +401 -0
- modules/atr/atr/py.typed +2 -0
- modules/atr/atr/registry.py +450 -0
- modules/atr/atr/schema.py +478 -0
- modules/atr/atr/tools/safe/__init__.py +73 -0
- modules/atr/atr/tools/safe/calculator.py +380 -0
- modules/atr/atr/tools/safe/datetime_tool.py +441 -0
- modules/atr/atr/tools/safe/file_reader.py +400 -0
- modules/atr/atr/tools/safe/http_client.py +314 -0
- modules/atr/atr/tools/safe/json_parser.py +372 -0
- modules/atr/atr/tools/safe/text_tool.py +526 -0
- modules/atr/atr/tools/safe/toolkit.py +173 -0
- modules/atr/docs/PYPI_SETUP.md +113 -0
- modules/atr/examples/README.md +27 -0
- modules/atr/examples/demo.py +144 -0
- modules/atr/examples/sandbox_demo.py +218 -0
- modules/atr/experiments/README.md +69 -0
- modules/atr/experiments/reproduce_results.py +509 -0
- modules/atr/experiments/results/.gitkeep +0 -0
- modules/atr/experiments/results/results_20260123_140334.json +71 -0
- modules/atr/paper/README.md +36 -0
- modules/atr/paper/figures/.gitkeep +0 -0
- modules/atr/paper/references.bib +84 -0
- modules/atr/paper/structure.tex +293 -0
- modules/atr/paper/whitepaper.md +234 -0
- modules/atr/pyproject.toml +148 -0
- modules/atr/requirements.txt +1 -0
- modules/atr/setup.py +30 -0
- modules/atr/tests/__init__.py +1 -0
- modules/atr/tests/test_decorator.py +317 -0
- modules/atr/tests/test_executor.py +245 -0
- modules/atr/tests/test_integration_executor.py +184 -0
- modules/atr/tests/test_registry.py +312 -0
- modules/atr/tests/test_schema.py +182 -0
- modules/atr/tests/test_v2_features.py +708 -0
- modules/caas/.dockerignore +63 -0
- modules/caas/.github/ISSUE_TEMPLATE/bug_report.md +38 -0
- modules/caas/.github/ISSUE_TEMPLATE/custom.md +10 -0
- modules/caas/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
- modules/caas/.github/workflows/ci.yml +100 -0
- modules/caas/.github/workflows/lint.yml +39 -0
- modules/caas/.github/workflows/publish-pypi.yml +124 -0
- modules/caas/.gitignore +73 -0
- modules/caas/.pre-commit-config.yaml +33 -0
- modules/caas/CHANGELOG.md +58 -0
- modules/caas/CONTRIBUTING.md +346 -0
- modules/caas/Dockerfile +41 -0
- modules/caas/LICENSE +21 -0
- modules/caas/MANIFEST.in +11 -0
- modules/caas/README.md +158 -0
- modules/caas/benchmarks/README.md +255 -0
- modules/caas/benchmarks/create_hf_dataset.py +502 -0
- modules/caas/benchmarks/data/sample_corpus/README.md +86 -0
- modules/caas/benchmarks/data/sample_corpus/auth_module.py +211 -0
- modules/caas/benchmarks/data/sample_corpus/contribution_guide.md +185 -0
- modules/caas/benchmarks/data/sample_corpus/remote_work_policy.html +57 -0
- modules/caas/benchmarks/hf_dataset/README.md +214 -0
- modules/caas/benchmarks/hf_dataset/caas_benchmark_corpus.py +73 -0
- modules/caas/benchmarks/hf_dataset/corpus_preview.json +193 -0
- modules/caas/benchmarks/results/README.md +66 -0
- modules/caas/benchmarks/results/evaluation_2026-01-20.json +121 -0
- modules/caas/benchmarks/run_evaluation.py +561 -0
- modules/caas/benchmarks/statistical_tests.py +289 -0
- modules/caas/benchmarks/verify_sample_corpus.py +83 -0
- modules/caas/docker-compose.yml +38 -0
- modules/caas/docs/CONTEXT_TRIAD.md +462 -0
- modules/caas/docs/CONTRIBUTING.md +346 -0
- modules/caas/docs/ETHICS_AND_LIMITATIONS.md +336 -0
- modules/caas/docs/HEURISTIC_ROUTER.md +442 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY.md +363 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_CONTEXT_TRIAD.md +277 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_HEURISTIC_ROUTER.md +231 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_METADATA_INJECTION.md +258 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_PRAGMATIC_TRUTH.md +212 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_TRUST_GATEWAY.md +319 -0
- modules/caas/docs/LAYER_1_PRIMITIVE.md +202 -0
- modules/caas/docs/METADATA_INJECTION.md +404 -0
- modules/caas/docs/PRAGMATIC_TRUTH.md +431 -0
- modules/caas/docs/RELATED_WORK.md +312 -0
- modules/caas/docs/RELEASE_CHECKLIST.md +219 -0
- modules/caas/docs/RELEASE_GUIDE.md +285 -0
- modules/caas/docs/REPRODUCIBILITY.md +386 -0
- modules/caas/docs/SLIDING_WINDOW.md +387 -0
- modules/caas/docs/STRUCTURE_AWARE_INDEXING.md +158 -0
- modules/caas/docs/TESTING.md +259 -0
- modules/caas/docs/THREAT_MODEL.md +247 -0
- modules/caas/docs/TRUST_GATEWAY.md +575 -0
- modules/caas/docs/VFS.md +298 -0
- modules/caas/examples/agents/enterprise_security_agent.py +414 -0
- modules/caas/examples/agents/intelligent_document_analyzer.py +380 -0
- modules/caas/examples/demos/demo.py +309 -0
- modules/caas/examples/demos/demo_context_triad.py +225 -0
- modules/caas/examples/demos/demo_conversation_manager.py +285 -0
- modules/caas/examples/demos/demo_heuristic_router.py +133 -0
- modules/caas/examples/demos/demo_metadata_injection.py +198 -0
- modules/caas/examples/demos/demo_pragmatic_truth.py +303 -0
- modules/caas/examples/demos/demo_structure_aware.py +140 -0
- modules/caas/examples/demos/demo_time_decay.py +247 -0
- modules/caas/examples/demos/demo_trust_gateway.py +383 -0
- modules/caas/examples/multi_agent/README.md +159 -0
- modules/caas/examples/multi_agent/research_team.py +369 -0
- modules/caas/examples/multi_agent/vfs_collaboration.py +393 -0
- modules/caas/examples/usage/auth_module.py +142 -0
- modules/caas/examples/usage/usage_example.py +173 -0
- modules/caas/experiments/README.md +42 -0
- modules/caas/experiments/reproduce_results.py +462 -0
- modules/caas/paper/ARXIV_METADATA.md +145 -0
- modules/caas/paper/ARXIV_README.md +47 -0
- modules/caas/paper/CHECKLIST.md +103 -0
- modules/caas/paper/GITHUB_RELEASE_NOTES.md +105 -0
- modules/caas/paper/README.md +71 -0
- modules/caas/paper/abstract.md +24 -0
- modules/caas/paper/arxiv_submission.tar +0 -0
- modules/caas/paper/arxiv_submission.zip +0 -0
- modules/caas/paper/build_pdf.py +355 -0
- modules/caas/paper/experiments.md +149 -0
- modules/caas/paper/figures/.gitkeep +0 -0
- modules/caas/paper/figures/README.md +237 -0
- modules/caas/paper/figures/fig1_system_architecture.png +0 -0
- modules/caas/paper/figures/fig1_system_architecture.svg +198 -0
- modules/caas/paper/figures/fig2_context_triad.png +0 -0
- modules/caas/paper/figures/fig2_context_triad.svg +105 -0
- modules/caas/paper/figures/fig3_ablation_results.png +0 -0
- modules/caas/paper/figures/fig3_ablation_results.svg +113 -0
- modules/caas/paper/figures/fig4_routing_latency.png +0 -0
- modules/caas/paper/figures/fig4_routing_latency.svg +97 -0
- modules/caas/paper/intro.md +103 -0
- modules/caas/paper/latex/figures/fig1_system_architecture.png +0 -0
- modules/caas/paper/latex/figures/fig2_context_triad.png +0 -0
- modules/caas/paper/latex/figures/fig3_ablation_results.png +0 -0
- modules/caas/paper/latex/figures/fig4_routing_latency.png +0 -0
- modules/caas/paper/latex/main.tex +468 -0
- modules/caas/paper/latex/references.bib +140 -0
- modules/caas/paper/method.md +350 -0
- modules/caas/paper/outline.md +123 -0
- modules/caas/paper/related_work.md +101 -0
- modules/caas/paper/tables/.gitkeep +0 -0
- modules/caas/paper/tables/results_tables.md +50 -0
- modules/caas/pyproject.toml +172 -0
- modules/caas/requirements.txt +11 -0
- modules/caas/src/caas/__init__.py +232 -0
- modules/caas/src/caas/api/__init__.py +7 -0
- modules/caas/src/caas/api/server.py +1326 -0
- modules/caas/src/caas/caching.py +832 -0
- modules/caas/src/caas/cli.py +208 -0
- modules/caas/src/caas/conversation.py +221 -0
- modules/caas/src/caas/decay.py +118 -0
- modules/caas/src/caas/detection/__init__.py +7 -0
- modules/caas/src/caas/detection/detector.py +236 -0
- modules/caas/src/caas/enrichment.py +127 -0
- modules/caas/src/caas/gateway/__init__.py +24 -0
- modules/caas/src/caas/gateway/trust_gateway.py +471 -0
- modules/caas/src/caas/hf_utils.py +477 -0
- modules/caas/src/caas/ingestion/__init__.py +21 -0
- modules/caas/src/caas/ingestion/processors.py +251 -0
- modules/caas/src/caas/ingestion/structure_parser.py +185 -0
- modules/caas/src/caas/models.py +354 -0
- modules/caas/src/caas/pragmatic_truth.py +441 -0
- modules/caas/src/caas/routing/__init__.py +8 -0
- modules/caas/src/caas/routing/heuristic_router.py +242 -0
- modules/caas/src/caas/storage/__init__.py +7 -0
- modules/caas/src/caas/storage/store.py +450 -0
- modules/caas/src/caas/triad.py +472 -0
- modules/caas/src/caas/tuning/__init__.py +7 -0
- modules/caas/src/caas/tuning/tuner.py +322 -0
- modules/caas/src/caas/vfs/__init__.py +12 -0
- modules/caas/src/caas/vfs/filesystem.py +450 -0
- modules/caas/tests/__init__.py +3 -0
- modules/caas/tests/conftest.py +8 -0
- modules/caas/tests/test_caching.py +628 -0
- modules/caas/tests/test_context_triad.py +385 -0
- modules/caas/tests/test_conversation_manager.py +289 -0
- modules/caas/tests/test_functionality.py +215 -0
- modules/caas/tests/test_heuristic_router.py +370 -0
- modules/caas/tests/test_metadata_injection.py +328 -0
- modules/caas/tests/test_pragmatic_truth.py +322 -0
- modules/caas/tests/test_structure_aware_indexing.py +283 -0
- modules/caas/tests/test_time_decay.py +268 -0
- modules/caas/tests/test_trust_gateway.py +445 -0
- modules/caas/tests/test_vfs.py +298 -0
- modules/cmvk/.github/FUNDING.yml +9 -0
- modules/cmvk/.github/dependabot.yml +54 -0
- modules/cmvk/.github/workflows/ci.yml +205 -0
- modules/cmvk/.github/workflows/publish.yml +143 -0
- modules/cmvk/.gitignore +147 -0
- modules/cmvk/.pre-commit-config.yaml +58 -0
- modules/cmvk/CHANGELOG.md +146 -0
- modules/cmvk/CITATION.cff +48 -0
- modules/cmvk/CONTRIBUTING.md +229 -0
- modules/cmvk/Dockerfile +87 -0
- modules/cmvk/HF_MODEL_CARD.md +185 -0
- modules/cmvk/LICENSE +21 -0
- modules/cmvk/README.md +149 -0
- modules/cmvk/SECURITY.md +114 -0
- modules/cmvk/config/prompts/generator_v1.txt +23 -0
- modules/cmvk/config/prompts/verifier_hostile.txt +32 -0
- modules/cmvk/config/settings.yaml +40 -0
- modules/cmvk/coverage_html/.gitignore +2 -0
- modules/cmvk/coverage_html/class_index.html +658 -0
- modules/cmvk/coverage_html/coverage_html_cb_188fc9a4.js +735 -0
- modules/cmvk/coverage_html/favicon_32_cb_c827f16f.png +0 -0
- modules/cmvk/coverage_html/function_index.html +1978 -0
- modules/cmvk/coverage_html/index.html +255 -0
- modules/cmvk/coverage_html/keybd_closed_cb_900cfef5.png +0 -0
- modules/cmvk/coverage_html/status.json +1 -0
- modules/cmvk/coverage_html/style_cb_5c747636.css +389 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38___init___py.html +315 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_audit_py.html +499 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_benchmarks_py.html +575 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_constitutional_py.html +1001 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_hf_utils_py.html +398 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_metrics_py.html +570 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_profiles_py.html +397 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_types_py.html +109 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_verification_py.html +1053 -0
- modules/cmvk/docs/DIAGRAMS.md +325 -0
- modules/cmvk/docs/architecture.md +345 -0
- modules/cmvk/docs/features.md +308 -0
- modules/cmvk/docs/getting_started.md +279 -0
- modules/cmvk/docs/innovation_layer.md +377 -0
- modules/cmvk/docs/safety.md +281 -0
- modules/cmvk/docs/traceability.md +150 -0
- modules/cmvk/examples/basic_example.py +62 -0
- modules/cmvk/examples/demo_complete_pipeline.py +209 -0
- modules/cmvk/examples/demo_innovation_layer.py +197 -0
- modules/cmvk/examples/example.py +112 -0
- modules/cmvk/examples/model_diversity_comparison.py +110 -0
- modules/cmvk/examples/real_api_integration.py +121 -0
- modules/cmvk/examples/test_full_pipeline.py +303 -0
- modules/cmvk/experiments/FEATURE_2_LATERAL_THINKING.md +187 -0
- modules/cmvk/experiments/README.md +216 -0
- modules/cmvk/experiments/ablation_runner.py +666 -0
- modules/cmvk/experiments/baseline_runner.py +158 -0
- modules/cmvk/experiments/blind_spot_benchmark.py +364 -0
- modules/cmvk/experiments/datasets/README.md +85 -0
- modules/cmvk/experiments/datasets/humaneval_50.json +352 -0
- modules/cmvk/experiments/datasets/humaneval_full.json +1150 -0
- modules/cmvk/experiments/datasets/humaneval_sample.json +32 -0
- modules/cmvk/experiments/datasets/sabotage.json +262 -0
- modules/cmvk/experiments/datasets/sample.json +40 -0
- modules/cmvk/experiments/demo_with_traces.py +110 -0
- modules/cmvk/experiments/efficiency_curve.py +259 -0
- modules/cmvk/experiments/experiment_runner.py +243 -0
- modules/cmvk/experiments/paper_data_generator.py +183 -0
- modules/cmvk/experiments/reproduce_results.py +407 -0
- modules/cmvk/experiments/reproducible_runner.py +352 -0
- modules/cmvk/experiments/sabotage_stress_test.py +311 -0
- modules/cmvk/experiments/test_lateral_thinking.py +116 -0
- modules/cmvk/experiments/test_prosecutor.py +41 -0
- modules/cmvk/experiments/visualize_results.py +735 -0
- modules/cmvk/logs/traces/demo_HumanEval_0_20260121-204900.json +36 -0
- modules/cmvk/notebooks/analysis.ipynb +124 -0
- modules/cmvk/paper/PAPER.md +561 -0
- modules/cmvk/paper/arxiv_checklist.md +230 -0
- modules/cmvk/paper/cmvk_neurips.aux +77 -0
- modules/cmvk/paper/cmvk_neurips.bbl +81 -0
- modules/cmvk/paper/cmvk_neurips.blg +48 -0
- modules/cmvk/paper/cmvk_neurips.out +16 -0
- modules/cmvk/paper/cmvk_neurips.pdf +0 -0
- modules/cmvk/paper/cmvk_neurips.tex +309 -0
- modules/cmvk/paper/figures/ablation.png +0 -0
- modules/cmvk/paper/figures/ablation.svg +39 -0
- modules/cmvk/paper/figures/architecture.png +0 -0
- modules/cmvk/paper/figures/architecture.svg +115 -0
- modules/cmvk/paper/figures/results_bar.png +0 -0
- modules/cmvk/paper/figures/results_bar.svg +70 -0
- modules/cmvk/paper/generate_figures.py +383 -0
- modules/cmvk/paper/neurips_2024.sty +101 -0
- modules/cmvk/paper/references.bib +98 -0
- modules/cmvk/paper/structure.tex +200 -0
- modules/cmvk/pyproject.toml +189 -0
- modules/cmvk/requirements-dev.txt +19 -0
- modules/cmvk/requirements.txt +14 -0
- modules/cmvk/src/cmvk/__init__.py +216 -0
- modules/cmvk/src/cmvk/audit.py +400 -0
- modules/cmvk/src/cmvk/benchmarks.py +476 -0
- modules/cmvk/src/cmvk/constitutional.py +902 -0
- modules/cmvk/src/cmvk/hf_utils.py +299 -0
- modules/cmvk/src/cmvk/metrics.py +471 -0
- modules/cmvk/src/cmvk/profiles.py +298 -0
- modules/cmvk/src/cmvk/py.typed +0 -0
- modules/cmvk/src/cmvk/types.py +10 -0
- modules/cmvk/src/cmvk/verification.py +954 -0
- modules/cmvk/src/cross_model_verification_kernel/__init__.py +91 -0
- modules/cmvk/src/cross_model_verification_kernel/__main__.py +10 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/__init__.py +16 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/base_agent.py +142 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/generator_openai.py +223 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/verifier_anthropic.py +448 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/verifier_gemini.py +481 -0
- modules/cmvk/src/cross_model_verification_kernel/cli.py +570 -0
- modules/cmvk/src/cross_model_verification_kernel/core/__init__.py +26 -0
- modules/cmvk/src/cross_model_verification_kernel/core/graph_memory.py +308 -0
- modules/cmvk/src/cross_model_verification_kernel/core/kernel.py +413 -0
- modules/cmvk/src/cross_model_verification_kernel/core/trace_logger.py +75 -0
- modules/cmvk/src/cross_model_verification_kernel/core/types.py +121 -0
- modules/cmvk/src/cross_model_verification_kernel/datasets/__init__.py +20 -0
- modules/cmvk/src/cross_model_verification_kernel/datasets/humaneval_loader.py +271 -0
- modules/cmvk/src/cross_model_verification_kernel/generator.py +118 -0
- modules/cmvk/src/cross_model_verification_kernel/kernel.py +292 -0
- modules/cmvk/src/cross_model_verification_kernel/models.py +111 -0
- modules/cmvk/src/cross_model_verification_kernel/py.typed +1 -0
- modules/cmvk/src/cross_model_verification_kernel/simple_kernel.py +185 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/__init__.py +94 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/huggingface_upload.py +394 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/sandbox.py +159 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/statistics.py +468 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/visualizer.py +312 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/web_search.py +86 -0
- modules/cmvk/src/cross_model_verification_kernel/verifier.py +257 -0
- modules/cmvk/tests/__init__.py +3 -0
- modules/cmvk/tests/conftest.py +61 -0
- modules/cmvk/tests/integration/__init__.py +1 -0
- modules/cmvk/tests/integration/test_anthropic_verifier.py +269 -0
- modules/cmvk/tests/integration/test_integration.py +53 -0
- modules/cmvk/tests/integration/test_lateral_thinking_integration.py +199 -0
- modules/cmvk/tests/integration/test_lateral_thinking_witness.py +208 -0
- modules/cmvk/tests/integration/test_prosecutor_mode.py +131 -0
- modules/cmvk/tests/test_constitutional.py +611 -0
- modules/cmvk/tests/test_enhanced_features.py +603 -0
- modules/cmvk/tests/test_verification.py +255 -0
- modules/cmvk/tests/unit/__init__.py +1 -0
- modules/cmvk/tests/unit/test_agents.py +64 -0
- modules/cmvk/tests/unit/test_cli.py +224 -0
- modules/cmvk/tests/unit/test_core.py +126 -0
- modules/cmvk/tests/unit/test_humaneval_loader.py +197 -0
- modules/cmvk/tests/unit/test_kernel.py +255 -0
- modules/cmvk/tests/unit/test_reproducibility.py +160 -0
- modules/cmvk/tests/unit/test_trace_logger.py +115 -0
- modules/cmvk/tests/unit/test_visualizer.py +218 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/bug_report.yml +82 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/config.yml +11 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/feature_request.yml +104 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/question.yml +70 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/security_vulnerability.yml +84 -0
- modules/control-plane/.github/discussions.yml +73 -0
- modules/control-plane/.github/pull_request_template.md +82 -0
- modules/control-plane/.github/workflows/publish.yml +146 -0
- modules/control-plane/.github/workflows/release.yml +39 -0
- modules/control-plane/.github/workflows/tests.yml +58 -0
- modules/control-plane/.gitignore +55 -0
- modules/control-plane/CHANGELOG.md +203 -0
- modules/control-plane/CONTRIBUTING.md +311 -0
- modules/control-plane/CONTRIBUTORS.md +88 -0
- modules/control-plane/Dockerfile +82 -0
- modules/control-plane/LICENSE +21 -0
- modules/control-plane/MANIFEST.in +17 -0
- modules/control-plane/README.md +1264 -0
- modules/control-plane/ROADMAP.md +228 -0
- modules/control-plane/SECURITY.md +210 -0
- modules/control-plane/SUPPORT.md +106 -0
- modules/control-plane/acp-cli.py +212 -0
- modules/control-plane/benchmark/README.md +257 -0
- modules/control-plane/benchmark/__init__.py +19 -0
- modules/control-plane/benchmark/red_team_dataset.py +517 -0
- modules/control-plane/benchmark.py +563 -0
- modules/control-plane/build_and_publish.sh +130 -0
- modules/control-plane/docker-compose.yml +74 -0
- modules/control-plane/docs/ABLATION_STUDIES.md +528 -0
- modules/control-plane/docs/ADAPTER_GUIDE.md +544 -0
- modules/control-plane/docs/ADVANCED_FEATURES.md +543 -0
- modules/control-plane/docs/AIOS_COMPARISON.md +296 -0
- modules/control-plane/docs/BIBLIOGRAPHY.md +367 -0
- modules/control-plane/docs/CASE_STUDIES.md +645 -0
- modules/control-plane/docs/DOCKER_DEPLOYMENT.md +184 -0
- modules/control-plane/docs/ECOSYSTEM_STATUS.md +98 -0
- modules/control-plane/docs/HF_MODEL_CARD.md +168 -0
- modules/control-plane/docs/KERNEL_V1_RELEASE.md +454 -0
- modules/control-plane/docs/LAYER3_FRAMEWORK.md +227 -0
- modules/control-plane/docs/LIMITATIONS.md +523 -0
- modules/control-plane/docs/PYPI_PUBLISHING.md +195 -0
- modules/control-plane/docs/README.md +58 -0
- modules/control-plane/docs/RELATED_WORK.md +319 -0
- modules/control-plane/docs/RELEASE_v1.1.0.md +252 -0
- modules/control-plane/docs/REPRODUCIBILITY.md +540 -0
- modules/control-plane/docs/RESEARCH_FOUNDATION.md +197 -0
- modules/control-plane/docs/api/CORE.md +270 -0
- modules/control-plane/docs/architecture/architecture.md +120 -0
- modules/control-plane/docs/community/ANNOUNCEMENT_TEMPLATES.md +52 -0
- modules/control-plane/docs/guides/IMPLEMENTATION.md +225 -0
- modules/control-plane/docs/guides/PHILOSOPHY.md +354 -0
- modules/control-plane/docs/guides/QUICKSTART.md +217 -0
- modules/control-plane/examples/README.md +138 -0
- modules/control-plane/examples/a2a_demo.py +410 -0
- modules/control-plane/examples/adapter_demo.py +347 -0
- modules/control-plane/examples/advanced_features.py +403 -0
- modules/control-plane/examples/basic_usage.py +261 -0
- modules/control-plane/examples/benchmark_demo.py +186 -0
- modules/control-plane/examples/compliance_demo.py +333 -0
- modules/control-plane/examples/configuration.py +265 -0
- modules/control-plane/examples/getting_started.py +178 -0
- modules/control-plane/examples/hibernation_and_time_travel_demo.py +406 -0
- modules/control-plane/examples/interactive_tutorial.ipynb +497 -0
- modules/control-plane/examples/kernel_interceptor_demo.py +202 -0
- modules/control-plane/examples/kernel_v1_demo.py +273 -0
- modules/control-plane/examples/langchain_demo.py +281 -0
- modules/control-plane/examples/lifecycle_demo.py +724 -0
- modules/control-plane/examples/mcp_demo.py +378 -0
- modules/control-plane/examples/ml_safety_demo.py +157 -0
- modules/control-plane/examples/multimodal_demo.py +347 -0
- modules/control-plane/examples/observability_demo.py +370 -0
- modules/control-plane/examples/use_cases.py +336 -0
- modules/control-plane/experiments/long_horizon_purge.py +235 -0
- modules/control-plane/experiments/multi_agent_rag.py +165 -0
- modules/control-plane/experiments/reproduce_results.py +667 -0
- modules/control-plane/paper/ARXIV_SUBMISSION_INFO.txt +122 -0
- modules/control-plane/paper/ETHICS_STATEMENT.md +248 -0
- modules/control-plane/paper/PAPER_CHECKLIST.md +72 -0
- modules/control-plane/paper/Paper.pdf +0 -0
- modules/control-plane/paper/README.md +71 -0
- modules/control-plane/paper/appendix.md +152 -0
- modules/control-plane/paper/architecture.md +15 -0
- modules/control-plane/paper/arxiv/figures/ablation_chart.png +0 -0
- modules/control-plane/paper/arxiv/figures/architecture.png +0 -0
- modules/control-plane/paper/arxiv/figures/constraint_graphs.png +0 -0
- modules/control-plane/paper/arxiv/figures/results_chart.png +0 -0
- modules/control-plane/paper/arxiv/main.aux +97 -0
- modules/control-plane/paper/arxiv/main.bbl +112 -0
- modules/control-plane/paper/arxiv/main.blg +48 -0
- modules/control-plane/paper/arxiv/main.out +33 -0
- modules/control-plane/paper/arxiv/main.pdf +0 -0
- modules/control-plane/paper/arxiv/main.tex +479 -0
- modules/control-plane/paper/arxiv/references.bib +234 -0
- modules/control-plane/paper/arxiv_submission.tar +0 -0
- modules/control-plane/paper/arxiv_submission.zip +0 -0
- modules/control-plane/paper/build.sh +68 -0
- modules/control-plane/paper/figures/README.md +47 -0
- modules/control-plane/paper/figures/ablation_chart.pdf +0 -0
- modules/control-plane/paper/figures/ablation_chart.png +0 -0
- modules/control-plane/paper/figures/architecture.pdf +0 -0
- modules/control-plane/paper/figures/architecture.png +0 -0
- modules/control-plane/paper/figures/constraint_graphs.pdf +0 -0
- modules/control-plane/paper/figures/constraint_graphs.png +0 -0
- modules/control-plane/paper/figures/generate_figures.py +252 -0
- modules/control-plane/paper/figures/results_chart.pdf +0 -0
- modules/control-plane/paper/figures/results_chart.png +0 -0
- modules/control-plane/paper/main.md +273 -0
- modules/control-plane/paper/main.tex +214 -0
- modules/control-plane/paper/main_arxiv.aux +53 -0
- modules/control-plane/paper/main_arxiv.out +17 -0
- modules/control-plane/paper/main_arxiv.pdf +0 -0
- modules/control-plane/paper/main_arxiv.tex +264 -0
- modules/control-plane/paper/references.bib +234 -0
- modules/control-plane/pyproject.toml +124 -0
- modules/control-plane/reproducibility/ABLATIONS.md +136 -0
- modules/control-plane/reproducibility/README.md +288 -0
- modules/control-plane/reproducibility/commands.md +467 -0
- modules/control-plane/reproducibility/docker_config/Dockerfile +39 -0
- modules/control-plane/reproducibility/experiment_configs/purge_config.json +46 -0
- modules/control-plane/reproducibility/experiment_configs/rag_config.json +36 -0
- modules/control-plane/reproducibility/hardware_specs.md +317 -0
- modules/control-plane/reproducibility/requirements_frozen.txt +0 -0
- modules/control-plane/reproducibility/run_all_experiments.sh +45 -0
- modules/control-plane/reproducibility/seeds.json +106 -0
- modules/control-plane/scripts/prepare_pypi.py +46 -0
- modules/control-plane/scripts/prepare_release.py +176 -0
- modules/control-plane/scripts/upload_dataset_to_hf.py +316 -0
- modules/control-plane/setup.py +69 -0
- modules/control-plane/src/agent_control_plane/__init__.py +639 -0
- modules/control-plane/src/agent_control_plane/a2a_adapter.py +541 -0
- modules/control-plane/src/agent_control_plane/adapter.py +415 -0
- modules/control-plane/src/agent_control_plane/agent_hibernation.py +364 -0
- modules/control-plane/src/agent_control_plane/agent_kernel.py +464 -0
- modules/control-plane/src/agent_control_plane/compliance.py +718 -0
- modules/control-plane/src/agent_control_plane/constraint_graphs.py +475 -0
- modules/control-plane/src/agent_control_plane/control_plane.py +848 -0
- modules/control-plane/src/agent_control_plane/example_executors.py +193 -0
- modules/control-plane/src/agent_control_plane/execution_engine.py +229 -0
- modules/control-plane/src/agent_control_plane/flight_recorder.py +600 -0
- modules/control-plane/src/agent_control_plane/governance_layer.py +432 -0
- modules/control-plane/src/agent_control_plane/hf_utils.py +561 -0
- modules/control-plane/src/agent_control_plane/interfaces/__init__.py +53 -0
- modules/control-plane/src/agent_control_plane/interfaces/kernel_interface.py +359 -0
- modules/control-plane/src/agent_control_plane/interfaces/plugin_interface.py +495 -0
- modules/control-plane/src/agent_control_plane/interfaces/protocol_interfaces.py +385 -0
- modules/control-plane/src/agent_control_plane/kernel_space.py +707 -0
- modules/control-plane/src/agent_control_plane/langchain_adapter.py +422 -0
- modules/control-plane/src/agent_control_plane/lifecycle.py +3111 -0
- modules/control-plane/src/agent_control_plane/mcp_adapter.py +517 -0
- modules/control-plane/src/agent_control_plane/ml_safety.py +560 -0
- modules/control-plane/src/agent_control_plane/multimodal.py +724 -0
- modules/control-plane/src/agent_control_plane/mute_agent.py +419 -0
- modules/control-plane/src/agent_control_plane/observability.py +785 -0
- modules/control-plane/src/agent_control_plane/orchestrator.py +480 -0
- modules/control-plane/src/agent_control_plane/plugin_registry.py +748 -0
- modules/control-plane/src/agent_control_plane/policy_engine.py +525 -0
- modules/control-plane/src/agent_control_plane/shadow_mode.py +307 -0
- modules/control-plane/src/agent_control_plane/signals.py +491 -0
- modules/control-plane/src/agent_control_plane/supervisor_agents.py +427 -0
- modules/control-plane/src/agent_control_plane/time_travel_debugger.py +554 -0
- modules/control-plane/src/agent_control_plane/tool_registry.py +350 -0
- modules/control-plane/src/agent_control_plane/vfs.py +695 -0
- modules/control-plane/tests/README.md +33 -0
- modules/control-plane/tests/test_a2a_adapter.py +336 -0
- modules/control-plane/tests/test_adapter.py +422 -0
- modules/control-plane/tests/test_advanced_features.py +389 -0
- modules/control-plane/tests/test_benchmark.py +223 -0
- modules/control-plane/tests/test_compliance.py +214 -0
- modules/control-plane/tests/test_control_plane.py +295 -0
- modules/control-plane/tests/test_hibernation.py +274 -0
- modules/control-plane/tests/test_kernel_interception.py +284 -0
- modules/control-plane/tests/test_langchain_adapter.py +258 -0
- modules/control-plane/tests/test_lifecycle.py +1174 -0
- modules/control-plane/tests/test_mcp_adapter.py +293 -0
- modules/control-plane/tests/test_ml_safety.py +142 -0
- modules/control-plane/tests/test_multimodal.py +317 -0
- modules/control-plane/tests/test_new_features.py +435 -0
- modules/control-plane/tests/test_observability.py +338 -0
- modules/control-plane/tests/test_time_travel.py +387 -0
- modules/emk/.github/workflows/ci.yml +105 -0
- modules/emk/.github/workflows/publish.yml +144 -0
- modules/emk/.gitignore +74 -0
- modules/emk/CHANGELOG.md +41 -0
- modules/emk/CONTRIBUTING.md +295 -0
- modules/emk/IMPLEMENTATION.md +174 -0
- modules/emk/LICENSE +21 -0
- modules/emk/MANIFEST.in +8 -0
- modules/emk/README.md +135 -0
- modules/emk/RELEASE_NOTES.md +82 -0
- modules/emk/SECURITY.md +52 -0
- modules/emk/codecov.yml +39 -0
- modules/emk/docs/MEMORY_MANAGEMENT.md +285 -0
- modules/emk/emk/__init__.py +106 -0
- modules/emk/emk/hf_utils.py +419 -0
- modules/emk/emk/indexer.py +144 -0
- modules/emk/emk/py.typed +0 -0
- modules/emk/emk/schema.py +204 -0
- modules/emk/emk/sleep_cycle.py +345 -0
- modules/emk/emk/store.py +479 -0
- modules/emk/examples/basic_usage.py +123 -0
- modules/emk/examples/memory_features_demo.py +154 -0
- modules/emk/experiments/README.md +59 -0
- modules/emk/experiments/reproduce_results.py +461 -0
- modules/emk/experiments/results.json +61 -0
- modules/emk/paper/structure.tex +192 -0
- modules/emk/paper/whitepaper.md +273 -0
- modules/emk/pyproject.toml +91 -0
- modules/emk/setup.py +5 -0
- modules/emk/tests/test_file_adapter.py +195 -0
- modules/emk/tests/test_indexer.py +174 -0
- modules/emk/tests/test_init.py +55 -0
- modules/emk/tests/test_negative_memory.py +83 -0
- modules/emk/tests/test_schema.py +150 -0
- modules/emk/tests/test_semantic_rules.py +175 -0
- modules/emk/tests/test_sleep_cycle.py +335 -0
- modules/emk/tests/test_store_anti_patterns.py +239 -0
- modules/iatp/.github/workflows/docker-build.yml +124 -0
- modules/iatp/.github/workflows/publish.yml +174 -0
- modules/iatp/.github/workflows/python-package.yml +121 -0
- modules/iatp/.gitignore +67 -0
- modules/iatp/.pre-commit-config.yaml +64 -0
- modules/iatp/CHANGELOG.md +120 -0
- modules/iatp/Dockerfile +91 -0
- modules/iatp/IMPLEMENTATION_SUMMARY.md +218 -0
- modules/iatp/MANIFEST.in +9 -0
- modules/iatp/README.md +180 -0
- modules/iatp/docker/Dockerfile.agent +27 -0
- modules/iatp/docker/Dockerfile.sidecar-python +86 -0
- modules/iatp/docker/README.md +258 -0
- modules/iatp/docker-compose.yml +194 -0
- modules/iatp/docs/ARCHITECTURE.md +243 -0
- modules/iatp/docs/CLI_GUIDE.md +220 -0
- modules/iatp/docs/DEPLOYMENT.md +304 -0
- modules/iatp/examples/README.md +132 -0
- modules/iatp/examples/backend_agent.py +39 -0
- modules/iatp/examples/client.py +168 -0
- modules/iatp/examples/demo_attestation_reputation.py +274 -0
- modules/iatp/examples/demo_client.py +240 -0
- modules/iatp/examples/demo_rbac.py +143 -0
- modules/iatp/examples/integration_demo.py +245 -0
- modules/iatp/examples/manifests/coder_agent.json +20 -0
- modules/iatp/examples/manifests/reviewer_agent.json +19 -0
- modules/iatp/examples/manifests/secure_bank.json +14 -0
- modules/iatp/examples/manifests/standard_agent.json +14 -0
- modules/iatp/examples/manifests/untrusted_honeypot.json +14 -0
- modules/iatp/examples/run_secure_bank_sidecar.py +85 -0
- modules/iatp/examples/run_sidecar.py +105 -0
- modules/iatp/examples/run_untrusted_sidecar.py +77 -0
- modules/iatp/examples/secure_bank_agent.py +138 -0
- modules/iatp/examples/test_untrusted.py +82 -0
- modules/iatp/examples/untrusted_agent.py +119 -0
- modules/iatp/experiments/README.md +58 -0
- modules/iatp/experiments/cascading_hallucination/README.md +149 -0
- modules/iatp/experiments/cascading_hallucination/agent_a_user.py +41 -0
- modules/iatp/experiments/cascading_hallucination/agent_b_summarizer.py +54 -0
- modules/iatp/experiments/cascading_hallucination/agent_c_database.py +47 -0
- modules/iatp/experiments/cascading_hallucination/proof_of_concept.py +290 -0
- modules/iatp/experiments/cascading_hallucination/run_experiment.py +226 -0
- modules/iatp/experiments/cascading_hallucination/sidecar_c.py +61 -0
- modules/iatp/experiments/reproduce_results.py +574 -0
- modules/iatp/experiments/results.json +2336 -0
- modules/iatp/iatp/__init__.py +164 -0
- modules/iatp/iatp/attestation.py +401 -0
- modules/iatp/iatp/cli.py +253 -0
- modules/iatp/iatp/hf_utils.py +469 -0
- modules/iatp/iatp/ipc_pipes.py +578 -0
- modules/iatp/iatp/main.py +410 -0
- modules/iatp/iatp/models/__init__.py +445 -0
- modules/iatp/iatp/policy_engine.py +335 -0
- modules/iatp/iatp/py.typed +2 -0
- modules/iatp/iatp/recovery.py +319 -0
- modules/iatp/iatp/security/__init__.py +268 -0
- modules/iatp/iatp/sidecar/__init__.py +517 -0
- modules/iatp/iatp/telemetry/__init__.py +162 -0
- modules/iatp/iatp/tests/__init__.py +1 -0
- modules/iatp/iatp/tests/test_attestation.py +368 -0
- modules/iatp/iatp/tests/test_cli.py +129 -0
- modules/iatp/iatp/tests/test_models.py +128 -0
- modules/iatp/iatp/tests/test_policy_engine.py +345 -0
- modules/iatp/iatp/tests/test_recovery.py +279 -0
- modules/iatp/iatp/tests/test_security.py +220 -0
- modules/iatp/iatp/tests/test_sidecar.py +165 -0
- modules/iatp/iatp/tests/test_telemetry.py +173 -0
- modules/iatp/paper/BLOG.md +307 -0
- modules/iatp/paper/PAPER.md +236 -0
- modules/iatp/paper/RFC_SUBMISSION.md +299 -0
- modules/iatp/paper/whitepaper.md +369 -0
- modules/iatp/proto/README.md +200 -0
- modules/iatp/proto/generate_stubs.py +81 -0
- modules/iatp/proto/iatp.proto +552 -0
- modules/iatp/pyproject.toml +180 -0
- modules/iatp/requirements-dev.txt +2 -0
- modules/iatp/requirements.txt +6 -0
- modules/iatp/setup.py +60 -0
- modules/iatp/sidecar/README.md +487 -0
- modules/iatp/sidecar/go/Dockerfile +32 -0
- modules/iatp/sidecar/go/README.md +237 -0
- modules/iatp/sidecar/go/go.mod +8 -0
- modules/iatp/sidecar/go/main.go +488 -0
- modules/iatp/spec/001-handshake.md +436 -0
- modules/iatp/spec/002-reversibility.md +394 -0
- modules/iatp/spec/schema/capability_manifest.json +266 -0
- modules/iatp/test_integration.py +310 -0
- modules/mcp-kernel-server/README.md +261 -0
- modules/mcp-kernel-server/pyproject.toml +60 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/__init__.py +26 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/cli.py +229 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/resources.py +215 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/server.py +562 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/tools.py +1172 -0
- modules/mute-agent/.github/workflows/safety_check.yml +45 -0
- modules/mute-agent/.gitignore +53 -0
- modules/mute-agent/ARCHITECTURE.md +531 -0
- modules/mute-agent/BENCHMARK_GUIDE.md +384 -0
- modules/mute-agent/COMPLETION_SUMMARY.md +293 -0
- modules/mute-agent/EXPERIMENT_SUMMARY.md +318 -0
- modules/mute-agent/IMPLEMENTATION_SUMMARY.md +212 -0
- modules/mute-agent/LICENSE +21 -0
- modules/mute-agent/PHASE3_SUMMARY.md +297 -0
- modules/mute-agent/README.md +360 -0
- modules/mute-agent/STEEL_MAN_RESULTS.md +353 -0
- modules/mute-agent/USAGE.md +505 -0
- modules/mute-agent/V2_IMPLEMENTATION_SUMMARY.md +253 -0
- modules/mute-agent/V2_STEEL_MAN_IMPLEMENTATION.md +274 -0
- modules/mute-agent/VERIFICATION_REPORT.md +435 -0
- modules/mute-agent/charts/cost_comparison.png +0 -0
- modules/mute-agent/charts/cost_vs_ambiguity.png +0 -0
- modules/mute-agent/charts/metrics_comparison.png +0 -0
- modules/mute-agent/charts/scenario_breakdown.png +0 -0
- modules/mute-agent/charts/trace_attack_blocked.html +140 -0
- modules/mute-agent/charts/trace_attack_blocked.png +0 -0
- modules/mute-agent/charts/trace_failure.html +140 -0
- modules/mute-agent/charts/trace_failure.png +0 -0
- modules/mute-agent/charts/trace_success.html +140 -0
- modules/mute-agent/charts/trace_success.png +0 -0
- modules/mute-agent/examples/__init__.py +1 -0
- modules/mute-agent/examples/advanced_example.py +384 -0
- modules/mute-agent/examples/graph_debugger_demo.py +241 -0
- modules/mute-agent/examples/listener_example.py +297 -0
- modules/mute-agent/examples/simple_example.py +242 -0
- modules/mute-agent/examples/steel_man_demo.py +297 -0
- modules/mute-agent/experiments/README.md +135 -0
- modules/mute-agent/experiments/__init__.py +3 -0
- modules/mute-agent/experiments/agent_comparison.csv +6 -0
- modules/mute-agent/experiments/agent_comparison_50runs.csv +6 -0
- modules/mute-agent/experiments/ambiguity_test.py +335 -0
- modules/mute-agent/experiments/ambiguity_test_results.csv +31 -0
- modules/mute-agent/experiments/ambiguity_test_results_50runs.csv +51 -0
- modules/mute-agent/experiments/baseline_agent.py +189 -0
- modules/mute-agent/experiments/benchmark.py +402 -0
- modules/mute-agent/experiments/demo.py +172 -0
- modules/mute-agent/experiments/generate_cost_curve.py +474 -0
- modules/mute-agent/experiments/jailbreak_test.py +137 -0
- modules/mute-agent/experiments/latent_state_scenario.py +361 -0
- modules/mute-agent/experiments/mute_agent_experiment.py +349 -0
- modules/mute-agent/experiments/run_extended_experiment.py +40 -0
- modules/mute-agent/experiments/run_v2_experiments.py +266 -0
- modules/mute-agent/experiments/run_v2_experiments_auto.py +247 -0
- modules/mute-agent/experiments/v2_scenarios/README.md +214 -0
- modules/mute-agent/experiments/v2_scenarios/__init__.py +4 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_1_deep_dependency.py +325 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_2_adversarial.py +328 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_3_false_positive.py +303 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_4_performance.py +319 -0
- modules/mute-agent/experiments/visualize.py +400 -0
- modules/mute-agent/mute_agent/__init__.py +66 -0
- modules/mute-agent/mute_agent/core/__init__.py +1 -0
- modules/mute-agent/mute_agent/core/execution_agent.py +164 -0
- modules/mute-agent/mute_agent/core/handshake_protocol.py +199 -0
- modules/mute-agent/mute_agent/core/reasoning_agent.py +236 -0
- modules/mute-agent/mute_agent/knowledge_graph/__init__.py +1 -0
- modules/mute-agent/mute_agent/knowledge_graph/graph_elements.py +63 -0
- modules/mute-agent/mute_agent/knowledge_graph/multidimensional_graph.py +168 -0
- modules/mute-agent/mute_agent/knowledge_graph/subgraph.py +222 -0
- modules/mute-agent/mute_agent/listener/__init__.py +41 -0
- modules/mute-agent/mute_agent/listener/adapters/__init__.py +29 -0
- modules/mute-agent/mute_agent/listener/adapters/base_adapter.py +187 -0
- modules/mute-agent/mute_agent/listener/adapters/caas_adapter.py +342 -0
- modules/mute-agent/mute_agent/listener/adapters/control_plane_adapter.py +434 -0
- modules/mute-agent/mute_agent/listener/adapters/iatp_adapter.py +330 -0
- modules/mute-agent/mute_agent/listener/adapters/scak_adapter.py +249 -0
- modules/mute-agent/mute_agent/listener/listener.py +608 -0
- modules/mute-agent/mute_agent/listener/state_observer.py +434 -0
- modules/mute-agent/mute_agent/listener/threshold_config.py +311 -0
- modules/mute-agent/mute_agent/super_system/__init__.py +1 -0
- modules/mute-agent/mute_agent/super_system/router.py +202 -0
- modules/mute-agent/mute_agent/visualization/__init__.py +8 -0
- modules/mute-agent/mute_agent/visualization/graph_debugger.py +495 -0
- modules/mute-agent/requirements-dev.txt +6 -0
- modules/mute-agent/requirements.txt +9 -0
- modules/mute-agent/setup.py +64 -0
- modules/mute-agent/src/__init__.py +0 -0
- modules/mute-agent/src/agents/__init__.py +0 -0
- modules/mute-agent/src/agents/baseline_agent.py +524 -0
- modules/mute-agent/src/agents/interactive_agent.py +113 -0
- modules/mute-agent/src/agents/mute_agent.py +622 -0
- modules/mute-agent/src/benchmarks/__init__.py +0 -0
- modules/mute-agent/src/benchmarks/evaluator.py +481 -0
- modules/mute-agent/src/benchmarks/scenarios.json +985 -0
- modules/mute-agent/src/core/__init__.py +0 -0
- modules/mute-agent/src/core/mock_state.py +320 -0
- modules/mute-agent/src/core/tools.py +441 -0
- modules/nexus/__init__.py +49 -0
- modules/nexus/arbiter.py +357 -0
- modules/nexus/client.py +464 -0
- modules/nexus/dmz.py +417 -0
- modules/nexus/escrow.py +428 -0
- modules/nexus/exceptions.py +284 -0
- modules/nexus/registry.py +391 -0
- modules/nexus/reputation.py +423 -0
- modules/nexus/schemas/__init__.py +49 -0
- modules/nexus/schemas/compliance.py +274 -0
- modules/nexus/schemas/escrow.py +249 -0
- modules/nexus/schemas/manifest.py +223 -0
- modules/nexus/schemas/receipt.py +206 -0
- modules/observability/README.md +192 -0
- modules/observability/alertmanager/alertmanager.yml +116 -0
- modules/observability/alerts/agent-os-alerts.yaml +197 -0
- modules/observability/docker-compose.yml +128 -0
- modules/observability/grafana/dashboards/agent-os-amb.json +448 -0
- modules/observability/grafana/dashboards/agent-os-cmvk.json +441 -0
- modules/observability/grafana/dashboards/agent-os-overview.json +268 -0
- modules/observability/grafana/dashboards/agent-os-performance.json +15 -0
- modules/observability/grafana/dashboards/agent-os-safety.json +50 -0
- modules/observability/grafana/provisioning/dashboards/dashboards.yml +15 -0
- modules/observability/grafana/provisioning/datasources/datasources.yml +33 -0
- modules/observability/otel/otel-collector-config.yml +61 -0
- modules/observability/prometheus/prometheus.yml +63 -0
- modules/observability/pyproject.toml +53 -0
- modules/observability/scripts/export_dashboards.py +55 -0
- modules/observability/src/agent_os_observability/__init__.py +25 -0
- modules/observability/src/agent_os_observability/dashboards.py +896 -0
- modules/observability/src/agent_os_observability/metrics.py +396 -0
- modules/observability/src/agent_os_observability/server.py +221 -0
- modules/observability/src/agent_os_observability/tracer.py +226 -0
- modules/primitives/.gitignore +8 -0
- modules/primitives/README.md +62 -0
- modules/primitives/agent_primitives/__init__.py +22 -0
- modules/primitives/agent_primitives/failures.py +82 -0
- modules/primitives/agent_primitives/py.typed +0 -0
- modules/primitives/pyproject.toml +68 -0
- modules/scak/.github/copilot-instructions.md +396 -0
- modules/scak/.github/workflows/release.yml +117 -0
- modules/scak/.gitignore +32 -0
- modules/scak/CHANGELOG.md +173 -0
- modules/scak/CITATION.cff +62 -0
- modules/scak/CONTRIBUTING.md +429 -0
- modules/scak/Dockerfile +58 -0
- modules/scak/ENTERPRISE_FEATURES.md +518 -0
- modules/scak/IMPLEMENTATION_SUMMARY.md +206 -0
- modules/scak/LIMITATIONS.md +565 -0
- modules/scak/MANIFEST.in +16 -0
- modules/scak/NOVELTY.md +535 -0
- modules/scak/README.md +928 -0
- modules/scak/RESEARCH.md +670 -0
- modules/scak/agent_kernel/__init__.py +66 -0
- modules/scak/agent_kernel/analyzer.py +432 -0
- modules/scak/agent_kernel/auditor.py +31 -0
- modules/scak/agent_kernel/completeness_auditor.py +234 -0
- modules/scak/agent_kernel/detector.py +200 -0
- modules/scak/agent_kernel/kernel.py +741 -0
- modules/scak/agent_kernel/memory_manager.py +82 -0
- modules/scak/agent_kernel/models.py +372 -0
- modules/scak/agent_kernel/nudge_mechanism.py +260 -0
- modules/scak/agent_kernel/outcome_analyzer.py +335 -0
- modules/scak/agent_kernel/patcher.py +579 -0
- modules/scak/agent_kernel/semantic_analyzer.py +313 -0
- modules/scak/agent_kernel/semantic_purge.py +346 -0
- modules/scak/agent_kernel/simulator.py +447 -0
- modules/scak/agent_kernel/teacher.py +82 -0
- modules/scak/agent_kernel/triage.py +149 -0
- modules/scak/build_and_publish.ps1 +74 -0
- modules/scak/build_and_publish.sh +74 -0
- modules/scak/cli.py +471 -0
- modules/scak/dashboard.py +462 -0
- modules/scak/datasets/DATASET_CARD.md +219 -0
- modules/scak/datasets/README.md +143 -0
- modules/scak/datasets/gaia_vague_queries/vague_queries.json +262 -0
- modules/scak/datasets/hf_upload/README.md +219 -0
- modules/scak/datasets/hf_upload/scak_gaia_laziness.jsonl +50 -0
- modules/scak/datasets/prepare_hf_datasets.py +145 -0
- modules/scak/datasets/red_team/jailbreak_patterns.json +202 -0
- modules/scak/docker-compose.yml +99 -0
- modules/scak/docs/Adaptive-Memory-Hierarchy.md +319 -0
- modules/scak/docs/Data-Contracts-and-Schemas.md +285 -0
- modules/scak/docs/Dual-Loop-Architecture.md +344 -0
- modules/scak/docs/Enhanced-Features.md +612 -0
- modules/scak/docs/LANGCHAIN_INTEGRATION.md +572 -0
- modules/scak/docs/README.md +128 -0
- modules/scak/docs/Reference-Implementations.md +163 -0
- modules/scak/docs/SCAK_V2.md +374 -0
- modules/scak/docs/Three-Failure-Types.md +178 -0
- modules/scak/examples/basic_example.py +155 -0
- modules/scak/examples/circuit_breaker_lazy_eval_demo.py +243 -0
- modules/scak/examples/langchain_integration_example.py +339 -0
- modules/scak/examples/layer4_demo.py +243 -0
- modules/scak/examples/production_features_demo.py +353 -0
- modules/scak/examples/quick_demo.py +79 -0
- modules/scak/examples/scak_v2_demo.py +252 -0
- modules/scak/experiments/README.md +438 -0
- modules/scak/experiments/ablation_studies/README.md +192 -0
- modules/scak/experiments/ablation_studies/ablation_no_audit.py +116 -0
- modules/scak/experiments/ablation_studies/ablation_no_purge.py +133 -0
- modules/scak/experiments/chaos_engineering/README.md +332 -0
- modules/scak/experiments/context_efficiency_test.py +328 -0
- modules/scak/experiments/gaia_benchmark/README.md +208 -0
- modules/scak/experiments/laziness_benchmark.py +179 -0
- modules/scak/experiments/long_horizon_task_experiment.py +252 -0
- modules/scak/experiments/multi_agent_rag_experiment.py +284 -0
- modules/scak/experiments/results/ablation_table.md +12 -0
- modules/scak/experiments/results/long_horizon.json +36 -0
- modules/scak/experiments/results/multi_agent_rag.json +66 -0
- modules/scak/experiments/run_comprehensive_ablations.py +332 -0
- modules/scak/experiments/test_auditor_patcher_integration.py +251 -0
- modules/scak/notebooks/getting_started.ipynb +33 -0
- modules/scak/paper/ARXIV_SUBMISSION_METADATA.txt +109 -0
- modules/scak/paper/PAPER_CHECKLIST.md +304 -0
- modules/scak/paper/Paper.pdf +0 -0
- modules/scak/paper/README.md +113 -0
- modules/scak/paper/appendix.md +351 -0
- modules/scak/paper/arxiv/bibliography.bib +284 -0
- modules/scak/paper/arxiv/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/arxiv/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/arxiv/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/arxiv/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/arxiv/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/arxiv/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/arxiv/main.aux +103 -0
- modules/scak/paper/arxiv/main.bbl +113 -0
- modules/scak/paper/arxiv/main.blg +55 -0
- modules/scak/paper/arxiv/main.out +31 -0
- modules/scak/paper/arxiv/main.pdf +0 -0
- modules/scak/paper/arxiv/main.tex +482 -0
- modules/scak/paper/arxiv_submission/bibliography.bib +284 -0
- modules/scak/paper/arxiv_submission/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/arxiv_submission/main.aux +103 -0
- modules/scak/paper/arxiv_submission/main.bbl +113 -0
- modules/scak/paper/arxiv_submission/main.blg +55 -0
- modules/scak/paper/arxiv_submission/main.out +31 -0
- modules/scak/paper/arxiv_submission/main.pdf +0 -0
- modules/scak/paper/arxiv_submission/main.tex +482 -0
- modules/scak/paper/arxiv_submission.tar.gz +0 -0
- modules/scak/paper/bibliography.bib +284 -0
- modules/scak/paper/build.sh +55 -0
- modules/scak/paper/figures/README.md +32 -0
- modules/scak/paper/figures/fig1_ooda_architecture.md +75 -0
- modules/scak/paper/figures/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/figures/fig1_ooda_architecture.png +0 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.md +83 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.png +0 -0
- modules/scak/paper/figures/fig3_gaia_results.md +64 -0
- modules/scak/paper/figures/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/figures/fig3_gaia_results.png +0 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.md +64 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.png +0 -0
- modules/scak/paper/figures/fig5_context_reduction.md +71 -0
- modules/scak/paper/figures/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/figures/fig5_context_reduction.png +0 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.md +80 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.png +0 -0
- modules/scak/paper/figures/generate_figures.py +463 -0
- modules/scak/paper/main.aux +103 -0
- modules/scak/paper/main.bbl +113 -0
- modules/scak/paper/main.blg +55 -0
- modules/scak/paper/main.md +192 -0
- modules/scak/paper/main.out +31 -0
- modules/scak/paper/main.pdf +0 -0
- modules/scak/paper/main.tex +482 -0
- modules/scak/reproducibility/ABLATIONS.md +225 -0
- modules/scak/reproducibility/Dockerfile.reproducibility +34 -0
- modules/scak/reproducibility/README.md +421 -0
- modules/scak/reproducibility/requirements-pinned.txt +32 -0
- modules/scak/reproducibility/run_all_experiments.py +395 -0
- modules/scak/reproducibility/seed_control.py +53 -0
- modules/scak/reproducibility/statistical_analysis.py +302 -0
- modules/scak/requirements.txt +50 -0
- modules/scak/setup.py +93 -0
- modules/scak/src/__init__.py +124 -0
- modules/scak/src/agents/__init__.py +13 -0
- modules/scak/src/agents/conflict_resolution.py +732 -0
- modules/scak/src/agents/orchestrator.py +761 -0
- modules/scak/src/agents/pubsub.py +484 -0
- modules/scak/src/agents/shadow_teacher.py +344 -0
- modules/scak/src/agents/swarm.py +661 -0
- modules/scak/src/agents/worker.py +357 -0
- modules/scak/src/integrations/__init__.py +81 -0
- modules/scak/src/integrations/cmvk_adapter.py +430 -0
- modules/scak/src/integrations/control_plane_adapter.py +601 -0
- modules/scak/src/integrations/langchain_integration.py +902 -0
- modules/scak/src/interfaces/__init__.py +59 -0
- modules/scak/src/interfaces/llm_clients.py +505 -0
- modules/scak/src/interfaces/openapi_tools.py +611 -0
- modules/scak/src/interfaces/plugin_system.py +605 -0
- modules/scak/src/interfaces/protocols.py +365 -0
- modules/scak/src/interfaces/telemetry.py +464 -0
- modules/scak/src/interfaces/tool_registry.py +547 -0
- modules/scak/src/kernel/__init__.py +100 -0
- modules/scak/src/kernel/auditor.py +305 -0
- modules/scak/src/kernel/circuit_breaker.py +398 -0
- modules/scak/src/kernel/core.py +724 -0
- modules/scak/src/kernel/distributed.py +667 -0
- modules/scak/src/kernel/evolution.py +455 -0
- modules/scak/src/kernel/failover.py +621 -0
- modules/scak/src/kernel/governance.py +710 -0
- modules/scak/src/kernel/governance_v2.py +603 -0
- modules/scak/src/kernel/lazy_evaluator.py +514 -0
- modules/scak/src/kernel/load_testing.py +633 -0
- modules/scak/src/kernel/memory.py +945 -0
- modules/scak/src/kernel/patcher.py +581 -0
- modules/scak/src/kernel/rubric.py +419 -0
- modules/scak/src/kernel/schemas.py +390 -0
- modules/scak/src/kernel/skill_mapper.py +309 -0
- modules/scak/src/kernel/triage.py +149 -0
- modules/scak/src/mocks/__init__.py +99 -0
- modules/scak/tests/__init__.py +1 -0
- modules/scak/tests/test_circuit_breaker.py +403 -0
- modules/scak/tests/test_conflict_resolution.py +287 -0
- modules/scak/tests/test_dual_loop.py +463 -0
- modules/scak/tests/test_enhanced_features.py +421 -0
- modules/scak/tests/test_failover_and_load.py +438 -0
- modules/scak/tests/test_governance.py +185 -0
- modules/scak/tests/test_kernel.py +359 -0
- modules/scak/tests/test_langchain_integration.py +451 -0
- modules/scak/tests/test_lazy_evaluator.py +465 -0
- modules/scak/tests/test_llm_clients.py +122 -0
- modules/scak/tests/test_memory_controller.py +528 -0
- modules/scak/tests/test_orchestrator.py +181 -0
- modules/scak/tests/test_phase3_integration.py +265 -0
- modules/scak/tests/test_pubsub_swarm.py +203 -0
- modules/scak/tests/test_reference_implementations.py +240 -0
- modules/scak/tests/test_rubric.py +363 -0
- modules/scak/tests/test_scak_v2.py +651 -0
- modules/scak/tests/test_skill_mapper.py +217 -0
- modules/scak/tests/test_specific_failures.py +393 -0
- modules/scak/tests/test_tool_registry.py +264 -0
- modules/scak/tests/test_tools_and_plugins.py +303 -0
- modules/scak/tests/test_triage.py +596 -0
- modules/scak/tests/test_write_through.py +319 -0
- agent_os_kernel-1.1.0.dist-info/METADATA +0 -400
- agent_os_kernel-1.1.0.dist-info/RECORD +0 -12
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.2.0.dist-info}/WHEEL +0 -0
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.2.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,468 @@
|
|
|
1
|
+
% Context-as-a-Service: A Principled Architecture for Enterprise RAG Systems
|
|
2
|
+
% Main LaTeX file for arXiv submission
|
|
3
|
+
%
|
|
4
|
+
% To compile: pdflatex main.tex && bibtex main && pdflatex main.tex && pdflatex main.tex
|
|
5
|
+
|
|
6
|
+
\documentclass[11pt,a4paper]{article}
|
|
7
|
+
|
|
8
|
+
% Packages
|
|
9
|
+
\usepackage[utf8]{inputenc}
|
|
10
|
+
\usepackage[T1]{fontenc}
|
|
11
|
+
\usepackage{times}
|
|
12
|
+
\usepackage{graphicx}
|
|
13
|
+
\usepackage{amsmath}
|
|
14
|
+
\usepackage{amssymb}
|
|
15
|
+
\usepackage{booktabs}
|
|
16
|
+
\usepackage{hyperref}
|
|
17
|
+
\usepackage{xcolor}
|
|
18
|
+
\usepackage{listings}
|
|
19
|
+
\usepackage{algorithm}
|
|
20
|
+
\usepackage{algorithmic}
|
|
21
|
+
\usepackage{multirow}
|
|
22
|
+
\usepackage{caption}
|
|
23
|
+
\usepackage{subcaption}
|
|
24
|
+
\usepackage[margin=1in]{geometry}
|
|
25
|
+
|
|
26
|
+
% Hyperref setup
|
|
27
|
+
\hypersetup{
|
|
28
|
+
colorlinks=true,
|
|
29
|
+
linkcolor=blue,
|
|
30
|
+
filecolor=magenta,
|
|
31
|
+
urlcolor=cyan,
|
|
32
|
+
citecolor=blue,
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
% Code listing style
|
|
36
|
+
\lstset{
|
|
37
|
+
basicstyle=\ttfamily\small,
|
|
38
|
+
breaklines=true,
|
|
39
|
+
frame=single,
|
|
40
|
+
numbers=left,
|
|
41
|
+
numberstyle=\tiny,
|
|
42
|
+
keywordstyle=\color{blue},
|
|
43
|
+
commentstyle=\color{green!50!black},
|
|
44
|
+
stringstyle=\color{red},
|
|
45
|
+
language=Python,
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
% Custom commands
|
|
49
|
+
\newcommand{\caas}{\textsc{CaaS}}
|
|
50
|
+
\newcommand{\hot}{\textsc{Hot}}
|
|
51
|
+
\newcommand{\warm}{\textsc{Warm}}
|
|
52
|
+
\newcommand{\cold}{\textsc{Cold}}
|
|
53
|
+
|
|
54
|
+
\title{Context-as-a-Service: A Principled Architecture for Enterprise RAG Systems}
|
|
55
|
+
|
|
56
|
+
\author{
|
|
57
|
+
Imran Siddique\\
|
|
58
|
+
Microsoft\\
|
|
59
|
+
\texttt{imran.siddique@microsoft.com}
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
\date{}
|
|
63
|
+
|
|
64
|
+
\begin{document}
|
|
65
|
+
|
|
66
|
+
\maketitle
|
|
67
|
+
|
|
68
|
+
\begin{abstract}
|
|
69
|
+
Retrieval-Augmented Generation (RAG) systems have become essential for grounding LLM outputs in factual content. However, production deployments face seven critical fallacies that current frameworks fail to address: (1) the Flat Chunk Fallacy, treating all content equally regardless of structural importance; (2) Context Amnesia, losing metadata when chunks are extracted; (3) Time-Blind Retrieval, ignoring content freshness; (4) Flat Context, lacking priority tiers for different context types; (5) Official Truth Fallacy, favoring documentation over practical knowledge; (6) Brutal Squeeze, using lossy summarization instead of precision truncation; and (7) the Middleware Gap, trusting third-party routers with sensitive data.
|
|
70
|
+
|
|
71
|
+
We present \textbf{Context-as-a-Service (\caas{})}, an open-source framework that systematically addresses these challenges through five novel components: (a) \textbf{Structure-Aware Indexing} with three-tier value hierarchies; (b) \textbf{Context Triad} for Hot/Warm/Cold intimacy-based prioritization; (c) \textbf{Pragmatic Truth} tracking that surfaces practical knowledge alongside official sources; (d) \textbf{Heuristic Router} for zero-latency deterministic query routing; and (e) \textbf{Trust Gateway} for enterprise-grade on-premises deployment.
|
|
72
|
+
|
|
73
|
+
We evaluate \caas{} on a new benchmark corpus of 16 enterprise documents spanning code, legal, HR, and engineering domains. Our experiments demonstrate \textbf{28.1\% improvement in Precision@5} and \textbf{27.9\% improvement in NDCG@10} over flat-chunk baselines, with sub-millisecond routing latency (0.003\,ms) and only 18.4\% latency overhead for the full pipeline. \caas{} is available as an open-source Python package under the MIT license, with Docker support and a public Hugging Face dataset for reproducibility.
|
|
74
|
+
\end{abstract}
|
|
75
|
+
|
|
76
|
+
\textbf{Keywords}: Retrieval-Augmented Generation, RAG, Enterprise AI, Context Management, LLM
|
|
77
|
+
|
|
78
|
+
% ============================================================================
|
|
79
|
+
\section{Introduction}
|
|
80
|
+
\label{sec:intro}
|
|
81
|
+
% ============================================================================
|
|
82
|
+
|
|
83
|
+
\subsection{The RAG Revolution and Its Hidden Pitfalls}
|
|
84
|
+
|
|
85
|
+
Retrieval-Augmented Generation (RAG) has emerged as the dominant paradigm for grounding Large Language Model (LLM) outputs in factual, domain-specific knowledge~\cite{lewis2020rag}. By retrieving relevant documents at inference time, RAG systems overcome the knowledge cutoff limitations of pre-trained models while enabling deployment in enterprise settings where proprietary data must remain private.
|
|
86
|
+
|
|
87
|
+
Yet beneath the surface of this revolution lies a troubling reality: \textbf{most production RAG systems are fundamentally broken}. Not in obvious ways that cause immediate failures, but in subtle architectural choices that degrade quality, waste resources, and---most critically---erode user trust over time.
|
|
88
|
+
|
|
89
|
+
\subsection{The Seven Fallacies of Production RAG}
|
|
90
|
+
|
|
91
|
+
Through extensive deployment experience and analysis of existing frameworks, we identify \textbf{seven critical fallacies} that plague production RAG systems:
|
|
92
|
+
|
|
93
|
+
\paragraph{1. The Flat Chunk Fallacy (Structure Problem)}
|
|
94
|
+
The standard approach---split documents into fixed-size chunks, embed them, and retrieve by vector similarity---treats all content as equally valuable. But a class definition in source code carries fundamentally different weight than a TODO comment. By flattening this hierarchy, we lose the structural signals that humans naturally use to prioritize information.
|
|
95
|
+
|
|
96
|
+
\paragraph{2. Context Amnesia (Metadata Problem)}
|
|
97
|
+
When a chunk is extracted from its parent document, it loses critical context. Consider the retrieved text: ``It increased by 5\%.'' What increased? Revenue? Costs? Without metadata preserving the document path, the chunk is semantically orphaned.
|
|
98
|
+
|
|
99
|
+
\paragraph{3. Time-Blind Retrieval (Temporal Problem)}
|
|
100
|
+
Traditional retrieval optimizes for semantic similarity, ignoring that a 2021 procedure may be dangerously outdated in 2025.
|
|
101
|
+
|
|
102
|
+
\paragraph{4. The Flat Context Fallacy (Priority Problem)}
|
|
103
|
+
Most systems treat the user's last message and historical archives from two years ago with equal priority.
|
|
104
|
+
|
|
105
|
+
\paragraph{5. The Official Truth Fallacy (Source Problem)}
|
|
106
|
+
Enterprise documentation often contains aspirational information, while the engineering Slack channel contains the practical truth.
|
|
107
|
+
|
|
108
|
+
\paragraph{6. The Brutal Squeeze (Context Management Problem)}
|
|
109
|
+
When conversation history exceeds context limits, AI-powered summarization loses critical nuance.
|
|
110
|
+
|
|
111
|
+
\paragraph{7. The Middleware Gap (Trust Problem)}
|
|
112
|
+
No enterprise CISO will send proprietary data through a random middleware startup's API.
|
|
113
|
+
|
|
114
|
+
\subsection{Our Contribution: Context-as-a-Service}
|
|
115
|
+
|
|
116
|
+
We present \textbf{Context-as-a-Service (\caas{})}, an open-source framework that systematically addresses all seven fallacies:
|
|
117
|
+
|
|
118
|
+
\begin{enumerate}
|
|
119
|
+
\item \textbf{Structure-Aware Indexing}: Three-tier value hierarchy (High/Medium/Low)
|
|
120
|
+
\item \textbf{Metadata Injection}: Automatic enrichment with document path and temporal metadata
|
|
121
|
+
\item \textbf{Time-Based Decay}: Exponential decay with configurable half-life parameters
|
|
122
|
+
\item \textbf{Context Triad}: Hot/Warm/Cold classification by intimacy
|
|
123
|
+
\item \textbf{Pragmatic Truth}: Parallel tracking of official and informal sources
|
|
124
|
+
\item \textbf{Sliding Window}: FIFO truncation instead of lossy summarization
|
|
125
|
+
\item \textbf{Trust Gateway}: On-premises deployment with zero data leakage
|
|
126
|
+
\end{enumerate}
|
|
127
|
+
|
|
128
|
+
% ============================================================================
|
|
129
|
+
\section{Related Work}
|
|
130
|
+
\label{sec:related}
|
|
131
|
+
% ============================================================================
|
|
132
|
+
|
|
133
|
+
\paragraph{Retrieval-Augmented Generation}
|
|
134
|
+
The foundation of modern RAG traces to Lewis et al.~\cite{lewis2020rag}, who introduced the paradigm of combining retrieval with generation. Subsequent work by Guu et al.~\cite{guu2020realm} demonstrated retrieval-augmented pre-training benefits. \caas{} differs by focusing on \textbf{serving-time context management} rather than the retrieval mechanism itself.
|
|
135
|
+
|
|
136
|
+
\paragraph{The Accumulation Paradox}
|
|
137
|
+
A growing body of work reveals a counterintuitive phenomenon we term the \textbf{Accumulation Paradox}: adding more context can \emph{degrade} rather than improve performance. Liu et al.~\cite{liu2023lost} demonstrated this in their ``Lost in the Middle'' study, showing U-shaped performance curves in which models overlook information placed in the middle of long contexts. Xiao et al.~\cite{xiao2024streaming} showed that window attention fails once the context exceeds the cache size, and identified the ``attention sink'' phenomenon. Li et al.~\cite{li2024longcontext} demonstrated that even purpose-built long-context LLMs struggle with accumulated context. For agentic AI, Packer et al.~\cite{packer2023memgpt} (MemGPT) showed that raw context accumulation cannot sustain long-running agents. \caas{} addresses this through time-based decay and the Context Triad.
|
|
138
|
+
|
|
139
|
+
\paragraph{Document Structure}
|
|
140
|
+
Hierarchical document understanding has been explored in summarization~\cite{cohan2018hierarchical} and document-level NLP. \caas{} applies this through our three-tier value hierarchy using \textbf{deterministic heuristics} rather than learned representations.
|
|
141
|
+
|
|
142
|
+
\paragraph{Temporal Retrieval}
|
|
143
|
+
Kasai et al.~\cite{kasai2022realtime} introduced RealTime QA, demonstrating time-sensitive retrieval needs. \caas{} implements \textbf{explicit time-based decay} with configurable half-life parameters.
|
|
144
|
+
|
|
145
|
+
\paragraph{Source Attribution}
|
|
146
|
+
Recent work on attribution~\cite{menick2022citation} addresses tracing content to sources. \caas{}'s Pragmatic Truth module extends this by tracking \textbf{conflicts between sources}.
|
|
147
|
+
|
|
148
|
+
\paragraph{Context Window Management}
|
|
149
|
+
Common approaches include summarization~\cite{chevalier2023compression}, but these introduce lossy transformations. \caas{} uses \textbf{FIFO sliding window management} instead.
|
|
150
|
+
|
|
151
|
+
% ============================================================================
|
|
152
|
+
\section{Method}
|
|
153
|
+
\label{sec:method}
|
|
154
|
+
% ============================================================================
|
|
155
|
+
|
|
156
|
+
\subsection{System Overview}
|
|
157
|
+
|
|
158
|
+
Figure~\ref{fig:architecture} illustrates the \caas{} architecture. Documents flow through ingestion, structure parsing, metadata injection, and time decay before entering the indexed store, which is organized by the three-tier value hierarchy.
|
|
159
|
+
|
|
160
|
+
\begin{figure}[t]
|
|
161
|
+
\centering
|
|
162
|
+
\includegraphics[width=\columnwidth]{figures/fig1_system_architecture.png}
|
|
163
|
+
\caption{\caas{} system architecture showing the complete pipeline from ingestion through the Trust Gateway.}
|
|
164
|
+
\label{fig:architecture}
|
|
165
|
+
\end{figure}
|
|
166
|
+
|
|
167
|
+
\subsection{Structure-Aware Indexing}
|
|
168
|
+
|
|
169
|
+
We classify document content into three value tiers based on document type and structural patterns:
|
|
170
|
+
|
|
171
|
+
\begin{table}[h]
|
|
172
|
+
\centering
|
|
173
|
+
\caption{Value tier definitions by document type}
|
|
174
|
+
\label{tab:tiers}
|
|
175
|
+
\begin{tabular}{lllll}
|
|
176
|
+
\toprule
|
|
177
|
+
\textbf{Tier} & \textbf{Code} & \textbf{Legal} & \textbf{Policy} & \textbf{Docs} \\
|
|
178
|
+
\midrule
|
|
179
|
+
HIGH & Class/func defs & Liability & Core reqs & API endpoints \\
|
|
180
|
+
MEDIUM & Docstrings & Terms & Guidelines & Examples \\
|
|
181
|
+
LOW & Imports, TODOs & Boilerplate & Formatting & Metadata \\
|
|
182
|
+
\bottomrule
|
|
183
|
+
\end{tabular}
|
|
184
|
+
\end{table}
|
|
185
|
+
|
|
186
|
+
During retrieval, we apply multiplicative weights:
|
|
187
|
+
\begin{equation}
|
|
188
|
+
\text{score}(c) = \text{similarity}(q, c) \times w_{\text{tier}}(c)
|
|
189
|
+
\end{equation}
|
|
190
|
+
where $w_{\text{HIGH}} = 1.5$, $w_{\text{MEDIUM}} = 1.0$, $w_{\text{LOW}} = 0.5$.
|
|
191
|
+
|
|
192
|
+
\subsection{Time-Based Decay}
|
|
193
|
+
|
|
194
|
+
We apply time-based decay using an exponential function:
|
|
195
|
+
\begin{equation}
|
|
196
|
+
\text{decay}(t) = e^{-\lambda t}
|
|
197
|
+
\end{equation}
|
|
198
|
+
where $t$ is time since document update (days), $\lambda = \ln(2) / T_{1/2}$, and $T_{1/2}$ is the configurable half-life parameter.
|
|
199
|
+
|
|
200
|
+
\begin{table}[h]
|
|
201
|
+
\centering
|
|
202
|
+
\caption{Domain-specific half-lives}
|
|
203
|
+
\label{tab:halflife}
|
|
204
|
+
\begin{tabular}{lll}
|
|
205
|
+
\toprule
|
|
206
|
+
\textbf{Domain} & \textbf{Half-Life} & \textbf{Rationale} \\
|
|
207
|
+
\midrule
|
|
208
|
+
Code/Engineering & 90 days & APIs change frequently \\
|
|
209
|
+
Policy/HR & 365 days & Annual updates \\
|
|
210
|
+
Legal & 730 days & Longer contract validity \\
|
|
211
|
+
Incidents & 30 days & Recent most relevant \\
|
|
212
|
+
\bottomrule
|
|
213
|
+
\end{tabular}
|
|
214
|
+
\end{table}
|
|
215
|
+
|
|
216
|
+
\subsection{Context Triad}
|
|
217
|
+
|
|
218
|
+
We organize context into three intimacy-based tiers (Figure~\ref{fig:triad}):
|
|
219
|
+
|
|
220
|
+
\begin{figure}[t]
|
|
221
|
+
\centering
|
|
222
|
+
\includegraphics[width=0.9\columnwidth]{figures/fig2_context_triad.png}
|
|
223
|
+
\caption{Context Triad: Hot/Warm/Cold prioritization with token budgets.}
|
|
224
|
+
\label{fig:triad}
|
|
225
|
+
\end{figure}
|
|
226
|
+
|
|
227
|
+
\begin{itemize}
|
|
228
|
+
\item \textbf{\hot{}} (2,000 tokens): Current conversation, last 10 turns
|
|
229
|
+
\item \textbf{\warm{}} (1,000 tokens): User preferences, recent documents
|
|
230
|
+
\item \textbf{\cold{}} (5,000 tokens): Historical archives, reference docs
|
|
231
|
+
\end{itemize}
|
|
232
|
+
|
|
233
|
+
Our philosophy: \textbf{``Chopping $>$ Summarizing''}. Users rarely reference content from 20 minutes ago but frequently reference the exact code snippet from 30 seconds ago.
|
|
234
|
+
|
|
235
|
+
\subsection{Heuristic Router}
|
|
236
|
+
|
|
237
|
+
We use rule-based routing with \textbf{zero model inference}:
|
|
238
|
+
|
|
239
|
+
\begin{table}[h]
|
|
240
|
+
\centering
|
|
241
|
+
\caption{Routing performance comparison}
|
|
242
|
+
\label{tab:routing}
|
|
243
|
+
\begin{tabular}{lrr}
|
|
244
|
+
\toprule
|
|
245
|
+
\textbf{Router} & \textbf{Latency} & \textbf{Accuracy} \\
|
|
246
|
+
\midrule
|
|
247
|
+
LLM-based & 450ms & 95\% \\
|
|
248
|
+
ML-based & 15ms & 92\% \\
|
|
249
|
+
\textbf{Heuristic (Ours)} & \textbf{0.003ms} & \textbf{89\%} \\
|
|
250
|
+
\bottomrule
|
|
251
|
+
\end{tabular}
|
|
252
|
+
\end{table}
|
|
253
|
+
|
|
254
|
+
We trade 3--6 percentage points of accuracy for a \textbf{5,000--150,000$\times$ speedup}.
|
|
255
|
+
|
|
256
|
+
\subsection{Pragmatic Truth}
|
|
257
|
+
|
|
258
|
+
We maintain parallel indices for official and informal sources, detecting conflicts when semantic similarity between answers falls below a threshold (0.7).
|
|
259
|
+
|
|
260
|
+
\subsection{Trust Gateway}
|
|
261
|
+
|
|
262
|
+
The Trust Gateway enables on-premises deployment with:
|
|
263
|
+
\begin{itemize}
|
|
264
|
+
\item \textbf{Data Sovereignty}: Processing within enterprise boundaries
|
|
265
|
+
\item \textbf{Audit Logging}: Complete trace of decisions
|
|
266
|
+
\item \textbf{PII Filtering}: Optional sanitization
|
|
267
|
+
\item \textbf{Model Agnostic}: Route to any LLM provider
|
|
268
|
+
\end{itemize}
|
|
269
|
+
|
|
270
|
+
% ============================================================================
|
|
271
|
+
\section{Experiments}
|
|
272
|
+
\label{sec:experiments}
|
|
273
|
+
% ============================================================================
|
|
274
|
+
|
|
275
|
+
\subsection{Benchmark Corpus}
|
|
276
|
+
|
|
277
|
+
We introduce a new benchmark corpus for evaluating enterprise RAG systems, publicly available on Hugging Face.\footnote{\url{https://huggingface.co/datasets/imran-siddique/context-as-a-service}}
|
|
278
|
+
|
|
279
|
+
\begin{table}[h]
|
|
280
|
+
\centering
|
|
281
|
+
\caption{Corpus statistics}
|
|
282
|
+
\label{tab:corpus}
|
|
283
|
+
\begin{tabular}{lr}
|
|
284
|
+
\toprule
|
|
285
|
+
\textbf{Property} & \textbf{Value} \\
|
|
286
|
+
\midrule
|
|
287
|
+
Total Documents & 16 \\
|
|
288
|
+
Total Lines & 2,935 \\
|
|
289
|
+
Total Characters & 100,562 \\
|
|
290
|
+
Estimated Tokens & $\sim$16,286 \\
|
|
291
|
+
Formats & 5 (MD, PY, HTML, SQL, YAML) \\
|
|
292
|
+
Domains & 6 (Eng, Docs, HR, Legal, Security, Business) \\
|
|
293
|
+
\bottomrule
|
|
294
|
+
\end{tabular}
|
|
295
|
+
\end{table}
|
|
296
|
+
|
|
297
|
+
\subsection{Baselines}
|
|
298
|
+
|
|
299
|
+
We compare \caas{} against:
|
|
300
|
+
\begin{enumerate}
|
|
301
|
+
\item \textbf{Naive Chunking}: Fixed 500-token chunks, no structure awareness
|
|
302
|
+
\item \textbf{Semantic Chunking}: Sentence-boundary aware chunking
|
|
303
|
+
\item \textbf{No Time Decay}: Structure-aware but no temporal weighting
|
|
304
|
+
\item \textbf{No Metadata}: Structure-aware but no metadata injection
|
|
305
|
+
\end{enumerate}
|
|
306
|
+
|
|
307
|
+
\subsection{Metrics}
|
|
308
|
+
|
|
309
|
+
\begin{itemize}
|
|
310
|
+
\item \textbf{Precision@K}: Fraction of retrieved chunks that are relevant
|
|
311
|
+
\item \textbf{NDCG@K}: Normalized Discounted Cumulative Gain
|
|
312
|
+
\item \textbf{Routing Latency}: Time to classify and route a query
|
|
313
|
+
\item \textbf{Token Efficiency}: Useful tokens / Total tokens in context
|
|
314
|
+
\end{itemize}
|
|
315
|
+
|
|
316
|
+
\subsection{Results}
|
|
317
|
+
|
|
318
|
+
\subsubsection{Main Results}
|
|
319
|
+
|
|
320
|
+
\begin{table}[h]
|
|
321
|
+
\centering
|
|
322
|
+
\caption{Main results: \caas{} vs.\ baseline}
|
|
323
|
+
\label{tab:main}
|
|
324
|
+
\begin{tabular}{lccc}
|
|
325
|
+
\toprule
|
|
326
|
+
\textbf{Method} & \textbf{P@5} & \textbf{NDCG@10} & \textbf{Latency} \\
|
|
327
|
+
\midrule
|
|
328
|
+
Baseline & $0.640 \pm 0.057$ & $0.610 \pm 0.048$ & 38ms \\
|
|
329
|
+
\textbf{Full \caas{}} & $\mathbf{0.820 \pm 0.045}$ & $\mathbf{0.780 \pm 0.042}$ & 45ms \\
|
|
330
|
+
\midrule
|
|
331
|
+
\textbf{Improvement} & \textbf{+28.1\%} & \textbf{+27.9\%} & +18.4\% \\
|
|
332
|
+
\bottomrule
|
|
333
|
+
\end{tabular}
|
|
334
|
+
\end{table}
|
|
335
|
+
|
|
336
|
+
\subsubsection{Statistical Significance}
|
|
337
|
+
|
|
338
|
+
\begin{table}[h]
|
|
339
|
+
\centering
|
|
340
|
+
\caption{Statistical significance tests}
|
|
341
|
+
\label{tab:significance}
|
|
342
|
+
\begin{tabular}{lcccc}
|
|
343
|
+
\toprule
|
|
344
|
+
\textbf{Comparison} & \textbf{t-stat} & \textbf{p-value} & \textbf{Cohen's d} & \textbf{Effect} \\
|
|
345
|
+
\midrule
|
|
346
|
+
P@5 & 22.31 & $< 0.001$ & 3.36 & Large \\
|
|
347
|
+
NDCG@10 & 19.87 & $< 0.001$ & 2.98 & Large \\
|
|
348
|
+
\bottomrule
|
|
349
|
+
\end{tabular}
|
|
350
|
+
\end{table}
|
|
351
|
+
|
|
352
|
+
The improvements are statistically significant ($p < 0.001$) with large effect sizes (Cohen's $d > 0.8$).
|
|
353
|
+
|
|
354
|
+
\subsubsection{Ablation Study}
|
|
355
|
+
|
|
356
|
+
Figure~\ref{fig:ablation} shows the contribution of each component.
|
|
357
|
+
|
|
358
|
+
\begin{figure}[t]
|
|
359
|
+
\centering
|
|
360
|
+
\includegraphics[width=\columnwidth]{figures/fig3_ablation_results.png}
|
|
361
|
+
\caption{Ablation study: individual component contributions to Precision@5.}
|
|
362
|
+
\label{fig:ablation}
|
|
363
|
+
\end{figure}
|
|
364
|
+
|
|
365
|
+
\begin{table}[h]
|
|
366
|
+
\centering
|
|
367
|
+
\caption{Ablation study results}
|
|
368
|
+
\label{tab:ablation}
|
|
369
|
+
\begin{tabular}{lccr}
|
|
370
|
+
\toprule
|
|
371
|
+
\textbf{Configuration} & \textbf{P@5} & \textbf{NDCG@10} & \textbf{$\Delta$ P@5} \\
|
|
372
|
+
\midrule
|
|
373
|
+
Baseline & 0.640 & 0.610 & --- \\
|
|
374
|
+
+ Structure-Aware & 0.740 & 0.700 & +15.6\% \\
|
|
375
|
+
+ Metadata Injection & 0.720 & 0.690 & +12.5\% \\
|
|
376
|
+
+ Time Decay & 0.700 & 0.670 & +9.4\% \\
|
|
377
|
+
+ Pragmatic Truth & 0.680 & 0.650 & +6.3\% \\
|
|
378
|
+
\midrule
|
|
379
|
+
\textbf{Full \caas{}} & \textbf{0.820} & \textbf{0.780} & \textbf{+28.1\%} \\
|
|
380
|
+
\bottomrule
|
|
381
|
+
\end{tabular}
|
|
382
|
+
\end{table}
|
|
383
|
+
|
|
384
|
+
\textbf{Key Findings}:
|
|
385
|
+
\begin{enumerate}
|
|
386
|
+
\item Structure-Aware Indexing provides the largest individual gain (+15.6\%)
|
|
387
|
+
\item Combined effect (+28.1\%) is smaller than the sum of the individual effects (+43.8\%), suggesting that the components yield partially overlapping gains
|
|
388
|
+
\end{enumerate}
|
|
389
|
+
|
|
390
|
+
\subsubsection{Routing Latency}
|
|
391
|
+
|
|
392
|
+
Figure~\ref{fig:latency} compares routing approaches.
|
|
393
|
+
|
|
394
|
+
\begin{figure}[t]
|
|
395
|
+
\centering
|
|
396
|
+
\includegraphics[width=\columnwidth]{figures/fig4_routing_latency.png}
|
|
397
|
+
\caption{Router latency comparison (log scale).}
|
|
398
|
+
\label{fig:latency}
|
|
399
|
+
\end{figure}
|
|
400
|
+
|
|
401
|
+
The heuristic router achieves sub-millisecond latency (0.003ms), which is \textbf{5,000$\times$ faster} than ML-based and \textbf{150,000$\times$ faster} than LLM-based routing.
|
|
402
|
+
|
|
403
|
+
\subsubsection{Token Efficiency}
|
|
404
|
+
|
|
405
|
+
\begin{table}[h]
|
|
406
|
+
\centering
|
|
407
|
+
\caption{Context token efficiency}
|
|
408
|
+
\label{tab:efficiency}
|
|
409
|
+
\begin{tabular}{lrrr}
|
|
410
|
+
\toprule
|
|
411
|
+
\textbf{Context Type} & \textbf{Budget} & \textbf{Utilization} & \textbf{Useful} \\
|
|
412
|
+
\midrule
|
|
413
|
+
Hot (Conversation) & 2,000 & 85\% & 1,700 \\
|
|
414
|
+
Warm (User Context) & 1,000 & 72\% & 720 \\
|
|
415
|
+
Cold (Retrieved) & 5,000 & 68\% & 3,400 \\
|
|
416
|
+
\midrule
|
|
417
|
+
\textbf{Total} & \textbf{8,000} & \textbf{73\%} & \textbf{5,820} \\
|
|
418
|
+
\bottomrule
|
|
419
|
+
\end{tabular}
|
|
420
|
+
\end{table}
|
|
421
|
+
|
|
422
|
+
The Context Triad achieves 73\% token efficiency (5,820 useful tokens out of an 8,000-token budget).
|
|
423
|
+
|
|
424
|
+
% ============================================================================
|
|
425
|
+
\section{Discussion}
|
|
426
|
+
\label{sec:discussion}
|
|
427
|
+
% ============================================================================
|
|
428
|
+
|
|
429
|
+
\subsection{Limitations}
|
|
430
|
+
|
|
431
|
+
\begin{itemize}
|
|
432
|
+
\item \textbf{Corpus Size}: Our benchmark uses 16 documents; larger evaluations needed
|
|
433
|
+
\item \textbf{Domain Specificity}: Half-life parameters require domain tuning
|
|
434
|
+
\item \textbf{Heuristic Accuracy}: Rule-based routing trades accuracy for speed
|
|
435
|
+
\end{itemize}
|
|
436
|
+
|
|
437
|
+
\subsection{Ethical Considerations}
|
|
438
|
+
|
|
439
|
+
\begin{itemize}
|
|
440
|
+
\item \textbf{Bias}: Structure-aware indexing may amplify existing document biases
|
|
441
|
+
\item \textbf{Privacy}: Trust Gateway design requires careful implementation
|
|
442
|
+
\item \textbf{Environmental}: Additional processing has energy costs
|
|
443
|
+
\end{itemize}
|
|
444
|
+
|
|
445
|
+
% ============================================================================
|
|
446
|
+
\section{Conclusion}
|
|
447
|
+
\label{sec:conclusion}
|
|
448
|
+
% ============================================================================
|
|
449
|
+
|
|
450
|
+
We presented Context-as-a-Service (\caas{}), a principled framework addressing seven critical fallacies in production RAG systems. Through structure-aware indexing, time-based decay, the Context Triad, pragmatic truth tracking, and the Trust Gateway, \caas{} achieves a 28.1\% relative improvement in Precision@5 (from 0.640 to 0.820) with sub-millisecond routing latency.
|
|
451
|
+
|
|
452
|
+
\caas{} is available as open-source software with MIT license, Docker support, and public benchmark data for reproducibility.
|
|
453
|
+
|
|
454
|
+
\paragraph{Resources}
|
|
455
|
+
\begin{itemize}
|
|
456
|
+
\item Code: \url{https://github.com/imran-siddique/context-as-a-service}
|
|
457
|
+
\item PyPI: \url{https://pypi.org/project/context-as-a-service/}
|
|
458
|
+
\item Dataset: \url{https://huggingface.co/datasets/imran-siddique/context-as-a-service}
|
|
459
|
+
\end{itemize}
|
|
460
|
+
|
|
461
|
+
% ============================================================================
|
|
462
|
+
% References
|
|
463
|
+
% ============================================================================
|
|
464
|
+
|
|
465
|
+
\bibliographystyle{plain}
|
|
466
|
+
\bibliography{references}
|
|
467
|
+
|
|
468
|
+
\end{document}
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
% References for CaaS Paper
|
|
2
|
+
|
|
3
|
+
@inproceedings{lewis2020rag,
|
|
4
|
+
title={Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks},
|
|
5
|
+
author={Lewis, Patrick and Perez, Ethan and Piktus, Aleksandra and Petroni, Fabio and Karpukhin, Vladimir and Goyal, Naman and K{\"u}ttler, Heinrich and Lewis, Mike and Yih, Wen-tau and Rockt{\"a}schel, Tim and others},
|
|
6
|
+
booktitle={Advances in Neural Information Processing Systems},
|
|
7
|
+
volume={33},
|
|
8
|
+
pages={9459--9474},
|
|
9
|
+
year={2020}
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
@inproceedings{guu2020realm,
|
|
13
|
+
title={{REALM}: Retrieval-Augmented Language Model Pre-Training},
|
|
14
|
+
author={Guu, Kelvin and Lee, Kenton and Tung, Zora and Pasupat, Panupong and Chang, Ming-Wei},
|
|
15
|
+
booktitle={International Conference on Machine Learning},
|
|
16
|
+
pages={3929--3938},
|
|
17
|
+
year={2020}
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
@inproceedings{izacard2021leveraging,
|
|
21
|
+
title={Leveraging Passage Retrieval with Generative Models for Open Domain Question Answering},
|
|
22
|
+
author={Izacard, Gautier and Grave, Edouard},
|
|
23
|
+
booktitle={Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics},
|
|
24
|
+
pages={874--880},
|
|
25
|
+
year={2021}
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
@inproceedings{cohan2018hierarchical,
|
|
29
|
+
title={A Discourse-Aware Attention Model for Abstractive Summarization of Long Documents},
|
|
30
|
+
author={Cohan, Arman and Dernoncourt, Franck and Kim, Doo Soon and Bui, Trung and Kim, Seokhwan and Chang, Walter and Goharian, Nazli},
|
|
31
|
+
booktitle={Proceedings of NAACL-HLT},
|
|
32
|
+
pages={615--621},
|
|
33
|
+
year={2018}
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
@inproceedings{liu2019hierarchical,
|
|
37
|
+
title={Hierarchical Transformers for Multi-Document Summarization},
|
|
38
|
+
author={Liu, Yang and Lapata, Mirella},
|
|
39
|
+
booktitle={Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics},
|
|
40
|
+
pages={5070--5081},
|
|
41
|
+
year={2019}
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
@inproceedings{kasai2022realtime,
|
|
45
|
+
title={{RealTime QA}: What's the Answer Right Now?},
|
|
46
|
+
author={Kasai, Jungo and Sakaguchi, Keisuke and Takahashi, Yoichi and Le Bras, Ronan and Asai, Akari and Yu, Xinyan and Radev, Dragomir and Smith, Noah A. and Choi, Yejin and Inui, Kentaro},
|
|
47
|
+
booktitle={Advances in Neural Information Processing Systems},
|
|
48
|
+
year={2022}
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
@inproceedings{lazaridou2021mind,
|
|
52
|
+
title={Mind the Gap: Assessing Temporal Generalization in Neural Language Models},
|
|
53
|
+
author={Lazaridou, Angeliki and Kuncoro, Adhiguna and Gribovskaya, Elena and Agrawal, Devang and Li{\v{s}}ka, Adam and Terzi, Tayfun and Gimenez, Mai and others},
|
|
54
|
+
booktitle={Advances in Neural Information Processing Systems},
|
|
55
|
+
volume={34},
|
|
56
|
+
pages={29348--29363},
|
|
57
|
+
year={2021}
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
@inproceedings{menick2022citation,
|
|
61
|
+
title={Teaching Language Models to Support Answers with Verified Quotes},
|
|
62
|
+
author={Menick, Jacob and Trebacz, Maja and Mikulik, Vladimir and Aslanides, John and Song, Francis and Chadwick, Martin and Glaese, Mia and Young, Susannah and Campbell-Gillingham, Lucy and Irving, Geoffrey and McAleese, Nat},
|
|
63
|
+
booktitle={Advances in Neural Information Processing Systems},
|
|
64
|
+
year={2022}
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
@article{rashkin2021measuring,
|
|
68
|
+
title={Measuring Attribution in Natural Language Generation Models},
|
|
69
|
+
author={Rashkin, Hannah and Nikolaev, Vitaly and Lamm, Matthew and Aroyo, Lora and Collins, Michael and Das, Dipanjan and Petrov, Slav and Tomar, Gaurav Singh and Turc, Iulia and Reitter, David},
|
|
70
|
+
journal={Computational Linguistics},
|
|
71
|
+
year={2021}
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
@inproceedings{chevalier2023compression,
|
|
75
|
+
title={Adapting Language Models to Compress Contexts},
|
|
76
|
+
author={Chevalier, Alexis and Wettig, Alexander and Ajith, Anirudh and Chen, Danqi},
|
|
77
|
+
booktitle={Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing},
|
|
78
|
+
year={2023}
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
@inproceedings{dinan2019wizard,
|
|
82
|
+
title={Wizard of Wikipedia: Knowledge-Powered Conversational Agents},
|
|
83
|
+
author={Dinan, Emily and Roller, Stephen and Shuster, Kurt and Fan, Angela and Auli, Michael and Weston, Jason},
|
|
84
|
+
booktitle={International Conference on Learning Representations},
|
|
85
|
+
year={2019}
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
@inproceedings{zhang2020dialogpt,
|
|
89
|
+
title={DialoGPT: Large-Scale Generative Pre-training for Conversational Response Generation},
|
|
90
|
+
author={Zhang, Yizhe and Sun, Siqi and Galley, Michel and Chen, Yen-Chun and Brockett, Chris and Gao, Xiang and Gao, Jianfeng and Liu, Jingjing and Dolan, Bill},
|
|
91
|
+
booktitle={Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
|
|
92
|
+
pages={270--278},
|
|
93
|
+
year={2020}
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
@article{wang2023selfrag,
|
|
97
|
+
title={Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection},
|
|
98
|
+
author={Asai, Akari and Wu, Zeqiu and Wang, Yizhong and Sil, Avirup and Hajishirzi, Hannaneh},
|
|
99
|
+
journal={arXiv preprint arXiv:2310.11511},
|
|
100
|
+
year={2023}
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
@inproceedings{khattab2021baleen,
|
|
104
|
+
title={Baleen: Robust Multi-Hop Reasoning at Scale via Condensed Retrieval},
|
|
105
|
+
author={Khattab, Omar and Potts, Christopher and Zaharia, Matei},
|
|
106
|
+
booktitle={Advances in Neural Information Processing Systems},
|
|
107
|
+
year={2021}
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
% Accumulation Paradox References
|
|
111
|
+
|
|
112
|
+
@article{liu2023lost,
|
|
113
|
+
title={Lost in the Middle: How Language Models Use Long Contexts},
|
|
114
|
+
author={Liu, Nelson F. and Lin, Kevin and Hewitt, John and Paranjape, Ashwin and Bevilacqua, Michele and Petroni, Fabio and Liang, Percy},
|
|
115
|
+
journal={Transactions of the Association for Computational Linguistics},
|
|
116
|
+
year={2023},
|
|
117
|
+
note={arXiv:2307.03172}
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
@inproceedings{xiao2024streaming,
|
|
121
|
+
title={Efficient Streaming Language Models with Attention Sinks},
|
|
122
|
+
author={Xiao, Guangxuan and Tian, Yuandong and Chen, Beidi and Han, Song and Lewis, Mike},
|
|
123
|
+
booktitle={International Conference on Learning Representations},
|
|
124
|
+
year={2024},
|
|
125
|
+
note={arXiv:2309.17453}
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
@article{li2024longcontext,
|
|
129
|
+
title={Long-context {LLMs} Struggle with Long In-context Learning},
|
|
130
|
+
author={Li, Tianle and Zhang, Ge and Do, Quy Duc and Yue, Xiang and Chen, Wenhu},
|
|
131
|
+
journal={arXiv preprint arXiv:2404.02060},
|
|
132
|
+
year={2024}
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
@article{packer2023memgpt,
|
|
136
|
+
title={{MemGPT}: Towards {LLMs} as Operating Systems},
|
|
137
|
+
author={Packer, Charles and Wooders, Sarah and Lin, Kevin and Fang, Vivian and Patil, Shishir G. and Stoica, Ion and Gonzalez, Joseph E.},
|
|
138
|
+
journal={arXiv preprint arXiv:2310.08560},
|
|
139
|
+
year={2023}
|
|
140
|
+
}
|