agent-os-kernel 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_os/__init__.py +66 -4
- agent_os/agents_compat.py +286 -0
- agent_os/base_agent.py +308 -0
- agent_os/cli.py +1079 -19
- agent_os/integrations/__init__.py +37 -2
- agent_os/integrations/openai_adapter.py +502 -0
- agent_os/integrations/semantic_kernel_adapter.py +569 -0
- agent_os/stateless.py +349 -0
- agent_os_kernel-1.2.0.dist-info/METADATA +676 -0
- agent_os_kernel-1.2.0.dist-info/RECORD +1053 -0
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.2.0.dist-info}/entry_points.txt +0 -1
- modules/amb/.github/workflows/ci.yml +102 -0
- modules/amb/.github/workflows/publish.yml +146 -0
- modules/amb/.gitignore +134 -0
- modules/amb/CHANGELOG.md +118 -0
- modules/amb/CONTRIBUTING.md +141 -0
- modules/amb/LICENSE +21 -0
- modules/amb/README.md +188 -0
- modules/amb/amb_core/__init__.py +175 -0
- modules/amb/amb_core/adapters/__init__.py +55 -0
- modules/amb/amb_core/adapters/aws_sqs_broker.py +374 -0
- modules/amb/amb_core/adapters/azure_servicebus_broker.py +338 -0
- modules/amb/amb_core/adapters/kafka_broker.py +258 -0
- modules/amb/amb_core/adapters/nats_broker.py +283 -0
- modules/amb/amb_core/adapters/rabbitmq_broker.py +233 -0
- modules/amb/amb_core/adapters/redis_broker.py +260 -0
- modules/amb/amb_core/broker.py +143 -0
- modules/amb/amb_core/bus.py +479 -0
- modules/amb/amb_core/cloudevents.py +507 -0
- modules/amb/amb_core/dlq.py +343 -0
- modules/amb/amb_core/hf_utils.py +534 -0
- modules/amb/amb_core/memory_broker.py +408 -0
- modules/amb/amb_core/models.py +139 -0
- modules/amb/amb_core/persistence.py +527 -0
- modules/amb/amb_core/schema.py +292 -0
- modules/amb/amb_core/tracing.py +356 -0
- modules/amb/examples/advanced_features.py +223 -0
- modules/amb/examples/backpressure_demo.py +225 -0
- modules/amb/examples/basic_usage.py +117 -0
- modules/amb/examples/tracing_demo.py +104 -0
- modules/amb/experiments/README.md +52 -0
- modules/amb/experiments/reproduce_results.py +467 -0
- modules/amb/experiments/results.json +324 -0
- modules/amb/paper/README.md +40 -0
- modules/amb/paper/paper.tex +365 -0
- modules/amb/paper/whitepaper.md +377 -0
- modules/amb/pyproject.toml +117 -0
- modules/amb/tests/__init__.py +1 -0
- modules/amb/tests/test_backpressure_priority.py +280 -0
- modules/amb/tests/test_bus.py +198 -0
- modules/amb/tests/test_cloudevents.py +443 -0
- modules/amb/tests/test_features.py +531 -0
- modules/amb/tests/test_models.py +74 -0
- modules/amb/tests/test_tracing.py +254 -0
- modules/atr/.github/workflows/ci.yml +101 -0
- modules/atr/.github/workflows/publish.yml +140 -0
- modules/atr/.gitignore +134 -0
- modules/atr/.pre-commit-config.yaml +37 -0
- modules/atr/CHANGELOG.md +39 -0
- modules/atr/CONTRIBUTING.md +96 -0
- modules/atr/IMPLEMENTATION_SUMMARY.md +143 -0
- modules/atr/README.md +180 -0
- modules/atr/atr/__init__.py +638 -0
- modules/atr/atr/access.py +346 -0
- modules/atr/atr/composition.py +643 -0
- modules/atr/atr/decorator.py +355 -0
- modules/atr/atr/executor.py +382 -0
- modules/atr/atr/health.py +555 -0
- modules/atr/atr/hf_utils.py +447 -0
- modules/atr/atr/injection.py +420 -0
- modules/atr/atr/metrics.py +438 -0
- modules/atr/atr/policies.py +401 -0
- modules/atr/atr/py.typed +2 -0
- modules/atr/atr/registry.py +450 -0
- modules/atr/atr/schema.py +478 -0
- modules/atr/atr/tools/safe/__init__.py +73 -0
- modules/atr/atr/tools/safe/calculator.py +380 -0
- modules/atr/atr/tools/safe/datetime_tool.py +441 -0
- modules/atr/atr/tools/safe/file_reader.py +400 -0
- modules/atr/atr/tools/safe/http_client.py +314 -0
- modules/atr/atr/tools/safe/json_parser.py +372 -0
- modules/atr/atr/tools/safe/text_tool.py +526 -0
- modules/atr/atr/tools/safe/toolkit.py +173 -0
- modules/atr/docs/PYPI_SETUP.md +113 -0
- modules/atr/examples/README.md +27 -0
- modules/atr/examples/demo.py +144 -0
- modules/atr/examples/sandbox_demo.py +218 -0
- modules/atr/experiments/README.md +69 -0
- modules/atr/experiments/reproduce_results.py +509 -0
- modules/atr/experiments/results/.gitkeep +0 -0
- modules/atr/experiments/results/results_20260123_140334.json +71 -0
- modules/atr/paper/README.md +36 -0
- modules/atr/paper/figures/.gitkeep +0 -0
- modules/atr/paper/references.bib +84 -0
- modules/atr/paper/structure.tex +293 -0
- modules/atr/paper/whitepaper.md +234 -0
- modules/atr/pyproject.toml +148 -0
- modules/atr/requirements.txt +1 -0
- modules/atr/setup.py +30 -0
- modules/atr/tests/__init__.py +1 -0
- modules/atr/tests/test_decorator.py +317 -0
- modules/atr/tests/test_executor.py +245 -0
- modules/atr/tests/test_integration_executor.py +184 -0
- modules/atr/tests/test_registry.py +312 -0
- modules/atr/tests/test_schema.py +182 -0
- modules/atr/tests/test_v2_features.py +708 -0
- modules/caas/.dockerignore +63 -0
- modules/caas/.github/ISSUE_TEMPLATE/bug_report.md +38 -0
- modules/caas/.github/ISSUE_TEMPLATE/custom.md +10 -0
- modules/caas/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
- modules/caas/.github/workflows/ci.yml +100 -0
- modules/caas/.github/workflows/lint.yml +39 -0
- modules/caas/.github/workflows/publish-pypi.yml +124 -0
- modules/caas/.gitignore +73 -0
- modules/caas/.pre-commit-config.yaml +33 -0
- modules/caas/CHANGELOG.md +58 -0
- modules/caas/CONTRIBUTING.md +346 -0
- modules/caas/Dockerfile +41 -0
- modules/caas/LICENSE +21 -0
- modules/caas/MANIFEST.in +11 -0
- modules/caas/README.md +158 -0
- modules/caas/benchmarks/README.md +255 -0
- modules/caas/benchmarks/create_hf_dataset.py +502 -0
- modules/caas/benchmarks/data/sample_corpus/README.md +86 -0
- modules/caas/benchmarks/data/sample_corpus/auth_module.py +211 -0
- modules/caas/benchmarks/data/sample_corpus/contribution_guide.md +185 -0
- modules/caas/benchmarks/data/sample_corpus/remote_work_policy.html +57 -0
- modules/caas/benchmarks/hf_dataset/README.md +214 -0
- modules/caas/benchmarks/hf_dataset/caas_benchmark_corpus.py +73 -0
- modules/caas/benchmarks/hf_dataset/corpus_preview.json +193 -0
- modules/caas/benchmarks/results/README.md +66 -0
- modules/caas/benchmarks/results/evaluation_2026-01-20.json +121 -0
- modules/caas/benchmarks/run_evaluation.py +561 -0
- modules/caas/benchmarks/statistical_tests.py +289 -0
- modules/caas/benchmarks/verify_sample_corpus.py +83 -0
- modules/caas/docker-compose.yml +38 -0
- modules/caas/docs/CONTEXT_TRIAD.md +462 -0
- modules/caas/docs/CONTRIBUTING.md +346 -0
- modules/caas/docs/ETHICS_AND_LIMITATIONS.md +336 -0
- modules/caas/docs/HEURISTIC_ROUTER.md +442 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY.md +363 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_CONTEXT_TRIAD.md +277 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_HEURISTIC_ROUTER.md +231 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_METADATA_INJECTION.md +258 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_PRAGMATIC_TRUTH.md +212 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_TRUST_GATEWAY.md +319 -0
- modules/caas/docs/LAYER_1_PRIMITIVE.md +202 -0
- modules/caas/docs/METADATA_INJECTION.md +404 -0
- modules/caas/docs/PRAGMATIC_TRUTH.md +431 -0
- modules/caas/docs/RELATED_WORK.md +312 -0
- modules/caas/docs/RELEASE_CHECKLIST.md +219 -0
- modules/caas/docs/RELEASE_GUIDE.md +285 -0
- modules/caas/docs/REPRODUCIBILITY.md +386 -0
- modules/caas/docs/SLIDING_WINDOW.md +387 -0
- modules/caas/docs/STRUCTURE_AWARE_INDEXING.md +158 -0
- modules/caas/docs/TESTING.md +259 -0
- modules/caas/docs/THREAT_MODEL.md +247 -0
- modules/caas/docs/TRUST_GATEWAY.md +575 -0
- modules/caas/docs/VFS.md +298 -0
- modules/caas/examples/agents/enterprise_security_agent.py +414 -0
- modules/caas/examples/agents/intelligent_document_analyzer.py +380 -0
- modules/caas/examples/demos/demo.py +309 -0
- modules/caas/examples/demos/demo_context_triad.py +225 -0
- modules/caas/examples/demos/demo_conversation_manager.py +285 -0
- modules/caas/examples/demos/demo_heuristic_router.py +133 -0
- modules/caas/examples/demos/demo_metadata_injection.py +198 -0
- modules/caas/examples/demos/demo_pragmatic_truth.py +303 -0
- modules/caas/examples/demos/demo_structure_aware.py +140 -0
- modules/caas/examples/demos/demo_time_decay.py +247 -0
- modules/caas/examples/demos/demo_trust_gateway.py +383 -0
- modules/caas/examples/multi_agent/README.md +159 -0
- modules/caas/examples/multi_agent/research_team.py +369 -0
- modules/caas/examples/multi_agent/vfs_collaboration.py +393 -0
- modules/caas/examples/usage/auth_module.py +142 -0
- modules/caas/examples/usage/usage_example.py +173 -0
- modules/caas/experiments/README.md +42 -0
- modules/caas/experiments/reproduce_results.py +462 -0
- modules/caas/paper/ARXIV_METADATA.md +145 -0
- modules/caas/paper/ARXIV_README.md +47 -0
- modules/caas/paper/CHECKLIST.md +103 -0
- modules/caas/paper/GITHUB_RELEASE_NOTES.md +105 -0
- modules/caas/paper/README.md +71 -0
- modules/caas/paper/abstract.md +24 -0
- modules/caas/paper/arxiv_submission.tar +0 -0
- modules/caas/paper/arxiv_submission.zip +0 -0
- modules/caas/paper/build_pdf.py +355 -0
- modules/caas/paper/experiments.md +149 -0
- modules/caas/paper/figures/.gitkeep +0 -0
- modules/caas/paper/figures/README.md +237 -0
- modules/caas/paper/figures/fig1_system_architecture.png +0 -0
- modules/caas/paper/figures/fig1_system_architecture.svg +198 -0
- modules/caas/paper/figures/fig2_context_triad.png +0 -0
- modules/caas/paper/figures/fig2_context_triad.svg +105 -0
- modules/caas/paper/figures/fig3_ablation_results.png +0 -0
- modules/caas/paper/figures/fig3_ablation_results.svg +113 -0
- modules/caas/paper/figures/fig4_routing_latency.png +0 -0
- modules/caas/paper/figures/fig4_routing_latency.svg +97 -0
- modules/caas/paper/intro.md +103 -0
- modules/caas/paper/latex/figures/fig1_system_architecture.png +0 -0
- modules/caas/paper/latex/figures/fig2_context_triad.png +0 -0
- modules/caas/paper/latex/figures/fig3_ablation_results.png +0 -0
- modules/caas/paper/latex/figures/fig4_routing_latency.png +0 -0
- modules/caas/paper/latex/main.tex +468 -0
- modules/caas/paper/latex/references.bib +140 -0
- modules/caas/paper/method.md +350 -0
- modules/caas/paper/outline.md +123 -0
- modules/caas/paper/related_work.md +101 -0
- modules/caas/paper/tables/.gitkeep +0 -0
- modules/caas/paper/tables/results_tables.md +50 -0
- modules/caas/pyproject.toml +172 -0
- modules/caas/requirements.txt +11 -0
- modules/caas/src/caas/__init__.py +232 -0
- modules/caas/src/caas/api/__init__.py +7 -0
- modules/caas/src/caas/api/server.py +1326 -0
- modules/caas/src/caas/caching.py +832 -0
- modules/caas/src/caas/cli.py +208 -0
- modules/caas/src/caas/conversation.py +221 -0
- modules/caas/src/caas/decay.py +118 -0
- modules/caas/src/caas/detection/__init__.py +7 -0
- modules/caas/src/caas/detection/detector.py +236 -0
- modules/caas/src/caas/enrichment.py +127 -0
- modules/caas/src/caas/gateway/__init__.py +24 -0
- modules/caas/src/caas/gateway/trust_gateway.py +471 -0
- modules/caas/src/caas/hf_utils.py +477 -0
- modules/caas/src/caas/ingestion/__init__.py +21 -0
- modules/caas/src/caas/ingestion/processors.py +251 -0
- modules/caas/src/caas/ingestion/structure_parser.py +185 -0
- modules/caas/src/caas/models.py +354 -0
- modules/caas/src/caas/pragmatic_truth.py +441 -0
- modules/caas/src/caas/routing/__init__.py +8 -0
- modules/caas/src/caas/routing/heuristic_router.py +242 -0
- modules/caas/src/caas/storage/__init__.py +7 -0
- modules/caas/src/caas/storage/store.py +450 -0
- modules/caas/src/caas/triad.py +472 -0
- modules/caas/src/caas/tuning/__init__.py +7 -0
- modules/caas/src/caas/tuning/tuner.py +322 -0
- modules/caas/src/caas/vfs/__init__.py +12 -0
- modules/caas/src/caas/vfs/filesystem.py +450 -0
- modules/caas/tests/__init__.py +3 -0
- modules/caas/tests/conftest.py +8 -0
- modules/caas/tests/test_caching.py +628 -0
- modules/caas/tests/test_context_triad.py +385 -0
- modules/caas/tests/test_conversation_manager.py +289 -0
- modules/caas/tests/test_functionality.py +215 -0
- modules/caas/tests/test_heuristic_router.py +370 -0
- modules/caas/tests/test_metadata_injection.py +328 -0
- modules/caas/tests/test_pragmatic_truth.py +322 -0
- modules/caas/tests/test_structure_aware_indexing.py +283 -0
- modules/caas/tests/test_time_decay.py +268 -0
- modules/caas/tests/test_trust_gateway.py +445 -0
- modules/caas/tests/test_vfs.py +298 -0
- modules/cmvk/.github/FUNDING.yml +9 -0
- modules/cmvk/.github/dependabot.yml +54 -0
- modules/cmvk/.github/workflows/ci.yml +205 -0
- modules/cmvk/.github/workflows/publish.yml +143 -0
- modules/cmvk/.gitignore +147 -0
- modules/cmvk/.pre-commit-config.yaml +58 -0
- modules/cmvk/CHANGELOG.md +146 -0
- modules/cmvk/CITATION.cff +48 -0
- modules/cmvk/CONTRIBUTING.md +229 -0
- modules/cmvk/Dockerfile +87 -0
- modules/cmvk/HF_MODEL_CARD.md +185 -0
- modules/cmvk/LICENSE +21 -0
- modules/cmvk/README.md +149 -0
- modules/cmvk/SECURITY.md +114 -0
- modules/cmvk/config/prompts/generator_v1.txt +23 -0
- modules/cmvk/config/prompts/verifier_hostile.txt +32 -0
- modules/cmvk/config/settings.yaml +40 -0
- modules/cmvk/coverage_html/.gitignore +2 -0
- modules/cmvk/coverage_html/class_index.html +658 -0
- modules/cmvk/coverage_html/coverage_html_cb_188fc9a4.js +735 -0
- modules/cmvk/coverage_html/favicon_32_cb_c827f16f.png +0 -0
- modules/cmvk/coverage_html/function_index.html +1978 -0
- modules/cmvk/coverage_html/index.html +255 -0
- modules/cmvk/coverage_html/keybd_closed_cb_900cfef5.png +0 -0
- modules/cmvk/coverage_html/status.json +1 -0
- modules/cmvk/coverage_html/style_cb_5c747636.css +389 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38___init___py.html +315 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_audit_py.html +499 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_benchmarks_py.html +575 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_constitutional_py.html +1001 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_hf_utils_py.html +398 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_metrics_py.html +570 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_profiles_py.html +397 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_types_py.html +109 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_verification_py.html +1053 -0
- modules/cmvk/docs/DIAGRAMS.md +325 -0
- modules/cmvk/docs/architecture.md +345 -0
- modules/cmvk/docs/features.md +308 -0
- modules/cmvk/docs/getting_started.md +279 -0
- modules/cmvk/docs/innovation_layer.md +377 -0
- modules/cmvk/docs/safety.md +281 -0
- modules/cmvk/docs/traceability.md +150 -0
- modules/cmvk/examples/basic_example.py +62 -0
- modules/cmvk/examples/demo_complete_pipeline.py +209 -0
- modules/cmvk/examples/demo_innovation_layer.py +197 -0
- modules/cmvk/examples/example.py +112 -0
- modules/cmvk/examples/model_diversity_comparison.py +110 -0
- modules/cmvk/examples/real_api_integration.py +121 -0
- modules/cmvk/examples/test_full_pipeline.py +303 -0
- modules/cmvk/experiments/FEATURE_2_LATERAL_THINKING.md +187 -0
- modules/cmvk/experiments/README.md +216 -0
- modules/cmvk/experiments/ablation_runner.py +666 -0
- modules/cmvk/experiments/baseline_runner.py +158 -0
- modules/cmvk/experiments/blind_spot_benchmark.py +364 -0
- modules/cmvk/experiments/datasets/README.md +85 -0
- modules/cmvk/experiments/datasets/humaneval_50.json +352 -0
- modules/cmvk/experiments/datasets/humaneval_full.json +1150 -0
- modules/cmvk/experiments/datasets/humaneval_sample.json +32 -0
- modules/cmvk/experiments/datasets/sabotage.json +262 -0
- modules/cmvk/experiments/datasets/sample.json +40 -0
- modules/cmvk/experiments/demo_with_traces.py +110 -0
- modules/cmvk/experiments/efficiency_curve.py +259 -0
- modules/cmvk/experiments/experiment_runner.py +243 -0
- modules/cmvk/experiments/paper_data_generator.py +183 -0
- modules/cmvk/experiments/reproduce_results.py +407 -0
- modules/cmvk/experiments/reproducible_runner.py +352 -0
- modules/cmvk/experiments/sabotage_stress_test.py +311 -0
- modules/cmvk/experiments/test_lateral_thinking.py +116 -0
- modules/cmvk/experiments/test_prosecutor.py +41 -0
- modules/cmvk/experiments/visualize_results.py +735 -0
- modules/cmvk/logs/traces/demo_HumanEval_0_20260121-204900.json +36 -0
- modules/cmvk/notebooks/analysis.ipynb +124 -0
- modules/cmvk/paper/PAPER.md +561 -0
- modules/cmvk/paper/arxiv_checklist.md +230 -0
- modules/cmvk/paper/cmvk_neurips.aux +77 -0
- modules/cmvk/paper/cmvk_neurips.bbl +81 -0
- modules/cmvk/paper/cmvk_neurips.blg +48 -0
- modules/cmvk/paper/cmvk_neurips.out +16 -0
- modules/cmvk/paper/cmvk_neurips.pdf +0 -0
- modules/cmvk/paper/cmvk_neurips.tex +309 -0
- modules/cmvk/paper/figures/ablation.png +0 -0
- modules/cmvk/paper/figures/ablation.svg +39 -0
- modules/cmvk/paper/figures/architecture.png +0 -0
- modules/cmvk/paper/figures/architecture.svg +115 -0
- modules/cmvk/paper/figures/results_bar.png +0 -0
- modules/cmvk/paper/figures/results_bar.svg +70 -0
- modules/cmvk/paper/generate_figures.py +383 -0
- modules/cmvk/paper/neurips_2024.sty +101 -0
- modules/cmvk/paper/references.bib +98 -0
- modules/cmvk/paper/structure.tex +200 -0
- modules/cmvk/pyproject.toml +189 -0
- modules/cmvk/requirements-dev.txt +19 -0
- modules/cmvk/requirements.txt +14 -0
- modules/cmvk/src/cmvk/__init__.py +216 -0
- modules/cmvk/src/cmvk/audit.py +400 -0
- modules/cmvk/src/cmvk/benchmarks.py +476 -0
- modules/cmvk/src/cmvk/constitutional.py +902 -0
- modules/cmvk/src/cmvk/hf_utils.py +299 -0
- modules/cmvk/src/cmvk/metrics.py +471 -0
- modules/cmvk/src/cmvk/profiles.py +298 -0
- modules/cmvk/src/cmvk/py.typed +0 -0
- modules/cmvk/src/cmvk/types.py +10 -0
- modules/cmvk/src/cmvk/verification.py +954 -0
- modules/cmvk/src/cross_model_verification_kernel/__init__.py +91 -0
- modules/cmvk/src/cross_model_verification_kernel/__main__.py +10 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/__init__.py +16 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/base_agent.py +142 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/generator_openai.py +223 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/verifier_anthropic.py +448 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/verifier_gemini.py +481 -0
- modules/cmvk/src/cross_model_verification_kernel/cli.py +570 -0
- modules/cmvk/src/cross_model_verification_kernel/core/__init__.py +26 -0
- modules/cmvk/src/cross_model_verification_kernel/core/graph_memory.py +308 -0
- modules/cmvk/src/cross_model_verification_kernel/core/kernel.py +413 -0
- modules/cmvk/src/cross_model_verification_kernel/core/trace_logger.py +75 -0
- modules/cmvk/src/cross_model_verification_kernel/core/types.py +121 -0
- modules/cmvk/src/cross_model_verification_kernel/datasets/__init__.py +20 -0
- modules/cmvk/src/cross_model_verification_kernel/datasets/humaneval_loader.py +271 -0
- modules/cmvk/src/cross_model_verification_kernel/generator.py +118 -0
- modules/cmvk/src/cross_model_verification_kernel/kernel.py +292 -0
- modules/cmvk/src/cross_model_verification_kernel/models.py +111 -0
- modules/cmvk/src/cross_model_verification_kernel/py.typed +1 -0
- modules/cmvk/src/cross_model_verification_kernel/simple_kernel.py +185 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/__init__.py +94 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/huggingface_upload.py +394 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/sandbox.py +159 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/statistics.py +468 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/visualizer.py +312 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/web_search.py +86 -0
- modules/cmvk/src/cross_model_verification_kernel/verifier.py +257 -0
- modules/cmvk/tests/__init__.py +3 -0
- modules/cmvk/tests/conftest.py +61 -0
- modules/cmvk/tests/integration/__init__.py +1 -0
- modules/cmvk/tests/integration/test_anthropic_verifier.py +269 -0
- modules/cmvk/tests/integration/test_integration.py +53 -0
- modules/cmvk/tests/integration/test_lateral_thinking_integration.py +199 -0
- modules/cmvk/tests/integration/test_lateral_thinking_witness.py +208 -0
- modules/cmvk/tests/integration/test_prosecutor_mode.py +131 -0
- modules/cmvk/tests/test_constitutional.py +611 -0
- modules/cmvk/tests/test_enhanced_features.py +603 -0
- modules/cmvk/tests/test_verification.py +255 -0
- modules/cmvk/tests/unit/__init__.py +1 -0
- modules/cmvk/tests/unit/test_agents.py +64 -0
- modules/cmvk/tests/unit/test_cli.py +224 -0
- modules/cmvk/tests/unit/test_core.py +126 -0
- modules/cmvk/tests/unit/test_humaneval_loader.py +197 -0
- modules/cmvk/tests/unit/test_kernel.py +255 -0
- modules/cmvk/tests/unit/test_reproducibility.py +160 -0
- modules/cmvk/tests/unit/test_trace_logger.py +115 -0
- modules/cmvk/tests/unit/test_visualizer.py +218 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/bug_report.yml +82 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/config.yml +11 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/feature_request.yml +104 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/question.yml +70 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/security_vulnerability.yml +84 -0
- modules/control-plane/.github/discussions.yml +73 -0
- modules/control-plane/.github/pull_request_template.md +82 -0
- modules/control-plane/.github/workflows/publish.yml +146 -0
- modules/control-plane/.github/workflows/release.yml +39 -0
- modules/control-plane/.github/workflows/tests.yml +58 -0
- modules/control-plane/.gitignore +55 -0
- modules/control-plane/CHANGELOG.md +203 -0
- modules/control-plane/CONTRIBUTING.md +311 -0
- modules/control-plane/CONTRIBUTORS.md +88 -0
- modules/control-plane/Dockerfile +82 -0
- modules/control-plane/LICENSE +21 -0
- modules/control-plane/MANIFEST.in +17 -0
- modules/control-plane/README.md +1264 -0
- modules/control-plane/ROADMAP.md +228 -0
- modules/control-plane/SECURITY.md +210 -0
- modules/control-plane/SUPPORT.md +106 -0
- modules/control-plane/acp-cli.py +212 -0
- modules/control-plane/benchmark/README.md +257 -0
- modules/control-plane/benchmark/__init__.py +19 -0
- modules/control-plane/benchmark/red_team_dataset.py +517 -0
- modules/control-plane/benchmark.py +563 -0
- modules/control-plane/build_and_publish.sh +130 -0
- modules/control-plane/docker-compose.yml +74 -0
- modules/control-plane/docs/ABLATION_STUDIES.md +528 -0
- modules/control-plane/docs/ADAPTER_GUIDE.md +544 -0
- modules/control-plane/docs/ADVANCED_FEATURES.md +543 -0
- modules/control-plane/docs/AIOS_COMPARISON.md +296 -0
- modules/control-plane/docs/BIBLIOGRAPHY.md +367 -0
- modules/control-plane/docs/CASE_STUDIES.md +645 -0
- modules/control-plane/docs/DOCKER_DEPLOYMENT.md +184 -0
- modules/control-plane/docs/ECOSYSTEM_STATUS.md +98 -0
- modules/control-plane/docs/HF_MODEL_CARD.md +168 -0
- modules/control-plane/docs/KERNEL_V1_RELEASE.md +454 -0
- modules/control-plane/docs/LAYER3_FRAMEWORK.md +227 -0
- modules/control-plane/docs/LIMITATIONS.md +523 -0
- modules/control-plane/docs/PYPI_PUBLISHING.md +195 -0
- modules/control-plane/docs/README.md +58 -0
- modules/control-plane/docs/RELATED_WORK.md +319 -0
- modules/control-plane/docs/RELEASE_v1.1.0.md +252 -0
- modules/control-plane/docs/REPRODUCIBILITY.md +540 -0
- modules/control-plane/docs/RESEARCH_FOUNDATION.md +197 -0
- modules/control-plane/docs/api/CORE.md +270 -0
- modules/control-plane/docs/architecture/architecture.md +120 -0
- modules/control-plane/docs/community/ANNOUNCEMENT_TEMPLATES.md +52 -0
- modules/control-plane/docs/guides/IMPLEMENTATION.md +225 -0
- modules/control-plane/docs/guides/PHILOSOPHY.md +354 -0
- modules/control-plane/docs/guides/QUICKSTART.md +217 -0
- modules/control-plane/examples/README.md +138 -0
- modules/control-plane/examples/a2a_demo.py +410 -0
- modules/control-plane/examples/adapter_demo.py +347 -0
- modules/control-plane/examples/advanced_features.py +403 -0
- modules/control-plane/examples/basic_usage.py +261 -0
- modules/control-plane/examples/benchmark_demo.py +186 -0
- modules/control-plane/examples/compliance_demo.py +333 -0
- modules/control-plane/examples/configuration.py +265 -0
- modules/control-plane/examples/getting_started.py +178 -0
- modules/control-plane/examples/hibernation_and_time_travel_demo.py +406 -0
- modules/control-plane/examples/interactive_tutorial.ipynb +497 -0
- modules/control-plane/examples/kernel_interceptor_demo.py +202 -0
- modules/control-plane/examples/kernel_v1_demo.py +273 -0
- modules/control-plane/examples/langchain_demo.py +281 -0
- modules/control-plane/examples/lifecycle_demo.py +724 -0
- modules/control-plane/examples/mcp_demo.py +378 -0
- modules/control-plane/examples/ml_safety_demo.py +157 -0
- modules/control-plane/examples/multimodal_demo.py +347 -0
- modules/control-plane/examples/observability_demo.py +370 -0
- modules/control-plane/examples/use_cases.py +336 -0
- modules/control-plane/experiments/long_horizon_purge.py +235 -0
- modules/control-plane/experiments/multi_agent_rag.py +165 -0
- modules/control-plane/experiments/reproduce_results.py +667 -0
- modules/control-plane/paper/ARXIV_SUBMISSION_INFO.txt +122 -0
- modules/control-plane/paper/ETHICS_STATEMENT.md +248 -0
- modules/control-plane/paper/PAPER_CHECKLIST.md +72 -0
- modules/control-plane/paper/Paper.pdf +0 -0
- modules/control-plane/paper/README.md +71 -0
- modules/control-plane/paper/appendix.md +152 -0
- modules/control-plane/paper/architecture.md +15 -0
- modules/control-plane/paper/arxiv/figures/ablation_chart.png +0 -0
- modules/control-plane/paper/arxiv/figures/architecture.png +0 -0
- modules/control-plane/paper/arxiv/figures/constraint_graphs.png +0 -0
- modules/control-plane/paper/arxiv/figures/results_chart.png +0 -0
- modules/control-plane/paper/arxiv/main.aux +97 -0
- modules/control-plane/paper/arxiv/main.bbl +112 -0
- modules/control-plane/paper/arxiv/main.blg +48 -0
- modules/control-plane/paper/arxiv/main.out +33 -0
- modules/control-plane/paper/arxiv/main.pdf +0 -0
- modules/control-plane/paper/arxiv/main.tex +479 -0
- modules/control-plane/paper/arxiv/references.bib +234 -0
- modules/control-plane/paper/arxiv_submission.tar +0 -0
- modules/control-plane/paper/arxiv_submission.zip +0 -0
- modules/control-plane/paper/build.sh +68 -0
- modules/control-plane/paper/figures/README.md +47 -0
- modules/control-plane/paper/figures/ablation_chart.pdf +0 -0
- modules/control-plane/paper/figures/ablation_chart.png +0 -0
- modules/control-plane/paper/figures/architecture.pdf +0 -0
- modules/control-plane/paper/figures/architecture.png +0 -0
- modules/control-plane/paper/figures/constraint_graphs.pdf +0 -0
- modules/control-plane/paper/figures/constraint_graphs.png +0 -0
- modules/control-plane/paper/figures/generate_figures.py +252 -0
- modules/control-plane/paper/figures/results_chart.pdf +0 -0
- modules/control-plane/paper/figures/results_chart.png +0 -0
- modules/control-plane/paper/main.md +273 -0
- modules/control-plane/paper/main.tex +214 -0
- modules/control-plane/paper/main_arxiv.aux +53 -0
- modules/control-plane/paper/main_arxiv.out +17 -0
- modules/control-plane/paper/main_arxiv.pdf +0 -0
- modules/control-plane/paper/main_arxiv.tex +264 -0
- modules/control-plane/paper/references.bib +234 -0
- modules/control-plane/pyproject.toml +124 -0
- modules/control-plane/reproducibility/ABLATIONS.md +136 -0
- modules/control-plane/reproducibility/README.md +288 -0
- modules/control-plane/reproducibility/commands.md +467 -0
- modules/control-plane/reproducibility/docker_config/Dockerfile +39 -0
- modules/control-plane/reproducibility/experiment_configs/purge_config.json +46 -0
- modules/control-plane/reproducibility/experiment_configs/rag_config.json +36 -0
- modules/control-plane/reproducibility/hardware_specs.md +317 -0
- modules/control-plane/reproducibility/requirements_frozen.txt +0 -0
- modules/control-plane/reproducibility/run_all_experiments.sh +45 -0
- modules/control-plane/reproducibility/seeds.json +106 -0
- modules/control-plane/scripts/prepare_pypi.py +46 -0
- modules/control-plane/scripts/prepare_release.py +176 -0
- modules/control-plane/scripts/upload_dataset_to_hf.py +316 -0
- modules/control-plane/setup.py +69 -0
- modules/control-plane/src/agent_control_plane/__init__.py +639 -0
- modules/control-plane/src/agent_control_plane/a2a_adapter.py +541 -0
- modules/control-plane/src/agent_control_plane/adapter.py +415 -0
- modules/control-plane/src/agent_control_plane/agent_hibernation.py +364 -0
- modules/control-plane/src/agent_control_plane/agent_kernel.py +464 -0
- modules/control-plane/src/agent_control_plane/compliance.py +718 -0
- modules/control-plane/src/agent_control_plane/constraint_graphs.py +475 -0
- modules/control-plane/src/agent_control_plane/control_plane.py +848 -0
- modules/control-plane/src/agent_control_plane/example_executors.py +193 -0
- modules/control-plane/src/agent_control_plane/execution_engine.py +229 -0
- modules/control-plane/src/agent_control_plane/flight_recorder.py +600 -0
- modules/control-plane/src/agent_control_plane/governance_layer.py +432 -0
- modules/control-plane/src/agent_control_plane/hf_utils.py +561 -0
- modules/control-plane/src/agent_control_plane/interfaces/__init__.py +53 -0
- modules/control-plane/src/agent_control_plane/interfaces/kernel_interface.py +359 -0
- modules/control-plane/src/agent_control_plane/interfaces/plugin_interface.py +495 -0
- modules/control-plane/src/agent_control_plane/interfaces/protocol_interfaces.py +385 -0
- modules/control-plane/src/agent_control_plane/kernel_space.py +707 -0
- modules/control-plane/src/agent_control_plane/langchain_adapter.py +422 -0
- modules/control-plane/src/agent_control_plane/lifecycle.py +3111 -0
- modules/control-plane/src/agent_control_plane/mcp_adapter.py +517 -0
- modules/control-plane/src/agent_control_plane/ml_safety.py +560 -0
- modules/control-plane/src/agent_control_plane/multimodal.py +724 -0
- modules/control-plane/src/agent_control_plane/mute_agent.py +419 -0
- modules/control-plane/src/agent_control_plane/observability.py +785 -0
- modules/control-plane/src/agent_control_plane/orchestrator.py +480 -0
- modules/control-plane/src/agent_control_plane/plugin_registry.py +748 -0
- modules/control-plane/src/agent_control_plane/policy_engine.py +525 -0
- modules/control-plane/src/agent_control_plane/shadow_mode.py +307 -0
- modules/control-plane/src/agent_control_plane/signals.py +491 -0
- modules/control-plane/src/agent_control_plane/supervisor_agents.py +427 -0
- modules/control-plane/src/agent_control_plane/time_travel_debugger.py +554 -0
- modules/control-plane/src/agent_control_plane/tool_registry.py +350 -0
- modules/control-plane/src/agent_control_plane/vfs.py +695 -0
- modules/control-plane/tests/README.md +33 -0
- modules/control-plane/tests/test_a2a_adapter.py +336 -0
- modules/control-plane/tests/test_adapter.py +422 -0
- modules/control-plane/tests/test_advanced_features.py +389 -0
- modules/control-plane/tests/test_benchmark.py +223 -0
- modules/control-plane/tests/test_compliance.py +214 -0
- modules/control-plane/tests/test_control_plane.py +295 -0
- modules/control-plane/tests/test_hibernation.py +274 -0
- modules/control-plane/tests/test_kernel_interception.py +284 -0
- modules/control-plane/tests/test_langchain_adapter.py +258 -0
- modules/control-plane/tests/test_lifecycle.py +1174 -0
- modules/control-plane/tests/test_mcp_adapter.py +293 -0
- modules/control-plane/tests/test_ml_safety.py +142 -0
- modules/control-plane/tests/test_multimodal.py +317 -0
- modules/control-plane/tests/test_new_features.py +435 -0
- modules/control-plane/tests/test_observability.py +338 -0
- modules/control-plane/tests/test_time_travel.py +387 -0
- modules/emk/.github/workflows/ci.yml +105 -0
- modules/emk/.github/workflows/publish.yml +144 -0
- modules/emk/.gitignore +74 -0
- modules/emk/CHANGELOG.md +41 -0
- modules/emk/CONTRIBUTING.md +295 -0
- modules/emk/IMPLEMENTATION.md +174 -0
- modules/emk/LICENSE +21 -0
- modules/emk/MANIFEST.in +8 -0
- modules/emk/README.md +135 -0
- modules/emk/RELEASE_NOTES.md +82 -0
- modules/emk/SECURITY.md +52 -0
- modules/emk/codecov.yml +39 -0
- modules/emk/docs/MEMORY_MANAGEMENT.md +285 -0
- modules/emk/emk/__init__.py +106 -0
- modules/emk/emk/hf_utils.py +419 -0
- modules/emk/emk/indexer.py +144 -0
- modules/emk/emk/py.typed +0 -0
- modules/emk/emk/schema.py +204 -0
- modules/emk/emk/sleep_cycle.py +345 -0
- modules/emk/emk/store.py +479 -0
- modules/emk/examples/basic_usage.py +123 -0
- modules/emk/examples/memory_features_demo.py +154 -0
- modules/emk/experiments/README.md +59 -0
- modules/emk/experiments/reproduce_results.py +461 -0
- modules/emk/experiments/results.json +61 -0
- modules/emk/paper/structure.tex +192 -0
- modules/emk/paper/whitepaper.md +273 -0
- modules/emk/pyproject.toml +91 -0
- modules/emk/setup.py +5 -0
- modules/emk/tests/test_file_adapter.py +195 -0
- modules/emk/tests/test_indexer.py +174 -0
- modules/emk/tests/test_init.py +55 -0
- modules/emk/tests/test_negative_memory.py +83 -0
- modules/emk/tests/test_schema.py +150 -0
- modules/emk/tests/test_semantic_rules.py +175 -0
- modules/emk/tests/test_sleep_cycle.py +335 -0
- modules/emk/tests/test_store_anti_patterns.py +239 -0
- modules/iatp/.github/workflows/docker-build.yml +124 -0
- modules/iatp/.github/workflows/publish.yml +174 -0
- modules/iatp/.github/workflows/python-package.yml +121 -0
- modules/iatp/.gitignore +67 -0
- modules/iatp/.pre-commit-config.yaml +64 -0
- modules/iatp/CHANGELOG.md +120 -0
- modules/iatp/Dockerfile +91 -0
- modules/iatp/IMPLEMENTATION_SUMMARY.md +218 -0
- modules/iatp/MANIFEST.in +9 -0
- modules/iatp/README.md +180 -0
- modules/iatp/docker/Dockerfile.agent +27 -0
- modules/iatp/docker/Dockerfile.sidecar-python +86 -0
- modules/iatp/docker/README.md +258 -0
- modules/iatp/docker-compose.yml +194 -0
- modules/iatp/docs/ARCHITECTURE.md +243 -0
- modules/iatp/docs/CLI_GUIDE.md +220 -0
- modules/iatp/docs/DEPLOYMENT.md +304 -0
- modules/iatp/examples/README.md +132 -0
- modules/iatp/examples/backend_agent.py +39 -0
- modules/iatp/examples/client.py +168 -0
- modules/iatp/examples/demo_attestation_reputation.py +274 -0
- modules/iatp/examples/demo_client.py +240 -0
- modules/iatp/examples/demo_rbac.py +143 -0
- modules/iatp/examples/integration_demo.py +245 -0
- modules/iatp/examples/manifests/coder_agent.json +20 -0
- modules/iatp/examples/manifests/reviewer_agent.json +19 -0
- modules/iatp/examples/manifests/secure_bank.json +14 -0
- modules/iatp/examples/manifests/standard_agent.json +14 -0
- modules/iatp/examples/manifests/untrusted_honeypot.json +14 -0
- modules/iatp/examples/run_secure_bank_sidecar.py +85 -0
- modules/iatp/examples/run_sidecar.py +105 -0
- modules/iatp/examples/run_untrusted_sidecar.py +77 -0
- modules/iatp/examples/secure_bank_agent.py +138 -0
- modules/iatp/examples/test_untrusted.py +82 -0
- modules/iatp/examples/untrusted_agent.py +119 -0
- modules/iatp/experiments/README.md +58 -0
- modules/iatp/experiments/cascading_hallucination/README.md +149 -0
- modules/iatp/experiments/cascading_hallucination/agent_a_user.py +41 -0
- modules/iatp/experiments/cascading_hallucination/agent_b_summarizer.py +54 -0
- modules/iatp/experiments/cascading_hallucination/agent_c_database.py +47 -0
- modules/iatp/experiments/cascading_hallucination/proof_of_concept.py +290 -0
- modules/iatp/experiments/cascading_hallucination/run_experiment.py +226 -0
- modules/iatp/experiments/cascading_hallucination/sidecar_c.py +61 -0
- modules/iatp/experiments/reproduce_results.py +574 -0
- modules/iatp/experiments/results.json +2336 -0
- modules/iatp/iatp/__init__.py +164 -0
- modules/iatp/iatp/attestation.py +401 -0
- modules/iatp/iatp/cli.py +253 -0
- modules/iatp/iatp/hf_utils.py +469 -0
- modules/iatp/iatp/ipc_pipes.py +578 -0
- modules/iatp/iatp/main.py +410 -0
- modules/iatp/iatp/models/__init__.py +445 -0
- modules/iatp/iatp/policy_engine.py +335 -0
- modules/iatp/iatp/py.typed +2 -0
- modules/iatp/iatp/recovery.py +319 -0
- modules/iatp/iatp/security/__init__.py +268 -0
- modules/iatp/iatp/sidecar/__init__.py +517 -0
- modules/iatp/iatp/telemetry/__init__.py +162 -0
- modules/iatp/iatp/tests/__init__.py +1 -0
- modules/iatp/iatp/tests/test_attestation.py +368 -0
- modules/iatp/iatp/tests/test_cli.py +129 -0
- modules/iatp/iatp/tests/test_models.py +128 -0
- modules/iatp/iatp/tests/test_policy_engine.py +345 -0
- modules/iatp/iatp/tests/test_recovery.py +279 -0
- modules/iatp/iatp/tests/test_security.py +220 -0
- modules/iatp/iatp/tests/test_sidecar.py +165 -0
- modules/iatp/iatp/tests/test_telemetry.py +173 -0
- modules/iatp/paper/BLOG.md +307 -0
- modules/iatp/paper/PAPER.md +236 -0
- modules/iatp/paper/RFC_SUBMISSION.md +299 -0
- modules/iatp/paper/whitepaper.md +369 -0
- modules/iatp/proto/README.md +200 -0
- modules/iatp/proto/generate_stubs.py +81 -0
- modules/iatp/proto/iatp.proto +552 -0
- modules/iatp/pyproject.toml +180 -0
- modules/iatp/requirements-dev.txt +2 -0
- modules/iatp/requirements.txt +6 -0
- modules/iatp/setup.py +60 -0
- modules/iatp/sidecar/README.md +487 -0
- modules/iatp/sidecar/go/Dockerfile +32 -0
- modules/iatp/sidecar/go/README.md +237 -0
- modules/iatp/sidecar/go/go.mod +8 -0
- modules/iatp/sidecar/go/main.go +488 -0
- modules/iatp/spec/001-handshake.md +436 -0
- modules/iatp/spec/002-reversibility.md +394 -0
- modules/iatp/spec/schema/capability_manifest.json +266 -0
- modules/iatp/test_integration.py +310 -0
- modules/mcp-kernel-server/README.md +261 -0
- modules/mcp-kernel-server/pyproject.toml +60 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/__init__.py +26 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/cli.py +229 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/resources.py +215 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/server.py +562 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/tools.py +1172 -0
- modules/mute-agent/.github/workflows/safety_check.yml +45 -0
- modules/mute-agent/.gitignore +53 -0
- modules/mute-agent/ARCHITECTURE.md +531 -0
- modules/mute-agent/BENCHMARK_GUIDE.md +384 -0
- modules/mute-agent/COMPLETION_SUMMARY.md +293 -0
- modules/mute-agent/EXPERIMENT_SUMMARY.md +318 -0
- modules/mute-agent/IMPLEMENTATION_SUMMARY.md +212 -0
- modules/mute-agent/LICENSE +21 -0
- modules/mute-agent/PHASE3_SUMMARY.md +297 -0
- modules/mute-agent/README.md +360 -0
- modules/mute-agent/STEEL_MAN_RESULTS.md +353 -0
- modules/mute-agent/USAGE.md +505 -0
- modules/mute-agent/V2_IMPLEMENTATION_SUMMARY.md +253 -0
- modules/mute-agent/V2_STEEL_MAN_IMPLEMENTATION.md +274 -0
- modules/mute-agent/VERIFICATION_REPORT.md +435 -0
- modules/mute-agent/charts/cost_comparison.png +0 -0
- modules/mute-agent/charts/cost_vs_ambiguity.png +0 -0
- modules/mute-agent/charts/metrics_comparison.png +0 -0
- modules/mute-agent/charts/scenario_breakdown.png +0 -0
- modules/mute-agent/charts/trace_attack_blocked.html +140 -0
- modules/mute-agent/charts/trace_attack_blocked.png +0 -0
- modules/mute-agent/charts/trace_failure.html +140 -0
- modules/mute-agent/charts/trace_failure.png +0 -0
- modules/mute-agent/charts/trace_success.html +140 -0
- modules/mute-agent/charts/trace_success.png +0 -0
- modules/mute-agent/examples/__init__.py +1 -0
- modules/mute-agent/examples/advanced_example.py +384 -0
- modules/mute-agent/examples/graph_debugger_demo.py +241 -0
- modules/mute-agent/examples/listener_example.py +297 -0
- modules/mute-agent/examples/simple_example.py +242 -0
- modules/mute-agent/examples/steel_man_demo.py +297 -0
- modules/mute-agent/experiments/README.md +135 -0
- modules/mute-agent/experiments/__init__.py +3 -0
- modules/mute-agent/experiments/agent_comparison.csv +6 -0
- modules/mute-agent/experiments/agent_comparison_50runs.csv +6 -0
- modules/mute-agent/experiments/ambiguity_test.py +335 -0
- modules/mute-agent/experiments/ambiguity_test_results.csv +31 -0
- modules/mute-agent/experiments/ambiguity_test_results_50runs.csv +51 -0
- modules/mute-agent/experiments/baseline_agent.py +189 -0
- modules/mute-agent/experiments/benchmark.py +402 -0
- modules/mute-agent/experiments/demo.py +172 -0
- modules/mute-agent/experiments/generate_cost_curve.py +474 -0
- modules/mute-agent/experiments/jailbreak_test.py +137 -0
- modules/mute-agent/experiments/latent_state_scenario.py +361 -0
- modules/mute-agent/experiments/mute_agent_experiment.py +349 -0
- modules/mute-agent/experiments/run_extended_experiment.py +40 -0
- modules/mute-agent/experiments/run_v2_experiments.py +266 -0
- modules/mute-agent/experiments/run_v2_experiments_auto.py +247 -0
- modules/mute-agent/experiments/v2_scenarios/README.md +214 -0
- modules/mute-agent/experiments/v2_scenarios/__init__.py +4 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_1_deep_dependency.py +325 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_2_adversarial.py +328 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_3_false_positive.py +303 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_4_performance.py +319 -0
- modules/mute-agent/experiments/visualize.py +400 -0
- modules/mute-agent/mute_agent/__init__.py +66 -0
- modules/mute-agent/mute_agent/core/__init__.py +1 -0
- modules/mute-agent/mute_agent/core/execution_agent.py +164 -0
- modules/mute-agent/mute_agent/core/handshake_protocol.py +199 -0
- modules/mute-agent/mute_agent/core/reasoning_agent.py +236 -0
- modules/mute-agent/mute_agent/knowledge_graph/__init__.py +1 -0
- modules/mute-agent/mute_agent/knowledge_graph/graph_elements.py +63 -0
- modules/mute-agent/mute_agent/knowledge_graph/multidimensional_graph.py +168 -0
- modules/mute-agent/mute_agent/knowledge_graph/subgraph.py +222 -0
- modules/mute-agent/mute_agent/listener/__init__.py +41 -0
- modules/mute-agent/mute_agent/listener/adapters/__init__.py +29 -0
- modules/mute-agent/mute_agent/listener/adapters/base_adapter.py +187 -0
- modules/mute-agent/mute_agent/listener/adapters/caas_adapter.py +342 -0
- modules/mute-agent/mute_agent/listener/adapters/control_plane_adapter.py +434 -0
- modules/mute-agent/mute_agent/listener/adapters/iatp_adapter.py +330 -0
- modules/mute-agent/mute_agent/listener/adapters/scak_adapter.py +249 -0
- modules/mute-agent/mute_agent/listener/listener.py +608 -0
- modules/mute-agent/mute_agent/listener/state_observer.py +434 -0
- modules/mute-agent/mute_agent/listener/threshold_config.py +311 -0
- modules/mute-agent/mute_agent/super_system/__init__.py +1 -0
- modules/mute-agent/mute_agent/super_system/router.py +202 -0
- modules/mute-agent/mute_agent/visualization/__init__.py +8 -0
- modules/mute-agent/mute_agent/visualization/graph_debugger.py +495 -0
- modules/mute-agent/requirements-dev.txt +6 -0
- modules/mute-agent/requirements.txt +9 -0
- modules/mute-agent/setup.py +64 -0
- modules/mute-agent/src/__init__.py +0 -0
- modules/mute-agent/src/agents/__init__.py +0 -0
- modules/mute-agent/src/agents/baseline_agent.py +524 -0
- modules/mute-agent/src/agents/interactive_agent.py +113 -0
- modules/mute-agent/src/agents/mute_agent.py +622 -0
- modules/mute-agent/src/benchmarks/__init__.py +0 -0
- modules/mute-agent/src/benchmarks/evaluator.py +481 -0
- modules/mute-agent/src/benchmarks/scenarios.json +985 -0
- modules/mute-agent/src/core/__init__.py +0 -0
- modules/mute-agent/src/core/mock_state.py +320 -0
- modules/mute-agent/src/core/tools.py +441 -0
- modules/nexus/__init__.py +49 -0
- modules/nexus/arbiter.py +357 -0
- modules/nexus/client.py +464 -0
- modules/nexus/dmz.py +417 -0
- modules/nexus/escrow.py +428 -0
- modules/nexus/exceptions.py +284 -0
- modules/nexus/registry.py +391 -0
- modules/nexus/reputation.py +423 -0
- modules/nexus/schemas/__init__.py +49 -0
- modules/nexus/schemas/compliance.py +274 -0
- modules/nexus/schemas/escrow.py +249 -0
- modules/nexus/schemas/manifest.py +223 -0
- modules/nexus/schemas/receipt.py +206 -0
- modules/observability/README.md +192 -0
- modules/observability/alertmanager/alertmanager.yml +116 -0
- modules/observability/alerts/agent-os-alerts.yaml +197 -0
- modules/observability/docker-compose.yml +128 -0
- modules/observability/grafana/dashboards/agent-os-amb.json +448 -0
- modules/observability/grafana/dashboards/agent-os-cmvk.json +441 -0
- modules/observability/grafana/dashboards/agent-os-overview.json +268 -0
- modules/observability/grafana/dashboards/agent-os-performance.json +15 -0
- modules/observability/grafana/dashboards/agent-os-safety.json +50 -0
- modules/observability/grafana/provisioning/dashboards/dashboards.yml +15 -0
- modules/observability/grafana/provisioning/datasources/datasources.yml +33 -0
- modules/observability/otel/otel-collector-config.yml +61 -0
- modules/observability/prometheus/prometheus.yml +63 -0
- modules/observability/pyproject.toml +53 -0
- modules/observability/scripts/export_dashboards.py +55 -0
- modules/observability/src/agent_os_observability/__init__.py +25 -0
- modules/observability/src/agent_os_observability/dashboards.py +896 -0
- modules/observability/src/agent_os_observability/metrics.py +396 -0
- modules/observability/src/agent_os_observability/server.py +221 -0
- modules/observability/src/agent_os_observability/tracer.py +226 -0
- modules/primitives/.gitignore +8 -0
- modules/primitives/README.md +62 -0
- modules/primitives/agent_primitives/__init__.py +22 -0
- modules/primitives/agent_primitives/failures.py +82 -0
- modules/primitives/agent_primitives/py.typed +0 -0
- modules/primitives/pyproject.toml +68 -0
- modules/scak/.github/copilot-instructions.md +396 -0
- modules/scak/.github/workflows/release.yml +117 -0
- modules/scak/.gitignore +32 -0
- modules/scak/CHANGELOG.md +173 -0
- modules/scak/CITATION.cff +62 -0
- modules/scak/CONTRIBUTING.md +429 -0
- modules/scak/Dockerfile +58 -0
- modules/scak/ENTERPRISE_FEATURES.md +518 -0
- modules/scak/IMPLEMENTATION_SUMMARY.md +206 -0
- modules/scak/LIMITATIONS.md +565 -0
- modules/scak/MANIFEST.in +16 -0
- modules/scak/NOVELTY.md +535 -0
- modules/scak/README.md +928 -0
- modules/scak/RESEARCH.md +670 -0
- modules/scak/agent_kernel/__init__.py +66 -0
- modules/scak/agent_kernel/analyzer.py +432 -0
- modules/scak/agent_kernel/auditor.py +31 -0
- modules/scak/agent_kernel/completeness_auditor.py +234 -0
- modules/scak/agent_kernel/detector.py +200 -0
- modules/scak/agent_kernel/kernel.py +741 -0
- modules/scak/agent_kernel/memory_manager.py +82 -0
- modules/scak/agent_kernel/models.py +372 -0
- modules/scak/agent_kernel/nudge_mechanism.py +260 -0
- modules/scak/agent_kernel/outcome_analyzer.py +335 -0
- modules/scak/agent_kernel/patcher.py +579 -0
- modules/scak/agent_kernel/semantic_analyzer.py +313 -0
- modules/scak/agent_kernel/semantic_purge.py +346 -0
- modules/scak/agent_kernel/simulator.py +447 -0
- modules/scak/agent_kernel/teacher.py +82 -0
- modules/scak/agent_kernel/triage.py +149 -0
- modules/scak/build_and_publish.ps1 +74 -0
- modules/scak/build_and_publish.sh +74 -0
- modules/scak/cli.py +471 -0
- modules/scak/dashboard.py +462 -0
- modules/scak/datasets/DATASET_CARD.md +219 -0
- modules/scak/datasets/README.md +143 -0
- modules/scak/datasets/gaia_vague_queries/vague_queries.json +262 -0
- modules/scak/datasets/hf_upload/README.md +219 -0
- modules/scak/datasets/hf_upload/scak_gaia_laziness.jsonl +50 -0
- modules/scak/datasets/prepare_hf_datasets.py +145 -0
- modules/scak/datasets/red_team/jailbreak_patterns.json +202 -0
- modules/scak/docker-compose.yml +99 -0
- modules/scak/docs/Adaptive-Memory-Hierarchy.md +319 -0
- modules/scak/docs/Data-Contracts-and-Schemas.md +285 -0
- modules/scak/docs/Dual-Loop-Architecture.md +344 -0
- modules/scak/docs/Enhanced-Features.md +612 -0
- modules/scak/docs/LANGCHAIN_INTEGRATION.md +572 -0
- modules/scak/docs/README.md +128 -0
- modules/scak/docs/Reference-Implementations.md +163 -0
- modules/scak/docs/SCAK_V2.md +374 -0
- modules/scak/docs/Three-Failure-Types.md +178 -0
- modules/scak/examples/basic_example.py +155 -0
- modules/scak/examples/circuit_breaker_lazy_eval_demo.py +243 -0
- modules/scak/examples/langchain_integration_example.py +339 -0
- modules/scak/examples/layer4_demo.py +243 -0
- modules/scak/examples/production_features_demo.py +353 -0
- modules/scak/examples/quick_demo.py +79 -0
- modules/scak/examples/scak_v2_demo.py +252 -0
- modules/scak/experiments/README.md +438 -0
- modules/scak/experiments/ablation_studies/README.md +192 -0
- modules/scak/experiments/ablation_studies/ablation_no_audit.py +116 -0
- modules/scak/experiments/ablation_studies/ablation_no_purge.py +133 -0
- modules/scak/experiments/chaos_engineering/README.md +332 -0
- modules/scak/experiments/context_efficiency_test.py +328 -0
- modules/scak/experiments/gaia_benchmark/README.md +208 -0
- modules/scak/experiments/laziness_benchmark.py +179 -0
- modules/scak/experiments/long_horizon_task_experiment.py +252 -0
- modules/scak/experiments/multi_agent_rag_experiment.py +284 -0
- modules/scak/experiments/results/ablation_table.md +12 -0
- modules/scak/experiments/results/long_horizon.json +36 -0
- modules/scak/experiments/results/multi_agent_rag.json +66 -0
- modules/scak/experiments/run_comprehensive_ablations.py +332 -0
- modules/scak/experiments/test_auditor_patcher_integration.py +251 -0
- modules/scak/notebooks/getting_started.ipynb +33 -0
- modules/scak/paper/ARXIV_SUBMISSION_METADATA.txt +109 -0
- modules/scak/paper/PAPER_CHECKLIST.md +304 -0
- modules/scak/paper/Paper.pdf +0 -0
- modules/scak/paper/README.md +113 -0
- modules/scak/paper/appendix.md +351 -0
- modules/scak/paper/arxiv/bibliography.bib +284 -0
- modules/scak/paper/arxiv/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/arxiv/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/arxiv/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/arxiv/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/arxiv/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/arxiv/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/arxiv/main.aux +103 -0
- modules/scak/paper/arxiv/main.bbl +113 -0
- modules/scak/paper/arxiv/main.blg +55 -0
- modules/scak/paper/arxiv/main.out +31 -0
- modules/scak/paper/arxiv/main.pdf +0 -0
- modules/scak/paper/arxiv/main.tex +482 -0
- modules/scak/paper/arxiv_submission/bibliography.bib +284 -0
- modules/scak/paper/arxiv_submission/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/arxiv_submission/main.aux +103 -0
- modules/scak/paper/arxiv_submission/main.bbl +113 -0
- modules/scak/paper/arxiv_submission/main.blg +55 -0
- modules/scak/paper/arxiv_submission/main.out +31 -0
- modules/scak/paper/arxiv_submission/main.pdf +0 -0
- modules/scak/paper/arxiv_submission/main.tex +482 -0
- modules/scak/paper/arxiv_submission.tar.gz +0 -0
- modules/scak/paper/bibliography.bib +284 -0
- modules/scak/paper/build.sh +55 -0
- modules/scak/paper/figures/README.md +32 -0
- modules/scak/paper/figures/fig1_ooda_architecture.md +75 -0
- modules/scak/paper/figures/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/figures/fig1_ooda_architecture.png +0 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.md +83 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.png +0 -0
- modules/scak/paper/figures/fig3_gaia_results.md +64 -0
- modules/scak/paper/figures/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/figures/fig3_gaia_results.png +0 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.md +64 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.png +0 -0
- modules/scak/paper/figures/fig5_context_reduction.md +71 -0
- modules/scak/paper/figures/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/figures/fig5_context_reduction.png +0 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.md +80 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.png +0 -0
- modules/scak/paper/figures/generate_figures.py +463 -0
- modules/scak/paper/main.aux +103 -0
- modules/scak/paper/main.bbl +113 -0
- modules/scak/paper/main.blg +55 -0
- modules/scak/paper/main.md +192 -0
- modules/scak/paper/main.out +31 -0
- modules/scak/paper/main.pdf +0 -0
- modules/scak/paper/main.tex +482 -0
- modules/scak/reproducibility/ABLATIONS.md +225 -0
- modules/scak/reproducibility/Dockerfile.reproducibility +34 -0
- modules/scak/reproducibility/README.md +421 -0
- modules/scak/reproducibility/requirements-pinned.txt +32 -0
- modules/scak/reproducibility/run_all_experiments.py +395 -0
- modules/scak/reproducibility/seed_control.py +53 -0
- modules/scak/reproducibility/statistical_analysis.py +302 -0
- modules/scak/requirements.txt +50 -0
- modules/scak/setup.py +93 -0
- modules/scak/src/__init__.py +124 -0
- modules/scak/src/agents/__init__.py +13 -0
- modules/scak/src/agents/conflict_resolution.py +732 -0
- modules/scak/src/agents/orchestrator.py +761 -0
- modules/scak/src/agents/pubsub.py +484 -0
- modules/scak/src/agents/shadow_teacher.py +344 -0
- modules/scak/src/agents/swarm.py +661 -0
- modules/scak/src/agents/worker.py +357 -0
- modules/scak/src/integrations/__init__.py +81 -0
- modules/scak/src/integrations/cmvk_adapter.py +430 -0
- modules/scak/src/integrations/control_plane_adapter.py +601 -0
- modules/scak/src/integrations/langchain_integration.py +902 -0
- modules/scak/src/interfaces/__init__.py +59 -0
- modules/scak/src/interfaces/llm_clients.py +505 -0
- modules/scak/src/interfaces/openapi_tools.py +611 -0
- modules/scak/src/interfaces/plugin_system.py +605 -0
- modules/scak/src/interfaces/protocols.py +365 -0
- modules/scak/src/interfaces/telemetry.py +464 -0
- modules/scak/src/interfaces/tool_registry.py +547 -0
- modules/scak/src/kernel/__init__.py +100 -0
- modules/scak/src/kernel/auditor.py +305 -0
- modules/scak/src/kernel/circuit_breaker.py +398 -0
- modules/scak/src/kernel/core.py +724 -0
- modules/scak/src/kernel/distributed.py +667 -0
- modules/scak/src/kernel/evolution.py +455 -0
- modules/scak/src/kernel/failover.py +621 -0
- modules/scak/src/kernel/governance.py +710 -0
- modules/scak/src/kernel/governance_v2.py +603 -0
- modules/scak/src/kernel/lazy_evaluator.py +514 -0
- modules/scak/src/kernel/load_testing.py +633 -0
- modules/scak/src/kernel/memory.py +945 -0
- modules/scak/src/kernel/patcher.py +581 -0
- modules/scak/src/kernel/rubric.py +419 -0
- modules/scak/src/kernel/schemas.py +390 -0
- modules/scak/src/kernel/skill_mapper.py +309 -0
- modules/scak/src/kernel/triage.py +149 -0
- modules/scak/src/mocks/__init__.py +99 -0
- modules/scak/tests/__init__.py +1 -0
- modules/scak/tests/test_circuit_breaker.py +403 -0
- modules/scak/tests/test_conflict_resolution.py +287 -0
- modules/scak/tests/test_dual_loop.py +463 -0
- modules/scak/tests/test_enhanced_features.py +421 -0
- modules/scak/tests/test_failover_and_load.py +438 -0
- modules/scak/tests/test_governance.py +185 -0
- modules/scak/tests/test_kernel.py +359 -0
- modules/scak/tests/test_langchain_integration.py +451 -0
- modules/scak/tests/test_lazy_evaluator.py +465 -0
- modules/scak/tests/test_llm_clients.py +122 -0
- modules/scak/tests/test_memory_controller.py +528 -0
- modules/scak/tests/test_orchestrator.py +181 -0
- modules/scak/tests/test_phase3_integration.py +265 -0
- modules/scak/tests/test_pubsub_swarm.py +203 -0
- modules/scak/tests/test_reference_implementations.py +240 -0
- modules/scak/tests/test_rubric.py +363 -0
- modules/scak/tests/test_scak_v2.py +651 -0
- modules/scak/tests/test_skill_mapper.py +217 -0
- modules/scak/tests/test_specific_failures.py +393 -0
- modules/scak/tests/test_tool_registry.py +264 -0
- modules/scak/tests/test_tools_and_plugins.py +303 -0
- modules/scak/tests/test_triage.py +596 -0
- modules/scak/tests/test_write_through.py +319 -0
- agent_os_kernel-1.1.0.dist-info/METADATA +0 -400
- agent_os_kernel-1.1.0.dist-info/RECORD +0 -12
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.2.0.dist-info}/WHEEL +0 -0
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.2.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# Experiments Directory
|
|
2
|
+
|
|
3
|
+
This folder contains reproducibility scripts for the CaaS paper.
|
|
4
|
+
|
|
5
|
+
## Quick Start
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
# Run the main reproducibility script
|
|
9
|
+
python experiments/reproduce_results.py
|
|
10
|
+
|
|
11
|
+
# Run with custom seed
|
|
12
|
+
python experiments/reproduce_results.py --seed 42 --output experiments/results.json
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Files
|
|
16
|
+
|
|
17
|
+
- `reproduce_results.py` - Main reproducibility script for paper claims
|
|
18
|
+
- `results.json` - Output results (generated after running)
|
|
19
|
+
|
|
20
|
+
## Full Benchmarks
|
|
21
|
+
|
|
22
|
+
For comprehensive benchmarks including the full corpus evaluation, see:
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
python benchmarks/run_evaluation.py --corpus benchmarks/data/sample_corpus/
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Paper Claims
|
|
29
|
+
|
|
30
|
+
The reproducibility script validates the following claims from the paper:
|
|
31
|
+
|
|
32
|
+
| Claim | Metric | Expected |
|
|
33
|
+
|-------|--------|----------|
|
|
34
|
+
| Sub-millisecond routing | Router latency | < 0.01 ms |
|
|
35
|
+
| High routing accuracy | Heuristic accuracy | > 90% |
|
|
36
|
+
| Fast context operations | Triad add operations | < 0.1 ms/item |
|
|
37
|
+
|
|
38
|
+
## Requirements
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
pip install caas-core numpy
|
|
42
|
+
```
|
|
@@ -0,0 +1,462 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
CaaS Reproducibility Script.
|
|
4
|
+
|
|
5
|
+
This script provides a simple, reproducible way to run the CaaS benchmarks
|
|
6
|
+
and verify the results reported in the paper.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
python experiments/reproduce_results.py
|
|
10
|
+
python experiments/reproduce_results.py --seed 42 --output results.json
|
|
11
|
+
|
|
12
|
+
The script will:
|
|
13
|
+
1. Set up a controlled environment with fixed random seed
|
|
14
|
+
2. Run the core CaaS components against sample inputs
|
|
15
|
+
3. Measure metrics (latency, accuracy, token efficiency)
|
|
16
|
+
4. Save results to experiments/results.json
|
|
17
|
+
|
|
18
|
+
For full benchmark evaluation, see: benchmarks/run_evaluation.py
|
|
19
|
+
|
|
20
|
+
Requirements:
|
|
21
|
+
pip install caas-core numpy
|
|
22
|
+
|
|
23
|
+
Author: Imran Siddique
|
|
24
|
+
License: MIT
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
from __future__ import annotations
|
|
28
|
+
|
|
29
|
+
import argparse
|
|
30
|
+
import json
|
|
31
|
+
import random
|
|
32
|
+
import sys
|
|
33
|
+
import time
|
|
34
|
+
from dataclasses import asdict, dataclass
|
|
35
|
+
from datetime import datetime
|
|
36
|
+
from pathlib import Path
|
|
37
|
+
from typing import Any, Dict, List, Optional
|
|
38
|
+
|
|
39
|
+
import numpy as np
|
|
40
|
+
|
|
41
|
+
# Put the parent directory first on sys.path (done unconditionally at import time) so imports resolve against it
|
|
42
|
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
43
|
+
|
|
44
|
+
# Import the CaaS components exercised by this script. If the package (or any
# one of these submodules) cannot be imported, fall back to "demo mode"
# instead of aborting at import time.
try:
    from caas import __version__ as CAAS_VERSION
    from caas.routing.heuristic_router import HeuristicRouter
    from caas.triad import ContextTriadManager
    from caas.decay import TimeDecayCalculator
    from caas.models import ModelTier

    CAAS_AVAILABLE = True
except ImportError as e:
    from enum import Enum

    CAAS_AVAILABLE = False
    CAAS_VERSION = "unknown"

    class ModelTier(Enum):
        """Placeholder tiers used only when ``caas.models`` cannot be imported.

        The module-level EXPECTED_ROUTING table refers to ModelTier.CANNED,
        ModelTier.FAST and ModelTier.SMART while the module is being loaded,
        so without this stand-in demo mode would die with a NameError before
        any code could check CAAS_AVAILABLE.

        NOTE(review): the member values are arbitrary placeholders and are
        not guaranteed to match the real ``caas.models.ModelTier`` values.
        """

        CANNED = "canned"
        FAST = "fast"
        SMART = "smart"

    # HeuristicRouter, ContextTriadManager and TimeDecayCalculator stay
    # unbound in demo mode; code that needs them should check CAAS_AVAILABLE
    # first. (They appear only in lazily evaluated annotations in the
    # function signatures, thanks to `from __future__ import annotations`.)
    print(f"Warning: CaaS not fully available ({e}). Running in demo mode.")
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# ============================================================================
|
|
59
|
+
# Configuration
|
|
60
|
+
# ============================================================================
|
|
61
|
+
|
|
62
|
+
# Defaults for the run: the random seed and the results file path.
DEFAULT_SEED = 42
DEFAULT_OUTPUT = "experiments/results.json"

# Expected routing results for accuracy calculation, keyed by query text.
EXPECTED_ROUTING = {
    # Greetings (should route to CANNED)
    "Hi": ModelTier.CANNED,
    "Hello there": ModelTier.CANNED,
    "Thanks!": ModelTier.CANNED,
    "Ok": ModelTier.CANNED,
    # Short queries (should route to FAST)
    "What is RAG?": ModelTier.FAST,
    "Define context window": ModelTier.FAST,
    "List the files": ModelTier.FAST,
    # Complex queries (should route to SMART)
    "Summarize the architecture of this system and explain the tradeoffs": ModelTier.SMART,
    "Analyze the performance implications of using vector similarity search": ModelTier.SMART,
    "Compare the Context Triad approach with traditional RAG systems": ModelTier.SMART,
    "Please provide a comprehensive review of the code structure": ModelTier.SMART,
}

# Sample queries for routing benchmark: exactly the keys of the table above,
# in insertion order (greetings, then short queries, then complex queries).
SAMPLE_QUERIES = list(EXPECTED_ROUTING)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
# ============================================================================
|
|
100
|
+
# Data Classes
|
|
101
|
+
# ============================================================================
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
@dataclass
class BenchmarkConfig:
    """Settings for a single benchmark run.

    Attributes:
        seed: Random seed value; defaults to DEFAULT_SEED.
        n_iterations: Iteration count; defaults to 100.
        warmup_iterations: Warmup iteration count; defaults to 10.

    NOTE(review): presumably ``n_iterations`` and ``warmup_iterations`` feed
    the ``n_iterations`` / ``warmup`` parameters of the benchmark functions;
    confirm against the caller.
    """

    seed: int = DEFAULT_SEED
    n_iterations: int = 100
    warmup_iterations: int = 10
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
@dataclass
class MetricResult:
    """One named measurement produced by a benchmark.

    Attributes:
        name: Identifier of the metric (e.g. "router_latency").
        value: The measured value; the latency benchmark stores the mean here.
        std: Optional spread of the samples (the latency benchmark stores the
            standard deviation); None when not provided.
        unit: Unit label for ``value`` (e.g. "ms"); empty string by default.
        n_samples: Number of samples behind the measurement; defaults to 1.
    """

    name: str
    value: float
    std: Optional[float] = None
    unit: str = ""
    n_samples: int = 1
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
@dataclass
class ExperimentResults:
    """Everything recorded for one experiment run.

    Attributes:
        timestamp: When the run happened, as a string.
        caas_version: CaaS version string recorded for the run.
        python_version: Python version string recorded for the run.
        seed: Random seed recorded for the run.
        metrics: Mapping of metric names to their recorded values.
        config: Mapping describing the configuration used.
    """

    timestamp: str
    caas_version: str
    python_version: str
    seed: int
    metrics: Dict[str, Any]
    config: Dict[str, Any]

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dict suitable for JSON serialization.

        The values are the attribute objects themselves (no copying), and the
        keys appear in field-declaration order.
        """
        exported = (
            "timestamp",
            "caas_version",
            "python_version",
            "seed",
            "metrics",
            "config",
        )
        return {key: getattr(self, key) for key in exported}
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
# ============================================================================
|
|
148
|
+
# Benchmark Functions
|
|
149
|
+
# ============================================================================
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def set_seed(seed: int) -> None:
    """Seed the random number generators used by this script.

    Seeds Python's ``random`` module first, then NumPy's global generator.

    Args:
        seed: Random seed value.
    """
    for seeder in (random.seed, np.random.seed):
        seeder(seed)
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def benchmark_router_latency(
    router: HeuristicRouter,
    queries: List[str],
    n_iterations: int = 100,
    warmup: int = 10,
) -> MetricResult:
    """Measure how long ``router.route`` takes per query.

    Every query is routed ``warmup`` times without timing, then
    ``n_iterations`` times with each individual call timed.

    Args:
        router: The HeuristicRouter instance whose ``route`` method is timed.
        queries: Queries to route.
        n_iterations: Number of timed passes over ``queries``.
        warmup: Number of untimed passes over ``queries`` run first.

    Returns:
        MetricResult named ``"router_latency"`` holding the mean and standard
        deviation of the per-call latency in milliseconds, with ``n_samples``
        set to the number of timed calls. When nothing was timed (empty
        ``queries`` or ``n_iterations <= 0``), the value and std are ``0.0``
        and ``n_samples`` is ``0``.
    """
    # Warmup passes are executed but deliberately not timed.
    for _ in range(warmup):
        for query in queries:
            router.route(query)

    latencies_ms: List[float] = []
    for _ in range(n_iterations):
        for query in queries:
            start = time.perf_counter()
            router.route(query)
            # perf_counter() returns seconds; store milliseconds.
            latencies_ms.append((time.perf_counter() - start) * 1000)

    if not latencies_ms:
        # np.mean/np.std of an empty array yield NaN and emit a
        # RuntimeWarning; report an explicit zero-sample result instead.
        return MetricResult(
            name="router_latency",
            value=0.0,
            std=0.0,
            unit="ms",
            n_samples=0,
        )

    samples = np.array(latencies_ms)
    return MetricResult(
        name="router_latency",
        value=float(np.mean(samples)),
        std=float(np.std(samples)),
        unit="ms",
        n_samples=len(latencies_ms),
    )
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def benchmark_router_accuracy(
    router: HeuristicRouter,
    queries: List[str],
    expected: Dict[str, ModelTier],
) -> MetricResult:
    """Score the router's decisions against known-good model tiers.

    Only queries that have an entry in ``expected`` are routed and counted.

    Args:
        router: The HeuristicRouter instance to evaluate.
        queries: Queries to route.
        expected: Mapping from query text to the ModelTier it should get.

    Returns:
        MetricResult named ``"router_accuracy"`` with the percentage of
        evaluated queries whose ``model_tier`` matched, or ``0.0`` when no
        query had an expectation. ``n_samples`` is the number evaluated.
    """
    # One boolean per evaluated query, produced in input order.
    outcomes = [
        bool(router.route(query).model_tier == expected[query])
        for query in queries
        if query in expected
    ]
    hits = sum(outcomes)
    evaluated = len(outcomes)

    if evaluated > 0:
        accuracy = hits / evaluated * 100
    else:
        accuracy = 0.0

    return MetricResult(
        name="router_accuracy",
        value=accuracy,
        unit="%",
        n_samples=evaluated,
    )
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def benchmark_context_triad(
    n_items: int = 100,
) -> Dict[str, MetricResult]:
    """Time insertion into each tier of a fresh ContextTriadManager.

    For the hot, warm and cold tiers in turn, ``n_items`` items are added
    through the corresponding ``add_<tier>_context`` method and the whole
    batch is timed.

    Args:
        n_items: Number of context items to add to each tier.

    Returns:
        Dictionary with the keys ``"add_hot_context"``, ``"add_warm_context"``
        and ``"add_cold_context"``. Each MetricResult holds the average time
        per item in milliseconds, or ``0.0`` when ``n_items`` is not positive
        (no items were added, so there is nothing to average).
    """
    triad = ContextTriadManager()
    results: Dict[str, MetricResult] = {}

    tiers = (
        ("hot", triad.add_hot_context),
        ("warm", triad.add_warm_context),
        ("cold", triad.add_cold_context),
    )
    # range() is empty for non-positive counts, so no items are added then.
    item_count = max(n_items, 0)

    for tier, add_context in tiers:
        label = tier.capitalize()  # "Hot" / "Warm" / "Cold" item prefix

        start = time.perf_counter()
        for i in range(item_count):
            add_context(f"{label} context item {i}", metadata={"index": i})
        elapsed_ms = (time.perf_counter() - start) * 1000

        metric_name = f"add_{tier}_context"
        results[metric_name] = MetricResult(
            name=metric_name,
            # Guard against ZeroDivisionError when nothing was added.
            value=elapsed_ms / item_count if item_count else 0.0,
            unit="ms/item",
            n_samples=item_count,
        )

    return results
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def benchmark_decay_calculator(
    n_items: int = 1000,
) -> MetricResult:
    """Time ``TimeDecayCalculator.calculate_decay`` over random ages.

    Ages are drawn from NumPy's global random state (exponential
    distribution, scale 24), so results depend on any seed set beforehand.

    Args:
        n_items: Number of decay calculations to perform.

    Returns:
        MetricResult named ``"decay_calculation"`` with the average time per
        calculation in milliseconds, or ``0.0`` when ``n_items`` is ``0``.
    """
    calculator = TimeDecayCalculator()

    # Generate random ages (in hours)
    ages = np.random.exponential(scale=24, size=n_items)

    start = time.perf_counter()
    for age in ages:
        # Convert the NumPy scalar to a plain float before the call.
        calculator.calculate_decay(float(age))
    elapsed_ms = (time.perf_counter() - start) * 1000

    return MetricResult(
        name="decay_calculation",
        # Guard against ZeroDivisionError when no calculation was requested.
        value=elapsed_ms / n_items if n_items else 0.0,
        unit="ms/calculation",
        n_samples=n_items,
    )
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
# ============================================================================
|
|
325
|
+
# Main
|
|
326
|
+
# ============================================================================
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
def _build_experiment_results(
    config: BenchmarkConfig,
    metrics: Dict[str, Any],
) -> ExperimentResults:
    """Wrap ``metrics`` with the run's timestamp, versions, seed and config."""
    # Imported locally so this helper does not depend on the module-level
    # import list providing ``timezone``.
    from datetime import timezone

    version = sys.version_info
    return ExperimentResults(
        # datetime.utcnow() is deprecated since Python 3.12. Dropping tzinfo
        # keeps the same naive-UTC ISO format that utcnow() produced.
        timestamp=datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
        caas_version=CAAS_VERSION,
        python_version=f"{version.major}.{version.minor}.{version.micro}",
        seed=config.seed,
        metrics=metrics,
        config=asdict(config),
    )


def run_experiments(config: BenchmarkConfig) -> ExperimentResults:
    """Run all benchmark experiments and collect their metrics.

    Seeds the random number generators from ``config.seed``, then runs the
    router latency and accuracy benchmarks, the Context Triad benchmark and
    the time-decay benchmark, printing progress along the way.

    Args:
        config: Benchmark configuration.

    Returns:
        ExperimentResults with all metrics. When ``CAAS_AVAILABLE`` is false
        no benchmark is run and ``metrics`` is empty.
    """
    print(f"Setting random seed: {config.seed}")
    set_seed(config.seed)

    metrics: Dict[str, Any] = {}

    if not CAAS_AVAILABLE:
        print("CaaS not available. Returning empty results.")
        return _build_experiment_results(config, metrics)

    # 1. Router benchmarks
    print("\n[1/4] Benchmarking Heuristic Router...")
    router = HeuristicRouter()

    latency_result = benchmark_router_latency(
        router,
        SAMPLE_QUERIES,
        n_iterations=config.n_iterations,
        warmup=config.warmup_iterations,
    )
    metrics["router_latency_ms"] = latency_result.value
    metrics["router_latency_std"] = latency_result.std
    print(f" - Latency: {latency_result.value:.4f} ± {latency_result.std:.4f} ms")

    accuracy_result = benchmark_router_accuracy(router, SAMPLE_QUERIES, EXPECTED_ROUTING)
    metrics["router_accuracy_pct"] = accuracy_result.value
    print(f" - Accuracy: {accuracy_result.value:.1f}%")

    # 2. Context Triad benchmarks
    print("\n[2/4] Benchmarking Context Triad...")
    triad_results = benchmark_context_triad(n_items=100)
    for name, result in triad_results.items():
        metrics[f"triad_{name}_ms"] = result.value
        print(f" - {name}: {result.value:.4f} {result.unit}")

    # 3. Decay calculator benchmarks
    print("\n[3/4] Benchmarking Time Decay Calculator...")
    decay_result = benchmark_decay_calculator(n_items=1000)
    metrics["decay_calculation_ms"] = decay_result.value
    print(f" - Decay calculation: {decay_result.value:.6f} {decay_result.unit}")

    # 4. Summary statistics
    print("\n[4/4] Computing summary statistics...")
    metrics["total_queries_tested"] = len(SAMPLE_QUERIES)
    metrics["n_iterations"] = config.n_iterations

    return _build_experiment_results(config, metrics)
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
def _format_metric(value: object, spec: str) -> str:
    """Format a numeric metric with ``spec``, or return ``"N/A"`` otherwise."""
    if isinstance(value, (int, float)):
        return format(value, spec)
    return "N/A"


def main() -> None:
    """Parse the command line, run the benchmarks and save the results.

    Command-line options:
        --seed: Random seed (default ``DEFAULT_SEED``).
        --output: Path of the JSON results file (default ``DEFAULT_OUTPUT``).
        --iterations: Number of timing iterations (default 100).

    The results are written as JSON to the output path (parent directories
    are created as needed) and a short summary is printed.
    """
    # Imported locally so this function does not depend on the module-level
    # import list providing ``timezone``.
    from datetime import timezone

    parser = argparse.ArgumentParser(
        description="CaaS Reproducibility Script",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    parser.add_argument(
        "--seed",
        type=int,
        default=DEFAULT_SEED,
        help=f"Random seed (default: {DEFAULT_SEED})",
    )
    parser.add_argument(
        "--output",
        type=str,
        default=DEFAULT_OUTPUT,
        help=f"Output file path (default: {DEFAULT_OUTPUT})",
    )
    parser.add_argument(
        "--iterations",
        type=int,
        default=100,
        help="Number of timing iterations (default: 100)",
    )

    args = parser.parse_args()

    # datetime.utcnow() is deprecated since Python 3.12. Dropping tzinfo keeps
    # the same naive-UTC ISO format that utcnow() produced.
    started_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    print("=" * 60)
    print("CaaS Reproducibility Benchmark")
    print("=" * 60)
    print(f"CaaS Version: {CAAS_VERSION}")
    print(f"Python Version: {sys.version}")
    print(f"Timestamp: {started_at}")

    config = BenchmarkConfig(
        seed=args.seed,
        n_iterations=args.iterations,
    )

    results = run_experiments(config)

    # Ensure output directory exists
    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Save results
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(results.to_dict(), f, indent=2)

    print("\n" + "=" * 60)
    print(f"Results saved to: {output_path}")
    print("=" * 60)

    # Print summary. The metrics may be absent (run_experiments returns an
    # empty dict when CaaS is unavailable), and applying a float format spec
    # to an 'N/A' string raises ValueError, so format defensively.
    latency_text = _format_metric(results.metrics.get("router_latency_ms"), ".4f")
    accuracy_text = _format_metric(results.metrics.get("router_accuracy_pct"), ".1f")
    print("\nSummary:")
    print(f" - Router latency: {latency_text} ms")
    print(f" - Router accuracy: {accuracy_text}%")
    print(f" - Seed: {results.seed}")


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
# arXiv Submission Metadata
|
|
2
|
+
|
|
3
|
+
**Submission Package**: `arxiv_submission.tar` (399 KB)
|
|
4
|
+
**Generated**: January 22, 2026
|
|
5
|
+
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## Required Metadata Fields
|
|
9
|
+
|
|
10
|
+
### Title
|
|
11
|
+
```
|
|
12
|
+
Context-as-a-Service: A Principled Architecture for Enterprise RAG Systems
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
### Authors
|
|
16
|
+
```
|
|
17
|
+
Imran Siddique
|
|
18
|
+
Microsoft
|
|
19
|
+
imran.siddique@microsoft.com
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
### Abstract
|
|
23
|
+
```
|
|
24
|
+
Retrieval-Augmented Generation (RAG) systems have become essential for grounding LLM outputs in factual content. However, production deployments face seven critical fallacies that current frameworks fail to address: (1) the Flat Chunk Fallacy, treating all content equally regardless of structural importance; (2) Context Amnesia, losing metadata when chunks are extracted; (3) Time-Blind Retrieval, ignoring content freshness; (4) Flat Context, lacking priority tiers for different context types; (5) Official Truth Fallacy, favoring documentation over practical knowledge; (6) Brutal Squeeze, using lossy summarization instead of precision truncation; and (7) the Middleware Gap, trusting third-party routers with sensitive data.
|
|
25
|
+
|
|
26
|
+
We present Context-as-a-Service (CaaS), an open-source framework that systematically addresses these challenges through five novel components: (a) Structure-Aware Indexing with three-tier value hierarchies; (b) Context Triad for Hot/Warm/Cold intimacy-based prioritization; (c) Pragmatic Truth tracking that surfaces practical knowledge alongside official sources; (d) Heuristic Router for zero-latency deterministic query routing; and (e) Trust Gateway for enterprise-grade on-premises deployment.
|
|
27
|
+
|
|
28
|
+
We evaluate CaaS on a new benchmark corpus of 16 enterprise documents spanning code, legal, HR, and engineering domains. Our experiments demonstrate a 28.1% improvement in Precision@5 and a 27.9% improvement in NDCG@10 over flat-chunk baselines, with sub-millisecond routing latency (0.003 ms) and only 18.4% latency overhead for the full pipeline. CaaS is available as an open-source Python package under the MIT license, with Docker support and a public Hugging Face dataset for reproducibility.
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
### Categories
|
|
32
|
+
|
|
33
|
+
**Primary Category**:
|
|
34
|
+
```
|
|
35
|
+
cs.AI - Artificial Intelligence
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
**Cross-list Categories** (select all that apply):
|
|
39
|
+
```
|
|
40
|
+
cs.CL - Computation and Language
|
|
41
|
+
cs.IR - Information Retrieval
|
|
42
|
+
cs.SE - Software Engineering
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### Comments Field
|
|
46
|
+
```
|
|
47
|
+
Code: https://github.com/imran-siddique/context-as-a-service
|
|
48
|
+
PyPI: https://pypi.org/project/context-as-a-service/
|
|
49
|
+
Dataset: https://huggingface.co/datasets/imran-siddique/context-as-a-service
|
|
50
|
+
12 pages, 4 figures, 9 tables
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### License
|
|
54
|
+
```
|
|
55
|
+
CC BY 4.0 (Creative Commons Attribution 4.0 International)
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### ACM Classification (optional)
|
|
59
|
+
```
|
|
60
|
+
I.2.7 Natural Language Processing
|
|
61
|
+
H.3.3 Information Search and Retrieval
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
### Keywords (for discoverability)
|
|
65
|
+
```
|
|
66
|
+
Retrieval-Augmented Generation, RAG, Enterprise AI, Context Management, LLM, Large Language Models, Information Retrieval, Context Window, Document Indexing, Accumulation Paradox
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
## Submission Checklist
|
|
72
|
+
|
|
73
|
+
### Before Submission
|
|
74
|
+
- [x] Update author names and affiliations in `main.tex` (line ~52)
|
|
75
|
+
- [x] Update email addresses in `main.tex`
|
|
76
|
+
- [ ] Verify all figures render correctly
|
|
77
|
+
- [ ] Verify all citations compile without errors
|
|
78
|
+
- [ ] Run spell check on abstract and paper
|
|
79
|
+
- [ ] Verify URLs in Comments field are accessible
|
|
80
|
+
|
|
81
|
+
### Package Contents
|
|
82
|
+
- [x] `main.tex` - Main LaTeX source (20.7 KB)
|
|
83
|
+
- [x] `references.bib` - BibTeX bibliography with 17 entries (6.2 KB)
|
|
84
|
+
- [x] `figures/fig1_system_architecture.png` - System architecture (105 KB)
|
|
85
|
+
- [x] `figures/fig2_context_triad.png` - Context Triad diagram (115 KB)
|
|
86
|
+
- [x] `figures/fig3_ablation_results.png` - Ablation study (83 KB)
|
|
87
|
+
- [x] `figures/fig4_routing_latency.png` - Routing latency (72 KB)
|
|
88
|
+
|
|
89
|
+
**Total Package Size**: 399 KB (well under arXiv's 10 MB limit)
|
|
90
|
+
|
|
91
|
+
### Compilation Instructions
|
|
92
|
+
```bash
|
|
93
|
+
pdflatex main.tex
|
|
94
|
+
bibtex main
|
|
95
|
+
pdflatex main.tex
|
|
96
|
+
pdflatex main.tex
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
## Key References Added (Accumulation Paradox)
|
|
102
|
+
|
|
103
|
+
The following verified citations were added to support the Accumulation Paradox concept:
|
|
104
|
+
|
|
105
|
+
1. **Liu et al. (2023)** - "Lost in the Middle: How Language Models Use Long Contexts"
|
|
106
|
+
- Venue: TACL
|
|
107
|
+
- arXiv: 2307.03172
|
|
108
|
+
- Key contribution: U-shaped performance curve in long contexts
|
|
109
|
+
|
|
110
|
+
2. **Xiao et al. (2024)** - "Efficient Streaming Language Models with Attention Sinks"
|
|
111
|
+
- Venue: ICLR 2024
|
|
112
|
+
- arXiv: 2309.17453
|
|
113
|
+
- Key contribution: Attention sink phenomenon
|
|
114
|
+
|
|
115
|
+
3. **Li et al. (2024)** - "Long-context LLMs Struggle with Long In-context Learning"
|
|
116
|
+
- Venue: arXiv preprint
|
|
117
|
+
- arXiv: 2404.02060
|
|
118
|
+
- Key contribution: Performance degradation with accumulated context
|
|
119
|
+
|
|
120
|
+
4. **Packer et al. (2023)** - "MemGPT: Towards LLMs as Operating Systems"
|
|
121
|
+
- Venue: arXiv preprint
|
|
122
|
+
- arXiv: 2310.08560
|
|
123
|
+
- Key contribution: Virtual context management for agentic AI
|
|
124
|
+
|
|
125
|
+
---
|
|
126
|
+
|
|
127
|
+
## arXiv Submission Steps
|
|
128
|
+
|
|
129
|
+
1. Go to: https://arxiv.org/submit
|
|
130
|
+
2. Sign in or create an arXiv account
|
|
131
|
+
3. Click "Start New Submission"
|
|
132
|
+
4. Select primary category: **cs.AI**
|
|
133
|
+
5. Add cross-list categories: **cs.CL**, **cs.IR**
|
|
134
|
+
6. Fill in metadata from fields above
|
|
135
|
+
7. Upload `arxiv_submission.tar`
|
|
136
|
+
8. Select license: **CC BY 4.0**
|
|
137
|
+
9. Add comments with code/data URLs
|
|
138
|
+
10. Preview PDF compilation
|
|
139
|
+
11. Submit for moderation
|
|
140
|
+
|
|
141
|
+
**Expected Processing Time**: 1-2 business days for moderation
|
|
142
|
+
|
|
143
|
+
---
|
|
144
|
+
|
|
145
|
+
*All citations in this submission are verified as real and accurate.*
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# arXiv Submission Package
|
|
2
|
+
|
|
3
|
+
## Contents
|
|
4
|
+
|
|
5
|
+
```
|
|
6
|
+
arxiv_submission.zip
|
|
7
|
+
├── main.tex # Main LaTeX source file
|
|
8
|
+
├── references.bib # BibTeX bibliography
|
|
9
|
+
└── figures/
|
|
10
|
+
├── fig1_system_architecture.png # System architecture diagram
|
|
11
|
+
├── fig2_context_triad.png # Context Triad visualization
|
|
12
|
+
├── fig3_ablation_results.png # Ablation study bar chart
|
|
13
|
+
└── fig4_routing_latency.png # Routing latency comparison
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## Compilation
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
pdflatex main.tex
|
|
20
|
+
bibtex main
|
|
21
|
+
pdflatex main.tex
|
|
22
|
+
pdflatex main.tex
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## arXiv Submission Steps
|
|
26
|
+
|
|
27
|
+
1. Go to: https://arxiv.org/submit
|
|
28
|
+
2. Sign in (or create account)
|
|
29
|
+
3. Start new submission
|
|
30
|
+
4. Fill in metadata:
|
|
31
|
+
- **Primary Category**: `cs.AI` (Artificial Intelligence)
|
|
32
|
+
- **Cross-list**: `cs.CL` (Computation and Language), `cs.IR` (Information Retrieval)
|
|
33
|
+
- **Title**: Context-as-a-Service: A Principled Architecture for Enterprise RAG Systems
|
|
34
|
+
- **License**: CC BY 4.0
|
|
35
|
+
5. Upload `arxiv_submission.zip`
|
|
36
|
+
6. In **Comments** field, add:
|
|
37
|
+
```
|
|
38
|
+
Code: https://github.com/imran-siddique/context-as-a-service
|
|
39
|
+
PyPI: https://pypi.org/project/context-as-a-service/
|
|
40
|
+
Dataset: https://huggingface.co/datasets/imran-siddique/context-as-a-service
|
|
41
|
+
```
|
|
42
|
+
7. Preview and submit
|
|
43
|
+
|
|
44
|
+
## Package Size
|
|
45
|
+
|
|
46
|
+
- Total: ~363 KB
|
|
47
|
+
- Within arXiv's 10MB limit ✓
|