agent-os-kernel 1.1.0__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_os/__init__.py +66 -4
- agent_os/agents_compat.py +286 -0
- agent_os/base_agent.py +308 -0
- agent_os/cli.py +1079 -19
- agent_os/integrations/__init__.py +37 -2
- agent_os/integrations/openai_adapter.py +502 -0
- agent_os/integrations/semantic_kernel_adapter.py +569 -0
- agent_os/stateless.py +349 -0
- agent_os_kernel-1.3.0.dist-info/METADATA +676 -0
- agent_os_kernel-1.3.0.dist-info/RECORD +1053 -0
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.3.0.dist-info}/entry_points.txt +0 -1
- modules/amb/.github/workflows/ci.yml +102 -0
- modules/amb/.github/workflows/publish.yml +146 -0
- modules/amb/.gitignore +134 -0
- modules/amb/CHANGELOG.md +118 -0
- modules/amb/CONTRIBUTING.md +141 -0
- modules/amb/LICENSE +21 -0
- modules/amb/README.md +188 -0
- modules/amb/amb_core/__init__.py +175 -0
- modules/amb/amb_core/adapters/__init__.py +55 -0
- modules/amb/amb_core/adapters/aws_sqs_broker.py +374 -0
- modules/amb/amb_core/adapters/azure_servicebus_broker.py +338 -0
- modules/amb/amb_core/adapters/kafka_broker.py +258 -0
- modules/amb/amb_core/adapters/nats_broker.py +283 -0
- modules/amb/amb_core/adapters/rabbitmq_broker.py +233 -0
- modules/amb/amb_core/adapters/redis_broker.py +260 -0
- modules/amb/amb_core/broker.py +143 -0
- modules/amb/amb_core/bus.py +479 -0
- modules/amb/amb_core/cloudevents.py +507 -0
- modules/amb/amb_core/dlq.py +343 -0
- modules/amb/amb_core/hf_utils.py +534 -0
- modules/amb/amb_core/memory_broker.py +408 -0
- modules/amb/amb_core/models.py +139 -0
- modules/amb/amb_core/persistence.py +527 -0
- modules/amb/amb_core/schema.py +292 -0
- modules/amb/amb_core/tracing.py +356 -0
- modules/amb/examples/advanced_features.py +223 -0
- modules/amb/examples/backpressure_demo.py +225 -0
- modules/amb/examples/basic_usage.py +117 -0
- modules/amb/examples/tracing_demo.py +104 -0
- modules/amb/experiments/README.md +52 -0
- modules/amb/experiments/reproduce_results.py +467 -0
- modules/amb/experiments/results.json +324 -0
- modules/amb/paper/README.md +40 -0
- modules/amb/paper/paper.tex +365 -0
- modules/amb/paper/whitepaper.md +377 -0
- modules/amb/pyproject.toml +117 -0
- modules/amb/tests/__init__.py +1 -0
- modules/amb/tests/test_backpressure_priority.py +280 -0
- modules/amb/tests/test_bus.py +198 -0
- modules/amb/tests/test_cloudevents.py +443 -0
- modules/amb/tests/test_features.py +531 -0
- modules/amb/tests/test_models.py +74 -0
- modules/amb/tests/test_tracing.py +254 -0
- modules/atr/.github/workflows/ci.yml +101 -0
- modules/atr/.github/workflows/publish.yml +140 -0
- modules/atr/.gitignore +134 -0
- modules/atr/.pre-commit-config.yaml +37 -0
- modules/atr/CHANGELOG.md +39 -0
- modules/atr/CONTRIBUTING.md +96 -0
- modules/atr/IMPLEMENTATION_SUMMARY.md +143 -0
- modules/atr/README.md +180 -0
- modules/atr/atr/__init__.py +638 -0
- modules/atr/atr/access.py +346 -0
- modules/atr/atr/composition.py +643 -0
- modules/atr/atr/decorator.py +355 -0
- modules/atr/atr/executor.py +382 -0
- modules/atr/atr/health.py +555 -0
- modules/atr/atr/hf_utils.py +447 -0
- modules/atr/atr/injection.py +420 -0
- modules/atr/atr/metrics.py +438 -0
- modules/atr/atr/policies.py +401 -0
- modules/atr/atr/py.typed +2 -0
- modules/atr/atr/registry.py +450 -0
- modules/atr/atr/schema.py +478 -0
- modules/atr/atr/tools/safe/__init__.py +73 -0
- modules/atr/atr/tools/safe/calculator.py +380 -0
- modules/atr/atr/tools/safe/datetime_tool.py +441 -0
- modules/atr/atr/tools/safe/file_reader.py +400 -0
- modules/atr/atr/tools/safe/http_client.py +314 -0
- modules/atr/atr/tools/safe/json_parser.py +372 -0
- modules/atr/atr/tools/safe/text_tool.py +526 -0
- modules/atr/atr/tools/safe/toolkit.py +173 -0
- modules/atr/docs/PYPI_SETUP.md +113 -0
- modules/atr/examples/README.md +27 -0
- modules/atr/examples/demo.py +144 -0
- modules/atr/examples/sandbox_demo.py +218 -0
- modules/atr/experiments/README.md +69 -0
- modules/atr/experiments/reproduce_results.py +509 -0
- modules/atr/experiments/results/.gitkeep +0 -0
- modules/atr/experiments/results/results_20260123_140334.json +71 -0
- modules/atr/paper/README.md +36 -0
- modules/atr/paper/figures/.gitkeep +0 -0
- modules/atr/paper/references.bib +84 -0
- modules/atr/paper/structure.tex +293 -0
- modules/atr/paper/whitepaper.md +234 -0
- modules/atr/pyproject.toml +148 -0
- modules/atr/requirements.txt +1 -0
- modules/atr/setup.py +30 -0
- modules/atr/tests/__init__.py +1 -0
- modules/atr/tests/test_decorator.py +317 -0
- modules/atr/tests/test_executor.py +245 -0
- modules/atr/tests/test_integration_executor.py +184 -0
- modules/atr/tests/test_registry.py +312 -0
- modules/atr/tests/test_schema.py +182 -0
- modules/atr/tests/test_v2_features.py +708 -0
- modules/caas/.dockerignore +63 -0
- modules/caas/.github/ISSUE_TEMPLATE/bug_report.md +38 -0
- modules/caas/.github/ISSUE_TEMPLATE/custom.md +10 -0
- modules/caas/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
- modules/caas/.github/workflows/ci.yml +100 -0
- modules/caas/.github/workflows/lint.yml +39 -0
- modules/caas/.github/workflows/publish-pypi.yml +124 -0
- modules/caas/.gitignore +73 -0
- modules/caas/.pre-commit-config.yaml +33 -0
- modules/caas/CHANGELOG.md +58 -0
- modules/caas/CONTRIBUTING.md +346 -0
- modules/caas/Dockerfile +41 -0
- modules/caas/LICENSE +21 -0
- modules/caas/MANIFEST.in +11 -0
- modules/caas/README.md +158 -0
- modules/caas/benchmarks/README.md +255 -0
- modules/caas/benchmarks/create_hf_dataset.py +502 -0
- modules/caas/benchmarks/data/sample_corpus/README.md +86 -0
- modules/caas/benchmarks/data/sample_corpus/auth_module.py +211 -0
- modules/caas/benchmarks/data/sample_corpus/contribution_guide.md +185 -0
- modules/caas/benchmarks/data/sample_corpus/remote_work_policy.html +57 -0
- modules/caas/benchmarks/hf_dataset/README.md +214 -0
- modules/caas/benchmarks/hf_dataset/caas_benchmark_corpus.py +73 -0
- modules/caas/benchmarks/hf_dataset/corpus_preview.json +193 -0
- modules/caas/benchmarks/results/README.md +66 -0
- modules/caas/benchmarks/results/evaluation_2026-01-20.json +121 -0
- modules/caas/benchmarks/run_evaluation.py +561 -0
- modules/caas/benchmarks/statistical_tests.py +289 -0
- modules/caas/benchmarks/verify_sample_corpus.py +83 -0
- modules/caas/docker-compose.yml +38 -0
- modules/caas/docs/CONTEXT_TRIAD.md +462 -0
- modules/caas/docs/CONTRIBUTING.md +346 -0
- modules/caas/docs/ETHICS_AND_LIMITATIONS.md +336 -0
- modules/caas/docs/HEURISTIC_ROUTER.md +442 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY.md +363 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_CONTEXT_TRIAD.md +277 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_HEURISTIC_ROUTER.md +231 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_METADATA_INJECTION.md +258 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_PRAGMATIC_TRUTH.md +212 -0
- modules/caas/docs/IMPLEMENTATION_SUMMARY_TRUST_GATEWAY.md +319 -0
- modules/caas/docs/LAYER_1_PRIMITIVE.md +202 -0
- modules/caas/docs/METADATA_INJECTION.md +404 -0
- modules/caas/docs/PRAGMATIC_TRUTH.md +431 -0
- modules/caas/docs/RELATED_WORK.md +312 -0
- modules/caas/docs/RELEASE_CHECKLIST.md +219 -0
- modules/caas/docs/RELEASE_GUIDE.md +285 -0
- modules/caas/docs/REPRODUCIBILITY.md +386 -0
- modules/caas/docs/SLIDING_WINDOW.md +387 -0
- modules/caas/docs/STRUCTURE_AWARE_INDEXING.md +158 -0
- modules/caas/docs/TESTING.md +259 -0
- modules/caas/docs/THREAT_MODEL.md +247 -0
- modules/caas/docs/TRUST_GATEWAY.md +575 -0
- modules/caas/docs/VFS.md +298 -0
- modules/caas/examples/agents/enterprise_security_agent.py +414 -0
- modules/caas/examples/agents/intelligent_document_analyzer.py +380 -0
- modules/caas/examples/demos/demo.py +309 -0
- modules/caas/examples/demos/demo_context_triad.py +225 -0
- modules/caas/examples/demos/demo_conversation_manager.py +285 -0
- modules/caas/examples/demos/demo_heuristic_router.py +133 -0
- modules/caas/examples/demos/demo_metadata_injection.py +198 -0
- modules/caas/examples/demos/demo_pragmatic_truth.py +303 -0
- modules/caas/examples/demos/demo_structure_aware.py +140 -0
- modules/caas/examples/demos/demo_time_decay.py +247 -0
- modules/caas/examples/demos/demo_trust_gateway.py +383 -0
- modules/caas/examples/multi_agent/README.md +159 -0
- modules/caas/examples/multi_agent/research_team.py +369 -0
- modules/caas/examples/multi_agent/vfs_collaboration.py +393 -0
- modules/caas/examples/usage/auth_module.py +142 -0
- modules/caas/examples/usage/usage_example.py +173 -0
- modules/caas/experiments/README.md +42 -0
- modules/caas/experiments/reproduce_results.py +462 -0
- modules/caas/paper/ARXIV_METADATA.md +145 -0
- modules/caas/paper/ARXIV_README.md +47 -0
- modules/caas/paper/CHECKLIST.md +103 -0
- modules/caas/paper/GITHUB_RELEASE_NOTES.md +105 -0
- modules/caas/paper/README.md +71 -0
- modules/caas/paper/abstract.md +24 -0
- modules/caas/paper/arxiv_submission.tar +0 -0
- modules/caas/paper/arxiv_submission.zip +0 -0
- modules/caas/paper/build_pdf.py +355 -0
- modules/caas/paper/experiments.md +149 -0
- modules/caas/paper/figures/.gitkeep +0 -0
- modules/caas/paper/figures/README.md +237 -0
- modules/caas/paper/figures/fig1_system_architecture.png +0 -0
- modules/caas/paper/figures/fig1_system_architecture.svg +198 -0
- modules/caas/paper/figures/fig2_context_triad.png +0 -0
- modules/caas/paper/figures/fig2_context_triad.svg +105 -0
- modules/caas/paper/figures/fig3_ablation_results.png +0 -0
- modules/caas/paper/figures/fig3_ablation_results.svg +113 -0
- modules/caas/paper/figures/fig4_routing_latency.png +0 -0
- modules/caas/paper/figures/fig4_routing_latency.svg +97 -0
- modules/caas/paper/intro.md +103 -0
- modules/caas/paper/latex/figures/fig1_system_architecture.png +0 -0
- modules/caas/paper/latex/figures/fig2_context_triad.png +0 -0
- modules/caas/paper/latex/figures/fig3_ablation_results.png +0 -0
- modules/caas/paper/latex/figures/fig4_routing_latency.png +0 -0
- modules/caas/paper/latex/main.tex +468 -0
- modules/caas/paper/latex/references.bib +140 -0
- modules/caas/paper/method.md +350 -0
- modules/caas/paper/outline.md +123 -0
- modules/caas/paper/related_work.md +101 -0
- modules/caas/paper/tables/.gitkeep +0 -0
- modules/caas/paper/tables/results_tables.md +50 -0
- modules/caas/pyproject.toml +172 -0
- modules/caas/requirements.txt +11 -0
- modules/caas/src/caas/__init__.py +232 -0
- modules/caas/src/caas/api/__init__.py +7 -0
- modules/caas/src/caas/api/server.py +1326 -0
- modules/caas/src/caas/caching.py +832 -0
- modules/caas/src/caas/cli.py +208 -0
- modules/caas/src/caas/conversation.py +221 -0
- modules/caas/src/caas/decay.py +118 -0
- modules/caas/src/caas/detection/__init__.py +7 -0
- modules/caas/src/caas/detection/detector.py +236 -0
- modules/caas/src/caas/enrichment.py +127 -0
- modules/caas/src/caas/gateway/__init__.py +24 -0
- modules/caas/src/caas/gateway/trust_gateway.py +471 -0
- modules/caas/src/caas/hf_utils.py +477 -0
- modules/caas/src/caas/ingestion/__init__.py +21 -0
- modules/caas/src/caas/ingestion/processors.py +251 -0
- modules/caas/src/caas/ingestion/structure_parser.py +185 -0
- modules/caas/src/caas/models.py +354 -0
- modules/caas/src/caas/pragmatic_truth.py +441 -0
- modules/caas/src/caas/routing/__init__.py +8 -0
- modules/caas/src/caas/routing/heuristic_router.py +242 -0
- modules/caas/src/caas/storage/__init__.py +7 -0
- modules/caas/src/caas/storage/store.py +450 -0
- modules/caas/src/caas/triad.py +472 -0
- modules/caas/src/caas/tuning/__init__.py +7 -0
- modules/caas/src/caas/tuning/tuner.py +322 -0
- modules/caas/src/caas/vfs/__init__.py +12 -0
- modules/caas/src/caas/vfs/filesystem.py +450 -0
- modules/caas/tests/__init__.py +3 -0
- modules/caas/tests/conftest.py +8 -0
- modules/caas/tests/test_caching.py +628 -0
- modules/caas/tests/test_context_triad.py +385 -0
- modules/caas/tests/test_conversation_manager.py +289 -0
- modules/caas/tests/test_functionality.py +215 -0
- modules/caas/tests/test_heuristic_router.py +370 -0
- modules/caas/tests/test_metadata_injection.py +328 -0
- modules/caas/tests/test_pragmatic_truth.py +322 -0
- modules/caas/tests/test_structure_aware_indexing.py +283 -0
- modules/caas/tests/test_time_decay.py +268 -0
- modules/caas/tests/test_trust_gateway.py +445 -0
- modules/caas/tests/test_vfs.py +298 -0
- modules/cmvk/.github/FUNDING.yml +9 -0
- modules/cmvk/.github/dependabot.yml +54 -0
- modules/cmvk/.github/workflows/ci.yml +205 -0
- modules/cmvk/.github/workflows/publish.yml +143 -0
- modules/cmvk/.gitignore +147 -0
- modules/cmvk/.pre-commit-config.yaml +58 -0
- modules/cmvk/CHANGELOG.md +146 -0
- modules/cmvk/CITATION.cff +48 -0
- modules/cmvk/CONTRIBUTING.md +229 -0
- modules/cmvk/Dockerfile +87 -0
- modules/cmvk/HF_MODEL_CARD.md +185 -0
- modules/cmvk/LICENSE +21 -0
- modules/cmvk/README.md +149 -0
- modules/cmvk/SECURITY.md +114 -0
- modules/cmvk/config/prompts/generator_v1.txt +23 -0
- modules/cmvk/config/prompts/verifier_hostile.txt +32 -0
- modules/cmvk/config/settings.yaml +40 -0
- modules/cmvk/coverage_html/.gitignore +2 -0
- modules/cmvk/coverage_html/class_index.html +658 -0
- modules/cmvk/coverage_html/coverage_html_cb_188fc9a4.js +735 -0
- modules/cmvk/coverage_html/favicon_32_cb_c827f16f.png +0 -0
- modules/cmvk/coverage_html/function_index.html +1978 -0
- modules/cmvk/coverage_html/index.html +255 -0
- modules/cmvk/coverage_html/keybd_closed_cb_900cfef5.png +0 -0
- modules/cmvk/coverage_html/status.json +1 -0
- modules/cmvk/coverage_html/style_cb_5c747636.css +389 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38___init___py.html +315 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_audit_py.html +499 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_benchmarks_py.html +575 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_constitutional_py.html +1001 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_hf_utils_py.html +398 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_metrics_py.html +570 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_profiles_py.html +397 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_types_py.html +109 -0
- modules/cmvk/coverage_html/z_2c49bd2ed3e01e38_verification_py.html +1053 -0
- modules/cmvk/docs/DIAGRAMS.md +325 -0
- modules/cmvk/docs/architecture.md +345 -0
- modules/cmvk/docs/features.md +308 -0
- modules/cmvk/docs/getting_started.md +279 -0
- modules/cmvk/docs/innovation_layer.md +377 -0
- modules/cmvk/docs/safety.md +281 -0
- modules/cmvk/docs/traceability.md +150 -0
- modules/cmvk/examples/basic_example.py +62 -0
- modules/cmvk/examples/demo_complete_pipeline.py +209 -0
- modules/cmvk/examples/demo_innovation_layer.py +197 -0
- modules/cmvk/examples/example.py +112 -0
- modules/cmvk/examples/model_diversity_comparison.py +110 -0
- modules/cmvk/examples/real_api_integration.py +121 -0
- modules/cmvk/examples/test_full_pipeline.py +303 -0
- modules/cmvk/experiments/FEATURE_2_LATERAL_THINKING.md +187 -0
- modules/cmvk/experiments/README.md +216 -0
- modules/cmvk/experiments/ablation_runner.py +666 -0
- modules/cmvk/experiments/baseline_runner.py +158 -0
- modules/cmvk/experiments/blind_spot_benchmark.py +364 -0
- modules/cmvk/experiments/datasets/README.md +85 -0
- modules/cmvk/experiments/datasets/humaneval_50.json +352 -0
- modules/cmvk/experiments/datasets/humaneval_full.json +1150 -0
- modules/cmvk/experiments/datasets/humaneval_sample.json +32 -0
- modules/cmvk/experiments/datasets/sabotage.json +262 -0
- modules/cmvk/experiments/datasets/sample.json +40 -0
- modules/cmvk/experiments/demo_with_traces.py +110 -0
- modules/cmvk/experiments/efficiency_curve.py +259 -0
- modules/cmvk/experiments/experiment_runner.py +243 -0
- modules/cmvk/experiments/paper_data_generator.py +183 -0
- modules/cmvk/experiments/reproduce_results.py +407 -0
- modules/cmvk/experiments/reproducible_runner.py +352 -0
- modules/cmvk/experiments/sabotage_stress_test.py +311 -0
- modules/cmvk/experiments/test_lateral_thinking.py +116 -0
- modules/cmvk/experiments/test_prosecutor.py +41 -0
- modules/cmvk/experiments/visualize_results.py +735 -0
- modules/cmvk/logs/traces/demo_HumanEval_0_20260121-204900.json +36 -0
- modules/cmvk/notebooks/analysis.ipynb +124 -0
- modules/cmvk/paper/PAPER.md +561 -0
- modules/cmvk/paper/arxiv_checklist.md +230 -0
- modules/cmvk/paper/cmvk_neurips.aux +77 -0
- modules/cmvk/paper/cmvk_neurips.bbl +81 -0
- modules/cmvk/paper/cmvk_neurips.blg +48 -0
- modules/cmvk/paper/cmvk_neurips.out +16 -0
- modules/cmvk/paper/cmvk_neurips.pdf +0 -0
- modules/cmvk/paper/cmvk_neurips.tex +309 -0
- modules/cmvk/paper/figures/ablation.png +0 -0
- modules/cmvk/paper/figures/ablation.svg +39 -0
- modules/cmvk/paper/figures/architecture.png +0 -0
- modules/cmvk/paper/figures/architecture.svg +115 -0
- modules/cmvk/paper/figures/results_bar.png +0 -0
- modules/cmvk/paper/figures/results_bar.svg +70 -0
- modules/cmvk/paper/generate_figures.py +383 -0
- modules/cmvk/paper/neurips_2024.sty +101 -0
- modules/cmvk/paper/references.bib +98 -0
- modules/cmvk/paper/structure.tex +200 -0
- modules/cmvk/pyproject.toml +189 -0
- modules/cmvk/requirements-dev.txt +19 -0
- modules/cmvk/requirements.txt +14 -0
- modules/cmvk/src/cmvk/__init__.py +216 -0
- modules/cmvk/src/cmvk/audit.py +400 -0
- modules/cmvk/src/cmvk/benchmarks.py +476 -0
- modules/cmvk/src/cmvk/constitutional.py +902 -0
- modules/cmvk/src/cmvk/hf_utils.py +299 -0
- modules/cmvk/src/cmvk/metrics.py +471 -0
- modules/cmvk/src/cmvk/profiles.py +298 -0
- modules/cmvk/src/cmvk/py.typed +0 -0
- modules/cmvk/src/cmvk/types.py +10 -0
- modules/cmvk/src/cmvk/verification.py +954 -0
- modules/cmvk/src/cross_model_verification_kernel/__init__.py +91 -0
- modules/cmvk/src/cross_model_verification_kernel/__main__.py +10 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/__init__.py +16 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/base_agent.py +142 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/generator_openai.py +223 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/verifier_anthropic.py +448 -0
- modules/cmvk/src/cross_model_verification_kernel/agents/verifier_gemini.py +481 -0
- modules/cmvk/src/cross_model_verification_kernel/cli.py +570 -0
- modules/cmvk/src/cross_model_verification_kernel/core/__init__.py +26 -0
- modules/cmvk/src/cross_model_verification_kernel/core/graph_memory.py +308 -0
- modules/cmvk/src/cross_model_verification_kernel/core/kernel.py +413 -0
- modules/cmvk/src/cross_model_verification_kernel/core/trace_logger.py +75 -0
- modules/cmvk/src/cross_model_verification_kernel/core/types.py +121 -0
- modules/cmvk/src/cross_model_verification_kernel/datasets/__init__.py +20 -0
- modules/cmvk/src/cross_model_verification_kernel/datasets/humaneval_loader.py +271 -0
- modules/cmvk/src/cross_model_verification_kernel/generator.py +118 -0
- modules/cmvk/src/cross_model_verification_kernel/kernel.py +292 -0
- modules/cmvk/src/cross_model_verification_kernel/models.py +111 -0
- modules/cmvk/src/cross_model_verification_kernel/py.typed +1 -0
- modules/cmvk/src/cross_model_verification_kernel/simple_kernel.py +185 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/__init__.py +94 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/huggingface_upload.py +394 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/sandbox.py +159 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/statistics.py +468 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/visualizer.py +312 -0
- modules/cmvk/src/cross_model_verification_kernel/tools/web_search.py +86 -0
- modules/cmvk/src/cross_model_verification_kernel/verifier.py +257 -0
- modules/cmvk/tests/__init__.py +3 -0
- modules/cmvk/tests/conftest.py +61 -0
- modules/cmvk/tests/integration/__init__.py +1 -0
- modules/cmvk/tests/integration/test_anthropic_verifier.py +269 -0
- modules/cmvk/tests/integration/test_integration.py +53 -0
- modules/cmvk/tests/integration/test_lateral_thinking_integration.py +199 -0
- modules/cmvk/tests/integration/test_lateral_thinking_witness.py +208 -0
- modules/cmvk/tests/integration/test_prosecutor_mode.py +131 -0
- modules/cmvk/tests/test_constitutional.py +611 -0
- modules/cmvk/tests/test_enhanced_features.py +603 -0
- modules/cmvk/tests/test_verification.py +255 -0
- modules/cmvk/tests/unit/__init__.py +1 -0
- modules/cmvk/tests/unit/test_agents.py +64 -0
- modules/cmvk/tests/unit/test_cli.py +224 -0
- modules/cmvk/tests/unit/test_core.py +126 -0
- modules/cmvk/tests/unit/test_humaneval_loader.py +197 -0
- modules/cmvk/tests/unit/test_kernel.py +255 -0
- modules/cmvk/tests/unit/test_reproducibility.py +160 -0
- modules/cmvk/tests/unit/test_trace_logger.py +115 -0
- modules/cmvk/tests/unit/test_visualizer.py +218 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/bug_report.yml +82 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/config.yml +11 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/feature_request.yml +104 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/question.yml +70 -0
- modules/control-plane/.github/ISSUE_TEMPLATE/security_vulnerability.yml +84 -0
- modules/control-plane/.github/discussions.yml +73 -0
- modules/control-plane/.github/pull_request_template.md +82 -0
- modules/control-plane/.github/workflows/publish.yml +146 -0
- modules/control-plane/.github/workflows/release.yml +39 -0
- modules/control-plane/.github/workflows/tests.yml +58 -0
- modules/control-plane/.gitignore +55 -0
- modules/control-plane/CHANGELOG.md +203 -0
- modules/control-plane/CONTRIBUTING.md +311 -0
- modules/control-plane/CONTRIBUTORS.md +88 -0
- modules/control-plane/Dockerfile +82 -0
- modules/control-plane/LICENSE +21 -0
- modules/control-plane/MANIFEST.in +17 -0
- modules/control-plane/README.md +1264 -0
- modules/control-plane/ROADMAP.md +228 -0
- modules/control-plane/SECURITY.md +210 -0
- modules/control-plane/SUPPORT.md +106 -0
- modules/control-plane/acp-cli.py +212 -0
- modules/control-plane/benchmark/README.md +257 -0
- modules/control-plane/benchmark/__init__.py +19 -0
- modules/control-plane/benchmark/red_team_dataset.py +517 -0
- modules/control-plane/benchmark.py +563 -0
- modules/control-plane/build_and_publish.sh +130 -0
- modules/control-plane/docker-compose.yml +74 -0
- modules/control-plane/docs/ABLATION_STUDIES.md +528 -0
- modules/control-plane/docs/ADAPTER_GUIDE.md +544 -0
- modules/control-plane/docs/ADVANCED_FEATURES.md +543 -0
- modules/control-plane/docs/AIOS_COMPARISON.md +296 -0
- modules/control-plane/docs/BIBLIOGRAPHY.md +367 -0
- modules/control-plane/docs/CASE_STUDIES.md +645 -0
- modules/control-plane/docs/DOCKER_DEPLOYMENT.md +184 -0
- modules/control-plane/docs/ECOSYSTEM_STATUS.md +98 -0
- modules/control-plane/docs/HF_MODEL_CARD.md +168 -0
- modules/control-plane/docs/KERNEL_V1_RELEASE.md +454 -0
- modules/control-plane/docs/LAYER3_FRAMEWORK.md +227 -0
- modules/control-plane/docs/LIMITATIONS.md +523 -0
- modules/control-plane/docs/PYPI_PUBLISHING.md +195 -0
- modules/control-plane/docs/README.md +58 -0
- modules/control-plane/docs/RELATED_WORK.md +319 -0
- modules/control-plane/docs/RELEASE_v1.1.0.md +252 -0
- modules/control-plane/docs/REPRODUCIBILITY.md +540 -0
- modules/control-plane/docs/RESEARCH_FOUNDATION.md +197 -0
- modules/control-plane/docs/api/CORE.md +270 -0
- modules/control-plane/docs/architecture/architecture.md +120 -0
- modules/control-plane/docs/community/ANNOUNCEMENT_TEMPLATES.md +52 -0
- modules/control-plane/docs/guides/IMPLEMENTATION.md +225 -0
- modules/control-plane/docs/guides/PHILOSOPHY.md +354 -0
- modules/control-plane/docs/guides/QUICKSTART.md +217 -0
- modules/control-plane/examples/README.md +138 -0
- modules/control-plane/examples/a2a_demo.py +410 -0
- modules/control-plane/examples/adapter_demo.py +347 -0
- modules/control-plane/examples/advanced_features.py +403 -0
- modules/control-plane/examples/basic_usage.py +261 -0
- modules/control-plane/examples/benchmark_demo.py +186 -0
- modules/control-plane/examples/compliance_demo.py +333 -0
- modules/control-plane/examples/configuration.py +265 -0
- modules/control-plane/examples/getting_started.py +178 -0
- modules/control-plane/examples/hibernation_and_time_travel_demo.py +406 -0
- modules/control-plane/examples/interactive_tutorial.ipynb +497 -0
- modules/control-plane/examples/kernel_interceptor_demo.py +202 -0
- modules/control-plane/examples/kernel_v1_demo.py +273 -0
- modules/control-plane/examples/langchain_demo.py +281 -0
- modules/control-plane/examples/lifecycle_demo.py +724 -0
- modules/control-plane/examples/mcp_demo.py +378 -0
- modules/control-plane/examples/ml_safety_demo.py +157 -0
- modules/control-plane/examples/multimodal_demo.py +347 -0
- modules/control-plane/examples/observability_demo.py +370 -0
- modules/control-plane/examples/use_cases.py +336 -0
- modules/control-plane/experiments/long_horizon_purge.py +235 -0
- modules/control-plane/experiments/multi_agent_rag.py +165 -0
- modules/control-plane/experiments/reproduce_results.py +667 -0
- modules/control-plane/paper/ARXIV_SUBMISSION_INFO.txt +122 -0
- modules/control-plane/paper/ETHICS_STATEMENT.md +248 -0
- modules/control-plane/paper/PAPER_CHECKLIST.md +72 -0
- modules/control-plane/paper/Paper.pdf +0 -0
- modules/control-plane/paper/README.md +71 -0
- modules/control-plane/paper/appendix.md +152 -0
- modules/control-plane/paper/architecture.md +15 -0
- modules/control-plane/paper/arxiv/figures/ablation_chart.png +0 -0
- modules/control-plane/paper/arxiv/figures/architecture.png +0 -0
- modules/control-plane/paper/arxiv/figures/constraint_graphs.png +0 -0
- modules/control-plane/paper/arxiv/figures/results_chart.png +0 -0
- modules/control-plane/paper/arxiv/main.aux +97 -0
- modules/control-plane/paper/arxiv/main.bbl +112 -0
- modules/control-plane/paper/arxiv/main.blg +48 -0
- modules/control-plane/paper/arxiv/main.out +33 -0
- modules/control-plane/paper/arxiv/main.pdf +0 -0
- modules/control-plane/paper/arxiv/main.tex +479 -0
- modules/control-plane/paper/arxiv/references.bib +234 -0
- modules/control-plane/paper/arxiv_submission.tar +0 -0
- modules/control-plane/paper/arxiv_submission.zip +0 -0
- modules/control-plane/paper/build.sh +68 -0
- modules/control-plane/paper/figures/README.md +47 -0
- modules/control-plane/paper/figures/ablation_chart.pdf +0 -0
- modules/control-plane/paper/figures/ablation_chart.png +0 -0
- modules/control-plane/paper/figures/architecture.pdf +0 -0
- modules/control-plane/paper/figures/architecture.png +0 -0
- modules/control-plane/paper/figures/constraint_graphs.pdf +0 -0
- modules/control-plane/paper/figures/constraint_graphs.png +0 -0
- modules/control-plane/paper/figures/generate_figures.py +252 -0
- modules/control-plane/paper/figures/results_chart.pdf +0 -0
- modules/control-plane/paper/figures/results_chart.png +0 -0
- modules/control-plane/paper/main.md +273 -0
- modules/control-plane/paper/main.tex +214 -0
- modules/control-plane/paper/main_arxiv.aux +53 -0
- modules/control-plane/paper/main_arxiv.out +17 -0
- modules/control-plane/paper/main_arxiv.pdf +0 -0
- modules/control-plane/paper/main_arxiv.tex +264 -0
- modules/control-plane/paper/references.bib +234 -0
- modules/control-plane/pyproject.toml +124 -0
- modules/control-plane/reproducibility/ABLATIONS.md +136 -0
- modules/control-plane/reproducibility/README.md +288 -0
- modules/control-plane/reproducibility/commands.md +467 -0
- modules/control-plane/reproducibility/docker_config/Dockerfile +39 -0
- modules/control-plane/reproducibility/experiment_configs/purge_config.json +46 -0
- modules/control-plane/reproducibility/experiment_configs/rag_config.json +36 -0
- modules/control-plane/reproducibility/hardware_specs.md +317 -0
- modules/control-plane/reproducibility/requirements_frozen.txt +0 -0
- modules/control-plane/reproducibility/run_all_experiments.sh +45 -0
- modules/control-plane/reproducibility/seeds.json +106 -0
- modules/control-plane/scripts/prepare_pypi.py +46 -0
- modules/control-plane/scripts/prepare_release.py +176 -0
- modules/control-plane/scripts/upload_dataset_to_hf.py +316 -0
- modules/control-plane/setup.py +69 -0
- modules/control-plane/src/agent_control_plane/__init__.py +639 -0
- modules/control-plane/src/agent_control_plane/a2a_adapter.py +541 -0
- modules/control-plane/src/agent_control_plane/adapter.py +415 -0
- modules/control-plane/src/agent_control_plane/agent_hibernation.py +364 -0
- modules/control-plane/src/agent_control_plane/agent_kernel.py +464 -0
- modules/control-plane/src/agent_control_plane/compliance.py +718 -0
- modules/control-plane/src/agent_control_plane/constraint_graphs.py +475 -0
- modules/control-plane/src/agent_control_plane/control_plane.py +848 -0
- modules/control-plane/src/agent_control_plane/example_executors.py +193 -0
- modules/control-plane/src/agent_control_plane/execution_engine.py +229 -0
- modules/control-plane/src/agent_control_plane/flight_recorder.py +600 -0
- modules/control-plane/src/agent_control_plane/governance_layer.py +432 -0
- modules/control-plane/src/agent_control_plane/hf_utils.py +561 -0
- modules/control-plane/src/agent_control_plane/interfaces/__init__.py +53 -0
- modules/control-plane/src/agent_control_plane/interfaces/kernel_interface.py +359 -0
- modules/control-plane/src/agent_control_plane/interfaces/plugin_interface.py +495 -0
- modules/control-plane/src/agent_control_plane/interfaces/protocol_interfaces.py +385 -0
- modules/control-plane/src/agent_control_plane/kernel_space.py +707 -0
- modules/control-plane/src/agent_control_plane/langchain_adapter.py +422 -0
- modules/control-plane/src/agent_control_plane/lifecycle.py +3111 -0
- modules/control-plane/src/agent_control_plane/mcp_adapter.py +517 -0
- modules/control-plane/src/agent_control_plane/ml_safety.py +560 -0
- modules/control-plane/src/agent_control_plane/multimodal.py +724 -0
- modules/control-plane/src/agent_control_plane/mute_agent.py +419 -0
- modules/control-plane/src/agent_control_plane/observability.py +785 -0
- modules/control-plane/src/agent_control_plane/orchestrator.py +480 -0
- modules/control-plane/src/agent_control_plane/plugin_registry.py +748 -0
- modules/control-plane/src/agent_control_plane/policy_engine.py +525 -0
- modules/control-plane/src/agent_control_plane/shadow_mode.py +307 -0
- modules/control-plane/src/agent_control_plane/signals.py +491 -0
- modules/control-plane/src/agent_control_plane/supervisor_agents.py +427 -0
- modules/control-plane/src/agent_control_plane/time_travel_debugger.py +554 -0
- modules/control-plane/src/agent_control_plane/tool_registry.py +350 -0
- modules/control-plane/src/agent_control_plane/vfs.py +695 -0
- modules/control-plane/tests/README.md +33 -0
- modules/control-plane/tests/test_a2a_adapter.py +336 -0
- modules/control-plane/tests/test_adapter.py +422 -0
- modules/control-plane/tests/test_advanced_features.py +389 -0
- modules/control-plane/tests/test_benchmark.py +223 -0
- modules/control-plane/tests/test_compliance.py +214 -0
- modules/control-plane/tests/test_control_plane.py +295 -0
- modules/control-plane/tests/test_hibernation.py +274 -0
- modules/control-plane/tests/test_kernel_interception.py +284 -0
- modules/control-plane/tests/test_langchain_adapter.py +258 -0
- modules/control-plane/tests/test_lifecycle.py +1174 -0
- modules/control-plane/tests/test_mcp_adapter.py +293 -0
- modules/control-plane/tests/test_ml_safety.py +142 -0
- modules/control-plane/tests/test_multimodal.py +317 -0
- modules/control-plane/tests/test_new_features.py +435 -0
- modules/control-plane/tests/test_observability.py +338 -0
- modules/control-plane/tests/test_time_travel.py +387 -0
- modules/emk/.github/workflows/ci.yml +105 -0
- modules/emk/.github/workflows/publish.yml +144 -0
- modules/emk/.gitignore +74 -0
- modules/emk/CHANGELOG.md +41 -0
- modules/emk/CONTRIBUTING.md +295 -0
- modules/emk/IMPLEMENTATION.md +174 -0
- modules/emk/LICENSE +21 -0
- modules/emk/MANIFEST.in +8 -0
- modules/emk/README.md +135 -0
- modules/emk/RELEASE_NOTES.md +82 -0
- modules/emk/SECURITY.md +52 -0
- modules/emk/codecov.yml +39 -0
- modules/emk/docs/MEMORY_MANAGEMENT.md +285 -0
- modules/emk/emk/__init__.py +106 -0
- modules/emk/emk/hf_utils.py +419 -0
- modules/emk/emk/indexer.py +144 -0
- modules/emk/emk/py.typed +0 -0
- modules/emk/emk/schema.py +204 -0
- modules/emk/emk/sleep_cycle.py +345 -0
- modules/emk/emk/store.py +479 -0
- modules/emk/examples/basic_usage.py +123 -0
- modules/emk/examples/memory_features_demo.py +154 -0
- modules/emk/experiments/README.md +59 -0
- modules/emk/experiments/reproduce_results.py +461 -0
- modules/emk/experiments/results.json +61 -0
- modules/emk/paper/structure.tex +192 -0
- modules/emk/paper/whitepaper.md +273 -0
- modules/emk/pyproject.toml +91 -0
- modules/emk/setup.py +5 -0
- modules/emk/tests/test_file_adapter.py +195 -0
- modules/emk/tests/test_indexer.py +174 -0
- modules/emk/tests/test_init.py +55 -0
- modules/emk/tests/test_negative_memory.py +83 -0
- modules/emk/tests/test_schema.py +150 -0
- modules/emk/tests/test_semantic_rules.py +175 -0
- modules/emk/tests/test_sleep_cycle.py +335 -0
- modules/emk/tests/test_store_anti_patterns.py +239 -0
- modules/iatp/.github/workflows/docker-build.yml +124 -0
- modules/iatp/.github/workflows/publish.yml +174 -0
- modules/iatp/.github/workflows/python-package.yml +121 -0
- modules/iatp/.gitignore +67 -0
- modules/iatp/.pre-commit-config.yaml +64 -0
- modules/iatp/CHANGELOG.md +120 -0
- modules/iatp/Dockerfile +91 -0
- modules/iatp/IMPLEMENTATION_SUMMARY.md +218 -0
- modules/iatp/MANIFEST.in +9 -0
- modules/iatp/README.md +180 -0
- modules/iatp/docker/Dockerfile.agent +27 -0
- modules/iatp/docker/Dockerfile.sidecar-python +86 -0
- modules/iatp/docker/README.md +258 -0
- modules/iatp/docker-compose.yml +194 -0
- modules/iatp/docs/ARCHITECTURE.md +243 -0
- modules/iatp/docs/CLI_GUIDE.md +220 -0
- modules/iatp/docs/DEPLOYMENT.md +304 -0
- modules/iatp/examples/README.md +132 -0
- modules/iatp/examples/backend_agent.py +39 -0
- modules/iatp/examples/client.py +168 -0
- modules/iatp/examples/demo_attestation_reputation.py +274 -0
- modules/iatp/examples/demo_client.py +240 -0
- modules/iatp/examples/demo_rbac.py +143 -0
- modules/iatp/examples/integration_demo.py +245 -0
- modules/iatp/examples/manifests/coder_agent.json +20 -0
- modules/iatp/examples/manifests/reviewer_agent.json +19 -0
- modules/iatp/examples/manifests/secure_bank.json +14 -0
- modules/iatp/examples/manifests/standard_agent.json +14 -0
- modules/iatp/examples/manifests/untrusted_honeypot.json +14 -0
- modules/iatp/examples/run_secure_bank_sidecar.py +85 -0
- modules/iatp/examples/run_sidecar.py +105 -0
- modules/iatp/examples/run_untrusted_sidecar.py +77 -0
- modules/iatp/examples/secure_bank_agent.py +138 -0
- modules/iatp/examples/test_untrusted.py +82 -0
- modules/iatp/examples/untrusted_agent.py +119 -0
- modules/iatp/experiments/README.md +58 -0
- modules/iatp/experiments/cascading_hallucination/README.md +149 -0
- modules/iatp/experiments/cascading_hallucination/agent_a_user.py +41 -0
- modules/iatp/experiments/cascading_hallucination/agent_b_summarizer.py +54 -0
- modules/iatp/experiments/cascading_hallucination/agent_c_database.py +47 -0
- modules/iatp/experiments/cascading_hallucination/proof_of_concept.py +290 -0
- modules/iatp/experiments/cascading_hallucination/run_experiment.py +226 -0
- modules/iatp/experiments/cascading_hallucination/sidecar_c.py +61 -0
- modules/iatp/experiments/reproduce_results.py +574 -0
- modules/iatp/experiments/results.json +2336 -0
- modules/iatp/iatp/__init__.py +164 -0
- modules/iatp/iatp/attestation.py +401 -0
- modules/iatp/iatp/cli.py +253 -0
- modules/iatp/iatp/hf_utils.py +469 -0
- modules/iatp/iatp/ipc_pipes.py +578 -0
- modules/iatp/iatp/main.py +410 -0
- modules/iatp/iatp/models/__init__.py +445 -0
- modules/iatp/iatp/policy_engine.py +335 -0
- modules/iatp/iatp/py.typed +2 -0
- modules/iatp/iatp/recovery.py +319 -0
- modules/iatp/iatp/security/__init__.py +268 -0
- modules/iatp/iatp/sidecar/__init__.py +517 -0
- modules/iatp/iatp/telemetry/__init__.py +162 -0
- modules/iatp/iatp/tests/__init__.py +1 -0
- modules/iatp/iatp/tests/test_attestation.py +368 -0
- modules/iatp/iatp/tests/test_cli.py +129 -0
- modules/iatp/iatp/tests/test_models.py +128 -0
- modules/iatp/iatp/tests/test_policy_engine.py +345 -0
- modules/iatp/iatp/tests/test_recovery.py +279 -0
- modules/iatp/iatp/tests/test_security.py +220 -0
- modules/iatp/iatp/tests/test_sidecar.py +165 -0
- modules/iatp/iatp/tests/test_telemetry.py +173 -0
- modules/iatp/paper/BLOG.md +307 -0
- modules/iatp/paper/PAPER.md +236 -0
- modules/iatp/paper/RFC_SUBMISSION.md +299 -0
- modules/iatp/paper/whitepaper.md +369 -0
- modules/iatp/proto/README.md +200 -0
- modules/iatp/proto/generate_stubs.py +81 -0
- modules/iatp/proto/iatp.proto +552 -0
- modules/iatp/pyproject.toml +180 -0
- modules/iatp/requirements-dev.txt +2 -0
- modules/iatp/requirements.txt +6 -0
- modules/iatp/setup.py +60 -0
- modules/iatp/sidecar/README.md +487 -0
- modules/iatp/sidecar/go/Dockerfile +32 -0
- modules/iatp/sidecar/go/README.md +237 -0
- modules/iatp/sidecar/go/go.mod +8 -0
- modules/iatp/sidecar/go/main.go +488 -0
- modules/iatp/spec/001-handshake.md +436 -0
- modules/iatp/spec/002-reversibility.md +394 -0
- modules/iatp/spec/schema/capability_manifest.json +266 -0
- modules/iatp/test_integration.py +310 -0
- modules/mcp-kernel-server/README.md +261 -0
- modules/mcp-kernel-server/pyproject.toml +60 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/__init__.py +26 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/cli.py +229 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/resources.py +215 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/server.py +562 -0
- modules/mcp-kernel-server/src/mcp_kernel_server/tools.py +1172 -0
- modules/mute-agent/.github/workflows/safety_check.yml +45 -0
- modules/mute-agent/.gitignore +53 -0
- modules/mute-agent/ARCHITECTURE.md +531 -0
- modules/mute-agent/BENCHMARK_GUIDE.md +384 -0
- modules/mute-agent/COMPLETION_SUMMARY.md +293 -0
- modules/mute-agent/EXPERIMENT_SUMMARY.md +318 -0
- modules/mute-agent/IMPLEMENTATION_SUMMARY.md +212 -0
- modules/mute-agent/LICENSE +21 -0
- modules/mute-agent/PHASE3_SUMMARY.md +297 -0
- modules/mute-agent/README.md +360 -0
- modules/mute-agent/STEEL_MAN_RESULTS.md +353 -0
- modules/mute-agent/USAGE.md +505 -0
- modules/mute-agent/V2_IMPLEMENTATION_SUMMARY.md +253 -0
- modules/mute-agent/V2_STEEL_MAN_IMPLEMENTATION.md +274 -0
- modules/mute-agent/VERIFICATION_REPORT.md +435 -0
- modules/mute-agent/charts/cost_comparison.png +0 -0
- modules/mute-agent/charts/cost_vs_ambiguity.png +0 -0
- modules/mute-agent/charts/metrics_comparison.png +0 -0
- modules/mute-agent/charts/scenario_breakdown.png +0 -0
- modules/mute-agent/charts/trace_attack_blocked.html +140 -0
- modules/mute-agent/charts/trace_attack_blocked.png +0 -0
- modules/mute-agent/charts/trace_failure.html +140 -0
- modules/mute-agent/charts/trace_failure.png +0 -0
- modules/mute-agent/charts/trace_success.html +140 -0
- modules/mute-agent/charts/trace_success.png +0 -0
- modules/mute-agent/examples/__init__.py +1 -0
- modules/mute-agent/examples/advanced_example.py +384 -0
- modules/mute-agent/examples/graph_debugger_demo.py +241 -0
- modules/mute-agent/examples/listener_example.py +297 -0
- modules/mute-agent/examples/simple_example.py +242 -0
- modules/mute-agent/examples/steel_man_demo.py +297 -0
- modules/mute-agent/experiments/README.md +135 -0
- modules/mute-agent/experiments/__init__.py +3 -0
- modules/mute-agent/experiments/agent_comparison.csv +6 -0
- modules/mute-agent/experiments/agent_comparison_50runs.csv +6 -0
- modules/mute-agent/experiments/ambiguity_test.py +335 -0
- modules/mute-agent/experiments/ambiguity_test_results.csv +31 -0
- modules/mute-agent/experiments/ambiguity_test_results_50runs.csv +51 -0
- modules/mute-agent/experiments/baseline_agent.py +189 -0
- modules/mute-agent/experiments/benchmark.py +402 -0
- modules/mute-agent/experiments/demo.py +172 -0
- modules/mute-agent/experiments/generate_cost_curve.py +474 -0
- modules/mute-agent/experiments/jailbreak_test.py +137 -0
- modules/mute-agent/experiments/latent_state_scenario.py +361 -0
- modules/mute-agent/experiments/mute_agent_experiment.py +349 -0
- modules/mute-agent/experiments/run_extended_experiment.py +40 -0
- modules/mute-agent/experiments/run_v2_experiments.py +266 -0
- modules/mute-agent/experiments/run_v2_experiments_auto.py +247 -0
- modules/mute-agent/experiments/v2_scenarios/README.md +214 -0
- modules/mute-agent/experiments/v2_scenarios/__init__.py +4 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_1_deep_dependency.py +325 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_2_adversarial.py +328 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_3_false_positive.py +303 -0
- modules/mute-agent/experiments/v2_scenarios/scenario_4_performance.py +319 -0
- modules/mute-agent/experiments/visualize.py +400 -0
- modules/mute-agent/mute_agent/__init__.py +66 -0
- modules/mute-agent/mute_agent/core/__init__.py +1 -0
- modules/mute-agent/mute_agent/core/execution_agent.py +164 -0
- modules/mute-agent/mute_agent/core/handshake_protocol.py +199 -0
- modules/mute-agent/mute_agent/core/reasoning_agent.py +236 -0
- modules/mute-agent/mute_agent/knowledge_graph/__init__.py +1 -0
- modules/mute-agent/mute_agent/knowledge_graph/graph_elements.py +63 -0
- modules/mute-agent/mute_agent/knowledge_graph/multidimensional_graph.py +168 -0
- modules/mute-agent/mute_agent/knowledge_graph/subgraph.py +222 -0
- modules/mute-agent/mute_agent/listener/__init__.py +41 -0
- modules/mute-agent/mute_agent/listener/adapters/__init__.py +29 -0
- modules/mute-agent/mute_agent/listener/adapters/base_adapter.py +187 -0
- modules/mute-agent/mute_agent/listener/adapters/caas_adapter.py +342 -0
- modules/mute-agent/mute_agent/listener/adapters/control_plane_adapter.py +434 -0
- modules/mute-agent/mute_agent/listener/adapters/iatp_adapter.py +330 -0
- modules/mute-agent/mute_agent/listener/adapters/scak_adapter.py +249 -0
- modules/mute-agent/mute_agent/listener/listener.py +608 -0
- modules/mute-agent/mute_agent/listener/state_observer.py +434 -0
- modules/mute-agent/mute_agent/listener/threshold_config.py +311 -0
- modules/mute-agent/mute_agent/super_system/__init__.py +1 -0
- modules/mute-agent/mute_agent/super_system/router.py +202 -0
- modules/mute-agent/mute_agent/visualization/__init__.py +8 -0
- modules/mute-agent/mute_agent/visualization/graph_debugger.py +495 -0
- modules/mute-agent/requirements-dev.txt +6 -0
- modules/mute-agent/requirements.txt +9 -0
- modules/mute-agent/setup.py +64 -0
- modules/mute-agent/src/__init__.py +0 -0
- modules/mute-agent/src/agents/__init__.py +0 -0
- modules/mute-agent/src/agents/baseline_agent.py +524 -0
- modules/mute-agent/src/agents/interactive_agent.py +113 -0
- modules/mute-agent/src/agents/mute_agent.py +622 -0
- modules/mute-agent/src/benchmarks/__init__.py +0 -0
- modules/mute-agent/src/benchmarks/evaluator.py +481 -0
- modules/mute-agent/src/benchmarks/scenarios.json +985 -0
- modules/mute-agent/src/core/__init__.py +0 -0
- modules/mute-agent/src/core/mock_state.py +320 -0
- modules/mute-agent/src/core/tools.py +441 -0
- modules/nexus/__init__.py +49 -0
- modules/nexus/arbiter.py +357 -0
- modules/nexus/client.py +464 -0
- modules/nexus/dmz.py +417 -0
- modules/nexus/escrow.py +428 -0
- modules/nexus/exceptions.py +284 -0
- modules/nexus/registry.py +391 -0
- modules/nexus/reputation.py +423 -0
- modules/nexus/schemas/__init__.py +49 -0
- modules/nexus/schemas/compliance.py +274 -0
- modules/nexus/schemas/escrow.py +249 -0
- modules/nexus/schemas/manifest.py +223 -0
- modules/nexus/schemas/receipt.py +206 -0
- modules/observability/README.md +192 -0
- modules/observability/alertmanager/alertmanager.yml +116 -0
- modules/observability/alerts/agent-os-alerts.yaml +197 -0
- modules/observability/docker-compose.yml +128 -0
- modules/observability/grafana/dashboards/agent-os-amb.json +448 -0
- modules/observability/grafana/dashboards/agent-os-cmvk.json +441 -0
- modules/observability/grafana/dashboards/agent-os-overview.json +268 -0
- modules/observability/grafana/dashboards/agent-os-performance.json +15 -0
- modules/observability/grafana/dashboards/agent-os-safety.json +50 -0
- modules/observability/grafana/provisioning/dashboards/dashboards.yml +15 -0
- modules/observability/grafana/provisioning/datasources/datasources.yml +33 -0
- modules/observability/otel/otel-collector-config.yml +61 -0
- modules/observability/prometheus/prometheus.yml +63 -0
- modules/observability/pyproject.toml +53 -0
- modules/observability/scripts/export_dashboards.py +55 -0
- modules/observability/src/agent_os_observability/__init__.py +25 -0
- modules/observability/src/agent_os_observability/dashboards.py +896 -0
- modules/observability/src/agent_os_observability/metrics.py +396 -0
- modules/observability/src/agent_os_observability/server.py +221 -0
- modules/observability/src/agent_os_observability/tracer.py +226 -0
- modules/primitives/.gitignore +8 -0
- modules/primitives/README.md +62 -0
- modules/primitives/agent_primitives/__init__.py +22 -0
- modules/primitives/agent_primitives/failures.py +82 -0
- modules/primitives/agent_primitives/py.typed +0 -0
- modules/primitives/pyproject.toml +68 -0
- modules/scak/.github/copilot-instructions.md +396 -0
- modules/scak/.github/workflows/release.yml +117 -0
- modules/scak/.gitignore +32 -0
- modules/scak/CHANGELOG.md +173 -0
- modules/scak/CITATION.cff +62 -0
- modules/scak/CONTRIBUTING.md +429 -0
- modules/scak/Dockerfile +58 -0
- modules/scak/ENTERPRISE_FEATURES.md +518 -0
- modules/scak/IMPLEMENTATION_SUMMARY.md +206 -0
- modules/scak/LIMITATIONS.md +565 -0
- modules/scak/MANIFEST.in +16 -0
- modules/scak/NOVELTY.md +535 -0
- modules/scak/README.md +928 -0
- modules/scak/RESEARCH.md +670 -0
- modules/scak/agent_kernel/__init__.py +66 -0
- modules/scak/agent_kernel/analyzer.py +432 -0
- modules/scak/agent_kernel/auditor.py +31 -0
- modules/scak/agent_kernel/completeness_auditor.py +234 -0
- modules/scak/agent_kernel/detector.py +200 -0
- modules/scak/agent_kernel/kernel.py +741 -0
- modules/scak/agent_kernel/memory_manager.py +82 -0
- modules/scak/agent_kernel/models.py +372 -0
- modules/scak/agent_kernel/nudge_mechanism.py +260 -0
- modules/scak/agent_kernel/outcome_analyzer.py +335 -0
- modules/scak/agent_kernel/patcher.py +579 -0
- modules/scak/agent_kernel/semantic_analyzer.py +313 -0
- modules/scak/agent_kernel/semantic_purge.py +346 -0
- modules/scak/agent_kernel/simulator.py +447 -0
- modules/scak/agent_kernel/teacher.py +82 -0
- modules/scak/agent_kernel/triage.py +149 -0
- modules/scak/build_and_publish.ps1 +74 -0
- modules/scak/build_and_publish.sh +74 -0
- modules/scak/cli.py +471 -0
- modules/scak/dashboard.py +462 -0
- modules/scak/datasets/DATASET_CARD.md +219 -0
- modules/scak/datasets/README.md +143 -0
- modules/scak/datasets/gaia_vague_queries/vague_queries.json +262 -0
- modules/scak/datasets/hf_upload/README.md +219 -0
- modules/scak/datasets/hf_upload/scak_gaia_laziness.jsonl +50 -0
- modules/scak/datasets/prepare_hf_datasets.py +145 -0
- modules/scak/datasets/red_team/jailbreak_patterns.json +202 -0
- modules/scak/docker-compose.yml +99 -0
- modules/scak/docs/Adaptive-Memory-Hierarchy.md +319 -0
- modules/scak/docs/Data-Contracts-and-Schemas.md +285 -0
- modules/scak/docs/Dual-Loop-Architecture.md +344 -0
- modules/scak/docs/Enhanced-Features.md +612 -0
- modules/scak/docs/LANGCHAIN_INTEGRATION.md +572 -0
- modules/scak/docs/README.md +128 -0
- modules/scak/docs/Reference-Implementations.md +163 -0
- modules/scak/docs/SCAK_V2.md +374 -0
- modules/scak/docs/Three-Failure-Types.md +178 -0
- modules/scak/examples/basic_example.py +155 -0
- modules/scak/examples/circuit_breaker_lazy_eval_demo.py +243 -0
- modules/scak/examples/langchain_integration_example.py +339 -0
- modules/scak/examples/layer4_demo.py +243 -0
- modules/scak/examples/production_features_demo.py +353 -0
- modules/scak/examples/quick_demo.py +79 -0
- modules/scak/examples/scak_v2_demo.py +252 -0
- modules/scak/experiments/README.md +438 -0
- modules/scak/experiments/ablation_studies/README.md +192 -0
- modules/scak/experiments/ablation_studies/ablation_no_audit.py +116 -0
- modules/scak/experiments/ablation_studies/ablation_no_purge.py +133 -0
- modules/scak/experiments/chaos_engineering/README.md +332 -0
- modules/scak/experiments/context_efficiency_test.py +328 -0
- modules/scak/experiments/gaia_benchmark/README.md +208 -0
- modules/scak/experiments/laziness_benchmark.py +179 -0
- modules/scak/experiments/long_horizon_task_experiment.py +252 -0
- modules/scak/experiments/multi_agent_rag_experiment.py +284 -0
- modules/scak/experiments/results/ablation_table.md +12 -0
- modules/scak/experiments/results/long_horizon.json +36 -0
- modules/scak/experiments/results/multi_agent_rag.json +66 -0
- modules/scak/experiments/run_comprehensive_ablations.py +332 -0
- modules/scak/experiments/test_auditor_patcher_integration.py +251 -0
- modules/scak/notebooks/getting_started.ipynb +33 -0
- modules/scak/paper/ARXIV_SUBMISSION_METADATA.txt +109 -0
- modules/scak/paper/PAPER_CHECKLIST.md +304 -0
- modules/scak/paper/Paper.pdf +0 -0
- modules/scak/paper/README.md +113 -0
- modules/scak/paper/appendix.md +351 -0
- modules/scak/paper/arxiv/bibliography.bib +284 -0
- modules/scak/paper/arxiv/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/arxiv/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/arxiv/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/arxiv/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/arxiv/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/arxiv/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/arxiv/main.aux +103 -0
- modules/scak/paper/arxiv/main.bbl +113 -0
- modules/scak/paper/arxiv/main.blg +55 -0
- modules/scak/paper/arxiv/main.out +31 -0
- modules/scak/paper/arxiv/main.pdf +0 -0
- modules/scak/paper/arxiv/main.tex +482 -0
- modules/scak/paper/arxiv_submission/bibliography.bib +284 -0
- modules/scak/paper/arxiv_submission/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/arxiv_submission/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/arxiv_submission/main.aux +103 -0
- modules/scak/paper/arxiv_submission/main.bbl +113 -0
- modules/scak/paper/arxiv_submission/main.blg +55 -0
- modules/scak/paper/arxiv_submission/main.out +31 -0
- modules/scak/paper/arxiv_submission/main.pdf +0 -0
- modules/scak/paper/arxiv_submission/main.tex +482 -0
- modules/scak/paper/arxiv_submission.tar.gz +0 -0
- modules/scak/paper/bibliography.bib +284 -0
- modules/scak/paper/build.sh +55 -0
- modules/scak/paper/figures/README.md +32 -0
- modules/scak/paper/figures/fig1_ooda_architecture.md +75 -0
- modules/scak/paper/figures/fig1_ooda_architecture.pdf +0 -0
- modules/scak/paper/figures/fig1_ooda_architecture.png +0 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.md +83 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.pdf +0 -0
- modules/scak/paper/figures/fig2_memory_hierarchy.png +0 -0
- modules/scak/paper/figures/fig3_gaia_results.md +64 -0
- modules/scak/paper/figures/fig3_gaia_results.pdf +0 -0
- modules/scak/paper/figures/fig3_gaia_results.png +0 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.md +64 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.pdf +0 -0
- modules/scak/paper/figures/fig4_ablation_heatmap.png +0 -0
- modules/scak/paper/figures/fig5_context_reduction.md +71 -0
- modules/scak/paper/figures/fig5_context_reduction.pdf +0 -0
- modules/scak/paper/figures/fig5_context_reduction.png +0 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.md +80 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.pdf +0 -0
- modules/scak/paper/figures/fig6_mttr_boxplot.png +0 -0
- modules/scak/paper/figures/generate_figures.py +463 -0
- modules/scak/paper/main.aux +103 -0
- modules/scak/paper/main.bbl +113 -0
- modules/scak/paper/main.blg +55 -0
- modules/scak/paper/main.md +192 -0
- modules/scak/paper/main.out +31 -0
- modules/scak/paper/main.pdf +0 -0
- modules/scak/paper/main.tex +482 -0
- modules/scak/reproducibility/ABLATIONS.md +225 -0
- modules/scak/reproducibility/Dockerfile.reproducibility +34 -0
- modules/scak/reproducibility/README.md +421 -0
- modules/scak/reproducibility/requirements-pinned.txt +32 -0
- modules/scak/reproducibility/run_all_experiments.py +395 -0
- modules/scak/reproducibility/seed_control.py +53 -0
- modules/scak/reproducibility/statistical_analysis.py +302 -0
- modules/scak/requirements.txt +50 -0
- modules/scak/setup.py +93 -0
- modules/scak/src/__init__.py +124 -0
- modules/scak/src/agents/__init__.py +13 -0
- modules/scak/src/agents/conflict_resolution.py +732 -0
- modules/scak/src/agents/orchestrator.py +761 -0
- modules/scak/src/agents/pubsub.py +484 -0
- modules/scak/src/agents/shadow_teacher.py +344 -0
- modules/scak/src/agents/swarm.py +661 -0
- modules/scak/src/agents/worker.py +357 -0
- modules/scak/src/integrations/__init__.py +81 -0
- modules/scak/src/integrations/cmvk_adapter.py +430 -0
- modules/scak/src/integrations/control_plane_adapter.py +601 -0
- modules/scak/src/integrations/langchain_integration.py +902 -0
- modules/scak/src/interfaces/__init__.py +59 -0
- modules/scak/src/interfaces/llm_clients.py +505 -0
- modules/scak/src/interfaces/openapi_tools.py +611 -0
- modules/scak/src/interfaces/plugin_system.py +605 -0
- modules/scak/src/interfaces/protocols.py +365 -0
- modules/scak/src/interfaces/telemetry.py +464 -0
- modules/scak/src/interfaces/tool_registry.py +547 -0
- modules/scak/src/kernel/__init__.py +100 -0
- modules/scak/src/kernel/auditor.py +305 -0
- modules/scak/src/kernel/circuit_breaker.py +398 -0
- modules/scak/src/kernel/core.py +724 -0
- modules/scak/src/kernel/distributed.py +667 -0
- modules/scak/src/kernel/evolution.py +455 -0
- modules/scak/src/kernel/failover.py +621 -0
- modules/scak/src/kernel/governance.py +710 -0
- modules/scak/src/kernel/governance_v2.py +603 -0
- modules/scak/src/kernel/lazy_evaluator.py +514 -0
- modules/scak/src/kernel/load_testing.py +633 -0
- modules/scak/src/kernel/memory.py +945 -0
- modules/scak/src/kernel/patcher.py +581 -0
- modules/scak/src/kernel/rubric.py +419 -0
- modules/scak/src/kernel/schemas.py +390 -0
- modules/scak/src/kernel/skill_mapper.py +309 -0
- modules/scak/src/kernel/triage.py +149 -0
- modules/scak/src/mocks/__init__.py +99 -0
- modules/scak/tests/__init__.py +1 -0
- modules/scak/tests/test_circuit_breaker.py +403 -0
- modules/scak/tests/test_conflict_resolution.py +287 -0
- modules/scak/tests/test_dual_loop.py +463 -0
- modules/scak/tests/test_enhanced_features.py +421 -0
- modules/scak/tests/test_failover_and_load.py +438 -0
- modules/scak/tests/test_governance.py +185 -0
- modules/scak/tests/test_kernel.py +359 -0
- modules/scak/tests/test_langchain_integration.py +451 -0
- modules/scak/tests/test_lazy_evaluator.py +465 -0
- modules/scak/tests/test_llm_clients.py +122 -0
- modules/scak/tests/test_memory_controller.py +528 -0
- modules/scak/tests/test_orchestrator.py +181 -0
- modules/scak/tests/test_phase3_integration.py +265 -0
- modules/scak/tests/test_pubsub_swarm.py +203 -0
- modules/scak/tests/test_reference_implementations.py +240 -0
- modules/scak/tests/test_rubric.py +363 -0
- modules/scak/tests/test_scak_v2.py +651 -0
- modules/scak/tests/test_skill_mapper.py +217 -0
- modules/scak/tests/test_specific_failures.py +393 -0
- modules/scak/tests/test_tool_registry.py +264 -0
- modules/scak/tests/test_tools_and_plugins.py +303 -0
- modules/scak/tests/test_triage.py +596 -0
- modules/scak/tests/test_write_through.py +319 -0
- agent_os_kernel-1.1.0.dist-info/METADATA +0 -400
- agent_os_kernel-1.1.0.dist-info/RECORD +0 -12
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.3.0.dist-info}/WHEEL +0 -0
- {agent_os_kernel-1.1.0.dist-info → agent_os_kernel-1.3.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
# Feature 3: The Witness (Traceability)
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
Feature 3 implements comprehensive traceability for the Cross-Model Verification Kernel. This feature logs the entire debate between the Generator and Verifier, creating a clean JSON artifact suitable for research papers and supplementary materials.
|
|
6
|
+
|
|
7
|
+
## Key Components
|
|
8
|
+
|
|
9
|
+
### 1. TraceLogger (`src/core/trace_logger.py`)
|
|
10
|
+
|
|
11
|
+
The `TraceLogger` class serializes the complete `NodeState` (including history, forbidden strategies, and feedback) into structured JSON files.
|
|
12
|
+
|
|
13
|
+
**Features:**
|
|
14
|
+
- Automatic directory creation (`logs/traces/` by default)
|
|
15
|
+
- JSON serialization of dataclasses using `asdict()`
|
|
16
|
+
- Metadata generation (timestamp, attempt count, final status)
|
|
17
|
+
- Clean, readable output with proper encoding
|
|
18
|
+
|
|
19
|
+
**Usage:**
|
|
20
|
+
```python
|
|
21
|
+
from src.core.trace_logger import TraceLogger
|
|
22
|
+
from src.core.types import NodeState
|
|
23
|
+
|
|
24
|
+
logger = TraceLogger(log_dir="logs/traces")
|
|
25
|
+
state = NodeState(input_query="Your problem here")
|
|
26
|
+
# ... populate state with history ...
|
|
27
|
+
filepath = logger.save_trace("experiment_01", state)
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
### 2. SimpleVerificationKernel (`src/simple_kernel.py`)
|
|
31
|
+
|
|
32
|
+
A simplified kernel implementation designed specifically for research experiments. It provides:
|
|
33
|
+
- Direct `solve()` method interface
|
|
34
|
+
- Integration with `NodeState` tracking
|
|
35
|
+
- Automatic trace logging at completion
|
|
36
|
+
- Strategy detection and banning (Feature 2)
|
|
37
|
+
|
|
38
|
+
**Usage:**
|
|
39
|
+
```python
|
|
40
|
+
from src.simple_kernel import SimpleVerificationKernel
|
|
41
|
+
|
|
42
|
+
kernel = SimpleVerificationKernel(max_retries=5)
|
|
43
|
+
solution = kernel.solve(
|
|
44
|
+
query="Write a function to merge sorted arrays",
|
|
45
|
+
run_id="experiment_001"
|
|
46
|
+
)
|
|
47
|
+
# Trace is automatically saved to logs/traces/experiment_001_<timestamp>.json
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### 3. Paper Data Generator (`experiments/paper_data_generator.py`)
|
|
51
|
+
|
|
52
|
+
An experiment runner that compares:
|
|
53
|
+
1. **Baseline**: Single agent (OpenAI) without verification
|
|
54
|
+
2. **CMVK**: Full Cross-Model Verification Kernel with traceability
|
|
55
|
+
|
|
56
|
+
**Running the Experiment:**
|
|
57
|
+
```bash
|
|
58
|
+
cd /path/to/cross-model-verification-kernel
|
|
59
|
+
python experiments/paper_data_generator.py
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
**Output:**
|
|
63
|
+
- Baseline solutions: `logs/baseline_*.py`
|
|
64
|
+
- CMVK traces: `logs/traces/cmvk_*.json`
|
|
65
|
+
|
|
66
|
+
### 4. Enhanced VerificationKernel
|
|
67
|
+
|
|
68
|
+
The main `VerificationKernel` (`src/core/kernel.py`) now supports optional trace logging:
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
from src.core.kernel import VerificationKernel
|
|
72
|
+
from src.agents.generator_openai import OpenAIGenerator
|
|
73
|
+
from src.agents.verifier_gemini import GeminiVerifier
|
|
74
|
+
|
|
75
|
+
kernel = VerificationKernel(
|
|
76
|
+
generator=OpenAIGenerator(),
|
|
77
|
+
verifier=GeminiVerifier(),
|
|
78
|
+
enable_trace_logging=True # Enable traceability
|
|
79
|
+
)
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Trace File Format
|
|
83
|
+
|
|
84
|
+
Each trace file contains:
|
|
85
|
+
|
|
86
|
+
```json
|
|
87
|
+
{
|
|
88
|
+
"input_query": "The original problem statement",
|
|
89
|
+
"current_code": "The final solution code (if found)",
|
|
90
|
+
"status": "verified|rejected|pending",
|
|
91
|
+
"history": [
|
|
92
|
+
{
|
|
93
|
+
"step_id": 1,
|
|
94
|
+
"code_generated": "First attempt code",
|
|
95
|
+
"verifier_feedback": "Feedback from verifier",
|
|
96
|
+
"status": "success|failed",
|
|
97
|
+
"strategy_used": "recursive|iterative|numpy|..."
|
|
98
|
+
}
|
|
99
|
+
],
|
|
100
|
+
"forbidden_strategies": ["strategies", "that", "failed"],
|
|
101
|
+
"meta": {
|
|
102
|
+
"timestamp": "20260121-182813",
|
|
103
|
+
"total_attempts": 3,
|
|
104
|
+
"final_status": "solved|failed"
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## Benefits for Research
|
|
110
|
+
|
|
111
|
+
1. **Reproducibility**: Complete record of every generation-verification iteration
|
|
112
|
+
2. **Analysis**: Track which strategies were tried and why they failed
|
|
113
|
+
3. **Visualization**: Data ready for charts and graphs
|
|
114
|
+
4. **Supplementary Material**: Professional JSON artifacts for paper submission
|
|
115
|
+
5. **Debugging**: Full audit trail for understanding system behavior
|
|
116
|
+
|
|
117
|
+
## Testing
|
|
118
|
+
|
|
119
|
+
Run the test suite:
|
|
120
|
+
```bash
|
|
121
|
+
python tests/test_trace_logger.py
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
Tests cover:
|
|
125
|
+
- Basic trace logging functionality
|
|
126
|
+
- Failed state handling
|
|
127
|
+
- JSON structure validation
|
|
128
|
+
- Metadata generation
|
|
129
|
+
|
|
130
|
+
## Integration with Features 1 & 2
|
|
131
|
+
|
|
132
|
+
- **Feature 1 (Generator ↔ Verifier Loop)**: Traces capture each iteration
|
|
133
|
+
- **Feature 2 (Lateral Thinking)**: Forbidden strategies are logged in traces
|
|
134
|
+
- **Feature 3 (Traceability)**: This feature - complete audit trail
|
|
135
|
+
|
|
136
|
+
## Next Steps
|
|
137
|
+
|
|
138
|
+
As mentioned in the problem statement, you can now:
|
|
139
|
+
|
|
140
|
+
1. **Visualize the Traces**: Create a script to replay the debate
|
|
141
|
+
2. **Scale the Experiment**: Load HumanEval dataset (164 problems)
|
|
142
|
+
3. **Draft the Paper**: Use traces to write the Methodology section
|
|
143
|
+
|
|
144
|
+
## File Locations
|
|
145
|
+
|
|
146
|
+
- **TraceLogger**: `src/core/trace_logger.py`
|
|
147
|
+
- **SimpleKernel**: `src/simple_kernel.py`
|
|
148
|
+
- **Experiment**: `experiments/paper_data_generator.py`
|
|
149
|
+
- **Tests**: `tests/test_trace_logger.py`
|
|
150
|
+
- **Traces**: `logs/traces/` (gitignored)
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Example: Basic Adversarial Verification
|
|
3
|
+
|
|
4
|
+
Demonstrates the basic usage of the Cross-Model Verification Kernel
|
|
5
|
+
with GPT-4o as Generator and Gemini 1.5 Pro as Verifier.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from cross_model_verification_kernel.generator import GeneratorConfig
|
|
9
|
+
from cross_model_verification_kernel.kernel import VerificationKernel
|
|
10
|
+
from cross_model_verification_kernel.models import ModelProvider
|
|
11
|
+
from cross_model_verification_kernel.verifier import VerifierConfig
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def main() -> None:
|
|
15
|
+
"""Run basic adversarial verification example"""
|
|
16
|
+
|
|
17
|
+
print("Cross-Model Verification Kernel - Basic Example")
|
|
18
|
+
print("=" * 80)
|
|
19
|
+
print("\nInitializing adversarial architecture...")
|
|
20
|
+
print("- Generator: GPT-4o")
|
|
21
|
+
print("- Verifier: Gemini 1.5 Pro (hostile code review mode)\n")
|
|
22
|
+
|
|
23
|
+
# Configure Generator with GPT-4o
|
|
24
|
+
generator_config = GeneratorConfig(model=ModelProvider.GPT4O, temperature=0.7, max_tokens=2000)
|
|
25
|
+
|
|
26
|
+
# Configure Verifier with Gemini 1.5 Pro (different model!)
|
|
27
|
+
verifier_config = VerifierConfig(
|
|
28
|
+
model=ModelProvider.GEMINI_15_PRO, temperature=0.2, adversarial_mode=True
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
# Create the Verification Kernel
|
|
32
|
+
# Note: This will enforce that generator and verifier use different models
|
|
33
|
+
kernel = VerificationKernel(generator_config=generator_config, verifier_config=verifier_config)
|
|
34
|
+
|
|
35
|
+
# Task: Generate a Fibonacci function
|
|
36
|
+
task = "Create a function to calculate the nth Fibonacci number"
|
|
37
|
+
|
|
38
|
+
print(f"Task: {task}\n")
|
|
39
|
+
print("Executing adversarial verification pipeline...")
|
|
40
|
+
print("1. Generating code with GPT-4o...")
|
|
41
|
+
print("2. Hostile code review with Gemini 1.5 Pro...")
|
|
42
|
+
print("3. Calculating blind spot probability reduction...\n")
|
|
43
|
+
|
|
44
|
+
# Execute verification
|
|
45
|
+
result = kernel.verify_task(task_description=task, language="python")
|
|
46
|
+
|
|
47
|
+
# Print detailed results
|
|
48
|
+
kernel.print_verification_summary(result)
|
|
49
|
+
|
|
50
|
+
# Print statistics
|
|
51
|
+
print("\n\nKernel Statistics:")
|
|
52
|
+
print("-" * 80)
|
|
53
|
+
stats = kernel.get_statistics()
|
|
54
|
+
print(f"Total Verifications: {stats['total_verifications']}")
|
|
55
|
+
print(f"Success Rate: {stats['success_rate']:.1%}")
|
|
56
|
+
print(f"Average Risk Reduction: {stats['average_risk_reduction_factor']:.2f}x")
|
|
57
|
+
print(f"Model Diversity Enforced: {stats['model_diversity']['are_different']}")
|
|
58
|
+
print("-" * 80)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
if __name__ == "__main__":
|
|
62
|
+
main()
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Demo: Complete Workflow for Cross-Model Verification Kernel
|
|
4
|
+
|
|
5
|
+
This demo shows the full pipeline from data loading to visualization:
|
|
6
|
+
1. Load problems from HumanEval dataset
|
|
7
|
+
2. Run experiments (simulated - no real API calls for demo)
|
|
8
|
+
3. Visualize the traces
|
|
9
|
+
|
|
10
|
+
Run this demo to see all the new features in action.
|
|
11
|
+
"""
|
|
12
|
+
import os
|
|
13
|
+
import sys
|
|
14
|
+
|
|
15
|
+
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
16
|
+
|
|
17
|
+
from cross_model_verification_kernel.core.trace_logger import TraceLogger
|
|
18
|
+
from cross_model_verification_kernel.core.types import ExecutionTrace, NodeState
|
|
19
|
+
from cross_model_verification_kernel.datasets.humaneval_loader import HumanEvalLoader
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def print_header(title):
|
|
23
|
+
"""Print a formatted header."""
|
|
24
|
+
print("\n" + "=" * 80)
|
|
25
|
+
print(f" {title}")
|
|
26
|
+
print("=" * 80 + "\n")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def demo_humaneval_loader():
|
|
30
|
+
"""Demonstrate the HumanEval loader."""
|
|
31
|
+
print_header("STEP 1: HumanEval Dataset Loader")
|
|
32
|
+
|
|
33
|
+
print("Loading HumanEval sample dataset...")
|
|
34
|
+
loader = HumanEvalLoader()
|
|
35
|
+
|
|
36
|
+
print(f"✅ Loaded {len(loader)} problems")
|
|
37
|
+
print("\nSample problems:")
|
|
38
|
+
|
|
39
|
+
for i, problem in enumerate(loader.get_problem_subset(0, 3), 1):
|
|
40
|
+
print(f" {i}. {problem['task_id']} - {problem['entry_point']}")
|
|
41
|
+
|
|
42
|
+
print("\n📝 Formatting problems for kernel...")
|
|
43
|
+
formatted = loader.format_all_for_kernel(start=0, count=3)
|
|
44
|
+
|
|
45
|
+
print(f"✅ Formatted {len(formatted)} problems")
|
|
46
|
+
print(f"\nExample formatted problem ID: {formatted[0]['id']}")
|
|
47
|
+
print(f"Query preview: {formatted[0]['query'][:100]}...")
|
|
48
|
+
|
|
49
|
+
return formatted
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def demo_trace_generation(problems):
|
|
53
|
+
"""Demonstrate trace generation (simulated)."""
|
|
54
|
+
print_header("STEP 2: Generating Traces (Simulated)")
|
|
55
|
+
|
|
56
|
+
print("In a real experiment, this would:")
|
|
57
|
+
print(" 1. Run baseline (single GPT-4o) on each problem")
|
|
58
|
+
print(" 2. Run CMVK (GPT-4o + Gemini) on each problem")
|
|
59
|
+
print(" 3. Save traces to logs/traces/")
|
|
60
|
+
|
|
61
|
+
print("\nFor demo purposes, generating a sample trace...")
|
|
62
|
+
|
|
63
|
+
# Create a simulated trace
|
|
64
|
+
state = NodeState(
|
|
65
|
+
input_query=problems[0]["query"][:100] + "...",
|
|
66
|
+
current_code="def solution():\n # Final verified solution\n pass",
|
|
67
|
+
status="verified",
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
# Add some history
|
|
71
|
+
trace1 = ExecutionTrace(
|
|
72
|
+
step_id=1,
|
|
73
|
+
code_generated="def solution():\n # First attempt\n return []",
|
|
74
|
+
verifier_feedback="OBJECTION! Missing edge case handling for empty input",
|
|
75
|
+
status="failed",
|
|
76
|
+
strategy_used="naive",
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
trace2 = ExecutionTrace(
|
|
80
|
+
step_id=2,
|
|
81
|
+
code_generated="def solution():\n # Second attempt with edge cases\n if not input:\n return default\n return process(input)",
|
|
82
|
+
verifier_feedback="PASS: Correctly handles edge cases",
|
|
83
|
+
status="success",
|
|
84
|
+
strategy_used="defensive",
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
state.history.extend([trace1, trace2])
|
|
88
|
+
state.forbidden_strategies.append("naive")
|
|
89
|
+
|
|
90
|
+
# Save trace
|
|
91
|
+
logger = TraceLogger()
|
|
92
|
+
trace_file = logger.save_trace("demo_experiment", state)
|
|
93
|
+
|
|
94
|
+
print("\n✅ Sample trace generated and saved")
|
|
95
|
+
|
|
96
|
+
return trace_file
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def demo_visualizer(trace_file):
|
|
100
|
+
"""Demonstrate the trace visualizer."""
|
|
101
|
+
print_header("STEP 3: Visualizing Traces")
|
|
102
|
+
|
|
103
|
+
print("The visualizer replays traces as adversarial debates:")
|
|
104
|
+
print(f"\nCommand: python -m src.tools.visualizer {trace_file}\n")
|
|
105
|
+
|
|
106
|
+
print("Output shows:")
|
|
107
|
+
print(" 🎭 Problem statement")
|
|
108
|
+
print(" 🔄 Each attempt with:")
|
|
109
|
+
print(" - Builder (GPT-4o) generating solution")
|
|
110
|
+
print(" - Prosecutor (Gemini) finding flaws")
|
|
111
|
+
print(" - Kernel making decisions")
|
|
112
|
+
print(" 🏁 Final result and statistics")
|
|
113
|
+
|
|
114
|
+
print("\n💡 Try these commands:")
|
|
115
|
+
print(" python -m src.tools.visualizer --list")
|
|
116
|
+
print(" python -m src.tools.visualizer --latest")
|
|
117
|
+
print(f" python -m src.tools.visualizer {trace_file} --speed 0")
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def demo_paper_generator():
|
|
121
|
+
"""Demonstrate the updated paper data generator."""
|
|
122
|
+
print_header("STEP 4: Scaling Experiments")
|
|
123
|
+
|
|
124
|
+
print("The paper_data_generator.py now supports HumanEval:\n")
|
|
125
|
+
|
|
126
|
+
print("📊 Run experiments at different scales:")
|
|
127
|
+
print("\n # Small scale (5 problems)")
|
|
128
|
+
print(" python experiments/paper_data_generator.py --humaneval --count 5")
|
|
129
|
+
|
|
130
|
+
print("\n # Medium scale (50 problems - suitable for paper)")
|
|
131
|
+
print(" python experiments/paper_data_generator.py --humaneval --count 50")
|
|
132
|
+
|
|
133
|
+
print("\n # Full dataset (all problems)")
|
|
134
|
+
print(" python experiments/paper_data_generator.py --humaneval")
|
|
135
|
+
|
|
136
|
+
print("\n # Legacy mode (original 2 problems)")
|
|
137
|
+
print(" python experiments/paper_data_generator.py --legacy")
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def demo_paper_structure():
|
|
141
|
+
"""Show the paper structure."""
|
|
142
|
+
print_header("STEP 5: Research Paper Draft")
|
|
143
|
+
|
|
144
|
+
print("PAPER.md contains the complete research narrative:\n")
|
|
145
|
+
|
|
146
|
+
sections = [
|
|
147
|
+
("Abstract", "Overview of CMVK and key results"),
|
|
148
|
+
("Introduction", "Problem statement and motivation"),
|
|
149
|
+
("Methodology", "Architecture and adversarial design"),
|
|
150
|
+
("Experiments", "HumanEval benchmark and results"),
|
|
151
|
+
("Tools", "Reproducibility and visualization"),
|
|
152
|
+
("Discussion", "Why multi-model verification works"),
|
|
153
|
+
("Conclusion", "Key insights and future work"),
|
|
154
|
+
]
|
|
155
|
+
|
|
156
|
+
for section, description in sections:
|
|
157
|
+
print(f" 📄 {section:20} - {description}")
|
|
158
|
+
|
|
159
|
+
print("\n💡 Next steps for publication:")
|
|
160
|
+
print(" 1. Run full HumanEval experiments (n=50+)")
|
|
161
|
+
print(" 2. Fill in results tables in Section 3.3")
|
|
162
|
+
print(" 3. Add system prompts to Appendix A")
|
|
163
|
+
print(" 4. Include example traces in Appendix B")
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def main():
|
|
167
|
+
"""Run the complete demo."""
|
|
168
|
+
print("\n" + "=" * 80)
|
|
169
|
+
print(" 🎭 CROSS-MODEL VERIFICATION KERNEL (CMVK) - DEMO 🎭")
|
|
170
|
+
print(" From Engineering to Science: The Complete Pipeline")
|
|
171
|
+
print("=" * 80)
|
|
172
|
+
|
|
173
|
+
# Step 1: Load data
|
|
174
|
+
problems = demo_humaneval_loader()
|
|
175
|
+
|
|
176
|
+
# Step 2: Generate traces
|
|
177
|
+
trace_file = demo_trace_generation(problems)
|
|
178
|
+
|
|
179
|
+
# Step 3: Visualize
|
|
180
|
+
demo_visualizer(trace_file)
|
|
181
|
+
|
|
182
|
+
# Step 4: Scale experiments
|
|
183
|
+
demo_paper_generator()
|
|
184
|
+
|
|
185
|
+
# Step 5: Paper structure
|
|
186
|
+
demo_paper_structure()
|
|
187
|
+
|
|
188
|
+
# Summary
|
|
189
|
+
print_header("🎉 DEMO COMPLETE")
|
|
190
|
+
|
|
191
|
+
print("You now have a complete research pipeline:")
|
|
192
|
+
print("\n✅ Data ingestion (HumanEval)")
|
|
193
|
+
print("✅ Experiment automation (paper_data_generator.py)")
|
|
194
|
+
print("✅ Traceability (TraceLogger)")
|
|
195
|
+
print("✅ Visualization (visualizer.py)")
|
|
196
|
+
print("✅ Research narrative (PAPER.md)")
|
|
197
|
+
|
|
198
|
+
print("\n🚀 Ready to publish! Next immediate actions:")
|
|
199
|
+
print("\n1. Set up API keys for OpenAI and Gemini")
|
|
200
|
+
print("2. Run: python experiments/paper_data_generator.py --humaneval --count 5")
|
|
201
|
+
print("3. Review traces: python -m src.tools.visualizer --latest")
|
|
202
|
+
print("4. Scale to 50+ problems for statistical significance")
|
|
203
|
+
|
|
204
|
+
print("\n📖 Read PAPER.md for complete methodology and context")
|
|
205
|
+
print("=" * 80 + "\n")
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
if __name__ == "__main__":
|
|
209
|
+
main()
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Integration demo: Shows all Phase 2 features working together.
|
|
4
|
+
|
|
5
|
+
This script demonstrates:
|
|
6
|
+
1. Prosecutor Mode - Generating hostile tests
|
|
7
|
+
2. Lateral Thinking - Strategy divergence after failures
|
|
8
|
+
3. Witness - Exporting conversation traces
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
from cross_model_verification_kernel.core.graph_memory import GraphMemory
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def demo_prosecutor_mode() -> None:
|
|
17
|
+
"""Demo Feature 1: Prosecutor Mode."""
|
|
18
|
+
print("\n" + "=" * 60)
|
|
19
|
+
print("FEATURE 1: PROSECUTOR MODE DEMO")
|
|
20
|
+
print("=" * 60)
|
|
21
|
+
|
|
22
|
+
from cross_model_verification_kernel.agents.verifier_gemini import GeminiVerifier
|
|
23
|
+
|
|
24
|
+
verifier = GeminiVerifier(enable_prosecutor_mode=True)
|
|
25
|
+
|
|
26
|
+
# Test function extraction
|
|
27
|
+
code = """
|
|
28
|
+
def divide(a, b):
|
|
29
|
+
return a / b
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
func_name = verifier._extract_function_name(code)
|
|
33
|
+
print(f"✓ Extracted function name: {func_name}")
|
|
34
|
+
|
|
35
|
+
# Demo hostile test generation
|
|
36
|
+
context = {"task": "Write a division function", "solution": code}
|
|
37
|
+
|
|
38
|
+
hostile_tests = verifier._generate_hostile_tests(context, "No zero check")
|
|
39
|
+
print(f"✓ Generated {len(hostile_tests)} hostile tests")
|
|
40
|
+
print(f" Example: {hostile_tests[0][:60]}...")
|
|
41
|
+
|
|
42
|
+
print("✓ Prosecutor Mode validated")
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def demo_lateral_thinking() -> None:
|
|
46
|
+
"""Demo Feature 2: Lateral Thinking."""
|
|
47
|
+
print("\n" + "=" * 60)
|
|
48
|
+
print("FEATURE 2: LATERAL THINKING DEMO")
|
|
49
|
+
print("=" * 60)
|
|
50
|
+
|
|
51
|
+
graph = GraphMemory()
|
|
52
|
+
task = "Write a fibonacci function"
|
|
53
|
+
|
|
54
|
+
# Simulate failures
|
|
55
|
+
recursive_solution = """
|
|
56
|
+
def fib(n):
|
|
57
|
+
if n <= 1:
|
|
58
|
+
return n
|
|
59
|
+
return fib(n-1) + fib(n-2)
|
|
60
|
+
"""
|
|
61
|
+
|
|
62
|
+
# First failure
|
|
63
|
+
print("Loop 1: Trying recursive approach...")
|
|
64
|
+
approach = graph.detect_approach(recursive_solution)
|
|
65
|
+
print(f" Detected approach: {approach}")
|
|
66
|
+
graph.record_approach_failure(recursive_solution, task)
|
|
67
|
+
print(" Recorded failure")
|
|
68
|
+
|
|
69
|
+
# Second failure
|
|
70
|
+
print("Loop 2: Trying recursive approach again...")
|
|
71
|
+
graph.record_approach_failure(recursive_solution, task)
|
|
72
|
+
print(" Recorded second failure")
|
|
73
|
+
|
|
74
|
+
# Check branching
|
|
75
|
+
should_branch = graph.should_branch(recursive_solution, task)
|
|
76
|
+
print(f" Should branch? {should_branch}")
|
|
77
|
+
|
|
78
|
+
if should_branch:
|
|
79
|
+
forbidden = graph.get_forbidden_approaches(task)
|
|
80
|
+
print(f" Forbidden approaches: {forbidden}")
|
|
81
|
+
print("✓ System will force different approach on next loop")
|
|
82
|
+
|
|
83
|
+
print("✓ Lateral Thinking validated")
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def demo_witness() -> None:
|
|
87
|
+
"""Demo Feature 3: Witness."""
|
|
88
|
+
print("\n" + "=" * 60)
|
|
89
|
+
print("FEATURE 3: WITNESS (TRACEABILITY) DEMO")
|
|
90
|
+
print("=" * 60)
|
|
91
|
+
|
|
92
|
+
graph = GraphMemory()
|
|
93
|
+
|
|
94
|
+
# Simulate conversation
|
|
95
|
+
graph.add_conversation_entry({"type": "task_start", "task": "Write fibonacci function"})
|
|
96
|
+
|
|
97
|
+
graph.add_conversation_entry({"type": "generation", "loop": 1, "approach": "recursive"})
|
|
98
|
+
|
|
99
|
+
graph.add_conversation_entry(
|
|
100
|
+
{
|
|
101
|
+
"type": "verification",
|
|
102
|
+
"loop": 1,
|
|
103
|
+
"outcome": "fail",
|
|
104
|
+
"critical_issues": ["Missing negative check"],
|
|
105
|
+
}
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
graph.add_conversation_entry({"type": "generation", "loop": 2, "approach": "iterative"})
|
|
109
|
+
|
|
110
|
+
graph.add_conversation_entry(
|
|
111
|
+
{"type": "verification", "loop": 2, "outcome": "pass", "confidence": 0.95}
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
trace = graph.get_conversation_trace()
|
|
115
|
+
print(f"✓ Created conversation trace with {len(trace)} entries")
|
|
116
|
+
|
|
117
|
+
# Export to temp file
|
|
118
|
+
import tempfile
|
|
119
|
+
|
|
120
|
+
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
|
|
121
|
+
temp_path = f.name
|
|
122
|
+
|
|
123
|
+
graph.export_conversation_trace(temp_path)
|
|
124
|
+
print(f"✓ Exported trace to: {temp_path}")
|
|
125
|
+
|
|
126
|
+
# Read and show snippet
|
|
127
|
+
import json
|
|
128
|
+
|
|
129
|
+
with open(temp_path) as f:
|
|
130
|
+
data = json.load(f)
|
|
131
|
+
|
|
132
|
+
print(f" Trace entries: {len(data['trace'])}")
|
|
133
|
+
print(f" First entry type: {data['trace'][0]['type']}")
|
|
134
|
+
print(f" Last entry type: {data['trace'][-1]['type']}")
|
|
135
|
+
|
|
136
|
+
print("✓ Witness validated")
|
|
137
|
+
|
|
138
|
+
# Cleanup
|
|
139
|
+
Path(temp_path).unlink()
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def demo_integration() -> None:
|
|
143
|
+
"""Demo all features working together."""
|
|
144
|
+
print("\n" + "=" * 60)
|
|
145
|
+
print("INTEGRATION: ALL FEATURES TOGETHER")
|
|
146
|
+
print("=" * 60)
|
|
147
|
+
|
|
148
|
+
from cross_model_verification_kernel import GeminiVerifier, OpenAIGenerator, VerificationKernel
|
|
149
|
+
|
|
150
|
+
try:
|
|
151
|
+
# Note: This requires API keys
|
|
152
|
+
generator = OpenAIGenerator()
|
|
153
|
+
verifier = GeminiVerifier(enable_prosecutor_mode=True)
|
|
154
|
+
kernel = VerificationKernel(generator, verifier)
|
|
155
|
+
|
|
156
|
+
print("✓ Initialized kernel with:")
|
|
157
|
+
print(" - Prosecutor Mode enabled")
|
|
158
|
+
print(" - Lateral Thinking active")
|
|
159
|
+
print(" - Witness trace recording")
|
|
160
|
+
|
|
161
|
+
# Get stats
|
|
162
|
+
stats = kernel.get_graph_stats()
|
|
163
|
+
print("\n✓ Graph stats:")
|
|
164
|
+
for key, value in stats.items():
|
|
165
|
+
print(f" {key}: {value}")
|
|
166
|
+
|
|
167
|
+
print("\n✓ All features integrated and ready")
|
|
168
|
+
|
|
169
|
+
except Exception as e:
|
|
170
|
+
print(f"✓ Integration setup complete (API not available: {e})")
|
|
171
|
+
print(" Run with valid API keys for full demo")
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def main() -> None:
|
|
175
|
+
"""Run all demos."""
|
|
176
|
+
print("\n" + "=" * 70)
|
|
177
|
+
print("CROSS-MODEL VERIFICATION KERNEL - INNOVATION LAYER DEMO")
|
|
178
|
+
print("=" * 70)
|
|
179
|
+
|
|
180
|
+
# Run individual feature demos
|
|
181
|
+
demo_prosecutor_mode()
|
|
182
|
+
demo_lateral_thinking()
|
|
183
|
+
demo_witness()
|
|
184
|
+
demo_integration()
|
|
185
|
+
|
|
186
|
+
print("\n" + "=" * 70)
|
|
187
|
+
print("ALL FEATURES VALIDATED ✓")
|
|
188
|
+
print("=" * 70)
|
|
189
|
+
print("\nThe Innovation Layer is ready for:")
|
|
190
|
+
print(" 1. Research experiments")
|
|
191
|
+
print(" 2. Paper writing")
|
|
192
|
+
print(" 3. Production deployment")
|
|
193
|
+
print()
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
if __name__ == "__main__":
|
|
197
|
+
main()
|