ciris-agent 1.7.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ciris_adapters/README.md +113 -0
- ciris_adapters/__init__.py +30 -0
- ciris_adapters/ciris_covenant_metrics/README.md +144 -0
- ciris_adapters/ciris_covenant_metrics/__init__.py +36 -0
- ciris_adapters/ciris_covenant_metrics/adapter.py +249 -0
- ciris_adapters/ciris_covenant_metrics/manifest.json +152 -0
- ciris_adapters/ciris_covenant_metrics/services.py +403 -0
- ciris_adapters/ciris_hosted_tools/__init__.py +24 -0
- ciris_adapters/ciris_hosted_tools/adapter.py +169 -0
- ciris_adapters/ciris_hosted_tools/manifest.json +94 -0
- ciris_adapters/ciris_hosted_tools/services.py +744 -0
- ciris_adapters/external_data_sql/README.md +559 -0
- ciris_adapters/external_data_sql/__init__.py +43 -0
- ciris_adapters/external_data_sql/adapter.py +144 -0
- ciris_adapters/external_data_sql/configurable.py +315 -0
- ciris_adapters/external_data_sql/dialects/__init__.py +37 -0
- ciris_adapters/external_data_sql/dialects/base.py +133 -0
- ciris_adapters/external_data_sql/dialects/mysql.py +63 -0
- ciris_adapters/external_data_sql/dialects/postgresql.py +59 -0
- ciris_adapters/external_data_sql/dialects/sqlite.py +62 -0
- ciris_adapters/external_data_sql/example_config.json +88 -0
- ciris_adapters/external_data_sql/example_privacy_schema.yaml +127 -0
- ciris_adapters/external_data_sql/manifest.json +195 -0
- ciris_adapters/external_data_sql/privacy_schema_loader.py +189 -0
- ciris_adapters/external_data_sql/protocol.py +101 -0
- ciris_adapters/external_data_sql/schemas.py +146 -0
- ciris_adapters/external_data_sql/service.py +1547 -0
- ciris_adapters/external_data_sql/service_old.py +492 -0
- ciris_adapters/home_assistant/__init__.py +63 -0
- ciris_adapters/home_assistant/adapter.py +201 -0
- ciris_adapters/home_assistant/communication_service.py +347 -0
- ciris_adapters/home_assistant/configurable.py +667 -0
- ciris_adapters/home_assistant/manifest.json +203 -0
- ciris_adapters/home_assistant/schemas.py +129 -0
- ciris_adapters/home_assistant/service.py +751 -0
- ciris_adapters/home_assistant/tool_service.py +441 -0
- ciris_adapters/mcp_client/__init__.py +82 -0
- ciris_adapters/mcp_client/adapter.py +847 -0
- ciris_adapters/mcp_client/config.py +280 -0
- ciris_adapters/mcp_client/configurable.py +422 -0
- ciris_adapters/mcp_client/manifest.json +185 -0
- ciris_adapters/mcp_client/mcp_communication_service.py +393 -0
- ciris_adapters/mcp_client/mcp_tool_service.py +463 -0
- ciris_adapters/mcp_client/mcp_wise_service.py +394 -0
- ciris_adapters/mcp_client/schemas.py +149 -0
- ciris_adapters/mcp_client/security.py +592 -0
- ciris_adapters/mcp_common/__init__.py +44 -0
- ciris_adapters/mcp_common/manifest.json +25 -0
- ciris_adapters/mcp_common/protocol.py +315 -0
- ciris_adapters/mcp_common/schemas.py +225 -0
- ciris_adapters/mcp_server/__init__.py +47 -0
- ciris_adapters/mcp_server/adapter.py +581 -0
- ciris_adapters/mcp_server/config.py +260 -0
- ciris_adapters/mcp_server/configurable.py +393 -0
- ciris_adapters/mcp_server/handlers.py +663 -0
- ciris_adapters/mcp_server/manifest.json +211 -0
- ciris_adapters/mcp_server/security.py +500 -0
- ciris_adapters/mock_llm/README.md +117 -0
- ciris_adapters/mock_llm/__init__.py +21 -0
- ciris_adapters/mock_llm/adapter.py +131 -0
- ciris_adapters/mock_llm/configurable.py +237 -0
- ciris_adapters/mock_llm/manifest.json +106 -0
- ciris_adapters/mock_llm/protocol.py +37 -0
- ciris_adapters/mock_llm/responses.py +520 -0
- ciris_adapters/mock_llm/responses_action_selection.py +1041 -0
- ciris_adapters/mock_llm/responses_epistemic.py +17 -0
- ciris_adapters/mock_llm/responses_feedback.py +27 -0
- ciris_adapters/mock_llm/schemas.py +35 -0
- ciris_adapters/mock_llm/service.py +294 -0
- ciris_adapters/navigation/__init__.py +21 -0
- ciris_adapters/navigation/adapter.py +129 -0
- ciris_adapters/navigation/configurable.py +239 -0
- ciris_adapters/navigation/manifest.json +104 -0
- ciris_adapters/navigation/service.py +487 -0
- ciris_adapters/reddit/README.md +132 -0
- ciris_adapters/reddit/REDDIT_ADAPTER_ANALYSIS.md +715 -0
- ciris_adapters/reddit/REDDIT_ADAPTER_SUMMARY.txt +278 -0
- ciris_adapters/reddit/REDDIT_ANALYSIS_INDEX.md +307 -0
- ciris_adapters/reddit/REDDIT_PRODUCTION_READINESS_PLAN.md +518 -0
- ciris_adapters/reddit/__init__.py +15 -0
- ciris_adapters/reddit/adapter.py +189 -0
- ciris_adapters/reddit/configurable.py +274 -0
- ciris_adapters/reddit/error_handler.py +307 -0
- ciris_adapters/reddit/manifest.json +218 -0
- ciris_adapters/reddit/observer.py +532 -0
- ciris_adapters/reddit/protocol.py +34 -0
- ciris_adapters/reddit/schemas.py +433 -0
- ciris_adapters/reddit/service.py +1471 -0
- ciris_adapters/sample_adapter/README.md +474 -0
- ciris_adapters/sample_adapter/__init__.py +45 -0
- ciris_adapters/sample_adapter/adapter.py +208 -0
- ciris_adapters/sample_adapter/configurable.py +469 -0
- ciris_adapters/sample_adapter/manifest.json +247 -0
- ciris_adapters/sample_adapter/services.py +486 -0
- ciris_adapters/weather/__init__.py +16 -0
- ciris_adapters/weather/adapter.py +130 -0
- ciris_adapters/weather/configurable.py +240 -0
- ciris_adapters/weather/manifest.json +156 -0
- ciris_adapters/weather/service.py +600 -0
- ciris_agent-1.7.7.dist-info/METADATA +284 -0
- ciris_agent-1.7.7.dist-info/RECORD +986 -0
- ciris_agent-1.7.7.dist-info/WHEEL +5 -0
- ciris_agent-1.7.7.dist-info/entry_points.txt +15 -0
- ciris_agent-1.7.7.dist-info/licenses/LICENSE +205 -0
- ciris_agent-1.7.7.dist-info/licenses/NOTICE +82 -0
- ciris_agent-1.7.7.dist-info/top_level.txt +4 -0
- ciris_engine/__init__.py +15 -0
- ciris_engine/ciris_templates/ally.yaml +632 -0
- ciris_engine/ciris_templates/default.yaml +411 -0
- ciris_engine/ciris_templates/echo-core.yaml +629 -0
- ciris_engine/ciris_templates/echo-speculative.yaml +764 -0
- ciris_engine/ciris_templates/echo.yaml +647 -0
- ciris_engine/ciris_templates/sage.yaml +332 -0
- ciris_engine/ciris_templates/scout.yaml +338 -0
- ciris_engine/ciris_templates/test.yaml +168 -0
- ciris_engine/cli.py +42 -0
- ciris_engine/config/CIRIS_SERVICES.json +19 -0
- ciris_engine/config/MODEL_CAPABILITIES.json +419 -0
- ciris_engine/config/PRICING_DATA.json +179 -0
- ciris_engine/config/__init__.py +50 -0
- ciris_engine/config/ciris_services.py +113 -0
- ciris_engine/config/model_capabilities.py +388 -0
- ciris_engine/config/pricing_models.py +276 -0
- ciris_engine/constants.py +35 -0
- ciris_engine/data/__init__.py +1 -0
- ciris_engine/data/covenant_1.0b.txt +978 -0
- ciris_engine/gui_static/11steps.svg +107 -0
- ciris_engine/gui_static/2x-schematics.png +0 -0
- ciris_engine/gui_static/404/index.html +1 -0
- ciris_engine/gui_static/404.html +1 -0
- ciris_engine/gui_static/_next/static/0edhkwDxd5UccTsCmtaBi/_buildManifest.js +1 -0
- ciris_engine/gui_static/_next/static/0edhkwDxd5UccTsCmtaBi/_ssgManifest.js +1 -0
- ciris_engine/gui_static/_next/static/U-3xTQao7hc2wnAi-Uekm/_buildManifest.js +1 -0
- ciris_engine/gui_static/_next/static/U-3xTQao7hc2wnAi-Uekm/_ssgManifest.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/3297-60e86ba0f8a7b040.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/3835-2aad4b7f5f8e4643.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/4499-99a0bc47de0b8975.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/4534-af88cd4ba6e99bff.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/4541-84b455f9e0dc4cfe.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/4789-61412711484754bb.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/6539-c6398bc9d7018430.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/704-8e827b26cc8c2d32.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/704-fb45d630f3192c6f.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/8072-de4952a2e6d2b33f.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/8315-b91d03a3949db0af.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/8386-f93a83ccbd789bd9.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/87c73c54-781a7f35148d5433.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/8903-fefea3339a02d41b.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/9090-e66485adf8d9d990.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/_not-found/page-a67d9808462c23b1.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/account/api-keys/page-2d7ee1583bbbd02e.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/account/api-keys/page-6a3c2bae6fe92b7b.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/account/consent/page-2ed3a035136bc4e8.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/account/consent/page-b2f5c91844a32422.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/account/page-25b90f89af3ea58c.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/account/page-b65d16c94ecaf69c.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/account/privacy/page-675b6d05c8f9184f.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/account/privacy/page-cbee2e1c8ab52145.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/account/settings/page-0f44da06697cf9f0.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/account/settings/page-563420253577edbf.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/adapters/page-1854631018bc32be.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/agents/page-8353752c176a7c70.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/agents/page-f61a529f110a6040.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/api-demo/page-7f19b9d20d39be28.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/api-demo/page-d1063938f249b8bd.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/audit/page-321b6728b8fff0bb.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/audit/page-ebac35ca961a1277.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/billing/page-6f3dc3bd02924f8e.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/billing/page-fa4a469f814c821a.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/comms/page-0d4f734269addd8f.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/comms/page-79227d426050089c.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/config/page-018d21d683b6e5bc.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/config/page-2aa5a5363ca2a371.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/consent/page-198373205fd316e2.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/consent/page-f2ca39e7713b13f8.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/dashboard/page-1dd5a196f643c60d.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/dashboard/page-530a04d3abbb8cda.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/docs/page-3193b06d094ab654.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/docs/page-330e996dedb87aba.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/layout-0a70f5fc460298b1.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/layout-21f2f99dd5b336e9.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/login/page-33240e6c6034a49d.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/login/page-68ffab6d54a7fdcd.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/logs/page-8a6167aecc4a475c.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/memory/page-9ca8c5d0056de3ff.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/memory/page-e961226941c18f81.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/page-6fdb065a787a4974.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/page-89f87d431be6064a.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/runtime/page-2e728b9c43aa164d.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/runtime/page-c7dd033dc40a72f0.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/services/page-ae9f0bdf11d01a95.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/services/page-b10feb79ca5d75e5.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/sessions/page-13ebe7ef1c16ae11.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/sessions/page-e6c82b16d617f785.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/setup/page-0beb5f5b5a5c20fc.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/setup/page-2595e729eae30c0e.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/status-dashboard/page-1037c987aecc3653.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/status-dashboard/page-2ffd147f6d3162ff.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/system/page-2c5798d58cafcd91.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/system/page-505b1ba4eceb01c3.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/test-auth/page-b0cad31d5cb1b2fa.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/test-auth/page-f3ecd7a8012df230.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/test-login/page-f35117fdc4105801.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/test-login/page-fb583a7924114906.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/test-sdk/page-50f116fd76935563.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/test-sdk/page-c37d8aa5ba623a44.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/tools/page-429aec7a707777ef.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/tools/page-5f705aad60e0c04e.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/users/page-13476b8b0f3808cc.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/users/page-7e500d154ed5bba4.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/wa/page-cc4a9d8a5cb44d08.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/app/wa/page-ec3e429efbc79230.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/framework-9d29490f5ba089ba.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/main-1f554952e47a82c4.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/main-app-26fa8aed029082e5.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/main-app-97b0486ef6bcef25.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/pages/_app-6ce685456e616eb2.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/pages/_error-d4bce98d93fe21e7.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
- ciris_engine/gui_static/_next/static/chunks/webpack-fcebd240b7f8477d.js +1 -0
- ciris_engine/gui_static/_next/static/css/16b94b1fe0cc6e37.css +3 -0
- ciris_engine/gui_static/_next/static/css/77a24ceaae86deff.css +3 -0
- ciris_engine/gui_static/_next/static/media/4cf2300e9c8272f7-s.p.woff2 +0 -0
- ciris_engine/gui_static/_next/static/media/747892c23ea88013-s.woff2 +0 -0
- ciris_engine/gui_static/_next/static/media/8d697b304b401681-s.woff2 +0 -0
- ciris_engine/gui_static/_next/static/media/93f479601ee12b01-s.p.woff2 +0 -0
- ciris_engine/gui_static/_next/static/media/9610d9e46709d722-s.woff2 +0 -0
- ciris_engine/gui_static/_next/static/media/ba015fad6dcf6784-s.woff2 +0 -0
- ciris_engine/gui_static/_next/static/media/d8298875641ec7d4-s.p.woff2 +0 -0
- ciris_engine/gui_static/account/api-keys/index.html +1 -0
- ciris_engine/gui_static/account/api-keys/index.txt +27 -0
- ciris_engine/gui_static/account/consent/index.html +1 -0
- ciris_engine/gui_static/account/consent/index.txt +27 -0
- ciris_engine/gui_static/account/index.html +1 -0
- ciris_engine/gui_static/account/index.txt +27 -0
- ciris_engine/gui_static/account/privacy/index.html +1 -0
- ciris_engine/gui_static/account/privacy/index.txt +27 -0
- ciris_engine/gui_static/account/settings/index.html +1 -0
- ciris_engine/gui_static/account/settings/index.txt +27 -0
- ciris_engine/gui_static/adapters/index.html +1 -0
- ciris_engine/gui_static/adapters/index.txt +27 -0
- ciris_engine/gui_static/agents/index.html +1 -0
- ciris_engine/gui_static/agents/index.txt +27 -0
- ciris_engine/gui_static/andrew-roberts-euBRXcx57T4-unsplash.jpg +0 -0
- ciris_engine/gui_static/api-demo/index.html +1 -0
- ciris_engine/gui_static/api-demo/index.txt +27 -0
- ciris_engine/gui_static/audit/index.html +1 -0
- ciris_engine/gui_static/audit/index.txt +27 -0
- ciris_engine/gui_static/billing/index.html +1 -0
- ciris_engine/gui_static/billing/index.txt +27 -0
- ciris_engine/gui_static/blurryinfo.png +0 -0
- ciris_engine/gui_static/chip-vincent-PkQDwfl9Flc-unsplash.jpg +0 -0
- ciris_engine/gui_static/ciris-architecture.svg +338 -0
- ciris_engine/gui_static/comms/index.html +1 -0
- ciris_engine/gui_static/comms/index.txt +27 -0
- ciris_engine/gui_static/config/index.html +1 -0
- ciris_engine/gui_static/config/index.txt +27 -0
- ciris_engine/gui_static/consent/index.html +1 -0
- ciris_engine/gui_static/consent/index.txt +27 -0
- ciris_engine/gui_static/dashboard/index.html +1 -0
- ciris_engine/gui_static/dashboard/index.txt +27 -0
- ciris_engine/gui_static/docs/index.html +1 -0
- ciris_engine/gui_static/docs/index.txt +27 -0
- ciris_engine/gui_static/eric.png +0 -0
- ciris_engine/gui_static/file.svg +1 -0
- ciris_engine/gui_static/globe.svg +1 -0
- ciris_engine/gui_static/index.html +1 -0
- ciris_engine/gui_static/index.txt +27 -0
- ciris_engine/gui_static/infogfx-1@2x.png +0 -0
- ciris_engine/gui_static/infogfx-2.png +0 -0
- ciris_engine/gui_static/infogfx-dark-1.png +0 -0
- ciris_engine/gui_static/kelly-vohs-soSTXmIxTDU-unsplash.jpg +0 -0
- ciris_engine/gui_static/login/index.html +1 -0
- ciris_engine/gui_static/login/index.txt +27 -0
- ciris_engine/gui_static/logs/index.html +1 -0
- ciris_engine/gui_static/logs/index.txt +27 -0
- ciris_engine/gui_static/memory/index.html +1 -0
- ciris_engine/gui_static/memory/index.txt +27 -0
- ciris_engine/gui_static/nathan-farrish-ArcTfEoBgzs-unsplash.jpg +0 -0
- ciris_engine/gui_static/next.svg +1 -0
- ciris_engine/gui_static/overview.svg +512 -0
- ciris_engine/gui_static/overview1.svg +407 -0
- ciris_engine/gui_static/overview2.svg +370 -0
- ciris_engine/gui_static/pipeline-visualization.svg +278 -0
- ciris_engine/gui_static/privacy-policy.html +160 -0
- ciris_engine/gui_static/runtime/index.html +8 -0
- ciris_engine/gui_static/runtime/index.txt +27 -0
- ciris_engine/gui_static/services/index.html +1 -0
- ciris_engine/gui_static/services/index.txt +27 -0
- ciris_engine/gui_static/sessions/index.html +1 -0
- ciris_engine/gui_static/sessions/index.txt +27 -0
- ciris_engine/gui_static/setup/index.html +1 -0
- ciris_engine/gui_static/setup/index.txt +27 -0
- ciris_engine/gui_static/status-dashboard/index.html +1 -0
- ciris_engine/gui_static/status-dashboard/index.txt +27 -0
- ciris_engine/gui_static/system/index.html +1 -0
- ciris_engine/gui_static/system/index.txt +27 -0
- ciris_engine/gui_static/terms-of-service.html +174 -0
- ciris_engine/gui_static/test-auth/index.html +1 -0
- ciris_engine/gui_static/test-auth/index.txt +27 -0
- ciris_engine/gui_static/test-login/index.html +1 -0
- ciris_engine/gui_static/test-login/index.txt +27 -0
- ciris_engine/gui_static/test-sdk/index.html +1 -0
- ciris_engine/gui_static/test-sdk/index.txt +27 -0
- ciris_engine/gui_static/tools/index.html +1 -0
- ciris_engine/gui_static/tools/index.txt +27 -0
- ciris_engine/gui_static/users/index.html +1 -0
- ciris_engine/gui_static/users/index.txt +27 -0
- ciris_engine/gui_static/vercel.svg +1 -0
- ciris_engine/gui_static/videos/video1.mp4 +0 -0
- ciris_engine/gui_static/videos/video3.mp4 +0 -0
- ciris_engine/gui_static/wa/index.html +1 -0
- ciris_engine/gui_static/wa/index.txt +27 -0
- ciris_engine/gui_static/window.svg +1 -0
- ciris_engine/logic/__init__.py +8 -0
- ciris_engine/logic/adapters/__init__.py +74 -0
- ciris_engine/logic/adapters/api/__init__.py +5 -0
- ciris_engine/logic/adapters/api/adapter.py +1037 -0
- ciris_engine/logic/adapters/api/api_communication.py +370 -0
- ciris_engine/logic/adapters/api/api_document.py +330 -0
- ciris_engine/logic/adapters/api/api_observer.py +24 -0
- ciris_engine/logic/adapters/api/api_runtime_control.py +388 -0
- ciris_engine/logic/adapters/api/api_tools.py +299 -0
- ciris_engine/logic/adapters/api/api_vision.py +215 -0
- ciris_engine/logic/adapters/api/app.py +272 -0
- ciris_engine/logic/adapters/api/auth.py +159 -0
- ciris_engine/logic/adapters/api/config.py +101 -0
- ciris_engine/logic/adapters/api/constants.py +55 -0
- ciris_engine/logic/adapters/api/dependencies/__init__.py +1 -0
- ciris_engine/logic/adapters/api/dependencies/auth.py +260 -0
- ciris_engine/logic/adapters/api/endpoints/__init__.py +1 -0
- ciris_engine/logic/adapters/api/endpoints/emergency.py +86 -0
- ciris_engine/logic/adapters/api/middleware/__init__.py +1 -0
- ciris_engine/logic/adapters/api/middleware/rate_limiter.py +302 -0
- ciris_engine/logic/adapters/api/models.py +29 -0
- ciris_engine/logic/adapters/api/routes/__init__.py +52 -0
- ciris_engine/logic/adapters/api/routes/agent.py +1762 -0
- ciris_engine/logic/adapters/api/routes/audit.py +707 -0
- ciris_engine/logic/adapters/api/routes/auth.py +1745 -0
- ciris_engine/logic/adapters/api/routes/billing.py +895 -0
- ciris_engine/logic/adapters/api/routes/config.py +329 -0
- ciris_engine/logic/adapters/api/routes/connectors.py +534 -0
- ciris_engine/logic/adapters/api/routes/consent.py +637 -0
- ciris_engine/logic/adapters/api/routes/dsar.py +637 -0
- ciris_engine/logic/adapters/api/routes/dsar_multi_source.py +484 -0
- ciris_engine/logic/adapters/api/routes/emergency.py +302 -0
- ciris_engine/logic/adapters/api/routes/memory.py +733 -0
- ciris_engine/logic/adapters/api/routes/memory_filters.py +230 -0
- ciris_engine/logic/adapters/api/routes/memory_models.py +112 -0
- ciris_engine/logic/adapters/api/routes/memory_queries.py +236 -0
- ciris_engine/logic/adapters/api/routes/memory_query_helpers.py +394 -0
- ciris_engine/logic/adapters/api/routes/memory_visualization.py +359 -0
- ciris_engine/logic/adapters/api/routes/memory_visualization_helpers.py +110 -0
- ciris_engine/logic/adapters/api/routes/partnership.py +541 -0
- ciris_engine/logic/adapters/api/routes/setup.py +1374 -0
- ciris_engine/logic/adapters/api/routes/system.py +3049 -0
- ciris_engine/logic/adapters/api/routes/system_extensions.py +952 -0
- ciris_engine/logic/adapters/api/routes/telemetry.py +1987 -0
- ciris_engine/logic/adapters/api/routes/telemetry_converters.py +141 -0
- ciris_engine/logic/adapters/api/routes/telemetry_helpers.py +111 -0
- ciris_engine/logic/adapters/api/routes/telemetry_logs_reader.py +280 -0
- ciris_engine/logic/adapters/api/routes/telemetry_metrics.py +131 -0
- ciris_engine/logic/adapters/api/routes/telemetry_models.py +190 -0
- ciris_engine/logic/adapters/api/routes/telemetry_otlp.py +878 -0
- ciris_engine/logic/adapters/api/routes/telemetry_resource_helpers.py +191 -0
- ciris_engine/logic/adapters/api/routes/tickets.py +541 -0
- ciris_engine/logic/adapters/api/routes/tools.py +556 -0
- ciris_engine/logic/adapters/api/routes/transparency.py +281 -0
- ciris_engine/logic/adapters/api/routes/users.py +981 -0
- ciris_engine/logic/adapters/api/routes/verification.py +373 -0
- ciris_engine/logic/adapters/api/routes/wa.py +369 -0
- ciris_engine/logic/adapters/api/service_configuration.py +177 -0
- ciris_engine/logic/adapters/api/services/__init__.py +1 -0
- ciris_engine/logic/adapters/api/services/auth_service.py +1417 -0
- ciris_engine/logic/adapters/api/services/oauth_security.py +68 -0
- ciris_engine/logic/adapters/base.py +141 -0
- ciris_engine/logic/adapters/base_adapter.py +73 -0
- ciris_engine/logic/adapters/base_observer.py +1141 -0
- ciris_engine/logic/adapters/base_vision.py +312 -0
- ciris_engine/logic/adapters/cirisnode_client.py +307 -0
- ciris_engine/logic/adapters/cli/__init__.py +3 -0
- ciris_engine/logic/adapters/cli/adapter.py +207 -0
- ciris_engine/logic/adapters/cli/cli_adapter.py +902 -0
- ciris_engine/logic/adapters/cli/cli_observer.py +268 -0
- ciris_engine/logic/adapters/cli/cli_tools.py +427 -0
- ciris_engine/logic/adapters/cli/cli_wa_service.py +134 -0
- ciris_engine/logic/adapters/cli/config.py +73 -0
- ciris_engine/logic/adapters/discord/__init__.py +3 -0
- ciris_engine/logic/adapters/discord/adapter.py +783 -0
- ciris_engine/logic/adapters/discord/ciris_discord_client.py +159 -0
- ciris_engine/logic/adapters/discord/config.py +177 -0
- ciris_engine/logic/adapters/discord/constants.py +185 -0
- ciris_engine/logic/adapters/discord/discord-stubs.pyi +50 -0
- ciris_engine/logic/adapters/discord/discord_adapter.py +1584 -0
- ciris_engine/logic/adapters/discord/discord_audit.py +150 -0
- ciris_engine/logic/adapters/discord/discord_channel_manager.py +351 -0
- ciris_engine/logic/adapters/discord/discord_connection_manager.py +313 -0
- ciris_engine/logic/adapters/discord/discord_embed_formatter.py +369 -0
- ciris_engine/logic/adapters/discord/discord_error_classifier.py +302 -0
- ciris_engine/logic/adapters/discord/discord_error_handler.py +316 -0
- ciris_engine/logic/adapters/discord/discord_guidance_handler.py +460 -0
- ciris_engine/logic/adapters/discord/discord_message_handler.py +207 -0
- ciris_engine/logic/adapters/discord/discord_observer.py +670 -0
- ciris_engine/logic/adapters/discord/discord_rate_limiter.py +249 -0
- ciris_engine/logic/adapters/discord/discord_reaction_handler.py +278 -0
- ciris_engine/logic/adapters/discord/discord_tool_handler.py +465 -0
- ciris_engine/logic/adapters/discord/discord_tool_service.py +790 -0
- ciris_engine/logic/adapters/discord/discord_tools.py +90 -0
- ciris_engine/logic/adapters/discord/discord_vision_helper.py +148 -0
- ciris_engine/logic/adapters/discord/py.typed +0 -0
- ciris_engine/logic/adapters/document_parser.py +320 -0
- ciris_engine/logic/audit/__init__.py +10 -0
- ciris_engine/logic/audit/hash_chain.py +313 -0
- ciris_engine/logic/audit/signature_manager.py +352 -0
- ciris_engine/logic/audit/verifier.py +408 -0
- ciris_engine/logic/buses/__init__.py +21 -0
- ciris_engine/logic/buses/base_bus.py +178 -0
- ciris_engine/logic/buses/bus_manager.py +121 -0
- ciris_engine/logic/buses/communication_bus.py +387 -0
- ciris_engine/logic/buses/llm_bus.py +722 -0
- ciris_engine/logic/buses/memory_bus.py +577 -0
- ciris_engine/logic/buses/prohibitions.py +502 -0
- ciris_engine/logic/buses/runtime_control_bus.py +539 -0
- ciris_engine/logic/buses/tool_bus.py +482 -0
- ciris_engine/logic/buses/wise_bus.py +684 -0
- ciris_engine/logic/config/__init__.py +25 -0
- ciris_engine/logic/config/bootstrap.py +255 -0
- ciris_engine/logic/config/config_accessor.py +202 -0
- ciris_engine/logic/config/db_paths.py +194 -0
- ciris_engine/logic/config/env_utils.py +39 -0
- ciris_engine/logic/conscience/__init__.py +16 -0
- ciris_engine/logic/conscience/build_deferral_package.py +0 -0
- ciris_engine/logic/conscience/core.py +688 -0
- ciris_engine/logic/conscience/interface.py +33 -0
- ciris_engine/logic/conscience/registry.py +76 -0
- ciris_engine/logic/conscience/thought_depth_guardrail.py +231 -0
- ciris_engine/logic/conscience/updated_status_conscience.py +156 -0
- ciris_engine/logic/context/__init__.py +10 -0
- ciris_engine/logic/context/batch_context.py +550 -0
- ciris_engine/logic/context/builder.py +149 -0
- ciris_engine/logic/context/channel_resolution.py +136 -0
- ciris_engine/logic/context/secrets_snapshot.py +52 -0
- ciris_engine/logic/context/system_snapshot.py +116 -0
- ciris_engine/logic/context/system_snapshot_helpers.py +1651 -0
- ciris_engine/logic/covenant/__init__.py +33 -0
- ciris_engine/logic/covenant/executor.py +303 -0
- ciris_engine/logic/covenant/extractor.py +382 -0
- ciris_engine/logic/covenant/handler.py +241 -0
- ciris_engine/logic/covenant/verifier.py +383 -0
- ciris_engine/logic/dma/__init__.py +15 -0
- ciris_engine/logic/dma/action_selection/__init__.py +11 -0
- ciris_engine/logic/dma/action_selection/action_instruction_generator.py +444 -0
- ciris_engine/logic/dma/action_selection/context_builder.py +508 -0
- ciris_engine/logic/dma/action_selection/faculty_integration.py +193 -0
- ciris_engine/logic/dma/action_selection/special_cases.py +132 -0
- ciris_engine/logic/dma/action_selection_pdma.py +365 -0
- ciris_engine/logic/dma/base_dma.py +335 -0
- ciris_engine/logic/dma/csdma.py +239 -0
- ciris_engine/logic/dma/dma_executor.py +575 -0
- ciris_engine/logic/dma/dsdma_base.py +410 -0
- ciris_engine/logic/dma/exceptions.py +4 -0
- ciris_engine/logic/dma/factory.py +150 -0
- ciris_engine/logic/dma/pdma.py +120 -0
- ciris_engine/logic/dma/prompt_loader.py +189 -0
- ciris_engine/logic/dma/prompts/action_selection_pdma.yml +58 -0
- ciris_engine/logic/dma/prompts/csdma_common_sense.yml +28 -0
- ciris_engine/logic/dma/prompts/dsdma_base.yml +17 -0
- ciris_engine/logic/dma/prompts/pdma_ethical.yml +42 -0
- ciris_engine/logic/formatters/__init__.py +26 -0
- ciris_engine/logic/formatters/crisis_resources.py +80 -0
- ciris_engine/logic/formatters/escalation.py +21 -0
- ciris_engine/logic/formatters/identity.py +224 -0
- ciris_engine/logic/formatters/prompt_blocks.py +64 -0
- ciris_engine/logic/formatters/system_snapshot.py +193 -0
- ciris_engine/logic/formatters/user_profiles.py +108 -0
- ciris_engine/logic/handlers/__init__.py +1 -0
- ciris_engine/logic/handlers/control/__init__.py +1 -0
- ciris_engine/logic/handlers/control/defer_handler.py +195 -0
- ciris_engine/logic/handlers/control/ponder_handler.py +154 -0
- ciris_engine/logic/handlers/control/reject_handler.py +81 -0
- ciris_engine/logic/handlers/external/__init__.py +1 -0
- ciris_engine/logic/handlers/external/observe_handler.py +154 -0
- ciris_engine/logic/handlers/external/speak_handler.py +250 -0
- ciris_engine/logic/handlers/external/tool_handler.py +148 -0
- ciris_engine/logic/handlers/memory/__init__.py +1 -0
- ciris_engine/logic/handlers/memory/forget_handler.py +107 -0
- ciris_engine/logic/handlers/memory/memorize_handler.py +391 -0
- ciris_engine/logic/handlers/memory/recall_handler.py +213 -0
- ciris_engine/logic/handlers/terminal/__init__.py +1 -0
- ciris_engine/logic/handlers/terminal/task_complete_handler.py +299 -0
- ciris_engine/logic/infrastructure/__init__.py +1 -0
- ciris_engine/logic/infrastructure/handlers/__init__.py +8 -0
- ciris_engine/logic/infrastructure/handlers/action_dispatcher.py +382 -0
- ciris_engine/logic/infrastructure/handlers/base_handler.py +450 -0
- ciris_engine/logic/infrastructure/handlers/exceptions.py +2 -0
- ciris_engine/logic/infrastructure/handlers/handler_registry.py +59 -0
- ciris_engine/logic/infrastructure/handlers/helpers.py +55 -0
- ciris_engine/logic/infrastructure/step_streaming.py +149 -0
- ciris_engine/logic/infrastructure/sub_services/__init__.py +1 -0
- ciris_engine/logic/infrastructure/sub_services/identity_variance_monitor.py +1035 -0
- ciris_engine/logic/infrastructure/sub_services/pattern_analysis_loop.py +758 -0
- ciris_engine/logic/infrastructure/sub_services/wa_cli_bootstrap.py +229 -0
- ciris_engine/logic/infrastructure/sub_services/wa_cli_display.py +176 -0
- ciris_engine/logic/infrastructure/sub_services/wa_cli_oauth.py +404 -0
- ciris_engine/logic/infrastructure/sub_services/wa_cli_wizard.py +181 -0
- ciris_engine/logic/persistence/__init__.py +130 -0
- ciris_engine/logic/persistence/analytics.py +97 -0
- ciris_engine/logic/persistence/db/__init__.py +28 -0
- ciris_engine/logic/persistence/db/core.py +520 -0
- ciris_engine/logic/persistence/db/dialect.py +380 -0
- ciris_engine/logic/persistence/db/execution_helpers.py +216 -0
- ciris_engine/logic/persistence/db/migration_runner.py +191 -0
- ciris_engine/logic/persistence/db/operations.py +313 -0
- ciris_engine/logic/persistence/db/query_builder.py +232 -0
- ciris_engine/logic/persistence/db/retry.py +154 -0
- ciris_engine/logic/persistence/db/setup.py +18 -0
- ciris_engine/logic/persistence/migrations/postgres/001_initial_schema.sql +4 -0
- ciris_engine/logic/persistence/migrations/postgres/002_add_retry_status.sql +3 -0
- ciris_engine/logic/persistence/migrations/postgres/003_add_task_update_tracking.sql +8 -0
- ciris_engine/logic/persistence/migrations/postgres/004_add_occurrence_id.sql +54 -0
- ciris_engine/logic/persistence/migrations/postgres/005_add_consolidation_locks.sql +22 -0
- ciris_engine/logic/persistence/migrations/postgres/006_add_correlation_id_unique_index.sql +16 -0
- ciris_engine/logic/persistence/migrations/postgres/007_add_dsar_tickets.sql +39 -0
- ciris_engine/logic/persistence/migrations/postgres/008_rename_to_tickets_add_sop.sql +123 -0
- ciris_engine/logic/persistence/migrations/postgres/009_add_ticket_status_columns.sql +39 -0
- ciris_engine/logic/persistence/migrations/postgres/010_add_images_to_tasks.sql +5 -0
- ciris_engine/logic/persistence/migrations/sqlite/001_initial_schema.sql +357 -0
- ciris_engine/logic/persistence/migrations/sqlite/002_add_retry_status.sql +3 -0
- ciris_engine/logic/persistence/migrations/sqlite/003_add_task_update_tracking.sql +8 -0
- ciris_engine/logic/persistence/migrations/sqlite/004_add_occurrence_id.sql +45 -0
- ciris_engine/logic/persistence/migrations/sqlite/005_add_consolidation_locks.sql +22 -0
- ciris_engine/logic/persistence/migrations/sqlite/006_add_correlation_id_unique_index.sql +16 -0
- ciris_engine/logic/persistence/migrations/sqlite/007_add_dsar_tickets.sql +39 -0
- ciris_engine/logic/persistence/migrations/sqlite/008_rename_to_tickets_add_sop.sql +120 -0
- ciris_engine/logic/persistence/migrations/sqlite/009_add_ticket_status_columns.sql +129 -0
- ciris_engine/logic/persistence/migrations/sqlite/010_add_images_to_tasks.sql +17 -0
- ciris_engine/logic/persistence/models/__init__.py +141 -0
- ciris_engine/logic/persistence/models/correlations.py +881 -0
- ciris_engine/logic/persistence/models/deferral.py +68 -0
- ciris_engine/logic/persistence/models/dsar.py +286 -0
- ciris_engine/logic/persistence/models/graph.py +362 -0
- ciris_engine/logic/persistence/models/identity.py +264 -0
- ciris_engine/logic/persistence/models/queue_status.py +139 -0
- ciris_engine/logic/persistence/models/tasks.py +1043 -0
- ciris_engine/logic/persistence/models/thoughts.py +400 -0
- ciris_engine/logic/persistence/models/tickets.py +518 -0
- ciris_engine/logic/persistence/stores/__init__.py +13 -0
- ciris_engine/logic/persistence/stores/auth_helpers.py +117 -0
- ciris_engine/logic/persistence/stores/authentication_store.py +414 -0
- ciris_engine/logic/persistence/utils.py +212 -0
- ciris_engine/logic/processors/__init__.py +30 -0
- ciris_engine/logic/processors/core/__init__.py +1 -0
- ciris_engine/logic/processors/core/base_processor.py +280 -0
- ciris_engine/logic/processors/core/main_processor.py +1777 -0
- ciris_engine/logic/processors/core/step_decorators.py +1583 -0
- ciris_engine/logic/processors/core/thought_processor/__init__.py +20 -0
- ciris_engine/logic/processors/core/thought_processor/action_execution.py +49 -0
- ciris_engine/logic/processors/core/thought_processor/conscience_execution.py +382 -0
- ciris_engine/logic/processors/core/thought_processor/finalize_action.py +66 -0
- ciris_engine/logic/processors/core/thought_processor/gather_context.py +120 -0
- ciris_engine/logic/processors/core/thought_processor/main.py +920 -0
- ciris_engine/logic/processors/core/thought_processor/perform_aspdma.py +86 -0
- ciris_engine/logic/processors/core/thought_processor/perform_dmas.py +106 -0
- ciris_engine/logic/processors/core/thought_processor/recursive_processing.py +237 -0
- ciris_engine/logic/processors/core/thought_processor/round_complete.py +52 -0
- ciris_engine/logic/processors/core/thought_processor/start_round.py +64 -0
- ciris_engine/logic/processors/exceptions.py +59 -0
- ciris_engine/logic/processors/states/__init__.py +1 -0
- ciris_engine/logic/processors/states/dream_processor.py +1381 -0
- ciris_engine/logic/processors/states/play_processor.py +141 -0
- ciris_engine/logic/processors/states/shutdown_processor.py +623 -0
- ciris_engine/logic/processors/states/solitude_processor.py +305 -0
- ciris_engine/logic/processors/states/wakeup_processor.py +802 -0
- ciris_engine/logic/processors/states/work_processor.py +742 -0
- ciris_engine/logic/processors/support/__init__.py +1 -0
- ciris_engine/logic/processors/support/dma_orchestrator.py +336 -0
- ciris_engine/logic/processors/support/processing_queue.py +133 -0
- ciris_engine/logic/processors/support/shutdown_condition_evaluator.py +294 -0
- ciris_engine/logic/processors/support/state_manager.py +358 -0
- ciris_engine/logic/processors/support/task_manager.py +303 -0
- ciris_engine/logic/processors/support/thought_escalation.py +116 -0
- ciris_engine/logic/processors/support/thought_manager.py +328 -0
- ciris_engine/logic/processors/support/thought_manager_enhanced.py +105 -0
- ciris_engine/logic/registries/__init__.py +34 -0
- ciris_engine/logic/registries/base.py +653 -0
- ciris_engine/logic/registries/circuit_breaker.py +275 -0
- ciris_engine/logic/registries/typed_registries.py +184 -0
- ciris_engine/logic/runtime/__init__.py +7 -0
- ciris_engine/logic/runtime/adapter_loader.py +261 -0
- ciris_engine/logic/runtime/adapter_manager.py +1053 -0
- ciris_engine/logic/runtime/ciris_runtime.py +2342 -0
- ciris_engine/logic/runtime/ciris_runtime_helpers.py +923 -0
- ciris_engine/logic/runtime/component_builder.py +361 -0
- ciris_engine/logic/runtime/identity_manager.py +219 -0
- ciris_engine/logic/runtime/module_loader.py +207 -0
- ciris_engine/logic/runtime/prevent_sideeffects.py +30 -0
- ciris_engine/logic/runtime/runtime_interface.py +23 -0
- ciris_engine/logic/runtime/service_initializer.py +1623 -0
- ciris_engine/logic/secrets/__init__.py +30 -0
- ciris_engine/logic/secrets/encryption.py +175 -0
- ciris_engine/logic/secrets/filter.py +295 -0
- ciris_engine/logic/secrets/service.py +652 -0
- ciris_engine/logic/secrets/store.py +669 -0
- ciris_engine/logic/services/__init__.py +1 -0
- ciris_engine/logic/services/adaptation/__init__.py +3 -0
- ciris_engine/logic/services/base_graph_service.py +142 -0
- ciris_engine/logic/services/base_infrastructure_service.py +69 -0
- ciris_engine/logic/services/base_scheduled_service.py +136 -0
- ciris_engine/logic/services/base_service.py +247 -0
- ciris_engine/logic/services/governance/__init__.py +3 -0
- ciris_engine/logic/services/governance/adaptive_filter/__init__.py +14 -0
- ciris_engine/logic/services/governance/adaptive_filter/service.py +818 -0
- ciris_engine/logic/services/governance/consent/__init__.py +53 -0
- ciris_engine/logic/services/governance/consent/air.py +403 -0
- ciris_engine/logic/services/governance/consent/decay.py +324 -0
- ciris_engine/logic/services/governance/consent/dsar_automation.py +589 -0
- ciris_engine/logic/services/governance/consent/exceptions.py +106 -0
- ciris_engine/logic/services/governance/consent/metrics.py +270 -0
- ciris_engine/logic/services/governance/consent/partnership.py +533 -0
- ciris_engine/logic/services/governance/consent/service.py +1256 -0
- ciris_engine/logic/services/governance/dsar/__init__.py +29 -0
- ciris_engine/logic/services/governance/dsar/orchestrator.py +977 -0
- ciris_engine/logic/services/governance/dsar/schemas.py +141 -0
- ciris_engine/logic/services/governance/dsar/signature_service.py +283 -0
- ciris_engine/logic/services/governance/self_observation/__init__.py +20 -0
- ciris_engine/logic/services/governance/self_observation/service.py +1153 -0
- ciris_engine/logic/services/governance/visibility/__init__.py +17 -0
- ciris_engine/logic/services/governance/visibility/service.py +512 -0
- ciris_engine/logic/services/governance/wise_authority/__init__.py +15 -0
- ciris_engine/logic/services/governance/wise_authority/service.py +827 -0
- ciris_engine/logic/services/graph/__init__.py +5 -0
- ciris_engine/logic/services/graph/audit_service/__init__.py +5 -0
- ciris_engine/logic/services/graph/audit_service/service.py +1675 -0
- ciris_engine/logic/services/graph/base.py +208 -0
- ciris_engine/logic/services/graph/config_service/__init__.py +5 -0
- ciris_engine/logic/services/graph/config_service/service.py +372 -0
- ciris_engine/logic/services/graph/incident_service/__init__.py +5 -0
- ciris_engine/logic/services/graph/incident_service/service.py +803 -0
- ciris_engine/logic/services/graph/memory_service.py +1120 -0
- ciris_engine/logic/services/graph/telemetry_service/__init__.py +5 -0
- ciris_engine/logic/services/graph/telemetry_service/exceptions.py +104 -0
- ciris_engine/logic/services/graph/telemetry_service/helpers.py +1337 -0
- ciris_engine/logic/services/graph/telemetry_service/service.py +2429 -0
- ciris_engine/logic/services/graph/tsdb_consolidation/__init__.py +17 -0
- ciris_engine/logic/services/graph/tsdb_consolidation/aggregation_helpers.py +355 -0
- ciris_engine/logic/services/graph/tsdb_consolidation/cleanup_helpers.py +438 -0
- ciris_engine/logic/services/graph/tsdb_consolidation/compressor.py +260 -0
- ciris_engine/logic/services/graph/tsdb_consolidation/consolidators/__init__.py +27 -0
- ciris_engine/logic/services/graph/tsdb_consolidation/consolidators/audit.py +326 -0
- ciris_engine/logic/services/graph/tsdb_consolidation/consolidators/conversation.py +291 -0
- ciris_engine/logic/services/graph/tsdb_consolidation/consolidators/memory.py +197 -0
- ciris_engine/logic/services/graph/tsdb_consolidation/consolidators/metrics.py +251 -0
- ciris_engine/logic/services/graph/tsdb_consolidation/consolidators/task.py +257 -0
- ciris_engine/logic/services/graph/tsdb_consolidation/consolidators/trace.py +363 -0
- ciris_engine/logic/services/graph/tsdb_consolidation/data_converter.py +545 -0
- ciris_engine/logic/services/graph/tsdb_consolidation/date_calculation_helpers.py +193 -0
- ciris_engine/logic/services/graph/tsdb_consolidation/db_query_helpers.py +296 -0
- ciris_engine/logic/services/graph/tsdb_consolidation/edge_helpers.py +92 -0
- ciris_engine/logic/services/graph/tsdb_consolidation/edge_manager.py +896 -0
- ciris_engine/logic/services/graph/tsdb_consolidation/extensive_helpers.py +322 -0
- ciris_engine/logic/services/graph/tsdb_consolidation/period_manager.py +152 -0
- ciris_engine/logic/services/graph/tsdb_consolidation/profound_helpers.py +277 -0
- ciris_engine/logic/services/graph/tsdb_consolidation/query_manager.py +812 -0
- ciris_engine/logic/services/graph/tsdb_consolidation/service.py +1692 -0
- ciris_engine/logic/services/graph/tsdb_consolidation/sql_builders.py +363 -0
- ciris_engine/logic/services/infrastructure/__init__.py +1 -0
- ciris_engine/logic/services/infrastructure/authentication/__init__.py +5 -0
- ciris_engine/logic/services/infrastructure/authentication/service.py +1634 -0
- ciris_engine/logic/services/infrastructure/database_maintenance/__init__.py +15 -0
- ciris_engine/logic/services/infrastructure/database_maintenance/service.py +764 -0
- ciris_engine/logic/services/infrastructure/resource_monitor/__init__.py +7 -0
- ciris_engine/logic/services/infrastructure/resource_monitor/ciris_billing_provider.py +755 -0
- ciris_engine/logic/services/infrastructure/resource_monitor/service.py +409 -0
- ciris_engine/logic/services/infrastructure/resource_monitor/simple_credit_provider.py +129 -0
- ciris_engine/logic/services/lifecycle/__init__.py +3 -0
- ciris_engine/logic/services/lifecycle/initialization/__init__.py +10 -0
- ciris_engine/logic/services/lifecycle/initialization/service.py +312 -0
- ciris_engine/logic/services/lifecycle/scheduler/__init__.py +5 -0
- ciris_engine/logic/services/lifecycle/scheduler/service.py +607 -0
- ciris_engine/logic/services/lifecycle/shutdown/__init__.py +9 -0
- ciris_engine/logic/services/lifecycle/shutdown/service.py +378 -0
- ciris_engine/logic/services/lifecycle/time/__init__.py +15 -0
- ciris_engine/logic/services/lifecycle/time/service.py +259 -0
- ciris_engine/logic/services/memory_service/__init__.py +8 -0
- ciris_engine/logic/services/mixins/__init__.py +13 -0
- ciris_engine/logic/services/mixins/example_usage.py +200 -0
- ciris_engine/logic/services/mixins/request_metrics.py +179 -0
- ciris_engine/logic/services/runtime/__init__.py +3 -0
- ciris_engine/logic/services/runtime/adapter_configuration/__init__.py +16 -0
- ciris_engine/logic/services/runtime/adapter_configuration/service.py +674 -0
- ciris_engine/logic/services/runtime/adapter_configuration/session.py +67 -0
- ciris_engine/logic/services/runtime/control_service/__init__.py +5 -0
- ciris_engine/logic/services/runtime/control_service/service.py +2269 -0
- ciris_engine/logic/services/runtime/llm_service/__init__.py +14 -0
- ciris_engine/logic/services/runtime/llm_service/pricing_calculator.py +279 -0
- ciris_engine/logic/services/runtime/llm_service/service.py +930 -0
- ciris_engine/logic/services/tools/__init__.py +5 -0
- ciris_engine/logic/services/tools/core_tool_service/__init__.py +8 -0
- ciris_engine/logic/services/tools/core_tool_service/service.py +852 -0
- ciris_engine/logic/setup/__init__.py +1 -0
- ciris_engine/logic/setup/first_run.py +250 -0
- ciris_engine/logic/setup/wizard.py +327 -0
- ciris_engine/logic/telemetry/__init__.py +46 -0
- ciris_engine/logic/telemetry/core.py +239 -0
- ciris_engine/logic/telemetry/hot_cold_config.py +133 -0
- ciris_engine/logic/telemetry/log_collector.py +190 -0
- ciris_engine/logic/telemetry/resource_monitor.py +7 -0
- ciris_engine/logic/telemetry/security.py +79 -0
- ciris_engine/logic/utils/__init__.py +18 -0
- ciris_engine/logic/utils/channel_utils.py +75 -0
- ciris_engine/logic/utils/consent/__init__.py +1 -0
- ciris_engine/logic/utils/consent/partnership_utils.py +172 -0
- ciris_engine/logic/utils/constants.py +92 -0
- ciris_engine/logic/utils/context_utils.py +145 -0
- ciris_engine/logic/utils/directory_setup.py +533 -0
- ciris_engine/logic/utils/graphql_context_provider.py +152 -0
- ciris_engine/logic/utils/identity_resolution.py +843 -0
- ciris_engine/logic/utils/incident_capture_handler.py +303 -0
- ciris_engine/logic/utils/initialization_manager.py +74 -0
- ciris_engine/logic/utils/jsondict_helpers.py +290 -0
- ciris_engine/logic/utils/log_sanitizer.py +97 -0
- ciris_engine/logic/utils/logging_config.py +151 -0
- ciris_engine/logic/utils/observability_decorators.py +544 -0
- ciris_engine/logic/utils/occurrence_utils.py +155 -0
- ciris_engine/logic/utils/path_resolution.py +281 -0
- ciris_engine/logic/utils/platform_detection.py +286 -0
- ciris_engine/logic/utils/privacy.py +266 -0
- ciris_engine/logic/utils/profile_loader.py +124 -0
- ciris_engine/logic/utils/profile_manager.py +16 -0
- ciris_engine/logic/utils/runtime_utils.py +69 -0
- ciris_engine/logic/utils/shutdown_manager.py +107 -0
- ciris_engine/logic/utils/task_formatters.py +60 -0
- ciris_engine/logic/utils/task_thought_factory.py +404 -0
- ciris_engine/logic/utils/thought_utils.py +54 -0
- ciris_engine/logic/utils/user_utils.py +70 -0
- ciris_engine/protocols/__init__.py +0 -0
- ciris_engine/protocols/adapters/__init__.py +35 -0
- ciris_engine/protocols/adapters/base.py +149 -0
- ciris_engine/protocols/adapters/configurable.py +265 -0
- ciris_engine/protocols/adapters/message.py +90 -0
- ciris_engine/protocols/audit/__init__.py +1 -0
- ciris_engine/protocols/buses/__init__.py +1 -0
- ciris_engine/protocols/config/__init__.py +1 -0
- ciris_engine/protocols/conscience/__init__.py +1 -0
- ciris_engine/protocols/consent.py +88 -0
- ciris_engine/protocols/context/__init__.py +1 -0
- ciris_engine/protocols/data/__init__.py +1 -0
- ciris_engine/protocols/dma/__init__.py +1 -0
- ciris_engine/protocols/dma/base.py +107 -0
- ciris_engine/protocols/faculties.py +34 -0
- ciris_engine/protocols/formatters/__init__.py +1 -0
- ciris_engine/protocols/handlers/__init__.py +1 -0
- ciris_engine/protocols/infrastructure/__init__.py +25 -0
- ciris_engine/protocols/infrastructure/base.py +377 -0
- ciris_engine/protocols/persistence/__init__.py +1 -0
- ciris_engine/protocols/pipeline_control.py +609 -0
- ciris_engine/protocols/processors/__init__.py +19 -0
- ciris_engine/protocols/processors/agent.py +299 -0
- ciris_engine/protocols/processors/base.py +130 -0
- ciris_engine/protocols/processors/orchestration.py +62 -0
- ciris_engine/protocols/registries/__init__.py +1 -0
- ciris_engine/protocols/runtime/__init__.py +1 -0
- ciris_engine/protocols/runtime/base.py +163 -0
- ciris_engine/protocols/secrets/__init__.py +1 -0
- ciris_engine/protocols/services/__init__.py +80 -0
- ciris_engine/protocols/services/adaptation/__init__.py +7 -0
- ciris_engine/protocols/services/adaptation/self_observation.py +265 -0
- ciris_engine/protocols/services/governance/__init__.py +20 -0
- ciris_engine/protocols/services/governance/communication.py +58 -0
- ciris_engine/protocols/services/governance/filter.py +56 -0
- ciris_engine/protocols/services/governance/visibility.py +32 -0
- ciris_engine/protocols/services/governance/wa_auth.py +192 -0
- ciris_engine/protocols/services/governance/wise_authority.py +75 -0
- ciris_engine/protocols/services/graph/__init__.py +19 -0
- ciris_engine/protocols/services/graph/audit.py +92 -0
- ciris_engine/protocols/services/graph/config.py +54 -0
- ciris_engine/protocols/services/graph/incident_management.py +103 -0
- ciris_engine/protocols/services/graph/memory.py +110 -0
- ciris_engine/protocols/services/graph/telemetry.py +51 -0
- ciris_engine/protocols/services/graph/tsdb_consolidation.py +87 -0
- ciris_engine/protocols/services/infrastructure/__init__.py +11 -0
- ciris_engine/protocols/services/infrastructure/authentication.py +159 -0
- ciris_engine/protocols/services/infrastructure/credit_gate.py +46 -0
- ciris_engine/protocols/services/infrastructure/database_maintenance.py +25 -0
- ciris_engine/protocols/services/infrastructure/resource_monitor.py +83 -0
- ciris_engine/protocols/services/lifecycle/__init__.py +13 -0
- ciris_engine/protocols/services/lifecycle/initialization.py +41 -0
- ciris_engine/protocols/services/lifecycle/scheduler.py +42 -0
- ciris_engine/protocols/services/lifecycle/shutdown.py +50 -0
- ciris_engine/protocols/services/lifecycle/time.py +31 -0
- ciris_engine/protocols/services/runtime/__init__.py +13 -0
- ciris_engine/protocols/services/runtime/llm.py +50 -0
- ciris_engine/protocols/services/runtime/runtime_control.py +193 -0
- ciris_engine/protocols/services/runtime/secrets.py +100 -0
- ciris_engine/protocols/services/runtime/tool.py +123 -0
- ciris_engine/protocols/telemetry/__init__.py +1 -0
- ciris_engine/protocols/utils/__init__.py +1 -0
- ciris_engine/schemas/__init__.py +112 -0
- ciris_engine/schemas/actions/__init__.py +37 -0
- ciris_engine/schemas/actions/parameters.py +137 -0
- ciris_engine/schemas/adapters/__init__.py +13 -0
- ciris_engine/schemas/adapters/cirisnode.py +135 -0
- ciris_engine/schemas/adapters/cli.py +97 -0
- ciris_engine/schemas/adapters/cli_tools.py +98 -0
- ciris_engine/schemas/adapters/discord.py +125 -0
- ciris_engine/schemas/adapters/graphql_core.py +144 -0
- ciris_engine/schemas/adapters/registration.py +47 -0
- ciris_engine/schemas/adapters/runtime_context.py +48 -0
- ciris_engine/schemas/adapters/tool_execution.py +45 -0
- ciris_engine/schemas/adapters/tools.py +96 -0
- ciris_engine/schemas/api/__init__.py +1 -0
- ciris_engine/schemas/api/agent.py +50 -0
- ciris_engine/schemas/api/audit.py +38 -0
- ciris_engine/schemas/api/auth.py +351 -0
- ciris_engine/schemas/api/config_security.py +242 -0
- ciris_engine/schemas/api/emergency.py +111 -0
- ciris_engine/schemas/api/responses.py +72 -0
- ciris_engine/schemas/api/runtime.py +26 -0
- ciris_engine/schemas/api/telemetry.py +109 -0
- ciris_engine/schemas/api/wa.py +90 -0
- ciris_engine/schemas/audit/__init__.py +13 -0
- ciris_engine/schemas/audit/core.py +139 -0
- ciris_engine/schemas/audit/hash_chain.py +58 -0
- ciris_engine/schemas/audit/verification.py +131 -0
- ciris_engine/schemas/buses/__init__.py +1 -0
- ciris_engine/schemas/config/__init__.py +41 -0
- ciris_engine/schemas/config/agent.py +279 -0
- ciris_engine/schemas/config/cognitive_state_behaviors.py +194 -0
- ciris_engine/schemas/config/default_dsar_sops.py +178 -0
- ciris_engine/schemas/config/essential.py +195 -0
- ciris_engine/schemas/config/tickets.py +86 -0
- ciris_engine/schemas/conscience/__init__.py +25 -0
- ciris_engine/schemas/conscience/context.py +34 -0
- ciris_engine/schemas/conscience/core.py +145 -0
- ciris_engine/schemas/conscience/results.py +24 -0
- ciris_engine/schemas/consent/__init__.py +5 -0
- ciris_engine/schemas/consent/core.py +404 -0
- ciris_engine/schemas/context/__init__.py +1 -0
- ciris_engine/schemas/covenant.py +382 -0
- ciris_engine/schemas/data/__init__.py +1 -0
- ciris_engine/schemas/dma/__init__.py +16 -0
- ciris_engine/schemas/dma/core.py +199 -0
- ciris_engine/schemas/dma/faculty.py +192 -0
- ciris_engine/schemas/dma/prompts.py +172 -0
- ciris_engine/schemas/dma/results.py +103 -0
- ciris_engine/schemas/formatters/__init__.py +1 -0
- ciris_engine/schemas/handlers/__init__.py +10 -0
- ciris_engine/schemas/handlers/context.py +119 -0
- ciris_engine/schemas/handlers/contexts.py +100 -0
- ciris_engine/schemas/handlers/core.py +167 -0
- ciris_engine/schemas/handlers/memory_schemas.py +67 -0
- ciris_engine/schemas/handlers/schemas.py +95 -0
- ciris_engine/schemas/identity.py +149 -0
- ciris_engine/schemas/infrastructure/__init__.py +1 -0
- ciris_engine/schemas/infrastructure/base.py +256 -0
- ciris_engine/schemas/infrastructure/behavioral_patterns.py +129 -0
- ciris_engine/schemas/infrastructure/feedback_loop.py +57 -0
- ciris_engine/schemas/infrastructure/identity_variance.py +141 -0
- ciris_engine/schemas/infrastructure/oauth.py +175 -0
- ciris_engine/schemas/infrastructure/wa_cli_wizard.py +54 -0
- ciris_engine/schemas/persistence/__init__.py +34 -0
- ciris_engine/schemas/persistence/core.py +140 -0
- ciris_engine/schemas/persistence/correlations.py +73 -0
- ciris_engine/schemas/persistence/postgres/__init__.py +1 -0
- ciris_engine/schemas/persistence/postgres/tables.py +280 -0
- ciris_engine/schemas/persistence/sqlite/__init__.py +1 -0
- ciris_engine/schemas/persistence/sqlite/tables.py +281 -0
- ciris_engine/schemas/platform.py +149 -0
- ciris_engine/schemas/processors/__init__.py +26 -0
- ciris_engine/schemas/processors/base.py +130 -0
- ciris_engine/schemas/processors/cognitive.py +77 -0
- ciris_engine/schemas/processors/context.py +35 -0
- ciris_engine/schemas/processors/core.py +152 -0
- ciris_engine/schemas/processors/dma.py +105 -0
- ciris_engine/schemas/processors/error.py +122 -0
- ciris_engine/schemas/processors/main.py +109 -0
- ciris_engine/schemas/processors/phase_results.py +21 -0
- ciris_engine/schemas/processors/results.py +99 -0
- ciris_engine/schemas/processors/solitude.py +79 -0
- ciris_engine/schemas/processors/state.py +202 -0
- ciris_engine/schemas/processors/state_example.py +177 -0
- ciris_engine/schemas/processors/states.py +21 -0
- ciris_engine/schemas/processors/status.py +34 -0
- ciris_engine/schemas/registries/__init__.py +1 -0
- ciris_engine/schemas/registries/base.py +66 -0
- ciris_engine/schemas/resources/__init__.py +15 -0
- ciris_engine/schemas/resources/crisis.py +315 -0
- ciris_engine/schemas/runtime/__init__.py +42 -0
- ciris_engine/schemas/runtime/adapter_management.py +186 -0
- ciris_engine/schemas/runtime/api.py +58 -0
- ciris_engine/schemas/runtime/audit.py +50 -0
- ciris_engine/schemas/runtime/bootstrap.py +33 -0
- ciris_engine/schemas/runtime/contexts.py +61 -0
- ciris_engine/schemas/runtime/core.py +161 -0
- ciris_engine/schemas/runtime/enums.py +167 -0
- ciris_engine/schemas/runtime/extended.py +232 -0
- ciris_engine/schemas/runtime/manifest.py +311 -0
- ciris_engine/schemas/runtime/memory.py +60 -0
- ciris_engine/schemas/runtime/messages.py +108 -0
- ciris_engine/schemas/runtime/models.py +156 -0
- ciris_engine/schemas/runtime/processing_context.py +43 -0
- ciris_engine/schemas/runtime/protocols_core.py +96 -0
- ciris_engine/schemas/runtime/resources.py +33 -0
- ciris_engine/schemas/runtime/system_context.py +417 -0
- ciris_engine/schemas/secrets/__init__.py +1 -0
- ciris_engine/schemas/secrets/core.py +267 -0
- ciris_engine/schemas/secrets/service.py +95 -0
- ciris_engine/schemas/services/__init__.py +33 -0
- ciris_engine/schemas/services/audit_summary_node.py +172 -0
- ciris_engine/schemas/services/authority/__init__.py +39 -0
- ciris_engine/schemas/services/authority/jwt.py +158 -0
- ciris_engine/schemas/services/authority/wa_updates.py +138 -0
- ciris_engine/schemas/services/authority/wise_authority.py +163 -0
- ciris_engine/schemas/services/authority_core.py +370 -0
- ciris_engine/schemas/services/capabilities.py +72 -0
- ciris_engine/schemas/services/community_core.py +95 -0
- ciris_engine/schemas/services/context.py +111 -0
- ciris_engine/schemas/services/conversation_summary_node.py +189 -0
- ciris_engine/schemas/services/core/__init__.py +153 -0
- ciris_engine/schemas/services/core/runtime.py +262 -0
- ciris_engine/schemas/services/core/runtime_config.py +117 -0
- ciris_engine/schemas/services/core/secrets.py +65 -0
- ciris_engine/schemas/services/correlation_node.py +179 -0
- ciris_engine/schemas/services/credit_gate.py +92 -0
- ciris_engine/schemas/services/discord_nodes.py +299 -0
- ciris_engine/schemas/services/feedback_core.py +131 -0
- ciris_engine/schemas/services/filters_core.py +270 -0
- ciris_engine/schemas/services/governance.py +26 -0
- ciris_engine/schemas/services/graph/__init__.py +26 -0
- ciris_engine/schemas/services/graph/attributes.py +254 -0
- ciris_engine/schemas/services/graph/audit.py +98 -0
- ciris_engine/schemas/services/graph/consolidation.py +338 -0
- ciris_engine/schemas/services/graph/edge_types.py +43 -0
- ciris_engine/schemas/services/graph/edges.py +88 -0
- ciris_engine/schemas/services/graph/incident.py +312 -0
- ciris_engine/schemas/services/graph/memory.py +84 -0
- ciris_engine/schemas/services/graph/node_data.py +174 -0
- ciris_engine/schemas/services/graph/query_results.py +82 -0
- ciris_engine/schemas/services/graph/telemetry.py +250 -0
- ciris_engine/schemas/services/graph/tsdb_consolidation.py +27 -0
- ciris_engine/schemas/services/graph/tsdb_models.py +107 -0
- ciris_engine/schemas/services/graph_core.py +196 -0
- ciris_engine/schemas/services/graph_typed_nodes.py +194 -0
- ciris_engine/schemas/services/infrastructure/__init__.py +1 -0
- ciris_engine/schemas/services/infrastructure/resource_monitor.py +20 -0
- ciris_engine/schemas/services/lifecycle/__init__.py +9 -0
- ciris_engine/schemas/services/lifecycle/initialization.py +33 -0
- ciris_engine/schemas/services/lifecycle/time.py +50 -0
- ciris_engine/schemas/services/llm.py +187 -0
- ciris_engine/schemas/services/metadata.py +43 -0
- ciris_engine/schemas/services/nodes.py +704 -0
- ciris_engine/schemas/services/operations.py +126 -0
- ciris_engine/schemas/services/requests.py +128 -0
- ciris_engine/schemas/services/resources_core.py +182 -0
- ciris_engine/schemas/services/runtime_control.py +1010 -0
- ciris_engine/schemas/services/shutdown.py +88 -0
- ciris_engine/schemas/services/special/__init__.py +0 -0
- ciris_engine/schemas/services/special/self_observation.py +396 -0
- ciris_engine/schemas/services/trace_summary_node.py +199 -0
- ciris_engine/schemas/services/visibility.py +98 -0
- ciris_engine/schemas/streaming/__init__.py +10 -0
- ciris_engine/schemas/streaming/reasoning_stream.py +95 -0
- ciris_engine/schemas/telemetry/__init__.py +0 -0
- ciris_engine/schemas/telemetry/collector.py +67 -0
- ciris_engine/schemas/telemetry/core.py +252 -0
- ciris_engine/schemas/telemetry/unified.py +59 -0
- ciris_engine/schemas/tools.py +72 -0
- ciris_engine/schemas/types.py +47 -0
- ciris_engine/schemas/utils/__init__.py +1 -0
- ciris_engine/schemas/utils/config_validator.py +54 -0
- ciris_engine/utils/__init__.py +1 -0
- ciris_engine/utils/serialization.py +35 -0
- ciris_sdk/__init__.py +124 -0
- ciris_sdk/auth_store.py +261 -0
- ciris_sdk/client.py +261 -0
- ciris_sdk/exceptions.py +73 -0
- ciris_sdk/model_types.py +258 -0
- ciris_sdk/models.py +354 -0
- ciris_sdk/pagination.py +214 -0
- ciris_sdk/rate_limiter.py +188 -0
- ciris_sdk/setup.py +17 -0
- ciris_sdk/telemetry_models.py +257 -0
- ciris_sdk/telemetry_responses.py +199 -0
- ciris_sdk/transport.py +177 -0
- ciris_sdk/websocket.py +400 -0
- main.py +766 -0
|
@@ -0,0 +1,2429 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Graph-based TelemetryService that stores all metrics as memories in the graph.
|
|
3
|
+
|
|
4
|
+
This implements the "Graph Memory as Identity Architecture" patent by routing
|
|
5
|
+
all telemetry data through the memory system as TSDBGraphNodes.
|
|
6
|
+
|
|
7
|
+
Consolidates functionality from:
|
|
8
|
+
- GraphTelemetryService (graph-based metrics)
|
|
9
|
+
- AdapterTelemetryService (system snapshots)
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import asyncio
|
|
13
|
+
import logging
|
|
14
|
+
import sys
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
from datetime import datetime, timedelta, timezone
|
|
17
|
+
from enum import Enum
|
|
18
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
19
|
+
|
|
20
|
+
from ciris_engine.logic.utils.jsondict_helpers import get_bool, get_dict, get_float, get_int, get_str
|
|
21
|
+
from ciris_engine.schemas.types import JSONDict
|
|
22
|
+
|
|
23
|
+
# Optional import for psutil
|
|
24
|
+
try:
|
|
25
|
+
import psutil # type: ignore[import,unused-ignore]
|
|
26
|
+
|
|
27
|
+
PSUTIL_AVAILABLE = True
|
|
28
|
+
except ImportError:
|
|
29
|
+
psutil = None # type: ignore[assignment,no-redef,unused-ignore]
|
|
30
|
+
PSUTIL_AVAILABLE = False
|
|
31
|
+
|
|
32
|
+
from ciris_engine.logic.buses.memory_bus import MemoryBus
|
|
33
|
+
from ciris_engine.logic.services.base_graph_service import BaseGraphService
|
|
34
|
+
from ciris_engine.protocols.infrastructure.base import RegistryAwareServiceProtocol, ServiceRegistryProtocol
|
|
35
|
+
from ciris_engine.protocols.runtime.base import GraphServiceProtocol as TelemetryServiceProtocol
|
|
36
|
+
from ciris_engine.schemas.runtime.enums import ServiceType
|
|
37
|
+
from ciris_engine.schemas.runtime.protocols_core import MetricDataPoint, ResourceLimits
|
|
38
|
+
from ciris_engine.schemas.runtime.resources import ResourceUsage
|
|
39
|
+
from ciris_engine.schemas.runtime.system_context import ChannelContext as SystemChannelContext
|
|
40
|
+
from ciris_engine.schemas.runtime.system_context import ContinuitySummary, SystemSnapshot, TelemetrySummary, UserProfile
|
|
41
|
+
from ciris_engine.schemas.services.core import ServiceStatus
|
|
42
|
+
from ciris_engine.schemas.services.graph.telemetry import (
|
|
43
|
+
AggregatedTelemetryMetadata,
|
|
44
|
+
AggregatedTelemetryResponse,
|
|
45
|
+
BehavioralData,
|
|
46
|
+
MetricRecord,
|
|
47
|
+
ResourceData,
|
|
48
|
+
ServiceTelemetryData,
|
|
49
|
+
TelemetryData,
|
|
50
|
+
TelemetrySnapshotResult,
|
|
51
|
+
)
|
|
52
|
+
from ciris_engine.schemas.services.graph_core import GraphNode, GraphScope, NodeType
|
|
53
|
+
from ciris_engine.schemas.services.operations import MemoryOpStatus
|
|
54
|
+
from ciris_engine.schemas.telemetry.core import ServiceCorrelation
|
|
55
|
+
|
|
56
|
+
logger = logging.getLogger(__name__)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class MemoryType(str, Enum):
    """Categories of memory held in the unified system.

    The class mixes in ``str``, so every member is itself a string and
    compares equal to its lowercase value (for example
    ``MemoryType.OPERATIONAL == "operational"``).
    """

    # Metrics, logs, performance data
    OPERATIONAL = "operational"
    # Actions, decisions, patterns
    BEHAVIORAL = "behavioral"
    # Interactions, relationships, gratitude
    SOCIAL = "social"
    # Self-knowledge, capabilities, values
    IDENTITY = "identity"
    # Learned principles, insights
    WISDOM = "wisdom"
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class GracePolicy(str, Enum):
    """Policies describing how grace is applied during memory consolidation.

    The class mixes in ``str``, so every member is itself a string and
    compares equal to its snake_case value.
    """

    # Consolidate errors into learning
    FORGIVE_ERRORS = "forgive_errors"
    # Allow more time before judging
    EXTEND_PATIENCE = "extend_patience"
    # Interpret ambiguity positively
    ASSUME_GOOD_INTENT = "assume_good_intent"
    # Mirror the grace we receive
    RECIPROCAL_GRACE = "reciprocal_grace"
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@dataclass
class ConsolidationCandidate:
    """A group of memories that could be consolidated together.

    All fields are required and carry no defaults.
    """

    # Identifiers of the memories in this group
    memory_ids: List[str]
    # Category shared by the group
    memory_type: MemoryType
    # Duration associated with the group (presumably oldest-to-newest span; confirm with the producer)
    time_span: timedelta
    # Size of the group; the unit is not visible here (TODO confirm)
    total_size: int
    # Whether a grace policy applies to this group
    grace_applicable: bool
    # Textual reasons accompanying the grace decision
    grace_reasons: List[str]
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class TelemetryAggregator:
|
|
91
|
+
"""
|
|
92
|
+
Enterprise telemetry aggregation for unified monitoring.
|
|
93
|
+
|
|
94
|
+
Collects metrics from all 22 required services in parallel and
|
|
95
|
+
provides aggregated views for different stakeholders.
|
|
96
|
+
"""
|
|
97
|
+
|
|
98
|
+
# Static category -> service-name mapping (v1.4.6 validated: 37 real source
# types with ConsentService). Insertion order matters: collection tasks are
# created by iterating this mapping.
CATEGORIES = {
    "buses": [
        "llm_bus",
        "memory_bus",
        "communication_bus",
        "wise_bus",
        "tool_bus",
        "runtime_control_bus",
    ],
    "graph": [
        "memory",
        "config",
        "telemetry",
        "audit",
        "incident_management",
        "tsdb_consolidation",
    ],
    "infrastructure": [
        "time",
        "shutdown",
        "initialization",
        "authentication",
        "resource_monitor",
        # Has get_metrics() now
        "database_maintenance",
        # SecretsService (not SecretsToolService)
        "secrets",
    ],
    "governance": [
        "wise_authority",
        "adaptive_filter",
        "visibility",
        "self_observation",
        "consent",
    ],
    "runtime": ["llm", "runtime_control", "task_scheduler"],
    # Separated from runtime for clarity
    "tools": ["secrets_tool"],
    # Each adapter type can spawn multiple instances
    "adapters": ["api", "discord", "cli"],
    "components": [
        "service_registry",
        # Has get_metrics() now
        "agent_processor",
    ],
    # New in v1.4.3: covenant/ethics metrics are computed from governance
    # services rather than collected from a service, hence the empty list.
    "covenant": [],
}
|
|
122
|
+
|
|
123
|
+
def __init__(self, service_registry: Any, time_service: Any, runtime: Any = None):
|
|
124
|
+
"""Initialize the aggregator with service registry, time service, and optional runtime."""
|
|
125
|
+
self.service_registry = service_registry
|
|
126
|
+
self.time_service = time_service
|
|
127
|
+
self.runtime = runtime # Direct access to runtime for core services
|
|
128
|
+
self.cache: Dict[str, Tuple[datetime, AggregatedTelemetryResponse]] = {}
|
|
129
|
+
self.cache_ttl = timedelta(seconds=30)
|
|
130
|
+
|
|
131
|
+
def _create_collection_tasks(self) -> tuple[list[Any], list[tuple[str, str]]]:
|
|
132
|
+
"""Create collection tasks for all services.
|
|
133
|
+
|
|
134
|
+
Returns:
|
|
135
|
+
Tuple of (tasks, service_info)
|
|
136
|
+
"""
|
|
137
|
+
tasks = []
|
|
138
|
+
service_info = []
|
|
139
|
+
|
|
140
|
+
# Create collection tasks for all services
|
|
141
|
+
for category, services in self.CATEGORIES.items():
|
|
142
|
+
for service_name in services:
|
|
143
|
+
task = asyncio.create_task(self.collect_service(service_name))
|
|
144
|
+
tasks.append(task)
|
|
145
|
+
service_info.append((category, service_name))
|
|
146
|
+
|
|
147
|
+
# Also collect from dynamic registry services
|
|
148
|
+
registry_tasks = self.collect_from_registry_services()
|
|
149
|
+
tasks.extend(registry_tasks["tasks"])
|
|
150
|
+
service_info.extend(registry_tasks["info"])
|
|
151
|
+
|
|
152
|
+
return tasks, service_info
|
|
153
|
+
|
|
154
|
+
def _process_task_result(
    self, result: Any, service_name: str, category: str, telemetry: Dict[str, Dict[str, ServiceTelemetryData]]
) -> None:
    """Store one collection result in the telemetry mapping.

    Args:
        result: Value produced by the collection task.
        service_name: Name the task was created for.
        category: Key of the telemetry bucket to write into.
        telemetry: Mapping of category -> {name -> data}; mutated in place.
    """
    # Adapters yield a dict of instances: record each one under its own adapter id.
    if isinstance(result, dict) and service_name in ("api", "discord", "cli"):
        for adapter_id, adapter_data in result.items():
            telemetry[category][adapter_id] = adapter_data
        return

    # Ordinary single-service result.
    if isinstance(result, ServiceTelemetryData):
        telemetry[category][service_name] = result
        return

    # Anything else is unexpected: log it and record a zeroed, unhealthy entry.
    logger.warning(f"Unexpected result type for {service_name}: {type(result)}")
    telemetry[category][service_name] = ServiceTelemetryData(
        healthy=False,
        uptime_seconds=0.0,
        error_count=0,
        requests_handled=0,
        error_rate=0.0,
    )
|
|
179
|
+
|
|
180
|
+
def _process_completed_tasks(
    self,
    tasks: list[Any],
    service_info: list[tuple[str, str]],
    done: set[Any],
    telemetry: Dict[str, Dict[str, ServiceTelemetryData]],
) -> None:
    """Fill the telemetry mapping from finished and unfinished tasks.

    Args:
        tasks: Every scheduled task, in scheduling order.
        service_info: ``(category, service_name)`` labels parallel to ``tasks``.
        done: Tasks that finished before the wait returned.
        telemetry: Mapping of category -> {name -> data}; mutated in place.
    """
    # zip() stops at the shorter sequence, so tasks without a label are skipped.
    for task, (category, service_name) in zip(tasks, service_info):
        if task not in done:
            # Task timed out
            telemetry[category][service_name] = self.get_fallback_metrics(service_name)
            continue

        try:
            self._process_task_result(task.result(), service_name, category, telemetry)
        except Exception as e:
            logger.warning(f"Failed to collect from {service_name}: {e}")
            # Record a zeroed, unhealthy entry rather than invented values.
            telemetry[category][service_name] = ServiceTelemetryData(
                healthy=False,
                uptime_seconds=0.0,
                error_count=0,
                requests_handled=0,
                error_rate=0.0,
            )
|
|
214
|
+
|
|
215
|
+
async def collect_all_parallel(self) -> Dict[str, Dict[str, ServiceTelemetryData]]:
    """Collect telemetry from every service concurrently.

    Tasks are given up to 5 seconds in total; those still pending are
    cancelled and filled in via the fallback path of
    ``_process_completed_tasks``.

    Returns:
        Telemetry keyed by category (every ``CATEGORIES`` key plus
        ``"registry"``), each value mapping a service name to its data.
        ``"covenant"`` additionally receives a ``"covenant_metrics"`` entry.
    """
    tasks, service_info = self._create_collection_tasks()

    # Run everything in parallel under a single overall timeout.
    finished, unfinished = await asyncio.wait(tasks, timeout=5.0, return_when=asyncio.ALL_COMPLETED)
    for straggler in unfinished:
        straggler.cancel()

    # One bucket per static category, plus one for dynamic registry services.
    telemetry: Dict[str, Dict[str, ServiceTelemetryData]] = {category: {} for category in self.CATEGORIES}
    telemetry["registry"] = {}

    self._process_completed_tasks(tasks, service_info, finished, telemetry)

    # Covenant metrics are computed from the collected telemetry and carried
    # in custom_metrics of an otherwise zeroed entry.
    telemetry["covenant"]["covenant_metrics"] = ServiceTelemetryData(
        healthy=True,
        uptime_seconds=0.0,
        error_count=0,
        requests_handled=0,
        error_rate=0.0,
        memory_mb=0.0,
        custom_metrics=self.compute_covenant_metrics(telemetry),
    )

    return telemetry
|
|
253
|
+
|
|
254
|
+
def _generate_semantic_service_name(
    self, service_type: str, provider_name: str, provider_metadata: Optional[JSONDict] = None
) -> str:
    """Build a readable name for a dynamically registered service.

    Naming scheme, checked in this order:
        adapters:       {service_type}_{adapter_type}_{adapter_id_suffix}
        known services: {service_type}_{fixed_suffix}
        LLM providers:  {service_type}_{provider_type}_{instance_id}
        anything else:  {service_type}_{provider_name_cleaned}_{instance_id}

    Args:
        service_type: Service type the provider is registered under.
        provider_name: Provider name as reported by the registry.
        provider_metadata: Optional metadata; only ``adapter_id`` is read.
    """

    def _instance_tag() -> str:
        # Last six digits of id() of the provider_name string object.
        return str(id(provider_name))[-6:]

    def _adapter_tag(separator: str) -> str:
        # adapter_id is only looked up when metadata was supplied.
        adapter_id = get_str(provider_metadata, "adapter_id", "") if provider_metadata else ""
        if adapter_id and separator in adapter_id:
            # Text after the last separator, capped at eight characters.
            return adapter_id.split(separator)[-1][:8]
        return _instance_tag()

    # (marker in provider_name, adapter label, adapter_id separator)
    adapter_rules = (
        ("APICommunication", "api", "_"),
        ("CLIAdapter", "cli", "@"),
        ("Discord", "discord", "_"),
    )
    for marker, label, separator in adapter_rules:
        if marker in provider_name:
            return f"{service_type}_{label}_{_adapter_tag(separator)}"

    # Fixed-suffix patterns; the first matching marker wins.
    fixed_suffixes = (
        ("APITool", "api_tool"),
        ("APIRuntime", "api_runtime"),
        ("SecretsToolService", "secrets"),
        ("MockLLM", "mock"),
        ("LocalGraphMemory", "local_graph"),
        ("GraphConfig", "graph"),
        ("TimeService", "time"),
        ("WiseAuthority", "wise_authority"),
    )
    for marker, suffix in fixed_suffixes:
        if marker in provider_name:
            return f"{service_type}_{suffix}"

    # LLM providers
    for marker, provider_type in (("OpenAI", "openai"), ("Anthropic", "anthropic")):
        if marker in provider_name:
            return f"{service_type}_{provider_type}_{_instance_tag()}"

    # Default: strip the generic words and lower-case what remains.
    cleaned = provider_name.replace("Service", "").replace("Adapter", "")
    return f"{service_type}_{cleaned.lower()}_{_instance_tag()}"
|
|
315
|
+
|
|
316
|
+
def collect_from_registry_services(self) -> Dict[str, List[Any]]:
    """Schedule collection for dynamic services found in the ServiceRegistry.

    Providers whose class name is a known core implementation, or whose class
    name matches a name already listed in ``CATEGORIES``, are skipped because
    they are collected through the static path. Every other provider gets a
    task labelled ``("registry", semantic_name)``.

    Returns:
        Dict with parallel lists under ``"tasks"`` and ``"info"``. Both are
        empty when no registry is configured. If an error occurs part-way,
        it is logged and whatever was scheduled so far is returned.
    """
    tasks: List[Any] = []
    service_info: List[Any] = []

    if not self.service_registry:
        return {"tasks": [], "info": []}

    # Core service implementations -> the CATEGORIES name they are collected under.
    # Built once per call rather than once per provider.
    # NOTE: do NOT add adapter services here. APIToolService,
    # APICommunicationService, CLIAdapter, DiscordWiseAuthority etc. are
    # valid dynamic services that should be collected.
    core_service_mappings = {
        "LocalGraphMemoryService": "memory",
        "GraphConfigService": "config",
        "TimeService": "time",
        "WiseAuthorityService": "wise_authority",
        "ConfigService": "config",  # Alternative name
        "MemoryService": "memory",  # Alternative name
        "TSDBConsolidationService": "tsdb_consolidation",
        "MockLLMService": "llm",  # Mock LLM is collected through llm, not registry
        "SecretsToolService": "secrets_tool",  # Core secrets tool service
    }

    try:
        # Lower-cased static service names, for an O(1) case-insensitive check.
        static_names = {name.lower() for names in self.CATEGORIES.values() for name in names}

        provider_info = self.service_registry.get_provider_info()

        for service_type, providers in provider_info.get("services", {}).items():
            for provider in providers:
                provider_name = provider.get("name", "")
                provider_metadata = provider.get("metadata", {})

                # Class name without instance ID
                # (e.g. "GraphConfigService_123456" -> "GraphConfigService").
                provider_class_name = provider_name.split("_")[0]

                logger.debug(
                    f"[TELEMETRY] Checking registry service: {service_type}.{provider_name} (class: {provider_class_name})"
                )

                collected_as = core_service_mappings.get(provider_class_name)
                if collected_as is not None:
                    logger.debug(
                        f"[TELEMETRY] Skipping core service {provider_name} (class: {provider_class_name}) - already collected as {collected_as}"
                    )
                    continue

                if provider_class_name.lower() in static_names:
                    logger.debug(f"[TELEMETRY] Skipping {provider_name} - already in CATEGORIES")
                    continue

                semantic_name = self._generate_semantic_service_name(
                    service_type, provider_name, provider_metadata
                )

                # The task is keyed by the full provider name so the exact instance is collected.
                task = asyncio.create_task(self.collect_from_registry_provider(service_type, provider_name))
                tasks.append(task)
                service_info.append(("registry", semantic_name))
                logger.debug(
                    f"[TELEMETRY] Adding registry service: {semantic_name} (was: {service_type}.{provider_name})"
                )

    except Exception as e:
        # Best effort: keep whatever was scheduled before the failure.
        logger.warning(f"Failed to collect registry services: {e}")

    return {"tasks": tasks, "info": service_info}
|
|
399
|
+
|
|
400
|
+
async def collect_from_registry_provider(self, service_type: str, provider_name: str) -> ServiceTelemetryData:
    """Collect telemetry from one specific registry provider.

    The provider is located by its full name (``ClassName_<id>``) so the
    right instance is used when several instances of a class exist.
    ``get_metrics`` is tried first, then ``is_healthy``. A provider with
    neither, one that cannot be found, or any error yields a zeroed,
    unhealthy record.

    Args:
        service_type: Registry service type the provider is listed under.
        provider_name: Full provider name including the instance ID.
    """

    def _zeroed(healthy: bool) -> ServiceTelemetryData:
        # Only the health flag varies; counters are never invented.
        return ServiceTelemetryData(
            healthy=healthy, uptime_seconds=0.0, error_count=0, requests_handled=0, error_rate=0.0
        )

    try:
        provider_info = self.service_registry.get_provider_info()
        listed_entries = provider_info.get("services", {}).get(service_type, [])

        target_provider = None
        # Resolve the live instance only if the registry lists this exact name.
        if any(entry.get("name") == provider_name for entry in listed_entries):
            for candidate in self.service_registry.get_services_by_type(service_type):
                if f"{candidate.__class__.__name__}_{id(candidate)}" == provider_name:
                    target_provider = candidate
                    break

        if not target_provider:
            logger.debug(f"[TELEMETRY] Provider {provider_name} not found in {service_type}")
            return _zeroed(False)

        # Preferred source: get_metrics(), which may be sync or async.
        if hasattr(target_provider, "get_metrics"):
            if asyncio.iscoroutinefunction(target_provider.get_metrics):
                metrics = await target_provider.get_metrics()
            else:
                metrics = target_provider.get_metrics()

            if isinstance(metrics, ServiceTelemetryData):
                logger.debug(f"[TELEMETRY] Got metrics from {provider_name}: healthy={metrics.healthy}")
                return metrics
            if isinstance(metrics, dict):
                return ServiceTelemetryData(
                    healthy=metrics.get("healthy", True),
                    uptime_seconds=metrics.get("uptime_seconds", 0.0),
                    error_count=metrics.get("error_count", 0),
                    requests_handled=metrics.get("requests_handled", 0),
                    error_rate=metrics.get("error_rate", 0.0),
                    custom_metrics=metrics.get("custom_metrics"),
                    last_health_check=metrics.get("last_health_check"),
                )
            # Any other return type falls through to the health check below.

        # Second choice: is_healthy(), reported with zeroed counters.
        if hasattr(target_provider, "is_healthy"):
            if asyncio.iscoroutinefunction(target_provider.is_healthy):
                is_healthy = await target_provider.is_healthy()
            else:
                is_healthy = target_provider.is_healthy()
            return _zeroed(is_healthy)

        # No way to check health: the provider is reported unhealthy.
        return _zeroed(False)

    except Exception as e:
        logger.warning(f"Failed to collect from registry provider {provider_name}: {e}")
        return _zeroed(False)
|
|
484
|
+
|
|
485
|
+
async def collect_service(self, service_name: str) -> ServiceTelemetryData | dict[str, ServiceTelemetryData]:
    """Collect telemetry for one named service.

    Buses, adapters and components are delegated to their dedicated
    collectors; adapters may return a dict covering several instances.
    Everything else is looked up and probed via ``_try_collect_metrics``.
    A zeroed, unhealthy record is returned when nothing could be collected
    or an error occurred.
    """
    logger.debug(f"[TELEMETRY] Starting collection for service: {service_name}")
    try:
        # Buses
        if service_name.endswith("_bus"):
            logger.debug(f"[TELEMETRY] Collecting from bus: {service_name}")
            return await self.collect_from_bus(service_name)

        # Adapters: collect from ALL instances
        if service_name in ("api", "discord", "cli"):
            logger.debug(f"[TELEMETRY] Collecting from adapter: {service_name}")
            return await self.collect_from_adapter_instances(service_name)

        # Components
        if service_name in ("service_registry", "agent_processor"):
            logger.debug(f"[TELEMETRY] Collecting from component: {service_name}")
            return await self.collect_from_component(service_name)

        # Regular services
        service = self._get_service_from_registry(service_name)
        logger.debug(f"[TELEMETRY] Got service {service_name}: {service.__class__.__name__ if service else 'None'}")

        metrics = await self._try_collect_metrics(service)
        if metrics is None:
            # Nothing collected: report unhealthy instead of inventing data.
            logger.debug(f"[TELEMETRY] No metrics collected from {service_name}, returning unhealthy")
            return ServiceTelemetryData(
                healthy=False, uptime_seconds=0.0, error_count=0, requests_handled=0, error_rate=0.0
            )

        logger.debug(
            f"[TELEMETRY] Collected from {service_name}: healthy={metrics.healthy}, uptime={metrics.uptime_seconds}"
        )
        return metrics

    except Exception as e:
        logger.error(f"Failed to collect from {service_name}: {e}")
        # Collection failed: report unhealthy with zeroed counters.
        return ServiceTelemetryData(
            healthy=False, uptime_seconds=0.0, error_count=0, requests_handled=0, error_rate=0.0
        )
|
|
530
|
+
|
|
531
|
+
def _get_service_from_runtime(self, service_name: str) -> Any:
    """Look a service up as an attribute of the runtime.

    Returns:
        The attribute value when the name is mapped and the value is truthy;
        otherwise ``None`` (no runtime, no mapping, or a falsy attribute).
    """
    if not self.runtime:
        logger.debug(f"[TELEMETRY] No runtime available for service {service_name}")
        return None

    # Telemetry service name -> attribute name on the runtime object
    runtime_attrs = {
        # Graph services
        "memory": "memory_service",
        "config": "config_service",
        "telemetry": "telemetry_service",
        "audit": "audit_service",
        "incident_management": "incident_management_service",
        "tsdb_consolidation": "tsdb_consolidation_service",
        # Infrastructure services
        "time": "time_service",
        "shutdown": "shutdown_service",
        "initialization": "initialization_service",
        "authentication": "authentication_service",
        "resource_monitor": "resource_monitor",
        "database_maintenance": "maintenance_service",
        "secrets": "secrets_service",
        # Governance services
        "wise_authority": "wa_auth_system",
        "adaptive_filter": "adaptive_filter_service",
        "visibility": "visibility_service",
        "self_observation": "self_observation_service",
        "consent": "consent_service",
        # Runtime services
        "llm": "llm_service",
        "runtime_control": "runtime_control_service",
        "task_scheduler": "task_scheduler",
        # Tool services
        "secrets_tool": "secrets_tool_service",
    }

    attr_name = runtime_attrs.get(service_name)
    if not attr_name:
        logger.debug(f"[TELEMETRY] No runtime attr mapping for {service_name}")
        return None

    service = getattr(self.runtime, attr_name, None)
    if not service:
        logger.debug(f"[TELEMETRY] runtime.{attr_name} is None for {service_name}")
        return None

    logger.debug(f"Found {service_name} as runtime.{attr_name}: {service.__class__.__name__}")
    # Extra diagnostics: which telemetry hooks the service exposes.
    has_get_metrics = hasattr(service, "get_metrics")
    has_collect_metrics = hasattr(service, "_collect_metrics")
    is_started = getattr(service, "_started", False)
    logger.debug(
        f" Service {service_name} telemetry: get_metrics={has_get_metrics}, _collect_metrics={has_collect_metrics}, _started={is_started}"
    )
    return service
|
|
588
|
+
|
|
589
|
+
def _get_service_from_registry(self, service_name: str) -> Any:
    """Resolve a service by name, preferring the runtime over the registry.

    The registry fallback compares the lower-cased class name of each
    registered service against the variants expected for ``service_name``.

    Returns:
        The service object, or ``None`` when it cannot be resolved.
    """
    # Runtime attributes take precedence.
    if self.runtime:
        runtime_service = self._get_service_from_runtime(service_name)
        if runtime_service:
            logger.debug(f"Found {service_name} directly from runtime: {runtime_service.__class__.__name__}")
            return runtime_service

    # Registry fallback
    all_services = self.service_registry.get_all_services() if self.service_registry else []
    logger.debug(f"[TELEMETRY] Looking for {service_name} in {len(all_services)} registered services")

    # Telemetry service name -> accepted lower-cased class names
    name_map = {
        # Graph services
        "memory": ["memoryservice", "localgraphmemoryservice"],
        "config": ["configservice", "graphconfigservice"],
        "telemetry": ["telemetryservice", "graphtelemetryservice"],
        "audit": ["auditservice"],
        "incident_management": ["incidentmanagementservice"],
        "tsdb_consolidation": ["tsdbconsolidationservice"],
        # Infrastructure services
        "time": ["timeservice"],
        "shutdown": ["shutdownservice"],
        "initialization": ["initializationservice"],
        "authentication": ["authenticationservice"],
        "resource_monitor": ["resourcemonitorservice"],
        "database_maintenance": ["databasemaintenanceservice"],
        "secrets": ["secretsservice"],
        # Governance services
        "wise_authority": ["wiseauthorityservice"],
        "adaptive_filter": ["adaptivefilterservice"],
        "visibility": ["visibilityservice"],
        "self_observation": ["selfobservationservice"],
        "consent": ["consentservice"],
        # Runtime services
        "llm": ["llmservice", "mockllmservice"],
        "runtime_control": ["runtimecontrolservice", "apiruntimecontrolservice"],
        "task_scheduler": ["taskschedulerservice"],
        # Tool services
        "secrets_tool": ["secretstoolservice"],
    }

    if service_name not in name_map:
        logger.debug(f"Service {service_name} not in name_map")
        return None

    accepted = name_map[service_name]
    for service in all_services:
        if not hasattr(service, "__class__"):
            continue
        if service.__class__.__name__.lower() in accepted:
            logger.debug(f"Found service {service_name} as {service.__class__.__name__}")
            return service

    logger.debug(f"Service {service_name} not found in {len(all_services)} services")
    return None
|
|
651
|
+
|
|
652
|
+
def _convert_dict_to_telemetry(self, metrics: JSONDict, service_name: str) -> ServiceTelemetryData:
    """Turn a metrics dict into ServiceTelemetryData.

    Uptime is the first non-zero value among several service-specific
    uptime keys. When the dict has no ``healthy`` entry, health defaults to
    "uptime is positive". The whole dict is kept as ``custom_metrics``.

    Args:
        metrics: Raw metrics dict reported by the service.
        service_name: Name used in the debug log only.
    """
    uptime_keys = (
        "uptime_seconds",
        "incident_uptime_seconds",
        "tsdb_uptime_seconds",
        "auth_uptime_seconds",
        "scheduler_uptime_seconds",
    )
    uptime = 0.0
    for key in uptime_keys:
        candidate = get_float(metrics, key, 0.0)
        if candidate:
            uptime = candidate
            break

    # A positive uptime implies healthy unless the dict says otherwise.
    healthy = get_bool(metrics, "healthy", uptime > 0.0)

    logger.debug(
        f"Converting dict metrics to ServiceTelemetryData for {service_name}: healthy={healthy}, uptime={uptime}"
    )

    # "request_count" takes precedence; a falsy value falls back to "requests_handled".
    requests_handled = metrics.get("request_count") or metrics.get("requests_handled")

    return ServiceTelemetryData(
        healthy=healthy,
        uptime_seconds=uptime,
        error_count=metrics.get("error_count", 0),
        requests_handled=requests_handled,
        error_rate=metrics.get("error_rate", 0.0),
        memory_mb=metrics.get("memory_mb"),
        custom_metrics=metrics,
    )
|
|
680
|
+
|
|
681
|
+
async def _try_get_metrics_method(self, service: Any) -> Optional[ServiceTelemetryData]:
    """Collect metrics through the service's ``get_metrics()`` method.

    Returns:
        ServiceTelemetryData when ``get_metrics`` exists and returns either
        that type or a dict (converted via ``_convert_dict_to_telemetry``);
        ``None`` when the method is missing, returns another type, or raises.
    """
    if not hasattr(service, "get_metrics"):
        logger.debug(f"Service {type(service).__name__} does not have get_metrics method")
        return None

    logger.debug(f"[TELEMETRY] Service {type(service).__name__} has get_metrics method")
    try:
        # get_metrics may be a coroutine function or a plain method.
        metrics = service.get_metrics
        if asyncio.iscoroutinefunction(metrics):
            metrics = await service.get_metrics()
        else:
            metrics = service.get_metrics()

        logger.debug(f"[TELEMETRY] Got metrics from {type(service).__name__}: {metrics}")

        if isinstance(metrics, ServiceTelemetryData):
            return metrics
        if isinstance(metrics, dict):
            return self._convert_dict_to_telemetry(metrics, type(service).__name__)
        return None
    except Exception as e:
        logger.error(f"Error calling get_metrics on {type(service).__name__}: {e}")
        return None
|
|
706
|
+
|
|
707
|
+
def _try_collect_metrics_method(self, service: Any) -> Optional[ServiceTelemetryData]:
    """Collect metrics through the service's ``_collect_metrics()`` method.

    Returns:
        ServiceTelemetryData when ``_collect_metrics`` exists and returns
        either that type or a dict; ``None`` when the method is missing,
        returns another type, or raises (the error is logged).
    """
    if not hasattr(service, "_collect_metrics"):
        return None

    try:
        collected = service._collect_metrics()
        if isinstance(collected, ServiceTelemetryData):
            return collected
        if isinstance(collected, dict):
            # Unlike the get_metrics path, missing keys map to None and
            # "healthy" defaults to False here.
            return ServiceTelemetryData(
                healthy=collected.get("healthy", False),
                uptime_seconds=collected.get("uptime_seconds"),
                error_count=collected.get("error_count"),
                requests_handled=collected.get("request_count") or collected.get("requests_handled"),
                error_rate=collected.get("error_rate"),
                memory_mb=collected.get("memory_mb"),
                custom_metrics=collected.get("custom_metrics"),
            )
    except Exception as e:
        logger.error(f"Error calling _collect_metrics on {type(service).__name__}: {e}")

    return None
|
|
730
|
+
|
|
731
|
+
async def _try_get_status_method(self, service: Any) -> Optional[ServiceTelemetryData]:
    """Probe the service's ``get_status()`` method.

    The status is requested (and awaited if it is a coroutine) but is not
    converted, so this always returns ``None``. Errors raised by the call
    are logged.
    """
    if not hasattr(service, "get_status"):
        return None

    try:
        pending_status = service.get_status()
        if asyncio.iscoroutine(pending_status):
            await pending_status
    except Exception as e:
        logger.error(f"Error calling get_status on {type(service).__name__}: {e}")
        return None

    # A status object cannot be converted to ServiceTelemetryData here.
    return None
|
|
746
|
+
|
|
747
|
+
async def _try_collect_metrics(self, service: Any) -> Optional[ServiceTelemetryData]:
|
|
748
|
+
"""Try different methods to collect metrics from service."""
|
|
749
|
+
if not service:
|
|
750
|
+
logger.debug("[TELEMETRY] Service is None, cannot collect metrics")
|
|
751
|
+
return None
|
|
752
|
+
|
|
753
|
+
# Try get_metrics first (most common)
|
|
754
|
+
result = await self._try_get_metrics_method(service)
|
|
755
|
+
if result:
|
|
756
|
+
return result
|
|
757
|
+
|
|
758
|
+
# Try _collect_metrics (fallback)
|
|
759
|
+
result = self._try_collect_metrics_method(service)
|
|
760
|
+
if result:
|
|
761
|
+
return result
|
|
762
|
+
|
|
763
|
+
# Try get_status (last resort)
|
|
764
|
+
return await self._try_get_status_method(service)
|
|
765
|
+
|
|
766
|
+
async def collect_from_bus(self, bus_name: str) -> ServiceTelemetryData:
    """Collect telemetry from the message bus named ``bus_name``.

    The bus is looked up on ``runtime.bus_manager`` first and, failing that,
    on ``service_registry._agent``. A bus exposing ``get_metrics()`` is
    converted to ``ServiceTelemetryData``; otherwise ``collect_telemetry()``
    is awaited if present. Every failure path returns
    ``get_fallback_metrics(bus_name)``.
    """
    try:
        bus = None

        # Preferred source: the runtime's bus manager.
        manager = getattr(self.runtime, "bus_manager", None) if self.runtime else None
        if manager:
            # Bus names map onto bus_manager attribute names.
            manager_attr = {
                "llm_bus": "llm",
                "memory_bus": "memory",
                "communication_bus": "communication",
                "wise_bus": "wise",
                "tool_bus": "tool",
                "runtime_control_bus": "runtime_control",
            }.get(bus_name)
            if manager_attr:
                bus = getattr(manager, manager_attr, None)
                if bus:
                    logger.debug(f"Found {bus_name} from runtime.bus_manager.{manager_attr}")

        # Secondary source: the agent attached to the registry.
        if not bus and hasattr(self.service_registry, "_agent"):
            bus = getattr(self.service_registry._agent, bus_name, None)

        if not bus:
            return self.get_fallback_metrics(bus_name)

        if not hasattr(bus, "get_metrics"):
            if hasattr(bus, "collect_telemetry"):
                # collect_telemetry returns Any; it is passed through unchanged.
                result = await bus.collect_telemetry()
                return result  # type: ignore[no-any-return]
            return self.get_fallback_metrics(bus_name)

        try:
            raw = bus.get_metrics()
            if hasattr(raw, "model_dump"):
                # Typed metrics object: flatten it and lift additional_metrics
                # to the top level for backward compatibility.
                metrics = raw.model_dump()
                if "additional_metrics" in metrics:
                    metrics.update(metrics.pop("additional_metrics"))
            else:
                # Anything else is used as-is (expected to be a dict).
                metrics = raw

            # A bus that exposes its providers is healthy only if it has some.
            if hasattr(bus, "get_providers"):
                is_healthy = len(bus.get_providers()) > 0
            elif hasattr(bus, "providers"):
                is_healthy = len(bus.providers) > 0
            else:
                is_healthy = True

            # Only non-None int/float/str values are kept as custom metrics.
            filtered_metrics = {
                key: val for key, val in metrics.items() if val is not None and isinstance(val, (int, float, str))
            }

            # Each known bus reports uptime under its own metric name.
            uptime_metric = {
                "llm_bus": "llm_uptime_seconds",
                "memory_bus": "memory_uptime_seconds",
                "communication_bus": "communication_uptime_seconds",
                "wise_bus": "wise_uptime_seconds",
                "tool_bus": "tool_uptime_seconds",
                "runtime_control_bus": "runtime_control_uptime_seconds",
            }.get(bus_name, "uptime_seconds")

            return ServiceTelemetryData(
                healthy=is_healthy,
                uptime_seconds=metrics.get(uptime_metric, metrics.get("uptime_seconds", 0.0)),
                error_count=metrics.get("error_count", 0) or metrics.get("errors_last_hour", 0),
                requests_handled=metrics.get("request_count")
                or metrics.get("requests_handled", 0)
                or metrics.get("messages_sent", 0),
                error_rate=metrics.get("error_rate", 0.0),
                memory_mb=metrics.get("memory_mb"),
                custom_metrics=filtered_metrics,
            )
        except Exception as e:
            logger.error(f"Error getting metrics from {bus_name}: {e}")
            return self.get_fallback_metrics(bus_name)

    except Exception as e:
        logger.error(f"Failed to collect from {bus_name}: {e}")
        return self.get_fallback_metrics(bus_name)
|
|
863
|
+
|
|
864
|
+
async def collect_from_component(self, component_name: str) -> ServiceTelemetryData:
    """Collect telemetry from a runtime component.

    Only ``"service_registry"`` and ``"agent_processor"`` are resolved, each
    from the runtime attribute of the same name. When the component is
    missing, yields no metrics, or an exception occurs, an unhealthy
    zero-valued ``ServiceTelemetryData`` is returned.
    """
    logger.debug(f"[TELEMETRY] Collecting from component: {component_name}")
    try:
        component = None

        # The component name doubles as the runtime attribute name.
        if self.runtime and component_name in ("service_registry", "agent_processor"):
            component = getattr(self.runtime, component_name, None)
            logger.debug(
                f"[TELEMETRY] Got {component_name}: {component.__class__.__name__ if component else 'None'}"
            )

        if not component:
            logger.debug(f"[TELEMETRY] Component {component_name} not found on runtime")
        else:
            logger.debug(f"[TELEMETRY] Trying to collect metrics from {component.__class__.__name__}")
            metrics = await self._try_collect_metrics(component)
            if metrics:
                logger.debug(f"[TELEMETRY] Got metrics from {component_name}: healthy={metrics.healthy}")
                return metrics
            logger.debug(f"[TELEMETRY] No metrics from {component_name}")

        # Nothing collected: report empty telemetry.
        return ServiceTelemetryData(
            healthy=False, uptime_seconds=0.0, error_count=0, requests_handled=0, error_rate=0.0
        )

    except Exception as e:
        logger.error(f"Failed to collect from component {component_name}: {e}")
        return ServiceTelemetryData(
            healthy=False, uptime_seconds=0.0, error_count=0, requests_handled=0, error_rate=0.0
        )
|
|
905
|
+
|
|
906
|
+
async def _get_control_service(self) -> Any:
|
|
907
|
+
"""Get the runtime control service."""
|
|
908
|
+
if self.runtime and hasattr(self.runtime, "runtime_control_service"):
|
|
909
|
+
return self.runtime.runtime_control_service
|
|
910
|
+
elif self.service_registry:
|
|
911
|
+
from ciris_engine.schemas.runtime.enums import ServiceType
|
|
912
|
+
|
|
913
|
+
return await self.service_registry.get_service(ServiceType.RUNTIME_CONTROL)
|
|
914
|
+
return None
|
|
915
|
+
|
|
916
|
+
def _is_adapter_running(self, adapter_info: Any) -> bool:
|
|
917
|
+
"""Check if an adapter is running."""
|
|
918
|
+
if hasattr(adapter_info, "is_running"):
|
|
919
|
+
return bool(adapter_info.is_running)
|
|
920
|
+
elif hasattr(adapter_info, "status"):
|
|
921
|
+
from ciris_engine.schemas.services.core.runtime import AdapterStatus
|
|
922
|
+
|
|
923
|
+
return adapter_info.status in [AdapterStatus.ACTIVE, AdapterStatus.RUNNING]
|
|
924
|
+
return False
|
|
925
|
+
|
|
926
|
+
def _find_adapter_instance(self, adapter_type: str) -> Any:
|
|
927
|
+
"""Find adapter instance in runtime."""
|
|
928
|
+
if hasattr(self.runtime, "adapters"):
|
|
929
|
+
for adapter in self.runtime.adapters:
|
|
930
|
+
if adapter_type in adapter.__class__.__name__.lower():
|
|
931
|
+
return adapter
|
|
932
|
+
return None
|
|
933
|
+
|
|
934
|
+
async def _get_adapter_metrics(self, adapter_instance: Any) -> Optional[JSONDict]:
|
|
935
|
+
"""Get metrics from adapter instance."""
|
|
936
|
+
if hasattr(adapter_instance, "get_metrics"):
|
|
937
|
+
if asyncio.iscoroutinefunction(adapter_instance.get_metrics):
|
|
938
|
+
result = await adapter_instance.get_metrics()
|
|
939
|
+
return result # type: ignore[no-any-return]
|
|
940
|
+
result = adapter_instance.get_metrics()
|
|
941
|
+
return result # type: ignore[no-any-return]
|
|
942
|
+
return None
|
|
943
|
+
|
|
944
|
+
def _create_telemetry_data(
    self,
    metrics: JSONDict,
    adapter_info: Optional[Any] = None,
    adapter_id: Optional[str] = None,
    healthy: bool = True,
) -> ServiceTelemetryData:
    """Build ``ServiceTelemetryData`` for an adapter from its raw metrics.

    Args:
        metrics: Raw metrics mapping; when falsy an unhealthy zero-valued
            record is returned regardless of ``healthy``.
        adapter_info: Optional object whose ``adapter_type`` and
            ``started_at`` attributes, when present, are added to the
            custom metrics.
        adapter_id: Optional identifier stored as ``custom_metrics["adapter_id"]``.
        healthy: Health flag used for the non-empty case.

    Returns:
        Telemetry whose custom metrics contain only non-None
        int/float/str values.
    """

    def _is_scalar(candidate: Any) -> bool:
        # Custom metrics may only carry non-None int, float or str values.
        return candidate is not None and isinstance(candidate, (int, float, str))

    custom_metrics: JSONDict = {"adapter_id": adapter_id} if adapter_id else {}

    if not metrics:
        return ServiceTelemetryData(
            healthy=False,
            uptime_seconds=0.0,
            error_count=0,
            requests_handled=0,
            error_rate=0.0,
            custom_metrics=custom_metrics,
        )

    if adapter_info:
        adapter_type_value: Any = getattr(adapter_info, "adapter_type", None)
        if adapter_type_value is not None:
            custom_metrics["adapter_type"] = adapter_type_value
        started_at = getattr(adapter_info, "started_at", None)
        if started_at:
            custom_metrics["start_time"] = started_at.isoformat()

    # Adapter-reported custom metrics are filtered before merging so that an
    # invalid value never overwrites an entry set above.
    reported = get_dict(metrics, "custom_metrics", {})
    if isinstance(reported, dict):
        custom_metrics.update({key: val for key, val in reported.items() if _is_scalar(val)})

    # Final pass: the locally added entries must satisfy the same rule.
    filtered_custom_metrics = {key: val for key, val in custom_metrics.items() if _is_scalar(val)}

    return ServiceTelemetryData(
        healthy=healthy,
        uptime_seconds=metrics.get("uptime_seconds", 0.0),
        error_count=metrics.get("error_count", 0),
        requests_handled=metrics.get("request_count") or metrics.get("requests_handled", 0),
        error_rate=metrics.get("error_rate", 0.0),
        memory_mb=metrics.get("memory_mb"),
        custom_metrics=filtered_custom_metrics,
    )
|
|
991
|
+
|
|
992
|
+
def _create_empty_telemetry(self, adapter_id: str, error_msg: Optional[str] = None) -> ServiceTelemetryData:
    """Build unhealthy telemetry for an adapter that produced no metrics.

    With a truthy ``error_msg`` the record counts one error at a 1.0 error
    rate and carries the message under ``custom_metrics["error"]``; without
    one the error count and rate are zero.
    """
    custom_metrics = {"adapter_id": adapter_id}
    if error_msg:
        custom_metrics["error"] = error_msg

    return ServiceTelemetryData(
        healthy=False,
        uptime_seconds=0.0,
        error_count=1 if error_msg else 0,
        requests_handled=0,
        error_rate=1.0 if error_msg else 0.0,
        custom_metrics=custom_metrics,
    )
|
|
1013
|
+
|
|
1014
|
+
def _create_running_telemetry(self, adapter_info: Any) -> ServiceTelemetryData:
    """Build healthy telemetry for a running adapter that exposes no metrics.

    Uptime is the time elapsed since ``adapter_info.started_at`` (measured
    against the current UTC time) or ``0.0`` when that attribute is missing
    or falsy. All counters are zero.
    """
    started_at = getattr(adapter_info, "started_at", None)
    uptime = (datetime.now(timezone.utc) - started_at).total_seconds() if started_at else 0.0

    identity = {
        "adapter_id": adapter_info.adapter_id,
        "adapter_type": adapter_info.adapter_type,
    }
    return ServiceTelemetryData(
        healthy=True,
        uptime_seconds=uptime,
        error_count=0,
        requests_handled=0,
        error_rate=0.0,
        custom_metrics=identity,
    )
|
|
1031
|
+
|
|
1032
|
+
async def _collect_from_adapter_with_metrics(
|
|
1033
|
+
self, adapter_instance: Any, adapter_info: Any, adapter_id: str
|
|
1034
|
+
) -> ServiceTelemetryData:
|
|
1035
|
+
"""Collect metrics from a single adapter instance."""
|
|
1036
|
+
try:
|
|
1037
|
+
metrics = await self._get_adapter_metrics(adapter_instance)
|
|
1038
|
+
if metrics:
|
|
1039
|
+
return self._create_telemetry_data(metrics, adapter_info, adapter_id, healthy=True)
|
|
1040
|
+
else:
|
|
1041
|
+
return self._create_empty_telemetry(adapter_id)
|
|
1042
|
+
except Exception as e:
|
|
1043
|
+
logger.error(f"Error getting metrics from {adapter_id}: {e}")
|
|
1044
|
+
return self._create_empty_telemetry(adapter_id, str(e))
|
|
1045
|
+
|
|
1046
|
+
async def _collect_from_control_service(self, adapter_type: str) -> Optional[Dict[str, ServiceTelemetryData]]:
|
|
1047
|
+
"""Try to collect adapter metrics via control service."""
|
|
1048
|
+
if not self.runtime:
|
|
1049
|
+
return None
|
|
1050
|
+
|
|
1051
|
+
try:
|
|
1052
|
+
control_service = await self._get_control_service()
|
|
1053
|
+
if not control_service or not hasattr(control_service, "list_adapters"):
|
|
1054
|
+
return None
|
|
1055
|
+
|
|
1056
|
+
all_adapters = await control_service.list_adapters()
|
|
1057
|
+
adapter_metrics = {}
|
|
1058
|
+
|
|
1059
|
+
for adapter_info in all_adapters:
|
|
1060
|
+
if adapter_info.adapter_type != adapter_type or not self._is_adapter_running(adapter_info):
|
|
1061
|
+
continue
|
|
1062
|
+
|
|
1063
|
+
adapter_instance = self._find_adapter_instance(adapter_type)
|
|
1064
|
+
if adapter_instance:
|
|
1065
|
+
adapter_metrics[adapter_info.adapter_id] = await self._collect_from_adapter_with_metrics(
|
|
1066
|
+
adapter_instance, adapter_info, adapter_info.adapter_id
|
|
1067
|
+
)
|
|
1068
|
+
else:
|
|
1069
|
+
adapter_metrics[adapter_info.adapter_id] = self._create_running_telemetry(adapter_info)
|
|
1070
|
+
|
|
1071
|
+
return adapter_metrics
|
|
1072
|
+
|
|
1073
|
+
except Exception as e:
|
|
1074
|
+
logger.error(f"Failed to get adapter list from control service: {e}")
|
|
1075
|
+
return None
|
|
1076
|
+
|
|
1077
|
+
async def _collect_from_bootstrap_adapters(self, adapter_type: str) -> Dict[str, ServiceTelemetryData]:
|
|
1078
|
+
"""Fallback: collect from bootstrap adapters directly."""
|
|
1079
|
+
adapter_metrics: Dict[str, ServiceTelemetryData] = {}
|
|
1080
|
+
|
|
1081
|
+
if not self.runtime or not hasattr(self.runtime, "adapters"):
|
|
1082
|
+
return adapter_metrics
|
|
1083
|
+
|
|
1084
|
+
for adapter in self.runtime.adapters:
|
|
1085
|
+
if adapter_type not in adapter.__class__.__name__.lower():
|
|
1086
|
+
continue
|
|
1087
|
+
|
|
1088
|
+
adapter_id = f"{adapter_type}_bootstrap"
|
|
1089
|
+
adapter_metrics[adapter_id] = await self._collect_from_adapter_with_metrics(adapter, None, adapter_id)
|
|
1090
|
+
|
|
1091
|
+
return adapter_metrics
|
|
1092
|
+
|
|
1093
|
+
async def collect_from_adapter_instances(self, adapter_type: str) -> Dict[str, ServiceTelemetryData]:
    """
    Collect telemetry from the active adapter instances of ``adapter_type``.

    The control service is consulted first; its result is used whenever it
    is not ``None`` (an empty dict included). Only a ``None`` result falls
    back to the bootstrap adapters.

    Returns:
        Dict mapping adapter_id to telemetry data.
    """
    via_control = await self._collect_from_control_service(adapter_type)
    if via_control is None:
        return await self._collect_from_bootstrap_adapters(adapter_type)
    return via_control
|
|
1107
|
+
|
|
1108
|
+
def get_fallback_metrics(self, _service_name: Optional[str] = None, _healthy: bool = False) -> ServiceTelemetryData:
    """Return empty, unhealthy telemetry; no metrics are ever fabricated.

    Both parameters are accepted for call compatibility only and are
    ignored: the result is always unhealthy with zeroed counters.
    """
    # Services that expose no real metrics get a zero-valued record.
    return ServiceTelemetryData(
        healthy=False,
        uptime_seconds=0.0,
        error_count=0,
        requests_handled=0,
        error_rate=0.0,
    )
|
|
1118
|
+
|
|
1119
|
+
def status_to_telemetry(self, status: Any) -> JSONDict:
    """Convert a status object into a plain telemetry dict.

    Objects with ``model_dump()`` are dumped through it; otherwise an object
    with a ``__dict__`` yields that dict itself (not a copy); anything else
    becomes ``{"status": str(status)}``.
    """
    if hasattr(status, "model_dump"):
        return status.model_dump()  # type: ignore[no-any-return]
    if hasattr(status, "__dict__"):
        return status.__dict__  # type: ignore[no-any-return]
    return {"status": str(status)}
|
|
1129
|
+
|
|
1130
|
+
def _process_service_metrics(self, service_data: ServiceTelemetryData) -> Tuple[bool, int, int, float, float]:
|
|
1131
|
+
"""Process metrics for a single service."""
|
|
1132
|
+
is_healthy = service_data.healthy
|
|
1133
|
+
errors = service_data.error_count or 0
|
|
1134
|
+
requests = service_data.requests_handled or 0
|
|
1135
|
+
error_rate = service_data.error_rate or 0.0
|
|
1136
|
+
uptime = service_data.uptime_seconds or 0
|
|
1137
|
+
|
|
1138
|
+
return is_healthy, errors, requests, error_rate, uptime
|
|
1139
|
+
|
|
1140
|
+
def _aggregate_service_metrics(
|
|
1141
|
+
self, telemetry: Dict[str, Dict[str, ServiceTelemetryData]]
|
|
1142
|
+
) -> Tuple[int, int, int, int, float, List[float]]:
|
|
1143
|
+
"""Aggregate metrics from all services."""
|
|
1144
|
+
total_services = 0
|
|
1145
|
+
healthy_services = 0
|
|
1146
|
+
total_errors = 0
|
|
1147
|
+
total_requests = 0
|
|
1148
|
+
min_uptime = float("inf")
|
|
1149
|
+
error_rates = []
|
|
1150
|
+
|
|
1151
|
+
for category_name, category_data in telemetry.items():
|
|
1152
|
+
# Skip covenant category as it contains computed metrics, not service data
|
|
1153
|
+
if category_name == "covenant":
|
|
1154
|
+
continue
|
|
1155
|
+
|
|
1156
|
+
for service_data in category_data.values():
|
|
1157
|
+
total_services += 1
|
|
1158
|
+
is_healthy, errors, requests, error_rate, uptime = self._process_service_metrics(service_data)
|
|
1159
|
+
|
|
1160
|
+
if is_healthy:
|
|
1161
|
+
healthy_services += 1
|
|
1162
|
+
|
|
1163
|
+
total_errors += errors
|
|
1164
|
+
total_requests += requests
|
|
1165
|
+
|
|
1166
|
+
if error_rate > 0:
|
|
1167
|
+
error_rates.append(error_rate)
|
|
1168
|
+
|
|
1169
|
+
if uptime > 0 and uptime < min_uptime:
|
|
1170
|
+
min_uptime = uptime
|
|
1171
|
+
|
|
1172
|
+
return total_services, healthy_services, total_errors, total_requests, min_uptime, error_rates
|
|
1173
|
+
|
|
1174
|
+
def _extract_metric_value(self, metrics_obj: Any, metric_name: str, default: Any = 0) -> Any:
    """Look up ``metric_name`` in a telemetry object or plain dict.

    For ``ServiceTelemetryData`` the lookup goes through its
    ``custom_metrics`` (when truthy); for a dict it is a direct ``get``.
    ``default`` is returned for any other input or a missing key.
    """
    if isinstance(metrics_obj, ServiceTelemetryData):
        custom = metrics_obj.custom_metrics
        return custom.get(metric_name, default) if custom else default
    if isinstance(metrics_obj, dict):
        return metrics_obj.get(metric_name, default)
    return default
|
|
1182
|
+
|
|
1183
|
+
def _extract_governance_metrics(
|
|
1184
|
+
self, telemetry: Dict[str, Dict[str, ServiceTelemetryData]], service_name: str, metric_mappings: Dict[str, str]
|
|
1185
|
+
) -> Dict[str, Union[float, int, str]]:
|
|
1186
|
+
"""Extract metrics from a governance service."""
|
|
1187
|
+
results = {}
|
|
1188
|
+
if "governance" in telemetry and service_name in telemetry["governance"]:
|
|
1189
|
+
metrics = telemetry["governance"][service_name]
|
|
1190
|
+
for covenant_key, service_key in metric_mappings.items():
|
|
1191
|
+
results[covenant_key] = self._extract_metric_value(metrics, service_key)
|
|
1192
|
+
return results
|
|
1193
|
+
|
|
1194
|
+
def compute_covenant_metrics(
    self, telemetry: Dict[str, Dict[str, ServiceTelemetryData]]
) -> Dict[str, Union[float, int, str]]:
    """
    Compute covenant/ethics metrics from governance services.

    Each metric starts at zero and is overwritten by the value extracted
    from the ``wise_authority``, ``adaptive_filter`` and ``self_observation``
    services, in that order. An extraction failure is logged, and values
    gathered before it are kept.
    """
    covenant_metrics: Dict[str, Union[float, int, str]] = {
        "wise_authority_deferrals": 0,
        "filter_matches": 0,
        "thoughts_processed": 0,
        "self_observation_insights": 0,
    }

    # (governance service, {covenant metric name: service metric name})
    sources = (
        ("wise_authority", {"wise_authority_deferrals": "deferral_count", "thoughts_processed": "guidance_requests"}),
        ("adaptive_filter", {"filter_matches": "filter_actions"}),
        ("self_observation", {"self_observation_insights": "insights_generated"}),
    )

    try:
        for service_name, mapping in sources:
            covenant_metrics.update(self._extract_governance_metrics(telemetry, service_name, mapping))
    except Exception as e:
        logger.error(f"Failed to compute covenant metrics: {e}")

    return covenant_metrics
|
|
1232
|
+
|
|
1233
|
+
def calculate_aggregates(
    self, telemetry: Dict[str, Dict[str, ServiceTelemetryData]]
) -> Dict[str, Union[bool, int, float, str]]:
    """Calculate system-wide aggregate metrics.

    The system counts as healthy when at least 90% of services are healthy.
    The overall error rate is the mean of the positive per-service rates
    (rounded to 4 places), and the overall uptime is the smallest positive
    service uptime truncated to an int, or 0 when none is reported.
    """
    (
        total_services,
        healthy_services,
        total_errors,
        total_requests,
        min_uptime,
        error_rates,
    ) = self._aggregate_service_metrics(telemetry)

    if error_rates:
        overall_error_rate = sum(error_rates) / len(error_rates)
    else:
        overall_error_rate = 0.0

    # float("inf") is the "no uptime reported" marker from the aggregation step.
    if min_uptime != float("inf"):
        overall_uptime = int(min_uptime)
    else:
        overall_uptime = 0

    return {
        "system_healthy": healthy_services >= (total_services * 0.9),
        "services_online": healthy_services,
        "services_total": total_services,
        "overall_error_rate": round(overall_error_rate, 4),
        "overall_uptime_seconds": overall_uptime,
        "total_errors": total_errors,
        "total_requests": total_requests,
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
|
|
1255
|
+
|
|
1256
|
+
|
|
1257
|
+
class GraphTelemetryService(BaseGraphService, TelemetryServiceProtocol, RegistryAwareServiceProtocol):
|
|
1258
|
+
"""
|
|
1259
|
+
Consolidated TelemetryService that stores all metrics as graph memories.
|
|
1260
|
+
|
|
1261
|
+
This service implements the vision where "everything is a memory" by
|
|
1262
|
+
converting telemetry data into TSDBGraphNodes stored in the memory graph.
|
|
1263
|
+
|
|
1264
|
+
Features:
|
|
1265
|
+
- Processes SystemSnapshot data from adapters
|
|
1266
|
+
- Records operational metrics and resource usage
|
|
1267
|
+
- Stores behavioral, social, and identity context
|
|
1268
|
+
- Applies grace-based wisdom to memory consolidation
|
|
1269
|
+
"""
|
|
1270
|
+
|
|
1271
|
+
def __init__(
    self, memory_bus: Optional[MemoryBus] = None, time_service: Optional[Any] = None  # TimeServiceProtocol
) -> None:
    """Initialise the service with optional memory bus and time service.

    The service registry, runtime reference and telemetry aggregator all
    start as ``None``; the caches start empty.
    """
    # BaseGraphService receives the bus and time service.
    super().__init__(memory_bus=memory_bus, time_service=time_service)

    # Collaborators that are not available at construction time.
    self._service_registry: Optional[Any] = None
    self._telemetry_aggregator: Optional[TelemetryAggregator] = None
    self._runtime: Optional[Any] = None  # Runtime reference handed to the aggregator

    self._resource_limits = ResourceLimits(
        max_memory_mb=4096,
        max_cpu_percent=80.0,
        max_disk_gb=100.0,
        max_api_calls_per_minute=1000,
        max_concurrent_operations=50,
    )

    # Recent data points per metric name, for quick status queries.
    self._recent_metrics: dict[str, list[MetricDataPoint]] = {}
    self._max_cached_metrics = 100

    # Telemetry summaries are cached to avoid slamming persistence.
    self._summary_cache: dict[str, tuple[datetime, TelemetrySummary]] = {}
    self._summary_cache_ttl_seconds = 60  # One minute

    # Process handle for memory tracking; absent when psutil is unavailable.
    self._process = psutil.Process() if PSUTIL_AVAILABLE else None
|
|
1301
|
+
|
|
1302
|
+
def _set_runtime(self, runtime: object) -> None:
    """Store the runtime reference (internal method).

    When an aggregator already exists and a registry is attached, the
    aggregator is rebuilt so that it carries the new runtime; otherwise
    nothing else happens here.
    """
    logger.debug(f"[TELEMETRY] _set_runtime called, runtime={runtime is not None}")
    self._runtime = runtime
    logger.debug(
        f"[TELEMETRY] Aggregator exists: {self._telemetry_aggregator is not None}, Registry exists: {self._service_registry is not None}"
    )

    if not (self._telemetry_aggregator and self._service_registry):
        logger.debug("[TELEMETRY] Aggregator will be created later with runtime when first needed")
        return

    # Rebuild the existing aggregator so it includes the runtime.
    logger.debug("[TELEMETRY] Recreating aggregator with runtime")
    self._telemetry_aggregator = TelemetryAggregator(
        service_registry=self._service_registry, time_service=self._time_service, runtime=self._runtime
    )
|
|
1317
|
+
|
|
1318
|
+
async def attach_registry(self, registry: "ServiceRegistryProtocol") -> None:
    """
    Attach service registry for bus and service discovery.

    Implements RegistryAwareServiceProtocol to enable proper initialization
    of memory bus and time service dependencies.

    The time service is resolved before the memory bus because the bus can
    only be built once a time service is available. Resolving it afterwards
    would leave a service constructed without a time service with no memory
    bus.

    Args:
        registry: Service registry providing access to buses and services
    """
    self._service_registry = registry

    # Get time service from registry if not provided
    if not self._time_service and registry:
        from ciris_engine.schemas.runtime.enums import ServiceType

        # Registries without get_services_by_type are treated as having no time services.
        time_services: List[Any] = getattr(registry, "get_services_by_type", lambda x: [])(ServiceType.TIME)
        if time_services:
            self._time_service = time_services[0]

    if not self._memory_bus and registry:
        # Try to build the memory bus from the registry
        try:
            from ciris_engine.logic.buses import MemoryBus
            from ciris_engine.logic.registries.base import ServiceRegistry

            if isinstance(registry, ServiceRegistry) and self._time_service is not None:
                self._memory_bus = MemoryBus(registry, self._time_service)
        except Exception as e:
            logger.error(f"Failed to initialize memory bus: {e}")
|
|
1347
|
+
|
|
1348
|
+
def _now(self) -> datetime:
|
|
1349
|
+
"""Get current time from time service."""
|
|
1350
|
+
if not self._time_service:
|
|
1351
|
+
raise RuntimeError("FATAL: TimeService not available! This is a critical system failure.")
|
|
1352
|
+
if hasattr(self._time_service, "now"):
|
|
1353
|
+
result = self._time_service.now()
|
|
1354
|
+
if isinstance(result, datetime):
|
|
1355
|
+
return result
|
|
1356
|
+
return datetime.now()
|
|
1357
|
+
|
|
1358
|
+
async def record_metric(
    self,
    metric_name: str,
    value: float = 1.0,
    tags: Optional[Dict[str, str]] = None,
    handler_name: Optional[str] = None,  # Accept extra parameter
    **kwargs: Any,  # Accept telemetry-specific parameters
) -> None:
    """
    Record a metric by storing it as a memory in the graph.

    The metric is sent to the memory bus via ``memorize_metric`` and a
    ``MetricDataPoint`` is appended to the per-metric cache, which is
    trimmed to the most recent ``_max_cached_metrics`` entries.

    Args:
        metric_name: Name under which the metric is stored and cached.
        value: Metric value.
        tags: Optional caller tags. They are copied, never modified; the
            standard ``source``, ``metric_type`` and ``timestamp`` tags are
            added to the copy.
        handler_name: Optional handler name, stored under the ``handler`` tag.
        **kwargs: Accepted for call compatibility and ignored.

    Errors are logged and never raised to the caller.
    """
    try:
        if not self._memory_bus:
            logger.error("Memory bus not available for telemetry storage")
            return

        # Copy the caller's tags so the standard tags added below do not
        # leak into a dict the caller may reuse for other metrics.
        metric_tags = dict(tags) if tags else {}
        metric_tags.update(
            {"source": "telemetry", "metric_type": "operational", "timestamp": self._now().isoformat()}
        )

        # Add handler_name to tags if provided
        if handler_name:
            metric_tags["handler"] = handler_name

        # Store as memory via the bus
        result = await self._memory_bus.memorize_metric(
            metric_name=metric_name,
            value=value,
            tags=metric_tags,
            scope="local",  # Operational metrics use local scope
            handler_name="telemetry_service",
        )

        # Cache for quick access
        data_point = MetricDataPoint(
            metric_name=metric_name,
            value=value,
            timestamp=self._now(),
            tags=metric_tags,
            service_name="telemetry_service",
        )

        if metric_name not in self._recent_metrics:
            self._recent_metrics[metric_name] = []

        self._recent_metrics[metric_name].append(data_point)

        # Trim cache to the newest entries
        if len(self._recent_metrics[metric_name]) > self._max_cached_metrics:
            self._recent_metrics[metric_name] = self._recent_metrics[metric_name][-self._max_cached_metrics :]

        if result.status != MemoryOpStatus.OK:
            logger.error(f"Failed to store metric: {result}")

    except Exception as e:
        logger.error(f"Failed to record metric {metric_name}: {e}")
|
|
1419
|
+
|
|
1420
|
+
async def _record_resource_usage(self, service_name: str, usage: ResourceUsage) -> None:
    """
    Record each populated field of a resource-usage record as its own metric (internal method).

    Every truthy field of ``usage`` is forwarded to ``record_metric`` under the
    name ``"<service_name>.<field>"``. Fields that are falsy (zero or unset)
    are skipped. Token counts are converted to ``float`` before recording;
    cost, carbon and energy values are passed through unchanged.

    Args:
        service_name: Prefix for the metric names and value of the "service" tag.
        usage: Object exposing tokens_used, tokens_input, tokens_output,
            cost_cents, carbon_grams and energy_kwh attributes.

    Any exception is logged and swallowed; nothing is raised to the caller.
    """
    # (attribute name, convert to float before recording?, tags added to {"service": ...})
    field_specs = (
        ("tokens_used", True, {"resource_type": "tokens"}),
        ("tokens_input", True, {"resource_type": "tokens", "direction": "input"}),
        ("tokens_output", True, {"resource_type": "tokens", "direction": "output"}),
        ("cost_cents", False, {"resource_type": "cost", "unit": "cents"}),
        ("carbon_grams", False, {"resource_type": "carbon", "unit": "grams"}),
        ("energy_kwh", False, {"resource_type": "energy", "unit": "kilowatt_hours"}),
    )

    try:
        for field_name, as_float, extra_tags in field_specs:
            amount = getattr(usage, field_name)
            if not amount:
                continue
            # A new tags dict is built for every call, so the spec table is never shared.
            await self.record_metric(
                f"{service_name}.{field_name}",
                float(amount) if as_float else amount,
                {"service": service_name, **extra_tags},
            )
    except Exception as e:
        logger.error(f"Failed to record resource usage for {service_name}: {e}")
|
|
1473
|
+
|
|
1474
|
+
async def query_metrics(
    self,
    metric_name: str,
    start_time: Optional[datetime] = None,
    end_time: Optional[datetime] = None,
    tags: Optional[Dict[str, str]] = None,
) -> List[MetricRecord]:
    """Fetch stored data points for one metric from graph memory.

    The memory bus is asked for local-scope time series covering the window
    computed by ``calculate_query_time_window``; each returned item is then
    kept only if it passes the metric-name, tag and time-range filters, and
    is converted to a ``MetricRecord``. Items whose conversion yields a falsy
    result are dropped.

    Args:
        metric_name: Name of the metric to return.
        start_time: Start of the time window (optional).
        end_time: End of the time window (optional).
        tags: Tag filter applied to each item (optional).

    Returns:
        List of typed MetricRecord objects, in the order the bus returned them.

    Raises:
        MemoryBusUnavailableError: If no memory bus is attached.
        MetricCollectionError: If anything else fails while querying; the
            original exception is chained as the cause.
    """
    from ciris_engine.logic.services.graph.telemetry_service.exceptions import (
        MemoryBusUnavailableError,
        MetricCollectionError,
    )
    from ciris_engine.logic.services.graph.telemetry_service.helpers import (
        calculate_query_time_window,
        convert_to_metric_record,
        filter_by_metric_name,
        filter_by_tags,
        filter_by_time_range,
    )
    from ciris_engine.schemas.services.graph.telemetry import MetricRecord

    if not self._memory_bus:
        raise MemoryBusUnavailableError("Memory bus not available for metric queries")

    try:
        lookback_hours = calculate_query_time_window(start_time, end_time, self._now())

        # Operational metrics are stored in local scope
        raw_points = await self._memory_bus.recall_timeseries(
            scope="local",
            hours=lookback_hours,
            start_time=start_time,
            end_time=end_time,
            handler_name="telemetry_service",
        )

        matched: List[MetricRecord] = []
        for point in raw_points:
            # Filters run in this order and stop at the first one that rejects the item.
            wanted = (
                filter_by_metric_name(point, metric_name)
                and filter_by_tags(point, tags)
                and filter_by_time_range(point, start_time, end_time)
            )
            if not wanted:
                continue

            converted = convert_to_metric_record(point)
            if converted:
                matched.append(converted)

        return matched

    except (MemoryBusUnavailableError, MetricCollectionError):
        # Already one of this service's own exception types: pass through untouched
        raise
    except Exception as e:
        raise MetricCollectionError(f"Failed to query metrics: {e}") from e
|
|
1550
|
+
|
|
1551
|
+
async def get_metric_summary(self, metric_name: str, window_minutes: int = 60) -> Dict[str, float]:
    """Summarise one metric over the trailing ``window_minutes`` minutes.

    Queries ``query_metrics`` for the window ending at ``self._now()`` and
    computes count, sum, min, max and avg over the records whose value is an
    int or float.

    Returns:
        Dict with float entries "count", "sum", "min", "max" and "avg".
        All five are 0.0 when there are no numeric records, and also when
        the query fails (the failure is logged, not raised).
    """
    empty_summary = {"count": 0.0, "sum": 0.0, "min": 0.0, "max": 0.0, "avg": 0.0}

    try:
        window_end = self._now()
        window_start = window_end - timedelta(minutes=window_minutes)

        records = await self.query_metrics(metric_name=metric_name, start_time=window_start, end_time=window_end)

        # Non-numeric values are left out of every statistic, including the count
        numeric = [rec.value for rec in records if isinstance(rec.value, (int, float))] if records else []
        if not numeric:
            return empty_summary

        total = sum(numeric)
        return {
            "count": float(len(numeric)),
            "sum": float(total),
            "min": float(min(numeric)),
            "max": float(max(numeric)),
            "avg": float(total / len(numeric)),
        }

    except Exception as e:
        logger.error(f"Failed to get metric summary for {metric_name}: {e}")
        return empty_summary
|
|
1578
|
+
|
|
1579
|
+
async def _get_service_status(
    self, service_name: Optional[str] = None
) -> Union[ServiceStatus, Dict[str, ServiceStatus]]:
    """
    Report status for one service, or for every service seen in the metric cache (internal method).

    For a named service the status is derived from the most recent cached
    ``"<service_name>.tokens_used"`` data point: the service is reported
    healthy only if such a point exists, and that point supplies the
    ``recent_tokens`` metric and the last health-check timestamp.

    With no name, service names are taken from the text before the first
    "." of every cached metric name and each one is resolved through this
    same method.

    Args:
        service_name: Service to report on; ``None`` (or empty) means all services.

    Returns:
        A ``ServiceStatus`` when ``service_name`` was given, otherwise a dict
        mapping service name to ``ServiceStatus``. On any exception the
        error is logged and an unhealthy ``ServiceStatus`` (single service)
        or an empty dict (all services) is returned.
    """
    try:
        if service_name:
            # Get status for specific service
            recent_metrics = self._recent_metrics.get(f"{service_name}.tokens_used", [])
            last_metric = recent_metrics[-1] if recent_metrics else None

            return ServiceStatus(
                service_name=service_name,
                service_type="telemetry",
                is_healthy=bool(last_metric),
                uptime_seconds=0.0,  # Uptime tracked at service level
                last_error=None,
                metrics={"recent_tokens": last_metric.value if last_metric else 0.0},
                custom_metrics=None,
                last_health_check=last_metric.timestamp if last_metric else None,
            )

        # Get status for all services.
        # The prefixes are collected under their own name: rebinding the
        # `service_name` parameter here would make the except-handler below
        # believe a single service had been requested and return the wrong type.
        discovered = {cached_name.split(".")[0] for cached_name in self._recent_metrics if "." in cached_name}
        # A metric name starting with "." gives an empty prefix; resolving ""
        # would re-enter this all-services branch and recurse without end.
        discovered.discard("")

        all_status: Dict[str, ServiceStatus] = {}
        for svc_name in discovered:
            status = await self._get_service_status(svc_name)
            if isinstance(status, ServiceStatus):
                all_status[svc_name] = status

        return all_status

    except Exception as e:
        logger.error(f"Failed to get service status: {e}")
        if service_name:
            return ServiceStatus(
                service_name=service_name,
                service_type="telemetry",
                is_healthy=False,
                uptime_seconds=0.0,
                last_error=str(e),
                metrics={},
                custom_metrics=None,
                last_health_check=None,
            )
        # Return empty dict for all services case
        return {}
|
|
1638
|
+
|
|
1639
|
+
def _get_resource_limits(self) -> ResourceLimits:
    """Return the resource limits object held by this service (internal method)."""
    limits = self._resource_limits
    return limits
|
|
1642
|
+
|
|
1643
|
+
async def _process_system_snapshot(
    self, snapshot: SystemSnapshot, thought_id: str, task_id: Optional[str] = None
) -> TelemetrySnapshotResult:
    """
    Turn a SystemSnapshot into graph memories (internal method).

    Each populated section of the snapshot is stored through the matching
    ``_store_*`` helper, and ``memories_created`` is incremented once per
    section stored:

    1. telemetry summary -> operational metrics
    2. current task details / current thought summary -> behavioral nodes
    3. user profiles (with channel context) -> social node
    4. agent identity or identity purpose -> identity node

    Args:
        snapshot: Snapshot to persist.
        thought_id: Thought the snapshot belongs to; used in tags and node ids.
        task_id: Optional task id attached to the stored metrics.

    Returns:
        A TelemetrySnapshotResult. If the memory bus is missing, or any
        exception is raised while storing, the result carries
        ``memories_created=0`` and the error text instead.
    """
    try:
        if not self._memory_bus:
            logger.error("Memory bus not available for telemetry storage")
            return TelemetrySnapshotResult(
                memories_created=0,
                errors=["Memory bus not available"],
                consolidation_triggered=False,
                consolidation_result=None,
                error="Memory bus not available",
            )

        outcome = TelemetrySnapshotResult(
            memories_created=0, errors=[], consolidation_triggered=False, consolidation_result=None, error=None
        )

        # 1. Operational metrics copied field-by-field from the telemetry summary
        summary = snapshot.telemetry_summary
        if summary:
            summary_fields = (
                "messages_processed_24h",
                "thoughts_processed_24h",
                "tasks_completed_24h",
                "errors_24h",
                "messages_current_hour",
                "thoughts_current_hour",
                "errors_current_hour",
                "tokens_last_hour",
                "cost_last_hour_cents",
                "carbon_last_hour_grams",
                "energy_last_hour_kwh",
                "error_rate_percent",
                "avg_thought_depth",
                "queue_saturation",
            )
            # TelemetryData takes only metrics and events (no counters field)
            telemetry_data = TelemetryData(
                metrics={field: getattr(summary, field) for field in summary_fields},
                events={},
            )
            await self._store_telemetry_metrics(telemetry_data, thought_id, task_id)
            outcome.memories_created += 1

        # 2. Resource usage: SystemSnapshot has no current_round_resources;
        #    resource data would come from telemetry_summary if needed.

        # 3. Behavioral data (task first, then thought)
        for kind, attr_name in (("task", "current_task_details"), ("thought", "current_thought_summary")):
            details = getattr(snapshot, attr_name)
            if not details:
                continue
            payload = details.model_dump() if hasattr(details, "model_dump") else {}
            behavioral_data = BehavioralData(
                data_type=kind,
                content=payload,
                metadata={"thought_id": thought_id},
            )
            await self._store_behavioral_data(behavioral_data, kind, thought_id)
            outcome.memories_created += 1

        # 4. Social context (user profiles, channel info)
        if snapshot.user_profiles:
            await self._store_social_context(snapshot.user_profiles, snapshot.channel_context, thought_id)
            outcome.memories_created += 1

        # 5. Identity context
        if snapshot.agent_identity or snapshot.identity_purpose:
            await self._store_identity_context(snapshot, thought_id)
            outcome.memories_created += 1

        # Consolidation is handled by TSDBConsolidationService, not here
        return outcome

    except Exception as e:
        logger.error(f"Failed to process system snapshot: {e}")
        failure_text = str(e)
        return TelemetrySnapshotResult(
            memories_created=0,
            errors=[failure_text],
            consolidation_triggered=False,
            consolidation_result=None,
            error=failure_text,
        )
|
|
1745
|
+
|
|
1746
|
+
async def _store_telemetry_metrics(self, telemetry: TelemetryData, thought_id: str, task_id: Optional[str]) -> None:
    """Record every metric and event of ``telemetry`` through ``record_metric``.

    Metrics are recorded as ``"telemetry.<key>"`` with their value converted
    to float. Events are recorded as ``"telemetry.event.<key>"`` with the
    value 1.0 (one occurrence) and the stringified event payload in the
    "event_value" tag. All entries are tagged with the thought id, the task
    id (empty string when there is none) and the operational memory type.
    """
    task_tag = task_id or ""
    operational = MemoryType.OPERATIONAL.value

    def _common_tags() -> Dict[str, str]:
        # Built anew for each call: record_metric updates the tags dict it is given in place.
        return {"thought_id": thought_id, "task_id": task_tag, "memory_type": operational}

    # Process metrics
    for key, value in telemetry.metrics.items():
        await self.record_metric(f"telemetry.{key}", float(value), _common_tags())

    # Process events
    for event_key, event_value in telemetry.events.items():
        event_tags = _common_tags()
        event_tags["event_value"] = str(event_value)
        await self.record_metric(f"telemetry.event.{event_key}", 1.0, event_tags)
|
|
1768
|
+
|
|
1769
|
+
async def _store_resource_usage(self, resources: ResourceData, thought_id: str, task_id: Optional[str]) -> None:
    """Record the LLM section of ``resources`` as operational metrics.

    Does nothing when ``resources.llm`` is empty. Otherwise the known usage
    keys are read from the mapping, values of the wrong type are treated as
    missing, and the result is handed to ``_record_resource_usage`` under
    the service name "llm_service". Missing numeric values become 0 / 0.0.
    """
    if not resources.llm:
        return

    # Extract only the fields that ResourceUsage expects
    from ciris_engine.schemas.services.graph.telemetry import LLMUsageData

    raw_llm = resources.llm

    def _number_or_none(key: str) -> Optional[float]:
        candidate = raw_llm.get(key)
        return candidate if isinstance(candidate, (int, float)) else None

    raw_model = raw_llm.get("model_used")

    # Validate through LLMUsageData first
    llm_data = LLMUsageData(
        tokens_used=_number_or_none("tokens_used"),
        tokens_input=_number_or_none("tokens_input"),
        tokens_output=_number_or_none("tokens_output"),
        cost_cents=_number_or_none("cost_cents"),
        carbon_grams=_number_or_none("carbon_grams"),
        energy_kwh=_number_or_none("energy_kwh"),
        model_used=raw_model if isinstance(raw_model, str) else None,
    )

    # Build ResourceUsage with concrete types: ints for token counts, floats for the rest
    usage = ResourceUsage(
        tokens_used=0 if llm_data.tokens_used is None else int(llm_data.tokens_used),
        tokens_input=0 if llm_data.tokens_input is None else int(llm_data.tokens_input),
        tokens_output=0 if llm_data.tokens_output is None else int(llm_data.tokens_output),
        cost_cents=0.0 if llm_data.cost_cents is None else float(llm_data.cost_cents),
        carbon_grams=0.0 if llm_data.carbon_grams is None else float(llm_data.carbon_grams),
        energy_kwh=0.0 if llm_data.energy_kwh is None else float(llm_data.energy_kwh),
        model_used=llm_data.model_used,
    )
    await self._record_resource_usage("llm_service", usage)
|
|
1823
|
+
|
|
1824
|
+
async def _store_behavioral_data(self, data: BehavioralData, data_type: str, thought_id: str) -> None:
    """Persist one behavioral record (task or thought) as a local-scope graph node.

    The node id is ``"behavioral_<thought_id>_<data_type>"``. The node is
    always constructed, but it is only written when a memory bus is attached.
    """
    node_attributes = {
        "data_type": data.data_type,
        "thought_id": thought_id,
        "content": data.content,
        "metadata": data.metadata,
        "memory_type": MemoryType.BEHAVIORAL.value,
        "tags": {"thought_id": thought_id, "data_type": data_type},
    }
    node = GraphNode(
        id=f"behavioral_{thought_id}_{data_type}",
        type=NodeType.BEHAVIORAL,
        scope=GraphScope.LOCAL,
        updated_by="telemetry_service",
        updated_at=self._now(),
        attributes=node_attributes,
    )

    if not self._memory_bus:
        return
    await self._memory_bus.memorize(node=node, handler_name="telemetry_service", metadata={"behavioral": True})
|
|
1844
|
+
|
|
1845
|
+
async def _store_social_context(
    self, user_profiles: List[UserProfile], channel_context: Optional[SystemChannelContext], thought_id: str
) -> None:
    """Persist user profiles and channel context as one local-scope graph node.

    The node id is ``"social_<thought_id>"``; profiles and the channel
    context are stored in their ``model_dump()`` form. The node is always
    constructed, but it is only written when a memory bus is attached.
    """
    dumped_profiles = [profile.model_dump() for profile in user_profiles]
    dumped_channel = channel_context.model_dump() if channel_context else None

    node = GraphNode(
        id=f"social_{thought_id}",
        type=NodeType.SOCIAL,
        scope=GraphScope.LOCAL,
        updated_by="telemetry_service",
        updated_at=self._now(),
        attributes={
            "user_profiles": dumped_profiles,
            "channel_context": dumped_channel,
            "memory_type": MemoryType.SOCIAL.value,
            "tags": {"thought_id": thought_id, "user_count": str(len(user_profiles))},
        },
    )

    if not self._memory_bus:
        return
    await self._memory_bus.memorize(node=node, handler_name="telemetry_service", metadata={"social": True})
|
|
1865
|
+
|
|
1866
|
+
async def _store_identity_context(self, snapshot: SystemSnapshot, thought_id: str) -> None:
    """Persist the identity fields of ``snapshot`` as an identity-scope graph node.

    The agent name is taken from the "name" key of ``snapshot.agent_identity``
    (falling back to "agent_name") when that attribute is a non-empty dict,
    and is ``None`` otherwise. The node id is ``"identity_<thought_id>"``.
    The node is always constructed, but it is only written when a memory bus
    is attached.
    """
    identity = snapshot.agent_identity
    if identity and isinstance(identity, dict):
        agent_name = identity.get("name") or identity.get("agent_name")
    else:
        agent_name = None

    purpose = snapshot.identity_purpose
    node = GraphNode(
        id=f"identity_{thought_id}",
        type=NodeType.IDENTITY,
        scope=GraphScope.IDENTITY,
        updated_by="telemetry_service",
        updated_at=self._now(),
        attributes={
            "agent_name": agent_name,
            "identity_purpose": purpose,
            "identity_capabilities": snapshot.identity_capabilities,
            "identity_restrictions": snapshot.identity_restrictions,
            "memory_type": MemoryType.IDENTITY.value,
            "tags": {"thought_id": thought_id, "has_purpose": str(bool(purpose))},
        },
    )

    if not self._memory_bus:
        return
    await self._memory_bus.memorize(node=node, handler_name="telemetry_service", metadata={"identity": True})
|
|
1891
|
+
|
|
1892
|
+
async def start(self) -> None:
    """Mark the service as started and remember the UTC start time."""
    from datetime import datetime, timezone

    # The base-class start() is intentionally not invoked here
    # (original author's note: BaseService has an async start).
    self._start_time = datetime.now(timezone.utc)
    self._started = True
    logger.debug("GraphTelemetryService started - routing all metrics through memory graph")
|
|
1900
|
+
|
|
1901
|
+
async def stop(self) -> None:
    """Mark the service as stopped, then try (best effort) to record a shutdown metric.

    The started flag is cleared before anything else so no new operations
    begin. Recording the "telemetry_service.shutdown" metric is limited to
    one second; a timeout or any other error is logged at debug level and
    never prevents shutdown.
    """
    self._started = False

    try:
        shutdown_metric = self.record_metric(
            "telemetry_service.shutdown", 1.0, {"event": "service_stop", "timestamp": self._now().isoformat()}
        )
        # Short timeout so a slow memory backend cannot hang shutdown
        await asyncio.wait_for(shutdown_metric, timeout=1.0)
    except Exception as e:  # asyncio.TimeoutError is an Exception subclass, so it lands here too
        logger.debug(f"Could not record shutdown metric: {e}")

    logger.debug("GraphTelemetryService stopped")
|
|
1919
|
+
|
|
1920
|
+
def _collect_custom_metrics(self) -> Dict[str, float]:
    """Extend the base class's custom metrics with cache statistics for this service.

    Adds: total cached data points, number of distinct metric names, number
    of summary-cache entries, number of cached points timestamped within the
    last minute, an approximate cache size in MB, and the configured
    per-metric cache limit.
    """
    metrics = super()._collect_custom_metrics()
    cached_series = self._recent_metrics

    # Rough size only: sys.getsizeof is shallow and does not follow references
    # into the lists and data points held by the two containers.
    try:
        approx_bytes = sys.getsizeof(cached_series) + sys.getsizeof(self._summary_cache)
        cache_size_mb = approx_bytes / 1024 / 1024
    except Exception:
        cache_size_mb = 0.0

    total_cached = sum(len(series) for series in cached_series.values())
    distinct_names = len(cached_series)

    # Cached points whose timestamp falls inside the last minute
    points_last_minute = 0.0
    if cached_series:
        cutoff = self._now() - timedelta(minutes=1)
        points_last_minute = float(
            sum(
                1
                for series in cached_series.values()
                for point in series
                if hasattr(point, "timestamp") and point.timestamp >= cutoff
            )
        )

    metrics.update(
        {
            "total_metrics_cached": float(total_cached),
            "unique_metric_types": float(distinct_names),
            "summary_cache_entries": float(len(self._summary_cache)),
            "metrics_per_minute": points_last_minute,
            "cache_size_mb": cache_size_mb,
            "max_cached_metrics_per_type": float(self._max_cached_metrics),
        }
    )

    return metrics
|
|
1961
|
+
|
|
1962
|
+
async def get_metrics(self) -> Dict[str, float]:
    """
    Return the base and custom metrics plus the ``telemetry_*`` (v1.4.3) metrics.

    The extra keys are computed from current service state:

    - telemetry_metrics_collected: number of cached data points
    - telemetry_services_monitored: size of all aggregator categories when an
      aggregator exists, otherwise the number of distinct "service" tags
      found on cached data points
    - telemetry_cache_hits: number of entries in the summary cache
    - telemetry_collection_errors: the collected "error_count" (default 0.0)
    - telemetry_uptime_seconds: the collected "uptime_seconds" (default 0.0)
    """
    metrics = self._collect_metrics()
    cached_series = self._recent_metrics

    collected_total = sum(len(series) for series in cached_series.values())

    aggregator = self._telemetry_aggregator
    if aggregator:
        # Every service listed in any aggregator category counts as monitored
        monitored = sum(len(members) for members in aggregator.CATEGORIES.values())
    else:
        # Fallback: distinct "service" tag values among the cached data points
        tagged_services = {
            point.tags["service"]
            for series in cached_series.values()
            for point in series
            if hasattr(point, "tags") and point.tags and "service" in point.tags
        }
        monitored = len(tagged_services)

    summary_entries = len(self._summary_cache)
    error_total = metrics.get("error_count", 0.0)
    uptime = metrics.get("uptime_seconds", 0.0)

    metrics.update(
        {
            "telemetry_metrics_collected": float(collected_total),
            "telemetry_services_monitored": float(monitored),
            "telemetry_cache_hits": float(summary_entries),
            "telemetry_collection_errors": float(error_total),
            "telemetry_uptime_seconds": float(uptime),
        }
    )

    return metrics
|
|
2010
|
+
|
|
2011
|
+
def get_node_type(self) -> str:
    """Return the node-type label for this service, the constant "TELEMETRY"."""
    return "TELEMETRY"
|
|
2014
|
+
|
|
2015
|
+
async def get_metric_count(self) -> int:
    """Count the stored metric data points.

    Runs a ``COUNT(*)`` over ``graph_nodes`` rows whose ``node_type`` is
    'tsdb_data', using the database path exposed by the memory service
    (``db_path`` attribute, or ``None`` when it has none).

    Returns:
        The row count, or 0 when the memory bus or memory service is
        unavailable, when the query returns no row, or when any error occurs
        (errors are logged with traceback, not raised).
    """
    try:
        if not self._memory_bus:
            logger.debug("Memory bus not available, returning 0 metric count")
            return 0

        # Query the database directly to count TSDB_DATA nodes
        from ciris_engine.logic.persistence import get_db_connection

        # The memory service is looked up only for its db_path
        memory_service = await self._memory_bus.get_service(handler_name="telemetry_service")
        if not memory_service:
            logger.debug("Memory service not available, returning 0 metric count")
            return 0

        with get_db_connection(db_path=getattr(memory_service, "db_path", None)) as conn:
            cursor = conn.cursor()
            cursor.execute("SELECT COUNT(*) as cnt FROM graph_nodes WHERE node_type = 'tsdb_data'")
            row = cursor.fetchone()

            # Rows may be dicts (PostgreSQL RealDictCursor) or indexable sequences (SQLite Row)
            if row is None:
                count = 0
            else:
                count = row.get("cnt", 0) if isinstance(row, dict) else row[0]

            logger.debug(f"Total metric count from graph nodes: {count}")
            return count

    except Exception as e:
        logger.error(f"Failed to get metric count: {type(e).__name__}: {e}", exc_info=True)
        return 0
|
|
2055
|
+
|
|
2056
|
+
async def get_telemetry_summary(self) -> "TelemetrySummary":
    """Build (or return the cached) aggregated telemetry summary for the system snapshot.

    Circuit breaker state is collected on every call and is written onto a
    cached summary before that summary is returned. The summary itself is
    cached under the key "telemetry_summary" and reused for
    ``self._summary_cache_ttl_seconds``.

    On a cache miss, metric aggregates are collected for the last 24 hours
    and the last hour, then combined with average thought depth, queue
    saturation, service uptime, error rate and average service latencies.

    Handled internally (value falls back to 0.0, logged at info level):
        NoThoughtDataError: no thought data in the last 24h.
        RuntimeControlBusUnavailableError / QueueStatusUnavailableError:
            queue saturation could not be obtained.

    Raises:
        MemoryBusUnavailableError: On a cache miss when no memory bus is attached.
        Any other exception raised by the helper functions propagates to the caller.
    """
    from ciris_engine.logic.services.graph.telemetry_service.exceptions import (
        MemoryBusUnavailableError,
        NoThoughtDataError,
        QueueStatusUnavailableError,
        RuntimeControlBusUnavailableError,
    )
    from ciris_engine.logic.services.graph.telemetry_service.helpers import (
        METRIC_TYPES,
        build_telemetry_summary,
        calculate_average_latencies,
        calculate_error_rate,
        check_summary_cache,
        collect_circuit_breaker_state,
        collect_metric_aggregates,
        get_average_thought_depth,
        get_queue_saturation,
        get_service_uptime,
        store_summary_cache,
    )

    now = self._now()

    # Circuit breaker state is never served from cache (it changes rapidly)
    breaker_state = collect_circuit_breaker_state(self._runtime) or {}

    cached_summary: TelemetrySummary | None = check_summary_cache(
        self._summary_cache, "telemetry_summary", now, self._summary_cache_ttl_seconds
    )
    if cached_summary:
        logger.debug("Returning cached telemetry summary with fresh circuit breaker data")
        cached_summary.circuit_breaker = breaker_state
        return cached_summary

    # Fail fast if memory bus not available
    if not self._memory_bus:
        raise MemoryBusUnavailableError("Memory bus not available for telemetry queries")

    # Time windows: both end at `now`
    day_start = now - timedelta(hours=24)
    hour_start = now - timedelta(hours=1)

    # Collect metrics (raises on error, no fallbacks)
    aggregates = await collect_metric_aggregates(self, METRIC_TYPES, day_start, hour_start, now)

    try:
        avg_thought_depth = await get_average_thought_depth(self._memory_bus, day_start)
    except NoThoughtDataError:
        # Acceptable during startup or low-activity periods
        logger.info("No thought data available in last 24h - setting to 0.0")
        avg_thought_depth = 0.0

    try:
        queue_saturation = await get_queue_saturation(getattr(self, "_runtime_control_bus", None))
    except (RuntimeControlBusUnavailableError, QueueStatusUnavailableError) as e:
        # Acceptable if runtime control not available
        logger.info(f"Queue saturation unavailable: {e} - setting to 0.0")
        queue_saturation = 0.0

    uptime = get_service_uptime(getattr(self, "_start_time", None), now)

    # Derived metrics: errors relative to all processed messages, thoughts and tasks
    processed_24h = aggregates.messages_24h + aggregates.thoughts_24h + aggregates.tasks_24h
    error_rate = calculate_error_rate(aggregates.errors_24h, processed_24h)
    service_latency_ms = calculate_average_latencies(aggregates.service_latency)

    summary = build_telemetry_summary(
        day_start,
        now,
        uptime,
        aggregates,
        error_rate,
        avg_thought_depth,
        queue_saturation,
        service_latency_ms,
        circuit_breaker=breaker_state,
    )

    store_summary_cache(self._summary_cache, "telemetry_summary", now, summary)
    return summary
|
|
2162
|
+
|
|
2163
|
+
async def get_continuity_summary(self) -> Optional[ContinuitySummary]:
    """Return the continuity awareness summary, served from cache when still fresh.

    On a cache miss the summary is rebuilt by ``build_continuity_summary_from_memory``
    from the memory bus (startup and shutdown lifecycle nodes tagged
    'continuity_awareness') and, when one was produced, cached for later calls.

    Returns:
        The ContinuitySummary, or None when the helper produced nothing
        (e.g. the memory service is unavailable).
    """
    from ciris_engine.logic.services.graph.telemetry_service.helpers import (
        build_continuity_summary_from_memory,
        check_summary_cache,
        store_summary_cache,
    )

    current_time = self._now()

    # Fast path: reuse a summary that is still inside the cache TTL
    cached_summary = check_summary_cache(
        self._summary_cache, "continuity_summary", current_time, self._summary_cache_ttl_seconds
    )
    if cached_summary:
        logger.debug("Returning cached continuity summary")
        return cached_summary  # type: ignore[no-any-return]

    # Slow path: rebuild from memory nodes; the time service is optional on this object
    time_service = getattr(self, "_time_service", None)
    continuity = await build_continuity_summary_from_memory(self._memory_bus, time_service, self._start_time)

    # Only a real summary is cached, so a failed build is retried on the next call
    if continuity:
        store_summary_cache(self._summary_cache, "continuity_summary", current_time, continuity)

    return continuity
|
|
2196
|
+
|
|
2197
|
+
# Required methods for BaseGraphService
|
|
2198
|
+
|
|
2199
|
+
def get_service_type(self) -> ServiceType:
    """Identify this service as the telemetry service.

    Returns:
        ServiceType.TELEMETRY, always.
    """
    service_type = ServiceType.TELEMETRY
    return service_type
|
|
2202
|
+
|
|
2203
|
+
def _init_telemetry_aggregator(self) -> None:
    """Create the TelemetryAggregator, logging debug diagnostics about its inputs.

    Does nothing when no service registry is set. Otherwise it logs what the
    registry and runtime look like, then builds the aggregator from the
    registry, the time service and the runtime.
    """
    if not self._service_registry:
        return

    logger.debug(f"[TELEMETRY] Creating TelemetryAggregator with registry {id(self._service_registry)}")
    try:
        all_services = self._service_registry.get_all_services()
        if hasattr(all_services, "__len__"):
            service_count = len(all_services)
        else:
            service_count = 0
        logger.debug(f"[TELEMETRY] Registry has {service_count} services")
        if all_services:
            service_names = [s.__class__.__name__ for s in all_services]
        else:
            service_names = []
        logger.debug(f"[TELEMETRY] Services in registry: {service_names}")
    except (TypeError, AttributeError):
        # Diagnostics are best-effort: a registry that cannot be inspected is tolerated
        logger.debug("[TELEMETRY] Registry is mock/test mode")

    logger.debug(f"[TELEMETRY] Runtime available: {self._runtime is not None}")
    if not self._runtime:
        logger.debug("[TELEMETRY] ⚠️ Runtime is None when creating aggregator!")
    else:
        logger.debug(f"[TELEMETRY] Runtime has bus_manager: {hasattr(self._runtime, 'bus_manager')}")
        logger.debug(f"[TELEMETRY] Runtime has memory_service: {hasattr(self._runtime, 'memory_service')}")

    # The aggregator is created even without a runtime; the warning above records that case
    self._telemetry_aggregator = TelemetryAggregator(
        service_registry=self._service_registry, time_service=self._time_service, runtime=self._runtime
    )
    logger.debug(f"[TELEMETRY] TelemetryAggregator created with runtime={self._runtime is not None}")
|
|
2227
|
+
|
|
2228
|
+
def _check_cache(self, cache_key: str, now: datetime) -> Optional[AggregatedTelemetryResponse]:
    """Look up a still-fresh entry in the aggregator's cache.

    Args:
        cache_key: Key of the entry in ``self._telemetry_aggregator.cache``.
        now: Current time, compared against the time the entry was stored.

    Returns:
        The cached data when the entry exists and is younger than the
        aggregator's ``cache_ttl``; None when there is no aggregator, no
        entry, or the entry has expired.
    """
    aggregator = self._telemetry_aggregator
    if not aggregator or cache_key not in aggregator.cache:
        return None

    stored_at, payload = aggregator.cache[cache_key]
    is_fresh = now - stored_at < aggregator.cache_ttl
    if not is_fresh:
        return None

    # Flag typed responses as served from cache; the shared cached object is mutated in place
    if isinstance(payload, AggregatedTelemetryResponse) and payload.metadata:
        payload.metadata.cache_hit = True
    return payload
|
|
2240
|
+
|
|
2241
|
+
def _convert_telemetry_to_services(
    self, telemetry: Dict[str, Dict[str, ServiceTelemetryData]]
) -> Dict[str, ServiceTelemetryData]:
    """Flatten category -> service -> data telemetry into a single service -> data mapping.

    Category values that are not dicts are skipped, as are service entries
    that are neither ServiceTelemetryData nor dict. Plain dict entries are
    converted to ServiceTelemetryData. If the same service name appears in
    several categories, the last one iterated wins.

    Args:
        telemetry: Nested telemetry keyed by category, then by service name.

    Returns:
        Mapping of service name to ServiceTelemetryData.
    """
    flattened: Dict[str, ServiceTelemetryData] = {}
    for category_services in telemetry.values():
        if not isinstance(category_services, dict):
            continue
        for name, info in category_services.items():
            if isinstance(info, ServiceTelemetryData):
                flattened[name] = info
                continue
            if not isinstance(info, dict):
                continue
            # Raw dicts use "request_count"; the typed model calls that field requests_handled
            flattened[name] = ServiceTelemetryData(
                healthy=info.get("healthy", False),
                uptime_seconds=info.get("uptime_seconds"),
                error_count=info.get("error_count"),
                requests_handled=info.get("request_count"),
                error_rate=info.get("error_rate"),
                memory_mb=info.get("memory_mb"),
                custom_metrics=info.get("custom_metrics"),
            )
    return flattened
|
|
2262
|
+
|
|
2263
|
+
async def get_aggregated_telemetry(self) -> AggregatedTelemetryResponse:
    """
    Collect telemetry from all services in parallel and return the aggregated view.

    The aggregator is created lazily on first use when a service registry is
    available. A fresh cached response is returned when the aggregator has
    one; otherwise telemetry is collected, aggregated, flattened per service,
    cached and returned.

    Returns:
        AggregatedTelemetryResponse. When no aggregator could be created the
        response reports an unhealthy system with zeroed counters and an
        ``error`` message.
    """
    # Lazily create the aggregator
    if not self._telemetry_aggregator and self._service_registry:
        self._init_telemetry_aggregator()

    aggregator = self._telemetry_aggregator
    if not aggregator:
        logger.warning("No telemetry aggregator available")
        return AggregatedTelemetryResponse(
            system_healthy=False,
            services_online=0,
            services_total=0,
            overall_error_rate=0.0,
            overall_uptime_seconds=0,
            total_errors=0,
            total_requests=0,
            timestamp=datetime.now(timezone.utc).isoformat(),
            error="Telemetry aggregator not initialized",
        )

    cache_key = "aggregated_telemetry"
    now = datetime.now(timezone.utc)

    # Serve a still-fresh cached response when there is one
    cached_response = self._check_cache(cache_key, now)
    if cached_response:
        return cached_response

    # Cache miss: gather per-service telemetry in parallel, then derive the system-wide figures
    raw_telemetry = await aggregator.collect_all_parallel()
    totals = aggregator.calculate_aggregates(raw_telemetry)
    per_service = self._convert_telemetry_to_services(raw_telemetry)

    collected_at = now.isoformat()
    response_metadata = AggregatedTelemetryMetadata(
        collection_method="parallel", cache_ttl_seconds=30, timestamp=collected_at
    )
    response = AggregatedTelemetryResponse(
        system_healthy=totals.get("system_healthy", False),
        services_online=totals.get("services_online", 0),
        services_total=totals.get("services_total", 0),
        overall_error_rate=totals.get("overall_error_rate", 0.0),
        overall_uptime_seconds=totals.get("overall_uptime_seconds", 0),
        total_errors=totals.get("total_errors", 0),
        total_requests=totals.get("total_requests", 0),
        timestamp=totals.get("timestamp", collected_at),
        services=per_service,
        metadata=response_metadata,
    )

    # Remember the response together with its collection time for the TTL check
    aggregator.cache[cache_key] = (now, response)

    return response
|
|
2325
|
+
|
|
2326
|
+
async def _store_correlation(self, correlation: ServiceCorrelation) -> None:
    """
    Remember a service correlation (trace span) in the in-memory recent cache.

    Correlations are deliberately not stored as graph nodes: telemetry
    correlations go in the correlations DB. This method only appends the
    correlation to ``self._recent_correlations`` (created on first use) and
    trims that list to the most recent 1000 entries.

    Args:
        correlation: The correlation to remember.

    Failures are logged and never raised, so telemetry problems cannot break
    the application.
    """
    try:
        # Keep a recent cache for quick access
        if not hasattr(self, "_recent_correlations"):
            self._recent_correlations = []

        self._recent_correlations.append(correlation)
        # Keep only last 1000 correlations in memory
        if len(self._recent_correlations) > 1000:
            self._recent_correlations = self._recent_correlations[-1000:]

    except Exception as e:
        logger.error(f"Failed to store correlation {correlation.correlation_id}: {e}")
        # Don't raise - we don't want telemetry failures to break the application
|
|
2408
|
+
|
|
2409
|
+
def _get_actions(self) -> List[str]:
|
|
2410
|
+
"""Get the list of actions this service supports."""
|
|
2411
|
+
return [
|
|
2412
|
+
"record_metric",
|
|
2413
|
+
"query_metrics",
|
|
2414
|
+
"get_metric_summary",
|
|
2415
|
+
"get_metric_count",
|
|
2416
|
+
"get_telemetry_summary",
|
|
2417
|
+
"process_system_snapshot",
|
|
2418
|
+
"get_resource_usage",
|
|
2419
|
+
"get_telemetry_status",
|
|
2420
|
+
]
|
|
2421
|
+
|
|
2422
|
+
def _check_dependencies(self) -> bool:
    """Report whether every dependency of this service is satisfied.

    Returns:
        True when the parent class's dependency check (memory bus) passes,
        False otherwise.
    """
    # Telemetry has no additional required dependencies beyond memory bus,
    # so the parent's verdict is the final answer.
    parent_satisfied = super()._check_dependencies()
    return True if parent_satisfied else False
|