holmesgpt 0.11.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of holmesgpt might be problematic. Click here for more details.

Files changed (183) hide show
  1. holmes/.git_archival.json +7 -0
  2. holmes/__init__.py +76 -0
  3. holmes/__init__.py.bak +76 -0
  4. holmes/clients/robusta_client.py +24 -0
  5. holmes/common/env_vars.py +47 -0
  6. holmes/config.py +526 -0
  7. holmes/core/__init__.py +0 -0
  8. holmes/core/conversations.py +578 -0
  9. holmes/core/investigation.py +152 -0
  10. holmes/core/investigation_structured_output.py +264 -0
  11. holmes/core/issue.py +54 -0
  12. holmes/core/llm.py +250 -0
  13. holmes/core/models.py +157 -0
  14. holmes/core/openai_formatting.py +51 -0
  15. holmes/core/performance_timing.py +72 -0
  16. holmes/core/prompt.py +42 -0
  17. holmes/core/resource_instruction.py +17 -0
  18. holmes/core/runbooks.py +26 -0
  19. holmes/core/safeguards.py +120 -0
  20. holmes/core/supabase_dal.py +540 -0
  21. holmes/core/tool_calling_llm.py +798 -0
  22. holmes/core/tools.py +566 -0
  23. holmes/core/tools_utils/__init__.py +0 -0
  24. holmes/core/tools_utils/tool_executor.py +65 -0
  25. holmes/core/tools_utils/toolset_utils.py +52 -0
  26. holmes/core/toolset_manager.py +418 -0
  27. holmes/interactive.py +229 -0
  28. holmes/main.py +1041 -0
  29. holmes/plugins/__init__.py +0 -0
  30. holmes/plugins/destinations/__init__.py +6 -0
  31. holmes/plugins/destinations/slack/__init__.py +2 -0
  32. holmes/plugins/destinations/slack/plugin.py +163 -0
  33. holmes/plugins/interfaces.py +32 -0
  34. holmes/plugins/prompts/__init__.py +48 -0
  35. holmes/plugins/prompts/_current_date_time.jinja2 +1 -0
  36. holmes/plugins/prompts/_default_log_prompt.jinja2 +11 -0
  37. holmes/plugins/prompts/_fetch_logs.jinja2 +36 -0
  38. holmes/plugins/prompts/_general_instructions.jinja2 +86 -0
  39. holmes/plugins/prompts/_global_instructions.jinja2 +12 -0
  40. holmes/plugins/prompts/_runbook_instructions.jinja2 +13 -0
  41. holmes/plugins/prompts/_toolsets_instructions.jinja2 +56 -0
  42. holmes/plugins/prompts/generic_ask.jinja2 +36 -0
  43. holmes/plugins/prompts/generic_ask_conversation.jinja2 +32 -0
  44. holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +50 -0
  45. holmes/plugins/prompts/generic_investigation.jinja2 +42 -0
  46. holmes/plugins/prompts/generic_post_processing.jinja2 +13 -0
  47. holmes/plugins/prompts/generic_ticket.jinja2 +12 -0
  48. holmes/plugins/prompts/investigation_output_format.jinja2 +32 -0
  49. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +84 -0
  50. holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +39 -0
  51. holmes/plugins/runbooks/README.md +22 -0
  52. holmes/plugins/runbooks/__init__.py +100 -0
  53. holmes/plugins/runbooks/catalog.json +14 -0
  54. holmes/plugins/runbooks/jira.yaml +12 -0
  55. holmes/plugins/runbooks/kube-prometheus-stack.yaml +10 -0
  56. holmes/plugins/runbooks/networking/dns_troubleshooting_instructions.md +66 -0
  57. holmes/plugins/runbooks/upgrade/upgrade_troubleshooting_instructions.md +44 -0
  58. holmes/plugins/sources/github/__init__.py +77 -0
  59. holmes/plugins/sources/jira/__init__.py +123 -0
  60. holmes/plugins/sources/opsgenie/__init__.py +93 -0
  61. holmes/plugins/sources/pagerduty/__init__.py +147 -0
  62. holmes/plugins/sources/prometheus/__init__.py +0 -0
  63. holmes/plugins/sources/prometheus/models.py +104 -0
  64. holmes/plugins/sources/prometheus/plugin.py +154 -0
  65. holmes/plugins/toolsets/__init__.py +171 -0
  66. holmes/plugins/toolsets/aks-node-health.yaml +65 -0
  67. holmes/plugins/toolsets/aks.yaml +86 -0
  68. holmes/plugins/toolsets/argocd.yaml +70 -0
  69. holmes/plugins/toolsets/atlas_mongodb/instructions.jinja2 +8 -0
  70. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +307 -0
  71. holmes/plugins/toolsets/aws.yaml +76 -0
  72. holmes/plugins/toolsets/azure_sql/__init__.py +0 -0
  73. holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +600 -0
  74. holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +309 -0
  75. holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +445 -0
  76. holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +251 -0
  77. holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +317 -0
  78. holmes/plugins/toolsets/azure_sql/azure_base_toolset.py +55 -0
  79. holmes/plugins/toolsets/azure_sql/azure_sql_instructions.jinja2 +137 -0
  80. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +183 -0
  81. holmes/plugins/toolsets/azure_sql/install.md +66 -0
  82. holmes/plugins/toolsets/azure_sql/tools/__init__.py +1 -0
  83. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +324 -0
  84. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +243 -0
  85. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +205 -0
  86. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +249 -0
  87. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +373 -0
  88. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +237 -0
  89. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +172 -0
  90. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +170 -0
  91. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +188 -0
  92. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +180 -0
  93. holmes/plugins/toolsets/azure_sql/utils.py +83 -0
  94. holmes/plugins/toolsets/bash/__init__.py +0 -0
  95. holmes/plugins/toolsets/bash/bash_instructions.jinja2 +14 -0
  96. holmes/plugins/toolsets/bash/bash_toolset.py +208 -0
  97. holmes/plugins/toolsets/bash/common/bash.py +52 -0
  98. holmes/plugins/toolsets/bash/common/config.py +14 -0
  99. holmes/plugins/toolsets/bash/common/stringify.py +25 -0
  100. holmes/plugins/toolsets/bash/common/validators.py +24 -0
  101. holmes/plugins/toolsets/bash/grep/__init__.py +52 -0
  102. holmes/plugins/toolsets/bash/kubectl/__init__.py +100 -0
  103. holmes/plugins/toolsets/bash/kubectl/constants.py +96 -0
  104. holmes/plugins/toolsets/bash/kubectl/kubectl_describe.py +66 -0
  105. holmes/plugins/toolsets/bash/kubectl/kubectl_events.py +88 -0
  106. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +108 -0
  107. holmes/plugins/toolsets/bash/kubectl/kubectl_logs.py +20 -0
  108. holmes/plugins/toolsets/bash/kubectl/kubectl_run.py +46 -0
  109. holmes/plugins/toolsets/bash/kubectl/kubectl_top.py +81 -0
  110. holmes/plugins/toolsets/bash/parse_command.py +103 -0
  111. holmes/plugins/toolsets/confluence.yaml +19 -0
  112. holmes/plugins/toolsets/consts.py +5 -0
  113. holmes/plugins/toolsets/coralogix/api.py +158 -0
  114. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +103 -0
  115. holmes/plugins/toolsets/coralogix/utils.py +181 -0
  116. holmes/plugins/toolsets/datadog.py +153 -0
  117. holmes/plugins/toolsets/docker.yaml +46 -0
  118. holmes/plugins/toolsets/git.py +756 -0
  119. holmes/plugins/toolsets/grafana/__init__.py +0 -0
  120. holmes/plugins/toolsets/grafana/base_grafana_toolset.py +54 -0
  121. holmes/plugins/toolsets/grafana/common.py +68 -0
  122. holmes/plugins/toolsets/grafana/grafana_api.py +31 -0
  123. holmes/plugins/toolsets/grafana/loki_api.py +89 -0
  124. holmes/plugins/toolsets/grafana/tempo_api.py +124 -0
  125. holmes/plugins/toolsets/grafana/toolset_grafana.py +102 -0
  126. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +102 -0
  127. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +10 -0
  128. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +299 -0
  129. holmes/plugins/toolsets/grafana/trace_parser.py +195 -0
  130. holmes/plugins/toolsets/helm.yaml +42 -0
  131. holmes/plugins/toolsets/internet/internet.py +275 -0
  132. holmes/plugins/toolsets/internet/notion.py +137 -0
  133. holmes/plugins/toolsets/kafka.py +638 -0
  134. holmes/plugins/toolsets/kubernetes.yaml +255 -0
  135. holmes/plugins/toolsets/kubernetes_logs.py +426 -0
  136. holmes/plugins/toolsets/kubernetes_logs.yaml +42 -0
  137. holmes/plugins/toolsets/logging_utils/__init__.py +0 -0
  138. holmes/plugins/toolsets/logging_utils/logging_api.py +217 -0
  139. holmes/plugins/toolsets/logging_utils/types.py +0 -0
  140. holmes/plugins/toolsets/mcp/toolset_mcp.py +135 -0
  141. holmes/plugins/toolsets/newrelic.py +222 -0
  142. holmes/plugins/toolsets/opensearch/__init__.py +0 -0
  143. holmes/plugins/toolsets/opensearch/opensearch.py +245 -0
  144. holmes/plugins/toolsets/opensearch/opensearch_logs.py +151 -0
  145. holmes/plugins/toolsets/opensearch/opensearch_traces.py +211 -0
  146. holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +12 -0
  147. holmes/plugins/toolsets/opensearch/opensearch_utils.py +166 -0
  148. holmes/plugins/toolsets/prometheus/prometheus.py +818 -0
  149. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +38 -0
  150. holmes/plugins/toolsets/rabbitmq/api.py +398 -0
  151. holmes/plugins/toolsets/rabbitmq/rabbitmq_instructions.jinja2 +37 -0
  152. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +222 -0
  153. holmes/plugins/toolsets/robusta/__init__.py +0 -0
  154. holmes/plugins/toolsets/robusta/robusta.py +235 -0
  155. holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +24 -0
  156. holmes/plugins/toolsets/runbook/__init__.py +0 -0
  157. holmes/plugins/toolsets/runbook/runbook_fetcher.py +78 -0
  158. holmes/plugins/toolsets/service_discovery.py +92 -0
  159. holmes/plugins/toolsets/servicenow/install.md +37 -0
  160. holmes/plugins/toolsets/servicenow/instructions.jinja2 +3 -0
  161. holmes/plugins/toolsets/servicenow/servicenow.py +198 -0
  162. holmes/plugins/toolsets/slab.yaml +20 -0
  163. holmes/plugins/toolsets/utils.py +137 -0
  164. holmes/plugins/utils.py +14 -0
  165. holmes/utils/__init__.py +0 -0
  166. holmes/utils/cache.py +84 -0
  167. holmes/utils/cert_utils.py +40 -0
  168. holmes/utils/default_toolset_installation_guide.jinja2 +44 -0
  169. holmes/utils/definitions.py +13 -0
  170. holmes/utils/env.py +53 -0
  171. holmes/utils/file_utils.py +56 -0
  172. holmes/utils/global_instructions.py +20 -0
  173. holmes/utils/holmes_status.py +22 -0
  174. holmes/utils/holmes_sync_toolsets.py +80 -0
  175. holmes/utils/markdown_utils.py +55 -0
  176. holmes/utils/pydantic_utils.py +54 -0
  177. holmes/utils/robusta.py +10 -0
  178. holmes/utils/tags.py +97 -0
  179. holmesgpt-0.11.5.dist-info/LICENSE.txt +21 -0
  180. holmesgpt-0.11.5.dist-info/METADATA +400 -0
  181. holmesgpt-0.11.5.dist-info/RECORD +183 -0
  182. holmesgpt-0.11.5.dist-info/WHEEL +4 -0
  183. holmesgpt-0.11.5.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,120 @@
1
+ import logging
2
+ from typing import Optional
3
+
4
+ from pydantic import ValidationError
5
+
6
+ from holmes.common.env_vars import TOOL_CALL_SAFEGUARDS_ENABLED
7
+ from holmes.plugins.toolsets.logging_utils.logging_api import POD_LOGGING_TOOL_NAME
8
+ from holmes.core.tools import StructuredToolResult, ToolResultStatus
9
+ from holmes.plugins.toolsets.logging_utils.logging_api import FetchPodLogsParams
10
+
11
+
12
def _is_redundant_fetch_pod_logs(
    tool_name: str, tool_params: dict, tool_calls: list[dict]
) -> bool:
    """Detect a pointless filtered re-run of a pod-logs fetch.

    A filtered fetch is redundant when an earlier unfiltered fetch with the
    same pod/namespace/time-window already returned no data, e.g.

        fetch_pod_logs({"pod_name": "notification-consumer", "namespace": "services"}) => no data
    followed by
        fetch_pod_logs({"pod_name": "notification-consumer", "namespace": "services", "filter": "error"}) => for sure no data either
    """
    if tool_name != POD_LOGGING_TOOL_NAME:
        return False
    if not tool_params.get("filter"):
        # an unfiltered call is never redundant under this rule
        return False
    return _has_previous_unfiltered_pod_logs_call(
        tool_params=tool_params, tool_calls=tool_calls
    )
31
+
32
+
33
def _has_previous_unfiltered_pod_logs_call(
    tool_params: dict, tool_calls: list[dict]
) -> bool:
    """Return True when a previous unfiltered pod-logs call that targeted the
    same pod, namespace and time window already came back with NO_DATA.

    Any params that fail pydantic validation (current or historical) make the
    check bail out with False rather than raise.
    """
    try:
        current = FetchPodLogsParams(**tool_params)
        for previous_call in tool_calls:
            outcome = previous_call.get("result", {})
            is_empty_pod_logs_result = (
                previous_call.get("tool_name") == POD_LOGGING_TOOL_NAME
                and outcome.get("status") == ToolResultStatus.NO_DATA
                and outcome.get("params")
            )
            if not is_empty_pod_logs_result:
                continue
            prior = FetchPodLogsParams(**outcome.get("params"))
            same_target = (
                prior.pod_name == current.pod_name
                and prior.namespace == current.namespace
            )
            same_window = (
                prior.start_time == current.start_time
                and prior.end_time == current.end_time
            )
            if not prior.filter and same_target and same_window:
                return True

        return False

    except ValidationError:
        logging.error("fetch_pod_logs params failed validation", exc_info=True)
        return False
60
+
61
+
62
+ def _has_previous_exact_same_tool_call(
63
+ tool_name: str, tool_params: dict, tool_calls: list[dict]
64
+ ) -> bool:
65
+ """Check if a previous tool call with the exact same params was executed this session."""
66
+ for tool_call in tool_calls:
67
+ params = tool_call.get("result", {}).get("params")
68
+ if (
69
+ tool_call.get("tool_name") == tool_name
70
+ and params is not None
71
+ and params == tool_params
72
+ ):
73
+ return True
74
+
75
+ return False
76
+
77
+
78
def prevent_overly_repeated_tool_call(
    tool_name: str, tool_params: dict, tool_calls: list[dict]
) -> Optional[StructuredToolResult]:
    """Checks if a tool call is redundant.

    Args:
        tool_name: name of the tool about to be invoked.
        tool_params: parameters for the pending invocation.
        tool_calls: prior tool calls from this session (each a dict with
            "tool_name" and "result" keys).

    Returns:
        A StructuredToolResult with status ERROR describing why the call was
        refused, or None when the call should proceed. Always returns None
        when the safeguard is disabled or the check itself fails.
    """

    try:
        if not TOOL_CALL_SAFEGUARDS_ENABLED:
            return None

        if _has_previous_exact_same_tool_call(
            tool_name=tool_name, tool_params=tool_params, tool_calls=tool_calls
        ):
            # It is only reasonable to prevent identical tool calls if Holmes is
            # read only and does not mutate resources. If Holmes mutates
            # resources then this safeguard should be removed or modified,
            # because one of the tools Holmes executed could change the answer
            # of a subsequent identical tool call. For example: Holmes checks if
            # a resource is deployed, runs a command to deploy it, and then
            # checks again whether it deployed properly.
            # (This was previously a bare triple-quoted string, i.e. a no-op
            # expression statement rather than a comment.)
            return StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error=(
                    "Refusing to run this tool call because it has already been called during this session with the exact same parameters.\n"
                    "Move on with your investigation to a different tool or change the parameter values."
                ),
                params=tool_params,
            )

        if _is_redundant_fetch_pod_logs(
            tool_name=tool_name, tool_params=tool_params, tool_calls=tool_calls
        ):
            return StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error=(
                    f"Refusing to run this tool call because the exact same {POD_LOGGING_TOOL_NAME} tool call without filter has already run and returned no data.\n"
                    "This tool call would also have returned no data.\n"
                    "Move on with your investigation to a different tool or extend the time window of your search."
                ),
                params=tool_params,
            )
    except Exception:
        # The safeguard must never break the actual tool execution, so any
        # failure in the check itself is logged and the call is allowed.
        logging.error("Failed to check for overly repeated tool call", exc_info=True)

    return None
@@ -0,0 +1,540 @@
1
+ import base64
2
+ import binascii
3
+ import json
4
+ import logging
5
+ import os
6
+ import threading
7
+ from datetime import datetime, timedelta
8
+ from typing import Dict, List, Optional, Tuple
9
+ from uuid import uuid4
10
+
11
+ import yaml # type: ignore
12
+ from cachetools import TTLCache # type: ignore
13
+ from postgrest._sync.request_builder import SyncQueryRequestBuilder
14
+ from postgrest.exceptions import APIError as PGAPIError
15
+ from postgrest.types import ReturnMethod
16
+ from pydantic import BaseModel
17
+ from supabase import create_client
18
+ from supabase.lib.client_options import ClientOptions
19
+
20
+ from holmes.common.env_vars import (
21
+ ROBUSTA_ACCOUNT_ID,
22
+ ROBUSTA_CONFIG_PATH,
23
+ STORE_API_KEY,
24
+ STORE_EMAIL,
25
+ STORE_PASSWORD,
26
+ STORE_URL,
27
+ )
28
+ from holmes.core.resource_instruction import (
29
+ ResourceInstructionDocument,
30
+ ResourceInstructions,
31
+ )
32
+ from holmes.utils.definitions import RobustaConfig
33
+ from holmes.utils.env import get_env_replacement
34
+ from holmes.utils.global_instructions import Instructions
35
+
36
# HTTP timeout for postgrest requests, in seconds; overridable via env var.
SUPABASE_TIMEOUT_SECONDS = int(os.getenv("SUPABASE_TIMEOUT_SECONDS", 3600))

# Robusta platform (Supabase) table names used by SupabaseDal below.
ISSUES_TABLE = "Issues"
EVIDENCE_TABLE = "Evidence"
RUNBOOKS_TABLE = "HolmesRunbooks"
SESSION_TOKENS_TABLE = "AuthTokens"
HOLMES_STATUS_TABLE = "HolmesStatus"
HOLMES_TOOLSET = "HolmesToolsStatus"
SCANS_META_TABLE = "ScansMeta"
SCANS_RESULTS_TABLE = "ScansResults"
46
+
47
+
48
class RobustaToken(BaseModel):
    """Decoded Robusta UI token: connection credentials for the Robusta
    platform store (parsed from base64-encoded JSON)."""

    # Supabase store URL
    store_url: str
    # Supabase API key
    api_key: str
    # Robusta account identifier
    account_id: str
    # credentials used for Supabase password sign-in
    email: str
    password: str
54
+
55
+
56
class SupabaseDal:
    """Data access layer for the Robusta SaaS platform (Supabase/postgrest).

    Credentials come either from a Robusta UI token (env var or config file)
    or from individual STORE_* env vars. When no complete set of credentials
    is found, ``self.enabled`` is False and most methods become no-ops.
    """

    def __init__(self, cluster: str):
        # enabled is True only when account_id/url/api_key/email/password all resolved
        self.enabled = self.__init_config()
        self.cluster = cluster
        if not self.enabled:
            logging.info(
                "Not connecting to Robusta platform - robusta token not provided - using ROBUSTA_AI will not be possible"
            )
            return
        logging.info(
            f"Initializing Robusta platform connection for account {self.account_id}"
        )
        options = ClientOptions(postgrest_client_timeout=SUPABASE_TIMEOUT_SECONDS)
        self.client = create_client(self.url, self.api_key, options)  # type: ignore
        self.user_id = self.sign_in()
        ttl = int(os.environ.get("SAAS_SESSION_TOKEN_TTL_SEC", "82800"))  # 23 hours
        self.patch_postgrest_execute()
        # single-entry cache for the session token; expires before the 24h JWT does
        self.token_cache = TTLCache(maxsize=1, ttl=ttl)
        # guards token_cache access in get_ai_credentials
        self.lock = threading.Lock()

    def patch_postgrest_execute(self):
        """Monkey-patch postgrest's SyncQueryRequestBuilder.execute so that an
        expired/invalid JWT triggers a re-sign-in and a single retry.

        NOTE: this patches the class globally, not just this client instance.
        """
        logging.info("Patching postgres execute")

        # This is somewhat hacky.
        def execute_with_retry(_self):
            try:
                return self._original_execute(_self)
            except PGAPIError as exc:
                message = exc.message or ""
                if exc.code == "PGRST301" or "expired" in message.lower():
                    # JWT expired. Sign in again and retry the query
                    logging.error(
                        "JWT token expired/invalid, signing in to Supabase again"
                    )
                    self.sign_in()
                    # update the session to the new one, after re-sign in
                    _self.session = self.client.postgrest.session
                    return self._original_execute(_self)
                else:
                    raise

        self._original_execute = SyncQueryRequestBuilder.execute
        SyncQueryRequestBuilder.execute = execute_with_retry

    @staticmethod
    def __load_robusta_config() -> Optional[RobustaToken]:
        """Load a RobustaToken from the ROBUSTA_UI_TOKEN env var or, failing
        that, from the robusta_sink entry in the config file.

        Returns None when neither source yields a token; raises with a
        descriptive message when a token exists but cannot be decoded.
        """
        config_file_path = ROBUSTA_CONFIG_PATH
        env_ui_token = os.environ.get("ROBUSTA_UI_TOKEN")
        if env_ui_token:
            # token provided as env var
            try:
                decoded = base64.b64decode(env_ui_token)
                return RobustaToken(**json.loads(decoded))
            except binascii.Error:
                raise Exception(
                    "binascii.Error encountered. The Robusta UI token is not a valid base64."
                )
            except json.JSONDecodeError:
                raise Exception(
                    "json.JSONDecodeError encountered. The Robusta UI token could not be parsed as JSON after being base64 decoded."
                )

        if not os.path.exists(config_file_path):
            logging.info(f"No robusta config in {config_file_path}")
            return None

        logging.info(f"loading config {config_file_path}")
        with open(config_file_path) as file:
            yaml_content = yaml.safe_load(file)
            config = RobustaConfig(**yaml_content)
            # scan sink configs for the robusta_sink holding the UI token
            for conf in config.sinks_config:
                if "robusta_sink" in conf.keys():
                    token = conf["robusta_sink"].get("token")
                    if not token:
                        raise Exception(
                            "No robusta token provided to Holmes.\n"
                            "Please set a valid Robusta UI token.\n "
                            "See https://docs.robusta.dev/master/configuration/ai-analysis.html#choosing-and-configuring-an-ai-provider for instructions."
                        )
                    # the token value may itself be an env-var reference
                    env_replacement_token = get_env_replacement(token)
                    if env_replacement_token:
                        token = env_replacement_token

                    # "{{" means an unrendered templating placeholder leaked through
                    if "{{" in token:
                        raise ValueError(
                            "The robusta token configured for Holmes appears to be a templating placeholder (e.g. `{ env.UI_SINK_TOKEN }`).\n "
                            "Ensure your Helm chart or environment variables are set correctly.\n "
                            "If you store the token in a secret, you must also pass "
                            "the environment variable ROBUSTA_UI_TOKEN to Holmes.\n "
                            "See https://docs.robusta.dev/master/configuration/ai-analysis.html#configuring-holmesgpt-access-to-saas-data for instructions."
                        )
                    try:
                        decoded = base64.b64decode(token)
                        return RobustaToken(**json.loads(decoded))
                    except binascii.Error:
                        raise Exception(
                            "binascii.Error encountered. The robusta token provided to Holmes is not a valid base64."
                        )
                    except json.JSONDecodeError:
                        raise Exception(
                            "json.JSONDecodeError encountered. The Robusta token provided to Holmes could not be parsed as JSON after being base64 decoded."
                        )
        return None

    def __init_config(self) -> bool:
        # trying to load the supabase connection parameters from the robusta token, if exists
        # if not, using env variables as fallback
        robusta_token = self.__load_robusta_config()
        if robusta_token:
            self.account_id = robusta_token.account_id
            self.url = robusta_token.store_url
            self.api_key = robusta_token.api_key
            self.email = robusta_token.email
            self.password = robusta_token.password
        else:
            self.account_id = ROBUSTA_ACCOUNT_ID
            self.url = STORE_URL
            self.api_key = STORE_API_KEY
            self.email = STORE_EMAIL
            self.password = STORE_PASSWORD

        # valid only if all store parameters are provided
        return all([self.account_id, self.url, self.api_key, self.email, self.password])

    def sign_in(self) -> str:
        """Sign in to Supabase with email/password and return the user id.

        Also propagates the access token to the postgrest sub-client.
        """
        logging.info("Supabase DAL login")
        res = self.client.auth.sign_in_with_password(
            {"email": self.email, "password": self.password}
        )
        self.client.auth.set_session(
            res.session.access_token, res.session.refresh_token
        )
        self.client.postgrest.auth(res.session.access_token)
        return res.user.id

    def get_resource_recommendation(
        self, name: str, namespace: str, kind: str
    ) -> Optional[List[Dict]]:
        """Fetch efficiency-scan results for one workload from the latest scan.

        Returns the matching rows, None when nothing matched or a query
        failed, or [] when the store is disabled.
        """
        if not self.enabled:
            return []

        try:
            # find the latest scan for this cluster, then its rows for the workload
            scans_meta_response = (
                self.client.table(SCANS_META_TABLE)
                .select("*")
                .eq("account_id", self.account_id)
                .eq("cluster_id", self.cluster)
                .eq("latest", True)
                .execute()
            )
            if not len(scans_meta_response.data):
                return None

            scans_results_response = (
                self.client.table(SCANS_RESULTS_TABLE)
                .select("*")
                .eq("account_id", self.account_id)
                .eq("cluster_id", self.cluster)
                .eq("scan_id", scans_meta_response.data[0]["scan_id"])
                .eq("name", name)
                .eq("namespace", namespace)
                .eq("kind", kind)
                .execute()
            )
            if not len(scans_results_response.data):
                return None

            return scans_results_response.data
        except Exception:
            logging.exception("Supabase error while retrieving efficiency data")
            return None

    def get_configuration_changes(
        self, start_datetime: str, end_datetime: str
    ) -> Optional[List[Dict]]:
        """Return configuration-change issues (joined with their Evidence rows)
        created in the given ISO-datetime window.

        Returns None when no changes/evidence were found or a query failed,
        and [] when the store is disabled.
        """
        if not self.enabled:
            return []

        try:
            changes_response = (
                self.client.table(ISSUES_TABLE)
                .select("id", "subject_name", "subject_namespace", "description")
                .eq("account_id", self.account_id)
                .eq("cluster", self.cluster)
                .eq("finding_type", "configuration_change")
                .gte("creation_date", start_datetime)
                .lte("creation_date", end_datetime)
                .execute()
            )
            if not len(changes_response.data):
                return None

        except Exception:
            logging.exception("Supabase error while retrieving change data")
            return None

        # second query: the change payloads live in the Evidence table
        changes_ids = [change["id"] for change in changes_response.data]
        try:
            change_data_response = (
                self.client.table(EVIDENCE_TABLE)
                .select("*")
                .eq("account_id", self.account_id)
                .in_("issue_id", changes_ids)
                .execute()
            )
            if not len(change_data_response.data):
                return None

        except Exception:
            logging.exception("Supabase error while retrieving change content")
            return None

        changes_data = []
        # NOTE: keyed by issue_id, so only one evidence row per issue survives
        change_data_map = {
            change["issue_id"]: change for change in change_data_response.data
        }

        for change in changes_response.data:
            change_content = change_data_map.get(change["id"])
            if change_content:
                changes_data.append(
                    {
                        "change": change_content["data"],
                        "evidence_id": change_content["id"],
                        **change,
                    }
                )

        logging.debug(
            "Change history for %s-%s: %s", start_datetime, end_datetime, changes_data
        )

        return changes_data

    def get_issue_data(self, issue_id: Optional[str]) -> Optional[Dict]:
        """Fetch one issue plus its (filtered) evidence and derived
        investigation timestamps; None when unavailable."""
        # TODO this could be done in a single atomic SELECT, but there is no
        # foreign key relation between Issues and Evidence.
        if not issue_id:
            return None
        if not self.enabled:  # store not initialized
            return None
        issue_data = None
        try:
            issue_response = (
                self.client.table(ISSUES_TABLE)
                .select("*")
                .filter("id", "eq", issue_id)
                .execute()
            )
            if len(issue_response.data):
                issue_data = issue_response.data[0]

        except Exception:  # e.g. invalid id format
            logging.exception("Supabase error while retrieving issue data")
            return None
        if not issue_data:
            return None
        evidence = (
            self.client.table(EVIDENCE_TABLE)
            .select("*")
            .filter("issue_id", "eq", issue_id)
            .execute()
        )
        # drop enrichment types that are not useful as LLM evidence
        enrichment_blacklist = {"text_file", "graph", "ai_analysis", "holmes"}
        data = [
            enrich
            for enrich in evidence.data
            if enrich.get("enrichment_type") not in enrichment_blacklist
        ]

        issue_data["evidence"] = data

        # build issue investigation dates: a +/-10 minute window around starts_at
        started_at = issue_data.get("starts_at")
        if started_at:
            dt = datetime.fromisoformat(started_at)

            # Calculate timestamps
            start_timestamp = dt - timedelta(minutes=10)
            end_timestamp = dt + timedelta(minutes=10)

            issue_data["start_timestamp"] = start_timestamp.strftime(
                "%Y-%m-%dT%H:%M:%S.%fZ"
            )
            issue_data["end_timestamp"] = end_timestamp.strftime(
                "%Y-%m-%dT%H:%M:%S.%fZ"
            )
            issue_data["start_timestamp_millis"] = int(
                start_timestamp.timestamp() * 1000
            )
            issue_data["end_timestamp_millis"] = int(end_timestamp.timestamp() * 1000)

        return issue_data

    def get_resource_instructions(
        self, type: str, name: Optional[str]
    ) -> Optional[ResourceInstructions]:
        """Fetch per-resource runbook instructions and document URLs for the
        given subject type/name; None when disabled or nothing found."""
        if not self.enabled or not name:
            return None

        res = (
            self.client.table(RUNBOOKS_TABLE)
            .select("runbook")
            .eq("account_id", self.account_id)
            .eq("subject_type", type)
            .eq("subject_name", name)
            .execute()
        )
        if res.data:
            # only the first matching runbook row is used
            instructions = res.data[0].get("runbook").get("instructions")
            documents_data = res.data[0].get("runbook").get("documents")
            documents = []

            if documents_data:
                for document_data in documents_data:
                    url = document_data.get("url", None)
                    if url:
                        documents.append(ResourceInstructionDocument(url=url))
                    else:
                        # documents without a url are not supported
                        logging.warning(
                            f"Unsupported runbook for subject_type={type} / subject_name={name}: {document_data}"
                        )

            return ResourceInstructions(instructions=instructions, documents=documents)

        return None

    def get_global_instructions_for_account(self) -> Optional[Instructions]:
        """Fetch account-wide instructions (subject_type 'Account'); None on
        failure or when nothing is configured.

        NOTE: unlike most methods here, this does not check self.enabled
        before querying — presumably callers only reach it when enabled.
        """
        try:
            res = (
                self.client.table(RUNBOOKS_TABLE)
                .select("runbook")
                .eq("account_id", self.account_id)
                .eq("subject_type", "Account")
                .execute()
            )

            if res.data:
                instructions = res.data[0].get("runbook").get("instructions")
                return Instructions(instructions=instructions)
        except Exception:
            logging.exception("Failed to fetch global instructions", exc_info=True)

        return None

    def create_session_token(self) -> str:
        """Insert a fresh HOLMES session token row and return the token."""
        token = str(uuid4())
        self.client.table(SESSION_TOKENS_TABLE).insert(
            {
                "account_id": self.account_id,
                "user_id": self.user_id,
                "token": token,
                "type": "HOLMES",
            },
            returning=ReturnMethod.minimal,  # must use this, because the user cannot read this table
        ).execute()
        return token

    def get_ai_credentials(self) -> Tuple[str, str]:
        """Return (account_id, session_token) for ROBUSTA_AI, creating and
        caching a session token when needed.

        Raises when the store is not initialized.
        """
        if not self.enabled:
            raise Exception(
                "You're trying to use ROBUSTA_AI, but Cannot get credentials for ROBUSTA_AI. Store not initialized."
            )

        # lock so concurrent callers don't create duplicate session tokens
        with self.lock:
            session_token = self.token_cache.get("session_token")
            if not session_token:
                session_token = self.create_session_token()
                self.token_cache["session_token"] = session_token

        return self.account_id, session_token

    def get_workload_issues(self, resource: dict, since_hours: float) -> List[str]:
        """Return evidence payloads for recent issues of one workload,
        deduplicated by aggregation_key; [] when disabled or on failure."""
        if not self.enabled or not resource:
            return []

        cluster = resource.get("cluster")
        if not cluster:
            logging.debug("Missing workload cluster for issues.")
            return []

        since: str = (datetime.now() - timedelta(hours=since_hours)).isoformat()

        # service key format: namespace/kind/name
        svc_key = f"{resource.get('namespace', '')}/{resource.get('kind', '')}/{resource.get('name', '')}"
        logging.debug(f"getting issues for workload {svc_key}")
        try:
            res = (
                self.client.table(ISSUES_TABLE)
                .select("id, creation_date, aggregation_key")
                .eq("account_id", self.account_id)
                .eq("cluster", cluster)
                .eq("service_key", svc_key)
                .gte("creation_date", since)
                .order("creation_date")
                .execute()
            )

            if not res.data:
                return []

            # keep only the latest issue id per aggregation_key
            # (rows are ordered by creation_date, so later rows overwrite earlier)
            issue_dict = dict()
            for issue in res.data:
                issue_dict[issue.get("aggregation_key")] = issue.get("id")

            unique_issues: list[str] = list(issue_dict.values())

            res = (
                self.client.table(EVIDENCE_TABLE)
                .select("data, enrichment_type")
                .in_("issue_id", unique_issues)
                .execute()
            )

            # same evidence-type filter used in get_issue_data
            enrichment_blacklist = {"text_file", "graph", "ai_analysis", "holmes"}
            data = [
                evidence.get("data")
                for evidence in res.data
                if evidence.get("enrichment_type") not in enrichment_blacklist
            ]
            return data

        except Exception:
            logging.exception("failed to fetch workload issues data", exc_info=True)
            return []

    def upsert_holmes_status(self, holmes_status_data: dict) -> None:
        """Upsert this Holmes instance's status row (keyed by account_id +
        cluster_id); best-effort, errors are logged and swallowed."""
        if not self.enabled:
            logging.info(
                "Robusta store not initialized. Skipping upserting holmes status."
            )
            return

        updated_at = datetime.now().isoformat()
        try:
            (
                self.client.table(HOLMES_STATUS_TABLE)
                .upsert(
                    {
                        "account_id": self.account_id,
                        "updated_at": updated_at,
                        **holmes_status_data,
                    },
                    on_conflict="account_id, cluster_id",
                )
                .execute()
            )
        except Exception as error:
            logging.error(
                f"Error happened during upserting holmes status: {error}", exc_info=True
            )

        return None

    def sync_toolsets(self, toolsets: list[dict], cluster_name: str) -> None:
        """Upsert the provided toolset rows and delete any stored toolsets for
        this cluster that are no longer in the provided list.

        Best-effort: failures are logged, not raised. Each toolset dict must
        contain at least a "toolset_name" key.
        """
        if not toolsets:
            logging.warning("No toolsets were provided for synchronization.")
            return

        if not self.enabled:
            logging.info(
                "Robusta store not initialized. Skipping sync holmes toolsets."
            )
            return

        provided_toolset_names = [toolset["toolset_name"] for toolset in toolsets]

        try:
            self.client.table(HOLMES_TOOLSET).upsert(
                toolsets, on_conflict="account_id, cluster_id, toolset_name"
            ).execute()

            logging.info("Toolsets upserted successfully.")

            # remove rows for toolsets that were not provided this sync
            self.client.table(HOLMES_TOOLSET).delete().eq(
                "account_id", self.account_id
            ).eq("cluster_id", cluster_name).not_.in_(
                "toolset_name", provided_toolset_names
            ).execute()

            logging.info("Toolsets synchronized successfully.")

        except Exception as e:
            logging.exception(
                f"An error occurred during toolset synchronization: {e}", exc_info=True
            )