holmesgpt-0.11.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of holmesgpt might be problematic.
- holmes/.git_archival.json +7 -0
- holmes/__init__.py +76 -0
- holmes/__init__.py.bak +76 -0
- holmes/clients/robusta_client.py +24 -0
- holmes/common/env_vars.py +47 -0
- holmes/config.py +526 -0
- holmes/core/__init__.py +0 -0
- holmes/core/conversations.py +578 -0
- holmes/core/investigation.py +152 -0
- holmes/core/investigation_structured_output.py +264 -0
- holmes/core/issue.py +54 -0
- holmes/core/llm.py +250 -0
- holmes/core/models.py +157 -0
- holmes/core/openai_formatting.py +51 -0
- holmes/core/performance_timing.py +72 -0
- holmes/core/prompt.py +42 -0
- holmes/core/resource_instruction.py +17 -0
- holmes/core/runbooks.py +26 -0
- holmes/core/safeguards.py +120 -0
- holmes/core/supabase_dal.py +540 -0
- holmes/core/tool_calling_llm.py +798 -0
- holmes/core/tools.py +566 -0
- holmes/core/tools_utils/__init__.py +0 -0
- holmes/core/tools_utils/tool_executor.py +65 -0
- holmes/core/tools_utils/toolset_utils.py +52 -0
- holmes/core/toolset_manager.py +418 -0
- holmes/interactive.py +229 -0
- holmes/main.py +1041 -0
- holmes/plugins/__init__.py +0 -0
- holmes/plugins/destinations/__init__.py +6 -0
- holmes/plugins/destinations/slack/__init__.py +2 -0
- holmes/plugins/destinations/slack/plugin.py +163 -0
- holmes/plugins/interfaces.py +32 -0
- holmes/plugins/prompts/__init__.py +48 -0
- holmes/plugins/prompts/_current_date_time.jinja2 +1 -0
- holmes/plugins/prompts/_default_log_prompt.jinja2 +11 -0
- holmes/plugins/prompts/_fetch_logs.jinja2 +36 -0
- holmes/plugins/prompts/_general_instructions.jinja2 +86 -0
- holmes/plugins/prompts/_global_instructions.jinja2 +12 -0
- holmes/plugins/prompts/_runbook_instructions.jinja2 +13 -0
- holmes/plugins/prompts/_toolsets_instructions.jinja2 +56 -0
- holmes/plugins/prompts/generic_ask.jinja2 +36 -0
- holmes/plugins/prompts/generic_ask_conversation.jinja2 +32 -0
- holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +50 -0
- holmes/plugins/prompts/generic_investigation.jinja2 +42 -0
- holmes/plugins/prompts/generic_post_processing.jinja2 +13 -0
- holmes/plugins/prompts/generic_ticket.jinja2 +12 -0
- holmes/plugins/prompts/investigation_output_format.jinja2 +32 -0
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +84 -0
- holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +39 -0
- holmes/plugins/runbooks/README.md +22 -0
- holmes/plugins/runbooks/__init__.py +100 -0
- holmes/plugins/runbooks/catalog.json +14 -0
- holmes/plugins/runbooks/jira.yaml +12 -0
- holmes/plugins/runbooks/kube-prometheus-stack.yaml +10 -0
- holmes/plugins/runbooks/networking/dns_troubleshooting_instructions.md +66 -0
- holmes/plugins/runbooks/upgrade/upgrade_troubleshooting_instructions.md +44 -0
- holmes/plugins/sources/github/__init__.py +77 -0
- holmes/plugins/sources/jira/__init__.py +123 -0
- holmes/plugins/sources/opsgenie/__init__.py +93 -0
- holmes/plugins/sources/pagerduty/__init__.py +147 -0
- holmes/plugins/sources/prometheus/__init__.py +0 -0
- holmes/plugins/sources/prometheus/models.py +104 -0
- holmes/plugins/sources/prometheus/plugin.py +154 -0
- holmes/plugins/toolsets/__init__.py +171 -0
- holmes/plugins/toolsets/aks-node-health.yaml +65 -0
- holmes/plugins/toolsets/aks.yaml +86 -0
- holmes/plugins/toolsets/argocd.yaml +70 -0
- holmes/plugins/toolsets/atlas_mongodb/instructions.jinja2 +8 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +307 -0
- holmes/plugins/toolsets/aws.yaml +76 -0
- holmes/plugins/toolsets/azure_sql/__init__.py +0 -0
- holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +600 -0
- holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +309 -0
- holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +445 -0
- holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +251 -0
- holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +317 -0
- holmes/plugins/toolsets/azure_sql/azure_base_toolset.py +55 -0
- holmes/plugins/toolsets/azure_sql/azure_sql_instructions.jinja2 +137 -0
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +183 -0
- holmes/plugins/toolsets/azure_sql/install.md +66 -0
- holmes/plugins/toolsets/azure_sql/tools/__init__.py +1 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +324 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +243 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +205 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +249 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +373 -0
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +237 -0
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +172 -0
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +170 -0
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +188 -0
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +180 -0
- holmes/plugins/toolsets/azure_sql/utils.py +83 -0
- holmes/plugins/toolsets/bash/__init__.py +0 -0
- holmes/plugins/toolsets/bash/bash_instructions.jinja2 +14 -0
- holmes/plugins/toolsets/bash/bash_toolset.py +208 -0
- holmes/plugins/toolsets/bash/common/bash.py +52 -0
- holmes/plugins/toolsets/bash/common/config.py +14 -0
- holmes/plugins/toolsets/bash/common/stringify.py +25 -0
- holmes/plugins/toolsets/bash/common/validators.py +24 -0
- holmes/plugins/toolsets/bash/grep/__init__.py +52 -0
- holmes/plugins/toolsets/bash/kubectl/__init__.py +100 -0
- holmes/plugins/toolsets/bash/kubectl/constants.py +96 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_describe.py +66 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_events.py +88 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +108 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_logs.py +20 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_run.py +46 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_top.py +81 -0
- holmes/plugins/toolsets/bash/parse_command.py +103 -0
- holmes/plugins/toolsets/confluence.yaml +19 -0
- holmes/plugins/toolsets/consts.py +5 -0
- holmes/plugins/toolsets/coralogix/api.py +158 -0
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +103 -0
- holmes/plugins/toolsets/coralogix/utils.py +181 -0
- holmes/plugins/toolsets/datadog.py +153 -0
- holmes/plugins/toolsets/docker.yaml +46 -0
- holmes/plugins/toolsets/git.py +756 -0
- holmes/plugins/toolsets/grafana/__init__.py +0 -0
- holmes/plugins/toolsets/grafana/base_grafana_toolset.py +54 -0
- holmes/plugins/toolsets/grafana/common.py +68 -0
- holmes/plugins/toolsets/grafana/grafana_api.py +31 -0
- holmes/plugins/toolsets/grafana/loki_api.py +89 -0
- holmes/plugins/toolsets/grafana/tempo_api.py +124 -0
- holmes/plugins/toolsets/grafana/toolset_grafana.py +102 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +102 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +10 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +299 -0
- holmes/plugins/toolsets/grafana/trace_parser.py +195 -0
- holmes/plugins/toolsets/helm.yaml +42 -0
- holmes/plugins/toolsets/internet/internet.py +275 -0
- holmes/plugins/toolsets/internet/notion.py +137 -0
- holmes/plugins/toolsets/kafka.py +638 -0
- holmes/plugins/toolsets/kubernetes.yaml +255 -0
- holmes/plugins/toolsets/kubernetes_logs.py +426 -0
- holmes/plugins/toolsets/kubernetes_logs.yaml +42 -0
- holmes/plugins/toolsets/logging_utils/__init__.py +0 -0
- holmes/plugins/toolsets/logging_utils/logging_api.py +217 -0
- holmes/plugins/toolsets/logging_utils/types.py +0 -0
- holmes/plugins/toolsets/mcp/toolset_mcp.py +135 -0
- holmes/plugins/toolsets/newrelic.py +222 -0
- holmes/plugins/toolsets/opensearch/__init__.py +0 -0
- holmes/plugins/toolsets/opensearch/opensearch.py +245 -0
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +151 -0
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +211 -0
- holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +12 -0
- holmes/plugins/toolsets/opensearch/opensearch_utils.py +166 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +818 -0
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +38 -0
- holmes/plugins/toolsets/rabbitmq/api.py +398 -0
- holmes/plugins/toolsets/rabbitmq/rabbitmq_instructions.jinja2 +37 -0
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +222 -0
- holmes/plugins/toolsets/robusta/__init__.py +0 -0
- holmes/plugins/toolsets/robusta/robusta.py +235 -0
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +24 -0
- holmes/plugins/toolsets/runbook/__init__.py +0 -0
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +78 -0
- holmes/plugins/toolsets/service_discovery.py +92 -0
- holmes/plugins/toolsets/servicenow/install.md +37 -0
- holmes/plugins/toolsets/servicenow/instructions.jinja2 +3 -0
- holmes/plugins/toolsets/servicenow/servicenow.py +198 -0
- holmes/plugins/toolsets/slab.yaml +20 -0
- holmes/plugins/toolsets/utils.py +137 -0
- holmes/plugins/utils.py +14 -0
- holmes/utils/__init__.py +0 -0
- holmes/utils/cache.py +84 -0
- holmes/utils/cert_utils.py +40 -0
- holmes/utils/default_toolset_installation_guide.jinja2 +44 -0
- holmes/utils/definitions.py +13 -0
- holmes/utils/env.py +53 -0
- holmes/utils/file_utils.py +56 -0
- holmes/utils/global_instructions.py +20 -0
- holmes/utils/holmes_status.py +22 -0
- holmes/utils/holmes_sync_toolsets.py +80 -0
- holmes/utils/markdown_utils.py +55 -0
- holmes/utils/pydantic_utils.py +54 -0
- holmes/utils/robusta.py +10 -0
- holmes/utils/tags.py +97 -0
- holmesgpt-0.11.5.dist-info/LICENSE.txt +21 -0
- holmesgpt-0.11.5.dist-info/METADATA +400 -0
- holmesgpt-0.11.5.dist-info/RECORD +183 -0
- holmesgpt-0.11.5.dist-info/WHEEL +4 -0
- holmesgpt-0.11.5.dist-info/entry_points.txt +3 -0
holmes/core/safeguards.py
@@ -0,0 +1,120 @@
+import logging
+from typing import Optional
+
+from pydantic import ValidationError
+
+from holmes.common.env_vars import TOOL_CALL_SAFEGUARDS_ENABLED
+from holmes.plugins.toolsets.logging_utils.logging_api import POD_LOGGING_TOOL_NAME
+from holmes.core.tools import StructuredToolResult, ToolResultStatus
+from holmes.plugins.toolsets.logging_utils.logging_api import FetchPodLogsParams
+
+
+def _is_redundant_fetch_pod_logs(
+    tool_name: str, tool_params: dict, tool_calls: list[dict]
+) -> bool:
+    """
+    Tool call is redundant if a previous call without filter returned no results and the current tool call is the same but with a filter
+    e.g.
+    fetch_pod_logs({"pod_name": "notification-consumer", "namespace": "services"}) => no data
+    followed by
+    fetch_pod_logs({"pod_name": "notification-consumer", "namespace": "services", "filter": "error"}) => for sure no data either
+    """
+    if (
+        tool_name == POD_LOGGING_TOOL_NAME
+        and tool_params.get("filter")
+        and _has_previous_unfiltered_pod_logs_call(
+            tool_params=tool_params, tool_calls=tool_calls
+        )
+    ):
+        return True
+    return False
+
+
+def _has_previous_unfiltered_pod_logs_call(
+    tool_params: dict, tool_calls: list[dict]
+) -> bool:
+    try:
+        current_params = FetchPodLogsParams(**tool_params)
+        for tool_call in tool_calls:
+            result = tool_call.get("result", {})
+            if (
+                tool_call.get("tool_name") == POD_LOGGING_TOOL_NAME
+                and result.get("status") == ToolResultStatus.NO_DATA
+                and result.get("params")
+            ):
+                params = FetchPodLogsParams(**result.get("params"))
+                if (
+                    not params.filter
+                    and current_params.end_time == params.end_time
+                    and current_params.start_time == params.start_time
+                    and current_params.pod_name == params.pod_name
+                    and current_params.namespace == params.namespace
+                ):
+                    return True
+
+        return False
+
+    except ValidationError:
+        logging.error("fetch_pod_logs params failed validation", exc_info=True)
+        return False
+
+
+def _has_previous_exact_same_tool_call(
+    tool_name: str, tool_params: dict, tool_calls: list[dict]
+) -> bool:
+    """Check if a previous tool call with the exact same params was executed this session."""
+    for tool_call in tool_calls:
+        params = tool_call.get("result", {}).get("params")
+        if (
+            tool_call.get("tool_name") == tool_name
+            and params is not None
+            and params == tool_params
+        ):
+            return True
+
+    return False
+
+
+def prevent_overly_repeated_tool_call(
+    tool_name: str, tool_params: dict, tool_calls: list[dict]
+) -> Optional[StructuredToolResult]:
+    """Checks if a tool call is redundant"""
+
+    try:
+        if not TOOL_CALL_SAFEGUARDS_ENABLED:
+            return None
+
+        if _has_previous_exact_same_tool_call(
+            tool_name=tool_name, tool_params=tool_params, tool_calls=tool_calls
+        ):
+            """
+            It is only reasonable to prevent identical tool calls if Holmes is read only and does not mutate resources.
+            If Holmes mutates resources then this safeguard should be removed or modified. This is because
+            there is a risk that one of the tools Holmes executed would actually change the answer of a subsequent identical tool call.
+            For example if Holmes checks if a resource is deployed, runs a command to deploy it and then checks again if it has deployed properly.
+            """
+            return StructuredToolResult(
+                status=ToolResultStatus.ERROR,
+                error=(
+                    "Refusing to run this tool call because it has already been called during this session with the exact same parameters.\n"
+                    "Move on with your investigation to a different tool or change the parameter values."
+                ),
+                params=tool_params,
+            )
+
+        if _is_redundant_fetch_pod_logs(
+            tool_name=tool_name, tool_params=tool_params, tool_calls=tool_calls
+        ):
+            return StructuredToolResult(
+                status=ToolResultStatus.ERROR,
+                error=(
+                    f"Refusing to run this tool call because the exact same {POD_LOGGING_TOOL_NAME} tool call without filter has already run and returned no data.\n"
+                    "This tool call would also have returned no data.\n"
+                    "Move on with your investigation to a different tool or extend the time window of your search."
+                ),
+                params=tool_params,
+            )
+    except Exception:
+        logging.error("Failed to check for overly repeated tool call", exc_info=True)
+
+    return None
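For orientation, here is a minimal driver (not part of the wheel) showing the safeguard above in action. It assumes holmesgpt is installed and that TOOL_CALL_SAFEGUARDS_ENABLED resolves truthy; the exact default and parsing live in holmes/common/env_vars.py, so setting the variable before import is a hedge, not a documented contract. The previous_calls entries simply mirror the dict shape the functions above read via tool_call.get(...).

import os

# Assumption: holmes.common.env_vars reads this at import time, so set it first.
os.environ.setdefault("TOOL_CALL_SAFEGUARDS_ENABLED", "true")

from holmes.core.safeguards import prevent_overly_repeated_tool_call
from holmes.core.tools import ToolResultStatus
from holmes.plugins.toolsets.logging_utils.logging_api import POD_LOGGING_TOOL_NAME

# One earlier, unfiltered pod-logs call that returned no data.
previous_calls = [
    {
        "tool_name": POD_LOGGING_TOOL_NAME,
        "result": {
            "status": ToolResultStatus.NO_DATA,
            "params": {"pod_name": "notification-consumer", "namespace": "services"},
        },
    }
]

# Same pod, namespace, and (absent) time window, now with a filter: the
# unfiltered call already returned nothing, so this call is refused.
blocked = prevent_overly_repeated_tool_call(
    tool_name=POD_LOGGING_TOOL_NAME,
    tool_params={
        "pod_name": "notification-consumer",
        "namespace": "services",
        "filter": "error",
    },
    tool_calls=previous_calls,
)
print(blocked.error if blocked else "call allowed")

Note the caveat in the inline docstring: deduplicating identical calls is only safe while Holmes stays read-only, since a mutating tool could change the answer of a later identical call.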
holmes/core/supabase_dal.py
@@ -0,0 +1,540 @@
+import base64
+import binascii
+import json
+import logging
+import os
+import threading
+from datetime import datetime, timedelta
+from typing import Dict, List, Optional, Tuple
+from uuid import uuid4
+
+import yaml  # type: ignore
+from cachetools import TTLCache  # type: ignore
+from postgrest._sync.request_builder import SyncQueryRequestBuilder
+from postgrest.exceptions import APIError as PGAPIError
+from postgrest.types import ReturnMethod
+from pydantic import BaseModel
+from supabase import create_client
+from supabase.lib.client_options import ClientOptions
+
+from holmes.common.env_vars import (
+    ROBUSTA_ACCOUNT_ID,
+    ROBUSTA_CONFIG_PATH,
+    STORE_API_KEY,
+    STORE_EMAIL,
+    STORE_PASSWORD,
+    STORE_URL,
+)
+from holmes.core.resource_instruction import (
+    ResourceInstructionDocument,
+    ResourceInstructions,
+)
+from holmes.utils.definitions import RobustaConfig
+from holmes.utils.env import get_env_replacement
+from holmes.utils.global_instructions import Instructions
+
+SUPABASE_TIMEOUT_SECONDS = int(os.getenv("SUPABASE_TIMEOUT_SECONDS", 3600))
+
+ISSUES_TABLE = "Issues"
+EVIDENCE_TABLE = "Evidence"
+RUNBOOKS_TABLE = "HolmesRunbooks"
+SESSION_TOKENS_TABLE = "AuthTokens"
+HOLMES_STATUS_TABLE = "HolmesStatus"
+HOLMES_TOOLSET = "HolmesToolsStatus"
+SCANS_META_TABLE = "ScansMeta"
+SCANS_RESULTS_TABLE = "ScansResults"
+
+
+class RobustaToken(BaseModel):
+    store_url: str
+    api_key: str
+    account_id: str
+    email: str
+    password: str
+
+
+class SupabaseDal:
+    def __init__(self, cluster: str):
+        self.enabled = self.__init_config()
+        self.cluster = cluster
+        if not self.enabled:
+            logging.info(
+                "Not connecting to Robusta platform - robusta token not provided - using ROBUSTA_AI will not be possible"
+            )
+            return
+        logging.info(
+            f"Initializing Robusta platform connection for account {self.account_id}"
+        )
+        options = ClientOptions(postgrest_client_timeout=SUPABASE_TIMEOUT_SECONDS)
+        self.client = create_client(self.url, self.api_key, options)  # type: ignore
+        self.user_id = self.sign_in()
+        ttl = int(os.environ.get("SAAS_SESSION_TOKEN_TTL_SEC", "82800"))  # 23 hours
+        self.patch_postgrest_execute()
+        self.token_cache = TTLCache(maxsize=1, ttl=ttl)
+        self.lock = threading.Lock()
+
+    def patch_postgrest_execute(self):
+        logging.info("Patching postgres execute")
+
+        # This is somewhat hacky.
+        def execute_with_retry(_self):
+            try:
+                return self._original_execute(_self)
+            except PGAPIError as exc:
+                message = exc.message or ""
+                if exc.code == "PGRST301" or "expired" in message.lower():
+                    # JWT expired. Sign in again and retry the query
+                    logging.error(
+                        "JWT token expired/invalid, signing in to Supabase again"
+                    )
+                    self.sign_in()
+                    # update the session to the new one, after re-sign in
+                    _self.session = self.client.postgrest.session
+                    return self._original_execute(_self)
+                else:
+                    raise
+
+        self._original_execute = SyncQueryRequestBuilder.execute
+        SyncQueryRequestBuilder.execute = execute_with_retry
+
+    @staticmethod
+    def __load_robusta_config() -> Optional[RobustaToken]:
+        config_file_path = ROBUSTA_CONFIG_PATH
+        env_ui_token = os.environ.get("ROBUSTA_UI_TOKEN")
+        if env_ui_token:
+            # token provided as env var
+            try:
+                decoded = base64.b64decode(env_ui_token)
+                return RobustaToken(**json.loads(decoded))
+            except binascii.Error:
+                raise Exception(
+                    "binascii.Error encountered. The Robusta UI token is not a valid base64."
+                )
+            except json.JSONDecodeError:
+                raise Exception(
+                    "json.JSONDecodeError encountered. The Robusta UI token could not be parsed as JSON after being base64 decoded."
+                )
+
+        if not os.path.exists(config_file_path):
+            logging.info(f"No robusta config in {config_file_path}")
+            return None
+
+        logging.info(f"loading config {config_file_path}")
+        with open(config_file_path) as file:
+            yaml_content = yaml.safe_load(file)
+            config = RobustaConfig(**yaml_content)
+            for conf in config.sinks_config:
+                if "robusta_sink" in conf.keys():
+                    token = conf["robusta_sink"].get("token")
+                    if not token:
+                        raise Exception(
+                            "No robusta token provided to Holmes.\n"
+                            "Please set a valid Robusta UI token.\n "
+                            "See https://docs.robusta.dev/master/configuration/ai-analysis.html#choosing-and-configuring-an-ai-provider for instructions."
+                        )
+                    env_replacement_token = get_env_replacement(token)
+                    if env_replacement_token:
+                        token = env_replacement_token
+
+                    if "{{" in token:
+                        raise ValueError(
+                            "The robusta token configured for Holmes appears to be a templating placeholder (e.g. `{ env.UI_SINK_TOKEN }`).\n "
+                            "Ensure your Helm chart or environment variables are set correctly.\n "
+                            "If you store the token in a secret, you must also pass "
+                            "the environment variable ROBUSTA_UI_TOKEN to Holmes.\n "
+                            "See https://docs.robusta.dev/master/configuration/ai-analysis.html#configuring-holmesgpt-access-to-saas-data for instructions."
+                        )
+                    try:
+                        decoded = base64.b64decode(token)
+                        return RobustaToken(**json.loads(decoded))
+                    except binascii.Error:
+                        raise Exception(
+                            "binascii.Error encountered. The robusta token provided to Holmes is not a valid base64."
+                        )
+                    except json.JSONDecodeError:
+                        raise Exception(
+                            "json.JSONDecodeError encountered. The Robusta token provided to Holmes could not be parsed as JSON after being base64 decoded."
+                        )
+        return None
+
+    def __init_config(self) -> bool:
+        # trying to load the supabase connection parameters from the robusta token, if exists
+        # if not, using env variables as fallback
+        robusta_token = self.__load_robusta_config()
+        if robusta_token:
+            self.account_id = robusta_token.account_id
+            self.url = robusta_token.store_url
+            self.api_key = robusta_token.api_key
+            self.email = robusta_token.email
+            self.password = robusta_token.password
+        else:
+            self.account_id = ROBUSTA_ACCOUNT_ID
+            self.url = STORE_URL
+            self.api_key = STORE_API_KEY
+            self.email = STORE_EMAIL
+            self.password = STORE_PASSWORD
+
+        # valid only if all store parameters are provided
+        return all([self.account_id, self.url, self.api_key, self.email, self.password])
+
+    def sign_in(self) -> str:
+        logging.info("Supabase DAL login")
+        res = self.client.auth.sign_in_with_password(
+            {"email": self.email, "password": self.password}
+        )
+        self.client.auth.set_session(
+            res.session.access_token, res.session.refresh_token
+        )
+        self.client.postgrest.auth(res.session.access_token)
+        return res.user.id
+
+    def get_resource_recommendation(
+        self, name: str, namespace: str, kind
+    ) -> Optional[List[Dict]]:
+        if not self.enabled:
+            return []
+
+        try:
+            scans_meta_response = (
+                self.client.table(SCANS_META_TABLE)
+                .select("*")
+                .eq("account_id", self.account_id)
+                .eq("cluster_id", self.cluster)
+                .eq("latest", True)
+                .execute()
+            )
+            if not len(scans_meta_response.data):
+                return None
+
+            scans_results_response = (
+                self.client.table(SCANS_RESULTS_TABLE)
+                .select("*")
+                .eq("account_id", self.account_id)
+                .eq("cluster_id", self.cluster)
+                .eq("scan_id", scans_meta_response.data[0]["scan_id"])
+                .eq("name", name)
+                .eq("namespace", namespace)
+                .eq("kind", kind)
+                .execute()
+            )
+            if not len(scans_results_response.data):
+                return None
+
+            return scans_results_response.data
+        except Exception:
+            logging.exception("Supabase error while retrieving efficiency data")
+            return None
+
+    def get_configuration_changes(
+        self, start_datetime: str, end_datetime: str
+    ) -> Optional[List[Dict]]:
+        if not self.enabled:
+            return []
+
+        try:
+            changes_response = (
+                self.client.table(ISSUES_TABLE)
+                .select("id", "subject_name", "subject_namespace", "description")
+                .eq("account_id", self.account_id)
+                .eq("cluster", self.cluster)
+                .eq("finding_type", "configuration_change")
+                .gte("creation_date", start_datetime)
+                .lte("creation_date", end_datetime)
+                .execute()
+            )
+            if not len(changes_response.data):
+                return None
+
+        except Exception:
+            logging.exception("Supabase error while retrieving change data")
+            return None
+
+        changes_ids = [change["id"] for change in changes_response.data]
+        try:
+            change_data_response = (
+                self.client.table(EVIDENCE_TABLE)
+                .select("*")
+                .eq("account_id", self.account_id)
+                .in_("issue_id", changes_ids)
+                .execute()
+            )
+            if not len(change_data_response.data):
+                return None
+
+        except Exception:
+            logging.exception("Supabase error while retrieving change content")
+            return None
+
+        changes_data = []
+        change_data_map = {
+            change["issue_id"]: change for change in change_data_response.data
+        }
+
+        for change in changes_response.data:
+            change_content = change_data_map.get(change["id"])
+            if change_content:
+                changes_data.append(
+                    {
+                        "change": change_content["data"],
+                        "evidence_id": change_content["id"],
+                        **change,
+                    }
+                )
+
+        logging.debug(
+            "Change history for %s-%s: %s", start_datetime, end_datetime, changes_data
+        )
+
+        return changes_data
+
+    def get_issue_data(self, issue_id: Optional[str]) -> Optional[Dict]:
+        # TODO this could be done in a single atomic SELECT, but there is no
+        # foreign key relation between Issues and Evidence.
+        if not issue_id:
+            return None
+        if not self.enabled:  # store not initialized
+            return None
+        issue_data = None
+        try:
+            issue_response = (
+                self.client.table(ISSUES_TABLE)
+                .select("*")
+                .filter("id", "eq", issue_id)
+                .execute()
+            )
+            if len(issue_response.data):
+                issue_data = issue_response.data[0]
+
+        except Exception:  # e.g. invalid id format
+            logging.exception("Supabase error while retrieving issue data")
+            return None
+        if not issue_data:
+            return None
+        evidence = (
+            self.client.table(EVIDENCE_TABLE)
+            .select("*")
+            .filter("issue_id", "eq", issue_id)
+            .execute()
+        )
+        enrichment_blacklist = {"text_file", "graph", "ai_analysis", "holmes"}
+        data = [
+            enrich
+            for enrich in evidence.data
+            if enrich.get("enrichment_type") not in enrichment_blacklist
+        ]
+
+        issue_data["evidence"] = data
+
+        # build issue investigation dates
+        started_at = issue_data.get("starts_at")
+        if started_at:
+            dt = datetime.fromisoformat(started_at)
+
+            # Calculate timestamps
+            start_timestamp = dt - timedelta(minutes=10)
+            end_timestamp = dt + timedelta(minutes=10)
+
+            issue_data["start_timestamp"] = start_timestamp.strftime(
+                "%Y-%m-%dT%H:%M:%S.%fZ"
+            )
+            issue_data["end_timestamp"] = end_timestamp.strftime(
+                "%Y-%m-%dT%H:%M:%S.%fZ"
+            )
+            issue_data["start_timestamp_millis"] = int(
+                start_timestamp.timestamp() * 1000
+            )
+            issue_data["end_timestamp_millis"] = int(end_timestamp.timestamp() * 1000)
+
+        return issue_data
+
+    def get_resource_instructions(
+        self, type: str, name: Optional[str]
+    ) -> Optional[ResourceInstructions]:
+        if not self.enabled or not name:
+            return None
+
+        res = (
+            self.client.table(RUNBOOKS_TABLE)
+            .select("runbook")
+            .eq("account_id", self.account_id)
+            .eq("subject_type", type)
+            .eq("subject_name", name)
+            .execute()
+        )
+        if res.data:
+            instructions = res.data[0].get("runbook").get("instructions")
+            documents_data = res.data[0].get("runbook").get("documents")
+            documents = []
+
+            if documents_data:
+                for document_data in documents_data:
+                    url = document_data.get("url", None)
+                    if url:
+                        documents.append(ResourceInstructionDocument(url=url))
+                    else:
+                        logging.warning(
+                            f"Unsupported runbook for subject_type={type} / subject_name={name}: {document_data}"
+                        )
+
+            return ResourceInstructions(instructions=instructions, documents=documents)
+
+        return None
+
+    def get_global_instructions_for_account(self) -> Optional[Instructions]:
+        try:
+            res = (
+                self.client.table(RUNBOOKS_TABLE)
+                .select("runbook")
+                .eq("account_id", self.account_id)
+                .eq("subject_type", "Account")
+                .execute()
+            )
+
+            if res.data:
+                instructions = res.data[0].get("runbook").get("instructions")
+                return Instructions(instructions=instructions)
+        except Exception:
+            logging.exception("Failed to fetch global instructions", exc_info=True)
+
+        return None
+
+    def create_session_token(self) -> str:
+        token = str(uuid4())
+        self.client.table(SESSION_TOKENS_TABLE).insert(
+            {
+                "account_id": self.account_id,
+                "user_id": self.user_id,
+                "token": token,
+                "type": "HOLMES",
+            },
+            returning=ReturnMethod.minimal,  # must use this, because the user cannot read this table
+        ).execute()
+        return token
+
+    def get_ai_credentials(self) -> Tuple[str, str]:
+        if not self.enabled:
+            raise Exception(
+                "You're trying to use ROBUSTA_AI, but Cannot get credentials for ROBUSTA_AI. Store not initialized."
+            )
+
+        with self.lock:
+            session_token = self.token_cache.get("session_token")
+            if not session_token:
+                session_token = self.create_session_token()
+                self.token_cache["session_token"] = session_token
+
+        return self.account_id, session_token
+
+    def get_workload_issues(self, resource: dict, since_hours: float) -> List[str]:
+        if not self.enabled or not resource:
+            return []
+
+        cluster = resource.get("cluster")
+        if not cluster:
+            logging.debug("Missing workload cluster for issues.")
+            return []
+
+        since: str = (datetime.now() - timedelta(hours=since_hours)).isoformat()
+
+        svc_key = f"{resource.get('namespace', '')}/{resource.get('kind', '')}/{resource.get('name', '')}"
+        logging.debug(f"getting issues for workload {svc_key}")
+        try:
+            res = (
+                self.client.table(ISSUES_TABLE)
+                .select("id, creation_date, aggregation_key")
+                .eq("account_id", self.account_id)
+                .eq("cluster", cluster)
+                .eq("service_key", svc_key)
+                .gte("creation_date", since)
+                .order("creation_date")
+                .execute()
+            )
+
+            if not res.data:
+                return []
+
+            issue_dict = dict()
+            for issue in res.data:
+                issue_dict[issue.get("aggregation_key")] = issue.get("id")
+
+            unique_issues: list[str] = list(issue_dict.values())
+
+            res = (
+                self.client.table(EVIDENCE_TABLE)
+                .select("data, enrichment_type")
+                .in_("issue_id", unique_issues)
+                .execute()
+            )
+
+            enrichment_blacklist = {"text_file", "graph", "ai_analysis", "holmes"}
+            data = [
+                evidence.get("data")
+                for evidence in res.data
+                if evidence.get("enrichment_type") not in enrichment_blacklist
+            ]
+            return data
+
+        except Exception:
+            logging.exception("failed to fetch workload issues data", exc_info=True)
+            return []
+
+    def upsert_holmes_status(self, holmes_status_data: dict) -> None:
+        if not self.enabled:
+            logging.info(
+                "Robusta store not initialized. Skipping upserting holmes status."
+            )
+            return
+
+        updated_at = datetime.now().isoformat()
+        try:
+            (
+                self.client.table(HOLMES_STATUS_TABLE)
+                .upsert(
+                    {
+                        "account_id": self.account_id,
+                        "updated_at": updated_at,
+                        **holmes_status_data,
+                    },
+                    on_conflict="account_id, cluster_id",
+                )
+                .execute()
+            )
+        except Exception as error:
+            logging.error(
+                f"Error happened during upserting holmes status: {error}", exc_info=True
+            )
+
+        return None
+
+    def sync_toolsets(self, toolsets: list[dict], cluster_name: str) -> None:
+        if not toolsets:
+            logging.warning("No toolsets were provided for synchronization.")
+            return
+
+        if not self.enabled:
+            logging.info(
+                "Robusta store not initialized. Skipping sync holmes toolsets."
+            )
+            return
+
+        provided_toolset_names = [toolset["toolset_name"] for toolset in toolsets]
+
+        try:
+            self.client.table(HOLMES_TOOLSET).upsert(
+                toolsets, on_conflict="account_id, cluster_id, toolset_name"
+            ).execute()
+
+            logging.info("Toolsets upserted successfully.")
+
+            self.client.table(HOLMES_TOOLSET).delete().eq(
+                "account_id", self.account_id
+            ).eq("cluster_id", cluster_name).not_.in_(
+                "toolset_name", provided_toolset_names
+            ).execute()
+
+            logging.info("Toolsets synchronized successfully.")
+
+        except Exception as e:
+            logging.exception(
+                f"An error occurred during toolset synchronization: {e}", exc_info=True
+            )