holmesgpt 0.13.2__py3-none-any.whl → 0.18.4__py3-none-any.whl
This diff compares publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
- holmes/__init__.py +3 -5
- holmes/clients/robusta_client.py +20 -6
- holmes/common/env_vars.py +58 -3
- holmes/common/openshift.py +1 -1
- holmes/config.py +123 -148
- holmes/core/conversations.py +71 -15
- holmes/core/feedback.py +191 -0
- holmes/core/investigation.py +31 -39
- holmes/core/investigation_structured_output.py +3 -3
- holmes/core/issue.py +1 -1
- holmes/core/llm.py +508 -88
- holmes/core/models.py +108 -4
- holmes/core/openai_formatting.py +14 -1
- holmes/core/prompt.py +48 -3
- holmes/core/runbooks.py +1 -0
- holmes/core/safeguards.py +8 -6
- holmes/core/supabase_dal.py +295 -100
- holmes/core/tool_calling_llm.py +489 -428
- holmes/core/tools.py +325 -56
- holmes/core/tools_utils/token_counting.py +21 -0
- holmes/core/tools_utils/tool_context_window_limiter.py +40 -0
- holmes/core/tools_utils/tool_executor.py +0 -13
- holmes/core/tools_utils/toolset_utils.py +1 -0
- holmes/core/toolset_manager.py +191 -5
- holmes/core/tracing.py +19 -3
- holmes/core/transformers/__init__.py +23 -0
- holmes/core/transformers/base.py +63 -0
- holmes/core/transformers/llm_summarize.py +175 -0
- holmes/core/transformers/registry.py +123 -0
- holmes/core/transformers/transformer.py +32 -0
- holmes/core/truncation/compaction.py +94 -0
- holmes/core/truncation/dal_truncation_utils.py +23 -0
- holmes/core/truncation/input_context_window_limiter.py +219 -0
- holmes/interactive.py +228 -31
- holmes/main.py +23 -40
- holmes/plugins/interfaces.py +2 -1
- holmes/plugins/prompts/__init__.py +2 -1
- holmes/plugins/prompts/_fetch_logs.jinja2 +31 -6
- holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
- holmes/plugins/prompts/_runbook_instructions.jinja2 +24 -12
- holmes/plugins/prompts/base_user_prompt.jinja2 +7 -0
- holmes/plugins/prompts/conversation_history_compaction.jinja2 +89 -0
- holmes/plugins/prompts/generic_ask.jinja2 +0 -4
- holmes/plugins/prompts/generic_ask_conversation.jinja2 +0 -1
- holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +0 -1
- holmes/plugins/prompts/generic_investigation.jinja2 +0 -1
- holmes/plugins/prompts/investigation_procedure.jinja2 +50 -1
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +0 -1
- holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +0 -1
- holmes/plugins/runbooks/__init__.py +145 -17
- holmes/plugins/runbooks/catalog.json +2 -0
- holmes/plugins/sources/github/__init__.py +4 -2
- holmes/plugins/sources/prometheus/models.py +1 -0
- holmes/plugins/toolsets/__init__.py +44 -27
- holmes/plugins/toolsets/aks-node-health.yaml +46 -0
- holmes/plugins/toolsets/aks.yaml +64 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +38 -47
- holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +3 -2
- holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
- holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +3 -2
- holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +3 -1
- holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +3 -1
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +12 -13
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +15 -12
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +15 -12
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +11 -11
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +11 -9
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +15 -12
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +15 -15
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/utils.py +0 -32
- holmes/plugins/toolsets/bash/argocd/__init__.py +3 -3
- holmes/plugins/toolsets/bash/aws/__init__.py +4 -4
- holmes/plugins/toolsets/bash/azure/__init__.py +4 -4
- holmes/plugins/toolsets/bash/bash_toolset.py +11 -15
- holmes/plugins/toolsets/bash/common/bash.py +23 -13
- holmes/plugins/toolsets/bash/common/bash_command.py +1 -1
- holmes/plugins/toolsets/bash/common/stringify.py +1 -1
- holmes/plugins/toolsets/bash/kubectl/__init__.py +2 -1
- holmes/plugins/toolsets/bash/kubectl/constants.py +0 -1
- holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +3 -4
- holmes/plugins/toolsets/bash/parse_command.py +12 -13
- holmes/plugins/toolsets/cilium.yaml +284 -0
- holmes/plugins/toolsets/connectivity_check.py +124 -0
- holmes/plugins/toolsets/coralogix/api.py +132 -119
- holmes/plugins/toolsets/coralogix/coralogix.jinja2 +14 -0
- holmes/plugins/toolsets/coralogix/toolset_coralogix.py +219 -0
- holmes/plugins/toolsets/coralogix/utils.py +15 -79
- holmes/plugins/toolsets/datadog/datadog_api.py +525 -26
- holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +55 -11
- holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +3 -3
- holmes/plugins/toolsets/datadog/datadog_models.py +59 -0
- holmes/plugins/toolsets/datadog/datadog_url_utils.py +213 -0
- holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +165 -28
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +417 -241
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +234 -214
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +167 -79
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +374 -363
- holmes/plugins/toolsets/elasticsearch/__init__.py +6 -0
- holmes/plugins/toolsets/elasticsearch/elasticsearch.py +834 -0
- holmes/plugins/toolsets/elasticsearch/opensearch_ppl_query_docs.jinja2 +1616 -0
- holmes/plugins/toolsets/elasticsearch/opensearch_query_assist.py +78 -0
- holmes/plugins/toolsets/elasticsearch/opensearch_query_assist_instructions.jinja2 +223 -0
- holmes/plugins/toolsets/git.py +54 -50
- holmes/plugins/toolsets/grafana/base_grafana_toolset.py +16 -4
- holmes/plugins/toolsets/grafana/common.py +13 -29
- holmes/plugins/toolsets/grafana/grafana_tempo_api.py +455 -0
- holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +25 -0
- holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +191 -0
- holmes/plugins/toolsets/grafana/loki_api.py +4 -0
- holmes/plugins/toolsets/grafana/toolset_grafana.py +293 -89
- holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +49 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +820 -292
- holmes/plugins/toolsets/grafana/trace_parser.py +4 -3
- holmes/plugins/toolsets/internet/internet.py +15 -16
- holmes/plugins/toolsets/internet/notion.py +9 -11
- holmes/plugins/toolsets/investigator/core_investigation.py +44 -36
- holmes/plugins/toolsets/investigator/model.py +3 -1
- holmes/plugins/toolsets/json_filter_mixin.py +134 -0
- holmes/plugins/toolsets/kafka.py +36 -42
- holmes/plugins/toolsets/kubernetes.yaml +317 -113
- holmes/plugins/toolsets/kubernetes_logs.py +9 -9
- holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
- holmes/plugins/toolsets/logging_utils/logging_api.py +94 -8
- holmes/plugins/toolsets/mcp/toolset_mcp.py +218 -64
- holmes/plugins/toolsets/newrelic/new_relic_api.py +165 -0
- holmes/plugins/toolsets/newrelic/newrelic.jinja2 +65 -0
- holmes/plugins/toolsets/newrelic/newrelic.py +320 -0
- holmes/plugins/toolsets/openshift.yaml +283 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +1202 -421
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +54 -5
- holmes/plugins/toolsets/prometheus/utils.py +28 -0
- holmes/plugins/toolsets/rabbitmq/api.py +23 -4
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +13 -14
- holmes/plugins/toolsets/robusta/robusta.py +239 -68
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +157 -27
- holmes/plugins/toolsets/service_discovery.py +1 -1
- holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
- holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
- holmes/plugins/toolsets/utils.py +88 -0
- holmes/utils/config_utils.py +91 -0
- holmes/utils/connection_utils.py +31 -0
- holmes/utils/console/result.py +10 -0
- holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
- holmes/utils/env.py +7 -0
- holmes/utils/file_utils.py +2 -1
- holmes/utils/global_instructions.py +60 -11
- holmes/utils/holmes_status.py +6 -4
- holmes/utils/holmes_sync_toolsets.py +0 -2
- holmes/utils/krr_utils.py +188 -0
- holmes/utils/log.py +15 -0
- holmes/utils/markdown_utils.py +2 -3
- holmes/utils/memory_limit.py +58 -0
- holmes/utils/sentry_helper.py +64 -0
- holmes/utils/stream.py +69 -8
- holmes/utils/tags.py +4 -3
- holmes/version.py +37 -15
- holmesgpt-0.18.4.dist-info/LICENSE +178 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/METADATA +35 -31
- holmesgpt-0.18.4.dist-info/RECORD +258 -0
- holmes/core/performance_timing.py +0 -72
- holmes/plugins/toolsets/aws.yaml +0 -80
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +0 -112
- holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +0 -310
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +0 -739
- holmes/plugins/toolsets/grafana/grafana_api.py +0 -42
- holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
- holmes/plugins/toolsets/newrelic.py +0 -231
- holmes/plugins/toolsets/opensearch/opensearch.py +0 -257
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +0 -161
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +0 -218
- holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +0 -12
- holmes/plugins/toolsets/opensearch/opensearch_utils.py +0 -166
- holmes/plugins/toolsets/servicenow/install.md +0 -37
- holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
- holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
- holmes/utils/keygen_utils.py +0 -6
- holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
- holmesgpt-0.13.2.dist-info/RECORD +0 -234
- /holmes/plugins/toolsets/{opensearch → newrelic}/__init__.py +0 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/WHEEL +0 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/entry_points.txt +0 -0
holmes/core/supabase_dal.py  CHANGED

```diff
@@ -1,17 +1,21 @@
 import base64
 import binascii
+import gzip
 import json
 import logging
 import os
 import threading
 from datetime import datetime, timedelta
+from enum import Enum
 from typing import Dict, List, Optional, Tuple
 from uuid import uuid4
-import gzip
 
+import sentry_sdk
 import yaml  # type: ignore
 from cachetools import TTLCache  # type: ignore
+from postgrest._sync import request_builder as supabase_request_builder
 from postgrest._sync.request_builder import SyncQueryRequestBuilder
+from postgrest.base_request_builder import QueryArgs
 from postgrest.exceptions import APIError as PGAPIError
 from postgrest.types import ReturnMethod
 from pydantic import BaseModel
@@ -30,13 +34,19 @@ from holmes.core.resource_instruction import (
     ResourceInstructionDocument,
     ResourceInstructions,
 )
+from holmes.core.truncation.dal_truncation_utils import (
+    truncate_evidences_entities_if_necessary,
+)
+from holmes.plugins.runbooks import RobustaRunbookInstruction
 from holmes.utils.definitions import RobustaConfig
 from holmes.utils.env import get_env_replacement
 from holmes.utils.global_instructions import Instructions
+from holmes.utils.krr_utils import calculate_krr_savings
 
 SUPABASE_TIMEOUT_SECONDS = int(os.getenv("SUPABASE_TIMEOUT_SECONDS", 3600))
 
 ISSUES_TABLE = "Issues"
+GROUPED_ISSUES_TABLE = "GroupedIssues"
 EVIDENCE_TABLE = "Evidence"
 RUNBOOKS_TABLE = "HolmesRunbooks"
 SESSION_TOKENS_TABLE = "AuthTokens"
@@ -45,6 +55,31 @@ HOLMES_TOOLSET = "HolmesToolsStatus"
 SCANS_META_TABLE = "ScansMeta"
 SCANS_RESULTS_TABLE = "ScansResults"
 
+ENRICHMENT_BLACKLIST = ["text_file", "graph", "ai_analysis", "holmes"]
+ENRICHMENT_BLACKLIST_SET = set(ENRICHMENT_BLACKLIST)
+
+
+logging.info("Patching supabase_request_builder.pre_select")
+original_pre_select = supabase_request_builder.pre_select
+
+
+def pre_select_patched(*args, **kwargs):
+    query_args: QueryArgs = original_pre_select(*args, **kwargs)
+    if not query_args.json:
+        query_args = QueryArgs(
+            query_args.method, query_args.params, query_args.headers, None
+        )
+
+    return query_args
+
+
+supabase_request_builder.pre_select = pre_select_patched
+
+
+class FindingType(str, Enum):
+    ISSUE = "issue"
+    CONFIGURATION_CHANGE = "configuration_change"
+
 
 class RobustaToken(BaseModel):
     store_url: str
```
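The module-level patch above wraps postgrest's `pre_select` so that any select whose `QueryArgs` carries a falsy JSON body is rebuilt with `json=None`; the positional field order `method, params, headers, json` is taken from the tuple construction in the diff itself. Below is a minimal sketch of the same wrap-and-replace pattern; the `_patched` idempotency guard is an addition for illustration, not part of the diff:

```python
# Sketch of the monkey-patch pattern used above, with a guard so a second
# import of this module does not wrap pre_select twice. The QueryArgs field
# order (method, params, headers, json) matches the diff's own construction.
from postgrest._sync import request_builder as supabase_request_builder
from postgrest.base_request_builder import QueryArgs

if not getattr(supabase_request_builder.pre_select, "_patched", False):
    _original_pre_select = supabase_request_builder.pre_select

    def pre_select_patched(*args, **kwargs):
        query_args: QueryArgs = _original_pre_select(*args, **kwargs)
        if not query_args.json:
            # Rebuild the args with an explicit None body instead of e.g. {}.
            query_args = QueryArgs(
                query_args.method, query_args.params, query_args.headers, None
            )
        return query_args

    pre_select_patched._patched = True  # type: ignore[attr-defined]
    supabase_request_builder.pre_select = pre_select_patched
```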
```diff
@@ -54,12 +89,23 @@ class RobustaToken(BaseModel):
     password: str
 
 
+class SupabaseDnsException(Exception):
+    def __init__(self, error: Exception, url: str):
+        message = (
+            f"\n{error.__class__.__name__}: {error}\n"
+            f"Error connecting to <{url}>\n"
+            "This is often due to DNS issues or firewall policies - to troubleshoot run in your cluster:\n"
+            f"curl -I {url}\n"
+        )
+        super().__init__(message)
+
+
 class SupabaseDal:
     def __init__(self, cluster: str):
         self.enabled = self.__init_config()
         self.cluster = cluster
         if not self.enabled:
-            logging.
+            logging.debug(
                 "Not connecting to Robusta platform - robusta token not provided - using ROBUSTA_AI will not be possible"
             )
             return
```
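`SupabaseDnsException` turns a low-level name-resolution failure into an actionable message for cluster operators. A quick sketch of how it renders, assuming the class defined in the hunk above is in scope; the wrapped error and URL are made-up examples:

```python
# Hypothetical error and URL, used only to show the formatted message.
err = OSError("Temporary failure in name resolution")
exc = SupabaseDnsException(err, "https://db.example-robusta.dev")
print(exc)
# OSError: Temporary failure in name resolution
# Error connecting to <https://db.example-robusta.dev>
# This is often due to DNS issues or firewall policies - to troubleshoot run in your cluster:
# curl -I https://db.example-robusta.dev
```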
```diff
@@ -67,6 +113,7 @@ class SupabaseDal:
             f"Initializing Robusta platform connection for account {self.account_id}"
         )
         options = ClientOptions(postgrest_client_timeout=SUPABASE_TIMEOUT_SECONDS)
+        sentry_sdk.set_tag("db_url", self.url)
         self.client = create_client(self.url, self.api_key, options)  # type: ignore
         self.user_id = self.sign_in()
         ttl = int(os.environ.get("SAAS_SESSION_TOKEN_TTL_SEC", "82800"))  # 23 hours
@@ -117,7 +164,7 @@ class SupabaseDal:
         )
 
         if not os.path.exists(config_file_path):
-            logging.
+            logging.debug(f"No robusta config in {config_file_path}")
             return None
 
         logging.info(f"loading config {config_file_path}")
@@ -179,118 +226,183 @@ class SupabaseDal:
         return all([self.account_id, self.url, self.api_key, self.email, self.password])
 
     def sign_in(self) -> str:
-        logging.info("Supabase
-
-
-
-
-
-
-
-
-
-
-
-
+        logging.info("Supabase dal login")
+        try:
+            res = self.client.auth.sign_in_with_password(
+                {"email": self.email, "password": self.password}
+            )
+            if not res.session:
+                raise ValueError("Authentication failed: no session returned")
+            if not res.user:
+                raise ValueError("Authentication failed: no user returned")
+            self.client.auth.set_session(
+                res.session.access_token, res.session.refresh_token
+            )
+            self.client.postgrest.auth(res.session.access_token)
+            return res.user.id
+        except Exception as e:
+            error_msg = str(e).lower()
+            if any(
+                dns_indicator in error_msg
+                for dns_indicator in [
+                    "temporary failure in name resolution",
+                    "name resolution",
+                    "dns",
+                    "name or service not known",
+                    "nodename nor servname provided",
+                ]
+            ):
+                raise SupabaseDnsException(e, self.url) from e
+            raise
 
     def get_resource_recommendation(
-        self,
+        self,
+        limit: int = 10,
+        sort_by: str = "cpu_total",
+        namespace: Optional[str] = None,
+        name_pattern: Optional[str] = None,
+        kind: Optional[str] = None,
+        container: Optional[str] = None,
     ) -> Optional[List[Dict]]:
+        """
+        Fetch top N resource recommendations with optional filters and sorting.
+
+        Args:
+            limit: Maximum number of recommendations to return (default: 10)
+            sort_by: Field to sort by potential savings. Options:
+                - "cpu_total": Total CPU savings (requests + limits)
+                - "memory_total": Total memory savings (requests + limits)
+                - "cpu_requests": CPU requests savings
+                - "memory_requests": Memory requests savings
+                - "cpu_limits": CPU limits savings
+                - "memory_limits": Memory limits savings
+                - "priority": Use the priority field from the scan
+            namespace: Filter by Kubernetes namespace (exact match)
+            name_pattern: Filter by workload name (supports SQL LIKE pattern, e.g., '%app%')
+            kind: Filter by Kubernetes resource kind (e.g., Deployment, StatefulSet, DaemonSet, Job)
+            container: Filter by container name (exact match)
+
+        Returns:
+            List of recommendations sorted by the specified metric
+        """
         if not self.enabled:
             return []
 
-
-
-
-
-
-
-
-
-
-
-
+        scans_meta_response = (
+            self.client.table(SCANS_META_TABLE)
+            .select("*")
+            .eq("account_id", self.account_id)
+            .eq("cluster_id", self.cluster)
+            .eq("latest", True)
+            .execute()
+        )
+        if not len(scans_meta_response.data):
+            logging.warning("No scan metadata found for latest krr scan")
+            return None
 
-
-            self.client.table(SCANS_RESULTS_TABLE)
-            .select("*")
-            .eq("account_id", self.account_id)
-            .eq("cluster_id", self.cluster)
-            .eq("scan_id", scans_meta_response.data[0]["scan_id"])
-            .eq("name", name)
-            .eq("namespace", namespace)
-            .eq("kind", kind)
-            .execute()
-        )
-        if not len(scans_results_response.data):
-            return None
+        scan_id = scans_meta_response.data[0]["scan_id"]
 
-
-
-
+        query = (
+            self.client.table(SCANS_RESULTS_TABLE)
+            .select("*")
+            .eq("account_id", self.account_id)
+            .eq("cluster_id", self.cluster)
+            .eq("scan_id", scan_id)
+        )
+
+        if namespace:
+            query = query.eq("namespace", namespace)
+        if name_pattern:
+            query = query.like("name", name_pattern)
+        if kind:
+            query = query.eq("kind", kind)
+        if container:
+            query = query.eq("container", container)
+
+        # For priority sorting, we can use the database's order
+        if sort_by == "priority":
+            query = query.order("priority", desc=True).limit(limit)
+
+        scans_results_response = query.execute()
+
+        if not len(scans_results_response.data):
             return None
 
-
-
+        results = scans_results_response.data
+
+        if len(results) <= 1:
+            return results
+
+        # If sorting by priority, we already ordered and limited in the query
+        if sort_by == "priority":
+            return results
+
+        # Sort by calculated savings (descending)
+        results_with_savings = [
+            (result, calculate_krr_savings(result, sort_by)) for result in results
+        ]
+        results_with_savings.sort(key=lambda x: x[1], reverse=True)
+
+        return [result for result, _ in results_with_savings[:limit]]
+
+    def get_issues_metadata(
+        self,
+        start_datetime: str,
+        end_datetime: str,
+        limit: int = 100,
+        workload: Optional[str] = None,
+        ns: Optional[str] = None,
+        cluster: Optional[str] = None,
+        finding_type: FindingType = FindingType.CONFIGURATION_CHANGE,
     ) -> Optional[List[Dict]]:
         if not self.enabled:
             return []
-
+        if not cluster:
+            cluster = self.cluster
         try:
-
+            query = (
                 self.client.table(ISSUES_TABLE)
-                .select(
+                .select(
+                    "id",
+                    "title",
+                    "subject_name",
+                    "subject_namespace",
+                    "subject_type",
+                    "description",
+                    "starts_at",
+                    "ends_at",
+                )
                 .eq("account_id", self.account_id)
-                .eq("cluster",
-                .eq("finding_type", "configuration_change")
+                .eq("cluster", cluster)
                 .gte("creation_date", start_datetime)
                 .lte("creation_date", end_datetime)
-                .
+                .limit(limit)
             )
-            if not len(changes_response.data):
-                return None
 
-
-
-
+            query = query.eq("finding_type", finding_type.value)
+            if workload:
+                query.eq("subject_name", workload)
+            if ns:
+                query.eq("subject_namespace", ns)
 
-
-
-            change_data_response = (
-                self.client.table(EVIDENCE_TABLE)
-                .select("*")
-                .eq("account_id", self.account_id)
-                .in_("issue_id", changes_ids)
-                .execute()
-            )
-            if not len(change_data_response.data):
+            res = query.execute()
+            if not res.data:
                 return None
 
         except Exception:
-            logging.exception("Supabase error while retrieving change
+            logging.exception("Supabase error while retrieving change data")
             return None
 
-        changes_data = []
-        change_data_map = {
-            change["issue_id"]: change for change in change_data_response.data
-        }
-
-        for change in changes_response.data:
-            change_content = change_data_map.get(change["id"])
-            if change_content:
-                changes_data.append(
-                    {
-                        "change": change_content["data"],
-                        "evidence_id": change_content["id"],
-                        **change,
-                    }
-                )
-
         logging.debug(
-            "Change history for %s-%s: %s",
+            "Change history metadata for %s-%s workload %s in ns %s: %s",
+            start_datetime,
+            end_datetime,
+            workload,
+            ns,
+            res.data,
         )
 
-        return
+        return res.data
 
     def unzip_evidence_file(self, data):
         try:
```
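Except for `priority`, which is pushed down to the database as an `order(...).limit(...)`, `get_resource_recommendation` now sorts client-side using `calculate_krr_savings` from the new `holmes/utils/krr_utils.py` (not shown in this diff). A sketch of that decorate-sort-limit pattern, with a stub standing in for the real savings function:

```python
from typing import Dict, List


def calculate_krr_savings(result: Dict, sort_by: str) -> float:
    """Stub for holmes.utils.krr_utils.calculate_krr_savings (real logic not shown)."""
    return float(result.get(sort_by, 0))


def top_by_savings(results: List[Dict], sort_by: str, limit: int) -> List[Dict]:
    # Pair each row with its computed savings, sort descending, keep the top N.
    decorated = [(row, calculate_krr_savings(row, sort_by)) for row in results]
    decorated.sort(key=lambda pair: pair[1], reverse=True)
    return [row for row, _ in decorated[:limit]]


rows = [{"cpu_total": 2.0}, {"cpu_total": 5.5}, {"cpu_total": 0.3}]
assert top_by_savings(rows, "cpu_total", 2) == [{"cpu_total": 5.5}, {"cpu_total": 2.0}]
```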
```diff
@@ -322,22 +434,30 @@ class SupabaseDal:
             return data
 
     def extract_relevant_issues(self, evidence):
-        enrichment_blacklist = {"text_file", "graph", "ai_analysis", "holmes"}
         data = [
             enrich
             for enrich in evidence.data
-            if enrich.get("enrichment_type") not in
+            if enrich.get("enrichment_type") not in ENRICHMENT_BLACKLIST_SET
         ]
 
         unzipped_files = [
             self.unzip_evidence_file(enrich)
             for enrich in evidence.data
             if enrich.get("enrichment_type") == "text_file"
+            or enrich.get("enrichment_type") == "alert_raw_data"
         ]
 
         data.extend(unzipped_files)
         return data
 
+    def get_issue_from_db(self, issue_id: str, table: str) -> Optional[Dict]:
+        issue_response = (
+            self.client.table(table).select("*").filter("id", "eq", issue_id).execute()
+        )
+        if len(issue_response.data):
+            return issue_response.data[0]
+        return None
+
     def get_issue_data(self, issue_id: Optional[str]) -> Optional[Dict]:
         # TODO this could be done in a single atomic SELECT, but there is no
         # foreign key relation between Issues and Evidence.
```
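`extract_relevant_issues` now checks membership against the module-level `ENRICHMENT_BLACKLIST_SET` built once at import time, rather than recreating a set literal on every call, and additionally unzips `alert_raw_data` enrichments. A tiny sketch of the membership filter; the evidence rows are hypothetical:

```python
# Hypothetical evidence rows illustrating the blacklist filter above.
ENRICHMENT_BLACKLIST = ["text_file", "graph", "ai_analysis", "holmes"]
ENRICHMENT_BLACKLIST_SET = set(ENRICHMENT_BLACKLIST)

evidence_rows = [
    {"enrichment_type": "graph"},           # blacklisted, dropped
    {"enrichment_type": "k8s_events"},      # kept
    {"enrichment_type": "alert_raw_data"},  # kept here, unzipped separately
]
kept = [
    row
    for row in evidence_rows
    if row.get("enrichment_type") not in ENRICHMENT_BLACKLIST_SET
]
assert [row["enrichment_type"] for row in kept] == ["k8s_events", "alert_raw_data"]
```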
```diff
@@ -347,14 +467,11 @@ class SupabaseDal:
             return None
         issue_data = None
         try:
-
-
-                .
-
-                .
-            )
-            if len(issue_response.data):
-                issue_data = issue_response.data[0]
+            issue_data = self.get_issue_from_db(issue_id, ISSUES_TABLE)
+            if issue_data and issue_data["source"] == "prometheus":
+                logging.debug("Getting alert %s from GroupedIssuesTable", issue_id)
+                # This issue will have the complete alert duration information
+                issue_data = self.get_issue_from_db(issue_id, GROUPED_ISSUES_TABLE)
 
         except Exception:  # e.g. invalid id format
             logging.exception("Supabase error while retrieving issue data")
@@ -364,12 +481,14 @@ class SupabaseDal:
         evidence = (
             self.client.table(EVIDENCE_TABLE)
             .select("*")
-            .
+            .eq("issue_id", issue_id)
+            .not_.in_("enrichment_type", ENRICHMENT_BLACKLIST)
             .execute()
         )
-
+        relevant_evidence = self.extract_relevant_issues(evidence)
+        truncate_evidences_entities_if_necessary(relevant_evidence)
 
-        issue_data["evidence"] =
+        issue_data["evidence"] = relevant_evidence
 
         # build issue investigation dates
         started_at = issue_data.get("starts_at")
@@ -393,6 +512,79 @@ class SupabaseDal:
 
         return issue_data
 
+    def get_runbook_catalog(self) -> Optional[List[RobustaRunbookInstruction]]:
+        if not self.enabled:
+            return None
+
+        try:
+            res = (
+                self.client.table(RUNBOOKS_TABLE)
+                .select("*")
+                .eq("account_id", self.account_id)
+                .eq("subject_type", "RunbookCatalog")
+                .execute()
+            )
+            if not res.data:
+                return None
+
+            instructions = []
+            for row in res.data:
+                id = row.get("runbook_id")
+                symptom = row.get("symptoms")
+                title = row.get("subject_name")
+                if not symptom:
+                    logging.warning("Skipping runbook with empty symptom: %s", id)
+                    continue
+                instructions.append(
+                    RobustaRunbookInstruction(id=id, symptom=symptom, title=title)
+                )
+            return instructions
+        except Exception:
+            logging.exception("Failed to fetch RunbookCatalog", exc_info=True)
+            return None
+
+    def get_runbook_content(
+        self, runbook_id: str
+    ) -> Optional[RobustaRunbookInstruction]:
+        if not self.enabled:
+            return None
+
+        res = (
+            self.client.table(RUNBOOKS_TABLE)
+            .select("*")
+            .eq("account_id", self.account_id)
+            .eq("subject_type", "RunbookCatalog")
+            .eq("runbook_id", runbook_id)
+            .execute()
+        )
+        if not res.data or len(res.data) != 1:
+            return None
+
+        row = res.data[0]
+        id = row.get("runbook_id")
+        symptom = row.get("symptoms")
+        title = row.get("subject_name")
+        raw_instruction = row.get("runbook").get("instructions")
+        # TODO: remove in the future when we migrate the table data
+        if isinstance(raw_instruction, list) and len(raw_instruction) == 1:
+            instruction = raw_instruction[0]
+        elif isinstance(raw_instruction, list) and len(raw_instruction) > 1:
+            # not currently used, but will be used in the future
+            instruction = "\n - ".join(raw_instruction)
+        elif isinstance(raw_instruction, str):
+            # not supported by the current UI, but will be supported in the future
+            instruction = raw_instruction
+        else:
+            # in case the format is unexpected, convert to string
+            logging.error(
+                f"Unexpected runbook instruction format for runbook_id={runbook_id}: {raw_instruction}"
+            )
+            instruction = str(raw_instruction)
+
+        return RobustaRunbookInstruction(
+            id=id, symptom=symptom, instruction=instruction, title=title
+        )
+
     def get_resource_instructions(
         self, type: str, name: Optional[str]
     ) -> Optional[ResourceInstructions]:
```
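`get_runbook_content` has to normalize the stored `instructions` value, which may arrive as a one-element list, a multi-element list, or a plain string. The same branching, extracted into a standalone function for illustration (behavior mirrors the diff, including the `str()` fallback for unexpected formats):

```python
import logging
from typing import Any


def normalize_instruction(raw: Any, runbook_id: str) -> str:
    # Mirrors the branching in get_runbook_content above.
    if isinstance(raw, list) and len(raw) == 1:
        return raw[0]
    if isinstance(raw, list) and len(raw) > 1:
        return "\n - ".join(raw)
    if isinstance(raw, str):
        return raw
    logging.error(
        "Unexpected runbook instruction format for runbook_id=%s: %s", runbook_id, raw
    )
    return str(raw)


assert normalize_instruction(["restart the pod"], "rb-1") == "restart the pod"
assert normalize_instruction(["check logs", "restart"], "rb-2") == "check logs\n - restart"
```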
```diff
@@ -512,10 +704,13 @@ class SupabaseDal:
                 self.client.table(EVIDENCE_TABLE)
                 .select("data, enrichment_type")
                 .in_("issue_id", unique_issues)
+                .not_.in_("enrichment_type", ENRICHMENT_BLACKLIST)
                 .execute()
             )
 
-
+            relevant_issues = self.extract_relevant_issues(res)
+            truncate_evidences_entities_if_necessary(relevant_issues)
+            return relevant_issues
 
         except Exception:
             logging.exception("failed to fetch workload issues data", exc_info=True)
```