holmesgpt 0.13.2__py3-none-any.whl → 0.16.2a0__py3-none-any.whl
This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
- holmes/__init__.py +1 -1
- holmes/clients/robusta_client.py +17 -4
- holmes/common/env_vars.py +40 -1
- holmes/config.py +114 -144
- holmes/core/conversations.py +53 -14
- holmes/core/feedback.py +191 -0
- holmes/core/investigation.py +18 -22
- holmes/core/llm.py +489 -88
- holmes/core/models.py +103 -1
- holmes/core/openai_formatting.py +13 -0
- holmes/core/prompt.py +1 -1
- holmes/core/safeguards.py +4 -4
- holmes/core/supabase_dal.py +293 -100
- holmes/core/tool_calling_llm.py +423 -323
- holmes/core/tools.py +311 -33
- holmes/core/tools_utils/token_counting.py +14 -0
- holmes/core/tools_utils/tool_context_window_limiter.py +57 -0
- holmes/core/tools_utils/tool_executor.py +13 -8
- holmes/core/toolset_manager.py +155 -4
- holmes/core/tracing.py +6 -1
- holmes/core/transformers/__init__.py +23 -0
- holmes/core/transformers/base.py +62 -0
- holmes/core/transformers/llm_summarize.py +174 -0
- holmes/core/transformers/registry.py +122 -0
- holmes/core/transformers/transformer.py +31 -0
- holmes/core/truncation/compaction.py +59 -0
- holmes/core/truncation/dal_truncation_utils.py +23 -0
- holmes/core/truncation/input_context_window_limiter.py +218 -0
- holmes/interactive.py +177 -24
- holmes/main.py +7 -4
- holmes/plugins/prompts/_fetch_logs.jinja2 +26 -1
- holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
- holmes/plugins/prompts/_runbook_instructions.jinja2 +23 -12
- holmes/plugins/prompts/conversation_history_compaction.jinja2 +88 -0
- holmes/plugins/prompts/generic_ask.jinja2 +2 -4
- holmes/plugins/prompts/generic_ask_conversation.jinja2 +2 -1
- holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +2 -1
- holmes/plugins/prompts/generic_investigation.jinja2 +2 -1
- holmes/plugins/prompts/investigation_procedure.jinja2 +48 -0
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +2 -1
- holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +2 -1
- holmes/plugins/runbooks/__init__.py +117 -18
- holmes/plugins/runbooks/catalog.json +2 -0
- holmes/plugins/toolsets/__init__.py +21 -8
- holmes/plugins/toolsets/aks-node-health.yaml +46 -0
- holmes/plugins/toolsets/aks.yaml +64 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +26 -36
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +0 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +10 -7
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +8 -6
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +8 -6
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +9 -7
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +9 -6
- holmes/plugins/toolsets/bash/bash_toolset.py +10 -13
- holmes/plugins/toolsets/bash/common/bash.py +7 -7
- holmes/plugins/toolsets/cilium.yaml +284 -0
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +5 -3
- holmes/plugins/toolsets/datadog/datadog_api.py +490 -24
- holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +21 -10
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +349 -216
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +190 -19
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +101 -44
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +13 -16
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +25 -31
- holmes/plugins/toolsets/git.py +51 -46
- holmes/plugins/toolsets/grafana/common.py +15 -3
- holmes/plugins/toolsets/grafana/grafana_api.py +46 -24
- holmes/plugins/toolsets/grafana/grafana_tempo_api.py +454 -0
- holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +9 -0
- holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +117 -0
- holmes/plugins/toolsets/grafana/toolset_grafana.py +211 -91
- holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +27 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +653 -293
- holmes/plugins/toolsets/grafana/trace_parser.py +1 -1
- holmes/plugins/toolsets/internet/internet.py +6 -7
- holmes/plugins/toolsets/internet/notion.py +5 -6
- holmes/plugins/toolsets/investigator/core_investigation.py +42 -34
- holmes/plugins/toolsets/kafka.py +25 -36
- holmes/plugins/toolsets/kubernetes.yaml +58 -84
- holmes/plugins/toolsets/kubernetes_logs.py +6 -6
- holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
- holmes/plugins/toolsets/logging_utils/logging_api.py +80 -4
- holmes/plugins/toolsets/mcp/toolset_mcp.py +181 -55
- holmes/plugins/toolsets/newrelic/__init__.py +0 -0
- holmes/plugins/toolsets/newrelic/new_relic_api.py +125 -0
- holmes/plugins/toolsets/newrelic/newrelic.jinja2 +41 -0
- holmes/plugins/toolsets/newrelic/newrelic.py +163 -0
- holmes/plugins/toolsets/opensearch/opensearch.py +10 -17
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +7 -7
- holmes/plugins/toolsets/opensearch/opensearch_ppl_query_docs.jinja2 +1616 -0
- holmes/plugins/toolsets/opensearch/opensearch_query_assist.py +78 -0
- holmes/plugins/toolsets/opensearch/opensearch_query_assist_instructions.jinja2 +223 -0
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +13 -16
- holmes/plugins/toolsets/openshift.yaml +283 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +915 -390
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +43 -2
- holmes/plugins/toolsets/prometheus/utils.py +28 -0
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +9 -10
- holmes/plugins/toolsets/robusta/robusta.py +236 -65
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +137 -26
- holmes/plugins/toolsets/service_discovery.py +1 -1
- holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
- holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
- holmes/plugins/toolsets/utils.py +88 -0
- holmes/utils/config_utils.py +91 -0
- holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
- holmes/utils/env.py +7 -0
- holmes/utils/global_instructions.py +75 -10
- holmes/utils/holmes_status.py +2 -1
- holmes/utils/holmes_sync_toolsets.py +0 -2
- holmes/utils/krr_utils.py +188 -0
- holmes/utils/sentry_helper.py +41 -0
- holmes/utils/stream.py +61 -7
- holmes/version.py +34 -14
- holmesgpt-0.16.2a0.dist-info/LICENSE +178 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/METADATA +29 -27
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/RECORD +126 -102
- holmes/core/performance_timing.py +0 -72
- holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
- holmes/plugins/toolsets/newrelic.py +0 -231
- holmes/plugins/toolsets/servicenow/install.md +0 -37
- holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
- holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
- holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/WHEEL +0 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/entry_points.txt +0 -0
holmes/core/supabase_dal.py (CHANGED)
@@ -1,14 +1,16 @@
 import base64
 import binascii
+import gzip
 import json
 import logging
 import os
 import threading
 from datetime import datetime, timedelta
+from enum import Enum
 from typing import Dict, List, Optional, Tuple
 from uuid import uuid4
-import gzip
 
+from postgrest.base_request_builder import QueryArgs
 import yaml  # type: ignore
 from cachetools import TTLCache  # type: ignore
 from postgrest._sync.request_builder import SyncQueryRequestBuilder
@@ -30,13 +32,20 @@ from holmes.core.resource_instruction import (
     ResourceInstructionDocument,
     ResourceInstructions,
 )
+from holmes.core.truncation.dal_truncation_utils import (
+    truncate_evidences_entities_if_necessary,
+)
+from holmes.plugins.runbooks import RobustaRunbookInstruction
 from holmes.utils.definitions import RobustaConfig
 from holmes.utils.env import get_env_replacement
 from holmes.utils.global_instructions import Instructions
+from holmes.utils.krr_utils import calculate_krr_savings
+from postgrest._sync import request_builder as supabase_request_builder
 
 SUPABASE_TIMEOUT_SECONDS = int(os.getenv("SUPABASE_TIMEOUT_SECONDS", 3600))
 
 ISSUES_TABLE = "Issues"
+GROUPED_ISSUES_TABLE = "GroupedIssues"
 EVIDENCE_TABLE = "Evidence"
 RUNBOOKS_TABLE = "HolmesRunbooks"
 SESSION_TOKENS_TABLE = "AuthTokens"
@@ -45,6 +54,31 @@ HOLMES_TOOLSET = "HolmesToolsStatus"
 SCANS_META_TABLE = "ScansMeta"
 SCANS_RESULTS_TABLE = "ScansResults"
 
+ENRICHMENT_BLACKLIST = ["text_file", "graph", "ai_analysis", "holmes"]
+ENRICHMENT_BLACKLIST_SET = set(ENRICHMENT_BLACKLIST)
+
+
+logging.info("Patching supabase_request_builder.pre_select")
+original_pre_select = supabase_request_builder.pre_select
+
+
+def pre_select_patched(*args, **kwargs):
+    query_args: QueryArgs = original_pre_select(*args, **kwargs)
+    if not query_args.json:
+        query_args = QueryArgs(
+            query_args.method, query_args.params, query_args.headers, None
+        )
+
+    return query_args
+
+
+supabase_request_builder.pre_select = pre_select_patched
+
+
+class FindingType(str, Enum):
+    ISSUE = "issue"
+    CONFIGURATION_CHANGE = "configuration_change"
+
 
 class RobustaToken(BaseModel):
     store_url: str
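One notable change in the hunk above is the import-time monkey-patch of postgrest's `pre_select`, which rebuilds `QueryArgs` with no body when `json` is falsy. The general pattern is: keep a reference to the original module attribute, wrap it, then reassign the wrapper on the module so later lookups resolve to the patched version. Below is a minimal, self-contained sketch of that wrap-and-reassign pattern; the `mylib` namespace and its tuple-based `pre_select` are stand-ins for illustration, not postgrest's actual API.

```python
import types

# Stand-in for a third-party module whose function we want to patch.
mylib = types.SimpleNamespace()


def pre_select(method, params, headers, json):
    # Stand-in for the library function: returns the request pieces unchanged.
    return (method, params, headers, json)


mylib.pre_select = pre_select

# Keep a reference to the original so the wrapper can delegate to it.
original_pre_select = mylib.pre_select


def pre_select_patched(*args, **kwargs):
    method, params, headers, json = original_pre_select(*args, **kwargs)
    if not json:
        # Drop falsy request bodies, mirroring the QueryArgs rebuild in the diff.
        json = None
    return (method, params, headers, json)


# Reassign on the module: any caller that looks the function up via the
# module attribute from now on gets the patched version.
mylib.pre_select = pre_select_patched

print(mylib.pre_select("GET", {"select": "*"}, {}, {}))
# ('GET', {'select': '*'}, {}, None)
```

Patching the module attribute rather than a local name matters because callers that resolve the function through the module at call time see the wrapper, while code that bound the original with `from module import pre_select` before the patch keeps calling the old function.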
@@ -54,12 +88,23 @@ class RobustaToken(BaseModel):
     password: str
 
 
+class SupabaseDnsException(Exception):
+    def __init__(self, error: Exception, url: str):
+        message = (
+            f"\n{error.__class__.__name__}: {error}\n"
+            f"Error connecting to <{url}>\n"
+            "This is often due to DNS issues or firewall policies - to troubleshoot run in your cluster:\n"
+            f"curl -I {url}\n"
+        )
+        super().__init__(message)
+
+
 class SupabaseDal:
     def __init__(self, cluster: str):
         self.enabled = self.__init_config()
         self.cluster = cluster
         if not self.enabled:
-            logging.
+            logging.debug(
                 "Not connecting to Robusta platform - robusta token not provided - using ROBUSTA_AI will not be possible"
             )
             return
@@ -117,7 +162,7 @@ class SupabaseDal:
         )
 
         if not os.path.exists(config_file_path):
-            logging.
+            logging.debug(f"No robusta config in {config_file_path}")
             return None
 
         logging.info(f"loading config {config_file_path}")
@@ -179,118 +224,183 @@ class SupabaseDal:
         return all([self.account_id, self.url, self.api_key, self.email, self.password])
 
     def sign_in(self) -> str:
-        logging.info("Supabase
-
-
-
-
-
-
-
-
-
-
-
+        logging.info("Supabase dal login")
+        try:
+            res = self.client.auth.sign_in_with_password(
+                {"email": self.email, "password": self.password}
+            )
+            if not res.session:
+                raise ValueError("Authentication failed: no session returned")
+            if not res.user:
+                raise ValueError("Authentication failed: no user returned")
+            self.client.auth.set_session(
+                res.session.access_token, res.session.refresh_token
+            )
+            self.client.postgrest.auth(res.session.access_token)
+            return res.user.id
+        except Exception as e:
+            error_msg = str(e).lower()
+            if any(
+                dns_indicator in error_msg
+                for dns_indicator in [
+                    "temporary failure in name resolution",
+                    "name resolution",
+                    "dns",
+                    "name or service not known",
+                    "nodename nor servname provided",
+                ]
+            ):
+                raise SupabaseDnsException(e, self.url) from e
+            raise
 
     def get_resource_recommendation(
-        self,
+        self,
+        limit: int = 10,
+        sort_by: str = "cpu_total",
+        namespace: Optional[str] = None,
+        name_pattern: Optional[str] = None,
+        kind: Optional[str] = None,
+        container: Optional[str] = None,
     ) -> Optional[List[Dict]]:
+        """
+        Fetch top N resource recommendations with optional filters and sorting.
+
+        Args:
+            limit: Maximum number of recommendations to return (default: 10)
+            sort_by: Field to sort by potential savings. Options:
+                - "cpu_total": Total CPU savings (requests + limits)
+                - "memory_total": Total memory savings (requests + limits)
+                - "cpu_requests": CPU requests savings
+                - "memory_requests": Memory requests savings
+                - "cpu_limits": CPU limits savings
+                - "memory_limits": Memory limits savings
+                - "priority": Use the priority field from the scan
+            namespace: Filter by Kubernetes namespace (exact match)
+            name_pattern: Filter by workload name (supports SQL LIKE pattern, e.g., '%app%')
+            kind: Filter by Kubernetes resource kind (e.g., Deployment, StatefulSet, DaemonSet, Job)
+            container: Filter by container name (exact match)
+
+        Returns:
+            List of recommendations sorted by the specified metric
+        """
         if not self.enabled:
             return []
 
-
-
-
-
-
-
-
-
-
-
-
+        scans_meta_response = (
+            self.client.table(SCANS_META_TABLE)
+            .select("*")
+            .eq("account_id", self.account_id)
+            .eq("cluster_id", self.cluster)
+            .eq("latest", True)
+            .execute()
+        )
+        if not len(scans_meta_response.data):
+            logging.warning("No scan metadata found for latest krr scan")
+            return None
 
-
-            self.client.table(SCANS_RESULTS_TABLE)
-            .select("*")
-            .eq("account_id", self.account_id)
-            .eq("cluster_id", self.cluster)
-            .eq("scan_id", scans_meta_response.data[0]["scan_id"])
-            .eq("name", name)
-            .eq("namespace", namespace)
-            .eq("kind", kind)
-            .execute()
-        )
-        if not len(scans_results_response.data):
-            return None
+        scan_id = scans_meta_response.data[0]["scan_id"]
 
-
-
-
+        query = (
+            self.client.table(SCANS_RESULTS_TABLE)
+            .select("*")
+            .eq("account_id", self.account_id)
+            .eq("cluster_id", self.cluster)
+            .eq("scan_id", scan_id)
+        )
+
+        if namespace:
+            query = query.eq("namespace", namespace)
+        if name_pattern:
+            query = query.like("name", name_pattern)
+        if kind:
+            query = query.eq("kind", kind)
+        if container:
+            query = query.eq("container", container)
+
+        # For priority sorting, we can use the database's order
+        if sort_by == "priority":
+            query = query.order("priority", desc=True).limit(limit)
+
+        scans_results_response = query.execute()
+
+        if not len(scans_results_response.data):
             return None
 
-
-
+        results = scans_results_response.data
+
+        if len(results) <= 1:
+            return results
+
+        # If sorting by priority, we already ordered and limited in the query
+        if sort_by == "priority":
+            return results
+
+        # Sort by calculated savings (descending)
+        results_with_savings = [
+            (result, calculate_krr_savings(result, sort_by)) for result in results
+        ]
+        results_with_savings.sort(key=lambda x: x[1], reverse=True)
+
+        return [result for result, _ in results_with_savings[:limit]]
+
+    def get_issues_metadata(
+        self,
+        start_datetime: str,
+        end_datetime: str,
+        limit: int = 100,
+        workload: Optional[str] = None,
+        ns: Optional[str] = None,
+        cluster: Optional[str] = None,
+        finding_type: FindingType = FindingType.CONFIGURATION_CHANGE,
    ) -> Optional[List[Dict]]:
         if not self.enabled:
             return []
-
+        if not cluster:
+            cluster = self.cluster
         try:
-
+            query = (
                 self.client.table(ISSUES_TABLE)
-                .select(
+                .select(
+                    "id",
+                    "title",
+                    "subject_name",
+                    "subject_namespace",
+                    "subject_type",
+                    "description",
+                    "starts_at",
+                    "ends_at",
+                )
                 .eq("account_id", self.account_id)
-                .eq("cluster",
-                .eq("finding_type", "configuration_change")
+                .eq("cluster", cluster)
                 .gte("creation_date", start_datetime)
                 .lte("creation_date", end_datetime)
-                .
+                .limit(limit)
             )
-            if not len(changes_response.data):
-                return None
 
-
-
-
+            query = query.eq("finding_type", finding_type.value)
+            if workload:
+                query.eq("subject_name", workload)
+            if ns:
+                query.eq("subject_namespace", ns)
 
-
-
-            change_data_response = (
-                self.client.table(EVIDENCE_TABLE)
-                .select("*")
-                .eq("account_id", self.account_id)
-                .in_("issue_id", changes_ids)
-                .execute()
-            )
-            if not len(change_data_response.data):
+            res = query.execute()
+            if not res.data:
                 return None
 
         except Exception:
-            logging.exception("Supabase error while retrieving change
+            logging.exception("Supabase error while retrieving change data")
             return None
 
-        changes_data = []
-        change_data_map = {
-            change["issue_id"]: change for change in change_data_response.data
-        }
-
-        for change in changes_response.data:
-            change_content = change_data_map.get(change["id"])
-            if change_content:
-                changes_data.append(
-                    {
-                        "change": change_content["data"],
-                        "evidence_id": change_content["id"],
-                        **change,
-                    }
-                )
-
         logging.debug(
-            "Change history for %s-%s: %s",
+            "Change history metadata for %s-%s workload %s in ns %s: %s",
+            start_datetime,
+            end_datetime,
+            workload,
+            ns,
+            res.data,
         )
 
-        return
+        return res.data
 
     def unzip_evidence_file(self, data):
         try:
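The rewritten `sign_in` above wraps authentication in a try/except and re-raises DNS-looking failures as the new `SupabaseDnsException`, preserving the original exception with `raise ... from`. Here is a minimal, self-contained sketch of that classification step; the `classify_auth_error` helper and the `OSError` used to trigger it are illustrative stand-ins, not names from the package.

```python
# Substring indicators copied from the diff above.
DNS_INDICATORS = [
    "temporary failure in name resolution",
    "name resolution",
    "dns",
    "name or service not known",
    "nodename nor servname provided",
]


class SupabaseDnsException(Exception):
    def __init__(self, error: Exception, url: str):
        super().__init__(
            f"{error.__class__.__name__}: {error}\n"
            f"Error connecting to <{url}> - to troubleshoot, run `curl -I {url}` in the cluster"
        )


def classify_auth_error(error: Exception, url: str) -> None:
    """Re-raise DNS-looking failures as SupabaseDnsException, else re-raise as-is."""
    message = str(error).lower()
    if any(indicator in message for indicator in DNS_INDICATORS):
        # `from error` chains the original exception for full tracebacks.
        raise SupabaseDnsException(error, url) from error
    raise error


try:
    classify_auth_error(
        OSError("Temporary failure in name resolution"), "https://db.example.com"
    )
except SupabaseDnsException as e:
    print(e)
```

The design choice here is to classify by error text rather than exception type, which catches DNS failures regardless of which layer (httpx, socket, the auth client) surfaced them, at the cost of matching on fragile message strings.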
@@ -322,22 +432,30 @@ class SupabaseDal:
         return data
 
     def extract_relevant_issues(self, evidence):
-        enrichment_blacklist = {"text_file", "graph", "ai_analysis", "holmes"}
         data = [
             enrich
             for enrich in evidence.data
-            if enrich.get("enrichment_type") not in
+            if enrich.get("enrichment_type") not in ENRICHMENT_BLACKLIST_SET
         ]
 
         unzipped_files = [
             self.unzip_evidence_file(enrich)
             for enrich in evidence.data
             if enrich.get("enrichment_type") == "text_file"
+            or enrich.get("enrichment_type") == "alert_raw_data"
         ]
 
         data.extend(unzipped_files)
         return data
 
+    def get_issue_from_db(self, issue_id: str, table: str) -> Optional[Dict]:
+        issue_response = (
+            self.client.table(table).select("*").filter("id", "eq", issue_id).execute()
+        )
+        if len(issue_response.data):
+            return issue_response.data[0]
+        return None
+
     def get_issue_data(self, issue_id: Optional[str]) -> Optional[Dict]:
         # TODO this could be done in a single atomic SELECT, but there is no
         # foreign key relation between Issues and Evidence.
@@ -347,14 +465,11 @@ class SupabaseDal:
             return None
         issue_data = None
         try:
-
-
-                .
-
-                .
-            )
-            if len(issue_response.data):
-                issue_data = issue_response.data[0]
+            issue_data = self.get_issue_from_db(issue_id, ISSUES_TABLE)
+            if issue_data and issue_data["source"] == "prometheus":
+                logging.debug("Getting alert %s from GroupedIssuesTable", issue_id)
+                # This issue will have the complete alert duration information
+                issue_data = self.get_issue_from_db(issue_id, GROUPED_ISSUES_TABLE)
 
         except Exception:  # e.g. invalid id format
             logging.exception("Supabase error while retrieving issue data")
@@ -364,12 +479,14 @@ class SupabaseDal:
         evidence = (
             self.client.table(EVIDENCE_TABLE)
             .select("*")
-            .
+            .eq("issue_id", issue_id)
+            .not_.in_("enrichment_type", ENRICHMENT_BLACKLIST)
             .execute()
         )
-
+        relevant_evidence = self.extract_relevant_issues(evidence)
+        truncate_evidences_entities_if_necessary(relevant_evidence)
 
-        issue_data["evidence"] =
+        issue_data["evidence"] = relevant_evidence
 
         # build issue investigation dates
         started_at = issue_data.get("starts_at")
@@ -393,6 +510,79 @@ class SupabaseDal:
 
         return issue_data
 
+    def get_runbook_catalog(self) -> Optional[List[RobustaRunbookInstruction]]:
+        if not self.enabled:
+            return None
+
+        try:
+            res = (
+                self.client.table(RUNBOOKS_TABLE)
+                .select("*")
+                .eq("account_id", self.account_id)
+                .eq("subject_type", "RunbookCatalog")
+                .execute()
+            )
+            if not res.data:
+                return None
+
+            instructions = []
+            for row in res.data:
+                id = row.get("runbook_id")
+                symptom = row.get("symptoms")
+                title = row.get("subject_name")
+                if not symptom:
+                    logging.warning("Skipping runbook with empty symptom: %s", id)
+                    continue
+                instructions.append(
+                    RobustaRunbookInstruction(id=id, symptom=symptom, title=title)
+                )
+            return instructions
+        except Exception:
+            logging.exception("Failed to fetch RunbookCatalog", exc_info=True)
+            return None
+
+    def get_runbook_content(
+        self, runbook_id: str
+    ) -> Optional[RobustaRunbookInstruction]:
+        if not self.enabled:
+            return None
+
+        res = (
+            self.client.table(RUNBOOKS_TABLE)
+            .select("*")
+            .eq("account_id", self.account_id)
+            .eq("subject_type", "RunbookCatalog")
+            .eq("runbook_id", runbook_id)
+            .execute()
+        )
+        if not res.data or len(res.data) != 1:
+            return None
+
+        row = res.data[0]
+        id = row.get("runbook_id")
+        symptom = row.get("symptoms")
+        title = row.get("subject_name")
+        raw_instruction = row.get("runbook").get("instructions")
+        # TODO: remove in the future when we migrate the table data
+        if isinstance(raw_instruction, list) and len(raw_instruction) == 1:
+            instruction = raw_instruction[0]
+        elif isinstance(raw_instruction, list) and len(raw_instruction) > 1:
+            # not currently used, but will be used in the future
+            instruction = "\n - ".join(raw_instruction)
+        elif isinstance(raw_instruction, str):
+            # not supported by the current UI, but will be supported in the future
+            instruction = raw_instruction
+        else:
+            # in case the format is unexpected, convert to string
+            logging.error(
+                f"Unexpected runbook instruction format for runbook_id={runbook_id}: {raw_instruction}"
+            )
+            instruction = str(raw_instruction)
+
+        return RobustaRunbookInstruction(
+            id=id, symptom=symptom, instruction=instruction, title=title
+        )
+
     def get_resource_instructions(
         self, type: str, name: Optional[str]
     ) -> Optional[ResourceInstructions]:
@@ -512,10 +702,13 @@ class SupabaseDal:
                 self.client.table(EVIDENCE_TABLE)
                 .select("data, enrichment_type")
                 .in_("issue_id", unique_issues)
+                .not_.in_("enrichment_type", ENRICHMENT_BLACKLIST)
                 .execute()
             )
 
-
+            relevant_issues = self.extract_relevant_issues(res)
+            truncate_evidences_entities_if_necessary(relevant_issues)
+            return relevant_issues
 
         except Exception:
             logging.exception("failed to fetch workload issues data", exc_info=True)