holmesgpt 0.12.3__py3-none-any.whl → 0.12.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of holmesgpt might be problematic. Click here for more details.
- holmes/__init__.py +1 -1
- holmes/config.py +75 -33
- holmes/core/config.py +5 -0
- holmes/core/conversations.py +17 -2
- holmes/core/investigation.py +1 -0
- holmes/core/llm.py +1 -2
- holmes/core/prompt.py +29 -4
- holmes/core/supabase_dal.py +49 -13
- holmes/core/tool_calling_llm.py +26 -1
- holmes/core/tools.py +2 -1
- holmes/core/tools_utils/tool_executor.py +1 -0
- holmes/core/toolset_manager.py +10 -3
- holmes/core/tracing.py +77 -10
- holmes/interactive.py +110 -20
- holmes/main.py +13 -18
- holmes/plugins/destinations/slack/plugin.py +19 -9
- holmes/plugins/prompts/_fetch_logs.jinja2 +11 -1
- holmes/plugins/prompts/_general_instructions.jinja2 +6 -37
- holmes/plugins/prompts/_permission_errors.jinja2 +6 -0
- holmes/plugins/prompts/_runbook_instructions.jinja2 +13 -5
- holmes/plugins/prompts/_toolsets_instructions.jinja2 +22 -14
- holmes/plugins/prompts/generic_ask.jinja2 +6 -0
- holmes/plugins/prompts/generic_ask_conversation.jinja2 +1 -0
- holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +1 -0
- holmes/plugins/prompts/generic_investigation.jinja2 +1 -0
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +0 -2
- holmes/plugins/runbooks/__init__.py +20 -4
- holmes/plugins/toolsets/__init__.py +7 -9
- holmes/plugins/toolsets/aks-node-health.yaml +0 -8
- holmes/plugins/toolsets/argocd.yaml +4 -1
- holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +1 -1
- holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +2 -0
- holmes/plugins/toolsets/confluence.yaml +1 -1
- holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +54 -4
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +150 -6
- holmes/plugins/toolsets/kubernetes.yaml +6 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +2 -6
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +2 -2
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +65 -6
- holmes/plugins/toolsets/service_discovery.py +1 -1
- holmes/plugins/toolsets/slab.yaml +1 -1
- holmes/utils/colors.py +7 -0
- holmes/utils/console/consts.py +5 -0
- holmes/utils/console/result.py +2 -1
- holmes/utils/keygen_utils.py +6 -0
- holmes/version.py +2 -2
- holmesgpt-0.12.4.dist-info/METADATA +258 -0
- {holmesgpt-0.12.3.dist-info → holmesgpt-0.12.4.dist-info}/RECORD +51 -47
- holmesgpt-0.12.3.dist-info/METADATA +0 -400
- {holmesgpt-0.12.3.dist-info → holmesgpt-0.12.4.dist-info}/LICENSE.txt +0 -0
- {holmesgpt-0.12.3.dist-info → holmesgpt-0.12.4.dist-info}/WHEEL +0 -0
- {holmesgpt-0.12.3.dist-info → holmesgpt-0.12.4.dist-info}/entry_points.txt +0 -0
|
@@ -27,6 +27,9 @@ from holmes.plugins.toolsets.utils import (
|
|
|
27
27
|
process_timestamps_to_int,
|
|
28
28
|
standard_start_datetime_tool_param_description,
|
|
29
29
|
)
|
|
30
|
+
from datetime import datetime
|
|
31
|
+
|
|
32
|
+
from holmes.utils.keygen_utils import generate_random_key
|
|
30
33
|
|
|
31
34
|
DEFAULT_TIME_SPAN_SECONDS = 3600
|
|
32
35
|
|
|
@@ -60,7 +63,7 @@ class ListActiveMetrics(BaseDatadogMetricsTool):
|
|
|
60
63
|
required=False,
|
|
61
64
|
),
|
|
62
65
|
"tag_filter": ToolParameter(
|
|
63
|
-
description="Filter metrics by tags in the format tag:value",
|
|
66
|
+
description="Filter metrics by tags in the format tag:value. pod tag is pod_name. namespace tag is kube_namespace.",
|
|
64
67
|
type="string",
|
|
65
68
|
required=False,
|
|
66
69
|
),
|
|
@@ -188,6 +191,16 @@ class QueryMetrics(BaseDatadogMetricsTool):
|
|
|
188
191
|
type="string",
|
|
189
192
|
required=False,
|
|
190
193
|
),
|
|
194
|
+
"description": ToolParameter(
|
|
195
|
+
description="Describes the query",
|
|
196
|
+
type="string",
|
|
197
|
+
required=True,
|
|
198
|
+
),
|
|
199
|
+
"output_type": ToolParameter(
|
|
200
|
+
description="Specifies how to interpret the Datadog result. Use 'Plain' for raw values, 'Bytes' to format byte values, 'Percentage' to scale 0–1 values into 0–100%, or 'CPUUsage' to convert values to cores (e.g., 500 becomes 500m, 2000 becomes 2).",
|
|
201
|
+
type="string",
|
|
202
|
+
required=False,
|
|
203
|
+
),
|
|
191
204
|
},
|
|
192
205
|
toolset=toolset,
|
|
193
206
|
)
|
|
@@ -230,6 +243,8 @@ class QueryMetrics(BaseDatadogMetricsTool):
|
|
|
230
243
|
)
|
|
231
244
|
|
|
232
245
|
series = data.get("series", [])
|
|
246
|
+
description = params.get("description", "")
|
|
247
|
+
output_type = params.get("output_type", "Plain")
|
|
233
248
|
|
|
234
249
|
if not series:
|
|
235
250
|
return StructuredToolResult(
|
|
@@ -238,17 +253,58 @@ class QueryMetrics(BaseDatadogMetricsTool):
|
|
|
238
253
|
params=params,
|
|
239
254
|
)
|
|
240
255
|
|
|
256
|
+
# Transform Datadog series data to match Prometheus format
|
|
257
|
+
prometheus_result = []
|
|
258
|
+
for serie in series:
|
|
259
|
+
# Extract metric info from Datadog series
|
|
260
|
+
metric_info = {}
|
|
261
|
+
if "metric" in serie:
|
|
262
|
+
metric_info["__name__"] = serie["metric"]
|
|
263
|
+
|
|
264
|
+
# Add other fields from scope/tag_set if available
|
|
265
|
+
if "scope" in serie and serie["scope"]:
|
|
266
|
+
# Parse scope like "pod_name:robusta-runner-78599b764d-f847h" into labels
|
|
267
|
+
scope_parts = serie["scope"].split(",")
|
|
268
|
+
for part in scope_parts:
|
|
269
|
+
if ":" in part:
|
|
270
|
+
key, value = part.split(":", 1)
|
|
271
|
+
metric_info[key.strip()] = value.strip()
|
|
272
|
+
|
|
273
|
+
# Transform pointlist to values format (timestamp, value as strings)
|
|
274
|
+
values = []
|
|
275
|
+
if "pointlist" in serie:
|
|
276
|
+
for point in serie["pointlist"]:
|
|
277
|
+
if len(point) >= 2:
|
|
278
|
+
# Convert timestamp from milliseconds to seconds, format as string
|
|
279
|
+
timestamp = int(point[0] / 1000)
|
|
280
|
+
value = str(point[1])
|
|
281
|
+
values.append([timestamp, value])
|
|
282
|
+
|
|
283
|
+
prometheus_result.append({"metric": metric_info, "values": values})
|
|
284
|
+
|
|
285
|
+
# Convert timestamps to RFC3339 format for start/end
|
|
286
|
+
start_rfc = datetime.fromtimestamp(from_time).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
287
|
+
end_rfc = datetime.fromtimestamp(to_time).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
288
|
+
|
|
289
|
+
# Create response matching Prometheus format
|
|
241
290
|
response_data = {
|
|
242
291
|
"status": "success",
|
|
292
|
+
"error_message": None,
|
|
293
|
+
"random_key": generate_random_key(),
|
|
294
|
+
"tool_name": self.name,
|
|
295
|
+
"description": description,
|
|
243
296
|
"query": query,
|
|
244
|
-
"
|
|
245
|
-
"
|
|
246
|
-
"
|
|
297
|
+
"start": start_rfc,
|
|
298
|
+
"end": end_rfc,
|
|
299
|
+
"step": 60, # Default step, Datadog doesn't provide this directly
|
|
300
|
+
"output_type": output_type,
|
|
301
|
+
"data": {"resultType": "matrix", "result": prometheus_result},
|
|
247
302
|
}
|
|
248
303
|
|
|
304
|
+
data_str = json.dumps(response_data, indent=2)
|
|
249
305
|
return StructuredToolResult(
|
|
250
306
|
status=ToolResultStatus.SUCCESS,
|
|
251
|
-
data=
|
|
307
|
+
data=data_str,
|
|
252
308
|
params=params,
|
|
253
309
|
)
|
|
254
310
|
|
|
@@ -287,7 +343,8 @@ class QueryMetrics(BaseDatadogMetricsTool):
|
|
|
287
343
|
|
|
288
344
|
def get_parameterized_one_liner(self, params) -> str:
|
|
289
345
|
query = params.get("query", "<no query>")
|
|
290
|
-
|
|
346
|
+
description = params.get("description", "")
|
|
347
|
+
return f"Query Datadog metrics: query='{query}', description='{description}'"
|
|
291
348
|
|
|
292
349
|
|
|
293
350
|
class QueryMetricsMetadata(BaseDatadogMetricsTool):
|
|
@@ -404,6 +461,92 @@ class QueryMetricsMetadata(BaseDatadogMetricsTool):
|
|
|
404
461
|
return "Get Datadog metric metadata"
|
|
405
462
|
|
|
406
463
|
|
|
464
|
+
class ListMetricTags(BaseDatadogMetricsTool):
|
|
465
|
+
def __init__(self, toolset: "DatadogMetricsToolset"):
|
|
466
|
+
super().__init__(
|
|
467
|
+
name="list_datadog_metric_tags",
|
|
468
|
+
description="List all available tags and aggregations for a specific metric. This helps in building queries by showing what dimensions are available for filtering.",
|
|
469
|
+
parameters={
|
|
470
|
+
"metric_name": ToolParameter(
|
|
471
|
+
description="The name of the metric to get tags for (e.g., 'system.cpu.user', 'container.memory.usage')",
|
|
472
|
+
type="string",
|
|
473
|
+
required=True,
|
|
474
|
+
),
|
|
475
|
+
},
|
|
476
|
+
toolset=toolset,
|
|
477
|
+
)
|
|
478
|
+
|
|
479
|
+
def _invoke(self, params: Any) -> StructuredToolResult:
|
|
480
|
+
if not self.toolset.dd_config:
|
|
481
|
+
return StructuredToolResult(
|
|
482
|
+
status=ToolResultStatus.ERROR,
|
|
483
|
+
error=TOOLSET_CONFIG_MISSING_ERROR,
|
|
484
|
+
params=params,
|
|
485
|
+
)
|
|
486
|
+
|
|
487
|
+
url = None
|
|
488
|
+
query_params = None
|
|
489
|
+
|
|
490
|
+
try:
|
|
491
|
+
metric_name = get_param_or_raise(params, "metric_name")
|
|
492
|
+
|
|
493
|
+
url = f"{self.toolset.dd_config.site_api_url}/api/v2/metrics/{metric_name}/active-configurations"
|
|
494
|
+
headers = get_headers(self.toolset.dd_config)
|
|
495
|
+
|
|
496
|
+
data = execute_datadog_http_request(
|
|
497
|
+
url=url,
|
|
498
|
+
headers=headers,
|
|
499
|
+
timeout=self.toolset.dd_config.request_timeout,
|
|
500
|
+
method="GET",
|
|
501
|
+
payload_or_params={},
|
|
502
|
+
)
|
|
503
|
+
|
|
504
|
+
return StructuredToolResult(
|
|
505
|
+
status=ToolResultStatus.SUCCESS,
|
|
506
|
+
data=data,
|
|
507
|
+
params=params,
|
|
508
|
+
)
|
|
509
|
+
|
|
510
|
+
except DataDogRequestError as e:
|
|
511
|
+
logging.exception(e, exc_info=True)
|
|
512
|
+
|
|
513
|
+
if e.status_code == 404:
|
|
514
|
+
error_msg = f"Metric '{params.get('metric_name', 'unknown')}' not found. Please check the metric name."
|
|
515
|
+
elif e.status_code == 429:
|
|
516
|
+
error_msg = f"Datadog API rate limit exceeded. Failed after {MAX_RETRY_COUNT_ON_RATE_LIMIT} retry attempts."
|
|
517
|
+
elif e.status_code == 403:
|
|
518
|
+
error_msg = (
|
|
519
|
+
f"Permission denied. Ensure your Datadog Application Key has the 'metrics_read' "
|
|
520
|
+
f"permissions. Error: {str(e)}"
|
|
521
|
+
)
|
|
522
|
+
else:
|
|
523
|
+
error_msg = f"Exception while querying Datadog: {str(e)}"
|
|
524
|
+
|
|
525
|
+
return StructuredToolResult(
|
|
526
|
+
status=ToolResultStatus.ERROR,
|
|
527
|
+
error=error_msg,
|
|
528
|
+
params=params,
|
|
529
|
+
invocation=json.dumps({"url": url, "params": query_params})
|
|
530
|
+
if url and query_params
|
|
531
|
+
else None,
|
|
532
|
+
)
|
|
533
|
+
|
|
534
|
+
except Exception as e:
|
|
535
|
+
logging.exception(
|
|
536
|
+
f"Failed to query Datadog metric tags for params: {params}",
|
|
537
|
+
exc_info=True,
|
|
538
|
+
)
|
|
539
|
+
return StructuredToolResult(
|
|
540
|
+
status=ToolResultStatus.ERROR,
|
|
541
|
+
error=f"Exception while querying Datadog: {str(e)}",
|
|
542
|
+
params=params,
|
|
543
|
+
)
|
|
544
|
+
|
|
545
|
+
def get_parameterized_one_liner(self, params) -> str:
|
|
546
|
+
metric_name = params.get("metric_name", "<no metric>")
|
|
547
|
+
return f"List available tags for Datadog metric: {metric_name}"
|
|
548
|
+
|
|
549
|
+
|
|
407
550
|
class DatadogMetricsToolset(Toolset):
|
|
408
551
|
dd_config: Optional[DatadogMetricsConfig] = None
|
|
409
552
|
|
|
@@ -418,6 +561,7 @@ class DatadogMetricsToolset(Toolset):
|
|
|
418
561
|
ListActiveMetrics(toolset=self),
|
|
419
562
|
QueryMetrics(toolset=self),
|
|
420
563
|
QueryMetricsMetadata(toolset=self),
|
|
564
|
+
ListMetricTags(toolset=self),
|
|
421
565
|
],
|
|
422
566
|
experimental=True,
|
|
423
567
|
tags=[ToolsetTag.CORE],
|
|
@@ -199,8 +199,14 @@ toolsets:
|
|
|
199
199
|
description: "Provides real-time metrics for pods and nodes"
|
|
200
200
|
docs_url: "https://docs.robusta.dev/master/configuration/holmesgpt/toolsets/kubernetes.html#live-metrics"
|
|
201
201
|
icon_url: "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRPKA-U9m5BxYQDF1O7atMfj9EMMXEoGu4t0Q&s"
|
|
202
|
+
llm_instructions: |
|
|
203
|
+
The kubectl_top_pods or kubectl_top_nodes do not return time series data or metrics that can be used for graphs
|
|
204
|
+
Do NOT use kubectl_top_pods or kubectl_top_nodes for graph generation - it only shows current snapshot data
|
|
205
|
+
kubectl_top_pods or kubectl_top_nodes are for current status checks, not historical graphs
|
|
202
206
|
tags:
|
|
203
207
|
- core
|
|
208
|
+
prerequisites:
|
|
209
|
+
- command: "kubectl top nodes"
|
|
204
210
|
tools:
|
|
205
211
|
- name: "kubectl_top_pods"
|
|
206
212
|
description: "Retrieves real-time CPU and memory usage for each pod in the cluster."
|
|
@@ -1,9 +1,7 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
3
|
import os
|
|
4
|
-
import random
|
|
5
4
|
import re
|
|
6
|
-
import string
|
|
7
5
|
import time
|
|
8
6
|
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
9
7
|
from urllib.parse import urljoin
|
|
@@ -31,6 +29,7 @@ from holmes.plugins.toolsets.utils import (
|
|
|
31
29
|
from holmes.utils.cache import TTLCache
|
|
32
30
|
from holmes.common.env_vars import IS_OPENSHIFT
|
|
33
31
|
from holmes.common.openshift import load_openshift_token
|
|
32
|
+
from holmes.utils.keygen_utils import generate_random_key
|
|
34
33
|
|
|
35
34
|
PROMETHEUS_RULES_CACHE_KEY = "cached_prometheus_rules"
|
|
36
35
|
DEFAULT_TIME_SPAN_SECONDS = 3600
|
|
@@ -79,10 +78,6 @@ class BasePrometheusTool(Tool):
|
|
|
79
78
|
toolset: "PrometheusToolset"
|
|
80
79
|
|
|
81
80
|
|
|
82
|
-
def generate_random_key():
|
|
83
|
-
return "".join(random.choices(string.ascii_letters + string.digits, k=4))
|
|
84
|
-
|
|
85
|
-
|
|
86
81
|
def filter_metrics_by_type(metrics: Dict, expected_type: str):
|
|
87
82
|
return {
|
|
88
83
|
metric_name: metric_data
|
|
@@ -687,6 +682,7 @@ class ExecuteRangeQuery(BasePrometheusTool):
|
|
|
687
682
|
if self.toolset.config.tool_calls_return_data:
|
|
688
683
|
response_data["data"] = data.get("data")
|
|
689
684
|
data_str = json.dumps(response_data, indent=2)
|
|
685
|
+
|
|
690
686
|
return StructuredToolResult(
|
|
691
687
|
status=ToolResultStatus.SUCCESS,
|
|
692
688
|
data=data_str,
|
|
@@ -25,11 +25,11 @@
|
|
|
25
25
|
* Embed at most 2 graphs
|
|
26
26
|
* When embedding multiple graphs, always add line spacing between them
|
|
27
27
|
For example:
|
|
28
|
-
|
|
28
|
+
|
|
29
29
|
<<{"type": "promql", "tool_name": "execute_prometheus_range_query", "random_key": "lBaA"}>>
|
|
30
30
|
|
|
31
31
|
<<{"type": "promql", "tool_name": "execute_prometheus_range_query", "random_key": "IKtq"}>>
|
|
32
|
-
|
|
32
|
+
|
|
33
33
|
{%- if config and config.additional_labels and config.additional_labels.keys()|list|length > 0 %}
|
|
34
34
|
* ALWAYS add the following additional labels to ALL PromQL queries:
|
|
35
35
|
{%- for key, value in config.additional_labels.items() %}
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
|
|
2
|
+
import textwrap
|
|
3
|
+
from typing import Any, Dict, List, Optional
|
|
3
4
|
|
|
4
5
|
from holmes.core.tools import (
|
|
5
6
|
StructuredToolResult,
|
|
@@ -9,7 +10,8 @@ from holmes.core.tools import (
|
|
|
9
10
|
Toolset,
|
|
10
11
|
ToolsetTag,
|
|
11
12
|
)
|
|
12
|
-
|
|
13
|
+
|
|
14
|
+
from holmes.plugins.runbooks import get_runbook_by_path, DEFAULT_RUNBOOK_SEARCH_PATH
|
|
13
15
|
|
|
14
16
|
|
|
15
17
|
# TODO(mainred): currently we support fetch runbooks hosted internally, in the future we may want to support fetching
|
|
@@ -33,15 +35,66 @@ class RunbookFetcher(Tool):
|
|
|
33
35
|
)
|
|
34
36
|
|
|
35
37
|
def _invoke(self, params: Any) -> StructuredToolResult:
|
|
36
|
-
|
|
38
|
+
link: str = params["link"]
|
|
39
|
+
|
|
40
|
+
search_paths = [DEFAULT_RUNBOOK_SEARCH_PATH]
|
|
41
|
+
if self.toolset.config and "additional_search_paths" in self.toolset.config:
|
|
42
|
+
search_paths.extend(self.toolset.config["additional_search_paths"])
|
|
37
43
|
|
|
38
|
-
runbook_path = get_runbook_by_path(
|
|
44
|
+
runbook_path = get_runbook_by_path(link, search_paths)
|
|
45
|
+
|
|
46
|
+
if runbook_path is None:
|
|
47
|
+
err_msg = (
|
|
48
|
+
f"Runbook '{link}' not found in any of the search paths: {search_paths}"
|
|
49
|
+
)
|
|
50
|
+
logging.error(err_msg)
|
|
51
|
+
return StructuredToolResult(
|
|
52
|
+
status=ToolResultStatus.ERROR,
|
|
53
|
+
error=err_msg,
|
|
54
|
+
params=params,
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
# Read and return the runbook content
|
|
39
58
|
try:
|
|
40
59
|
with open(runbook_path, "r") as file:
|
|
41
60
|
content = file.read()
|
|
61
|
+
wrapped_content = textwrap.dedent(f"""\
|
|
62
|
+
<runbook>
|
|
63
|
+
{textwrap.indent(content, " " * 20)}
|
|
64
|
+
</runbook>
|
|
65
|
+
Note: the above are DIRECTIONS not ACTUAL RESULTS. You now need to follow the steps outlined in the runbook yourself USING TOOLS.
|
|
66
|
+
Anything that looks like an actual result in the above <runbook> is just an EXAMPLE.
|
|
67
|
+
Now follow those steps and report back what you find.
|
|
68
|
+
You must follow them by CALLING TOOLS YOURSELF.
|
|
69
|
+
If you are missing tools, follow your general instructions on how to enable them as present in your system prompt.
|
|
70
|
+
|
|
71
|
+
Assuming the above runbook is relevant, you MUST start your response (after calling tools to investigate) with:
|
|
72
|
+
"I found a runbook named [runbook name/description] and used it to troubleshoot:"
|
|
73
|
+
|
|
74
|
+
Then list each step with ✅ for completed steps and ❌ for steps you couldn't complete.
|
|
75
|
+
|
|
76
|
+
<example>
|
|
77
|
+
I found a runbook named **Troubleshooting Erlang Issues** and used it to troubleshoot:
|
|
78
|
+
|
|
79
|
+
1. ✅ *Check BEAM VM memory usage* - 87% allocated (3.2GB used of 4GB limit)
|
|
80
|
+
2. ✅ *Review GC logs* - 15 full GC cycles in last 30 minutes, avg pause time 2.3s
|
|
81
|
+
3. ✅ *Verify Erlang application logs* - `** exception error: out of memory in process <0.139.0> called by gen_server:handle_msg/6`
|
|
82
|
+
4. ❌ *Could not analyze process mailbox sizes* - Observer tool not enabled in container. Enable remote shell or observer_cli for process introspection.
|
|
83
|
+
5. ✅ *Check pod memory limits* - container limit 4Gi, requests 2Gi
|
|
84
|
+
6. ✅ *Verify BEAM startup arguments* - `+S 4:4 +P 1048576`, no memory instrumentation flags enabled
|
|
85
|
+
7. ❌ *Could not retrieve APM traces* - Datadog traces toolset is disabled. You can enable it by following https://robusta-dev.github.io/holmesgpt/data-sources/builtin-toolsets/datadog/
|
|
86
|
+
8. ❌ *Could not query Erlang metrics* - Prometheus integration is not connected. Enable it via https://robusta-dev.github.io/holmesgpt/data-sources/builtin-toolsets/prometheus/
|
|
87
|
+
9. ✅ *Examine recent deployments* - app version 2.1.3 deployed 4 hours ago, coincides with memory spike
|
|
88
|
+
10. ❌ *Could not check Stripe API status* - No toolset for Stripe integration exists. To monitor Stripe or similar third-party APIs, add a [custom toolset](https://robusta-dev.github.io/holmesgpt/data-sources/custom-toolsets/) or use a [remote MCP server](https://robusta-dev.github.io/holmesgpt/data-sources/remote-mcp-servers/)
|
|
89
|
+
|
|
90
|
+
**Root cause:** Memory leak in `gen_server` logic introduced in v2.1.3. BEAM VM hitting memory limit, causing out-of-memory crashes.
|
|
91
|
+
|
|
92
|
+
**Fix:** Roll back to v2.1.2 or increase memory limit to 6GB as a temporary workaround.
|
|
93
|
+
</example>
|
|
94
|
+
""")
|
|
42
95
|
return StructuredToolResult(
|
|
43
96
|
status=ToolResultStatus.SUCCESS,
|
|
44
|
-
data=
|
|
97
|
+
data=wrapped_content,
|
|
45
98
|
params=params,
|
|
46
99
|
)
|
|
47
100
|
except Exception as e:
|
|
@@ -59,7 +112,12 @@ class RunbookFetcher(Tool):
|
|
|
59
112
|
|
|
60
113
|
|
|
61
114
|
class RunbookToolset(Toolset):
|
|
62
|
-
def __init__(self):
|
|
115
|
+
def __init__(self, additional_search_paths: Optional[List[str]] = None):
|
|
116
|
+
# Store additional search paths in config
|
|
117
|
+
config = {}
|
|
118
|
+
if additional_search_paths:
|
|
119
|
+
config["additional_search_paths"] = additional_search_paths
|
|
120
|
+
|
|
63
121
|
super().__init__(
|
|
64
122
|
name="runbook",
|
|
65
123
|
description="Fetch runbooks",
|
|
@@ -72,6 +130,7 @@ class RunbookToolset(Toolset):
|
|
|
72
130
|
ToolsetTag.CORE,
|
|
73
131
|
],
|
|
74
132
|
is_default=True,
|
|
133
|
+
config=config,
|
|
75
134
|
)
|
|
76
135
|
|
|
77
136
|
def get_example_config(self) -> Dict[str, Any]:
|
|
@@ -30,7 +30,7 @@ def find_service_url(label_selector):
|
|
|
30
30
|
)
|
|
31
31
|
if not svc_list.items:
|
|
32
32
|
return None
|
|
33
|
-
svc: V1Service = svc_list.items[0]
|
|
33
|
+
svc: V1Service = svc_list.items[0] # type: ignore
|
|
34
34
|
name = svc.metadata.name
|
|
35
35
|
namespace = svc.metadata.namespace
|
|
36
36
|
port = svc.spec.ports[0].port
|
|
@@ -11,7 +11,7 @@ toolsets:
|
|
|
11
11
|
- SLAB_API_KEY
|
|
12
12
|
tools:
|
|
13
13
|
- name: "fetch_slab_document"
|
|
14
|
-
description: "Fetch a document from slab.
|
|
14
|
+
description: "Fetch a document from slab."
|
|
15
15
|
command: |
|
|
16
16
|
curl -X POST \
|
|
17
17
|
-H "Authorization: ${SLAB_API_KEY}" \
|
holmes/utils/colors.py
ADDED
holmes/utils/console/consts.py
CHANGED
|
@@ -1,2 +1,7 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
1
3
|
# Common help texts
|
|
2
4
|
system_prompt_help = "Advanced. System prompt for LLM. Values starting with builtin:// are loaded from holmes/plugins/prompts, values starting with file:// are loaded from the given path, other values are interpreted as a prompt string"
|
|
5
|
+
|
|
6
|
+
# Agent name used in welcome banner, logging, and prompts
|
|
7
|
+
agent_name: str = os.environ.get("AGENT_NAME", "HolmesGPT")
|
holmes/utils/console/result.py
CHANGED
|
@@ -6,6 +6,7 @@ from holmes.config import Config
|
|
|
6
6
|
from holmes.core.tool_calling_llm import LLMResult
|
|
7
7
|
from holmes.plugins.destinations import DestinationType
|
|
8
8
|
from holmes.plugins.interfaces import Issue
|
|
9
|
+
from holmes.utils.colors import AI_COLOR
|
|
9
10
|
|
|
10
11
|
|
|
11
12
|
def handle_result(
|
|
@@ -27,7 +28,7 @@ def handle_result(
|
|
|
27
28
|
markup=False,
|
|
28
29
|
)
|
|
29
30
|
|
|
30
|
-
console.print("[bold
|
|
31
|
+
console.print(f"[bold {AI_COLOR}]AI:[/bold {AI_COLOR}]", end=" ")
|
|
31
32
|
console.print(Markdown(result.result)) # type: ignore
|
|
32
33
|
if add_separator:
|
|
33
34
|
console.print(Rule())
|
holmes/version.py
CHANGED
|
@@ -90,7 +90,7 @@ def get_version() -> str:
|
|
|
90
90
|
)
|
|
91
91
|
dirty = "-dirty" if status else ""
|
|
92
92
|
|
|
93
|
-
return f"{tag}-{branch}{dirty}"
|
|
93
|
+
return f"dev-{tag}-{branch}{dirty}"
|
|
94
94
|
|
|
95
95
|
except Exception:
|
|
96
96
|
pass
|
|
@@ -149,7 +149,7 @@ def check_version() -> VersionCheckResult:
|
|
|
149
149
|
|
|
150
150
|
update_message = None
|
|
151
151
|
if not is_latest:
|
|
152
|
-
update_message = f"Update available:
|
|
152
|
+
update_message = f"Update available: {holmes_info.latest_version} (current: {current_version})"
|
|
153
153
|
|
|
154
154
|
return VersionCheckResult(
|
|
155
155
|
is_latest=is_latest,
|