holmesgpt 0.16.2a0__py3-none-any.whl → 0.18.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- holmes/__init__.py +3 -5
- holmes/clients/robusta_client.py +4 -3
- holmes/common/env_vars.py +18 -2
- holmes/common/openshift.py +1 -1
- holmes/config.py +11 -6
- holmes/core/conversations.py +30 -13
- holmes/core/investigation.py +21 -25
- holmes/core/investigation_structured_output.py +3 -3
- holmes/core/issue.py +1 -1
- holmes/core/llm.py +50 -31
- holmes/core/models.py +19 -17
- holmes/core/openai_formatting.py +1 -1
- holmes/core/prompt.py +47 -2
- holmes/core/runbooks.py +1 -0
- holmes/core/safeguards.py +4 -2
- holmes/core/supabase_dal.py +4 -2
- holmes/core/tool_calling_llm.py +102 -141
- holmes/core/tools.py +19 -28
- holmes/core/tools_utils/token_counting.py +9 -2
- holmes/core/tools_utils/tool_context_window_limiter.py +13 -30
- holmes/core/tools_utils/tool_executor.py +0 -18
- holmes/core/tools_utils/toolset_utils.py +1 -0
- holmes/core/toolset_manager.py +37 -2
- holmes/core/tracing.py +13 -2
- holmes/core/transformers/__init__.py +1 -1
- holmes/core/transformers/base.py +1 -0
- holmes/core/transformers/llm_summarize.py +3 -2
- holmes/core/transformers/registry.py +2 -1
- holmes/core/transformers/transformer.py +1 -0
- holmes/core/truncation/compaction.py +37 -2
- holmes/core/truncation/input_context_window_limiter.py +3 -2
- holmes/interactive.py +52 -8
- holmes/main.py +17 -37
- holmes/plugins/interfaces.py +2 -1
- holmes/plugins/prompts/__init__.py +2 -1
- holmes/plugins/prompts/_fetch_logs.jinja2 +5 -5
- holmes/plugins/prompts/_runbook_instructions.jinja2 +2 -1
- holmes/plugins/prompts/base_user_prompt.jinja2 +7 -0
- holmes/plugins/prompts/conversation_history_compaction.jinja2 +2 -1
- holmes/plugins/prompts/generic_ask.jinja2 +0 -2
- holmes/plugins/prompts/generic_ask_conversation.jinja2 +0 -2
- holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +0 -2
- holmes/plugins/prompts/generic_investigation.jinja2 +0 -2
- holmes/plugins/prompts/investigation_procedure.jinja2 +2 -1
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +0 -2
- holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +0 -2
- holmes/plugins/runbooks/__init__.py +32 -3
- holmes/plugins/sources/github/__init__.py +4 -2
- holmes/plugins/sources/prometheus/models.py +1 -0
- holmes/plugins/toolsets/__init__.py +30 -26
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +13 -12
- holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +3 -2
- holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
- holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +3 -2
- holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +3 -1
- holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +3 -1
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +12 -12
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +7 -7
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +7 -7
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -5
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +3 -3
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +7 -7
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +6 -8
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +3 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +3 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +3 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +3 -3
- holmes/plugins/toolsets/azure_sql/utils.py +0 -32
- holmes/plugins/toolsets/bash/argocd/__init__.py +3 -3
- holmes/plugins/toolsets/bash/aws/__init__.py +4 -4
- holmes/plugins/toolsets/bash/azure/__init__.py +4 -4
- holmes/plugins/toolsets/bash/bash_toolset.py +2 -3
- holmes/plugins/toolsets/bash/common/bash.py +19 -9
- holmes/plugins/toolsets/bash/common/bash_command.py +1 -1
- holmes/plugins/toolsets/bash/common/stringify.py +1 -1
- holmes/plugins/toolsets/bash/kubectl/__init__.py +2 -1
- holmes/plugins/toolsets/bash/kubectl/constants.py +0 -1
- holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +3 -4
- holmes/plugins/toolsets/bash/parse_command.py +12 -13
- holmes/plugins/toolsets/connectivity_check.py +124 -0
- holmes/plugins/toolsets/coralogix/api.py +132 -119
- holmes/plugins/toolsets/coralogix/coralogix.jinja2 +14 -0
- holmes/plugins/toolsets/coralogix/toolset_coralogix.py +219 -0
- holmes/plugins/toolsets/coralogix/utils.py +15 -79
- holmes/plugins/toolsets/datadog/datadog_api.py +36 -3
- holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +34 -1
- holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +3 -3
- holmes/plugins/toolsets/datadog/datadog_models.py +59 -0
- holmes/plugins/toolsets/datadog/datadog_url_utils.py +213 -0
- holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +165 -28
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +71 -28
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +224 -375
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +67 -36
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +360 -343
- holmes/plugins/toolsets/elasticsearch/__init__.py +6 -0
- holmes/plugins/toolsets/elasticsearch/elasticsearch.py +834 -0
- holmes/plugins/toolsets/git.py +7 -8
- holmes/plugins/toolsets/grafana/base_grafana_toolset.py +16 -4
- holmes/plugins/toolsets/grafana/common.py +2 -30
- holmes/plugins/toolsets/grafana/grafana_tempo_api.py +2 -1
- holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +18 -2
- holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +92 -18
- holmes/plugins/toolsets/grafana/loki_api.py +4 -0
- holmes/plugins/toolsets/grafana/toolset_grafana.py +109 -25
- holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +22 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +201 -33
- holmes/plugins/toolsets/grafana/trace_parser.py +3 -2
- holmes/plugins/toolsets/internet/internet.py +10 -10
- holmes/plugins/toolsets/internet/notion.py +5 -6
- holmes/plugins/toolsets/investigator/core_investigation.py +3 -3
- holmes/plugins/toolsets/investigator/model.py +3 -1
- holmes/plugins/toolsets/json_filter_mixin.py +134 -0
- holmes/plugins/toolsets/kafka.py +12 -7
- holmes/plugins/toolsets/kubernetes.yaml +260 -30
- holmes/plugins/toolsets/kubernetes_logs.py +3 -3
- holmes/plugins/toolsets/logging_utils/logging_api.py +16 -6
- holmes/plugins/toolsets/mcp/toolset_mcp.py +88 -60
- holmes/plugins/toolsets/newrelic/new_relic_api.py +41 -1
- holmes/plugins/toolsets/newrelic/newrelic.jinja2 +24 -0
- holmes/plugins/toolsets/newrelic/newrelic.py +212 -55
- holmes/plugins/toolsets/prometheus/prometheus.py +358 -102
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +11 -3
- holmes/plugins/toolsets/rabbitmq/api.py +23 -4
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +5 -5
- holmes/plugins/toolsets/robusta/robusta.py +5 -5
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +25 -6
- holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +1 -1
- holmes/plugins/toolsets/utils.py +1 -1
- holmes/utils/config_utils.py +1 -1
- holmes/utils/connection_utils.py +31 -0
- holmes/utils/console/result.py +10 -0
- holmes/utils/file_utils.py +2 -1
- holmes/utils/global_instructions.py +10 -26
- holmes/utils/holmes_status.py +4 -3
- holmes/utils/log.py +15 -0
- holmes/utils/markdown_utils.py +2 -3
- holmes/utils/memory_limit.py +58 -0
- holmes/utils/sentry_helper.py +23 -0
- holmes/utils/stream.py +12 -5
- holmes/utils/tags.py +4 -3
- holmes/version.py +3 -1
- {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/METADATA +12 -10
- holmesgpt-0.18.4.dist-info/RECORD +258 -0
- holmes/plugins/toolsets/aws.yaml +0 -80
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +0 -114
- holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +0 -310
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +0 -736
- holmes/plugins/toolsets/grafana/grafana_api.py +0 -64
- holmes/plugins/toolsets/opensearch/__init__.py +0 -0
- holmes/plugins/toolsets/opensearch/opensearch.py +0 -250
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +0 -161
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +0 -215
- holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +0 -12
- holmes/plugins/toolsets/opensearch/opensearch_utils.py +0 -166
- holmes/utils/keygen_utils.py +0 -6
- holmesgpt-0.16.2a0.dist-info/RECORD +0 -258
- holmes/plugins/toolsets/{opensearch → elasticsearch}/opensearch_ppl_query_docs.jinja2 +0 -0
- holmes/plugins/toolsets/{opensearch → elasticsearch}/opensearch_query_assist.py +2 -2
- holmes/plugins/toolsets/{opensearch → elasticsearch}/opensearch_query_assist_instructions.jinja2 +0 -0
- {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/LICENSE +0 -0
- {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/WHEEL +0 -0
- {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,14 @@
|
|
|
1
1
|
|
|
2
2
|
# Prometheus/PromQL queries
|
|
3
3
|
|
|
4
|
+
{%- if config and config.prometheus_url and "coralogix" in config.prometheus_url %}
|
|
5
|
+
You are using Coralogix Prometheus.
|
|
6
|
+
* Metrics/labels may differ; discover names with `get_metric_names` first.
|
|
7
|
+
* For high-cardinality, wrap with `topk(5, <query>)`.
|
|
8
|
+
* Always include explicit time ranges for range queries.
|
|
9
|
+
* Example: `container_cpu_utilization{namespace="test-173"}` can fail if the label is named differently (e.g., `k8s_namespace_name`) or the metric is named differently. Do not assume names—only use labels you have seen returned or been told exist.
|
|
10
|
+
{%- endif %}
|
|
11
|
+
|
|
4
12
|
## Efficient Metric Discovery (when needed)
|
|
5
13
|
* When you need to discover metrics, use `get_metric_names` with filters - it's the fastest method
|
|
6
14
|
* Combine multiple patterns with regex OR (|) to reduce API calls:
|
|
@@ -26,7 +34,7 @@
|
|
|
26
34
|
* Use prometheus to execute promql queries with the tools `execute_prometheus_instant_query` and `execute_prometheus_range_query`
|
|
27
35
|
* To create queries, use 'start_timestamp' and 'end_timestamp' as graphs start and end times
|
|
28
36
|
* ALWAYS embed the execution results into your answer
|
|
29
|
-
* You only need to embed the partial result in your response. Include the "tool_name" and "
|
|
37
|
+
* You only need to embed the partial result in your response. Include the "tool_name" and "tool_call_id". For example: << {"type": "promql", "tool_name": "execute_prometheus_range_query", "tool_call_id": "92jf2hf"} >>
|
|
30
38
|
* Use these tools to generate charts that users can see. Here are standard metrics but you can use different ones:
|
|
31
39
|
** For memory consumption: `container_memory_working_set_bytes`
|
|
32
40
|
** For CPU usage: `container_cpu_usage_seconds_total`
|
|
@@ -67,9 +75,9 @@
|
|
|
67
75
|
* When embedding multiple graphs, always add line spacing between them
|
|
68
76
|
For example:
|
|
69
77
|
|
|
70
|
-
<<{"type": "promql", "tool_name": "execute_prometheus_range_query", "
|
|
78
|
+
<<{"type": "promql", "tool_name": "execute_prometheus_range_query", "tool_call_id": "lBaA"}>>
|
|
71
79
|
|
|
72
|
-
<<{"type": "promql", "tool_name": "execute_prometheus_range_query", "
|
|
80
|
+
<<{"type": "promql", "tool_name": "execute_prometheus_range_query", "tool_call_id": "IKtq"}>>
|
|
73
81
|
|
|
74
82
|
{%- if config and config.additional_labels and config.additional_labels.keys()|list|length > 0 %}
|
|
75
83
|
* ALWAYS add the following additional labels to ALL PromQL queries:
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
from enum import Enum
|
|
2
1
|
import logging
|
|
2
|
+
from enum import Enum
|
|
3
3
|
from typing import Any, Dict, List, Optional, Set
|
|
4
4
|
from urllib.parse import urljoin, urlparse
|
|
5
5
|
|
|
6
6
|
import backoff
|
|
7
|
-
from pydantic import BaseModel
|
|
8
7
|
import requests # type: ignore
|
|
8
|
+
from pydantic import BaseModel, ConfigDict, model_validator
|
|
9
9
|
from requests.auth import HTTPBasicAuth # type: ignore
|
|
10
10
|
|
|
11
11
|
# --- Enums and Pydantic Models (Mostly Unchanged) ---
|
|
@@ -17,12 +17,31 @@ class ClusterConnectionStatus(str, Enum):
|
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
class RabbitMQClusterConfig(BaseModel):
|
|
20
|
+
model_config = ConfigDict(extra="allow")
|
|
21
|
+
|
|
20
22
|
id: str = "rabbitmq" # must be unique
|
|
21
23
|
management_url: str # e.g., http://rabbitmq-service:15672
|
|
22
24
|
username: Optional[str] = None
|
|
23
25
|
password: Optional[str] = None
|
|
24
26
|
request_timeout_seconds: int = 30
|
|
25
|
-
|
|
27
|
+
verify_ssl: bool = True
|
|
28
|
+
|
|
29
|
+
@model_validator(mode="after")
|
|
30
|
+
def handle_deprecated_fields(self):
|
|
31
|
+
extra = self.model_extra or {}
|
|
32
|
+
deprecated = []
|
|
33
|
+
|
|
34
|
+
# Map old name to new name
|
|
35
|
+
if "verify_certs" in extra:
|
|
36
|
+
self.verify_ssl = extra["verify_certs"]
|
|
37
|
+
deprecated.append("verify_certs -> verify_ssl")
|
|
38
|
+
|
|
39
|
+
if deprecated:
|
|
40
|
+
logging.warning(
|
|
41
|
+
f"RabbitMQ config uses deprecated field names: {', '.join(deprecated)}. "
|
|
42
|
+
"Please update your configuration."
|
|
43
|
+
)
|
|
44
|
+
return self
|
|
26
45
|
|
|
27
46
|
# For internal use
|
|
28
47
|
connection_status: Optional[ClusterConnectionStatus] = None
|
|
@@ -111,7 +130,7 @@ def make_request(
|
|
|
111
130
|
params=params,
|
|
112
131
|
json=data,
|
|
113
132
|
timeout=config.request_timeout_seconds,
|
|
114
|
-
verify=config.
|
|
133
|
+
verify=config.verify_ssl,
|
|
115
134
|
)
|
|
116
135
|
response.raise_for_status()
|
|
117
136
|
return response.json()
|
|
@@ -1,21 +1,21 @@
|
|
|
1
|
-
import os
|
|
2
1
|
import logging
|
|
2
|
+
import os
|
|
3
3
|
from typing import Any, List, Optional, Tuple
|
|
4
|
+
from urllib.parse import urljoin
|
|
4
5
|
|
|
5
6
|
from pydantic import BaseModel
|
|
7
|
+
from requests import RequestException # type: ignore
|
|
8
|
+
|
|
6
9
|
from holmes.core.tools import (
|
|
7
10
|
CallablePrerequisite,
|
|
8
11
|
StructuredToolResult,
|
|
12
|
+
StructuredToolResultStatus,
|
|
9
13
|
Tool,
|
|
10
14
|
ToolInvokeContext,
|
|
11
15
|
ToolParameter,
|
|
12
|
-
StructuredToolResultStatus,
|
|
13
16
|
Toolset,
|
|
14
17
|
ToolsetTag,
|
|
15
18
|
)
|
|
16
|
-
from requests import RequestException # type: ignore
|
|
17
|
-
from urllib.parse import urljoin
|
|
18
|
-
|
|
19
19
|
from holmes.plugins.toolsets.rabbitmq.api import (
|
|
20
20
|
ClusterConnectionStatus,
|
|
21
21
|
RabbitMQClusterConfig,
|
|
@@ -1,19 +1,19 @@
|
|
|
1
|
-
import os
|
|
2
|
-
|
|
3
1
|
import logging
|
|
2
|
+
import os
|
|
3
|
+
from typing import Any, Dict, List, Optional
|
|
4
4
|
|
|
5
|
-
from typing import Optional, Dict, Any, List
|
|
6
5
|
from holmes.common.env_vars import load_bool
|
|
7
|
-
from holmes.core.supabase_dal import
|
|
6
|
+
from holmes.core.supabase_dal import FindingType, SupabaseDal
|
|
8
7
|
from holmes.core.tools import (
|
|
9
8
|
StaticPrerequisite,
|
|
9
|
+
StructuredToolResult,
|
|
10
|
+
StructuredToolResultStatus,
|
|
10
11
|
Tool,
|
|
11
12
|
ToolInvokeContext,
|
|
12
13
|
ToolParameter,
|
|
13
14
|
Toolset,
|
|
14
15
|
ToolsetTag,
|
|
15
16
|
)
|
|
16
|
-
from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
|
|
17
17
|
|
|
18
18
|
PULL_EXTERNAL_FINDINGS = load_bool("PULL_EXTERNAL_FINDINGS", False)
|
|
19
19
|
|
|
@@ -1,22 +1,23 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import os
|
|
3
3
|
import textwrap
|
|
4
|
-
from
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any, Dict, List, Optional, Union, cast
|
|
6
|
+
|
|
5
7
|
from holmes.core.supabase_dal import SupabaseDal
|
|
6
8
|
from holmes.core.tools import (
|
|
7
9
|
StructuredToolResult,
|
|
10
|
+
StructuredToolResultStatus,
|
|
8
11
|
Tool,
|
|
9
12
|
ToolInvokeContext,
|
|
10
13
|
ToolParameter,
|
|
11
|
-
StructuredToolResultStatus,
|
|
12
14
|
Toolset,
|
|
13
15
|
ToolsetTag,
|
|
14
16
|
)
|
|
15
|
-
|
|
16
17
|
from holmes.plugins.runbooks import (
|
|
18
|
+
DEFAULT_RUNBOOK_SEARCH_PATH,
|
|
17
19
|
get_runbook_by_path,
|
|
18
20
|
load_runbook_catalog,
|
|
19
|
-
DEFAULT_RUNBOOK_SEARCH_PATH,
|
|
20
21
|
)
|
|
21
22
|
from holmes.plugins.toolsets.utils import toolset_name_for_one_liner
|
|
22
23
|
|
|
@@ -32,8 +33,11 @@ class RunbookFetcher(Tool):
|
|
|
32
33
|
toolset: "RunbookToolset",
|
|
33
34
|
additional_search_paths: Optional[List[str]] = None,
|
|
34
35
|
dal: Optional[SupabaseDal] = None,
|
|
36
|
+
custom_catalog_paths: Optional[List[Union[str, Path]]] = None,
|
|
35
37
|
):
|
|
36
|
-
catalog = load_runbook_catalog(
|
|
38
|
+
catalog = load_runbook_catalog(
|
|
39
|
+
dal=dal, custom_catalog_paths=custom_catalog_paths
|
|
40
|
+
)
|
|
37
41
|
available_runbooks = []
|
|
38
42
|
if catalog:
|
|
39
43
|
available_runbooks = catalog.list_available_runbooks()
|
|
@@ -232,12 +236,26 @@ class RunbookToolset(Toolset):
|
|
|
232
236
|
if additional_search_paths:
|
|
233
237
|
config["additional_search_paths"] = additional_search_paths
|
|
234
238
|
|
|
239
|
+
# Compute custom catalog paths from additional search paths
|
|
240
|
+
custom_catalog_paths = None
|
|
241
|
+
if additional_search_paths:
|
|
242
|
+
custom_catalog_paths = [
|
|
243
|
+
os.path.join(search_path, "catalog.json")
|
|
244
|
+
for search_path in additional_search_paths
|
|
245
|
+
if os.path.isfile(os.path.join(search_path, "catalog.json"))
|
|
246
|
+
]
|
|
247
|
+
|
|
235
248
|
super().__init__(
|
|
236
249
|
name="runbook",
|
|
237
250
|
description="Fetch runbooks",
|
|
238
251
|
icon_url="https://platform.robusta.dev/demos/runbook.svg",
|
|
239
252
|
tools=[
|
|
240
|
-
RunbookFetcher(
|
|
253
|
+
RunbookFetcher(
|
|
254
|
+
self,
|
|
255
|
+
additional_search_paths,
|
|
256
|
+
dal,
|
|
257
|
+
cast(Optional[List[Union[str, Path]]], custom_catalog_paths),
|
|
258
|
+
),
|
|
241
259
|
],
|
|
242
260
|
docs_url="https://holmesgpt.dev/data-sources/",
|
|
243
261
|
tags=[
|
|
@@ -245,6 +263,7 @@ class RunbookToolset(Toolset):
|
|
|
245
263
|
],
|
|
246
264
|
is_default=True,
|
|
247
265
|
config=config,
|
|
266
|
+
enabled=True,
|
|
248
267
|
)
|
|
249
268
|
|
|
250
269
|
def get_example_config(self) -> Dict[str, Any]:
|
holmes/plugins/toolsets/utils.py
CHANGED
holmes/utils/config_utils.py
CHANGED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import socket
|
|
3
|
+
|
|
4
|
+
from holmes.common.env_vars import KEEPALIVE_CNT, KEEPALIVE_IDLE, KEEPALIVE_INTVL
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def patch_socket_create_connection(
|
|
8
|
+
idle: int = KEEPALIVE_IDLE,
|
|
9
|
+
intvl: int = KEEPALIVE_INTVL,
|
|
10
|
+
cnt: int = KEEPALIVE_CNT,
|
|
11
|
+
) -> None:
|
|
12
|
+
orig = socket.create_connection
|
|
13
|
+
|
|
14
|
+
def new_create_connection(address, timeout=None, source_address=None, **kwargs):
|
|
15
|
+
logging.debug(
|
|
16
|
+
f"Creating patched connection to {address} with timeout {timeout} and source address {source_address}"
|
|
17
|
+
)
|
|
18
|
+
s = orig(address, timeout=timeout, source_address=source_address, **kwargs)
|
|
19
|
+
s.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
|
|
20
|
+
|
|
21
|
+
# Linux-only tuning (these attrs won't exist on macOS/Windows)
|
|
22
|
+
if hasattr(socket, "TCP_KEEPIDLE"):
|
|
23
|
+
s.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, int(idle))
|
|
24
|
+
if hasattr(socket, "TCP_KEEPINTVL"):
|
|
25
|
+
s.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, int(intvl))
|
|
26
|
+
if hasattr(socket, "TCP_KEEPCNT"):
|
|
27
|
+
s.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPCNT, int(cnt))
|
|
28
|
+
return s
|
|
29
|
+
|
|
30
|
+
logging.info("Patching socket.create_connection to force keepalive")
|
|
31
|
+
socket.create_connection = new_create_connection
|
holmes/utils/console/result.py
CHANGED
|
@@ -17,6 +17,7 @@ def handle_result(
|
|
|
17
17
|
issue: Issue,
|
|
18
18
|
show_tool_output: bool,
|
|
19
19
|
add_separator: bool,
|
|
20
|
+
log_costs: bool = False,
|
|
20
21
|
):
|
|
21
22
|
if destination == DestinationType.CLI:
|
|
22
23
|
if show_tool_output and result.tool_calls:
|
|
@@ -30,6 +31,15 @@ def handle_result(
|
|
|
30
31
|
|
|
31
32
|
console.print(f"[bold {AI_COLOR}]AI:[/bold {AI_COLOR}]", end=" ")
|
|
32
33
|
console.print(Markdown(result.result)) # type: ignore
|
|
34
|
+
|
|
35
|
+
if log_costs and result.total_cost > 0:
|
|
36
|
+
console.print(
|
|
37
|
+
f"\n[bold yellow]💰 Total Cost:[/bold yellow] ${result.total_cost:.6f}"
|
|
38
|
+
)
|
|
39
|
+
console.print(
|
|
40
|
+
f"[dim]Tokens: {result.prompt_tokens:,} prompt + {result.completion_tokens:,} completion = {result.total_tokens:,} total[/dim]"
|
|
41
|
+
)
|
|
42
|
+
|
|
33
43
|
if add_separator:
|
|
34
44
|
console.print(Rule())
|
|
35
45
|
|
holmes/utils/file_utils.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
from typing import
|
|
1
|
+
from typing import TYPE_CHECKING, Dict, List, Optional
|
|
2
|
+
|
|
2
3
|
from pydantic import BaseModel
|
|
3
|
-
|
|
4
|
+
|
|
4
5
|
from holmes.plugins.runbooks import RunbookCatalog
|
|
5
6
|
|
|
6
7
|
if TYPE_CHECKING:
|
|
@@ -34,24 +35,14 @@ def _format_resource_instructions(
|
|
|
34
35
|
return items
|
|
35
36
|
|
|
36
37
|
|
|
37
|
-
def add_runbooks_to_user_prompt(
|
|
38
|
-
user_prompt: str,
|
|
38
|
+
def generate_runbooks_args(
|
|
39
39
|
runbook_catalog: Optional[RunbookCatalog],
|
|
40
40
|
global_instructions: Optional[Instructions] = None,
|
|
41
41
|
issue_instructions: Optional[List[str]] = None,
|
|
42
42
|
resource_instructions: Optional["ResourceInstructions"] = None, # type: ignore
|
|
43
|
-
) -> str:
|
|
44
|
-
if (
|
|
45
|
-
not runbook_catalog
|
|
46
|
-
and not issue_instructions
|
|
47
|
-
and not resource_instructions
|
|
48
|
-
and not global_instructions
|
|
49
|
-
):
|
|
50
|
-
return user_prompt
|
|
51
|
-
|
|
43
|
+
) -> Dict[str, str]:
|
|
52
44
|
catalog_str = runbook_catalog.to_prompt_string() if runbook_catalog else ""
|
|
53
45
|
|
|
54
|
-
# Combine and format all instructions
|
|
55
46
|
combined_instructions = []
|
|
56
47
|
if issue_instructions:
|
|
57
48
|
combined_instructions.extend(issue_instructions)
|
|
@@ -71,15 +62,8 @@ def add_runbooks_to_user_prompt(
|
|
|
71
62
|
else ""
|
|
72
63
|
)
|
|
73
64
|
|
|
74
|
-
|
|
75
|
-
"
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
"global_instructions": global_block,
|
|
80
|
-
},
|
|
81
|
-
)
|
|
82
|
-
|
|
83
|
-
if user_prompt and not user_prompt.endswith("\n"):
|
|
84
|
-
user_prompt += "\n"
|
|
85
|
-
return f"{user_prompt}\n{rendered}"
|
|
65
|
+
return {
|
|
66
|
+
"runbook_catalog": catalog_str,
|
|
67
|
+
"custom_instructions": issue_block,
|
|
68
|
+
"global_instructions": global_block,
|
|
69
|
+
}
|
holmes/utils/holmes_status.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import json
|
|
2
|
-
from holmes.core.supabase_dal import SupabaseDal
|
|
3
|
-
from holmes.config import Config
|
|
4
|
-
from holmes import get_version # type: ignore
|
|
5
2
|
import logging
|
|
6
3
|
|
|
4
|
+
from holmes import get_version # type: ignore
|
|
5
|
+
from holmes.config import Config
|
|
6
|
+
from holmes.core.supabase_dal import SupabaseDal
|
|
7
|
+
|
|
7
8
|
|
|
8
9
|
def update_holmes_status_in_db(dal: SupabaseDal, config: Config):
|
|
9
10
|
logging.info("Updating status of holmes")
|
holmes/utils/log.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Logging utilities for Holmes."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class EndpointFilter(logging.Filter):
|
|
8
|
+
"""Filter out log records for specific endpoint paths."""
|
|
9
|
+
|
|
10
|
+
def __init__(self, path: str, *args: Any, **kwargs: Any):
|
|
11
|
+
super().__init__(*args, **kwargs)
|
|
12
|
+
self._path = path
|
|
13
|
+
|
|
14
|
+
def filter(self, record: logging.LogRecord) -> bool:
|
|
15
|
+
return record.getMessage().find(self._path) == -1
|
holmes/utils/markdown_utils.py
CHANGED
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
# based on https://github.com/kostyachum/python-markdown-plain-text/blob/main/markdown_plain_text/extention.py
|
|
2
2
|
# MIT licensed
|
|
3
|
-
from
|
|
3
|
+
from xml.etree.ElementTree import Comment, ElementTree, ProcessingInstruction
|
|
4
4
|
|
|
5
|
-
from
|
|
6
|
-
from xml.etree.ElementTree import Comment, ElementTree
|
|
5
|
+
from markdown import Extension, Markdown # type: ignore
|
|
7
6
|
|
|
8
7
|
|
|
9
8
|
def _serialize_plain_text(write, elem):
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Memory limit utilities for tool subprocess execution.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
|
|
7
|
+
from holmes.common.env_vars import TOOL_MEMORY_LIMIT_MB
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def get_ulimit_prefix() -> str:
|
|
13
|
+
"""
|
|
14
|
+
Get the ulimit command prefix for memory protection.
|
|
15
|
+
|
|
16
|
+
Returns a shell command prefix that sets virtual memory limit.
|
|
17
|
+
The '|| true' ensures we continue even if ulimit is not supported.
|
|
18
|
+
"""
|
|
19
|
+
memory_limit_kb = TOOL_MEMORY_LIMIT_MB * 1024
|
|
20
|
+
return f"ulimit -v {memory_limit_kb} || true; "
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def check_oom_and_append_hint(output: str, return_code: int) -> str:
|
|
24
|
+
"""
|
|
25
|
+
Check if a command was OOM killed and append a helpful hint.
|
|
26
|
+
|
|
27
|
+
Args:
|
|
28
|
+
output: The command output
|
|
29
|
+
return_code: The command's return code
|
|
30
|
+
|
|
31
|
+
Returns:
|
|
32
|
+
Output with OOM hint appended if OOM was detected
|
|
33
|
+
"""
|
|
34
|
+
# Common OOM indicators:
|
|
35
|
+
# - Return code 137 (128 + 9 = SIGKILL, commonly OOM)
|
|
36
|
+
# - Return code -9 (SIGKILL on some systems)
|
|
37
|
+
# - "Killed" in output (Linux OOM killer message)
|
|
38
|
+
# - "MemoryError" (Python)
|
|
39
|
+
# - "Cannot allocate memory" (various tools)
|
|
40
|
+
is_oom = (
|
|
41
|
+
return_code in (137, -9)
|
|
42
|
+
or "Killed" in output
|
|
43
|
+
or "MemoryError" in output
|
|
44
|
+
or "Cannot allocate memory" in output
|
|
45
|
+
or "bad_alloc" in output
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
if is_oom:
|
|
49
|
+
hint = (
|
|
50
|
+
f"\n\n[OOM] Command was killed due to memory limits (current limit: {TOOL_MEMORY_LIMIT_MB} MB). "
|
|
51
|
+
f"Try querying the data differently to reduce memory usage - add filters to narrow the results, "
|
|
52
|
+
f"use smaller time ranges, or try alternative tools that may be more memory-efficient. "
|
|
53
|
+
f"If you cannot succeed with a modified query, you may recommend the user increase the limit "
|
|
54
|
+
f"by setting the TOOL_MEMORY_LIMIT_MB environment variable (Tool memory limit, MB)."
|
|
55
|
+
)
|
|
56
|
+
return output + hint
|
|
57
|
+
|
|
58
|
+
return output
|
holmes/utils/sentry_helper.py
CHANGED
|
@@ -1,4 +1,7 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
1
3
|
import sentry_sdk
|
|
4
|
+
|
|
2
5
|
from holmes.core.models import ToolCallResult, TruncationMetadata
|
|
3
6
|
|
|
4
7
|
|
|
@@ -39,3 +42,23 @@ def capture_structured_output_incorrect_tool_call():
|
|
|
39
42
|
"Structured output incorrect tool call",
|
|
40
43
|
level="warning",
|
|
41
44
|
)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def capture_sections_none(content: Optional[str]):
|
|
48
|
+
# Limit display length to avoid sending huge payloads to Sentry
|
|
49
|
+
_MAX_DISPLAY_LENGTH = 1500
|
|
50
|
+
display_content = ""
|
|
51
|
+
if content:
|
|
52
|
+
if len(content) > _MAX_DISPLAY_LENGTH * 2:
|
|
53
|
+
# Show first and last portions of content
|
|
54
|
+
display_content = f"{content[:_MAX_DISPLAY_LENGTH]}...\n\n...{content[-_MAX_DISPLAY_LENGTH:]}"
|
|
55
|
+
else:
|
|
56
|
+
display_content = content
|
|
57
|
+
|
|
58
|
+
with sentry_sdk.push_scope() as scope:
|
|
59
|
+
scope.set_extra("content", display_content)
|
|
60
|
+
scope.set_extra("content_length", len(content) if content else 0)
|
|
61
|
+
sentry_sdk.capture_message(
|
|
62
|
+
"Holmes answer couldn't be parsed into sections",
|
|
63
|
+
level="warning",
|
|
64
|
+
)
|
holmes/utils/stream.py
CHANGED
|
@@ -1,15 +1,17 @@
|
|
|
1
1
|
import json
|
|
2
|
+
import logging
|
|
2
3
|
from enum import Enum
|
|
3
|
-
from typing import Generator, Optional, List, Union
|
|
4
|
-
import litellm
|
|
5
|
-
from pydantic import BaseModel, Field
|
|
6
|
-
from holmes.core.investigation_structured_output import process_response_into_sections
|
|
7
4
|
from functools import partial
|
|
8
|
-
import logging
|
|
5
|
+
from typing import Generator, List, Optional, Union
|
|
6
|
+
|
|
7
|
+
import litellm
|
|
9
8
|
from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper
|
|
10
9
|
from litellm.types.utils import ModelResponse, TextCompletionResponse
|
|
10
|
+
from pydantic import BaseModel, Field
|
|
11
11
|
|
|
12
|
+
from holmes.core.investigation_structured_output import process_response_into_sections
|
|
12
13
|
from holmes.core.llm import TokenCountMetadata, get_llm_usage
|
|
14
|
+
from holmes.utils import sentry_helper
|
|
13
15
|
|
|
14
16
|
|
|
15
17
|
class StreamEvents(str, Enum):
|
|
@@ -63,6 +65,11 @@ def stream_investigate_formatter(
|
|
|
63
65
|
message.data.get("content")
|
|
64
66
|
)
|
|
65
67
|
|
|
68
|
+
if sections is None:
|
|
69
|
+
sentry_helper.capture_sections_none(
|
|
70
|
+
content=message.data.get("content"),
|
|
71
|
+
)
|
|
72
|
+
|
|
66
73
|
yield create_sse_message(
|
|
67
74
|
StreamEvents.ANSWER_END.value,
|
|
68
75
|
{
|
holmes/utils/tags.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
|
+
import json
|
|
1
2
|
import logging
|
|
2
|
-
from typing import Optional
|
|
3
|
-
from typing_extensions import Dict, List
|
|
4
3
|
import re
|
|
5
|
-
import json
|
|
6
4
|
from copy import deepcopy
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
from typing_extensions import Dict, List
|
|
7
8
|
|
|
8
9
|
|
|
9
10
|
def stringify_tag(tag: Dict[str, str]) -> Optional[str]:
|
holmes/version.py
CHANGED
|
@@ -8,10 +8,12 @@ import os
|
|
|
8
8
|
import subprocess
|
|
9
9
|
import sys
|
|
10
10
|
import threading
|
|
11
|
-
from typing import Optional, NamedTuple
|
|
12
11
|
from functools import cache
|
|
12
|
+
from typing import NamedTuple, Optional
|
|
13
|
+
|
|
13
14
|
import requests # type: ignore
|
|
14
15
|
from pydantic import BaseModel, ConfigDict
|
|
16
|
+
|
|
15
17
|
from holmes.common.env_vars import ROBUSTA_API_ENDPOINT
|
|
16
18
|
|
|
17
19
|
# For relative imports to work in Python 3.6 - see https://stackoverflow.com/a/49375740
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: holmesgpt
|
|
3
|
-
Version: 0.16.2a0
|
|
3
|
+
Version: 0.18.4
|
|
4
4
|
Summary:
|
|
5
5
|
Author: Natan Yellin
|
|
6
6
|
Author-email: natan@robusta.dev
|
|
@@ -28,6 +28,7 @@ Requires-Dist: google-cloud-aiplatform (>=1.38)
|
|
|
28
28
|
Requires-Dist: httpx[socks] (<0.28)
|
|
29
29
|
Requires-Dist: humanize (>=4.9.0,<5.0.0)
|
|
30
30
|
Requires-Dist: jinja2 (>=3.1.2,<4.0.0)
|
|
31
|
+
Requires-Dist: jq (>=1.10.0,<2.0.0)
|
|
31
32
|
Requires-Dist: kubernetes (>=32.0.1,<33.0.0)
|
|
32
33
|
Requires-Dist: litellm (==1.77.1)
|
|
33
34
|
Requires-Dist: markdown (>=3.6,<4.0)
|
|
@@ -58,6 +59,9 @@ Description-Content-Type: text/markdown
|
|
|
58
59
|
|
|
59
60
|
HolmesGPT is an AI agent for investigating problems in your cloud, finding the root cause, and suggesting remediations. It has dozens of built-in integrations for cloud providers, observability tools, and on-call systems.
|
|
60
61
|
|
|
62
|
+
[](https://www.bestpractices.dev/projects/11586)
|
|
63
|
+
[](https://scorecard.dev/viewer/?uri=github.com/HolmesGPT/holmesgpt)
|
|
64
|
+
|
|
61
65
|
>🎉 **HolmesGPT is now a CNCF Sandbox Project!**
|
|
62
66
|
HolmesGPT was originally created by [Robusta.Dev](https://home.robusta.dev/) and is a CNCF sandbox project.
|
|
63
67
|
|
|
@@ -70,7 +74,7 @@ Find more about HolmesGPT's maintainers and adopters [here](./ADOPTERS.md).
|
|
|
70
74
|
<a href="#installation"><strong>Installation</strong></a> |
|
|
71
75
|
<a href="#supported-llm-providers"><strong>LLM Providers</strong></a> |
|
|
72
76
|
<a href="https://www.youtube.com/watch?v=TfQfx65LsDQ"><strong>YouTube Demo</strong></a> |
|
|
73
|
-
<a href="https://deepwiki.com/
|
|
77
|
+
<a href="https://deepwiki.com/HolmesGPT/holmesgpt"><img src="https://deepwiki.com/badge.svg" alt="Ask DeepWiki"></a>
|
|
74
78
|
</p>
|
|
75
79
|
</div>
|
|
76
80
|
|
|
@@ -248,27 +252,25 @@ Because HolmesGPT relies on LLMs, it relies on [a suite of pytest based evaluati
|
|
|
248
252
|
|
|
249
253
|
|
|
250
254
|
## License
|
|
251
|
-
Distributed under the Apache 2.0 License. See [LICENSE](https://github.com/
|
|
255
|
+
Distributed under the Apache 2.0 License. See [LICENSE](https://github.com/HolmesGPT/holmesgpt/blob/master/LICENSE) for more information.
|
|
252
256
|
<!-- Change License -->
|
|
253
257
|
|
|
254
258
|
## Community
|
|
255
259
|
|
|
256
260
|
Join our community to discuss the HolmesGPT roadmap and share feedback:
|
|
257
261
|
|
|
258
|
-
|
|
259
|
-
- **Topics:** Roadmap discussion, community feedback, and Q&A
|
|
260
|
-
- **Resources:** [📝 Meeting Notes](https://docs.google.com/document/d/1sIHCcTivyzrF5XNvos7ZT_UcxEOqgwfawsTbb9wMJe4/edit?tab=t.0) | [📋 Community Page](https://holmesgpt.dev/community/)
|
|
262
|
+
- [Community Meetups](https://docs.google.com/document/d/1q3L2iUd8tNu-NmZ6QIVOJcCLHrile9CC5QguOGTn_tg/edit?tab=t.0#heading=h.ihdnrt5bstrv)
|
|
261
263
|
|
|
262
264
|
## Support
|
|
263
265
|
|
|
264
|
-
If you have any questions, feel free to message us on [
|
|
266
|
+
If you have any questions, feel free to message us on [HolmesGPT Slack Channel](https://cloud-native.slack.com/archives/C0A1SPQM5PZ)
|
|
265
267
|
|
|
266
268
|
## How to Contribute
|
|
267
269
|
|
|
268
270
|
Please read our [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines and instructions.
|
|
269
271
|
|
|
270
|
-
For help, contact us on [Slack](https://
|
|
272
|
+
For help, contact us on [Slack](https://cloud-native.slack.com/archives/C0A1SPQM5PZ) or ask [DeepWiki AI](https://deepwiki.com/HolmesGPT/holmesgpt) your questions.
|
|
271
273
|
|
|
272
|
-
Please make sure to follow the CNCF code of conduct - [details here](https://github.com/
|
|
273
|
-
[](https://deepwiki.com/
|
|
274
|
+
Please make sure to follow the CNCF code of conduct - [details here](https://github.com/HolmesGPT/holmesgpt/blob/master/CODE_OF_CONDUCT.md).
|
|
275
|
+
[](https://deepwiki.com/HolmesGPT/holmesgpt)
|
|
274
276
|
|