holmesgpt-0.11.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of holmesgpt might be problematic.
- holmes/.git_archival.json +7 -0
- holmes/__init__.py +76 -0
- holmes/__init__.py.bak +76 -0
- holmes/clients/robusta_client.py +24 -0
- holmes/common/env_vars.py +47 -0
- holmes/config.py +526 -0
- holmes/core/__init__.py +0 -0
- holmes/core/conversations.py +578 -0
- holmes/core/investigation.py +152 -0
- holmes/core/investigation_structured_output.py +264 -0
- holmes/core/issue.py +54 -0
- holmes/core/llm.py +250 -0
- holmes/core/models.py +157 -0
- holmes/core/openai_formatting.py +51 -0
- holmes/core/performance_timing.py +72 -0
- holmes/core/prompt.py +42 -0
- holmes/core/resource_instruction.py +17 -0
- holmes/core/runbooks.py +26 -0
- holmes/core/safeguards.py +120 -0
- holmes/core/supabase_dal.py +540 -0
- holmes/core/tool_calling_llm.py +798 -0
- holmes/core/tools.py +566 -0
- holmes/core/tools_utils/__init__.py +0 -0
- holmes/core/tools_utils/tool_executor.py +65 -0
- holmes/core/tools_utils/toolset_utils.py +52 -0
- holmes/core/toolset_manager.py +418 -0
- holmes/interactive.py +229 -0
- holmes/main.py +1041 -0
- holmes/plugins/__init__.py +0 -0
- holmes/plugins/destinations/__init__.py +6 -0
- holmes/plugins/destinations/slack/__init__.py +2 -0
- holmes/plugins/destinations/slack/plugin.py +163 -0
- holmes/plugins/interfaces.py +32 -0
- holmes/plugins/prompts/__init__.py +48 -0
- holmes/plugins/prompts/_current_date_time.jinja2 +1 -0
- holmes/plugins/prompts/_default_log_prompt.jinja2 +11 -0
- holmes/plugins/prompts/_fetch_logs.jinja2 +36 -0
- holmes/plugins/prompts/_general_instructions.jinja2 +86 -0
- holmes/plugins/prompts/_global_instructions.jinja2 +12 -0
- holmes/plugins/prompts/_runbook_instructions.jinja2 +13 -0
- holmes/plugins/prompts/_toolsets_instructions.jinja2 +56 -0
- holmes/plugins/prompts/generic_ask.jinja2 +36 -0
- holmes/plugins/prompts/generic_ask_conversation.jinja2 +32 -0
- holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +50 -0
- holmes/plugins/prompts/generic_investigation.jinja2 +42 -0
- holmes/plugins/prompts/generic_post_processing.jinja2 +13 -0
- holmes/plugins/prompts/generic_ticket.jinja2 +12 -0
- holmes/plugins/prompts/investigation_output_format.jinja2 +32 -0
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +84 -0
- holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +39 -0
- holmes/plugins/runbooks/README.md +22 -0
- holmes/plugins/runbooks/__init__.py +100 -0
- holmes/plugins/runbooks/catalog.json +14 -0
- holmes/plugins/runbooks/jira.yaml +12 -0
- holmes/plugins/runbooks/kube-prometheus-stack.yaml +10 -0
- holmes/plugins/runbooks/networking/dns_troubleshooting_instructions.md +66 -0
- holmes/plugins/runbooks/upgrade/upgrade_troubleshooting_instructions.md +44 -0
- holmes/plugins/sources/github/__init__.py +77 -0
- holmes/plugins/sources/jira/__init__.py +123 -0
- holmes/plugins/sources/opsgenie/__init__.py +93 -0
- holmes/plugins/sources/pagerduty/__init__.py +147 -0
- holmes/plugins/sources/prometheus/__init__.py +0 -0
- holmes/plugins/sources/prometheus/models.py +104 -0
- holmes/plugins/sources/prometheus/plugin.py +154 -0
- holmes/plugins/toolsets/__init__.py +171 -0
- holmes/plugins/toolsets/aks-node-health.yaml +65 -0
- holmes/plugins/toolsets/aks.yaml +86 -0
- holmes/plugins/toolsets/argocd.yaml +70 -0
- holmes/plugins/toolsets/atlas_mongodb/instructions.jinja2 +8 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +307 -0
- holmes/plugins/toolsets/aws.yaml +76 -0
- holmes/plugins/toolsets/azure_sql/__init__.py +0 -0
- holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +600 -0
- holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +309 -0
- holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +445 -0
- holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +251 -0
- holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +317 -0
- holmes/plugins/toolsets/azure_sql/azure_base_toolset.py +55 -0
- holmes/plugins/toolsets/azure_sql/azure_sql_instructions.jinja2 +137 -0
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +183 -0
- holmes/plugins/toolsets/azure_sql/install.md +66 -0
- holmes/plugins/toolsets/azure_sql/tools/__init__.py +1 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +324 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +243 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +205 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +249 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +373 -0
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +237 -0
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +172 -0
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +170 -0
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +188 -0
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +180 -0
- holmes/plugins/toolsets/azure_sql/utils.py +83 -0
- holmes/plugins/toolsets/bash/__init__.py +0 -0
- holmes/plugins/toolsets/bash/bash_instructions.jinja2 +14 -0
- holmes/plugins/toolsets/bash/bash_toolset.py +208 -0
- holmes/plugins/toolsets/bash/common/bash.py +52 -0
- holmes/plugins/toolsets/bash/common/config.py +14 -0
- holmes/plugins/toolsets/bash/common/stringify.py +25 -0
- holmes/plugins/toolsets/bash/common/validators.py +24 -0
- holmes/plugins/toolsets/bash/grep/__init__.py +52 -0
- holmes/plugins/toolsets/bash/kubectl/__init__.py +100 -0
- holmes/plugins/toolsets/bash/kubectl/constants.py +96 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_describe.py +66 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_events.py +88 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +108 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_logs.py +20 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_run.py +46 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_top.py +81 -0
- holmes/plugins/toolsets/bash/parse_command.py +103 -0
- holmes/plugins/toolsets/confluence.yaml +19 -0
- holmes/plugins/toolsets/consts.py +5 -0
- holmes/plugins/toolsets/coralogix/api.py +158 -0
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +103 -0
- holmes/plugins/toolsets/coralogix/utils.py +181 -0
- holmes/plugins/toolsets/datadog.py +153 -0
- holmes/plugins/toolsets/docker.yaml +46 -0
- holmes/plugins/toolsets/git.py +756 -0
- holmes/plugins/toolsets/grafana/__init__.py +0 -0
- holmes/plugins/toolsets/grafana/base_grafana_toolset.py +54 -0
- holmes/plugins/toolsets/grafana/common.py +68 -0
- holmes/plugins/toolsets/grafana/grafana_api.py +31 -0
- holmes/plugins/toolsets/grafana/loki_api.py +89 -0
- holmes/plugins/toolsets/grafana/tempo_api.py +124 -0
- holmes/plugins/toolsets/grafana/toolset_grafana.py +102 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +102 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +10 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +299 -0
- holmes/plugins/toolsets/grafana/trace_parser.py +195 -0
- holmes/plugins/toolsets/helm.yaml +42 -0
- holmes/plugins/toolsets/internet/internet.py +275 -0
- holmes/plugins/toolsets/internet/notion.py +137 -0
- holmes/plugins/toolsets/kafka.py +638 -0
- holmes/plugins/toolsets/kubernetes.yaml +255 -0
- holmes/plugins/toolsets/kubernetes_logs.py +426 -0
- holmes/plugins/toolsets/kubernetes_logs.yaml +42 -0
- holmes/plugins/toolsets/logging_utils/__init__.py +0 -0
- holmes/plugins/toolsets/logging_utils/logging_api.py +217 -0
- holmes/plugins/toolsets/logging_utils/types.py +0 -0
- holmes/plugins/toolsets/mcp/toolset_mcp.py +135 -0
- holmes/plugins/toolsets/newrelic.py +222 -0
- holmes/plugins/toolsets/opensearch/__init__.py +0 -0
- holmes/plugins/toolsets/opensearch/opensearch.py +245 -0
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +151 -0
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +211 -0
- holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +12 -0
- holmes/plugins/toolsets/opensearch/opensearch_utils.py +166 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +818 -0
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +38 -0
- holmes/plugins/toolsets/rabbitmq/api.py +398 -0
- holmes/plugins/toolsets/rabbitmq/rabbitmq_instructions.jinja2 +37 -0
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +222 -0
- holmes/plugins/toolsets/robusta/__init__.py +0 -0
- holmes/plugins/toolsets/robusta/robusta.py +235 -0
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +24 -0
- holmes/plugins/toolsets/runbook/__init__.py +0 -0
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +78 -0
- holmes/plugins/toolsets/service_discovery.py +92 -0
- holmes/plugins/toolsets/servicenow/install.md +37 -0
- holmes/plugins/toolsets/servicenow/instructions.jinja2 +3 -0
- holmes/plugins/toolsets/servicenow/servicenow.py +198 -0
- holmes/plugins/toolsets/slab.yaml +20 -0
- holmes/plugins/toolsets/utils.py +137 -0
- holmes/plugins/utils.py +14 -0
- holmes/utils/__init__.py +0 -0
- holmes/utils/cache.py +84 -0
- holmes/utils/cert_utils.py +40 -0
- holmes/utils/default_toolset_installation_guide.jinja2 +44 -0
- holmes/utils/definitions.py +13 -0
- holmes/utils/env.py +53 -0
- holmes/utils/file_utils.py +56 -0
- holmes/utils/global_instructions.py +20 -0
- holmes/utils/holmes_status.py +22 -0
- holmes/utils/holmes_sync_toolsets.py +80 -0
- holmes/utils/markdown_utils.py +55 -0
- holmes/utils/pydantic_utils.py +54 -0
- holmes/utils/robusta.py +10 -0
- holmes/utils/tags.py +97 -0
- holmesgpt-0.11.5.dist-info/LICENSE.txt +21 -0
- holmesgpt-0.11.5.dist-info/METADATA +400 -0
- holmesgpt-0.11.5.dist-info/RECORD +183 -0
- holmesgpt-0.11.5.dist-info/WHEEL +4 -0
- holmesgpt-0.11.5.dist-info/entry_points.txt +3 -0

holmes/plugins/sources/jira/__init__.py
@@ -0,0 +1,123 @@
+import logging
+from typing import List, Optional
+
+import requests  # type: ignore
+from requests.auth import HTTPBasicAuth  # type: ignore
+
+from holmes.core.issue import Issue
+from holmes.core.tool_calling_llm import LLMResult
+from holmes.plugins.interfaces import SourcePlugin
+
+
+class JiraSource(SourcePlugin):
+    def __init__(self, url: str, username: str, api_key: str, jql_query: str):
+        self.url = url
+        self.username = username
+        self.api_key = api_key
+        self.jql_query = jql_query
+
+    def fetch_issues(self) -> List[Issue]:
+        logging.info(f"Fetching issues from {self.url} with JQL='{self.jql_query}'")
+        try:
+            response = requests.get(
+                f"{self.url}/rest/api/2/search",
+                params={"jql": self.jql_query},
+                auth=HTTPBasicAuth(self.username, self.api_key),
+                headers={"Accept": "application/json"},
+            )
+            if response.status_code != 200:
+                raise Exception(
+                    f"Failed to get issues: {response.status_code} {response.text}"
+                )
+            logging.info(f"Got {response}")
+            response.raise_for_status()
+            data = response.json()
+            return [self.convert_to_issue(issue) for issue in data.get("issues", [])]
+        except requests.RequestException as e:
+            raise ConnectionError("Failed to fetch data from Jira.") from e
+
+    def convert_to_issue(self, jira_issue, description: Optional[str] = None):
+        description = self.extract_description(jira_issue)
+        return Issue(
+            id=jira_issue["id"],
+            name=jira_issue["fields"]["summary"],
+            source_type="jira",
+            source_instance_id=self.url,
+            url=f"{self.url}/browse/{jira_issue['key']}",
+            description=description,
+            raw=jira_issue,
+        )
+
+        # status=jira_issue["fields"]["status"]["name"],
+
+    def extract_description(self, jira_issue) -> str:
+        """
+        Extracts and formats the issue description.
+        """
+        description_blocks = (
+            jira_issue.get("fields", {}).get("description", {}).get("content", [])
+        )
+        description_text = []
+
+        for block in description_blocks:
+            if block["type"] == "paragraph":
+                text = " ".join(
+                    [c["text"] for c in block.get("content", []) if "text" in c]
+                )
+                description_text.append(text)
+            elif block["type"] == "orderedList":
+                for idx, item in enumerate(block["content"], start=1):
+                    text = " ".join(
+                        [
+                            c["text"]
+                            for c in item["content"][0].get("content", [])
+                            if "text" in c
+                        ]
+                    )
+                    description_text.append(f"{idx}. {text}")
+
+        return (
+            "\n".join(description_text)
+            if description_text
+            else "No description available."
+        )
+
+    def write_back_result(self, issue_id: str, result_data: LLMResult) -> None:
+        # TODO: upload files and show tool usage
+        comment_url = f"{self.url}/rest/api/2/issue/{issue_id}/comment"
+        comment_data = {
+            "body": f"Automatic AI Investigation by Robusta:\n\n{result_data.result}\n"
+        }
+        response = requests.post(
+            comment_url,
+            json=comment_data,
+            auth=HTTPBasicAuth(self.username, self.api_key),
+            headers={"Accept": "application/json"},
+        )
+        response.raise_for_status()
+        data = response.json()
+        logging.debug(f"Comment added to issue {issue_id}: {data}")
+
+
+class JiraServiceManagementSource(JiraSource):
+    def __init__(self, url: str, username: str, api_key: str, jql_query: str):
+        super().__init__(url, username, api_key, jql_query)
+
+    def fetch_issue(self, id: str) -> Issue:
+        """
+        Might also be the same in jira, needs additional testing
+        """
+        logging.info(f"Fetching Jira Service Management issue {id} from {self.url}")
+
+        try:
+            response = requests.get(
+                f"{self.url}/rest/api/3/issue/{id}",
+                auth=HTTPBasicAuth(self.username, self.api_key),
+                headers={"Accept": "application/json"},
+            )
+            response.raise_for_status()
+            jsm_issue = response.json()
+            description = self.extract_description(jsm_issue)
+            return self.convert_to_issue(jsm_issue, description)
+        except requests.RequestException as e:
+            raise ConnectionError(f"Failed to fetch JSM ticket {id}") from e
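
For orientation, a minimal sketch of driving this source plugin; the URL, credentials, and JQL below are placeholders, not values from the package:

# Hypothetical usage sketch -- URL, credentials, and JQL are placeholders.
from holmes.plugins.sources.jira import JiraSource

source = JiraSource(
    url="https://example.atlassian.net",
    username="bot@example.com",
    api_key="<jira-api-token>",
    jql_query="project = OPS AND status != Done",
)
for issue in source.fetch_issues():
    print(issue.id, issue.name, issue.url)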

holmes/plugins/sources/opsgenie/__init__.py
@@ -0,0 +1,93 @@
+import logging
+from typing import Dict, List, Optional, Union
+
+import markdown  # type: ignore
+import requests  # type: ignore
+
+from holmes.core.issue import Issue
+from holmes.core.tool_calling_llm import LLMResult
+from holmes.plugins.interfaces import SourcePlugin
+
+OPSGENIE_TEAM_INTEGRATION_KEY_HELP = "OpsGenie Team Integration key for writing back results. (NOT a normal API Key.) Get it from Teams > YourTeamName > Integrations > Add Integration > API Key. Don't forget to turn on the integration!"
+
+
+class OpsGenieSource(SourcePlugin):
+    def __init__(
+        self, api_key: str, query: str, team_integration_key: Optional[str] = None
+    ):
+        self.api_key = api_key
+        self.query = query
+        self.team_integration_key = team_integration_key
+
+    def fetch_issues(self) -> List[Issue]:
+        logging.info(f"Fetching alerts from OpsGenie with query: {self.query}")
+        try:
+            data = []
+            url = "https://api.opsgenie.com/v2/alerts"
+            headers = {
+                "Authorization": f"GenieKey {self.api_key}",
+                "Content-Type": "application/json",
+            }
+            params: Dict[str, Union[int, str]] = {"query": self.query, "limit": 100}
+            while url:
+                # TODO: also fetch notes and description
+                response = requests.get(url, headers=headers, params=params)
+                logging.debug(f"Got {response.json()}")
+                if response.status_code != 200:
+                    raise Exception(
+                        f"Failed to get alerts: {response.status_code} {response.text}"
+                    )
+                response.raise_for_status()
+                data.extend(response.json().get("data", []))
+                next_url = response.json().get("paging", {}).get("next", None)
+                url = next_url if next_url else None  # type: ignore
+            return [self.convert_to_issue(alert) for alert in data]
+        except requests.RequestException as e:
+            raise ConnectionError("Failed to fetch data from OpsGenie.") from e
+
+    def convert_to_issue(self, opsgenie_alert):
+        return Issue(
+            id=str(opsgenie_alert["id"]),
+            name=opsgenie_alert["message"],
+            source_type="opsgenie",
+            source_instance_id="opsgenie",
+            url=opsgenie_alert["tinyId"],
+            raw=opsgenie_alert,
+        )
+
+    def write_back_result(self, issue_id: str, result_data: LLMResult) -> None:
+        if self.team_integration_key is None:
+            raise Exception(
+                f"Please set '--opsgenie-team-integration-key' to write back results. This is an {OPSGENIE_TEAM_INTEGRATION_KEY_HELP}"
+            )
+
+        # TODO: update description to make this more visible (right now we add a comment)
+        html_output = markdown.markdown(result_data.result)
+        logging.debug(f"HTML output: {html_output}")
+
+        url = f"https://api.opsgenie.com/v2/alerts/{issue_id}/notes?identifierType=id"
+        headers = {
+            "Authorization": f"GenieKey {self.team_integration_key}",
+            "Content-Type": "application/json",
+        }
+        response = requests.post(
+            url=url,
+            json={"note": f"Automatic AI Investigation by Robusta:\n\n{html_output}\n"},
+            headers=headers,
+        )
+        logging.debug(f"Response: {response.json()}")
+        response.raise_for_status()
+
+        # We get back a response like: {'result': 'Request will be processed', 'took': 0.006, 'requestId': '<request_id>'}
+        # Now we need to lookup the request to see if it succeeded
+        request_id = response.json().get("requestId", None)
+        url = f"https://api.opsgenie.com/v2/alerts/requests/{request_id}"
+        response = requests.get(url=url, headers=headers)
+
+        logging.debug(f"Response: {response.json()}")
+        response.raise_for_status()
+        json_response = response.json()
+        if not json_response["data"]["success"]:
+            raise Exception(
+                f"Failed to write back result to OpsGenie: {json_response['data']['status']}"
+            )
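
The fetch loop above pages through results by following OpsGenie's paging.next URL until it is absent. A minimal usage sketch, with placeholder keys and query:

# Hypothetical usage sketch -- keys and query are placeholders.
from holmes.plugins.sources.opsgenie import OpsGenieSource

source = OpsGenieSource(
    api_key="<genie-api-key>",
    query="status: open",
    team_integration_key="<team-integration-key>",  # only needed for write_back_result
)
for alert in source.fetch_issues():
    print(alert.id, alert.name)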

holmes/plugins/sources/pagerduty/__init__.py
@@ -0,0 +1,147 @@
+import logging
+from typing import List, Optional
+
+import requests  # type: ignore
+
+from holmes.core.issue import Issue
+from holmes.core.tool_calling_llm import LLMResult
+from holmes.plugins.interfaces import SourcePlugin
+from holmes.utils.markdown_utils import markdown_to_plain_text
+
+
+class PagerDutySource(SourcePlugin):
+    def __init__(
+        self, api_key: str, user_email: str, incident_key: Optional[str] = None
+    ):
+        self.api_url = (
+            "https://api.pagerduty.com"  # currently hard-coded, can expose it if useful
+        )
+        self.api_key = api_key
+        self.user_email = user_email
+        self.incident_key = incident_key
+
+    def fetch_issues(self) -> List[Issue]:
+        logging.info(f"Fetching issues from {self.api_url}")
+        try:
+            headers = {
+                "Authorization": f"Token token={self.api_key}",
+                "Accept": "application/vnd.pagerduty+json;version=2",
+            }
+
+            # excludes resolved
+            query_params = "?statuses[]=triggered&statuses[]=acknowledged"
+
+            if self.incident_key:
+                query_params = f"{query_params}&incident_key={self.incident_key}"
+
+            response = requests.get(
+                f"{self.api_url}/incidents{query_params}", headers=headers
+            )
+            if response.status_code != 200:
+                print(f"Got response: {response}")
+                raise Exception(
+                    f"Failed to get issues: {response.status_code} {response.text}"
+                )
+            logging.debug(f"Got response: {response}")
+            response.raise_for_status()
+            data = response.json()
+            return [self.convert_to_issue(issue) for issue in data.get("incidents", [])]
+        except requests.RequestException as e:
+            raise ConnectionError("Failed to fetch data from PagerDuty.") from e
+
+    def fetch_issue(self, id: str) -> Optional[Issue]:  # type: ignore
+        """
+        Fetch a single issue from PagerDuty using the incident ID and convert it to an Issue object.
+
+        :param incident_id: The ID of the incident to fetch.
+        :return: An Issue object if found, otherwise None.
+        """
+        logging.info(f"Fetching issue {id} from {self.api_url}")
+
+        headers = {
+            "Authorization": f"Token token={self.api_key}",
+            "Accept": "application/vnd.pagerduty+json;version=2",
+        }
+
+        try:
+            response = requests.get(f"{self.api_url}/incidents/{id}", headers=headers)
+
+            if response.status_code == 404:
+                logging.warning(f"Incident {id} not found.")
+                return None
+
+            if response.status_code != 200:
+                logging.error(
+                    f"Failed to get issue: {response.status_code} {response.text}"
+                )
+                raise Exception(
+                    f"Failed to get issue: {response.status_code} {response.text}"
+                )
+
+            logging.debug(f"Got response: {response.json()}")
+            incident_data = response.json().get("incident")
+
+            if incident_data:
+                return self.convert_to_issue(incident_data)
+            else:
+                logging.warning(f"No incident data found for {id}.")
+                return None
+
+        except requests.RequestException as e:
+            logging.error(f"Connection error while fetching issue {id}: {e}")
+            raise ConnectionError("Failed to fetch data from PagerDuty.") from e
+
+    def convert_to_issue(self, source_issue):
+        return Issue(
+            id=source_issue["id"],
+            name=source_issue["summary"],
+            source_type="pagerduty",
+            source_instance_id=self.api_url,
+            url=f"{source_issue['html_url']}",
+            raw=source_issue,
+        )
+
+    def write_back_result(self, issue_id: str, result_data: LLMResult) -> None:
+        logging.info(f"Writing back result to issue {issue_id}")
+        if not self.user_email:
+            raise Exception(
+                "When using --update mode, --pagerduty-user-email must be provided"
+            )
+
+        try:
+            url = f"{self.api_url}/incidents/{issue_id}/notes"
+            headers = {
+                "Authorization": f"Token token={self.api_key}",
+                "Content-Type": "application/json",
+                "From": self.user_email,
+            }
+            comment = markdown_to_plain_text(result_data.result)
+            comment_data = {
+                "note": {
+                    "content": f"Automatic AI Investigation by HolmesGPT:\n\n{comment}"
+                }
+            }
+            response = requests.post(url, json=comment_data, headers=headers)
+            response.raise_for_status()
+            data = response.json()
+            logging.debug(f"Comment added to issue {issue_id}: {data}")
+        except requests.RequestException as e:
+            if e.response is not None:
+                logging.error(
+                    f"Failed to write back result to PagerDuty: {e}; {e.response.text}"
+                )
+            else:
+                logging.error(f"Failed to write back result to PagerDuty: {e}")
+            raise
+
+
+# Run with:
+# poetry run python3 -m holmes.plugins.sources.pagerduty <api-key> <user-email>
+if __name__ == "__main__":
+    import sys
+
+    pd_source = PagerDutySource(api_key=sys.argv[1], user_email=sys.argv[2])
+    issues = pd_source.fetch_issues()
+    for issue in issues:
+        pd_source.write_back_result(issue.id, LLMResult(result="This is a test"))
+        print(issue)
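
The trailing __main__ block doubles as a smoke test for fetch_issues and write_back_result. For single incidents, a minimal sketch of fetch_issue; the API key, email, and incident ID are placeholders:

# Hypothetical usage sketch -- API key, email, and incident ID are placeholders.
from holmes.plugins.sources.pagerduty import PagerDutySource

source = PagerDutySource(api_key="<pd-api-key>", user_email="bot@example.com")
issue = source.fetch_issue("PABC123")
if issue is not None:
    print(issue.name, issue.url)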

holmes/plugins/sources/prometheus/__init__.py: file without changes (empty).

holmes/plugins/sources/prometheus/models.py
@@ -0,0 +1,104 @@
+import html
+from datetime import datetime, timedelta
+from typing import Dict, List, Optional, Union
+from urllib.parse import parse_qs, unquote, urlparse
+from pydantic import BaseModel, computed_field
+
+
+# these models are used by AlertManager's push API (when alertmanager pushes alerts to us by webhook)
+# this is the standard format we use internally
+class PrometheusAlert(BaseModel):
+    status: str
+    labels: Dict[str, str]
+    annotations: Dict[str, str]
+    startsAt: datetime
+    endsAt: datetime
+    generatorURL: Optional[str] = None
+    fingerprint: str
+
+    @computed_field  # type: ignore
+    @property
+    def unique_id(self) -> str:
+        return f"{self.name}-{self.fingerprint}-{self.startsAt}"
+
+    @computed_field  # type: ignore
+    @property
+    def duration(self) -> Union[timedelta, str]:
+        if self.endsAt.year == 1:
+            return "Ongoing"
+        else:
+            duration = self.endsAt - self.startsAt
+            return duration
+
+    @computed_field  # type: ignore
+    @property
+    def name(self) -> str:
+        return self.labels["alertname"]
+
+    @computed_field  # type: ignore
+    @property
+    def definition(self) -> str:
+        """
+        Returns the promql definition of this alert
+        """
+        url = self.generatorURL
+        if not url:
+            return ""
+
+        # decode HTML entities to convert + like representations to characters
+        url = html.unescape(url)
+        parsed_url = urlparse(url)
+        query_params = parse_qs(parsed_url.query)
+
+        q_expr = query_params.get("g0.expr", [])
+        if len(q_expr) < 1 or not q_expr[0]:
+            return ""
+
+        return unquote(q_expr[0])
+
+
+class PrometheusAlertGroup(BaseModel):
+    receiver: str
+    status: str
+    alerts: List[PrometheusAlert]
+    groupLabels: Dict[str, str]
+    commonLabels: Dict[str, str]
+    commonAnnotations: Dict[str, str]
+    externalURL: str
+    version: str
+    groupKey: str
+    truncatedAlerts: int
+
+
+# these models are used by AlertManager's pull API (when pulling alerts from alertmanager via API)
+class PrometheusReceiver(BaseModel):
+    name: str
+
+
+class PrometheusAlertStatus(BaseModel):
+    state: str
+    silencedBy: List[str]
+    inhibitedBy: List[str]
+
+
+class PrometheusGettableAlert(BaseModel):
+    labels: Dict[str, str]
+    generatorURL: Optional[str] = ""
+    annotations: Dict[str, str]
+    receivers: List[PrometheusReceiver]
+    fingerprint: str
+    startsAt: datetime
+    updatedAt: datetime
+    endsAt: datetime
+    status: PrometheusAlertStatus
+
+    def to_regular_prometheus_alert(self) -> PrometheusAlert:
+        return PrometheusAlert(
+            status=self.status.state,
+            labels=self.labels,
+            annotations=self.annotations,
+            startsAt=self.startsAt,
+            endsAt=self.endsAt,
+            generatorURL=self.generatorURL,
+            fingerprint=self.fingerprint,
+        )
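
To make the computed fields concrete, a sketch of validating a hand-written webhook payload (all values below are placeholders): the year-1 endsAt sentinel yields the "Ongoing" duration, and definition recovers the PromQL expression from the g0.expr query parameter of generatorURL.

# Hypothetical payload sketch -- all values are placeholders.
from holmes.plugins.sources.prometheus.models import PrometheusAlert

alert = PrometheusAlert.model_validate(
    {
        "status": "firing",
        "labels": {"alertname": "HighCPU", "severity": "critical"},
        "annotations": {"summary": "CPU above 90% for 5m"},
        "startsAt": "2024-01-01T00:00:00Z",
        "endsAt": "0001-01-01T00:00:00Z",  # year-1 sentinel => duration is "Ongoing"
        "generatorURL": "http://prom/graph?g0.expr=cpu_usage%20%3E%200.9",
        "fingerprint": "abc123",
    }
)
print(alert.unique_id)   # e.g. "HighCPU-abc123-2024-01-01 00:00:00+00:00"
print(alert.duration)    # "Ongoing"
print(alert.definition)  # "cpu_usage > 0.9"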

holmes/plugins/sources/prometheus/plugin.py
@@ -0,0 +1,154 @@
+import json
+import logging
+import re
+from pathlib import Path
+from typing import List, Optional, Pattern
+
+import humanize
+import requests  # type: ignore
+import rich
+import rich.segment
+from pydantic import parse_obj_as
+from pydantic.json import pydantic_encoder
+from requests.auth import HTTPBasicAuth  # type: ignore
+
+from holmes.core.issue import Issue
+from holmes.plugins.interfaces import SourcePlugin
+from holmes.plugins.utils import dict_to_markdown
+
+from .models import PrometheusAlert, PrometheusGettableAlert
+
+
+class AlertManagerSource(SourcePlugin):
+    """
+    Issue IDs are of the format {alert_name}-{alert_fingerprint}-{starts_at} which is both unique and allows
+    quickly identifying the alertname and using it to filter on issue_id
+    """
+
+    def __init__(
+        self,
+        url: str,
+        username: Optional[str] = None,
+        password: Optional[str] = None,
+        alertname_filter: Optional[Pattern] = None,
+        label_filter: Optional[str] = None,
+        filepath: Optional[Path] = None,
+    ):
+        super().__init__()
+        self.url = url
+        self.username = username
+        self.password = password
+        self.alertname_filter = alertname_filter
+        self.label_filter = label_filter
+        self.filepath = filepath
+
+        if self.url is None and self.filepath is None:
+            # we don't mention --alertmanager-file to avoid confusing users - most users wont care about it
+            raise ValueError("--alertmanager-url must be specified")
+        if self.url is not None and self.filepath is not None:
+            logging.warning(
+                "Ignoring --alertmanager-url because --alertmanager-file is specified"
+            )
+        if self.label_filter and self.filepath is not None:
+            logging.warning(
+                "Ignoring --label-filter because --alertmanager-file is specified"
+            )
+        if self.url and not (
+            self.url.startswith("http://") or self.url.startswith("https://")
+        ):
+            raise ValueError("--alertmanager-url must start with http:// or https://")
+
+    def __fetch_issues_from_api(self) -> List[PrometheusAlert]:
+        fetch_alerts_url = f"{self.url}/api/v2/alerts"
+        params = {
+            "active": "true",
+            "silenced": "false",
+            "inhibited": "false",
+        }
+        if self.label_filter:
+            params["filter"] = self.label_filter
+            logging.info(f"Filtering alerts by {self.label_filter}")
+
+        if self.username is not None or self.password is not None:
+            auth = HTTPBasicAuth(self.username, self.password)  # type: ignore
+        else:
+            auth = None
+
+        logging.info(f"Loading alerts from url {fetch_alerts_url}")
+        response = requests.get(fetch_alerts_url, params=params, auth=auth)
+        if response.status_code != 200:
+            raise Exception(
+                f"Failed to get live alerts: {response.status_code} {response.text}"
+            )
+        data = response.json()
+        return [
+            a.to_regular_prometheus_alert()
+            for a in parse_obj_as(List[PrometheusGettableAlert], data)
+        ]
+
+    def __fetch_issues_from_file(self) -> List[PrometheusAlert]:
+        logging.info(f"Loading alerts from file {self.filepath}")
+        with open(self.filepath, "r") as f:  # type: ignore
+            data = json.load(f)
+        return parse_obj_as(List[PrometheusAlert], data)
+
+    def fetch_issues(self) -> List[Issue]:
+        if self.filepath is not None:
+            alerts = self.__fetch_issues_from_file()
+        else:
+            alerts = self.__fetch_issues_from_api()
+
+        if self.alertname_filter is not None:
+            alertname_filter = re.compile(self.alertname_filter)
+            alerts = [a for a in alerts if alertname_filter.match(a.unique_id)]
+
+        return [
+            Issue(
+                id=alert.unique_id,
+                name=alert.name,
+                source_type="prometheus",
+                source_instance_id=self.filepath if self.filepath else self.url,  # type: ignore
+                url=alert.generatorURL,
+                presentation_key_metadata=f"*Severity*: {alert.labels['severity']}\n*Start Time*: {alert.startsAt.strftime('%Y-%m-%d %H:%M:%S UTC')}\n*Duration*: {humanize.naturaldelta(alert.duration)}",  # type: ignore
+                presentation_all_metadata=self.__format_issue_metadata(alert),
+                raw=alert.model_dump(),
+            )
+            for alert in alerts
+        ]
+
+    def dump_raw_alerts_to_file(self, path: Path) -> None:
+        """
+        Useful for generating test data
+        """
+        alerts = self.__fetch_issues_from_api()
+        with open(path, "w") as f:
+            f.write(json.dumps(alerts, default=pydantic_encoder, indent=2))
+
+    def output_curl_commands(self, console: rich.console.Console) -> None:
+        """
+        Outputs curl commands to send each alert to Alertmanager via the API.
+        """
+        alerts = self.__fetch_issues_from_api()
+        for alert in alerts:
+            alert_json = json.dumps(
+                [alert.model_dump()], default=pydantic_encoder
+            )  # Wrap in a list
+            curl_command = (
+                f"curl -X POST -H 'Content-Type: application/json' "
+                f"-d '{alert_json}' {self.url}/api/v2/alerts"
+            )
+            console.print(f"[green]{alert.name} alert[/green]")
+            console.print(f"[yellow]{curl_command}[/yellow]", soft_wrap=True)
+
+    @staticmethod
+    def __format_issue_metadata(alert: PrometheusAlert) -> Optional[str]:
+        if not alert.labels and not alert.annotations:
+            return None
+        text = ""
+        if alert.labels:
+            text += "*Labels:*\n"
+            text += dict_to_markdown(alert.labels)
+        if alert.annotations:
+            text += "*Annotations:*\n"
+            text += dict_to_markdown(alert.annotations)
+        return text
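
A minimal sketch of constructing this source against a live Alertmanager; the URL and label filter are placeholders. Note that when filepath is given, it takes precedence and alerts are loaded from the JSON file instead of the API.

# Hypothetical usage sketch -- URL and filter are placeholders.
from holmes.plugins.sources.prometheus.plugin import AlertManagerSource

source = AlertManagerSource(
    url="http://alertmanager.monitoring.svc.cluster.local:9093",
    label_filter='severity="critical"',
)
for issue in source.fetch_issues():
    print(issue.id, issue.url)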