holmesgpt 0.11.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of holmesgpt may be problematic; see the details below for more information.

Files changed (183) hide show
  1. holmes/.git_archival.json +7 -0
  2. holmes/__init__.py +76 -0
  3. holmes/__init__.py.bak +76 -0
  4. holmes/clients/robusta_client.py +24 -0
  5. holmes/common/env_vars.py +47 -0
  6. holmes/config.py +526 -0
  7. holmes/core/__init__.py +0 -0
  8. holmes/core/conversations.py +578 -0
  9. holmes/core/investigation.py +152 -0
  10. holmes/core/investigation_structured_output.py +264 -0
  11. holmes/core/issue.py +54 -0
  12. holmes/core/llm.py +250 -0
  13. holmes/core/models.py +157 -0
  14. holmes/core/openai_formatting.py +51 -0
  15. holmes/core/performance_timing.py +72 -0
  16. holmes/core/prompt.py +42 -0
  17. holmes/core/resource_instruction.py +17 -0
  18. holmes/core/runbooks.py +26 -0
  19. holmes/core/safeguards.py +120 -0
  20. holmes/core/supabase_dal.py +540 -0
  21. holmes/core/tool_calling_llm.py +798 -0
  22. holmes/core/tools.py +566 -0
  23. holmes/core/tools_utils/__init__.py +0 -0
  24. holmes/core/tools_utils/tool_executor.py +65 -0
  25. holmes/core/tools_utils/toolset_utils.py +52 -0
  26. holmes/core/toolset_manager.py +418 -0
  27. holmes/interactive.py +229 -0
  28. holmes/main.py +1041 -0
  29. holmes/plugins/__init__.py +0 -0
  30. holmes/plugins/destinations/__init__.py +6 -0
  31. holmes/plugins/destinations/slack/__init__.py +2 -0
  32. holmes/plugins/destinations/slack/plugin.py +163 -0
  33. holmes/plugins/interfaces.py +32 -0
  34. holmes/plugins/prompts/__init__.py +48 -0
  35. holmes/plugins/prompts/_current_date_time.jinja2 +1 -0
  36. holmes/plugins/prompts/_default_log_prompt.jinja2 +11 -0
  37. holmes/plugins/prompts/_fetch_logs.jinja2 +36 -0
  38. holmes/plugins/prompts/_general_instructions.jinja2 +86 -0
  39. holmes/plugins/prompts/_global_instructions.jinja2 +12 -0
  40. holmes/plugins/prompts/_runbook_instructions.jinja2 +13 -0
  41. holmes/plugins/prompts/_toolsets_instructions.jinja2 +56 -0
  42. holmes/plugins/prompts/generic_ask.jinja2 +36 -0
  43. holmes/plugins/prompts/generic_ask_conversation.jinja2 +32 -0
  44. holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +50 -0
  45. holmes/plugins/prompts/generic_investigation.jinja2 +42 -0
  46. holmes/plugins/prompts/generic_post_processing.jinja2 +13 -0
  47. holmes/plugins/prompts/generic_ticket.jinja2 +12 -0
  48. holmes/plugins/prompts/investigation_output_format.jinja2 +32 -0
  49. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +84 -0
  50. holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +39 -0
  51. holmes/plugins/runbooks/README.md +22 -0
  52. holmes/plugins/runbooks/__init__.py +100 -0
  53. holmes/plugins/runbooks/catalog.json +14 -0
  54. holmes/plugins/runbooks/jira.yaml +12 -0
  55. holmes/plugins/runbooks/kube-prometheus-stack.yaml +10 -0
  56. holmes/plugins/runbooks/networking/dns_troubleshooting_instructions.md +66 -0
  57. holmes/plugins/runbooks/upgrade/upgrade_troubleshooting_instructions.md +44 -0
  58. holmes/plugins/sources/github/__init__.py +77 -0
  59. holmes/plugins/sources/jira/__init__.py +123 -0
  60. holmes/plugins/sources/opsgenie/__init__.py +93 -0
  61. holmes/plugins/sources/pagerduty/__init__.py +147 -0
  62. holmes/plugins/sources/prometheus/__init__.py +0 -0
  63. holmes/plugins/sources/prometheus/models.py +104 -0
  64. holmes/plugins/sources/prometheus/plugin.py +154 -0
  65. holmes/plugins/toolsets/__init__.py +171 -0
  66. holmes/plugins/toolsets/aks-node-health.yaml +65 -0
  67. holmes/plugins/toolsets/aks.yaml +86 -0
  68. holmes/plugins/toolsets/argocd.yaml +70 -0
  69. holmes/plugins/toolsets/atlas_mongodb/instructions.jinja2 +8 -0
  70. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +307 -0
  71. holmes/plugins/toolsets/aws.yaml +76 -0
  72. holmes/plugins/toolsets/azure_sql/__init__.py +0 -0
  73. holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +600 -0
  74. holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +309 -0
  75. holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +445 -0
  76. holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +251 -0
  77. holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +317 -0
  78. holmes/plugins/toolsets/azure_sql/azure_base_toolset.py +55 -0
  79. holmes/plugins/toolsets/azure_sql/azure_sql_instructions.jinja2 +137 -0
  80. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +183 -0
  81. holmes/plugins/toolsets/azure_sql/install.md +66 -0
  82. holmes/plugins/toolsets/azure_sql/tools/__init__.py +1 -0
  83. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +324 -0
  84. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +243 -0
  85. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +205 -0
  86. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +249 -0
  87. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +373 -0
  88. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +237 -0
  89. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +172 -0
  90. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +170 -0
  91. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +188 -0
  92. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +180 -0
  93. holmes/plugins/toolsets/azure_sql/utils.py +83 -0
  94. holmes/plugins/toolsets/bash/__init__.py +0 -0
  95. holmes/plugins/toolsets/bash/bash_instructions.jinja2 +14 -0
  96. holmes/plugins/toolsets/bash/bash_toolset.py +208 -0
  97. holmes/plugins/toolsets/bash/common/bash.py +52 -0
  98. holmes/plugins/toolsets/bash/common/config.py +14 -0
  99. holmes/plugins/toolsets/bash/common/stringify.py +25 -0
  100. holmes/plugins/toolsets/bash/common/validators.py +24 -0
  101. holmes/plugins/toolsets/bash/grep/__init__.py +52 -0
  102. holmes/plugins/toolsets/bash/kubectl/__init__.py +100 -0
  103. holmes/plugins/toolsets/bash/kubectl/constants.py +96 -0
  104. holmes/plugins/toolsets/bash/kubectl/kubectl_describe.py +66 -0
  105. holmes/plugins/toolsets/bash/kubectl/kubectl_events.py +88 -0
  106. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +108 -0
  107. holmes/plugins/toolsets/bash/kubectl/kubectl_logs.py +20 -0
  108. holmes/plugins/toolsets/bash/kubectl/kubectl_run.py +46 -0
  109. holmes/plugins/toolsets/bash/kubectl/kubectl_top.py +81 -0
  110. holmes/plugins/toolsets/bash/parse_command.py +103 -0
  111. holmes/plugins/toolsets/confluence.yaml +19 -0
  112. holmes/plugins/toolsets/consts.py +5 -0
  113. holmes/plugins/toolsets/coralogix/api.py +158 -0
  114. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +103 -0
  115. holmes/plugins/toolsets/coralogix/utils.py +181 -0
  116. holmes/plugins/toolsets/datadog.py +153 -0
  117. holmes/plugins/toolsets/docker.yaml +46 -0
  118. holmes/plugins/toolsets/git.py +756 -0
  119. holmes/plugins/toolsets/grafana/__init__.py +0 -0
  120. holmes/plugins/toolsets/grafana/base_grafana_toolset.py +54 -0
  121. holmes/plugins/toolsets/grafana/common.py +68 -0
  122. holmes/plugins/toolsets/grafana/grafana_api.py +31 -0
  123. holmes/plugins/toolsets/grafana/loki_api.py +89 -0
  124. holmes/plugins/toolsets/grafana/tempo_api.py +124 -0
  125. holmes/plugins/toolsets/grafana/toolset_grafana.py +102 -0
  126. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +102 -0
  127. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +10 -0
  128. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +299 -0
  129. holmes/plugins/toolsets/grafana/trace_parser.py +195 -0
  130. holmes/plugins/toolsets/helm.yaml +42 -0
  131. holmes/plugins/toolsets/internet/internet.py +275 -0
  132. holmes/plugins/toolsets/internet/notion.py +137 -0
  133. holmes/plugins/toolsets/kafka.py +638 -0
  134. holmes/plugins/toolsets/kubernetes.yaml +255 -0
  135. holmes/plugins/toolsets/kubernetes_logs.py +426 -0
  136. holmes/plugins/toolsets/kubernetes_logs.yaml +42 -0
  137. holmes/plugins/toolsets/logging_utils/__init__.py +0 -0
  138. holmes/plugins/toolsets/logging_utils/logging_api.py +217 -0
  139. holmes/plugins/toolsets/logging_utils/types.py +0 -0
  140. holmes/plugins/toolsets/mcp/toolset_mcp.py +135 -0
  141. holmes/plugins/toolsets/newrelic.py +222 -0
  142. holmes/plugins/toolsets/opensearch/__init__.py +0 -0
  143. holmes/plugins/toolsets/opensearch/opensearch.py +245 -0
  144. holmes/plugins/toolsets/opensearch/opensearch_logs.py +151 -0
  145. holmes/plugins/toolsets/opensearch/opensearch_traces.py +211 -0
  146. holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +12 -0
  147. holmes/plugins/toolsets/opensearch/opensearch_utils.py +166 -0
  148. holmes/plugins/toolsets/prometheus/prometheus.py +818 -0
  149. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +38 -0
  150. holmes/plugins/toolsets/rabbitmq/api.py +398 -0
  151. holmes/plugins/toolsets/rabbitmq/rabbitmq_instructions.jinja2 +37 -0
  152. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +222 -0
  153. holmes/plugins/toolsets/robusta/__init__.py +0 -0
  154. holmes/plugins/toolsets/robusta/robusta.py +235 -0
  155. holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +24 -0
  156. holmes/plugins/toolsets/runbook/__init__.py +0 -0
  157. holmes/plugins/toolsets/runbook/runbook_fetcher.py +78 -0
  158. holmes/plugins/toolsets/service_discovery.py +92 -0
  159. holmes/plugins/toolsets/servicenow/install.md +37 -0
  160. holmes/plugins/toolsets/servicenow/instructions.jinja2 +3 -0
  161. holmes/plugins/toolsets/servicenow/servicenow.py +198 -0
  162. holmes/plugins/toolsets/slab.yaml +20 -0
  163. holmes/plugins/toolsets/utils.py +137 -0
  164. holmes/plugins/utils.py +14 -0
  165. holmes/utils/__init__.py +0 -0
  166. holmes/utils/cache.py +84 -0
  167. holmes/utils/cert_utils.py +40 -0
  168. holmes/utils/default_toolset_installation_guide.jinja2 +44 -0
  169. holmes/utils/definitions.py +13 -0
  170. holmes/utils/env.py +53 -0
  171. holmes/utils/file_utils.py +56 -0
  172. holmes/utils/global_instructions.py +20 -0
  173. holmes/utils/holmes_status.py +22 -0
  174. holmes/utils/holmes_sync_toolsets.py +80 -0
  175. holmes/utils/markdown_utils.py +55 -0
  176. holmes/utils/pydantic_utils.py +54 -0
  177. holmes/utils/robusta.py +10 -0
  178. holmes/utils/tags.py +97 -0
  179. holmesgpt-0.11.5.dist-info/LICENSE.txt +21 -0
  180. holmesgpt-0.11.5.dist-info/METADATA +400 -0
  181. holmesgpt-0.11.5.dist-info/RECORD +183 -0
  182. holmesgpt-0.11.5.dist-info/WHEEL +4 -0
  183. holmesgpt-0.11.5.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,123 @@
1
+ import logging
2
+ from typing import List, Optional
3
+
4
+ import requests # type: ignore
5
+ from requests.auth import HTTPBasicAuth # type: ignore
6
+
7
+ from holmes.core.issue import Issue
8
+ from holmes.core.tool_calling_llm import LLMResult
9
+ from holmes.plugins.interfaces import SourcePlugin
10
+
11
+
12
class JiraSource(SourcePlugin):
    """Source plugin that reads issues from Jira via its REST API (basic auth)
    and writes investigation results back as comments."""

    def __init__(self, url: str, username: str, api_key: str, jql_query: str):
        self.url = url  # base Jira URL, e.g. https://yourcompany.atlassian.net
        self.username = username
        self.api_key = api_key
        self.jql_query = jql_query  # JQL used by fetch_issues to select issues

    def fetch_issues(self) -> List[Issue]:
        """Fetch all issues matching the configured JQL query.

        Returns:
            A list of Issue objects converted from the raw Jira payload.

        Raises:
            ConnectionError: if the HTTP request to Jira fails.
            Exception: if Jira responds with a non-200 status code.
        """
        logging.info(f"Fetching issues from {self.url} with JQL='{self.jql_query}'")
        try:
            response = requests.get(
                f"{self.url}/rest/api/2/search",
                params={"jql": self.jql_query},
                auth=HTTPBasicAuth(self.username, self.api_key),
                headers={"Accept": "application/json"},
            )
            if response.status_code != 200:
                raise Exception(
                    f"Failed to get issues: {response.status_code} {response.text}"
                )
            logging.info(f"Got {response}")
            # NOTE: the explicit status check above already covers error statuses,
            # so the redundant raise_for_status() call was removed.
            data = response.json()
            return [self.convert_to_issue(issue) for issue in data.get("issues", [])]
        except requests.RequestException as e:
            raise ConnectionError("Failed to fetch data from Jira.") from e

    def convert_to_issue(self, jira_issue, description: Optional[str] = None):
        """Convert a raw Jira issue dict into an Issue.

        Bug fix: the optional ``description`` argument was previously always
        overwritten by re-extracting it from the raw issue; it is now honored
        when supplied and only extracted when the caller did not provide one.
        """
        if description is None:
            description = self.extract_description(jira_issue)
        return Issue(
            id=jira_issue["id"],
            name=jira_issue["fields"]["summary"],
            source_type="jira",
            source_instance_id=self.url,
            url=f"{self.url}/browse/{jira_issue['key']}",
            description=description,
            raw=jira_issue,
        )

    def extract_description(self, jira_issue) -> str:
        """
        Extracts and formats the issue description.

        Handles Jira's ADF (Atlassian Document Format): paragraph blocks become
        plain lines and ordered lists become "1. ..." numbered lines.

        Bug fix: Jira returns ``"description": null`` for issues without a
        description; the previous code called ``.get()`` on None and crashed.
        """
        fields = jira_issue.get("fields") or {}
        description_field = fields.get("description") or {}
        description_blocks = description_field.get("content", [])
        description_text = []

        for block in description_blocks:
            if block["type"] == "paragraph":
                text = " ".join(
                    [c["text"] for c in block.get("content", []) if "text" in c]
                )
                description_text.append(text)
            elif block["type"] == "orderedList":
                for idx, item in enumerate(block["content"], start=1):
                    text = " ".join(
                        [
                            c["text"]
                            for c in item["content"][0].get("content", [])
                            if "text" in c
                        ]
                    )
                    description_text.append(f"{idx}. {text}")

        return (
            "\n".join(description_text)
            if description_text
            else "No description available."
        )

    def write_back_result(self, issue_id: str, result_data: LLMResult) -> None:
        """Post the investigation result as a comment on the given Jira issue.

        Raises:
            requests.HTTPError: if Jira rejects the comment request.
        """
        # TODO: upload files and show tool usage
        comment_url = f"{self.url}/rest/api/2/issue/{issue_id}/comment"
        comment_data = {
            "body": f"Automatic AI Investigation by Robusta:\n\n{result_data.result}\n"
        }
        response = requests.post(
            comment_url,
            json=comment_data,
            auth=HTTPBasicAuth(self.username, self.api_key),
            headers={"Accept": "application/json"},
        )
        response.raise_for_status()
        data = response.json()
        logging.debug(f"Comment added to issue {issue_id}: {data}")
100
+
101
+
102
class JiraServiceManagementSource(JiraSource):
    """Jira Service Management flavor of the Jira source plugin.

    Reuses all of JiraSource's behavior; only single-issue fetching differs
    (it goes through the v3 issue endpoint).
    """

    def __init__(self, url: str, username: str, api_key: str, jql_query: str):
        super().__init__(url, username, api_key, jql_query)

    def fetch_issue(self, id: str) -> Issue:
        """
        Might also be the same in jira, needs additional testing
        """
        logging.info(f"Fetching Jira Service Management issue {id} from {self.url}")

        try:
            resp = requests.get(
                f"{self.url}/rest/api/3/issue/{id}",
                auth=HTTPBasicAuth(self.username, self.api_key),
                headers={"Accept": "application/json"},
            )
            resp.raise_for_status()
            jsm_issue = resp.json()
            extracted = self.extract_description(jsm_issue)
            return self.convert_to_issue(jsm_issue, extracted)
        except requests.RequestException as e:
            raise ConnectionError(f"Failed to fetch JSM ticket {id}") from e
@@ -0,0 +1,93 @@
1
+ import logging
2
+ from typing import Dict, List, Optional, Union
3
+
4
+ import markdown # type: ignore
5
+ import requests # type: ignore
6
+
7
+ from holmes.core.issue import Issue
8
+ from holmes.core.tool_calling_llm import LLMResult
9
+ from holmes.plugins.interfaces import SourcePlugin
10
+
11
# Explains where to obtain an OpsGenie team integration key; interpolated into
# the error raised by OpsGenieSource.write_back_result when the key is missing.
OPSGENIE_TEAM_INTEGRATION_KEY_HELP = "OpsGenie Team Integration key for writing back results. (NOT a normal API Key.) Get it from Teams > YourTeamName > Integrations > Add Integration > API Key. Don't forget to turn on the integration!"
12
+
13
+
14
class OpsGenieSource(SourcePlugin):
    """Source plugin that reads alerts from OpsGenie and writes investigation
    results back as alert notes."""

    def __init__(
        self, api_key: str, query: str, team_integration_key: Optional[str] = None
    ):
        self.api_key = api_key  # regular API key, used for reading alerts
        self.query = query  # OpsGenie search query used by fetch_issues
        # a separate team integration key is required for writing notes back
        self.team_integration_key = team_integration_key

    def fetch_issues(self) -> List[Issue]:
        """Fetch all alerts matching the configured query, following pagination.

        Raises:
            ConnectionError: if an HTTP request to OpsGenie fails.
            Exception: if OpsGenie responds with a non-200 status code.
        """
        logging.info(f"Fetching alerts from OpsGenie with query: {self.query}")
        try:
            data = []
            url = "https://api.opsgenie.com/v2/alerts"
            headers = {
                "Authorization": f"GenieKey {self.api_key}",
                "Content-Type": "application/json",
            }
            params: Dict[str, Union[int, str]] = {"query": self.query, "limit": 100}
            while url:
                # TODO: also fetch notes and description
                response = requests.get(url, headers=headers, params=params)
                # check the status BEFORE parsing JSON: previously a non-JSON
                # error body raised a JSONDecodeError that masked the real
                # HTTP failure; also parse the body once instead of calling
                # response.json() three times per page
                if response.status_code != 200:
                    raise Exception(
                        f"Failed to get alerts: {response.status_code} {response.text}"
                    )
                payload = response.json()
                logging.debug(f"Got {payload}")
                data.extend(payload.get("data", []))
                # OpsGenie returns a fully-qualified "next" URL when more pages exist
                url = payload.get("paging", {}).get("next", None)
            return [self.convert_to_issue(alert) for alert in data]
        except requests.RequestException as e:
            raise ConnectionError("Failed to fetch data from OpsGenie.") from e

    def convert_to_issue(self, opsgenie_alert):
        """Convert a raw OpsGenie alert dict into an Issue."""
        return Issue(
            id=str(opsgenie_alert["id"]),
            name=opsgenie_alert["message"],
            source_type="opsgenie",
            source_instance_id="opsgenie",
            # NOTE(review): tinyId is OpsGenie's short alert id, not a full URL —
            # confirm storing it in the url field is intended
            url=opsgenie_alert["tinyId"],
            raw=opsgenie_alert,
        )

    def write_back_result(self, issue_id: str, result_data: LLMResult) -> None:
        """Add the investigation result as a note on the given OpsGenie alert.

        OpsGenie processes note creation asynchronously, so after posting we
        look up the returned request id to confirm the note was accepted.

        Raises:
            Exception: if no team integration key is configured, if OpsGenie
                returns no request id, or if the async request failed.
            requests.HTTPError: if either HTTP call returns an error status.
        """
        if self.team_integration_key is None:
            raise Exception(
                f"Please set '--opsgenie-team-integration-key' to write back results. This is an {OPSGENIE_TEAM_INTEGRATION_KEY_HELP}"
            )

        # TODO: update description to make this more visible (right now we add a comment)
        html_output = markdown.markdown(result_data.result)
        logging.debug(f"HTML output: {html_output}")

        url = f"https://api.opsgenie.com/v2/alerts/{issue_id}/notes?identifierType=id"
        headers = {
            "Authorization": f"GenieKey {self.team_integration_key}",
            "Content-Type": "application/json",
        }
        response = requests.post(
            url=url,
            json={"note": f"Automatic AI Investigation by Robusta:\n\n{html_output}\n"},
            headers=headers,
        )
        logging.debug(f"Response: {response.json()}")
        response.raise_for_status()

        # We get back a response like: {'result': 'Request will be processed', 'took': 0.006, 'requestId': '<request_id>'}
        # Now we need to lookup the request to see if it succeeded
        request_id = response.json().get("requestId", None)
        if not request_id:
            # robustness: without a request id we cannot verify the async result
            # (previously this would have queried .../requests/None)
            raise Exception(
                f"OpsGenie did not return a requestId for the note on alert {issue_id}"
            )
        url = f"https://api.opsgenie.com/v2/alerts/requests/{request_id}"
        response = requests.get(url=url, headers=headers)

        logging.debug(f"Response: {response.json()}")
        response.raise_for_status()
        json_response = response.json()
        if not json_response["data"]["success"]:
            raise Exception(
                f"Failed to write back result to OpsGenie: {json_response['data']['status']}"
            )
@@ -0,0 +1,147 @@
1
+ import logging
2
+ from typing import List, Optional
3
+
4
+ import requests # type: ignore
5
+
6
+ from holmes.core.issue import Issue
7
+ from holmes.core.tool_calling_llm import LLMResult
8
+ from holmes.plugins.interfaces import SourcePlugin
9
+ from holmes.utils.markdown_utils import markdown_to_plain_text
10
+
11
+
12
class PagerDutySource(SourcePlugin):
    """Source plugin that reads incidents from PagerDuty and writes
    investigation results back as incident notes."""

    def __init__(
        self, api_key: str, user_email: str, incident_key: Optional[str] = None
    ):
        self.api_url = (
            "https://api.pagerduty.com"  # currently hard-coded, can expose it if useful
        )
        self.api_key = api_key
        # required by PagerDuty when posting notes (sent in the "From" header)
        self.user_email = user_email
        self.incident_key = incident_key  # optional filter for a single incident key

    def fetch_issues(self) -> List[Issue]:
        """Fetch all unresolved (triggered or acknowledged) incidents.

        Raises:
            ConnectionError: if the HTTP request to PagerDuty fails.
            Exception: if PagerDuty responds with a non-200 status code.
        """
        logging.info(f"Fetching issues from {self.api_url}")
        try:
            headers = {
                "Authorization": f"Token token={self.api_key}",
                "Accept": "application/vnd.pagerduty+json;version=2",
            }

            # excludes resolved
            query_params = "?statuses[]=triggered&statuses[]=acknowledged"

            if self.incident_key:
                query_params = f"{query_params}&incident_key={self.incident_key}"

            response = requests.get(
                f"{self.api_url}/incidents{query_params}", headers=headers
            )
            if response.status_code != 200:
                # bug fix: this was a bare print(); use logging like the rest
                # of the class
                logging.error(f"Got response: {response}")
                raise Exception(
                    f"Failed to get issues: {response.status_code} {response.text}"
                )
            logging.debug(f"Got response: {response}")
            # the explicit status check above makes raise_for_status() redundant
            data = response.json()
            return [self.convert_to_issue(issue) for issue in data.get("incidents", [])]
        except requests.RequestException as e:
            raise ConnectionError("Failed to fetch data from PagerDuty.") from e

    def fetch_issue(self, id: str) -> Optional[Issue]:  # type: ignore
        """
        Fetch a single issue from PagerDuty using the incident ID and convert it to an Issue object.

        :param id: The ID of the incident to fetch.
        :return: An Issue object if found, otherwise None.
        """
        logging.info(f"Fetching issue {id} from {self.api_url}")

        headers = {
            "Authorization": f"Token token={self.api_key}",
            "Accept": "application/vnd.pagerduty+json;version=2",
        }

        try:
            response = requests.get(f"{self.api_url}/incidents/{id}", headers=headers)

            if response.status_code == 404:
                logging.warning(f"Incident {id} not found.")
                return None

            if response.status_code != 200:
                logging.error(
                    f"Failed to get issue: {response.status_code} {response.text}"
                )
                raise Exception(
                    f"Failed to get issue: {response.status_code} {response.text}"
                )

            # parse the body once instead of calling response.json() twice
            payload = response.json()
            logging.debug(f"Got response: {payload}")
            incident_data = payload.get("incident")

            if incident_data:
                return self.convert_to_issue(incident_data)
            else:
                logging.warning(f"No incident data found for {id}.")
                return None

        except requests.RequestException as e:
            logging.error(f"Connection error while fetching issue {id}: {e}")
            raise ConnectionError("Failed to fetch data from PagerDuty.") from e

    def convert_to_issue(self, source_issue):
        """Convert a raw PagerDuty incident dict into an Issue."""
        return Issue(
            id=source_issue["id"],
            name=source_issue["summary"],
            source_type="pagerduty",
            source_instance_id=self.api_url,
            url=source_issue["html_url"],
            raw=source_issue,
        )

    def write_back_result(self, issue_id: str, result_data: LLMResult) -> None:
        """Post the investigation result as a plain-text note on the incident.

        Raises:
            Exception: if no user email is configured (PagerDuty requires one).
            requests.RequestException: re-raised after logging if the POST fails.
        """
        logging.info(f"Writing back result to issue {issue_id}")
        if not self.user_email:
            raise Exception(
                "When using --update mode, --pagerduty-user-email must be provided"
            )

        try:
            url = f"{self.api_url}/incidents/{issue_id}/notes"
            headers = {
                "Authorization": f"Token token={self.api_key}",
                "Content-Type": "application/json",
                "From": self.user_email,
            }
            # PagerDuty notes are plain text, so strip markdown formatting
            comment = markdown_to_plain_text(result_data.result)
            comment_data = {
                "note": {
                    "content": f"Automatic AI Investigation by HolmesGPT:\n\n{comment}"
                }
            }
            response = requests.post(url, json=comment_data, headers=headers)
            response.raise_for_status()
            data = response.json()
            logging.debug(f"Comment added to issue {issue_id}: {data}")
        except requests.RequestException as e:
            # include the server's response body when one is available
            if e.response is not None:
                logging.error(
                    f"Failed to write back result to PagerDuty: {e}; {e.response.text}"
                )
            else:
                logging.error(f"Failed to write back result to PagerDuty: {e}")
            raise
136
+
137
+
138
# Run with:
# poetry run python3 -m holmes.plugins.sources.pagerduty <api-key> <user-email>
if __name__ == "__main__":
    import sys

    # robustness: fail with a usage message instead of an IndexError when
    # the CLI arguments are missing
    if len(sys.argv) < 3:
        print(
            "Usage: python3 -m holmes.plugins.sources.pagerduty <api-key> <user-email>"
        )
        sys.exit(1)

    pd_source = PagerDutySource(api_key=sys.argv[1], user_email=sys.argv[2])
    issues = pd_source.fetch_issues()
    for issue in issues:
        # NOTE: this writes a test note back to every fetched incident
        pd_source.write_back_result(issue.id, LLMResult(result="This is a test"))
        print(issue)
File without changes
@@ -0,0 +1,104 @@
1
+ import html
2
+ from datetime import datetime, timedelta
3
+ from typing import Dict, List, Optional, Union
4
+ from urllib.parse import parse_qs, unquote, urlparse
5
+ from pydantic import BaseModel, computed_field
6
+
7
+
8
+ # these models are used by AlertManager's push API (when alertmanager pushes alerts to us by webhook)
9
+ # this is the standard format we use internally
10
class PrometheusAlert(BaseModel):
    """A single alert in the standard Alertmanager webhook (push) shape; used
    internally as the canonical alert format."""

    status: str
    labels: Dict[str, str]
    annotations: Dict[str, str]
    startsAt: datetime
    endsAt: datetime
    generatorURL: Optional[str] = None
    fingerprint: str

    @computed_field  # type: ignore
    @property
    def unique_id(self) -> str:
        """Stable identifier combining alert name, fingerprint and start time."""
        return f"{self.name}-{self.fingerprint}-{self.startsAt}"

    @computed_field  # type: ignore
    @property
    def duration(self) -> Union[timedelta, str]:
        """How long the alert fired, or "Ongoing" when it has no end time yet."""
        # an endsAt in year 0001 marks an alert that is still firing
        if self.endsAt.year != 1:
            return self.endsAt - self.startsAt
        return "Ongoing"

    @computed_field  # type: ignore
    @property
    def name(self) -> str:
        """The alert's name, taken from the "alertname" label."""
        return self.labels["alertname"]

    @computed_field  # type: ignore
    @property
    def definition(self) -> str:
        """
        Returns the promql definition of this alert
        """
        generator = self.generatorURL
        if not generator:
            return ""

        # decode HTML entities so representations like &#43; become real characters
        decoded = html.unescape(generator)
        query = parse_qs(urlparse(decoded).query)

        exprs = query.get("g0.expr", [])
        if not exprs or not exprs[0]:
            return ""

        return unquote(exprs[0])
58
+
59
+
60
class PrometheusAlertGroup(BaseModel):
    # A grouped webhook payload as pushed by Alertmanager (one POST per group
    # of alerts sharing the same groupLabels).
    receiver: str
    status: str
    alerts: List[PrometheusAlert]
    groupLabels: Dict[str, str]
    commonLabels: Dict[str, str]
    commonAnnotations: Dict[str, str]
    externalURL: str
    version: str
    groupKey: str
    truncatedAlerts: int
71
+
72
+
73
+ # these models are used by AlertManager's pull API (when pulling alerts from alertmanager via API)
74
class PrometheusReceiver(BaseModel):
    # A receiver entry as returned by Alertmanager's pull API.
    name: str
76
+
77
+
78
class PrometheusAlertStatus(BaseModel):
    # Alert status object from Alertmanager's pull API: the alert state plus
    # the ids of any silences/inhibitions currently suppressing it.
    state: str
    silencedBy: List[str]
    inhibitedBy: List[str]
82
+
83
+
84
class PrometheusGettableAlert(BaseModel):
    # Alert shape returned by Alertmanager's pull API (GET /api/v2/alerts).
    labels: Dict[str, str]
    generatorURL: Optional[str] = ""
    annotations: Dict[str, str]
    receivers: List[PrometheusReceiver]
    fingerprint: str
    startsAt: datetime
    updatedAt: datetime
    endsAt: datetime
    status: PrometheusAlertStatus

    def to_regular_prometheus_alert(self) -> PrometheusAlert:
        """Convert this pull-API alert into the standard PrometheusAlert model.

        Fields only present in the pull API (receivers, updatedAt, silence and
        inhibition info) are dropped; the nested status object is flattened to
        its state string.
        """
        return PrometheusAlert(
            status=self.status.state,
            labels=self.labels,
            annotations=self.annotations,
            startsAt=self.startsAt,
            endsAt=self.endsAt,
            generatorURL=self.generatorURL,
            fingerprint=self.fingerprint,
        )
@@ -0,0 +1,154 @@
1
+ import json
2
+ import logging
3
+ import re
4
+ from pathlib import Path
5
+ from typing import List, Optional, Pattern
6
+
7
+ import humanize
8
+ import requests # type: ignore
9
+ import rich
10
+ import rich.segment
11
+ from pydantic import parse_obj_as
12
+ from pydantic.json import pydantic_encoder
13
+ from requests.auth import HTTPBasicAuth # type: ignore
14
+
15
+ from holmes.core.issue import Issue
16
+ from holmes.plugins.interfaces import SourcePlugin
17
+ from holmes.plugins.utils import dict_to_markdown
18
+
19
+ from .models import PrometheusAlert, PrometheusGettableAlert
20
+
21
+
22
class AlertManagerSource(SourcePlugin):
    """
    Issue IDs are of the format {alert_name}-{alert_fingerprint}-{starts_at} which is both unique and allows
    quickly identifying the alertname and using it to filter on issue_id
    """

    def __init__(
        self,
        url: str,
        username: Optional[str] = None,
        password: Optional[str] = None,
        alertname_filter: Optional[Pattern] = None,
        label_filter: Optional[str] = None,
        filepath: Optional[Path] = None,
    ):
        # url: Alertmanager base URL; filepath: alternative local JSON dump of
        # alerts (mutually preferred over url when both are set — see warnings).
        super().__init__()
        self.url = url
        self.username = username
        self.password = password
        self.alertname_filter = alertname_filter
        self.label_filter = label_filter
        self.filepath = filepath

        # validation is order-dependent: missing-source first, then conflicting
        # sources, then options that only make sense for the API path
        if self.url is None and self.filepath is None:
            # we don't mention --alertmanager-file to avoid confusing users - most users wont care about it
            raise ValueError("--alertmanager-url must be specified")
        if self.url is not None and self.filepath is not None:
            logging.warning(
                "Ignoring --alertmanager-url because --alertmanager-file is specified"
            )
        if self.label_filter and self.filepath is not None:
            logging.warning(
                "Ignoring --label-filter because --alertmanager-file is specified"
            )
        if self.url and not (
            self.url.startswith("http://") or self.url.startswith("https://")
        ):
            raise ValueError("--alertmanager-url must start with http:// or https://")

    def __fetch_issues_from_api(self) -> List[PrometheusAlert]:
        """Pull active, unsilenced, uninhibited alerts from the Alertmanager API."""
        fetch_alerts_url = f"{self.url}/api/v2/alerts"
        params = {
            "active": "true",
            "silenced": "false",
            "inhibited": "false",
        }
        if self.label_filter:
            params["filter"] = self.label_filter
            logging.info(f"Filtering alerts by {self.label_filter}")

        if self.username is not None or self.password is not None:
            auth = HTTPBasicAuth(self.username, self.password)  # type: ignore
        else:
            auth = None

        logging.info(f"Loading alerts from url {fetch_alerts_url}")
        response = requests.get(fetch_alerts_url, params=params, auth=auth)
        if response.status_code != 200:
            raise Exception(
                f"Failed to get live alerts: {response.status_code} {response.text}"
            )
        data = response.json()
        # NOTE(review): parse_obj_as is the pydantic v1 API while model_dump
        # (v2) is used below — confirm the pinned pydantic version supports both
        return [
            a.to_regular_prometheus_alert()
            for a in parse_obj_as(List[PrometheusGettableAlert], data)
        ]

    def __fetch_issues_from_file(self) -> List[PrometheusAlert]:
        """Load alerts from a local JSON file (push-format PrometheusAlert list)."""
        logging.info(f"Loading alerts from file {self.filepath}")
        with open(self.filepath, "r") as f:  # type: ignore
            data = json.load(f)
        return parse_obj_as(List[PrometheusAlert], data)

    def fetch_issues(self) -> List[Issue]:
        """Fetch alerts (from file if configured, else the API) as Issues.

        Applies alertname_filter, if set, against each alert's unique_id
        (which begins with the alert name, so a name prefix pattern works).
        """
        if self.filepath is not None:
            alerts = self.__fetch_issues_from_file()
        else:
            alerts = self.__fetch_issues_from_api()

        if self.alertname_filter is not None:
            alertname_filter = re.compile(self.alertname_filter)
            alerts = [a for a in alerts if alertname_filter.match(a.unique_id)]

        # NOTE(review): alert.labels['severity'] raises KeyError when an alert
        # has no severity label — confirm all expected alerts carry one
        return [
            Issue(
                id=alert.unique_id,
                name=alert.name,
                source_type="prometheus",
                source_instance_id=self.filepath if self.filepath else self.url,  # type: ignore
                url=alert.generatorURL,
                presentation_key_metadata=f"*Severity*: {alert.labels['severity']}\n*Start Time*: {alert.startsAt.strftime('%Y-%m-%d %H:%M:%S UTC')}\n*Duration*: {humanize.naturaldelta(alert.duration)}",  # type: ignore
                presentation_all_metadata=self.__format_issue_metadata(alert),
                raw=alert.model_dump(),
            )
            for alert in alerts
        ]

    def dump_raw_alerts_to_file(self, path: Path) -> None:
        """
        Useful for generating test data
        """
        alerts = self.__fetch_issues_from_api()
        with open(path, "w") as f:
            f.write(json.dumps(alerts, default=pydantic_encoder, indent=2))

    def output_curl_commands(self, console: rich.console.Console) -> None:
        """
        Outputs curl commands to send each alert to Alertmanager via the API.
        """
        alerts = self.__fetch_issues_from_api()
        for alert in alerts:
            # the POST body must be a JSON array, hence the single-element list
            alert_json = json.dumps(
                [alert.model_dump()], default=pydantic_encoder
            )  # Wrap in a list
            curl_command = (
                f"curl -X POST -H 'Content-Type: application/json' "
                f"-d '{alert_json}' {self.url}/api/v2/alerts"
            )
            console.print(f"[green]{alert.name} alert[/green]")
            console.print(f"[yellow]{curl_command}[/yellow]", soft_wrap=True)

    @staticmethod
    def __format_issue_metadata(alert: PrometheusAlert) -> Optional[str]:
        # Render the alert's labels and annotations as markdown sections;
        # returns None when there is nothing to show.
        if not alert.labels and not alert.annotations:
            return None
        text = ""
        if alert.labels:
            text += "*Labels:*\n"
            text += dict_to_markdown(alert.labels)
        if alert.annotations:
            text += "*Annotations:*\n"
            text += dict_to_markdown(alert.annotations)
        return text
+ return text