holmesgpt 0.11.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of holmesgpt might be problematic. Click here for more details.

Files changed (183)
  1. holmes/.git_archival.json +7 -0
  2. holmes/__init__.py +76 -0
  3. holmes/__init__.py.bak +76 -0
  4. holmes/clients/robusta_client.py +24 -0
  5. holmes/common/env_vars.py +47 -0
  6. holmes/config.py +526 -0
  7. holmes/core/__init__.py +0 -0
  8. holmes/core/conversations.py +578 -0
  9. holmes/core/investigation.py +152 -0
  10. holmes/core/investigation_structured_output.py +264 -0
  11. holmes/core/issue.py +54 -0
  12. holmes/core/llm.py +250 -0
  13. holmes/core/models.py +157 -0
  14. holmes/core/openai_formatting.py +51 -0
  15. holmes/core/performance_timing.py +72 -0
  16. holmes/core/prompt.py +42 -0
  17. holmes/core/resource_instruction.py +17 -0
  18. holmes/core/runbooks.py +26 -0
  19. holmes/core/safeguards.py +120 -0
  20. holmes/core/supabase_dal.py +540 -0
  21. holmes/core/tool_calling_llm.py +798 -0
  22. holmes/core/tools.py +566 -0
  23. holmes/core/tools_utils/__init__.py +0 -0
  24. holmes/core/tools_utils/tool_executor.py +65 -0
  25. holmes/core/tools_utils/toolset_utils.py +52 -0
  26. holmes/core/toolset_manager.py +418 -0
  27. holmes/interactive.py +229 -0
  28. holmes/main.py +1041 -0
  29. holmes/plugins/__init__.py +0 -0
  30. holmes/plugins/destinations/__init__.py +6 -0
  31. holmes/plugins/destinations/slack/__init__.py +2 -0
  32. holmes/plugins/destinations/slack/plugin.py +163 -0
  33. holmes/plugins/interfaces.py +32 -0
  34. holmes/plugins/prompts/__init__.py +48 -0
  35. holmes/plugins/prompts/_current_date_time.jinja2 +1 -0
  36. holmes/plugins/prompts/_default_log_prompt.jinja2 +11 -0
  37. holmes/plugins/prompts/_fetch_logs.jinja2 +36 -0
  38. holmes/plugins/prompts/_general_instructions.jinja2 +86 -0
  39. holmes/plugins/prompts/_global_instructions.jinja2 +12 -0
  40. holmes/plugins/prompts/_runbook_instructions.jinja2 +13 -0
  41. holmes/plugins/prompts/_toolsets_instructions.jinja2 +56 -0
  42. holmes/plugins/prompts/generic_ask.jinja2 +36 -0
  43. holmes/plugins/prompts/generic_ask_conversation.jinja2 +32 -0
  44. holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +50 -0
  45. holmes/plugins/prompts/generic_investigation.jinja2 +42 -0
  46. holmes/plugins/prompts/generic_post_processing.jinja2 +13 -0
  47. holmes/plugins/prompts/generic_ticket.jinja2 +12 -0
  48. holmes/plugins/prompts/investigation_output_format.jinja2 +32 -0
  49. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +84 -0
  50. holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +39 -0
  51. holmes/plugins/runbooks/README.md +22 -0
  52. holmes/plugins/runbooks/__init__.py +100 -0
  53. holmes/plugins/runbooks/catalog.json +14 -0
  54. holmes/plugins/runbooks/jira.yaml +12 -0
  55. holmes/plugins/runbooks/kube-prometheus-stack.yaml +10 -0
  56. holmes/plugins/runbooks/networking/dns_troubleshooting_instructions.md +66 -0
  57. holmes/plugins/runbooks/upgrade/upgrade_troubleshooting_instructions.md +44 -0
  58. holmes/plugins/sources/github/__init__.py +77 -0
  59. holmes/plugins/sources/jira/__init__.py +123 -0
  60. holmes/plugins/sources/opsgenie/__init__.py +93 -0
  61. holmes/plugins/sources/pagerduty/__init__.py +147 -0
  62. holmes/plugins/sources/prometheus/__init__.py +0 -0
  63. holmes/plugins/sources/prometheus/models.py +104 -0
  64. holmes/plugins/sources/prometheus/plugin.py +154 -0
  65. holmes/plugins/toolsets/__init__.py +171 -0
  66. holmes/plugins/toolsets/aks-node-health.yaml +65 -0
  67. holmes/plugins/toolsets/aks.yaml +86 -0
  68. holmes/plugins/toolsets/argocd.yaml +70 -0
  69. holmes/plugins/toolsets/atlas_mongodb/instructions.jinja2 +8 -0
  70. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +307 -0
  71. holmes/plugins/toolsets/aws.yaml +76 -0
  72. holmes/plugins/toolsets/azure_sql/__init__.py +0 -0
  73. holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +600 -0
  74. holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +309 -0
  75. holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +445 -0
  76. holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +251 -0
  77. holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +317 -0
  78. holmes/plugins/toolsets/azure_sql/azure_base_toolset.py +55 -0
  79. holmes/plugins/toolsets/azure_sql/azure_sql_instructions.jinja2 +137 -0
  80. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +183 -0
  81. holmes/plugins/toolsets/azure_sql/install.md +66 -0
  82. holmes/plugins/toolsets/azure_sql/tools/__init__.py +1 -0
  83. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +324 -0
  84. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +243 -0
  85. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +205 -0
  86. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +249 -0
  87. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +373 -0
  88. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +237 -0
  89. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +172 -0
  90. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +170 -0
  91. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +188 -0
  92. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +180 -0
  93. holmes/plugins/toolsets/azure_sql/utils.py +83 -0
  94. holmes/plugins/toolsets/bash/__init__.py +0 -0
  95. holmes/plugins/toolsets/bash/bash_instructions.jinja2 +14 -0
  96. holmes/plugins/toolsets/bash/bash_toolset.py +208 -0
  97. holmes/plugins/toolsets/bash/common/bash.py +52 -0
  98. holmes/plugins/toolsets/bash/common/config.py +14 -0
  99. holmes/plugins/toolsets/bash/common/stringify.py +25 -0
  100. holmes/plugins/toolsets/bash/common/validators.py +24 -0
  101. holmes/plugins/toolsets/bash/grep/__init__.py +52 -0
  102. holmes/plugins/toolsets/bash/kubectl/__init__.py +100 -0
  103. holmes/plugins/toolsets/bash/kubectl/constants.py +96 -0
  104. holmes/plugins/toolsets/bash/kubectl/kubectl_describe.py +66 -0
  105. holmes/plugins/toolsets/bash/kubectl/kubectl_events.py +88 -0
  106. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +108 -0
  107. holmes/plugins/toolsets/bash/kubectl/kubectl_logs.py +20 -0
  108. holmes/plugins/toolsets/bash/kubectl/kubectl_run.py +46 -0
  109. holmes/plugins/toolsets/bash/kubectl/kubectl_top.py +81 -0
  110. holmes/plugins/toolsets/bash/parse_command.py +103 -0
  111. holmes/plugins/toolsets/confluence.yaml +19 -0
  112. holmes/plugins/toolsets/consts.py +5 -0
  113. holmes/plugins/toolsets/coralogix/api.py +158 -0
  114. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +103 -0
  115. holmes/plugins/toolsets/coralogix/utils.py +181 -0
  116. holmes/plugins/toolsets/datadog.py +153 -0
  117. holmes/plugins/toolsets/docker.yaml +46 -0
  118. holmes/plugins/toolsets/git.py +756 -0
  119. holmes/plugins/toolsets/grafana/__init__.py +0 -0
  120. holmes/plugins/toolsets/grafana/base_grafana_toolset.py +54 -0
  121. holmes/plugins/toolsets/grafana/common.py +68 -0
  122. holmes/plugins/toolsets/grafana/grafana_api.py +31 -0
  123. holmes/plugins/toolsets/grafana/loki_api.py +89 -0
  124. holmes/plugins/toolsets/grafana/tempo_api.py +124 -0
  125. holmes/plugins/toolsets/grafana/toolset_grafana.py +102 -0
  126. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +102 -0
  127. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +10 -0
  128. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +299 -0
  129. holmes/plugins/toolsets/grafana/trace_parser.py +195 -0
  130. holmes/plugins/toolsets/helm.yaml +42 -0
  131. holmes/plugins/toolsets/internet/internet.py +275 -0
  132. holmes/plugins/toolsets/internet/notion.py +137 -0
  133. holmes/plugins/toolsets/kafka.py +638 -0
  134. holmes/plugins/toolsets/kubernetes.yaml +255 -0
  135. holmes/plugins/toolsets/kubernetes_logs.py +426 -0
  136. holmes/plugins/toolsets/kubernetes_logs.yaml +42 -0
  137. holmes/plugins/toolsets/logging_utils/__init__.py +0 -0
  138. holmes/plugins/toolsets/logging_utils/logging_api.py +217 -0
  139. holmes/plugins/toolsets/logging_utils/types.py +0 -0
  140. holmes/plugins/toolsets/mcp/toolset_mcp.py +135 -0
  141. holmes/plugins/toolsets/newrelic.py +222 -0
  142. holmes/plugins/toolsets/opensearch/__init__.py +0 -0
  143. holmes/plugins/toolsets/opensearch/opensearch.py +245 -0
  144. holmes/plugins/toolsets/opensearch/opensearch_logs.py +151 -0
  145. holmes/plugins/toolsets/opensearch/opensearch_traces.py +211 -0
  146. holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +12 -0
  147. holmes/plugins/toolsets/opensearch/opensearch_utils.py +166 -0
  148. holmes/plugins/toolsets/prometheus/prometheus.py +818 -0
  149. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +38 -0
  150. holmes/plugins/toolsets/rabbitmq/api.py +398 -0
  151. holmes/plugins/toolsets/rabbitmq/rabbitmq_instructions.jinja2 +37 -0
  152. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +222 -0
  153. holmes/plugins/toolsets/robusta/__init__.py +0 -0
  154. holmes/plugins/toolsets/robusta/robusta.py +235 -0
  155. holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +24 -0
  156. holmes/plugins/toolsets/runbook/__init__.py +0 -0
  157. holmes/plugins/toolsets/runbook/runbook_fetcher.py +78 -0
  158. holmes/plugins/toolsets/service_discovery.py +92 -0
  159. holmes/plugins/toolsets/servicenow/install.md +37 -0
  160. holmes/plugins/toolsets/servicenow/instructions.jinja2 +3 -0
  161. holmes/plugins/toolsets/servicenow/servicenow.py +198 -0
  162. holmes/plugins/toolsets/slab.yaml +20 -0
  163. holmes/plugins/toolsets/utils.py +137 -0
  164. holmes/plugins/utils.py +14 -0
  165. holmes/utils/__init__.py +0 -0
  166. holmes/utils/cache.py +84 -0
  167. holmes/utils/cert_utils.py +40 -0
  168. holmes/utils/default_toolset_installation_guide.jinja2 +44 -0
  169. holmes/utils/definitions.py +13 -0
  170. holmes/utils/env.py +53 -0
  171. holmes/utils/file_utils.py +56 -0
  172. holmes/utils/global_instructions.py +20 -0
  173. holmes/utils/holmes_status.py +22 -0
  174. holmes/utils/holmes_sync_toolsets.py +80 -0
  175. holmes/utils/markdown_utils.py +55 -0
  176. holmes/utils/pydantic_utils.py +54 -0
  177. holmes/utils/robusta.py +10 -0
  178. holmes/utils/tags.py +97 -0
  179. holmesgpt-0.11.5.dist-info/LICENSE.txt +21 -0
  180. holmesgpt-0.11.5.dist-info/METADATA +400 -0
  181. holmesgpt-0.11.5.dist-info/RECORD +183 -0
  182. holmesgpt-0.11.5.dist-info/WHEEL +4 -0
  183. holmesgpt-0.11.5.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,638 @@
1
+ import logging
2
+ from typing import Any, Dict, List, Optional, Tuple, Union
3
+
4
+ import yaml # type: ignore
5
+ from confluent_kafka.admin import (
6
+ AdminClient,
7
+ BrokerMetadata,
8
+ ClusterMetadata,
9
+ ConfigResource,
10
+ ConsumerGroupDescription,
11
+ GroupMember,
12
+ GroupMetadata,
13
+ KafkaError,
14
+ ListConsumerGroupsResult,
15
+ MemberAssignment,
16
+ MemberDescription,
17
+ PartitionMetadata,
18
+ TopicMetadata,
19
+ )
20
+ from confluent_kafka import Consumer
21
+ from confluent_kafka._model import Node
22
+ from enum import Enum
23
+ from confluent_kafka.admin import _TopicPartition as TopicPartition
24
+ from pydantic import BaseModel, ConfigDict
25
+
26
+ from holmes.core.tools import (
27
+ CallablePrerequisite,
28
+ StructuredToolResult,
29
+ Tool,
30
+ ToolParameter,
31
+ ToolResultStatus,
32
+ Toolset,
33
+ ToolsetTag,
34
+ )
35
+ from holmes.plugins.toolsets.consts import TOOLSET_CONFIG_MISSING_ERROR
36
+ from holmes.plugins.toolsets.utils import get_param_or_raise
37
+
38
+
39
class KafkaClusterConfig(BaseModel):
    """Connection settings for a single Kafka cluster."""

    # Logical cluster name; tools use it to select which cluster to query.
    name: str
    # Bootstrap servers string, e.g. "broker1:9092,broker2:9092".
    kafka_broker: str
    # e.g. "SASL_SSL" or "SSL"; left unset for plaintext connections.
    kafka_security_protocol: Optional[str] = None
    # e.g. "PLAIN" or "SCRAM-SHA-256"; only relevant with SASL protocols.
    kafka_sasl_mechanism: Optional[str] = None
    # SASL credentials; both must be set for them to be applied.
    kafka_username: Optional[str] = None
    kafka_password: Optional[str] = None
    # client.id reported to the brokers.
    kafka_client_id: Optional[str] = "holmes-kafka-client"
47
+
48
+
49
class KafkaConfig(BaseModel):
    """Top-level toolset configuration: one entry per Kafka cluster."""

    kafka_clusters: List[KafkaClusterConfig]
51
+
52
+
53
def convert_to_dict(obj: Any) -> Any:
    """Recursively convert confluent-kafka metadata/description objects into
    plain values suitable for yaml.dump.

    - Known metadata/description objects become dicts of their set attributes
      (None, -1 sentinels and empty lists are dropped to keep output compact).
    - TopicPartition and KafkaError become their string form.
    - Node becomes a small {"host", "id", "port"} dict.
    - Enum members collapse to the bare value name.
    - Anything else is returned unchanged — hence the ``Any`` return type
      (the previous ``Union[str, Dict]`` annotation was wrong for plain
      pass-through values such as ints and strings, which is why the body
      needed ``# type: ignore`` comments).
    """
    if isinstance(
        obj,
        (
            ClusterMetadata,
            BrokerMetadata,
            TopicMetadata,
            PartitionMetadata,
            GroupMember,
            GroupMetadata,
            ConsumerGroupDescription,
            MemberDescription,
            MemberAssignment,
        ),
    ):
        result = {}
        for key, value in vars(obj).items():
            # Skip "unset" markers (None, -1, empty list).
            if value is not None and value != -1 and value != []:
                if isinstance(value, dict):
                    result[key] = {k: convert_to_dict(v) for k, v in value.items()}
                elif isinstance(value, list):
                    result[key] = [convert_to_dict(item) for item in value]
                else:
                    result[key] = convert_to_dict(value)
        return result
    if isinstance(obj, TopicPartition):
        return str(obj)
    if isinstance(obj, KafkaError):
        return str(obj)
    if isinstance(obj, Node):
        # Convert Node to a simple dict
        return {"host": obj.host, "id": obj.id, "port": obj.port}
    if isinstance(obj, Enum):
        # Convert enum to its string representation
        return str(obj).split(".")[-1]  # Get just the enum value name
    return obj
89
+
90
+
91
def format_list_consumer_group_errors(errors: Optional[List]) -> str:
    """Render consumer-group listing errors as a markdown fragment.

    Returns an empty string when there are no errors. A summary header is
    prepended only when more than one error occurred.
    """
    if not errors:
        return ""
    header = ""
    if len(errors) > 1:
        header = "# Some errors happened while listing consumer groups:\n\n"
    sections = [f"## Error:\n{str(error)}" for error in errors]
    return header + "\n\n".join(sections)
101
+
102
+
103
class BaseKafkaTool(Tool):
    """Shared base for Kafka tools: resolves clients and brokers by cluster name."""

    toolset: "KafkaToolset"

    def get_kafka_client(self, cluster_name: Optional[str]) -> AdminClient:
        """Return the AdminClient for the given cluster name.

        With exactly one configured cluster the name is optional; otherwise a
        missing or unknown name raises.
        """
        clients = self.toolset.clients
        # Single cluster configured: no ambiguity, name not required.
        if len(clients) == 1:
            return next(iter(clients.values()))

        if not cluster_name:
            raise Exception("Missing cluster name to resolve Kafka client")

        client = clients.get(cluster_name)
        if client is not None:
            return client

        raise Exception(
            f"Failed to resolve Kafka client. No matching cluster: {cluster_name}"
        )

    def get_bootstrap_servers(self, cluster_name: str) -> str:
        """Return the bootstrap-servers string configured for the cluster."""
        config = self.toolset.kafka_config
        if not config:
            raise Exception("Kafka configuration not available")

        for entry in config.kafka_clusters:
            if entry.name == cluster_name:
                return entry.kafka_broker

        raise Exception(
            f"Failed to resolve bootstrap servers. No matching cluster: {cluster_name}"
        )
139
+
140
+
141
class ListKafkaConsumers(BaseKafkaTool):
    """Tool that lists every consumer group known to a Kafka cluster."""

    def __init__(self, toolset: "KafkaToolset"):
        super().__init__(
            name="list_kafka_consumers",
            description="Lists all Kafka consumer groups in the cluster",
            parameters={
                "kafka_cluster_name": ToolParameter(
                    description="The name of the kafka cluster to investigate",
                    type="string",
                    required=True,
                ),
            },
            toolset=toolset,
        )

    def _invoke(self, params: Dict) -> StructuredToolResult:
        try:
            cluster_name = get_param_or_raise(params, "kafka_cluster_name")
            admin = self.get_kafka_client(cluster_name)
            if admin is None:
                return StructuredToolResult(
                    status=ToolResultStatus.ERROR,
                    error="No admin_client on toolset. This toolset is misconfigured.",
                    params=params,
                )

            # list_consumer_groups is asynchronous; block on the future.
            listing: ListConsumerGroupsResult = admin.list_consumer_groups().result()

            if listing.valid:
                entries = [
                    {
                        "group_id": group.group_id,
                        "is_simple_consumer_group": group.is_simple_consumer_group,
                        "state": str(group.state),
                        "type": str(group.type),
                    }
                    for group in listing.valid
                ]
                body = yaml.dump({"consumer_groups": entries})
            else:
                body = "No consumer group was found"

            # Partial failures are appended after the listing rather than
            # failing the whole call.
            errors_text = format_list_consumer_group_errors(listing.errors)
            if errors_text:
                body = body + "\n\n" + errors_text

            return StructuredToolResult(
                status=ToolResultStatus.SUCCESS,
                data=body,
                params=params,
            )
        except Exception as e:
            error_msg = f"Failed to list consumer groups: {str(e)}"
            logging.error(error_msg)
            return StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error=error_msg,
                params=params,
            )

    def get_parameterized_one_liner(self, params: Dict) -> str:
        return f"Listed all Kafka consumer groups in the cluster \"{params.get('kafka_cluster_name')}\""
206
+
207
+
208
class DescribeConsumerGroup(BaseKafkaTool):
    """Tool that returns the full description of one consumer group."""

    def __init__(self, toolset: "KafkaToolset"):
        super().__init__(
            name="describe_consumer_group",
            description="Describes a specific Kafka consumer group",
            parameters={
                "kafka_cluster_name": ToolParameter(
                    description="The name of the kafka cluster to investigate",
                    type="string",
                    required=True,
                ),
                "group_id": ToolParameter(
                    description="The ID of the consumer group to describe",
                    type="string",
                    required=True,
                ),
            },
            toolset=toolset,
        )

    def _invoke(self, params: Dict) -> StructuredToolResult:
        group_id = params["group_id"]
        try:
            cluster_name = get_param_or_raise(params, "kafka_cluster_name")
            admin = self.get_kafka_client(cluster_name)
            if admin is None:
                return StructuredToolResult(
                    status=ToolResultStatus.ERROR,
                    error="No admin_client on toolset. This toolset is misconfigured.",
                    params=params,
                )

            # describe_consumer_groups returns {group_id: future}.
            future = admin.describe_consumer_groups([group_id]).get(group_id)
            if not future:
                return StructuredToolResult(
                    status=ToolResultStatus.ERROR,
                    error="Group not found",
                    params=params,
                )

            description = future.result()
            return StructuredToolResult(
                status=ToolResultStatus.SUCCESS,
                data=yaml.dump(convert_to_dict(description)),
                params=params,
            )
        except Exception as e:
            error_msg = f"Failed to describe consumer group {group_id}: {str(e)}"
            logging.error(error_msg)
            return StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error=error_msg,
                params=params,
            )

    def get_parameterized_one_liner(self, params: Dict) -> str:
        return f"Described consumer group: {params['group_id']} in cluster \"{params.get('kafka_cluster_name')}\""
266
+
267
+
268
class ListTopics(BaseKafkaTool):
    """Tool that lists every topic in a Kafka cluster."""

    def __init__(self, toolset: "KafkaToolset"):
        super().__init__(
            name="list_topics",
            description="Lists all Kafka topics in the cluster",
            parameters={
                "kafka_cluster_name": ToolParameter(
                    description="The name of the kafka cluster to investigate",
                    type="string",
                    required=True,
                ),
            },
            toolset=toolset,
        )

    def _invoke(self, params: Dict) -> StructuredToolResult:
        try:
            cluster_name = get_param_or_raise(params, "kafka_cluster_name")
            admin = self.get_kafka_client(cluster_name)
            if admin is None:
                return StructuredToolResult(
                    status=ToolResultStatus.ERROR,
                    error="No admin_client on toolset. This toolset is misconfigured.",
                    params=params,
                )

            # list_topics() with no argument returns cluster-wide metadata.
            cluster_metadata = admin.list_topics()
            return StructuredToolResult(
                status=ToolResultStatus.SUCCESS,
                data=yaml.dump(convert_to_dict(cluster_metadata)),
                params=params,
            )
        except Exception as e:
            error_msg = f"Failed to list topics: {str(e)}"
            logging.error(error_msg)
            return StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error=error_msg,
                params=params,
            )

    def get_parameterized_one_liner(self, params: Dict) -> str:
        return f"Listed all Kafka topics in the cluster \"{params.get('kafka_cluster_name')}\""
311
+
312
+
313
class DescribeTopic(BaseKafkaTool):
    """Tool that describes one topic, optionally including its configuration."""

    def __init__(self, toolset: "KafkaToolset"):
        super().__init__(
            name="describe_topic",
            description="Describes details of a specific Kafka topic",
            parameters={
                "kafka_cluster_name": ToolParameter(
                    description="The name of the kafka cluster to investigate",
                    type="string",
                    required=True,
                ),
                "topic_name": ToolParameter(
                    description="The name of the topic to describe",
                    type="string",
                    required=True,
                ),
                "fetch_configuration": ToolParameter(
                    description="If true, also fetches the topic configuration. defaults to false",
                    type="boolean",
                    required=False,
                ),
            },
            toolset=toolset,
        )

    def _invoke(self, params: Dict) -> StructuredToolResult:
        topic_name = params["topic_name"]
        try:
            cluster_name = get_param_or_raise(params, "kafka_cluster_name")
            admin = self.get_kafka_client(cluster_name)
            if admin is None:
                return StructuredToolResult(
                    status=ToolResultStatus.ERROR,
                    error="No admin_client on toolset. This toolset is misconfigured.",
                    params=params,
                )

            # Start the (async) config lookup first, if requested, so it can
            # resolve while we fetch the topic metadata. The param may arrive
            # as a bool or a string, hence the str().lower() normalization.
            config_future = None
            if str(params.get("fetch_configuration", False)).lower() == "true":
                config_futures = admin.describe_configs(
                    [ConfigResource("topic", topic_name)]
                )
                config_future = next(iter(config_futures.values()))

            topic_metadata = admin.list_topics(topic_name).topics[topic_name]
            payload: dict = {"metadata": convert_to_dict(topic_metadata)}

            if config_future:
                payload["configuration"] = convert_to_dict(config_future.result())

            return StructuredToolResult(
                status=ToolResultStatus.SUCCESS,
                data=yaml.dump(payload),
                params=params,
            )
        except Exception as e:
            error_msg = f"Failed to describe topic {topic_name}: {str(e)}"
            logging.error(error_msg, exc_info=True)
            return StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error=error_msg,
                params=params,
            )

    def get_parameterized_one_liner(self, params: Dict) -> str:
        return f"Described topic: {params['topic_name']} in cluster \"{params.get('kafka_cluster_name')}\""
380
+
381
+
382
def group_has_topic(
    client: "AdminClient",
    consumer_group_description: "ConsumerGroupDescription",
    topic_name: str,
    bootstrap_servers: str,
    topic_metadata: Any,
) -> bool:
    """Return True if the consumer group consumes from the given topic.

    Checks active member assignments first, then falls back to committed
    offsets so that inactive/empty groups are still detected. Any failure in
    the offset check degrades to the assignment-only answer (False here).

    ``client`` is unused but retained for interface compatibility with
    existing callers. Annotations are quoted (forward references) so this
    function does not force its third-party types to be resolvable at
    definition time.
    """
    # Check active member assignments
    for member in consumer_group_description.members:
        for topic_partition in member.assignment.topic_partitions:
            if topic_partition.topic == topic_name:
                return True

    # Check committed offsets for the topic (handles inactive/empty consumer groups)
    try:
        # Create a consumer with the same group.id as the one we're checking;
        # that lets us read that group's committed offsets.
        consumer_config = {
            "bootstrap.servers": bootstrap_servers,
            "group.id": consumer_group_description.group_id,
            "auto.offset.reset": "earliest",
            "enable.auto.commit": False,  # Don't auto-commit to avoid side effects
        }
        consumer = Consumer(consumer_config)
        try:
            # Check topic metadata to know which partitions exist
            if topic_name not in topic_metadata.topics:
                return False

            # Create TopicPartition objects for all partitions of the topic
            topic_partitions = [
                TopicPartition(topic_name, partition_id)
                for partition_id in topic_metadata.topics[topic_name].partitions
            ]

            # Check committed offsets for this consumer group on these partitions
            committed_offsets = consumer.committed(topic_partitions, timeout=10.0)

            # Any valid committed offset means the group has used the topic.
            for tp in committed_offsets:
                if tp.offset != -1001:  # -1001 means no committed offset
                    return True

            return False
        finally:
            # Always release the consumer — the original leaked it whenever
            # committed() or the metadata checks raised before close().
            consumer.close()
    except Exception:
        # If we can't check offsets, fall back to just the active assignment check
        pass

    return False
436
+
437
+
438
class FindConsumerGroupsByTopic(BaseKafkaTool):
    """Tool that finds all consumer groups consuming from a given topic."""

    def __init__(self, toolset: "KafkaToolset"):
        super().__init__(
            name="find_consumer_groups_by_topic",
            description="Finds all consumer groups consuming from a specific topic",
            parameters={
                "kafka_cluster_name": ToolParameter(
                    description="The name of the kafka cluster to investigate",
                    type="string",
                    required=True,
                ),
                "topic_name": ToolParameter(
                    description="The name of the topic to find consumers for",
                    type="string",
                    required=True,
                ),
            },
            toolset=toolset,
        )

    def _invoke(self, params: Dict) -> StructuredToolResult:
        topic_name = params["topic_name"]
        try:
            kafka_cluster_name = get_param_or_raise(params, "kafka_cluster_name")
            client = self.get_kafka_client(kafka_cluster_name)
            if client is None:
                return StructuredToolResult(
                    status=ToolResultStatus.ERROR,
                    error="No admin_client on toolset. This toolset is misconfigured.",
                    params=params,
                )

            groups_future = client.list_consumer_groups()
            groups: ListConsumerGroupsResult = groups_future.result()

            consumer_groups = []
            group_ids_to_evaluate: list[str] = []
            if groups.valid:
                group_ids_to_evaluate = [group.group_id for group in groups.valid]

            if len(group_ids_to_evaluate) > 0:
                # Loop-invariant lookups hoisted out of the per-group loop:
                # the original re-resolved bootstrap servers and re-fetched
                # topic metadata (a network round-trip) for every group.
                bootstrap_servers = self.get_bootstrap_servers(kafka_cluster_name)
                topic_metadata = client.list_topics(topic_name, timeout=10)

                consumer_groups_futures = client.describe_consumer_groups(
                    group_ids_to_evaluate
                )

                for (
                    group_id,
                    consumer_group_description_future,
                ) in consumer_groups_futures.items():
                    consumer_group_description = (
                        consumer_group_description_future.result()
                    )
                    if group_has_topic(
                        client=client,
                        consumer_group_description=consumer_group_description,
                        topic_name=topic_name,
                        bootstrap_servers=bootstrap_servers,
                        topic_metadata=topic_metadata,
                    ):
                        consumer_groups.append(
                            convert_to_dict(consumer_group_description)
                        )

            errors_text = format_list_consumer_group_errors(groups.errors)

            if len(consumer_groups) > 0:
                result_text = yaml.dump(consumer_groups)
            else:
                # Grammar fix: was "No consumer group were found ..."
                result_text = f"No consumer groups were found for topic {topic_name}"

            if errors_text:
                result_text = result_text + "\n\n" + errors_text

            return StructuredToolResult(
                status=ToolResultStatus.SUCCESS,
                data=result_text,
                params=params,
            )
        except Exception as e:
            error_msg = (
                f"Failed to find consumer groups for topic {topic_name}: {str(e)}"
            )
            logging.error(error_msg)
            return StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error=error_msg,
                params=params,
            )

    def get_parameterized_one_liner(self, params: Dict) -> str:
        return f"Found consumer groups for topic: {params.get('topic_name')} in cluster \"{params.get('kafka_cluster_name')}\""
534
+
535
+
536
class ListKafkaClusters(BaseKafkaTool):
    """Tool that lists the Kafka clusters configured for this toolset."""

    def __init__(self, toolset: "KafkaToolset"):
        super().__init__(
            name="list_kafka_clusters",
            description="Lists all available Kafka clusters configured in HolmesGPT",
            parameters={},
            toolset=toolset,
        )

    def _invoke(self, params: Dict) -> StructuredToolResult:
        # One cluster name per line, taken from the configured client map.
        names = "\n".join(self.toolset.clients.keys())
        return StructuredToolResult(
            status=ToolResultStatus.SUCCESS,
            data="Available Kafka Clusters:\n" + names,
            params=params,
        )

    def get_parameterized_one_liner(self, params: Dict) -> str:
        return "Listed all available Kafka clusters"
555
+
556
+
557
class KafkaToolset(Toolset):
    """Toolset exposing read-only metadata tools across multiple Kafka clusters."""

    model_config = ConfigDict(arbitrary_types_allowed=True)
    # One AdminClient per configured cluster, keyed by cluster name.
    clients: Dict[str, AdminClient] = {}
    kafka_config: Optional[KafkaConfig] = None

    def __init__(self):
        super().__init__(
            name="kafka/admin",
            description="Fetches metadata from multiple Kafka clusters",
            prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)],
            docs_url="https://docs.robusta.dev/master/configuration/holmesgpt/toolsets/kafka.html",
            icon_url="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcT-cR1JrBgJxB_SPVKUIRwtiHnR8qBvLeHXjQ&s",
            tags=[ToolsetTag.CORE],
            tools=[
                ListKafkaClusters(self),
                ListKafkaConsumers(self),
                DescribeConsumerGroup(self),
                ListTopics(self),
                DescribeTopic(self),
                FindConsumerGroupsByTopic(self),
            ],
        )

    def prerequisites_callable(self, config: Dict[str, Any]) -> Tuple[bool, str]:
        """Validate the toolset config and build one AdminClient per cluster.

        Returns (ok, message): ok is True when at least one client was set up;
        message aggregates per-cluster setup errors (empty on full success).
        """
        if not config:
            return False, TOOLSET_CONFIG_MISSING_ERROR
        errors = []
        try:
            parsed = KafkaConfig(**config)
            self.kafka_config = parsed

            for cluster in parsed.kafka_clusters:
                try:
                    logging.info(f"Setting up Kafka client for cluster: {cluster.name}")
                    admin_config = {
                        "bootstrap.servers": cluster.kafka_broker,
                        "client.id": cluster.kafka_client_id,
                    }

                    # Optional security settings are only added when present.
                    if cluster.kafka_security_protocol:
                        admin_config["security.protocol"] = cluster.kafka_security_protocol
                    if cluster.kafka_sasl_mechanism:
                        admin_config["sasl.mechanisms"] = cluster.kafka_sasl_mechanism
                    if cluster.kafka_username and cluster.kafka_password:
                        admin_config["sasl.username"] = cluster.kafka_username
                        admin_config["sasl.password"] = cluster.kafka_password

                    self.clients[cluster.name] = AdminClient(admin_config)
                except Exception as e:
                    # One bad cluster must not block the others.
                    message = (
                        f"Failed to set up Kafka client for {cluster.name}: {str(e)}"
                    )
                    logging.error(message)
                    errors.append(message)

            return len(self.clients) > 0, "\n".join(errors)
        except Exception as e:
            logging.exception("Failed to set up Kafka toolset")
            return False, str(e)

    def get_example_config(self) -> Dict[str, Any]:
        """Return a sample configuration dict for documentation purposes."""
        return KafkaConfig(
            kafka_clusters=[
                KafkaClusterConfig(
                    name="us-west-kafka",
                    kafka_broker="broker1.example.com:9092,broker2.example.com:9092",
                    kafka_security_protocol="SASL_SSL",
                    kafka_sasl_mechanism="PLAIN",
                    kafka_username="{{ env.KAFKA_USERNAME }}",
                    kafka_password="{{ env.KAFKA_PASSWORD }}",
                ),
                KafkaClusterConfig(
                    name="eu-central-kafka",
                    kafka_broker="broker3.example.com:9092",
                    kafka_security_protocol="SSL",
                ),
            ]
        ).model_dump()