holmesgpt 0.11.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of holmesgpt might be problematic. Click here for more details.

Files changed (183) hide show
  1. holmes/.git_archival.json +7 -0
  2. holmes/__init__.py +76 -0
  3. holmes/__init__.py.bak +76 -0
  4. holmes/clients/robusta_client.py +24 -0
  5. holmes/common/env_vars.py +47 -0
  6. holmes/config.py +526 -0
  7. holmes/core/__init__.py +0 -0
  8. holmes/core/conversations.py +578 -0
  9. holmes/core/investigation.py +152 -0
  10. holmes/core/investigation_structured_output.py +264 -0
  11. holmes/core/issue.py +54 -0
  12. holmes/core/llm.py +250 -0
  13. holmes/core/models.py +157 -0
  14. holmes/core/openai_formatting.py +51 -0
  15. holmes/core/performance_timing.py +72 -0
  16. holmes/core/prompt.py +42 -0
  17. holmes/core/resource_instruction.py +17 -0
  18. holmes/core/runbooks.py +26 -0
  19. holmes/core/safeguards.py +120 -0
  20. holmes/core/supabase_dal.py +540 -0
  21. holmes/core/tool_calling_llm.py +798 -0
  22. holmes/core/tools.py +566 -0
  23. holmes/core/tools_utils/__init__.py +0 -0
  24. holmes/core/tools_utils/tool_executor.py +65 -0
  25. holmes/core/tools_utils/toolset_utils.py +52 -0
  26. holmes/core/toolset_manager.py +418 -0
  27. holmes/interactive.py +229 -0
  28. holmes/main.py +1041 -0
  29. holmes/plugins/__init__.py +0 -0
  30. holmes/plugins/destinations/__init__.py +6 -0
  31. holmes/plugins/destinations/slack/__init__.py +2 -0
  32. holmes/plugins/destinations/slack/plugin.py +163 -0
  33. holmes/plugins/interfaces.py +32 -0
  34. holmes/plugins/prompts/__init__.py +48 -0
  35. holmes/plugins/prompts/_current_date_time.jinja2 +1 -0
  36. holmes/plugins/prompts/_default_log_prompt.jinja2 +11 -0
  37. holmes/plugins/prompts/_fetch_logs.jinja2 +36 -0
  38. holmes/plugins/prompts/_general_instructions.jinja2 +86 -0
  39. holmes/plugins/prompts/_global_instructions.jinja2 +12 -0
  40. holmes/plugins/prompts/_runbook_instructions.jinja2 +13 -0
  41. holmes/plugins/prompts/_toolsets_instructions.jinja2 +56 -0
  42. holmes/plugins/prompts/generic_ask.jinja2 +36 -0
  43. holmes/plugins/prompts/generic_ask_conversation.jinja2 +32 -0
  44. holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +50 -0
  45. holmes/plugins/prompts/generic_investigation.jinja2 +42 -0
  46. holmes/plugins/prompts/generic_post_processing.jinja2 +13 -0
  47. holmes/plugins/prompts/generic_ticket.jinja2 +12 -0
  48. holmes/plugins/prompts/investigation_output_format.jinja2 +32 -0
  49. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +84 -0
  50. holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +39 -0
  51. holmes/plugins/runbooks/README.md +22 -0
  52. holmes/plugins/runbooks/__init__.py +100 -0
  53. holmes/plugins/runbooks/catalog.json +14 -0
  54. holmes/plugins/runbooks/jira.yaml +12 -0
  55. holmes/plugins/runbooks/kube-prometheus-stack.yaml +10 -0
  56. holmes/plugins/runbooks/networking/dns_troubleshooting_instructions.md +66 -0
  57. holmes/plugins/runbooks/upgrade/upgrade_troubleshooting_instructions.md +44 -0
  58. holmes/plugins/sources/github/__init__.py +77 -0
  59. holmes/plugins/sources/jira/__init__.py +123 -0
  60. holmes/plugins/sources/opsgenie/__init__.py +93 -0
  61. holmes/plugins/sources/pagerduty/__init__.py +147 -0
  62. holmes/plugins/sources/prometheus/__init__.py +0 -0
  63. holmes/plugins/sources/prometheus/models.py +104 -0
  64. holmes/plugins/sources/prometheus/plugin.py +154 -0
  65. holmes/plugins/toolsets/__init__.py +171 -0
  66. holmes/plugins/toolsets/aks-node-health.yaml +65 -0
  67. holmes/plugins/toolsets/aks.yaml +86 -0
  68. holmes/plugins/toolsets/argocd.yaml +70 -0
  69. holmes/plugins/toolsets/atlas_mongodb/instructions.jinja2 +8 -0
  70. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +307 -0
  71. holmes/plugins/toolsets/aws.yaml +76 -0
  72. holmes/plugins/toolsets/azure_sql/__init__.py +0 -0
  73. holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +600 -0
  74. holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +309 -0
  75. holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +445 -0
  76. holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +251 -0
  77. holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +317 -0
  78. holmes/plugins/toolsets/azure_sql/azure_base_toolset.py +55 -0
  79. holmes/plugins/toolsets/azure_sql/azure_sql_instructions.jinja2 +137 -0
  80. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +183 -0
  81. holmes/plugins/toolsets/azure_sql/install.md +66 -0
  82. holmes/plugins/toolsets/azure_sql/tools/__init__.py +1 -0
  83. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +324 -0
  84. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +243 -0
  85. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +205 -0
  86. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +249 -0
  87. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +373 -0
  88. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +237 -0
  89. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +172 -0
  90. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +170 -0
  91. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +188 -0
  92. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +180 -0
  93. holmes/plugins/toolsets/azure_sql/utils.py +83 -0
  94. holmes/plugins/toolsets/bash/__init__.py +0 -0
  95. holmes/plugins/toolsets/bash/bash_instructions.jinja2 +14 -0
  96. holmes/plugins/toolsets/bash/bash_toolset.py +208 -0
  97. holmes/plugins/toolsets/bash/common/bash.py +52 -0
  98. holmes/plugins/toolsets/bash/common/config.py +14 -0
  99. holmes/plugins/toolsets/bash/common/stringify.py +25 -0
  100. holmes/plugins/toolsets/bash/common/validators.py +24 -0
  101. holmes/plugins/toolsets/bash/grep/__init__.py +52 -0
  102. holmes/plugins/toolsets/bash/kubectl/__init__.py +100 -0
  103. holmes/plugins/toolsets/bash/kubectl/constants.py +96 -0
  104. holmes/plugins/toolsets/bash/kubectl/kubectl_describe.py +66 -0
  105. holmes/plugins/toolsets/bash/kubectl/kubectl_events.py +88 -0
  106. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +108 -0
  107. holmes/plugins/toolsets/bash/kubectl/kubectl_logs.py +20 -0
  108. holmes/plugins/toolsets/bash/kubectl/kubectl_run.py +46 -0
  109. holmes/plugins/toolsets/bash/kubectl/kubectl_top.py +81 -0
  110. holmes/plugins/toolsets/bash/parse_command.py +103 -0
  111. holmes/plugins/toolsets/confluence.yaml +19 -0
  112. holmes/plugins/toolsets/consts.py +5 -0
  113. holmes/plugins/toolsets/coralogix/api.py +158 -0
  114. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +103 -0
  115. holmes/plugins/toolsets/coralogix/utils.py +181 -0
  116. holmes/plugins/toolsets/datadog.py +153 -0
  117. holmes/plugins/toolsets/docker.yaml +46 -0
  118. holmes/plugins/toolsets/git.py +756 -0
  119. holmes/plugins/toolsets/grafana/__init__.py +0 -0
  120. holmes/plugins/toolsets/grafana/base_grafana_toolset.py +54 -0
  121. holmes/plugins/toolsets/grafana/common.py +68 -0
  122. holmes/plugins/toolsets/grafana/grafana_api.py +31 -0
  123. holmes/plugins/toolsets/grafana/loki_api.py +89 -0
  124. holmes/plugins/toolsets/grafana/tempo_api.py +124 -0
  125. holmes/plugins/toolsets/grafana/toolset_grafana.py +102 -0
  126. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +102 -0
  127. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +10 -0
  128. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +299 -0
  129. holmes/plugins/toolsets/grafana/trace_parser.py +195 -0
  130. holmes/plugins/toolsets/helm.yaml +42 -0
  131. holmes/plugins/toolsets/internet/internet.py +275 -0
  132. holmes/plugins/toolsets/internet/notion.py +137 -0
  133. holmes/plugins/toolsets/kafka.py +638 -0
  134. holmes/plugins/toolsets/kubernetes.yaml +255 -0
  135. holmes/plugins/toolsets/kubernetes_logs.py +426 -0
  136. holmes/plugins/toolsets/kubernetes_logs.yaml +42 -0
  137. holmes/plugins/toolsets/logging_utils/__init__.py +0 -0
  138. holmes/plugins/toolsets/logging_utils/logging_api.py +217 -0
  139. holmes/plugins/toolsets/logging_utils/types.py +0 -0
  140. holmes/plugins/toolsets/mcp/toolset_mcp.py +135 -0
  141. holmes/plugins/toolsets/newrelic.py +222 -0
  142. holmes/plugins/toolsets/opensearch/__init__.py +0 -0
  143. holmes/plugins/toolsets/opensearch/opensearch.py +245 -0
  144. holmes/plugins/toolsets/opensearch/opensearch_logs.py +151 -0
  145. holmes/plugins/toolsets/opensearch/opensearch_traces.py +211 -0
  146. holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +12 -0
  147. holmes/plugins/toolsets/opensearch/opensearch_utils.py +166 -0
  148. holmes/plugins/toolsets/prometheus/prometheus.py +818 -0
  149. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +38 -0
  150. holmes/plugins/toolsets/rabbitmq/api.py +398 -0
  151. holmes/plugins/toolsets/rabbitmq/rabbitmq_instructions.jinja2 +37 -0
  152. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +222 -0
  153. holmes/plugins/toolsets/robusta/__init__.py +0 -0
  154. holmes/plugins/toolsets/robusta/robusta.py +235 -0
  155. holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +24 -0
  156. holmes/plugins/toolsets/runbook/__init__.py +0 -0
  157. holmes/plugins/toolsets/runbook/runbook_fetcher.py +78 -0
  158. holmes/plugins/toolsets/service_discovery.py +92 -0
  159. holmes/plugins/toolsets/servicenow/install.md +37 -0
  160. holmes/plugins/toolsets/servicenow/instructions.jinja2 +3 -0
  161. holmes/plugins/toolsets/servicenow/servicenow.py +198 -0
  162. holmes/plugins/toolsets/slab.yaml +20 -0
  163. holmes/plugins/toolsets/utils.py +137 -0
  164. holmes/plugins/utils.py +14 -0
  165. holmes/utils/__init__.py +0 -0
  166. holmes/utils/cache.py +84 -0
  167. holmes/utils/cert_utils.py +40 -0
  168. holmes/utils/default_toolset_installation_guide.jinja2 +44 -0
  169. holmes/utils/definitions.py +13 -0
  170. holmes/utils/env.py +53 -0
  171. holmes/utils/file_utils.py +56 -0
  172. holmes/utils/global_instructions.py +20 -0
  173. holmes/utils/holmes_status.py +22 -0
  174. holmes/utils/holmes_sync_toolsets.py +80 -0
  175. holmes/utils/markdown_utils.py +55 -0
  176. holmes/utils/pydantic_utils.py +54 -0
  177. holmes/utils/robusta.py +10 -0
  178. holmes/utils/tags.py +97 -0
  179. holmesgpt-0.11.5.dist-info/LICENSE.txt +21 -0
  180. holmesgpt-0.11.5.dist-info/METADATA +400 -0
  181. holmesgpt-0.11.5.dist-info/RECORD +183 -0
  182. holmesgpt-0.11.5.dist-info/WHEEL +4 -0
  183. holmesgpt-0.11.5.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,255 @@
1
+ toolsets:
2
+ kubernetes/core:
3
+ description: "Read access to cluster resources (excluding secrets and other sensitive data)"
4
+ docs_url: "https://docs.robusta.dev/master/configuration/holmesgpt/toolsets/kubernetes.html#core"
5
+ icon_url: "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRPKA-U9m5BxYQDF1O7atMfj9EMMXEoGu4t0Q&s"
6
+ tags:
7
+ - core
8
+ prerequisites:
9
+ - command: "kubectl version --client"
10
+
11
+ tools:
12
+ - name: "kubectl_describe"
13
+ description: >
14
+ Run kubectl describe <kind> <name> -n <namespace>,
15
+ call this when users ask for description,
16
+ for example when a user asks
17
+ - 'describe pod xyz-123'
18
+ - 'show service xyz-123 in namespace my-ns'
19
+ command: "kubectl describe {{ kind }} {{ name }}{% if namespace %} -n {{ namespace }}{% endif %}"
20
+
21
+ - name: "kubectl_get_by_name"
22
+ description: "Run `kubectl get <kind> <name> --show-labels`"
23
+ command: "kubectl get --show-labels -o wide {{ kind }} {{ name }}{% if namespace %} -n {{ namespace }}{% endif %}"
24
+
25
+ - name: "kubectl_get_by_kind_in_namespace"
26
+ description: "Run `kubectl get <kind> -n <namespace> --show-labels` to get all resources of a given type in namespace"
27
+ command: "kubectl get --show-labels -o wide {{ kind }} -n {{namespace}}"
28
+
29
+ - name: "kubectl_get_by_kind_in_cluster"
30
+ description: "Run `kubectl get -A <kind> --show-labels` to get all resources of a given type in the cluster"
31
+ command: "kubectl get -A --show-labels -o wide {{ kind }}"
32
+
33
+ - name: "kubectl_find_resource"
34
+ description: "Run `kubectl get {{ kind }} -A --show-labels | grep {{ keyword }}` to find a resource where you know a substring of the name, IP, namespace, or labels"
35
+ command: "kubectl get -A --show-labels -o wide {{ kind }} | grep {{ keyword }}"
36
+
37
+ - name: "kubectl_get_yaml"
38
+ description: "Run `kubectl get -o yaml` on a single Kubernetes resource"
39
+ command: "kubectl get -o yaml {{ kind }} {{ name}}{% if namespace %} -n {{ namespace }}{% endif %}"
40
+
41
+ - name: "kubectl_events"
42
+ description: "Retrieve the events for a specific Kubernetes resource. `resource_type` can be any kubernetes resource type: 'pod', 'service', 'deployment', 'job', 'node', etc."
43
+ command: "kubectl events --for {{resource_type}}/{{ pod_name }} -n {{ namespace }}"
44
+
45
+ - name: "kubectl_memory_requests_all_namespaces"
46
+ description: "Fetch and display memory requests for all pods across all namespaces in MiB, summing requests across multiple containers where applicable and handling binary, decimal, and millibyte units correctly."
47
+ command: |
48
+ kubectl get pods --all-namespaces -o custom-columns="NAMESPACE:.metadata.namespace,NAME:.metadata.name,MEMORY_REQUEST:.spec.containers[*].resources.requests.memory" --no-headers | \
49
+ awk '
50
+ function convert_to_mib(value) {
51
+ if (value ~ /^[0-9]+e[0-9]+$/) return (value + 0) / (1024 * 1024); # Scientific notation
52
+ if (value ~ /m$/) return (value + 0) / (1024^2 * 1000); # Millibytes (m)
53
+ if (value ~ /Ei$/) return (value + 0) * 1024^6 / (1024^2); # Binary units
54
+ if (value ~ /Pi$/) return (value + 0) * 1024^5 / (1024^2);
55
+ if (value ~ /Ti$/) return (value + 0) * 1024^4 / (1024^2);
56
+ if (value ~ /Gi$/) return (value + 0) * 1024^3 / (1024^2);
57
+ if (value ~ /Mi$/) return (value + 0);
58
+ if (value ~ /Ki$/) return (value + 0) / 1024;
59
+ if (value ~ /E$/) return (value + 0) * 1000^6 / (1024^2); # Decimal units
60
+ if (value ~ /P$/) return (value + 0) * 1000^5 / (1024^2);
61
+ if (value ~ /T$/) return (value + 0) * 1000^4 / (1024^2);
62
+ if (value ~ /G$/) return (value + 0) * 1000^3 / (1024^2);
63
+ if (value ~ /M$/) return (value + 0) * 1000^2 / (1024^2);
64
+ if (value ~ /k$/) return (value + 0) * 1000 / (1024^2);
65
+ return (value + 0) / (1024 * 1024); # Default: bytes
66
+ }
67
+ function sum_memory(requests) {
68
+ gsub(/^[ \t]+|[ \t]+$/, "", requests);
69
+ if (requests == "" || requests == "<none>") return 0;
70
+ split(requests, arr, ",");
71
+ total = 0;
72
+ for (i in arr) {
73
+ if (arr[i] != "<none>") total += convert_to_mib(arr[i]);
74
+ }
75
+ return total;
76
+ }
77
+ {
78
+ namespace = $1;
79
+ name = $2;
80
+ requests = $3;
81
+ for (i=4; i<=NF; i++) {
82
+ requests = requests " " $i;
83
+ }
84
+ print namespace, name, sum_memory(requests) " Mi";
85
+ }' | sort -k3 -nr
86
+
87
+ - name: "kubectl_memory_requests_namespace"
88
+ description: "Fetch and display memory requests for all pods in a specified namespace in MiB, summing requests across multiple containers where applicable and handling binary, decimal, and millibyte units correctly."
89
+ command: |
90
+ kubectl get pods -n {{ namespace }} -o custom-columns="NAMESPACE:.metadata.namespace,NAME:.metadata.name,MEMORY_REQUEST:.spec.containers[*].resources.requests.memory" --no-headers | \
91
+ awk '
92
+ function convert_to_mib(value) {
93
+ if (value ~ /^[0-9]+e[0-9]+$/) return (value + 0) / (1024 * 1024); # Scientific notation
94
+ if (value ~ /m$/) return (value + 0) / (1024^2 * 1000); # Millibytes (m)
95
+ if (value ~ /Ei$/) return (value + 0) * 1024^6 / (1024^2); # Binary units
96
+ if (value ~ /Pi$/) return (value + 0) * 1024^5 / (1024^2);
97
+ if (value ~ /Ti$/) return (value + 0) * 1024^4 / (1024^2);
98
+ if (value ~ /Gi$/) return (value + 0) * 1024^3 / (1024^2);
99
+ if (value ~ /Mi$/) return (value + 0);
100
+ if (value ~ /Ki$/) return (value + 0) / 1024;
101
+ if (value ~ /E$/) return (value + 0) * 1000^6 / (1024^2); # Decimal units
102
+ if (value ~ /P$/) return (value + 0) * 1000^5 / (1024^2);
103
+ if (value ~ /T$/) return (value + 0) * 1000^4 / (1024^2);
104
+ if (value ~ /G$/) return (value + 0) * 1000^3 / (1024^2);
105
+ if (value ~ /M$/) return (value + 0) * 1000^2 / (1024^2);
106
+ if (value ~ /k$/) return (value + 0) * 1000 / (1024^2);
107
+ return (value + 0) / (1024 * 1024); # Default: bytes
108
+ }
109
+ function sum_memory(requests) {
110
+ gsub(/^[ \t]+|[ \t]+$/, "", requests);
111
+ if (requests == "" || requests == "<none>") return 0;
112
+ split(requests, arr, ",");
113
+ total = 0;
114
+ for (i in arr) {
115
+ if (arr[i] != "<none>") total += convert_to_mib(arr[i]);
116
+ }
117
+ return total;
118
+ }
119
+ {
120
+ namespace = $1;
121
+ name = $2;
122
+ requests = $3;
123
+ for (i=4; i<=NF; i++) {
124
+ requests = requests " " $i;
125
+ }
126
+ print namespace, name, sum_memory(requests) " Mi";
127
+ }' | sort -k3 -nr
128
+
129
+ - name: "kubernetes_jq_query"
130
+ user_description: "Query Kubernetes Resources: kubectl get {{kind}} --all-namespaces -o json | jq -r {{jq_expr}}"
131
+ description: >
132
+ Use kubectl to get json for all resources of a specific kind and pipe the results to jq to filter them. Do not worry about escaping the jq_expr; it will be done by the system on the unescaped expression that you give, e.g. give an expression like .items[] | .spec.containers[].image | select(test("^gcr.io/") | not)
133
+ command: kubectl get {{ kind }} --all-namespaces -o json | jq -r {{ jq_expr }}
134
+
135
+ - name: "kubernetes_count"
136
+ user_description: "Count Kubernetes Resources: kubectl get {{kind}} --all-namespaces -o json | jq -c -r {{ jq_expr }}"
137
+ description: >
138
+ Use kubectl get, apply a jq filter, and then count the results.
139
+ Use this whenever asked to count kubernetes resources.
140
+ Use select() to filter objects before extracting properties, e.g. .items[] | select(.metadata.namespace == "test-1") | .metadata.name
141
+ Do not worry about escaping the jq_expr it will be done by the system on an unescaped expression that you give.
142
+ e.g. give an expression like .items[] | select(.spec.containers[].image | test("^gcr.io/") | not) | .metadata.name
143
+ script: |
144
+ echo "Command executed: kubectl get {{ kind }} --all-namespaces -o json | jq -c -r {{ jq_expr }}"
145
+ echo "---"
146
+
147
+ # Execute the command and capture both stdout and stderr separately
148
+ temp_error=$(mktemp)
149
+ matches=$(kubectl get {{ kind }} --all-namespaces -o json 2>"$temp_error" | jq -c -r {{ jq_expr }} 2>>"$temp_error")
150
+ exit_code=$?
151
+ error_output=$(cat "$temp_error")
152
+ rm -f "$temp_error"
153
+
154
+ if [ $exit_code -ne 0 ]; then
155
+ echo "Error executing command (exit code: $exit_code):"
156
+ echo "$error_output"
157
+ exit $exit_code
158
+ else
159
+ # Show any stderr warnings even if command succeeded
160
+ if [ -n "$error_output" ]; then
161
+ echo "Warnings/stderr output:"
162
+ echo "$error_output"
163
+ echo "---"
164
+ fi
165
+
166
+ # Filter out empty lines and literal "null" results for an accurate count
167
+ filtered_matches=$(echo "$matches" | grep -v '^$' | grep -v '^null$')
168
+ if [ -z "$filtered_matches" ]; then
169
+ count=0
170
+ else
171
+ count=$(echo "$filtered_matches" | wc -l)
172
+ fi
173
+ preview=$(echo "$filtered_matches" | head -n 10 | cut -c 1-200 | nl)
174
+
175
+ echo "$count results"
176
+ echo "---"
177
+ echo "A *preview* of results is shown below (up to 10 results, up to 200 chars):"
178
+ echo "$preview"
179
+ fi
180
+
181
+ # NOTE: this is only possible for probes with a healthz endpoint - we do this to avoid giving the LLM generic
182
+ # http GET capabilities which are more powerful than we want to expose
183
+ #- name: "check_liveness_probe"
184
+ # description: "Run an http Kubernetes liveness probe for a given pod and return the results. Can be used to troubleshoot previous failures of the same probe assuming they fail now in the same manner."
185
+ # command: "kubectl get --raw '/api/v1/namespaces/{{pod_namespace}}/pods/{{pod_name}}:{{liveness_probe_port}}/healthz'"
186
+
187
+ #- name: "kubectl_debug_node"
188
+ # description: "Run a command on a Kubernetes node"
189
+ # command: "kubectl debug node/mynode --image=ubuntu"
190
+
191
+ #- name: "healthcheck_plugin"
192
+ # description: "Check why a kubernetes health probe is failing. First call get_healthcheck_details"
193
+ # command: "kubectl exec -n {{namespace}} {{ pod_name }} -- wget {{ url }}:{{port}}"
194
+
195
+ # try adding your own tools here!
196
+ # e.g. to query company-specific data or run your own commands
197
+
198
+ kubernetes/live-metrics:
199
+ description: "Provides real-time metrics for pods and nodes"
200
+ docs_url: "https://docs.robusta.dev/master/configuration/holmesgpt/toolsets/kubernetes.html#live-metrics"
201
+ icon_url: "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRPKA-U9m5BxYQDF1O7atMfj9EMMXEoGu4t0Q&s"
202
+ tags:
203
+ - core
204
+ tools:
205
+ - name: "kubectl_top_pods"
206
+ description: "Retrieves real-time CPU and memory usage for each pod in the cluster."
207
+ command: >
208
+ kubectl top pods -A
209
+ - name: "kubectl_top_nodes"
210
+ description: "Retrieves real-time CPU and memory usage for each node in the cluster."
211
+ command: >
212
+ kubectl top nodes
213
+
214
+ kubernetes/kube-prometheus-stack:
215
+ description: "Fetches prometheus definition"
216
+ docs_url: "https://docs.robusta.dev/master/configuration/holmesgpt/toolsets/kubernetes.html#prometheus-stack"
217
+ icon_url: "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRPKA-U9m5BxYQDF1O7atMfj9EMMXEoGu4t0Q&s"
218
+ tags:
219
+ - core
220
+ tools:
221
+ - name: "get_prometheus_target"
222
+ description: "Fetch the definition of a Prometheus target"
223
+ command: 'kubectl get --raw ''/api/v1/namespaces/{{prometheus_namespace}}/services/{{prometheus_service_name}}:9090/proxy/api/v1/targets'' | jq ''.data.activeTargets[] | select(.labels.job == "{{ target_name }}")'''
224
+
225
+ kubernetes/krew-extras: # To make this work, install kube-lineage with krew
226
+ description: "Fetches children/dependents and parents/dependencies resources using kube-lineage installed via `kubectl krew`"
227
+ docs_url: "https://docs.robusta.dev/master/configuration/holmesgpt/toolsets/kubernetes.html#resource-lineage-extras-with-krew"
228
+ icon_url: "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRPKA-U9m5BxYQDF1O7atMfj9EMMXEoGu4t0Q&s"
229
+ tags:
230
+ - cli
231
+ prerequisites:
232
+ - command: "kubectl version --client && kubectl lineage --version"
233
+ tools:
234
+ - name: "kubectl_lineage_children"
235
+ description: "Get all children/dependents of a Kubernetes resource, recursively, including their status"
236
+ command: "kubectl lineage {{ kind }} {{ name}} -n {{ namespace }}"
237
+ - name: "kubectl_lineage_parents"
238
+ description: "Get all parents/dependencies of a Kubernetes resource, recursively, including their status"
239
+ command: "kubectl lineage {{ kind }} {{ name}} -n {{ namespace }} -D"
240
+
241
+ kubernetes/kube-lineage-extras: # To make this work, build kube-lineage from source
242
+ description: "Fetches children/dependents and parents/dependencies resources using kube-lineage"
243
+ docs_url: "https://docs.robusta.dev/master/configuration/holmesgpt/toolsets/kubernetes.html#resource-lineage-extras"
244
+ icon_url: "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRPKA-U9m5BxYQDF1O7atMfj9EMMXEoGu4t0Q&s"
245
+ tags:
246
+ - cluster
247
+ prerequisites:
248
+ - command: "kubectl version --client && kube-lineage --version"
249
+ tools:
250
+ - name: "kubectl_lineage_children"
251
+ description: "Get all children/dependents of a Kubernetes resource, recursively, including their status"
252
+ command: "kube-lineage {{ kind }} {{ name}} -n {{ namespace }}"
253
+ - name: "kubectl_lineage_parents"
254
+ description: "Get all parents/dependencies of a Kubernetes resource, recursively, including their status"
255
+ command: "kube-lineage {{ kind }} {{ name}} -n {{ namespace }} -D"
@@ -0,0 +1,426 @@
1
+ import logging
2
+ import re
3
+ import subprocess
4
+ from typing import Optional, List, Tuple
5
+ from pydantic import BaseModel
6
+
7
+ from holmes.common.env_vars import KUBERNETES_LOGS_TIMEOUT_SECONDS
8
+ from holmes.core.tools import (
9
+ StaticPrerequisite,
10
+ StructuredToolResult,
11
+ ToolResultStatus,
12
+ ToolsetTag,
13
+ )
14
+ from holmes.plugins.toolsets.logging_utils.logging_api import (
15
+ BasePodLoggingToolset,
16
+ FetchPodLogsParams,
17
+ LoggingConfig,
18
+ PodLoggingTool,
19
+ )
20
+ from holmes.plugins.toolsets.utils import process_timestamps_to_int, to_unix_ms
21
+
22
+
23
# Matches an ISO 8601 timestamp at the very start of a string and captures it in
# the named group "ts". Accepted forms:
#   YYYY-MM-DDTHH:MM:SS[.fffffffff]Z
#   YYYY-MM-DDTHH:MM:SS[.fffffffff]+/-XX:XX
# The pattern has no end anchor, so arbitrary text may follow the timestamp.
timestamp_pattern = re.compile(
    r"^(?P<ts>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:\d{2}))"
)
27
+
28
+
29
class Pod(BaseModel):
    """Pydantic model describing a pod by the list of its containers."""

    # presumably the container names of the pod — TODO confirm against callers
    containers: list[str]
31
+
32
+
33
class StructuredLog(BaseModel):
    """One parsed log line: optional timestamp, optional container and the text."""

    # Value returned by to_unix_ms() for the line's leading timestamp (presumably
    # Unix epoch milliseconds — confirm against that helper); None when the line
    # has no timestamp or the timestamp could not be converted.
    timestamp_ms: Optional[int]
    # Container name taken from the "[pod/container]" prefix that kubectl adds;
    # optional because not every line necessarily carries such a prefix.
    container: Optional[str]
    # Text of the log line that remains after the recognised prefix parts.
    content: str
37
+
38
+
39
class LogResult(BaseModel):
    """Outcome of one ``kubectl logs`` invocation (or of parsing its output)."""

    # Human-readable failure description; None when the output was parsed.
    error: Optional[str]
    # Exit code of the kubectl process; None when kubectl timed out or could
    # not be executed, and also None on a freshly parsed result until the
    # caller fills it in.
    return_code: Optional[int]
    # True when parsed lines carried more than one distinct container name.
    has_multiple_containers: bool
    # Parsed log lines; empty on failure.
    logs: list[StructuredLog]
44
+
45
+
46
+ class KubernetesLogsToolset(BasePodLoggingToolset):
47
+ """Implementation of the unified logging API for Kubernetes logs using kubectl commands"""
48
+
49
def __init__(self):
    """Register the ``kubernetes/logs`` toolset and gate it on kubectl availability.

    The toolset is handed to the parent constructor with a prerequisite that
    starts out disabled (reason "Initializing"). After the parent constructor
    has run, the result of ``health_check()`` is written onto that same
    prerequisite object.
    """
    kubectl_gate = StaticPrerequisite(enabled=False, disabled_reason="Initializing")
    pod_logging_tool = PodLoggingTool(self)

    super().__init__(
        name="kubernetes/logs",
        description="Read Kubernetes pod logs using a unified API",
        docs_url="https://docs.robusta.dev/master/configuration/holmesgpt/toolsets/kubernetes.html#logs",
        icon_url="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRPKA-U9m5BxYQDF1O7atMfj9EMMXEoGu4t0Q&s",
        prerequisites=[kubectl_gate],
        is_default=True,
        tools=[pod_logging_tool],
        tags=[ToolsetTag.CORE],
    )

    # Replace the placeholder state with the real kubectl probe outcome.
    kubectl_gate.enabled, kubectl_gate.disabled_reason = self.health_check()
66
+
67
def health_check(self) -> Tuple[bool, str]:
    """Report whether the kubectl client binary can be executed.

    Runs ``kubectl version --client`` with a 10 second timeout.

    Returns:
        ``(True, "")`` when the command exits with code 0, otherwise
        ``(False, reason)`` where ``reason`` describes what went wrong
        (non-zero exit, timeout, missing binary or any other error).
    """
    probe_cmd = ["kubectl", "version", "--client"]
    try:
        probe = subprocess.run(
            probe_cmd,
            capture_output=True,
            text=True,
            timeout=10,
        )
    except subprocess.TimeoutExpired:
        return False, "kubectl command timed out"
    except FileNotFoundError:
        return False, "kubectl command not found"
    except Exception as e:
        return False, f"kubectl health check error: {str(e)}"

    if probe.returncode != 0:
        return False, f"kubectl command failed: {probe.stderr}"
    return True, ""
86
+
87
def get_example_config(self):
    """Return an example configuration: a default ``LoggingConfig`` dumped with ``model_dump()``."""
    default_config = LoggingConfig()
    return default_config.model_dump()
89
+
90
def fetch_pod_logs(self, params: FetchPodLogsParams) -> StructuredToolResult:
    """Fetch, merge, filter and format the logs of a single pod.

    ``kubectl logs`` is invoked twice through ``_fetch_kubectl_logs``: first
    with ``--previous`` and then without it. Lines from the ``--previous`` run
    are placed ahead of the lines from the current run before ``filter_logs``
    and ``format_logs`` are applied.

    Args:
        params: Request parameters. ``pod_name`` and ``namespace`` select the
            pod; the whole object is also passed to ``filter_logs``.

    Returns:
        A ``StructuredToolResult`` whose status is

        * ``ERROR`` when no log line was collected and both kubectl runs
          reported an error (the current run's error is surfaced), or when
          an unexpected exception is raised;
        * ``NO_DATA`` when nothing is left after filtering;
        * ``SUCCESS`` with the formatted logs in ``data`` otherwise.
    """
    try:
        # The --previous run is issued before the current one.
        prior_run = self._fetch_kubectl_logs(params=params, previous=True)
        live_run = self._fetch_kubectl_logs(params=params, previous=False)

        # Report the exit code of the run that contributed log lines,
        # preferring the current run; when neither produced lines the
        # current run's exit code is used.
        exit_status: Optional[int]
        if prior_run.logs and not live_run.logs:
            exit_status = prior_run.return_code
        else:
            exit_status = live_run.return_code

        merged: list[StructuredLog] = [*prior_run.logs, *live_run.logs]

        if not merged and prior_run.error and live_run.error:
            # Both commands failed - return error from current logs
            return StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error=live_run.error,
                params=params.model_dump(),
                return_code=exit_status,
            )

        merged = filter_logs(merged, params)
        if not merged:
            return StructuredToolResult(
                status=ToolResultStatus.NO_DATA,
                params=params.model_dump(),
                return_code=exit_status,
            )

        # Container names are only shown when either run saw several of them.
        show_container = (
            prior_run.has_multiple_containers or live_run.has_multiple_containers
        )
        rendered = format_logs(logs=merged, display_container_name=show_container)

        return StructuredToolResult(
            status=ToolResultStatus.SUCCESS,
            data=rendered,
            params=params.model_dump(),
            return_code=exit_status,
        )
    except Exception as e:
        logging.exception(f"Error fetching logs for pod {params.pod_name}")
        return StructuredToolResult(
            status=ToolResultStatus.ERROR,
            error=f"Error fetching logs: {str(e)}",
            params=params.model_dump(),
        )
157
+
158
def _fetch_kubectl_logs(
    self,
    params: FetchPodLogsParams,
    previous: bool = False,
) -> LogResult:
    """Run ``kubectl logs`` for one pod and convert the outcome to a ``LogResult``.

    The command always requests all containers, timestamps and the
    ``[pod/container]`` line prefix. stderr is merged into stdout, so on a
    non-zero exit the captured output doubles as the error message.

    Args:
        params: Supplies ``pod_name`` and ``namespace`` for the command.
        previous: When true, ``--previous`` is appended to the command.

    Returns:
        The parsed logs with ``return_code`` set when kubectl exits with 0.
        Otherwise a ``LogResult`` with no logs and an ``error`` message;
        ``return_code`` is kubectl's exit code for a non-zero exit and
        ``None`` for a timeout or any other exception.
    """

    def failure(message: str, code: Optional[int]) -> LogResult:
        # Every failure path reports the same shape: no logs, just the reason.
        return LogResult(
            logs=[],
            error=message,
            return_code=code,
            has_multiple_containers=False,
        )

    kubectl_argv = [
        "kubectl",
        "logs",
        params.pod_name,
        "-n",
        params.namespace,
        "--all-containers=true",
        "--timestamps=true",
        "--prefix=true",
        *(["--previous"] if previous else []),
    ]

    try:
        result = subprocess.run(
            kubectl_argv,
            text=True,
            timeout=KUBERNETES_LOGS_TIMEOUT_SECONDS,
            check=False,  # do not throw error, we just return the error code
            stdin=subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )

        if result.returncode != 0:
            error_msg = (
                result.stdout.strip()
                or f"kubectl logs command failed with return code {result.returncode}"
            )
            logging.debug(
                f"kubectl logs command failed for pod {params.pod_name} "
                f"(previous={previous}): {error_msg}"
            )
            return failure(error_msg, result.returncode)

        # Parse the logs - kubectl with --all-containers prefixes lines with container name
        parsed = self._parse_kubectl_logs(logs=result.stdout)
        parsed.return_code = result.returncode
        return parsed

    except subprocess.TimeoutExpired:
        error_msg = f"kubectl logs command timed out after {KUBERNETES_LOGS_TIMEOUT_SECONDS} seconds"
        logging.warning(
            f"kubectl logs command timed out for pod {params.pod_name} "
            f"(previous={previous})"
        )
        return failure(error_msg, None)
    except Exception as e:
        error_msg = f"Error executing kubectl: {str(e)}"
        logging.error(
            f"Error executing kubectl logs for pod {params.pod_name} "
            f"(previous={previous}): {str(e)}"
        )
        return failure(error_msg, None)
234
+
235
def _parse_kubectl_logs(self, logs: str) -> LogResult:
    """Turn raw ``kubectl logs`` output into a ``LogResult``.

    Lines shaped like ``[pod/container] <rest>`` are split into the container
    name and the remainder; when the remainder starts with a parseable
    timestamp, that timestamp is stripped from the content and stored as
    ``timestamp_ms``. Lines without such a prefix are delegated to
    ``parse_logs`` with no container name.

    ``has_multiple_containers`` becomes True as soon as two different
    container names have been seen. ``return_code`` and ``error`` are always
    left as ``None`` here.
    """
    entries: List[StructuredLog] = []
    multiple_containers = False

    if logs:
        first_container: Optional[str] = None

        for raw_line in logs.strip().split("\n"):
            if not raw_line:
                continue

            # kubectl with --all-containers prefixes lines with [pod/container]
            # Format: [pod/container] timestamp content
            prefixed = re.match(r"^\[([^/]+)/([^\]]+)\] (.*)$", raw_line)
            if not prefixed:
                # No container prefix - parse as regular log line
                entries.extend(parse_logs(raw_line, None))
                continue

            _pod, container, remainder = prefixed.groups()

            # Track whether more than one distinct container shows up.
            if not multiple_containers:
                if not first_container:
                    first_container = container
                elif first_container != container:
                    multiple_containers = True

            entry = None
            stamp_match = timestamp_pattern.match(remainder)
            if stamp_match:
                stamp_text = stamp_match.group(0)
                body = remainder[len(stamp_text):]
                # Drop only the single separator space after the timestamp so
                # the original indentation of the log line is preserved.
                if body.startswith(" "):
                    body = body[1:]
                try:
                    entry = StructuredLog(
                        timestamp_ms=to_unix_ms(stamp_text),
                        content=body,
                        container=container,
                    )
                except ValueError:
                    # Regex matched but the timestamp could not be converted.
                    entry = None

            if entry is None:
                # Keep the container info, but with no timestamp and the
                # remainder of the line left untouched.
                entry = StructuredLog(
                    timestamp_ms=None,
                    content=remainder,
                    container=container,
                )
            entries.append(entry)

    return LogResult(
        logs=entries,
        error=None,
        return_code=None,
        has_multiple_containers=multiple_containers,
    )
319
+
320
+
321
def format_logs(logs: List[StructuredLog], display_container_name: bool) -> str:
    """Join log entries into one newline-separated string.

    When ``display_container_name`` is true each entry is rendered as
    ``"<container>: <content>"``, with ``N/A`` standing in for a missing or
    empty container name; otherwise only the content is emitted.
    """
    if not display_container_name:
        return "\n".join(entry.content for entry in logs)

    rendered = []
    for entry in logs:
        label = entry.container or "N/A"
        rendered.append(f"{label}: {entry.content}")
    return "\n".join(rendered)
326
+
327
+
328
# Time window that filter_logs applies to entries carrying a timestamp.
# Both bounds are inclusive: filter_logs only drops an entry whose
# timestamp_ms is strictly below start_ms or strictly above end_ms.
class TimeFilter(BaseModel):
    # Lower bound of the window, in milliseconds (same unit as timestamp_ms).
    start_ms: int
    # Upper bound of the window, in milliseconds (same unit as timestamp_ms).
    end_ms: int
331
+
332
+
333
def filter_logs(
    logs: List[StructuredLog], params: FetchPodLogsParams
) -> List[StructuredLog]:
    """Return the entries of ``logs`` selected by ``params``, oldest first.

    The input list is NOT modified: ordering is done on a sorted copy, so
    callers can keep using ``logs`` in its original order.

    Selection rules, applied in this order:

    * Entries are ordered by ``timestamp_ms``; entries without a timestamp
      sort as ``0`` and therefore come first. The sort is stable.
    * ``params.filter``: case-insensitive substring that must occur in the
      entry's content.
    * ``params.start_time`` / ``params.end_time``: when either is set, a time
      window is built through ``process_timestamps_to_int`` (its result is
      multiplied by 1000 to compare against ``timestamp_ms``). Entries with
      no timestamp are never excluded by the window.
    * ``params.limit``: when set and smaller than the number of remaining
      entries, only the last ``limit`` entries are kept.

    Returns:
        A new list holding the selected entries.
    """
    time_filter: Optional[TimeFilter] = None
    if params.start_time or params.end_time:
        start, end = process_timestamps_to_int(
            start=params.start_time,
            end=params.end_time,
            default_time_span_seconds=3600,
        )
        time_filter = TimeFilter(start_ms=start * 1000, end_ms=end * 1000)

    # Lower-case the search term once instead of on every iteration.
    needle = params.filter.lower() if params.filter else None

    # sorted() instead of list.sort(): reordering the caller's list in place
    # was a hidden side effect of a function that already returns its result.
    ordered = sorted(logs, key=lambda entry: entry.timestamp_ms or 0)

    filtered_logs = []
    for log in ordered:
        if needle and needle not in log.content.lower():
            # content does not contain the search term
            continue

        if time_filter and log.timestamp_ms:
            outside_window = (
                log.timestamp_ms < time_filter.start_ms  # before the window
                or log.timestamp_ms > time_filter.end_ms  # after the window
            )
            if outside_window:
                continue

        filtered_logs.append(log)

    if params.limit and params.limit < len(filtered_logs):
        # keep the most recent entries
        filtered_logs = filtered_logs[-params.limit:]
    return filtered_logs
371
+
372
+
373
def parse_logs(
    logs: Optional[str], container_name: Optional[str]
) -> list[StructuredLog]:
    """Split raw log text into ``StructuredLog`` entries.

    Each line that starts with a timestamp opens a new entry: the timestamp
    is converted with ``to_unix_ms`` and removed from the content together
    with the single space that follows it. A line without a timestamp is
    appended (newline-joined) to the previous entry, or becomes its own
    timestamp-less entry when there is no previous one. If the timestamp
    matches the pattern but cannot be converted, the whole line is kept
    verbatim with ``timestamp_ms=None``.

    Returns an empty list when ``logs`` is ``None`` or empty.
    """
    entries = []
    if not logs:
        return entries

    def untimed(text):
        # Entry that carries no timestamp, tagged with the given container.
        return StructuredLog(
            timestamp_ms=None, content=text, container=container_name
        )

    for raw in logs.strip().split("\n"):
        if not isinstance(raw, str):
            # defensive code given logs are from an external API
            entries.append(untimed(str(raw)))
            continue

        stamp_match = timestamp_pattern.match(raw)
        if not stamp_match:
            if entries:
                # if a line has no timestamp, assume it is part of a previous line
                entries[-1].content += "\n" + raw
            else:
                entries.append(untimed(raw))
            continue

        stamp_text = stamp_match.group(0)
        # Remove only the single space after timestamp, preserve other whitespace
        body = raw[len(stamp_text):]
        if body.startswith(" "):
            body = body[1:]

        try:
            entry = StructuredLog(
                timestamp_ms=to_unix_ms(stamp_text),
                content=body,
                container=container_name,
            )
        except ValueError:
            # For invalid timestamp formats (when regex matches but date parsing fails)
            # keep the original line - this is important for testing and consistency
            entry = untimed(raw)
        entries.append(entry)

    return entries