holmesgpt 0.16.2a0__py3-none-any.whl → 0.18.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162)
  1. holmes/__init__.py +3 -5
  2. holmes/clients/robusta_client.py +4 -3
  3. holmes/common/env_vars.py +18 -2
  4. holmes/common/openshift.py +1 -1
  5. holmes/config.py +11 -6
  6. holmes/core/conversations.py +30 -13
  7. holmes/core/investigation.py +21 -25
  8. holmes/core/investigation_structured_output.py +3 -3
  9. holmes/core/issue.py +1 -1
  10. holmes/core/llm.py +50 -31
  11. holmes/core/models.py +19 -17
  12. holmes/core/openai_formatting.py +1 -1
  13. holmes/core/prompt.py +47 -2
  14. holmes/core/runbooks.py +1 -0
  15. holmes/core/safeguards.py +4 -2
  16. holmes/core/supabase_dal.py +4 -2
  17. holmes/core/tool_calling_llm.py +102 -141
  18. holmes/core/tools.py +19 -28
  19. holmes/core/tools_utils/token_counting.py +9 -2
  20. holmes/core/tools_utils/tool_context_window_limiter.py +13 -30
  21. holmes/core/tools_utils/tool_executor.py +0 -18
  22. holmes/core/tools_utils/toolset_utils.py +1 -0
  23. holmes/core/toolset_manager.py +37 -2
  24. holmes/core/tracing.py +13 -2
  25. holmes/core/transformers/__init__.py +1 -1
  26. holmes/core/transformers/base.py +1 -0
  27. holmes/core/transformers/llm_summarize.py +3 -2
  28. holmes/core/transformers/registry.py +2 -1
  29. holmes/core/transformers/transformer.py +1 -0
  30. holmes/core/truncation/compaction.py +37 -2
  31. holmes/core/truncation/input_context_window_limiter.py +3 -2
  32. holmes/interactive.py +52 -8
  33. holmes/main.py +17 -37
  34. holmes/plugins/interfaces.py +2 -1
  35. holmes/plugins/prompts/__init__.py +2 -1
  36. holmes/plugins/prompts/_fetch_logs.jinja2 +5 -5
  37. holmes/plugins/prompts/_runbook_instructions.jinja2 +2 -1
  38. holmes/plugins/prompts/base_user_prompt.jinja2 +7 -0
  39. holmes/plugins/prompts/conversation_history_compaction.jinja2 +2 -1
  40. holmes/plugins/prompts/generic_ask.jinja2 +0 -2
  41. holmes/plugins/prompts/generic_ask_conversation.jinja2 +0 -2
  42. holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +0 -2
  43. holmes/plugins/prompts/generic_investigation.jinja2 +0 -2
  44. holmes/plugins/prompts/investigation_procedure.jinja2 +2 -1
  45. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +0 -2
  46. holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +0 -2
  47. holmes/plugins/runbooks/__init__.py +32 -3
  48. holmes/plugins/sources/github/__init__.py +4 -2
  49. holmes/plugins/sources/prometheus/models.py +1 -0
  50. holmes/plugins/toolsets/__init__.py +30 -26
  51. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +13 -12
  52. holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +3 -2
  53. holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
  54. holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +3 -2
  55. holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +3 -1
  56. holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +3 -1
  57. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +12 -12
  58. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +7 -7
  59. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +7 -7
  60. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -5
  61. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +3 -3
  62. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +7 -7
  63. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +6 -8
  64. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +3 -3
  65. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +3 -3
  66. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +3 -3
  67. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +3 -3
  68. holmes/plugins/toolsets/azure_sql/utils.py +0 -32
  69. holmes/plugins/toolsets/bash/argocd/__init__.py +3 -3
  70. holmes/plugins/toolsets/bash/aws/__init__.py +4 -4
  71. holmes/plugins/toolsets/bash/azure/__init__.py +4 -4
  72. holmes/plugins/toolsets/bash/bash_toolset.py +2 -3
  73. holmes/plugins/toolsets/bash/common/bash.py +19 -9
  74. holmes/plugins/toolsets/bash/common/bash_command.py +1 -1
  75. holmes/plugins/toolsets/bash/common/stringify.py +1 -1
  76. holmes/plugins/toolsets/bash/kubectl/__init__.py +2 -1
  77. holmes/plugins/toolsets/bash/kubectl/constants.py +0 -1
  78. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +3 -4
  79. holmes/plugins/toolsets/bash/parse_command.py +12 -13
  80. holmes/plugins/toolsets/connectivity_check.py +124 -0
  81. holmes/plugins/toolsets/coralogix/api.py +132 -119
  82. holmes/plugins/toolsets/coralogix/coralogix.jinja2 +14 -0
  83. holmes/plugins/toolsets/coralogix/toolset_coralogix.py +219 -0
  84. holmes/plugins/toolsets/coralogix/utils.py +15 -79
  85. holmes/plugins/toolsets/datadog/datadog_api.py +36 -3
  86. holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +34 -1
  87. holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +3 -3
  88. holmes/plugins/toolsets/datadog/datadog_models.py +59 -0
  89. holmes/plugins/toolsets/datadog/datadog_url_utils.py +213 -0
  90. holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +165 -28
  91. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +71 -28
  92. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +224 -375
  93. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +67 -36
  94. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +360 -343
  95. holmes/plugins/toolsets/elasticsearch/__init__.py +6 -0
  96. holmes/plugins/toolsets/elasticsearch/elasticsearch.py +834 -0
  97. holmes/plugins/toolsets/git.py +7 -8
  98. holmes/plugins/toolsets/grafana/base_grafana_toolset.py +16 -4
  99. holmes/plugins/toolsets/grafana/common.py +2 -30
  100. holmes/plugins/toolsets/grafana/grafana_tempo_api.py +2 -1
  101. holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +18 -2
  102. holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +92 -18
  103. holmes/plugins/toolsets/grafana/loki_api.py +4 -0
  104. holmes/plugins/toolsets/grafana/toolset_grafana.py +109 -25
  105. holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +22 -0
  106. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +201 -33
  107. holmes/plugins/toolsets/grafana/trace_parser.py +3 -2
  108. holmes/plugins/toolsets/internet/internet.py +10 -10
  109. holmes/plugins/toolsets/internet/notion.py +5 -6
  110. holmes/plugins/toolsets/investigator/core_investigation.py +3 -3
  111. holmes/plugins/toolsets/investigator/model.py +3 -1
  112. holmes/plugins/toolsets/json_filter_mixin.py +134 -0
  113. holmes/plugins/toolsets/kafka.py +12 -7
  114. holmes/plugins/toolsets/kubernetes.yaml +260 -30
  115. holmes/plugins/toolsets/kubernetes_logs.py +3 -3
  116. holmes/plugins/toolsets/logging_utils/logging_api.py +16 -6
  117. holmes/plugins/toolsets/mcp/toolset_mcp.py +88 -60
  118. holmes/plugins/toolsets/newrelic/new_relic_api.py +41 -1
  119. holmes/plugins/toolsets/newrelic/newrelic.jinja2 +24 -0
  120. holmes/plugins/toolsets/newrelic/newrelic.py +212 -55
  121. holmes/plugins/toolsets/prometheus/prometheus.py +358 -102
  122. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +11 -3
  123. holmes/plugins/toolsets/rabbitmq/api.py +23 -4
  124. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +5 -5
  125. holmes/plugins/toolsets/robusta/robusta.py +5 -5
  126. holmes/plugins/toolsets/runbook/runbook_fetcher.py +25 -6
  127. holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +1 -1
  128. holmes/plugins/toolsets/utils.py +1 -1
  129. holmes/utils/config_utils.py +1 -1
  130. holmes/utils/connection_utils.py +31 -0
  131. holmes/utils/console/result.py +10 -0
  132. holmes/utils/file_utils.py +2 -1
  133. holmes/utils/global_instructions.py +10 -26
  134. holmes/utils/holmes_status.py +4 -3
  135. holmes/utils/log.py +15 -0
  136. holmes/utils/markdown_utils.py +2 -3
  137. holmes/utils/memory_limit.py +58 -0
  138. holmes/utils/sentry_helper.py +23 -0
  139. holmes/utils/stream.py +12 -5
  140. holmes/utils/tags.py +4 -3
  141. holmes/version.py +3 -1
  142. {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/METADATA +12 -10
  143. holmesgpt-0.18.4.dist-info/RECORD +258 -0
  144. holmes/plugins/toolsets/aws.yaml +0 -80
  145. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +0 -114
  146. holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +0 -310
  147. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +0 -736
  148. holmes/plugins/toolsets/grafana/grafana_api.py +0 -64
  149. holmes/plugins/toolsets/opensearch/__init__.py +0 -0
  150. holmes/plugins/toolsets/opensearch/opensearch.py +0 -250
  151. holmes/plugins/toolsets/opensearch/opensearch_logs.py +0 -161
  152. holmes/plugins/toolsets/opensearch/opensearch_traces.py +0 -215
  153. holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +0 -12
  154. holmes/plugins/toolsets/opensearch/opensearch_utils.py +0 -166
  155. holmes/utils/keygen_utils.py +0 -6
  156. holmesgpt-0.16.2a0.dist-info/RECORD +0 -258
  157. holmes/plugins/toolsets/{opensearch → elasticsearch}/opensearch_ppl_query_docs.jinja2 +0 -0
  158. holmes/plugins/toolsets/{opensearch → elasticsearch}/opensearch_query_assist.py +2 -2
  159. /holmes/plugins/toolsets/{opensearch → elasticsearch}/opensearch_query_assist_instructions.jinja2 +0 -0
  160. {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/LICENSE +0 -0
  161. {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/WHEEL +0 -0
  162. {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/entry_points.txt +0 -0

holmes/plugins/toolsets/kubernetes.yaml
@@ -89,12 +89,124 @@ toolsets:
  - name: "kubernetes_jq_query"
  user_description: "Query Kubernetes Resources: kubectl get {{kind}} --all-namespaces -o json | jq -r {{jq_expr}}"
  description: >
- Use kubectl to get json for all resources of a specific kind pipe the results to jq to filter them. Do not worry about escaping the jq_expr it will be done by the system on an unescaped expression that you give. e.g. give an expression like .items[] | .spec.containers[].image | select(test("^gcr.io/") | not)
- command: kubectl get {{ kind }} --all-namespaces -o json | jq -r {{ jq_expr }}
+ Use kubectl to get json for all resources of a specific kind and filter with jq.
+ IMPORTANT: The 'kind' parameter must be the plural form of the resource type
+ (e.g., use "pods" not "pod", "services" not "service", "jobs" not "job").
+ Do not worry about escaping the jq_expr - it will be done by the system.
+ Example: .items[] | .spec.containers[].image | select(test("^gcr.io/") | not)
+ script: |
+ #!/bin/bash
+
+ echo "Executing paginated query for {{ kind }} resources..."
+ echo "Expression: {{ jq_expr }}"
+ echo "---"
+
+ # Get the API path for the resource kind using kubectl
+ API_INFO=$(kubectl api-resources --no-headers | grep "^{{ kind }} " | head -1)
+
+ if [ -z "$API_INFO" ]; then
+ echo "Error: Unable to find resource kind '{{ kind }}'" >&2
+ exit 1
+ fi
+
+ # Extract NAMESPACED value
+ if [[ "$API_INFO" == *" true "* ]]; then
+ NAMESPACED="true"
+ PREFIX=$(echo "$API_INFO" | sed 's/ true .*//')
+ elif [[ "$API_INFO" == *" false "* ]]; then
+ NAMESPACED="false"
+ PREFIX=$(echo "$API_INFO" | sed 's/ false .*//')
+ else
+ echo "Error: Could not find NAMESPACED field (true/false) in API info" >&2
+ exit 1
+ fi
+
+ # Trim trailing spaces from prefix and collapse internal spaces
+ PREFIX=$(echo "$PREFIX" | sed 's/ *$//' | sed 's/ */ /g')
+
+ IFS=' ' read -ra PREFIX_FIELDS <<< "$PREFIX"
+ FIELD_COUNT=0
+ for field in "${PREFIX_FIELDS[@]}"; do
+ ((FIELD_COUNT++))
+ done
+
+ RESOURCE_NAME="${PREFIX_FIELDS[0]}"
+ if [ $FIELD_COUNT -ge 2 ]; then
+ API_VERSION="${PREFIX_FIELDS[$((FIELD_COUNT - 1))]}"
+ else
+ API_VERSION=""
+ fi
+
+ if [ -z "$API_VERSION" ] || [ -z "$RESOURCE_NAME" ]; then
+ echo "Error: Unable to parse API info for resource kind '{{ kind }}'" >&2
+ exit 1
+ fi
+
+ # Build API path
+ if [[ "$API_VERSION" == "v1" ]]; then
+ API_PATH="/api/v1/${RESOURCE_NAME}"
+ else
+ API_PATH="/apis/${API_VERSION}/${RESOURCE_NAME}"
+ fi
+
+ # Process resources in chunks using API pagination
+ LIMIT=500 # Process 500 items at a time
+ CONTINUE=""
+ PROCESSED=0
+ TOTAL_MATCHES=0
+
+ while true; do
+ # Build API query with limit and continue token
+ if [ -z "$CONTINUE" ]; then
+ # First request - get from all namespaces
+ QUERY="${API_PATH}?limit=${LIMIT}"
+ else
+ # Subsequent requests with continue token
+ QUERY="${API_PATH}?limit=${LIMIT}&continue=${CONTINUE}"
+ fi
+
+ OUTPUT=$(kubectl get --raw "$QUERY" 2>&1)
+ exit_code=$?
+
+ if [ $exit_code -ne 0 ]; then
+ echo "Error: $OUTPUT" >&2
+ exit $exit_code
+ fi
+
+ ITEMS_COUNT=$(echo "$OUTPUT" | jq '.items | length')
+
+ MATCHES=$(echo "$OUTPUT" | jq -r {{ jq_expr }} 2>&1)
+ jq_exit=$?
+ if [ $jq_exit -ne 0 ]; then
+ echo "Error: jq expression failed: $MATCHES" >&2
+ exit $jq_exit
+ fi
+
+ if [ "$ITEMS_COUNT" -gt 0 ]; then
+ if [ -n "$MATCHES" ]; then
+ echo "$MATCHES"
+ MATCH_COUNT=$(echo "$MATCHES" | grep -c . || true)
+ TOTAL_MATCHES=$((TOTAL_MATCHES + MATCH_COUNT))
+ fi
+
+ PROCESSED=$((PROCESSED + ITEMS_COUNT))
+
+ echo "Processed $PROCESSED items, found $TOTAL_MATCHES matches so far..." >&2
+ fi
+
+ CONTINUE=$(echo "$OUTPUT" | jq -r '.metadata.continue // empty')
+
+ if [ -z "$CONTINUE" ]; then
+ break
+ fi
+ done
+
+ echo "---" >&2
+ echo "Total items processed: $PROCESSED, matches found: $TOTAL_MATCHES" >&2
  transformers:
  - name: llm_summarize
  config:
- input_threshold: 1000
+ input_threshold: 10000
  prompt: |
  Summarize this jq query output focusing on:
  - Key patterns and commonalities in the data
@@ -106,52 +218,170 @@ toolsets:
  - Be concise: aim for ≤ 50% of the original text; prioritize aggregates and actionable outliers
  - Include grep-ready keys/values; avoid repeating entire objects or unchanged defaults

+ - name: "kubernetes_tabular_query"
+ user_description: "Tabular output of specific fields: kubectl get {{kind}} --all-namespaces -o custom-columns={{columns}}"
+ description: >
+ Extract specific fields from Kubernetes resources in tabular format with optional filtering.
+ Memory-efficient way to query large clusters - only requested fields are transmitted.
+ Column specification format: HEADER:FIELD_PATH,HEADER2:FIELD_PATH2,...
+
+ Optional filtering parameter:
+ - filter_pattern: Pattern to match in any column (supports grep regex)
+
+ Examples:
+ - Basic fields: NAME:.metadata.name,STATUS:.status.phase,NODE:.spec.nodeName
+ - Filter by status: filter_pattern="Running"
+ - Filter out lines with <none>: filter_pattern="-v '<none>'"
+ - Nested fields: CREATED:.metadata.creationTimestamp,IMAGE:.spec.containers[0].image
+ - Array fields: LABELS:.metadata.labels,PORTS:.spec.ports[*].port
+
+ Note: Output is tabular text with column headers. Filtering works on the entire line.
+ Note: not allowed characters are: ' / ; and newline
+ command: kubectl get {{ kind }} --all-namespaces -o custom-columns='{{ columns }}'{% if filter_pattern %} | (head -n 1; tail -n +2 | grep {{ filter_pattern }}){% endif %}
+ transformers:
+ - name: llm_summarize
+ config:
+ input_threshold: 10000
+ prompt: |
+ Summarize this tabular output focusing on:
+ - Key patterns and trends in the data
+ - Resources that need attention (errors, pending, failures)
+ - Group similar items into aggregate descriptions
+ - Highlight outliers or unusual values
+ - Mention specific resource names only for problematic items
+ - Provide counts and distributions where relevant
+ - Be concise: aim for ≤ 50% of the original size
+ - Keep output actionable and focused on anomalies
+
  - name: "kubernetes_count"
  user_description: "Count Kubernetes Resources: kubectl get {{kind}} --all-namespaces -o json | jq -c -r {{ jq_expr }}"
  description: >
  Use kubectl to get apply a jq filter and then count the results.
  Use this whenever asked to count kubernetes resources.
+ IMPORTANT: The 'kind' parameter must be the plural form of the resource type
+ (e.g., use "pods" not "pod", "services" not "service", "jobs" not "job").
  Use select() to filter objects before extracting properties, e.g. .items[] | select(.metadata.namespace == "test-1") | .metadata.name
  Do not worry about escaping the jq_expr it will be done by the system on an unescaped expression that you give.
  e.g. give an expression like .items[] | select(.spec.containers[].image | test("^gcr.io/") | not) | .metadata.name
  script: |
+ #!/bin/bash
+
  echo "Command executed: kubectl get {{ kind }} --all-namespaces -o json | jq -c -r {{ jq_expr }}"
  echo "---"

- # Execute the command and capture both stdout and stderr separately
- temp_error=$(mktemp)
- matches=$(kubectl get {{ kind }} --all-namespaces -o json 2>"$temp_error" | jq -c -r {{ jq_expr }} 2>>"$temp_error")
- exit_code=$?
- error_output=$(cat "$temp_error")
- rm -f "$temp_error"
-
- if [ $exit_code -ne 0 ]; then
- echo "Error executing command (exit code: $exit_code):"
- echo "$error_output"
- exit $exit_code
+ # Get the API path for the resource kind
+ API_INFO=$(kubectl api-resources --no-headers | grep "^{{ kind }} " | head -1)
+
+ if [ -z "$API_INFO" ]; then
+ echo "Error: Unable to find resource kind '{{ kind }}'" >&2
+ exit 1
+ fi
+
+ if [[ "$API_INFO" == *" true "* ]]; then
+ NAMESPACED="true"
+ PREFIX=$(echo "$API_INFO" | sed 's/ true .*//')
+ elif [[ "$API_INFO" == *" false "* ]]; then
+ NAMESPACED="false"
+ PREFIX=$(echo "$API_INFO" | sed 's/ false .*//')
  else
- # Show any stderr warnings even if command succeeded
- if [ -n "$error_output" ]; then
- echo "Warnings/stderr output:"
- echo "$error_output"
- echo "---"
- fi
+ echo "Error: Could not find NAMESPACED field (true/false) in API info" >&2
+ exit 1
+ fi
+
+ PREFIX=$(echo "$PREFIX" | sed 's/ *$//' | sed 's/ */ /g')
+
+ IFS=' ' read -ra PREFIX_FIELDS <<< "$PREFIX"
+ FIELD_COUNT=0
+ for field in "${PREFIX_FIELDS[@]}"; do
+ ((FIELD_COUNT++))
+ done
+ RESOURCE_NAME="${PREFIX_FIELDS[0]}"
+
+ if [ $FIELD_COUNT -ge 2 ]; then
+ API_VERSION="${PREFIX_FIELDS[$((FIELD_COUNT - 1))]}"
+ else
+ API_VERSION=""
+ fi
+
+ if [ -z "$API_VERSION" ] || [ -z "$RESOURCE_NAME" ]; then
+ echo "Error: Unable to parse API info for resource kind '{{ kind }}'" >&2
+ exit 1
+ fi

- # Filter out empty lines for accurate count
- filtered_matches=$(echo "$matches" | grep -v '^$' | grep -v '^null$')
- if [ -z "$filtered_matches" ]; then
- count=0
+ # Build API path
+ if [[ "$API_VERSION" == "v1" ]]; then
+ API_PATH="/api/v1/${RESOURCE_NAME}"
+ else
+ API_PATH="/apis/${API_VERSION}/${RESOURCE_NAME}"
+ fi
+
+ # Process resources in chunks using API pagination
+ LIMIT=500
+ CONTINUE=""
+ ALL_MATCHES=""
+ BATCH_NUM=0
+ TOTAL_PROCESSED=0
+
+ while true; do
+ BATCH_NUM=$((BATCH_NUM + 1))
+
+ if [ -z "$CONTINUE" ]; then
+ QUERY="${API_PATH}?limit=${LIMIT}"
  else
- count=$(echo "$filtered_matches" | wc -l)
+ QUERY="${API_PATH}?limit=${LIMIT}&continue=${CONTINUE}"
+ fi
+
+ OUTPUT=$(kubectl get --raw "$QUERY" 2>&1)
+ exit_code=$?
+
+ if [ $exit_code -ne 0 ]; then
+ echo "Error for query $QUERY: $OUTPUT" >&2
+ exit $exit_code
+ fi
+
+ ITEMS_COUNT=$(echo "$OUTPUT" | jq '.items | length')
+ TOTAL_PROCESSED=$((TOTAL_PROCESSED + ITEMS_COUNT))
+
+ BATCH_MATCHES=$(echo "$OUTPUT" | jq -c -r {{ jq_expr }} 2>&1)
+ jq_exit=$?
+ if [ $jq_exit -ne 0 ]; then
+ echo "Error: jq expression failed: $BATCH_MATCHES" >&2
+ exit $jq_exit
  fi
- preview=$(echo "$filtered_matches" | head -n 10 | cut -c 1-200 | nl)

- echo "$count results"
- echo "---"
- echo "A *preview* of results is shown below (up to 10 results, up to 200 chars):"
- echo "$preview"
+ if [ -n "$BATCH_MATCHES" ]; then
+ if [ -z "$ALL_MATCHES" ]; then
+ ALL_MATCHES="$BATCH_MATCHES"
+ else
+ ALL_MATCHES="$ALL_MATCHES"$'\n'"$BATCH_MATCHES"
+ fi
+ fi
+
+ CONTINUE=$(echo "$OUTPUT" | jq -r '.metadata.continue // empty')
+ if [ -z "$CONTINUE" ]; then
+ break
+ fi
+
+ echo "Processed batch $BATCH_NUM ($TOTAL_PROCESSED items so far)..." >&2
+ done
+
+ # Now process the collected matches
+ filtered_matches=$(echo "$ALL_MATCHES" | grep -v '^$' | grep -v '^null$')
+ if [ -z "$filtered_matches" ]; then
+ count=0
+ preview=""
+ else
+ count=$(echo "$filtered_matches" | wc -l)
+ preview=$(echo "$filtered_matches" | head -n 10 | cut -c 1-200 | nl)
  fi

+ echo "$count results"
+ echo "---"
+ echo "A *preview* of results is shown below (up to 10 results, up to 200 chars):"
+ echo "$preview"
+ echo "---"
+ echo "Total items processed: $TOTAL_PROCESSED" >&2
+
  # NOTE: this is only possible for probes with a healthz endpoint - we do this to avoid giving the LLM generic
  # http GET capabilities which are more powerful than we want to expose
  #- name: "check_liveness_probe"
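
Both rewritten scripts above replace a single kubectl get ... -o json call with chunked reads of the Kubernetes list API, using a limit parameter and the metadata.continue token via kubectl get --raw. A minimal Python sketch of the same pagination pattern, assuming kubectl is on PATH and pointed at the target cluster; the helper name and the pods example below are illustrative, not part of the package:

import json
import subprocess

def list_in_chunks(api_path: str, limit: int = 500):
    """Yield items from a Kubernetes list endpoint, following continue tokens."""
    token = ""
    while True:
        query = f"{api_path}?limit={limit}"
        if token:
            query += f"&continue={token}"
        raw = subprocess.run(
            ["kubectl", "get", "--raw", query],
            capture_output=True, text=True, check=True,
        ).stdout
        page = json.loads(raw)
        yield from page.get("items", [])
        token = (page.get("metadata") or {}).get("continue") or ""
        if not token:
            break

# e.g. count pods cluster-wide without holding the full list in memory
print(sum(1 for _ in list_in_chunks("/api/v1/pods")))

This is the same trade-off the new scripts make: each batch is filtered (or counted) and then discarded, so memory stays bounded on large clusters.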

holmes/plugins/toolsets/kubernetes_logs.py
@@ -3,7 +3,8 @@ import re
  import subprocess
  from concurrent.futures import ThreadPoolExecutor, as_completed
  from datetime import datetime, timezone
- from typing import Optional, List, Tuple, Set
+ from typing import List, Optional, Set, Tuple
+
  from pydantic import BaseModel

  from holmes.common.env_vars import KUBERNETES_LOGS_TIMEOUT_SECONDS
@@ -14,16 +15,15 @@ from holmes.core.tools import (
  ToolsetTag,
  )
  from holmes.plugins.toolsets.logging_utils.logging_api import (
+ DEFAULT_TIME_SPAN_SECONDS,
  BasePodLoggingToolset,
  FetchPodLogsParams,
  LoggingCapability,
  LoggingConfig,
  PodLoggingTool,
- DEFAULT_TIME_SPAN_SECONDS,
  )
  from holmes.plugins.toolsets.utils import process_timestamps_to_int, to_unix_ms

-
  # match ISO 8601 format (YYYY-MM-DDTHH:MM:SS[.fffffffff]Z) or (YYYY-MM-DDTHH:MM:SS[.fffffffff]+/-XX:XX)
  timestamp_pattern = re.compile(
  r"^(?P<ts>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:\d{2}))"
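
For reference, the ISO 8601 prefix pattern kept as context above can be exercised on its own; the log line below is invented for illustration:

import re

timestamp_pattern = re.compile(
    r"^(?P<ts>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:\d{2}))"
)

line = "2024-05-01T12:34:56.789Z starting container"  # hypothetical kubectl log line
match = timestamp_pattern.match(line)
if match:
    print(match.group("ts"))  # -> 2024-05-01T12:34:56.789Z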

holmes/plugins/toolsets/logging_utils/logging_api.py
@@ -1,12 +1,12 @@
- from abc import ABC, abstractmethod
- from datetime import datetime, timedelta
  import logging
+ from abc import ABC, abstractmethod
+ from datetime import datetime, timedelta, timezone
+ from enum import Enum
  from math import ceil
  from typing import Optional, Set
- from enum import Enum

  from pydantic import BaseModel, field_validator
- from datetime import timezone
+
  from holmes.core.llm import LLM
  from holmes.core.tools import (
  StructuredToolResult,
@@ -88,9 +88,14 @@ def truncate_logs(
  llm: LLM,
  token_limit: int,
  structured_params: FetchPodLogsParams,
+ tool_call_id: str,
+ tool_name: str,
  ):
  original_token_count = count_tool_response_tokens(
- llm=llm, structured_tool_result=logging_structured_tool_result
+ llm=llm,
+ structured_tool_result=logging_structured_tool_result,
+ tool_call_id=tool_call_id,
+ tool_name=tool_name,
  )
  token_count = original_token_count
  text = None
@@ -137,7 +142,10 @@
  )
  logging_structured_tool_result.data = text
  token_count = count_tool_response_tokens(
- llm=llm, structured_tool_result=logging_structured_tool_result
+ llm=llm,
+ structured_tool_result=logging_structured_tool_result,
+ tool_call_id=tool_call_id,
+ tool_name=tool_name,
  )
  if token_count < original_token_count:
  logging.info(
@@ -266,6 +274,8 @@ If you hit the log limit and see lots of repetitive INFO logs, use exclude_filte
  llm=context.llm,
  token_limit=context.max_token_count,
  structured_params=structured_params,
+ tool_call_id=context.tool_call_id,
+ tool_name=context.tool_name,
  )

  return result

holmes/plugins/toolsets/mcp/toolset_mcp.py
@@ -1,30 +1,28 @@
+ import asyncio
  import json
+ import logging
+ import threading
+ from contextlib import asynccontextmanager
+ from enum import Enum
+ from typing import Any, Dict, List, Optional, Tuple, Union

- from holmes.common.env_vars import SSE_READ_TIMEOUT
- from holmes.core.tools import (
- ToolInvokeContext,
- Toolset,
- Tool,
- ToolParameter,
- StructuredToolResult,
- StructuredToolResultStatus,
- CallablePrerequisite,
- )
-
- from typing import Dict, Any, List, Optional
  from mcp.client.session import ClientSession
  from mcp.client.sse import sse_client
+ from mcp.client.stdio import StdioServerParameters, stdio_client
  from mcp.client.streamable_http import streamablehttp_client
-
  from mcp.types import Tool as MCP_Tool
+ from pydantic import AnyUrl, BaseModel, Field, model_validator

- import asyncio
- from contextlib import asynccontextmanager
- from pydantic import BaseModel, Field, AnyUrl, model_validator
- from typing import Tuple
- import logging
- from enum import Enum
- import threading
+ from holmes.common.env_vars import SSE_READ_TIMEOUT
+ from holmes.core.tools import (
+ CallablePrerequisite,
+ StructuredToolResult,
+ StructuredToolResultStatus,
+ Tool,
+ ToolInvokeContext,
+ ToolParameter,
+ Toolset,
+ )

  # Lock per MCP server URL to serialize calls to the same server
  _server_locks: Dict[str, threading.Lock] = {}
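
The import reshuffle above sits on top of the per-server lock registry (_server_locks) used to serialize calls to the same MCP server. The body of get_server_lock is not shown in this diff, so the sketch below is only an assumption of the usual shape of such a lock-per-key helper:

import threading
from typing import Dict

_server_locks: Dict[str, threading.Lock] = {}
_registry_guard = threading.Lock()  # hypothetical guard for the registry itself

def get_server_lock(key: str) -> threading.Lock:
    # One lock per server key: calls to the same server run one at a time,
    # while different servers can still be called in parallel.
    with _registry_guard:
        return _server_locks.setdefault(key, threading.Lock())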
@@ -42,6 +40,7 @@ def get_server_lock(url: str) -> threading.Lock:
  class MCPMode(str, Enum):
  SSE = "sse"
  STREAMABLE_HTTP = "streamable-http"
+ STDIO = "stdio"


  class MCPConfig(BaseModel):
@@ -49,14 +48,42 @@ class MCPConfig(BaseModel):
  mode: MCPMode = MCPMode.SSE
  headers: Optional[Dict[str, str]] = None

+ def get_lock_string(self) -> str:
+ return str(self.url)
+
+
+ class StdioMCPConfig(BaseModel):
+ mode: MCPMode = MCPMode.STDIO
+ command: str
+ args: Optional[List[str]] = None
+ env: Optional[Dict[str, str]] = None
+
+ def get_lock_string(self) -> str:
+ return str(self.command)
+

  @asynccontextmanager
- async def get_initialized_mcp_session(
- url: str, headers: Optional[Dict[str, str]], mode: MCPMode
- ):
- if mode == MCPMode.SSE:
+ async def get_initialized_mcp_session(toolset: "RemoteMCPToolset"):
+ if toolset._mcp_config is None:
+ raise ValueError("MCP config is not initialized")
+
+ if isinstance(toolset._mcp_config, StdioMCPConfig):
+ server_params = StdioServerParameters(
+ command=toolset._mcp_config.command,
+ args=toolset._mcp_config.args or [],
+ env=toolset._mcp_config.env,
+ )
+ async with stdio_client(server_params) as (
+ read_stream,
+ write_stream,
+ ):
+ async with ClientSession(read_stream, write_stream) as session:
+ _ = await session.initialize()
+ yield session
+ elif toolset._mcp_config.mode == MCPMode.SSE:
+ url = str(toolset._mcp_config.url)
  async with sse_client(
- url, headers=headers, sse_read_timeout=SSE_READ_TIMEOUT
+ url, toolset._mcp_config.headers, sse_read_timeout=SSE_READ_TIMEOUT
  ) as (
  read_stream,
  write_stream,
@@ -65,8 +92,9 @@ async def get_initialized_mcp_session(
  _ = await session.initialize()
  yield session
  else:
+ url = str(toolset._mcp_config.url)
  async with streamablehttp_client(
- url, headers=headers, sse_read_timeout=SSE_READ_TIMEOUT
+ url, headers=toolset._mcp_config.headers, sse_read_timeout=SSE_READ_TIMEOUT
  ) as (
  read_stream,
  write_stream,
@@ -86,7 +114,8 @@ class RemoteMCPTool(Tool):
  # Different servers can still run in parallel
  if not self.toolset._mcp_config:
  raise ValueError("MCP config not initialized")
- lock = get_server_lock(str(self.toolset._mcp_config.url))
+
+ lock = get_server_lock(str(self.toolset._mcp_config.get_lock_string()))
  with lock:
  return asyncio.run(self._invoke_async(params))
  except Exception as e:
@@ -107,7 +136,7 @@ class RemoteMCPTool(Tool):
  return False

  async def _invoke_async(self, params: Dict) -> StructuredToolResult:
- async with self.toolset.get_initialized_session() as session:
+ async with get_initialized_mcp_session(self.toolset) as session:
  tool_result = await session.call_tool(self.name, params)

  merged_text = " ".join(c.text for c in tool_result.content if c.type == "text")
@@ -153,20 +182,23 @@ class RemoteMCPTool(Tool):
  return parameters

  def get_parameterized_one_liner(self, params: Dict) -> str:
- if params:
- if params.get("cli_command"): # Return AWS MCP cli command, if available
- return f"{params.get('cli_command')}"
+ # AWS MCP cli_command
+ if params and params.get("cli_command"):
+ return f"{params.get('cli_command')}"

- url = (
- str(self.toolset._mcp_config.url) if self.toolset._mcp_config else "unknown"
- )
- return f"Call MCP Server ({url} - {self.name})"
+ # gcloud MCP run_gcloud_command
+ if self.name == "run_gcloud_command" and params and "args" in params:
+ args = params.get("args", [])
+ if isinstance(args, list):
+ return f"gcloud {' '.join(str(arg) for arg in args)}"
+
+ return f"{self.toolset.name}: {self.name} {params}"


  class RemoteMCPToolset(Toolset):
  tools: List[RemoteMCPTool] = Field(default_factory=list) # type: ignore
  icon_url: str = "https://registry.npmmirror.com/@lobehub/icons-static-png/1.46.0/files/light/mcp.png"
- _mcp_config: Optional[MCPConfig] = None
+ _mcp_config: Optional[Union[MCPConfig, StdioMCPConfig]] = None
  def model_post_init(self, __context: Any) -> None:
  self.prerequisites = [
@@ -211,23 +243,24 @@ class RemoteMCPToolset(Toolset):
  if not config:
  return (False, f"Config is required for {self.name}")

- if "mode" in config:
- mode_value = config.get("mode")
- allowed_modes = [e.value for e in MCPMode]
- if mode_value not in allowed_modes:
- return (
- False,
- f'Invalid mode "{mode_value}", allowed modes are {", ".join(allowed_modes)}',
- )
-
- self._mcp_config = MCPConfig(**config)
-
- clean_url_str = str(self._mcp_config.url).rstrip("/")
-
- if self._mcp_config.mode == MCPMode.SSE and not clean_url_str.endswith(
- "/sse"
- ):
- self._mcp_config.url = AnyUrl(clean_url_str + "/sse")
+ mode_value = config.get("mode", MCPMode.SSE.value)
+ allowed_modes = [e.value for e in MCPMode]
+ if mode_value not in allowed_modes:
+ return (
+ False,
+ f'Invalid mode "{mode_value}", allowed modes are {", ".join(allowed_modes)}',
+ )
+
+ if mode_value == MCPMode.STDIO.value:
+ self._mcp_config = StdioMCPConfig(**config)
+ else:
+ self._mcp_config = MCPConfig(**config)
+ clean_url_str = str(self._mcp_config.url).rstrip("/")
+
+ if self._mcp_config.mode == MCPMode.SSE and not clean_url_str.endswith(
+ "/sse"
+ ):
+ self._mcp_config.url = AnyUrl(clean_url_str + "/sse")

  tools_result = asyncio.run(self._get_server_tools())

@@ -242,18 +275,13 @@ class RemoteMCPToolset(Toolset):
  except Exception as e:
  return (
  False,
- f"Failed to load mcp server {self.name} {self._mcp_config.url if self._mcp_config else 'unknown'}: {str(e)}",
+ f"Failed to load mcp server {self.name}: {str(e)}",
  )

  async def _get_server_tools(self):
- async with self.get_initialized_session() as session:
+ async with get_initialized_mcp_session(self) as session:
  return await session.list_tools()

- def get_initialized_session(self):
- return get_initialized_mcp_session(
- str(self._mcp_config.url), self._mcp_config.headers, self._mcp_config.mode
- )
-
  def get_example_config(self) -> Dict[str, Any]:
  example_config = MCPConfig(
  url=AnyUrl("http://example.com:8000/mcp/messages"),
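
Taken together, the MCP hunks add a stdio transport alongside sse and streamable-http. Based on the StdioMCPConfig fields shown in the diff (command, args, env), a toolset config for the new mode would look roughly like the sketch below; the executable, arguments, and environment values are placeholders, not taken from the package:

stdio_config = {
    "mode": "stdio",
    "command": "my-mcp-server",          # hypothetical executable run as a child process
    "args": ["--log-level", "warning"],  # hypothetical flags
    "env": {"EXAMPLE_TOKEN": "..."},     # optional environment for the child process
}
# Per the prerequisites hunk above, a "stdio" mode builds StdioMCPConfig(**config),
# and the session then talks to the server over stdin/stdout instead of HTTP.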