holmesgpt 0.13.2__py3-none-any.whl → 0.18.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- holmes/__init__.py +3 -5
- holmes/clients/robusta_client.py +20 -6
- holmes/common/env_vars.py +58 -3
- holmes/common/openshift.py +1 -1
- holmes/config.py +123 -148
- holmes/core/conversations.py +71 -15
- holmes/core/feedback.py +191 -0
- holmes/core/investigation.py +31 -39
- holmes/core/investigation_structured_output.py +3 -3
- holmes/core/issue.py +1 -1
- holmes/core/llm.py +508 -88
- holmes/core/models.py +108 -4
- holmes/core/openai_formatting.py +14 -1
- holmes/core/prompt.py +48 -3
- holmes/core/runbooks.py +1 -0
- holmes/core/safeguards.py +8 -6
- holmes/core/supabase_dal.py +295 -100
- holmes/core/tool_calling_llm.py +489 -428
- holmes/core/tools.py +325 -56
- holmes/core/tools_utils/token_counting.py +21 -0
- holmes/core/tools_utils/tool_context_window_limiter.py +40 -0
- holmes/core/tools_utils/tool_executor.py +0 -13
- holmes/core/tools_utils/toolset_utils.py +1 -0
- holmes/core/toolset_manager.py +191 -5
- holmes/core/tracing.py +19 -3
- holmes/core/transformers/__init__.py +23 -0
- holmes/core/transformers/base.py +63 -0
- holmes/core/transformers/llm_summarize.py +175 -0
- holmes/core/transformers/registry.py +123 -0
- holmes/core/transformers/transformer.py +32 -0
- holmes/core/truncation/compaction.py +94 -0
- holmes/core/truncation/dal_truncation_utils.py +23 -0
- holmes/core/truncation/input_context_window_limiter.py +219 -0
- holmes/interactive.py +228 -31
- holmes/main.py +23 -40
- holmes/plugins/interfaces.py +2 -1
- holmes/plugins/prompts/__init__.py +2 -1
- holmes/plugins/prompts/_fetch_logs.jinja2 +31 -6
- holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
- holmes/plugins/prompts/_runbook_instructions.jinja2 +24 -12
- holmes/plugins/prompts/base_user_prompt.jinja2 +7 -0
- holmes/plugins/prompts/conversation_history_compaction.jinja2 +89 -0
- holmes/plugins/prompts/generic_ask.jinja2 +0 -4
- holmes/plugins/prompts/generic_ask_conversation.jinja2 +0 -1
- holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +0 -1
- holmes/plugins/prompts/generic_investigation.jinja2 +0 -1
- holmes/plugins/prompts/investigation_procedure.jinja2 +50 -1
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +0 -1
- holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +0 -1
- holmes/plugins/runbooks/__init__.py +145 -17
- holmes/plugins/runbooks/catalog.json +2 -0
- holmes/plugins/sources/github/__init__.py +4 -2
- holmes/plugins/sources/prometheus/models.py +1 -0
- holmes/plugins/toolsets/__init__.py +44 -27
- holmes/plugins/toolsets/aks-node-health.yaml +46 -0
- holmes/plugins/toolsets/aks.yaml +64 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +38 -47
- holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +3 -2
- holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
- holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +3 -2
- holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +3 -1
- holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +3 -1
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +12 -13
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +15 -12
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +15 -12
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +11 -11
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +11 -9
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +15 -12
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +15 -15
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/utils.py +0 -32
- holmes/plugins/toolsets/bash/argocd/__init__.py +3 -3
- holmes/plugins/toolsets/bash/aws/__init__.py +4 -4
- holmes/plugins/toolsets/bash/azure/__init__.py +4 -4
- holmes/plugins/toolsets/bash/bash_toolset.py +11 -15
- holmes/plugins/toolsets/bash/common/bash.py +23 -13
- holmes/plugins/toolsets/bash/common/bash_command.py +1 -1
- holmes/plugins/toolsets/bash/common/stringify.py +1 -1
- holmes/plugins/toolsets/bash/kubectl/__init__.py +2 -1
- holmes/plugins/toolsets/bash/kubectl/constants.py +0 -1
- holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +3 -4
- holmes/plugins/toolsets/bash/parse_command.py +12 -13
- holmes/plugins/toolsets/cilium.yaml +284 -0
- holmes/plugins/toolsets/connectivity_check.py +124 -0
- holmes/plugins/toolsets/coralogix/api.py +132 -119
- holmes/plugins/toolsets/coralogix/coralogix.jinja2 +14 -0
- holmes/plugins/toolsets/coralogix/toolset_coralogix.py +219 -0
- holmes/plugins/toolsets/coralogix/utils.py +15 -79
- holmes/plugins/toolsets/datadog/datadog_api.py +525 -26
- holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +55 -11
- holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +3 -3
- holmes/plugins/toolsets/datadog/datadog_models.py +59 -0
- holmes/plugins/toolsets/datadog/datadog_url_utils.py +213 -0
- holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +165 -28
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +417 -241
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +234 -214
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +167 -79
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +374 -363
- holmes/plugins/toolsets/elasticsearch/__init__.py +6 -0
- holmes/plugins/toolsets/elasticsearch/elasticsearch.py +834 -0
- holmes/plugins/toolsets/elasticsearch/opensearch_ppl_query_docs.jinja2 +1616 -0
- holmes/plugins/toolsets/elasticsearch/opensearch_query_assist.py +78 -0
- holmes/plugins/toolsets/elasticsearch/opensearch_query_assist_instructions.jinja2 +223 -0
- holmes/plugins/toolsets/git.py +54 -50
- holmes/plugins/toolsets/grafana/base_grafana_toolset.py +16 -4
- holmes/plugins/toolsets/grafana/common.py +13 -29
- holmes/plugins/toolsets/grafana/grafana_tempo_api.py +455 -0
- holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +25 -0
- holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +191 -0
- holmes/plugins/toolsets/grafana/loki_api.py +4 -0
- holmes/plugins/toolsets/grafana/toolset_grafana.py +293 -89
- holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +49 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +820 -292
- holmes/plugins/toolsets/grafana/trace_parser.py +4 -3
- holmes/plugins/toolsets/internet/internet.py +15 -16
- holmes/plugins/toolsets/internet/notion.py +9 -11
- holmes/plugins/toolsets/investigator/core_investigation.py +44 -36
- holmes/plugins/toolsets/investigator/model.py +3 -1
- holmes/plugins/toolsets/json_filter_mixin.py +134 -0
- holmes/plugins/toolsets/kafka.py +36 -42
- holmes/plugins/toolsets/kubernetes.yaml +317 -113
- holmes/plugins/toolsets/kubernetes_logs.py +9 -9
- holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
- holmes/plugins/toolsets/logging_utils/logging_api.py +94 -8
- holmes/plugins/toolsets/mcp/toolset_mcp.py +218 -64
- holmes/plugins/toolsets/newrelic/new_relic_api.py +165 -0
- holmes/plugins/toolsets/newrelic/newrelic.jinja2 +65 -0
- holmes/plugins/toolsets/newrelic/newrelic.py +320 -0
- holmes/plugins/toolsets/openshift.yaml +283 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +1202 -421
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +54 -5
- holmes/plugins/toolsets/prometheus/utils.py +28 -0
- holmes/plugins/toolsets/rabbitmq/api.py +23 -4
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +13 -14
- holmes/plugins/toolsets/robusta/robusta.py +239 -68
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +157 -27
- holmes/plugins/toolsets/service_discovery.py +1 -1
- holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
- holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
- holmes/plugins/toolsets/utils.py +88 -0
- holmes/utils/config_utils.py +91 -0
- holmes/utils/connection_utils.py +31 -0
- holmes/utils/console/result.py +10 -0
- holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
- holmes/utils/env.py +7 -0
- holmes/utils/file_utils.py +2 -1
- holmes/utils/global_instructions.py +60 -11
- holmes/utils/holmes_status.py +6 -4
- holmes/utils/holmes_sync_toolsets.py +0 -2
- holmes/utils/krr_utils.py +188 -0
- holmes/utils/log.py +15 -0
- holmes/utils/markdown_utils.py +2 -3
- holmes/utils/memory_limit.py +58 -0
- holmes/utils/sentry_helper.py +64 -0
- holmes/utils/stream.py +69 -8
- holmes/utils/tags.py +4 -3
- holmes/version.py +37 -15
- holmesgpt-0.18.4.dist-info/LICENSE +178 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/METADATA +35 -31
- holmesgpt-0.18.4.dist-info/RECORD +258 -0
- holmes/core/performance_timing.py +0 -72
- holmes/plugins/toolsets/aws.yaml +0 -80
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +0 -112
- holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +0 -310
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +0 -739
- holmes/plugins/toolsets/grafana/grafana_api.py +0 -42
- holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
- holmes/plugins/toolsets/newrelic.py +0 -231
- holmes/plugins/toolsets/opensearch/opensearch.py +0 -257
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +0 -161
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +0 -218
- holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +0 -12
- holmes/plugins/toolsets/opensearch/opensearch_utils.py +0 -166
- holmes/plugins/toolsets/servicenow/install.md +0 -37
- holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
- holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
- holmes/utils/keygen_utils.py +0 -6
- holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
- holmesgpt-0.13.2.dist-info/RECORD +0 -234
- /holmes/plugins/toolsets/{opensearch → newrelic}/__init__.py +0 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/WHEEL +0 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/entry_points.txt +0 -0
|
@@ -8,6 +8,10 @@ toolsets:
|
|
|
8
8
|
prerequisites:
|
|
9
9
|
- command: "kubectl version --client"
|
|
10
10
|
|
|
11
|
+
# Note: Many tools in this toolset use transformers with llm_summarize
|
|
12
|
+
# to automatically summarize large kubectl outputs when a fast model is configured.
|
|
13
|
+
# This reduces context window usage while preserving key information for debugging.
|
|
14
|
+
|
|
11
15
|
tools:
|
|
12
16
|
- name: "kubectl_describe"
|
|
13
17
|
description: >
|
|
@@ -17,6 +21,20 @@ toolsets:
|
|
|
17
21
|
- 'describe pod xyz-123'
|
|
18
22
|
- 'show service xyz-123 in namespace my-ns'
|
|
19
23
|
command: "kubectl describe {{ kind }} {{ name }}{% if namespace %} -n {{ namespace }}{% endif %}"
|
|
24
|
+
transformers:
|
|
25
|
+
- name: llm_summarize
|
|
26
|
+
config:
|
|
27
|
+
input_threshold: 1000
|
|
28
|
+
prompt: |
|
|
29
|
+
Summarize this kubectl describe output focusing on:
|
|
30
|
+
- What needs attention or immediate action
|
|
31
|
+
- Resource status and health indicators
|
|
32
|
+
- Any errors, warnings, or non-standard states
|
|
33
|
+
- Key configuration details that could affect functionality
|
|
34
|
+
- When possible, mention exact field names so the user can grep for specific details
|
|
35
|
+
- Be concise: aim for ≤ 50% of the original length; avoid repeating defaults/healthy/unchanged details
|
|
36
|
+
- Prefer aggregates and counts; list only outliers and actionable items
|
|
37
|
+
- Keep grep-friendly: include exact field names/values that matter
|
|
20
38
|
|
|
21
39
|
- name: "kubectl_get_by_name"
|
|
22
40
|
description: "Run `kubectl get <kind> <name> --show-labels`"
|
|
@@ -25,10 +43,36 @@ toolsets:
|
|
|
25
43
|
- name: "kubectl_get_by_kind_in_namespace"
|
|
26
44
|
description: "Run `kubectl get <kind> -n <namespace> --show-labels` to get all resources of a given type in namespace"
|
|
27
45
|
command: "kubectl get --show-labels -o wide {{ kind }} -n {{namespace}}"
|
|
46
|
+
transformers:
|
|
47
|
+
- name: llm_summarize
|
|
48
|
+
config:
|
|
49
|
+
input_threshold: 1000
|
|
50
|
+
prompt: |
|
|
51
|
+
Summarize this kubectl output focusing on:
|
|
52
|
+
- What needs attention or immediate action
|
|
53
|
+
- Group similar resources into aggregate descriptions
|
|
54
|
+
- Make sure to mention outliers, errors, and non-standard states
|
|
55
|
+
- List healthy resources as aggregate descriptions
|
|
56
|
+
- When listing unhealthy resources, also try to use aggregate descriptions when possible
|
|
57
|
+
- When possible, mention exact keywords so the user can rerun the command with | grep <keyword> and drill down
|
|
58
|
+
- Be concise and avoid expansion: target ≤ 50% of input size; prefer counts + outliers over full listings
|
|
28
59
|
|
|
29
60
|
- name: "kubectl_get_by_kind_in_cluster"
|
|
30
61
|
description: "Run `kubectl get -A <kind> --show-labels` to get all resources of a given type in the cluster"
|
|
31
62
|
command: "kubectl get -A --show-labels -o wide {{ kind }}"
|
|
63
|
+
transformers:
|
|
64
|
+
- name: llm_summarize
|
|
65
|
+
config:
|
|
66
|
+
input_threshold: 1000
|
|
67
|
+
prompt: |
|
|
68
|
+
Summarize this kubectl output focusing on:
|
|
69
|
+
- What needs attention or immediate action
|
|
70
|
+
- Group similar resources into a single line and description
|
|
71
|
+
- Make sure to mention outliers, errors, and non-standard states
|
|
72
|
+
- List healthy resources as aggregate descriptions
|
|
73
|
+
- When listing unhealthy resources, also try to use aggregate descriptions when possible
|
|
74
|
+
- When possible, mention exact keywords so the user can rerun the command with | grep <keyword> and drill down on the parts they care about
|
|
75
|
+
- Strive for ≤ 50% of the original size; keep results compact and grep-friendly (one line per aggregate)
|
|
32
76
|
|
|
33
77
|
- name: "kubectl_find_resource"
|
|
34
78
|
description: "Run `kubectl get {{ kind }} -A --show-labels | grep {{ keyword }}` to find a resource where you know a substring of the name, IP, namespace, or labels"
|
|
@@ -42,142 +86,302 @@ toolsets:
|
|
|
42
86
|
description: "Retrieve the events for a specific Kubernetes resource. `resource_type` can be any kubernetes resource type: 'pod', 'service', 'deployment', 'job', 'node', etc."
|
|
43
87
|
command: "kubectl events --for {{resource_type}}/{{ resource_name }}{% if namespace %} -n {{ namespace }}{% endif %}"
|
|
44
88
|
|
|
45
|
-
- name: "kubectl_memory_requests_all_namespaces"
|
|
46
|
-
description: "Fetch and display memory requests for all pods across all namespaces in MiB, summing requests across multiple containers where applicable and handling binary, decimal, and millibyte units correctly."
|
|
47
|
-
command: |
|
|
48
|
-
kubectl get pods --all-namespaces -o custom-columns="NAMESPACE:.metadata.namespace,NAME:.metadata.name,MEMORY_REQUEST:.spec.containers[*].resources.requests.memory" --no-headers | \
|
|
49
|
-
awk '
|
|
50
|
-
function convert_to_mib(value) {
|
|
51
|
-
if (value ~ /^[0-9]+e[0-9]+$/) return (value + 0) / (1024 * 1024); # Scientific notation
|
|
52
|
-
if (value ~ /m$/) return (value + 0) / (1024^2 * 1000); # Millibytes (m)
|
|
53
|
-
if (value ~ /Ei$/) return (value + 0) * 1024^6 / (1024^2); # Binary units
|
|
54
|
-
if (value ~ /Pi$/) return (value + 0) * 1024^5 / (1024^2);
|
|
55
|
-
if (value ~ /Ti$/) return (value + 0) * 1024^4 / (1024^2);
|
|
56
|
-
if (value ~ /Gi$/) return (value + 0) * 1024^3 / (1024^2);
|
|
57
|
-
if (value ~ /Mi$/) return (value + 0);
|
|
58
|
-
if (value ~ /Ki$/) return (value + 0) / 1024;
|
|
59
|
-
if (value ~ /E$/) return (value + 0) * 1000^6 / (1024^2); # Decimal units
|
|
60
|
-
if (value ~ /P$/) return (value + 0) * 1000^5 / (1024^2);
|
|
61
|
-
if (value ~ /T$/) return (value + 0) * 1000^4 / (1024^2);
|
|
62
|
-
if (value ~ /G$/) return (value + 0) * 1000^3 / (1024^2);
|
|
63
|
-
if (value ~ /M$/) return (value + 0) * 1000^2 / (1024^2);
|
|
64
|
-
if (value ~ /k$/) return (value + 0) * 1000 / (1024^2);
|
|
65
|
-
return (value + 0) / (1024 * 1024); # Default: bytes
|
|
66
|
-
}
|
|
67
|
-
function sum_memory(requests) {
|
|
68
|
-
gsub(/^[ \t]+|[ \t]+$/, "", requests);
|
|
69
|
-
if (requests == "" || requests == "<none>") return 0;
|
|
70
|
-
split(requests, arr, ",");
|
|
71
|
-
total = 0;
|
|
72
|
-
for (i in arr) {
|
|
73
|
-
if (arr[i] != "<none>") total += convert_to_mib(arr[i]);
|
|
74
|
-
}
|
|
75
|
-
return total;
|
|
76
|
-
}
|
|
77
|
-
{
|
|
78
|
-
namespace = $1;
|
|
79
|
-
name = $2;
|
|
80
|
-
requests = $3;
|
|
81
|
-
for (i=4; i<=NF; i++) {
|
|
82
|
-
requests = requests " " $i;
|
|
83
|
-
}
|
|
84
|
-
print namespace, name, sum_memory(requests) " Mi";
|
|
85
|
-
}' | sort -k3 -nr
|
|
86
|
-
|
|
87
|
-
- name: "kubectl_memory_requests_namespace"
|
|
88
|
-
description: "Fetch and display memory requests for all pods in a specified namespace in MiB, summing requests across multiple containers where applicable and handling binary, decimal, and millibyte units correctly."
|
|
89
|
-
command: |
|
|
90
|
-
kubectl get pods -n {{ namespace }} -o custom-columns="NAMESPACE:.metadata.namespace,NAME:.metadata.name,MEMORY_REQUEST:.spec.containers[*].resources.requests.memory" --no-headers | \
|
|
91
|
-
awk '
|
|
92
|
-
function convert_to_mib(value) {
|
|
93
|
-
if (value ~ /^[0-9]+e[0-9]+$/) return (value + 0) / (1024 * 1024); # Scientific notation
|
|
94
|
-
if (value ~ /m$/) return (value + 0) / (1024^2 * 1000); # Millibytes (m)
|
|
95
|
-
if (value ~ /Ei$/) return (value + 0) * 1024^6 / (1024^2); # Binary units
|
|
96
|
-
if (value ~ /Pi$/) return (value + 0) * 1024^5 / (1024^2);
|
|
97
|
-
if (value ~ /Ti$/) return (value + 0) * 1024^4 / (1024^2);
|
|
98
|
-
if (value ~ /Gi$/) return (value + 0) * 1024^3 / (1024^2);
|
|
99
|
-
if (value ~ /Mi$/) return (value + 0);
|
|
100
|
-
if (value ~ /Ki$/) return (value + 0) / 1024;
|
|
101
|
-
if (value ~ /E$/) return (value + 0) * 1000^6 / (1024^2); # Decimal units
|
|
102
|
-
if (value ~ /P$/) return (value + 0) * 1000^5 / (1024^2);
|
|
103
|
-
if (value ~ /T$/) return (value + 0) * 1000^4 / (1024^2);
|
|
104
|
-
if (value ~ /G$/) return (value + 0) * 1000^3 / (1024^2);
|
|
105
|
-
if (value ~ /M$/) return (value + 0) * 1000^2 / (1024^2);
|
|
106
|
-
if (value ~ /k$/) return (value + 0) * 1000 / (1024^2);
|
|
107
|
-
return (value + 0) / (1024 * 1024); # Default: bytes
|
|
108
|
-
}
|
|
109
|
-
function sum_memory(requests) {
|
|
110
|
-
gsub(/^[ \t]+|[ \t]+$/, "", requests);
|
|
111
|
-
if (requests == "" || requests == "<none>") return 0;
|
|
112
|
-
split(requests, arr, ",");
|
|
113
|
-
total = 0;
|
|
114
|
-
for (i in arr) {
|
|
115
|
-
if (arr[i] != "<none>") total += convert_to_mib(arr[i]);
|
|
116
|
-
}
|
|
117
|
-
return total;
|
|
118
|
-
}
|
|
119
|
-
{
|
|
120
|
-
namespace = $1;
|
|
121
|
-
name = $2;
|
|
122
|
-
requests = $3;
|
|
123
|
-
for (i=4; i<=NF; i++) {
|
|
124
|
-
requests = requests " " $i;
|
|
125
|
-
}
|
|
126
|
-
print namespace, name, sum_memory(requests) " Mi";
|
|
127
|
-
}' | sort -k3 -nr
|
|
128
|
-
|
|
129
89
|
- name: "kubernetes_jq_query"
|
|
130
90
|
user_description: "Query Kubernetes Resources: kubectl get {{kind}} --all-namespaces -o json | jq -r {{jq_expr}}"
|
|
131
91
|
description: >
|
|
132
|
-
Use kubectl to get json for all resources of a specific kind
|
|
133
|
-
|
|
92
|
+
Use kubectl to get json for all resources of a specific kind and filter with jq.
|
|
93
|
+
IMPORTANT: The 'kind' parameter must be the plural form of the resource type
|
|
94
|
+
(e.g., use "pods" not "pod", "services" not "service", "jobs" not "job").
|
|
95
|
+
Do not worry about escaping the jq_expr - it will be done by the system.
|
|
96
|
+
Example: .items[] | .spec.containers[].image | select(test("^gcr.io/") | not)
|
|
97
|
+
script: |
|
|
98
|
+
#!/bin/bash
|
|
99
|
+
|
|
100
|
+
echo "Executing paginated query for {{ kind }} resources..."
|
|
101
|
+
echo "Expression: {{ jq_expr }}"
|
|
102
|
+
echo "---"
|
|
103
|
+
|
|
104
|
+
# Get the API path for the resource kind using kubectl
|
|
105
|
+
API_INFO=$(kubectl api-resources --no-headers | grep "^{{ kind }} " | head -1)
|
|
106
|
+
|
|
107
|
+
if [ -z "$API_INFO" ]; then
|
|
108
|
+
echo "Error: Unable to find resource kind '{{ kind }}'" >&2
|
|
109
|
+
exit 1
|
|
110
|
+
fi
|
|
111
|
+
|
|
112
|
+
# Extract NAMESPACED value
|
|
113
|
+
if [[ "$API_INFO" == *" true "* ]]; then
|
|
114
|
+
NAMESPACED="true"
|
|
115
|
+
PREFIX=$(echo "$API_INFO" | sed 's/ true .*//')
|
|
116
|
+
elif [[ "$API_INFO" == *" false "* ]]; then
|
|
117
|
+
NAMESPACED="false"
|
|
118
|
+
PREFIX=$(echo "$API_INFO" | sed 's/ false .*//')
|
|
119
|
+
else
|
|
120
|
+
echo "Error: Could not find NAMESPACED field (true/false) in API info" >&2
|
|
121
|
+
exit 1
|
|
122
|
+
fi
|
|
123
|
+
|
|
124
|
+
# Trim trailing spaces from prefix and collapse internal spaces
|
|
125
|
+
PREFIX=$(echo "$PREFIX" | sed 's/ *$//' | sed 's/ */ /g')
|
|
126
|
+
|
|
127
|
+
IFS=' ' read -ra PREFIX_FIELDS <<< "$PREFIX"
|
|
128
|
+
FIELD_COUNT=0
|
|
129
|
+
for field in "${PREFIX_FIELDS[@]}"; do
|
|
130
|
+
((FIELD_COUNT++))
|
|
131
|
+
done
|
|
132
|
+
|
|
133
|
+
RESOURCE_NAME="${PREFIX_FIELDS[0]}"
|
|
134
|
+
if [ $FIELD_COUNT -ge 2 ]; then
|
|
135
|
+
API_VERSION="${PREFIX_FIELDS[$((FIELD_COUNT - 1))]}"
|
|
136
|
+
else
|
|
137
|
+
API_VERSION=""
|
|
138
|
+
fi
|
|
139
|
+
|
|
140
|
+
if [ -z "$API_VERSION" ] || [ -z "$RESOURCE_NAME" ]; then
|
|
141
|
+
echo "Error: Unable to parse API info for resource kind '{{ kind }}'" >&2
|
|
142
|
+
exit 1
|
|
143
|
+
fi
|
|
144
|
+
|
|
145
|
+
# Build API path
|
|
146
|
+
if [[ "$API_VERSION" == "v1" ]]; then
|
|
147
|
+
API_PATH="/api/v1/${RESOURCE_NAME}"
|
|
148
|
+
else
|
|
149
|
+
API_PATH="/apis/${API_VERSION}/${RESOURCE_NAME}"
|
|
150
|
+
fi
|
|
151
|
+
|
|
152
|
+
# Process resources in chunks using API pagination
|
|
153
|
+
LIMIT=500 # Process 500 items at a time
|
|
154
|
+
CONTINUE=""
|
|
155
|
+
PROCESSED=0
|
|
156
|
+
TOTAL_MATCHES=0
|
|
157
|
+
|
|
158
|
+
while true; do
|
|
159
|
+
# Build API query with limit and continue token
|
|
160
|
+
if [ -z "$CONTINUE" ]; then
|
|
161
|
+
# First request - get from all namespaces
|
|
162
|
+
QUERY="${API_PATH}?limit=${LIMIT}"
|
|
163
|
+
else
|
|
164
|
+
# Subsequent requests with continue token
|
|
165
|
+
QUERY="${API_PATH}?limit=${LIMIT}&continue=${CONTINUE}"
|
|
166
|
+
fi
|
|
167
|
+
|
|
168
|
+
OUTPUT=$(kubectl get --raw "$QUERY" 2>&1)
|
|
169
|
+
exit_code=$?
|
|
170
|
+
|
|
171
|
+
if [ $exit_code -ne 0 ]; then
|
|
172
|
+
echo "Error: $OUTPUT" >&2
|
|
173
|
+
exit $exit_code
|
|
174
|
+
fi
|
|
175
|
+
|
|
176
|
+
ITEMS_COUNT=$(echo "$OUTPUT" | jq '.items | length')
|
|
177
|
+
|
|
178
|
+
MATCHES=$(echo "$OUTPUT" | jq -r {{ jq_expr }} 2>&1)
|
|
179
|
+
jq_exit=$?
|
|
180
|
+
if [ $jq_exit -ne 0 ]; then
|
|
181
|
+
echo "Error: jq expression failed: $MATCHES" >&2
|
|
182
|
+
exit $jq_exit
|
|
183
|
+
fi
|
|
184
|
+
|
|
185
|
+
if [ "$ITEMS_COUNT" -gt 0 ]; then
|
|
186
|
+
if [ -n "$MATCHES" ]; then
|
|
187
|
+
echo "$MATCHES"
|
|
188
|
+
MATCH_COUNT=$(echo "$MATCHES" | grep -c . || true)
|
|
189
|
+
TOTAL_MATCHES=$((TOTAL_MATCHES + MATCH_COUNT))
|
|
190
|
+
fi
|
|
191
|
+
|
|
192
|
+
PROCESSED=$((PROCESSED + ITEMS_COUNT))
|
|
193
|
+
|
|
194
|
+
echo "Processed $PROCESSED items, found $TOTAL_MATCHES matches so far..." >&2
|
|
195
|
+
fi
|
|
196
|
+
|
|
197
|
+
CONTINUE=$(echo "$OUTPUT" | jq -r '.metadata.continue // empty')
|
|
198
|
+
|
|
199
|
+
if [ -z "$CONTINUE" ]; then
|
|
200
|
+
break
|
|
201
|
+
fi
|
|
202
|
+
done
|
|
203
|
+
|
|
204
|
+
echo "---" >&2
|
|
205
|
+
echo "Total items processed: $PROCESSED, matches found: $TOTAL_MATCHES" >&2
|
|
206
|
+
transformers:
|
|
207
|
+
- name: llm_summarize
|
|
208
|
+
config:
|
|
209
|
+
input_threshold: 10000
|
|
210
|
+
prompt: |
|
|
211
|
+
Summarize this jq query output focusing on:
|
|
212
|
+
- Key patterns and commonalities in the data
|
|
213
|
+
- Notable outliers, anomalies, or items that need attention
|
|
214
|
+
- Group similar results into aggregate descriptions when possible
|
|
215
|
+
- Highlight any empty results, null values, or missing data
|
|
216
|
+
- When applicable, mention specific resource names, namespaces, or values that stand out
|
|
217
|
+
- Organize findings in a structured way that helps with troubleshooting
|
|
218
|
+
- Be concise: aim for ≤ 50% of the original text; prioritize aggregates and actionable outliers
|
|
219
|
+
- Include grep-ready keys/values; avoid repeating entire objects or unchanged defaults
|
|
220
|
+
|
|
221
|
+
- name: "kubernetes_tabular_query"
|
|
222
|
+
user_description: "Tabular output of specific fields: kubectl get {{kind}} --all-namespaces -o custom-columns={{columns}}"
|
|
223
|
+
description: >
|
|
224
|
+
Extract specific fields from Kubernetes resources in tabular format with optional filtering.
|
|
225
|
+
Memory-efficient way to query large clusters - only requested fields are transmitted.
|
|
226
|
+
Column specification format: HEADER:FIELD_PATH,HEADER2:FIELD_PATH2,...
|
|
227
|
+
|
|
228
|
+
Optional filtering parameter:
|
|
229
|
+
- filter_pattern: Pattern to match in any column (supports grep regex)
|
|
230
|
+
|
|
231
|
+
Examples:
|
|
232
|
+
- Basic fields: NAME:.metadata.name,STATUS:.status.phase,NODE:.spec.nodeName
|
|
233
|
+
- Filter by status: filter_pattern="Running"
|
|
234
|
+
- Filter out lines with <none>: filter_pattern="-v '<none>'"
|
|
235
|
+
- Nested fields: CREATED:.metadata.creationTimestamp,IMAGE:.spec.containers[0].image
|
|
236
|
+
- Array fields: LABELS:.metadata.labels,PORTS:.spec.ports[*].port
|
|
237
|
+
|
|
238
|
+
Note: Output is tabular text with column headers. Filtering works on the entire line.
|
|
239
|
+
Note: not allowed characters are: ' / ; and newline
|
|
240
|
+
command: kubectl get {{ kind }} --all-namespaces -o custom-columns='{{ columns }}'{% if filter_pattern %} | (head -n 1; tail -n +2 | grep {{ filter_pattern }}){% endif %}
|
|
241
|
+
transformers:
|
|
242
|
+
- name: llm_summarize
|
|
243
|
+
config:
|
|
244
|
+
input_threshold: 10000
|
|
245
|
+
prompt: |
|
|
246
|
+
Summarize this tabular output focusing on:
|
|
247
|
+
- Key patterns and trends in the data
|
|
248
|
+
- Resources that need attention (errors, pending, failures)
|
|
249
|
+
- Group similar items into aggregate descriptions
|
|
250
|
+
- Highlight outliers or unusual values
|
|
251
|
+
- Mention specific resource names only for problematic items
|
|
252
|
+
- Provide counts and distributions where relevant
|
|
253
|
+
- Be concise: aim for ≤ 50% of the original size
|
|
254
|
+
- Keep output actionable and focused on anomalies
|
|
134
255
|
|
|
135
256
|
- name: "kubernetes_count"
|
|
136
257
|
user_description: "Count Kubernetes Resources: kubectl get {{kind}} --all-namespaces -o json | jq -c -r {{ jq_expr }}"
|
|
137
258
|
description: >
|
|
138
259
|
Use kubectl to get apply a jq filter and then count the results.
|
|
139
260
|
Use this whenever asked to count kubernetes resources.
|
|
261
|
+
IMPORTANT: The 'kind' parameter must be the plural form of the resource type
|
|
262
|
+
(e.g., use "pods" not "pod", "services" not "service", "jobs" not "job").
|
|
140
263
|
Use select() to filter objects before extracting properties, e.g. .items[] | select(.metadata.namespace == "test-1") | .metadata.name
|
|
141
264
|
Do not worry about escaping the jq_expr it will be done by the system on an unescaped expression that you give.
|
|
142
265
|
e.g. give an expression like .items[] | select(.spec.containers[].image | test("^gcr.io/") | not) | .metadata.name
|
|
143
266
|
script: |
|
|
267
|
+
#!/bin/bash
|
|
268
|
+
|
|
144
269
|
echo "Command executed: kubectl get {{ kind }} --all-namespaces -o json | jq -c -r {{ jq_expr }}"
|
|
145
270
|
echo "---"
|
|
146
271
|
|
|
147
|
-
#
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
272
|
+
# Get the API path for the resource kind
|
|
273
|
+
API_INFO=$(kubectl api-resources --no-headers | grep "^{{ kind }} " | head -1)
|
|
274
|
+
|
|
275
|
+
if [ -z "$API_INFO" ]; then
|
|
276
|
+
echo "Error: Unable to find resource kind '{{ kind }}'" >&2
|
|
277
|
+
exit 1
|
|
278
|
+
fi
|
|
279
|
+
|
|
280
|
+
if [[ "$API_INFO" == *" true "* ]]; then
|
|
281
|
+
NAMESPACED="true"
|
|
282
|
+
PREFIX=$(echo "$API_INFO" | sed 's/ true .*//')
|
|
283
|
+
elif [[ "$API_INFO" == *" false "* ]]; then
|
|
284
|
+
NAMESPACED="false"
|
|
285
|
+
PREFIX=$(echo "$API_INFO" | sed 's/ false .*//')
|
|
158
286
|
else
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
287
|
+
echo "Error: Could not find NAMESPACED field (true/false) in API info" >&2
|
|
288
|
+
exit 1
|
|
289
|
+
fi
|
|
290
|
+
|
|
291
|
+
PREFIX=$(echo "$PREFIX" | sed 's/ *$//' | sed 's/ */ /g')
|
|
292
|
+
|
|
293
|
+
IFS=' ' read -ra PREFIX_FIELDS <<< "$PREFIX"
|
|
294
|
+
FIELD_COUNT=0
|
|
295
|
+
for field in "${PREFIX_FIELDS[@]}"; do
|
|
296
|
+
((FIELD_COUNT++))
|
|
297
|
+
done
|
|
298
|
+
RESOURCE_NAME="${PREFIX_FIELDS[0]}"
|
|
299
|
+
|
|
300
|
+
if [ $FIELD_COUNT -ge 2 ]; then
|
|
301
|
+
API_VERSION="${PREFIX_FIELDS[$((FIELD_COUNT - 1))]}"
|
|
302
|
+
else
|
|
303
|
+
API_VERSION=""
|
|
304
|
+
fi
|
|
305
|
+
|
|
306
|
+
if [ -z "$API_VERSION" ] || [ -z "$RESOURCE_NAME" ]; then
|
|
307
|
+
echo "Error: Unable to parse API info for resource kind '{{ kind }}'" >&2
|
|
308
|
+
exit 1
|
|
309
|
+
fi
|
|
310
|
+
|
|
311
|
+
# Build API path
|
|
312
|
+
if [[ "$API_VERSION" == "v1" ]]; then
|
|
313
|
+
API_PATH="/api/v1/${RESOURCE_NAME}"
|
|
314
|
+
else
|
|
315
|
+
API_PATH="/apis/${API_VERSION}/${RESOURCE_NAME}"
|
|
316
|
+
fi
|
|
165
317
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
318
|
+
# Process resources in chunks using API pagination
|
|
319
|
+
LIMIT=500
|
|
320
|
+
CONTINUE=""
|
|
321
|
+
ALL_MATCHES=""
|
|
322
|
+
BATCH_NUM=0
|
|
323
|
+
TOTAL_PROCESSED=0
|
|
324
|
+
|
|
325
|
+
while true; do
|
|
326
|
+
BATCH_NUM=$((BATCH_NUM + 1))
|
|
327
|
+
|
|
328
|
+
if [ -z "$CONTINUE" ]; then
|
|
329
|
+
QUERY="${API_PATH}?limit=${LIMIT}"
|
|
170
330
|
else
|
|
171
|
-
|
|
331
|
+
QUERY="${API_PATH}?limit=${LIMIT}&continue=${CONTINUE}"
|
|
172
332
|
fi
|
|
173
|
-
preview=$(echo "$filtered_matches" | head -n 10 | cut -c 1-200 | nl)
|
|
174
333
|
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
334
|
+
OUTPUT=$(kubectl get --raw "$QUERY" 2>&1)
|
|
335
|
+
exit_code=$?
|
|
336
|
+
|
|
337
|
+
if [ $exit_code -ne 0 ]; then
|
|
338
|
+
echo "Error for query $QUERY: $OUTPUT" >&2
|
|
339
|
+
exit $exit_code
|
|
340
|
+
fi
|
|
341
|
+
|
|
342
|
+
ITEMS_COUNT=$(echo "$OUTPUT" | jq '.items | length')
|
|
343
|
+
TOTAL_PROCESSED=$((TOTAL_PROCESSED + ITEMS_COUNT))
|
|
344
|
+
|
|
345
|
+
BATCH_MATCHES=$(echo "$OUTPUT" | jq -c -r {{ jq_expr }} 2>&1)
|
|
346
|
+
jq_exit=$?
|
|
347
|
+
if [ $jq_exit -ne 0 ]; then
|
|
348
|
+
echo "Error: jq expression failed: $BATCH_MATCHES" >&2
|
|
349
|
+
exit $jq_exit
|
|
350
|
+
fi
|
|
351
|
+
|
|
352
|
+
if [ -n "$BATCH_MATCHES" ]; then
|
|
353
|
+
if [ -z "$ALL_MATCHES" ]; then
|
|
354
|
+
ALL_MATCHES="$BATCH_MATCHES"
|
|
355
|
+
else
|
|
356
|
+
ALL_MATCHES="$ALL_MATCHES"$'\n'"$BATCH_MATCHES"
|
|
357
|
+
fi
|
|
358
|
+
fi
|
|
359
|
+
|
|
360
|
+
CONTINUE=$(echo "$OUTPUT" | jq -r '.metadata.continue // empty')
|
|
361
|
+
if [ -z "$CONTINUE" ]; then
|
|
362
|
+
break
|
|
363
|
+
fi
|
|
364
|
+
|
|
365
|
+
echo "Processed batch $BATCH_NUM ($TOTAL_PROCESSED items so far)..." >&2
|
|
366
|
+
done
|
|
367
|
+
|
|
368
|
+
# Now process the collected matches
|
|
369
|
+
filtered_matches=$(echo "$ALL_MATCHES" | grep -v '^$' | grep -v '^null$')
|
|
370
|
+
if [ -z "$filtered_matches" ]; then
|
|
371
|
+
count=0
|
|
372
|
+
preview=""
|
|
373
|
+
else
|
|
374
|
+
count=$(echo "$filtered_matches" | wc -l)
|
|
375
|
+
preview=$(echo "$filtered_matches" | head -n 10 | cut -c 1-200 | nl)
|
|
179
376
|
fi
|
|
180
377
|
|
|
378
|
+
echo "$count results"
|
|
379
|
+
echo "---"
|
|
380
|
+
echo "A *preview* of results is shown below (up to 10 results, up to 200 chars):"
|
|
381
|
+
echo "$preview"
|
|
382
|
+
echo "---"
|
|
383
|
+
echo "Total items processed: $TOTAL_PROCESSED" >&2
|
|
384
|
+
|
|
181
385
|
# NOTE: this is only possible for probes with a healthz endpoint - we do this to avoid giving the LLM generic
|
|
182
386
|
# http GET capabilities which are more powerful than we want to expose
|
|
183
387
|
#- name: "check_liveness_probe"
|
|
@@ -3,27 +3,27 @@ import re
|
|
|
3
3
|
import subprocess
|
|
4
4
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
5
5
|
from datetime import datetime, timezone
|
|
6
|
-
from typing import Optional,
|
|
6
|
+
from typing import List, Optional, Set, Tuple
|
|
7
|
+
|
|
7
8
|
from pydantic import BaseModel
|
|
8
9
|
|
|
9
10
|
from holmes.common.env_vars import KUBERNETES_LOGS_TIMEOUT_SECONDS
|
|
10
11
|
from holmes.core.tools import (
|
|
11
12
|
StaticPrerequisite,
|
|
12
13
|
StructuredToolResult,
|
|
13
|
-
|
|
14
|
+
StructuredToolResultStatus,
|
|
14
15
|
ToolsetTag,
|
|
15
16
|
)
|
|
16
17
|
from holmes.plugins.toolsets.logging_utils.logging_api import (
|
|
18
|
+
DEFAULT_TIME_SPAN_SECONDS,
|
|
17
19
|
BasePodLoggingToolset,
|
|
18
20
|
FetchPodLogsParams,
|
|
19
21
|
LoggingCapability,
|
|
20
22
|
LoggingConfig,
|
|
21
23
|
PodLoggingTool,
|
|
22
|
-
DEFAULT_TIME_SPAN_SECONDS,
|
|
23
24
|
)
|
|
24
25
|
from holmes.plugins.toolsets.utils import process_timestamps_to_int, to_unix_ms
|
|
25
26
|
|
|
26
|
-
|
|
27
27
|
# match ISO 8601 format (YYYY-MM-DDTHH:MM:SS[.fffffffff]Z) or (YYYY-MM-DDTHH:MM:SS[.fffffffff]+/-XX:XX)
|
|
28
28
|
timestamp_pattern = re.compile(
|
|
29
29
|
r"^(?P<ts>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:\d{2}))"
|
|
@@ -140,7 +140,7 @@ class KubernetesLogsToolset(BasePodLoggingToolset):
|
|
|
140
140
|
# Ensure both results are not None (they should always be set by the loop)
|
|
141
141
|
if current_logs_result is None or previous_logs_result is None:
|
|
142
142
|
return StructuredToolResult(
|
|
143
|
-
status=
|
|
143
|
+
status=StructuredToolResultStatus.ERROR,
|
|
144
144
|
error="Internal error: Failed to fetch logs",
|
|
145
145
|
params=params.model_dump(),
|
|
146
146
|
)
|
|
@@ -162,7 +162,7 @@ class KubernetesLogsToolset(BasePodLoggingToolset):
|
|
|
162
162
|
):
|
|
163
163
|
# Both commands failed - return error from current logs
|
|
164
164
|
return StructuredToolResult(
|
|
165
|
-
status=
|
|
165
|
+
status=StructuredToolResultStatus.ERROR,
|
|
166
166
|
error=current_logs_result.error,
|
|
167
167
|
params=params.model_dump(),
|
|
168
168
|
return_code=return_code,
|
|
@@ -206,7 +206,7 @@ class KubernetesLogsToolset(BasePodLoggingToolset):
|
|
|
206
206
|
if len(filtered_logs) == 0:
|
|
207
207
|
# Return NO_DATA status when there are no logs
|
|
208
208
|
return StructuredToolResult(
|
|
209
|
-
status=
|
|
209
|
+
status=StructuredToolResultStatus.NO_DATA,
|
|
210
210
|
data="\n".join(
|
|
211
211
|
metadata_lines
|
|
212
212
|
), # Still include metadata for context
|
|
@@ -218,7 +218,7 @@ class KubernetesLogsToolset(BasePodLoggingToolset):
|
|
|
218
218
|
response_data = formatted_logs + "\n" + "\n".join(metadata_lines)
|
|
219
219
|
|
|
220
220
|
return StructuredToolResult(
|
|
221
|
-
status=
|
|
221
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
222
222
|
data=response_data,
|
|
223
223
|
params=params.model_dump(),
|
|
224
224
|
return_code=return_code,
|
|
@@ -226,7 +226,7 @@ class KubernetesLogsToolset(BasePodLoggingToolset):
|
|
|
226
226
|
except Exception as e:
|
|
227
227
|
logging.exception(f"Error fetching logs for pod {params.pod_name}")
|
|
228
228
|
return StructuredToolResult(
|
|
229
|
-
status=
|
|
229
|
+
status=StructuredToolResultStatus.ERROR,
|
|
230
230
|
error=f"Error fetching logs: {str(e)}",
|
|
231
231
|
params=params.model_dump(),
|
|
232
232
|
)
|
|
@@ -8,6 +8,10 @@ toolsets:
|
|
|
8
8
|
prerequisites:
|
|
9
9
|
- command: "kubectl version --client"
|
|
10
10
|
|
|
11
|
+
# Note: Log tools use transformers with llm_summarize to automatically
|
|
12
|
+
# summarize large log outputs when a fast model is configured. This helps
|
|
13
|
+
# focus on errors, patterns, and key information while reducing context usage.
|
|
14
|
+
|
|
11
15
|
tools:
|
|
12
16
|
- name: "kubectl_previous_logs"
|
|
13
17
|
description: "Run `kubectl logs --previous` on a single Kubernetes pod. Used to fetch logs for a pod that crashed and see logs from before the crash. Never give a deployment name or a resource that is not a pod."
|
|
@@ -24,10 +28,38 @@ toolsets:
|
|
|
24
28
|
- name: "kubectl_logs"
|
|
25
29
|
description: "Run `kubectl logs` on a single Kubernetes pod. Never give a deployment name or a resource that is not a pod."
|
|
26
30
|
command: "kubectl logs {{pod_name}} -n {{ namespace }}"
|
|
31
|
+
transformers:
|
|
32
|
+
- name: llm_summarize
|
|
33
|
+
config:
|
|
34
|
+
input_threshold: 1000
|
|
35
|
+
prompt: |
|
|
36
|
+
Summarize these pod logs focusing on:
|
|
37
|
+
- Errors, exceptions, and warning messages
|
|
38
|
+
- Recent activity patterns and trends
|
|
39
|
+
- Any authentication, connection, or startup issues
|
|
40
|
+
- Performance indicators (response times, throughput)
|
|
41
|
+
- Group similar log entries together
|
|
42
|
+
- When possible, mention exact error codes or keywords for easier searching
|
|
43
|
+
- Be concise: aim for ≤ 50% of the original text; prioritize aggregates and actionable outliers
|
|
44
|
+
- Include grep-ready keys/values; avoid repeating entire logs or unchanged defaults
|
|
27
45
|
|
|
28
46
|
- name: "kubectl_logs_all_containers"
|
|
29
47
|
description: "Run `kubectl logs` on all containers within a single Kubernetes pod."
|
|
30
48
|
command: "kubectl logs {{pod_name}} -n {{ namespace }} --all-containers"
|
|
49
|
+
transformers:
|
|
50
|
+
- name: llm_summarize
|
|
51
|
+
config:
|
|
52
|
+
input_threshold: 1000
|
|
53
|
+
prompt: |
|
|
54
|
+
Summarize these multi-container pod logs focusing on:
|
|
55
|
+
- Errors, exceptions, and warning messages by container
|
|
56
|
+
- Inter-container communication patterns
|
|
57
|
+
- Any authentication, connection, or startup issues
|
|
58
|
+
- Performance indicators and resource usage patterns
|
|
59
|
+
- Group similar log entries together by container
|
|
60
|
+
- When possible, mention exact error codes or keywords for easier searching
|
|
61
|
+
- Strive for ≤ 50% of the original size; keep results compact and grep-friendly (one line per aggregate)
|
|
62
|
+
- Prioritize aggregates and actionable outliers over comprehensive details
|
|
31
63
|
|
|
32
64
|
- name: "kubectl_container_logs"
|
|
33
65
|
description: "Run `kubectl logs` on a single container within a Kubernetes pod. This is to get the logs of a specific container in a multi-container pod."
|