PyPI - holmesgpt - Versions diffs - 0.13.2__py3-none-any.whl → 0.18.4__py3-none-any.whl - Mend

holmesgpt-0.13.2-py3-none-any.whl → holmesgpt-0.18.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (188)

holmes/__init__.py +3 -5
holmes/clients/robusta_client.py +20 -6
holmes/common/env_vars.py +58 -3
holmes/common/openshift.py +1 -1
holmes/config.py +123 -148
holmes/core/conversations.py +71 -15
holmes/core/feedback.py +191 -0
holmes/core/investigation.py +31 -39
holmes/core/investigation_structured_output.py +3 -3
holmes/core/issue.py +1 -1
holmes/core/llm.py +508 -88
holmes/core/models.py +108 -4
holmes/core/openai_formatting.py +14 -1
holmes/core/prompt.py +48 -3
holmes/core/runbooks.py +1 -0
holmes/core/safeguards.py +8 -6
holmes/core/supabase_dal.py +295 -100
holmes/core/tool_calling_llm.py +489 -428
holmes/core/tools.py +325 -56
holmes/core/tools_utils/token_counting.py +21 -0
holmes/core/tools_utils/tool_context_window_limiter.py +40 -0
holmes/core/tools_utils/tool_executor.py +0 -13
holmes/core/tools_utils/toolset_utils.py +1 -0
holmes/core/toolset_manager.py +191 -5
holmes/core/tracing.py +19 -3
holmes/core/transformers/__init__.py +23 -0
holmes/core/transformers/base.py +63 -0
holmes/core/transformers/llm_summarize.py +175 -0
holmes/core/transformers/registry.py +123 -0
holmes/core/transformers/transformer.py +32 -0
holmes/core/truncation/compaction.py +94 -0
holmes/core/truncation/dal_truncation_utils.py +23 -0
holmes/core/truncation/input_context_window_limiter.py +219 -0
holmes/interactive.py +228 -31
holmes/main.py +23 -40
holmes/plugins/interfaces.py +2 -1
holmes/plugins/prompts/__init__.py +2 -1
holmes/plugins/prompts/_fetch_logs.jinja2 +31 -6
holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
holmes/plugins/prompts/_runbook_instructions.jinja2 +24 -12
holmes/plugins/prompts/base_user_prompt.jinja2 +7 -0
holmes/plugins/prompts/conversation_history_compaction.jinja2 +89 -0
holmes/plugins/prompts/generic_ask.jinja2 +0 -4
holmes/plugins/prompts/generic_ask_conversation.jinja2 +0 -1
holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +0 -1
holmes/plugins/prompts/generic_investigation.jinja2 +0 -1
holmes/plugins/prompts/investigation_procedure.jinja2 +50 -1
holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +0 -1
holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +0 -1
holmes/plugins/runbooks/__init__.py +145 -17
holmes/plugins/runbooks/catalog.json +2 -0
holmes/plugins/sources/github/__init__.py +4 -2
holmes/plugins/sources/prometheus/models.py +1 -0
holmes/plugins/toolsets/__init__.py +44 -27
holmes/plugins/toolsets/aks-node-health.yaml +46 -0
holmes/plugins/toolsets/aks.yaml +64 -0
holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +38 -47
holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +3 -2
holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +3 -2
holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +3 -1
holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +3 -1
holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +12 -13
holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +15 -12
holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +15 -12
holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +11 -11
holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +11 -9
holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +15 -12
holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +15 -15
holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +11 -8
holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +11 -8
holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +11 -8
holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +11 -8
holmes/plugins/toolsets/azure_sql/utils.py +0 -32
holmes/plugins/toolsets/bash/argocd/__init__.py +3 -3
holmes/plugins/toolsets/bash/aws/__init__.py +4 -4
holmes/plugins/toolsets/bash/azure/__init__.py +4 -4
holmes/plugins/toolsets/bash/bash_toolset.py +11 -15
holmes/plugins/toolsets/bash/common/bash.py +23 -13
holmes/plugins/toolsets/bash/common/bash_command.py +1 -1
holmes/plugins/toolsets/bash/common/stringify.py +1 -1
holmes/plugins/toolsets/bash/kubectl/__init__.py +2 -1
holmes/plugins/toolsets/bash/kubectl/constants.py +0 -1
holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +3 -4
holmes/plugins/toolsets/bash/parse_command.py +12 -13
holmes/plugins/toolsets/cilium.yaml +284 -0
holmes/plugins/toolsets/connectivity_check.py +124 -0
holmes/plugins/toolsets/coralogix/api.py +132 -119
holmes/plugins/toolsets/coralogix/coralogix.jinja2 +14 -0
holmes/plugins/toolsets/coralogix/toolset_coralogix.py +219 -0
holmes/plugins/toolsets/coralogix/utils.py +15 -79
holmes/plugins/toolsets/datadog/datadog_api.py +525 -26
holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +55 -11
holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +3 -3
holmes/plugins/toolsets/datadog/datadog_models.py +59 -0
holmes/plugins/toolsets/datadog/datadog_url_utils.py +213 -0
holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +165 -28
holmes/plugins/toolsets/datadog/toolset_datadog_general.py +417 -241
holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +234 -214
holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +167 -79
holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +374 -363
holmes/plugins/toolsets/elasticsearch/__init__.py +6 -0
holmes/plugins/toolsets/elasticsearch/elasticsearch.py +834 -0
holmes/plugins/toolsets/elasticsearch/opensearch_ppl_query_docs.jinja2 +1616 -0
holmes/plugins/toolsets/elasticsearch/opensearch_query_assist.py +78 -0
holmes/plugins/toolsets/elasticsearch/opensearch_query_assist_instructions.jinja2 +223 -0
holmes/plugins/toolsets/git.py +54 -50
holmes/plugins/toolsets/grafana/base_grafana_toolset.py +16 -4
holmes/plugins/toolsets/grafana/common.py +13 -29
holmes/plugins/toolsets/grafana/grafana_tempo_api.py +455 -0
holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +25 -0
holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +191 -0
holmes/plugins/toolsets/grafana/loki_api.py +4 -0
holmes/plugins/toolsets/grafana/toolset_grafana.py +293 -89
holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +49 -0
holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +820 -292
holmes/plugins/toolsets/grafana/trace_parser.py +4 -3
holmes/plugins/toolsets/internet/internet.py +15 -16
holmes/plugins/toolsets/internet/notion.py +9 -11
holmes/plugins/toolsets/investigator/core_investigation.py +44 -36
holmes/plugins/toolsets/investigator/model.py +3 -1
holmes/plugins/toolsets/json_filter_mixin.py +134 -0
holmes/plugins/toolsets/kafka.py +36 -42
holmes/plugins/toolsets/kubernetes.yaml +317 -113
holmes/plugins/toolsets/kubernetes_logs.py +9 -9
holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
holmes/plugins/toolsets/logging_utils/logging_api.py +94 -8
holmes/plugins/toolsets/mcp/toolset_mcp.py +218 -64
holmes/plugins/toolsets/newrelic/new_relic_api.py +165 -0
holmes/plugins/toolsets/newrelic/newrelic.jinja2 +65 -0
holmes/plugins/toolsets/newrelic/newrelic.py +320 -0
holmes/plugins/toolsets/openshift.yaml +283 -0
holmes/plugins/toolsets/prometheus/prometheus.py +1202 -421
holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +54 -5
holmes/plugins/toolsets/prometheus/utils.py +28 -0
holmes/plugins/toolsets/rabbitmq/api.py +23 -4
holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +13 -14
holmes/plugins/toolsets/robusta/robusta.py +239 -68
holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
holmes/plugins/toolsets/runbook/runbook_fetcher.py +157 -27
holmes/plugins/toolsets/service_discovery.py +1 -1
holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
holmes/plugins/toolsets/utils.py +88 -0
holmes/utils/config_utils.py +91 -0
holmes/utils/connection_utils.py +31 -0
holmes/utils/console/result.py +10 -0
holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
holmes/utils/env.py +7 -0
holmes/utils/file_utils.py +2 -1
holmes/utils/global_instructions.py +60 -11
holmes/utils/holmes_status.py +6 -4
holmes/utils/holmes_sync_toolsets.py +0 -2
holmes/utils/krr_utils.py +188 -0
holmes/utils/log.py +15 -0
holmes/utils/markdown_utils.py +2 -3
holmes/utils/memory_limit.py +58 -0
holmes/utils/sentry_helper.py +64 -0
holmes/utils/stream.py +69 -8
holmes/utils/tags.py +4 -3
holmes/version.py +37 -15
holmesgpt-0.18.4.dist-info/LICENSE +178 -0
{holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/METADATA +35 -31
holmesgpt-0.18.4.dist-info/RECORD +258 -0
holmes/core/performance_timing.py +0 -72
holmes/plugins/toolsets/aws.yaml +0 -80
holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +0 -112
holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +0 -310
holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +0 -739
holmes/plugins/toolsets/grafana/grafana_api.py +0 -42
holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
holmes/plugins/toolsets/newrelic.py +0 -231
holmes/plugins/toolsets/opensearch/opensearch.py +0 -257
holmes/plugins/toolsets/opensearch/opensearch_logs.py +0 -161
holmes/plugins/toolsets/opensearch/opensearch_traces.py +0 -218
holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +0 -12
holmes/plugins/toolsets/opensearch/opensearch_utils.py +0 -166
holmes/plugins/toolsets/servicenow/install.md +0 -37
holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
holmes/utils/keygen_utils.py +0 -6
holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
holmesgpt-0.13.2.dist-info/RECORD +0 -234
holmes/plugins/toolsets/{opensearch → newrelic}/__init__.py +0 -0
{holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/WHEEL +0 -0
{holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/entry_points.txt +0 -0

holmes/plugins/toolsets/kubernetes.yaml CHANGED

@@ -8,6 +8,10 @@ toolsets:
     prerequisites:
       - command: "kubectl version --client"
+    # Note: Many tools in this toolset use transformers with llm_summarize
+    # to automatically summarize large kubectl outputs when a fast model is configured.
+    # This reduces context window usage while preserving key information for debugging.
     tools:
       - name: "kubectl_describe"
         description: >
@@ -17,6 +21,20 @@ toolsets:
             - 'describe pod xyz-123'
             - 'show service xyz-123 in namespace my-ns'
         command: "kubectl describe {{ kind }} {{ name }}{% if namespace %} -n {{ namespace }}{% endif %}"
+        transformers:
+          - name: llm_summarize
+            config:
+              input_threshold: 1000
+              prompt: |
+                Summarize this kubectl describe output focusing on:
+                - What needs attention or immediate action
+                - Resource status and health indicators
+                - Any errors, warnings, or non-standard states
+                - Key configuration details that could affect functionality
+                - When possible, mention exact field names so the user can grep for specific details
+                - Be concise: aim for ≤ 50% of the original length; avoid repeating defaults/healthy/unchanged details
+                - Prefer aggregates and counts; list only outliers and actionable items
+                - Keep grep-friendly: include exact field names/values that matter
       - name: "kubectl_get_by_name"
         description: "Run `kubectl get <kind> <name> --show-labels`"
@@ -25,10 +43,36 @@ toolsets:
       - name: "kubectl_get_by_kind_in_namespace"
         description: "Run `kubectl get <kind> -n <namespace> --show-labels` to get all resources of a given type in namespace"
         command: "kubectl get --show-labels -o wide {{ kind }} -n {{namespace}}"
+        transformers:
+          - name: llm_summarize
+            config:
+              input_threshold: 1000
+              prompt: |
+                Summarize this kubectl output focusing on:
+                - What needs attention or immediate action
+                - Group similar resources into aggregate descriptions
+                - Make sure to mention outliers, errors, and non-standard states
+                - List healthy resources as aggregate descriptions
+                - When listing unhealthy resources, also try to use aggregate descriptions when possible
+                - When possible, mention exact keywords so the user can rerun the command with | grep <keyword> and drill down
+                - Be concise and avoid expansion: target ≤ 50% of input size; prefer counts + outliers over full listings
       - name: "kubectl_get_by_kind_in_cluster"
         description: "Run `kubectl get -A <kind> --show-labels` to get all resources of a given type in the cluster"
         command: "kubectl get -A --show-labels -o wide {{ kind }}"
+        transformers:
+          - name: llm_summarize
+            config:
+              input_threshold: 1000
+              prompt: |
+                Summarize this kubectl output focusing on:
+                - What needs attention or immediate action
+                - Group similar resources into a single line and description
+                - Make sure to mention outliers, errors, and non-standard states
+                - List healthy resources as aggregate descriptions
+                - When listing unhealthy resources, also try to use aggregate descriptions when possible
+                - When possible, mention exact keywords so the user can rerun the command with | grep <keyword> and drill down on the parts they care about
+                - Strive for ≤ 50% of the original size; keep results compact and grep-friendly (one line per aggregate)
       - name: "kubectl_find_resource"
         description: "Run `kubectl get {{ kind }} -A --show-labels | grep {{ keyword }}` to find a resource where you know a substring of the name, IP, namespace, or labels"
@@ -42,142 +86,302 @@ toolsets:
         description: "Retrieve the events for a specific Kubernetes resource. `resource_type` can be any kubernetes resource type: 'pod', 'service', 'deployment', 'job', 'node', etc."
         command: "kubectl events --for {{resource_type}}/{{ resource_name }}{% if namespace %} -n {{ namespace }}{% endif %}"
-      - name: "kubectl_memory_requests_all_namespaces"
-        description: "Fetch and display memory requests for all pods across all namespaces in MiB, summing requests across multiple containers where applicable and handling binary, decimal, and millibyte units correctly."
-        command: |
-          kubectl get pods --all-namespaces -o custom-columns="NAMESPACE:.metadata.namespace,NAME:.metadata.name,MEMORY_REQUEST:.spec.containers[*].resources.requests.memory" --no-headers | \
-          awk '
-            function convert_to_mib(value) {
-              if (value ~ /^[0-9]+e[0-9]+$/) return (value + 0) / (1024 * 1024); # Scientific notation
-              if (value ~ /m$/) return (value + 0) / (1024^2 * 1000);           # Millibytes (m)
-              if (value ~ /Ei$/) return (value + 0) * 1024^6 / (1024^2);        # Binary units
-              if (value ~ /Pi$/) return (value + 0) * 1024^5 / (1024^2);
-              if (value ~ /Ti$/) return (value + 0) * 1024^4 / (1024^2);
-              if (value ~ /Gi$/) return (value + 0) * 1024^3 / (1024^2);
-              if (value ~ /Mi$/) return (value + 0);
-              if (value ~ /Ki$/) return (value + 0) / 1024;
-              if (value ~ /E$/) return (value + 0) * 1000^6 / (1024^2);         # Decimal units
-              if (value ~ /P$/) return (value + 0) * 1000^5 / (1024^2);
-              if (value ~ /T$/) return (value + 0) * 1000^4 / (1024^2);
-              if (value ~ /G$/) return (value + 0) * 1000^3 / (1024^2);
-              if (value ~ /M$/) return (value + 0) * 1000^2 / (1024^2);
-              if (value ~ /k$/) return (value + 0) * 1000 / (1024^2);
-              return (value + 0) / (1024 * 1024);                               # Default: bytes
-            }
-            function sum_memory(requests) {
-              gsub(/^[ \t]+|[ \t]+$/, "", requests);
-              if (requests == "" || requests == "<none>") return 0;
-              split(requests, arr, ",");
-              total = 0;
-              for (i in arr) {
-                if (arr[i] != "<none>") total += convert_to_mib(arr[i]);
-              }
-              return total;
-            }
-            {
-              namespace = $1;
-              name = $2;
-              requests = $3;
-              for (i=4; i<=NF; i++) {
-                requests = requests " " $i;
-              }
-              print namespace, name, sum_memory(requests) " Mi";
-            }' | sort -k3 -nr
-      - name: "kubectl_memory_requests_namespace"
-        description: "Fetch and display memory requests for all pods in a specified namespace in MiB, summing requests across multiple containers where applicable and handling binary, decimal, and millibyte units correctly."
-        command: |
-          kubectl get pods -n {{ namespace }} -o custom-columns="NAMESPACE:.metadata.namespace,NAME:.metadata.name,MEMORY_REQUEST:.spec.containers[*].resources.requests.memory" --no-headers | \
-          awk '
-            function convert_to_mib(value) {
-              if (value ~ /^[0-9]+e[0-9]+$/) return (value + 0) / (1024 * 1024); # Scientific notation
-              if (value ~ /m$/) return (value + 0) / (1024^2 * 1000);           # Millibytes (m)
-              if (value ~ /Ei$/) return (value + 0) * 1024^6 / (1024^2);        # Binary units
-              if (value ~ /Pi$/) return (value + 0) * 1024^5 / (1024^2);
-              if (value ~ /Ti$/) return (value + 0) * 1024^4 / (1024^2);
-              if (value ~ /Gi$/) return (value + 0) * 1024^3 / (1024^2);
-              if (value ~ /Mi$/) return (value + 0);
-              if (value ~ /Ki$/) return (value + 0) / 1024;
-              if (value ~ /E$/) return (value + 0) * 1000^6 / (1024^2);         # Decimal units
-              if (value ~ /P$/) return (value + 0) * 1000^5 / (1024^2);
-              if (value ~ /T$/) return (value + 0) * 1000^4 / (1024^2);
-              if (value ~ /G$/) return (value + 0) * 1000^3 / (1024^2);
-              if (value ~ /M$/) return (value + 0) * 1000^2 / (1024^2);
-              if (value ~ /k$/) return (value + 0) * 1000 / (1024^2);
-              return (value + 0) / (1024 * 1024);                               # Default: bytes
-            }
-            function sum_memory(requests) {
-              gsub(/^[ \t]+|[ \t]+$/, "", requests);
-              if (requests == "" || requests == "<none>") return 0;
-              split(requests, arr, ",");
-              total = 0;
-              for (i in arr) {
-                if (arr[i] != "<none>") total += convert_to_mib(arr[i]);
-              }
-              return total;
-            }
-            {
-              namespace = $1;
-              name = $2;
-              requests = $3;
-              for (i=4; i<=NF; i++) {
-                requests = requests " " $i;
-              }
-              print namespace, name, sum_memory(requests) " Mi";
-            }' | sort -k3 -nr
       - name: "kubernetes_jq_query"
         user_description: "Query Kubernetes Resources: kubectl get {{kind}} --all-namespaces -o json | jq -r {{jq_expr}}"
         description: >
-          Use kubectl to get json for all resources of a specific kind pipe the results to jq to filter them. Do not worry about escaping the jq_expr it will be done by the system on an unescaped expression that you give. e.g. give an expression like .items[] | .spec.containers[].image | select(test("^gcr.io/") | not)
-        command: kubectl get {{ kind }} --all-namespaces -o json | jq -r {{ jq_expr }}
+          Use kubectl to get json for all resources of a specific kind and filter with jq.
+          IMPORTANT: The 'kind' parameter must be the plural form of the resource type
+          (e.g., use "pods" not "pod", "services" not "service", "jobs" not "job").
+          Do not worry about escaping the jq_expr - it will be done by the system.
+          Example: .items[] | .spec.containers[].image | select(test("^gcr.io/") | not)
+        script: |
+          #!/bin/bash
+          echo "Executing paginated query for {{ kind }} resources..."
+          echo "Expression: {{ jq_expr }}"
+          echo "---"
+          # Get the API path for the resource kind using kubectl
+          API_INFO=$(kubectl api-resources --no-headers | grep "^{{ kind }} " | head -1)
+          if [ -z "$API_INFO" ]; then
+            echo "Error: Unable to find resource kind '{{ kind }}'" >&2
+            exit 1
+          fi
+          # Extract NAMESPACED value
+          if [[ "$API_INFO" == *" true "* ]]; then
+            NAMESPACED="true"
+            PREFIX=$(echo "$API_INFO" | sed 's/ true .*//')
+          elif [[ "$API_INFO" == *" false "* ]]; then
+            NAMESPACED="false"
+            PREFIX=$(echo "$API_INFO" | sed 's/ false .*//')
+          else
+            echo "Error: Could not find NAMESPACED field (true/false) in API info" >&2
+            exit 1
+          fi
+          # Trim trailing spaces from prefix and collapse internal spaces
+          PREFIX=$(echo "$PREFIX" | sed 's/  *$//' | sed 's/  */ /g')
+          IFS=' ' read -ra PREFIX_FIELDS <<< "$PREFIX"
+          FIELD_COUNT=0
+          for field in "${PREFIX_FIELDS[@]}"; do
+            ((FIELD_COUNT++))
+          done
+          RESOURCE_NAME="${PREFIX_FIELDS[0]}"
+          if [ $FIELD_COUNT -ge 2 ]; then
+            API_VERSION="${PREFIX_FIELDS[$((FIELD_COUNT - 1))]}"
+          else
+            API_VERSION=""
+          fi
+          if [ -z "$API_VERSION" ] || [ -z "$RESOURCE_NAME" ]; then
+            echo "Error: Unable to parse API info for resource kind '{{ kind }}'" >&2
+            exit 1
+          fi
+          # Build API path
+          if [[ "$API_VERSION" == "v1" ]]; then
+            API_PATH="/api/v1/${RESOURCE_NAME}"
+          else
+            API_PATH="/apis/${API_VERSION}/${RESOURCE_NAME}"
+          fi
+          # Process resources in chunks using API pagination
+          LIMIT=500  # Process 500 items at a time
+          CONTINUE=""
+          PROCESSED=0
+          TOTAL_MATCHES=0
+          while true; do
+            # Build API query with limit and continue token
+            if [ -z "$CONTINUE" ]; then
+              # First request - get from all namespaces
+              QUERY="${API_PATH}?limit=${LIMIT}"
+            else
+              # Subsequent requests with continue token
+              QUERY="${API_PATH}?limit=${LIMIT}&continue=${CONTINUE}"
+            fi
+            OUTPUT=$(kubectl get --raw "$QUERY" 2>&1)
+            exit_code=$?
+            if [ $exit_code -ne 0 ]; then
+              echo "Error: $OUTPUT" >&2
+              exit $exit_code
+            fi
+            ITEMS_COUNT=$(echo "$OUTPUT" | jq '.items | length')
+            MATCHES=$(echo "$OUTPUT" | jq -r {{ jq_expr }} 2>&1)
+            jq_exit=$?
+            if [ $jq_exit -ne 0 ]; then
+              echo "Error: jq expression failed: $MATCHES" >&2
+              exit $jq_exit
+            fi
+            if [ "$ITEMS_COUNT" -gt 0 ]; then
+              if [ -n "$MATCHES" ]; then
+                echo "$MATCHES"
+                MATCH_COUNT=$(echo "$MATCHES" | grep -c . || true)
+                TOTAL_MATCHES=$((TOTAL_MATCHES + MATCH_COUNT))
+              fi
+              PROCESSED=$((PROCESSED + ITEMS_COUNT))
+              echo "Processed $PROCESSED items, found $TOTAL_MATCHES matches so far..." >&2
+            fi
+            CONTINUE=$(echo "$OUTPUT" | jq -r '.metadata.continue // empty')
+            if [ -z "$CONTINUE" ]; then
+              break
+            fi
+          done
+          echo "---" >&2
+          echo "Total items processed: $PROCESSED, matches found: $TOTAL_MATCHES" >&2
+        transformers:
+          - name: llm_summarize
+            config:
+              input_threshold: 10000
+              prompt: |
+                Summarize this jq query output focusing on:
+                - Key patterns and commonalities in the data
+                - Notable outliers, anomalies, or items that need attention
+                - Group similar results into aggregate descriptions when possible
+                - Highlight any empty results, null values, or missing data
+                - When applicable, mention specific resource names, namespaces, or values that stand out
+                - Organize findings in a structured way that helps with troubleshooting
+                - Be concise: aim for ≤ 50% of the original text; prioritize aggregates and actionable outliers
+                - Include grep-ready keys/values; avoid repeating entire objects or unchanged defaults
+      - name: "kubernetes_tabular_query"
+        user_description: "Tabular output of specific fields: kubectl get {{kind}} --all-namespaces -o custom-columns={{columns}}"
+        description: >
+          Extract specific fields from Kubernetes resources in tabular format with optional filtering.
+          Memory-efficient way to query large clusters - only requested fields are transmitted.
+          Column specification format: HEADER:FIELD_PATH,HEADER2:FIELD_PATH2,...
+          Optional filtering parameter:
+          - filter_pattern: Pattern to match in any column (supports grep regex)
+          Examples:
+          - Basic fields: NAME:.metadata.name,STATUS:.status.phase,NODE:.spec.nodeName
+          - Filter by status: filter_pattern="Running"
+          - Filter out lines with <none>: filter_pattern="-v '<none>'"
+          - Nested fields: CREATED:.metadata.creationTimestamp,IMAGE:.spec.containers[0].image
+          - Array fields: LABELS:.metadata.labels,PORTS:.spec.ports[*].port
+          Note: Output is tabular text with column headers. Filtering works on the entire line.
+          Note: not allowed characters are: ' / ; and newline
+        command: kubectl get {{ kind }} --all-namespaces -o custom-columns='{{ columns }}'{% if filter_pattern %} | (head -n 1; tail -n +2 | grep {{ filter_pattern }}){% endif %}
+        transformers:
+          - name: llm_summarize
+            config:
+              input_threshold: 10000
+              prompt: |
+                Summarize this tabular output focusing on:
+                - Key patterns and trends in the data
+                - Resources that need attention (errors, pending, failures)
+                - Group similar items into aggregate descriptions
+                - Highlight outliers or unusual values
+                - Mention specific resource names only for problematic items
+                - Provide counts and distributions where relevant
+                - Be concise: aim for ≤ 50% of the original size
+                - Keep output actionable and focused on anomalies
       - name: "kubernetes_count"
         user_description: "Count Kubernetes Resources: kubectl get {{kind}} --all-namespaces -o json | jq -c -r {{ jq_expr }}"
         description: >
           Use kubectl to get apply a jq filter and then count the results.
           Use this whenever asked to count kubernetes resources.
+          IMPORTANT: The 'kind' parameter must be the plural form of the resource type
+          (e.g., use "pods" not "pod", "services" not "service", "jobs" not "job").
           Use select() to filter objects before extracting properties, e.g. .items[] | select(.metadata.namespace == "test-1") | .metadata.name
           Do not worry about escaping the jq_expr it will be done by the system on an unescaped expression that you give.
           e.g. give an expression like .items[] | select(.spec.containers[].image | test("^gcr.io/") | not) | .metadata.name
         script: |
+          #!/bin/bash
           echo "Command executed: kubectl get {{ kind }} --all-namespaces -o json | jq -c -r {{ jq_expr }}"
           echo "---"
-          # Execute the command and capture both stdout and stderr separately
-          temp_error=$(mktemp)
-          matches=$(kubectl get {{ kind }} --all-namespaces -o json 2>"$temp_error" | jq -c -r {{ jq_expr }} 2>>"$temp_error")
-          exit_code=$?
-          error_output=$(cat "$temp_error")
-          rm -f "$temp_error"
-          if [ $exit_code -ne 0 ]; then
-            echo "Error executing command (exit code: $exit_code):"
-            echo "$error_output"
-            exit $exit_code
+          # Get the API path for the resource kind
+          API_INFO=$(kubectl api-resources --no-headers | grep "^{{ kind }} " | head -1)
+          if [ -z "$API_INFO" ]; then
+            echo "Error: Unable to find resource kind '{{ kind }}'" >&2
+            exit 1
+          fi
+          if [[ "$API_INFO" == *" true "* ]]; then
+            NAMESPACED="true"
+            PREFIX=$(echo "$API_INFO" | sed 's/ true .*//')
+          elif [[ "$API_INFO" == *" false "* ]]; then
+            NAMESPACED="false"
+            PREFIX=$(echo "$API_INFO" | sed 's/ false .*//')
           else
-            # Show any stderr warnings even if command succeeded
-            if [ -n "$error_output" ]; then
-              echo "Warnings/stderr output:"
-              echo "$error_output"
-              echo "---"
-            fi
+            echo "Error: Could not find NAMESPACED field (true/false) in API info" >&2
+            exit 1
+          fi
+          PREFIX=$(echo "$PREFIX" | sed 's/  *$//' | sed 's/  */ /g')
+          IFS=' ' read -ra PREFIX_FIELDS <<< "$PREFIX"
+          FIELD_COUNT=0
+          for field in "${PREFIX_FIELDS[@]}"; do
+            ((FIELD_COUNT++))
+          done
+          RESOURCE_NAME="${PREFIX_FIELDS[0]}"
+          if [ $FIELD_COUNT -ge 2 ]; then
+            API_VERSION="${PREFIX_FIELDS[$((FIELD_COUNT - 1))]}"
+          else
+            API_VERSION=""
+          fi
+          if [ -z "$API_VERSION" ] || [ -z "$RESOURCE_NAME" ]; then
+            echo "Error: Unable to parse API info for resource kind '{{ kind }}'" >&2
+            exit 1
+          fi
+          # Build API path
+          if [[ "$API_VERSION" == "v1" ]]; then
+            API_PATH="/api/v1/${RESOURCE_NAME}"
+          else
+            API_PATH="/apis/${API_VERSION}/${RESOURCE_NAME}"
+          fi
-            # Filter out empty lines for accurate count
-            filtered_matches=$(echo "$matches" | grep -v '^$' | grep -v '^null$')
-            if [ -z "$filtered_matches" ]; then
-              count=0
+          # Process resources in chunks using API pagination
+          LIMIT=500
+          CONTINUE=""
+          ALL_MATCHES=""
+          BATCH_NUM=0
+          TOTAL_PROCESSED=0
+          while true; do
+            BATCH_NUM=$((BATCH_NUM + 1))
+            if [ -z "$CONTINUE" ]; then
+              QUERY="${API_PATH}?limit=${LIMIT}"
             else
-              count=$(echo "$filtered_matches" | wc -l)
+              QUERY="${API_PATH}?limit=${LIMIT}&continue=${CONTINUE}"
             fi
-            preview=$(echo "$filtered_matches" | head -n 10 | cut -c 1-200 | nl)
-            echo "$count results"
-            echo "---"
-            echo "A *preview* of results is shown below (up to 10 results, up to 200 chars):"
-            echo "$preview"
+            OUTPUT=$(kubectl get --raw "$QUERY" 2>&1)
+            exit_code=$?
+            if [ $exit_code -ne 0 ]; then
+              echo "Error for query $QUERY: $OUTPUT" >&2
+              exit $exit_code
+            fi
+            ITEMS_COUNT=$(echo "$OUTPUT" | jq '.items | length')
+            TOTAL_PROCESSED=$((TOTAL_PROCESSED + ITEMS_COUNT))
+            BATCH_MATCHES=$(echo "$OUTPUT" | jq -c -r {{ jq_expr }} 2>&1)
+            jq_exit=$?
+            if [ $jq_exit -ne 0 ]; then
+              echo "Error: jq expression failed: $BATCH_MATCHES" >&2
+              exit $jq_exit
+            fi
+            if [ -n "$BATCH_MATCHES" ]; then
+              if [ -z "$ALL_MATCHES" ]; then
+                ALL_MATCHES="$BATCH_MATCHES"
+              else
+                ALL_MATCHES="$ALL_MATCHES"$'\n'"$BATCH_MATCHES"
+              fi
+            fi
+            CONTINUE=$(echo "$OUTPUT" | jq -r '.metadata.continue // empty')
+            if [ -z "$CONTINUE" ]; then
+              break
+            fi
+            echo "Processed batch $BATCH_NUM ($TOTAL_PROCESSED items so far)..." >&2
+          done
+          # Now process the collected matches
+          filtered_matches=$(echo "$ALL_MATCHES" | grep -v '^$' | grep -v '^null$')
+          if [ -z "$filtered_matches" ]; then
+            count=0
+            preview=""
+          else
+            count=$(echo "$filtered_matches" | wc -l)
+            preview=$(echo "$filtered_matches" | head -n 10 | cut -c 1-200 | nl)
           fi
+          echo "$count results"
+          echo "---"
+          echo "A *preview* of results is shown below (up to 10 results, up to 200 chars):"
+          echo "$preview"
+          echo "---"
+          echo "Total items processed: $TOTAL_PROCESSED" >&2
     # NOTE: this is only possible for probes with a healthz endpoint - we do this to avoid giving the LLM generic
     # http GET capabilities which are more powerful than we want to expose
     #- name: "check_liveness_probe"

holmes/plugins/toolsets/kubernetes_logs.py CHANGED

@@ -3,27 +3,27 @@ import re
 import subprocess
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import datetime, timezone
-from typing import Optional, List, Tuple, Set
+from typing import List, Optional, Set, Tuple
 from pydantic import BaseModel
 from holmes.common.env_vars import KUBERNETES_LOGS_TIMEOUT_SECONDS
 from holmes.core.tools import (
     StaticPrerequisite,
     StructuredToolResult,
-    ToolResultStatus,
+    StructuredToolResultStatus,
     ToolsetTag,
 )
 from holmes.plugins.toolsets.logging_utils.logging_api import (
+    DEFAULT_TIME_SPAN_SECONDS,
     BasePodLoggingToolset,
     FetchPodLogsParams,
     LoggingCapability,
     LoggingConfig,
     PodLoggingTool,
-    DEFAULT_TIME_SPAN_SECONDS,
 )
 from holmes.plugins.toolsets.utils import process_timestamps_to_int, to_unix_ms
 # match ISO 8601 format (YYYY-MM-DDTHH:MM:SS[.fffffffff]Z) or (YYYY-MM-DDTHH:MM:SS[.fffffffff]+/-XX:XX)
 timestamp_pattern = re.compile(
     r"^(?P<ts>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:\d{2}))"
@@ -140,7 +140,7 @@ class KubernetesLogsToolset(BasePodLoggingToolset):
             # Ensure both results are not None (they should always be set by the loop)
             if current_logs_result is None or previous_logs_result is None:
                 return StructuredToolResult(
-                    status=ToolResultStatus.ERROR,
+                    status=StructuredToolResultStatus.ERROR,
                     error="Internal error: Failed to fetch logs",
                     params=params.model_dump(),
                 )
@@ -162,7 +162,7 @@ class KubernetesLogsToolset(BasePodLoggingToolset):
             ):
                 # Both commands failed - return error from current logs
                 return StructuredToolResult(
-                    status=ToolResultStatus.ERROR,
+                    status=StructuredToolResultStatus.ERROR,
                     error=current_logs_result.error,
                     params=params.model_dump(),
                     return_code=return_code,
@@ -206,7 +206,7 @@ class KubernetesLogsToolset(BasePodLoggingToolset):
             if len(filtered_logs) == 0:
                 # Return NO_DATA status when there are no logs
                 return StructuredToolResult(
-                    status=ToolResultStatus.NO_DATA,
+                    status=StructuredToolResultStatus.NO_DATA,
                     data="\n".join(
                         metadata_lines
                     ),  # Still include metadata for context
@@ -218,7 +218,7 @@ class KubernetesLogsToolset(BasePodLoggingToolset):
             response_data = formatted_logs + "\n" + "\n".join(metadata_lines)
             return StructuredToolResult(
-                status=ToolResultStatus.SUCCESS,
+                status=StructuredToolResultStatus.SUCCESS,
                 data=response_data,
                 params=params.model_dump(),
                 return_code=return_code,
@@ -226,7 +226,7 @@ class KubernetesLogsToolset(BasePodLoggingToolset):
         except Exception as e:
             logging.exception(f"Error fetching logs for pod {params.pod_name}")
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
+                status=StructuredToolResultStatus.ERROR,
                 error=f"Error fetching logs: {str(e)}",
                 params=params.model_dump(),
             )

holmes/plugins/toolsets/kubernetes_logs.yaml CHANGED Viewed

@@ -8,6 +8,10 @@ toolsets:
     prerequisites:
       - command: "kubectl version --client"
+    # Note: Log tools use transformers with llm_summarize to automatically
+    # summarize large log outputs when a fast model is configured. This helps
+    # focus on errors, patterns, and key information while reducing context usage.
     tools:
       - name: "kubectl_previous_logs"
         description: "Run `kubectl logs --previous` on a single Kubernetes pod. Used to fetch logs for a pod that crashed and see logs from before the crash. Never give a deployment name or a resource that is not a pod."
@@ -24,10 +28,38 @@ toolsets:
       - name: "kubectl_logs"
         description: "Run `kubectl logs` on a single Kubernetes pod. Never give a deployment name or a resource that is not a pod."
         command: "kubectl logs {{pod_name}} -n {{ namespace }}"
+        transformers:
+          - name: llm_summarize
+            config:
+              input_threshold: 1000
+              prompt: |
+                Summarize these pod logs focusing on:
+                - Errors, exceptions, and warning messages
+                - Recent activity patterns and trends
+                - Any authentication, connection, or startup issues
+                - Performance indicators (response times, throughput)
+                - Group similar log entries together
+                - When possible, mention exact error codes or keywords for easier searching
+                - Be concise: aim for ≤ 50% of the original text; prioritize aggregates and actionable outliers
+                - Include grep-ready keys/values; avoid repeating entire logs or unchanged defaults
       - name: "kubectl_logs_all_containers"
         description: "Run `kubectl logs` on all containers within a single Kubernetes pod."
         command: "kubectl logs {{pod_name}} -n {{ namespace }} --all-containers"
+        transformers:
+          - name: llm_summarize
+            config:
+              input_threshold: 1000
+              prompt: |
+                Summarize these multi-container pod logs focusing on:
+                - Errors, exceptions, and warning messages by container
+                - Inter-container communication patterns
+                - Any authentication, connection, or startup issues
+                - Performance indicators and resource usage patterns
+                - Group similar log entries together by container
+                - When possible, mention exact error codes or keywords for easier searching
+                - Strive for ≤ 50% of the original size; keep results compact and grep-friendly (one line per aggregate)
+                - Prioritize aggregates and actionable outliers over comprehensive details
       - name: "kubectl_container_logs"
         description: "Run `kubectl logs` on a single container within a Kubernetes pod. This is to get the logs of a specific container in a multi-container pod."

holmesgpt 0.13.2__py3-none-any.whl → 0.18.4__py3-none-any.whl

holmesgpt 0.13.2__py3-none-any.whl → 0.18.4__py3-none-any.whl