PyPI - holmesgpt - Versions diffs - 0.11.5__py3-none-any.whl - Mend

holmesgpt 0.11.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of holmesgpt might be problematic. Click here for more details.

Files changed (183) hide show

holmes/.git_archival.json +7 -0
holmes/__init__.py +76 -0
holmes/__init__.py.bak +76 -0
holmes/clients/robusta_client.py +24 -0
holmes/common/env_vars.py +47 -0
holmes/config.py +526 -0
holmes/core/__init__.py +0 -0
holmes/core/conversations.py +578 -0
holmes/core/investigation.py +152 -0
holmes/core/investigation_structured_output.py +264 -0
holmes/core/issue.py +54 -0
holmes/core/llm.py +250 -0
holmes/core/models.py +157 -0
holmes/core/openai_formatting.py +51 -0
holmes/core/performance_timing.py +72 -0
holmes/core/prompt.py +42 -0
holmes/core/resource_instruction.py +17 -0
holmes/core/runbooks.py +26 -0
holmes/core/safeguards.py +120 -0
holmes/core/supabase_dal.py +540 -0
holmes/core/tool_calling_llm.py +798 -0
holmes/core/tools.py +566 -0
holmes/core/tools_utils/__init__.py +0 -0
holmes/core/tools_utils/tool_executor.py +65 -0
holmes/core/tools_utils/toolset_utils.py +52 -0
holmes/core/toolset_manager.py +418 -0
holmes/interactive.py +229 -0
holmes/main.py +1041 -0
holmes/plugins/__init__.py +0 -0
holmes/plugins/destinations/__init__.py +6 -0
holmes/plugins/destinations/slack/__init__.py +2 -0
holmes/plugins/destinations/slack/plugin.py +163 -0
holmes/plugins/interfaces.py +32 -0
holmes/plugins/prompts/__init__.py +48 -0
holmes/plugins/prompts/_current_date_time.jinja2 +1 -0
holmes/plugins/prompts/_default_log_prompt.jinja2 +11 -0
holmes/plugins/prompts/_fetch_logs.jinja2 +36 -0
holmes/plugins/prompts/_general_instructions.jinja2 +86 -0
holmes/plugins/prompts/_global_instructions.jinja2 +12 -0
holmes/plugins/prompts/_runbook_instructions.jinja2 +13 -0
holmes/plugins/prompts/_toolsets_instructions.jinja2 +56 -0
holmes/plugins/prompts/generic_ask.jinja2 +36 -0
holmes/plugins/prompts/generic_ask_conversation.jinja2 +32 -0
holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +50 -0
holmes/plugins/prompts/generic_investigation.jinja2 +42 -0
holmes/plugins/prompts/generic_post_processing.jinja2 +13 -0
holmes/plugins/prompts/generic_ticket.jinja2 +12 -0
holmes/plugins/prompts/investigation_output_format.jinja2 +32 -0
holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +84 -0
holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +39 -0
holmes/plugins/runbooks/README.md +22 -0
holmes/plugins/runbooks/__init__.py +100 -0
holmes/plugins/runbooks/catalog.json +14 -0
holmes/plugins/runbooks/jira.yaml +12 -0
holmes/plugins/runbooks/kube-prometheus-stack.yaml +10 -0
holmes/plugins/runbooks/networking/dns_troubleshooting_instructions.md +66 -0
holmes/plugins/runbooks/upgrade/upgrade_troubleshooting_instructions.md +44 -0
holmes/plugins/sources/github/__init__.py +77 -0
holmes/plugins/sources/jira/__init__.py +123 -0
holmes/plugins/sources/opsgenie/__init__.py +93 -0
holmes/plugins/sources/pagerduty/__init__.py +147 -0
holmes/plugins/sources/prometheus/__init__.py +0 -0
holmes/plugins/sources/prometheus/models.py +104 -0
holmes/plugins/sources/prometheus/plugin.py +154 -0
holmes/plugins/toolsets/__init__.py +171 -0
holmes/plugins/toolsets/aks-node-health.yaml +65 -0
holmes/plugins/toolsets/aks.yaml +86 -0
holmes/plugins/toolsets/argocd.yaml +70 -0
holmes/plugins/toolsets/atlas_mongodb/instructions.jinja2 +8 -0
holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +307 -0
holmes/plugins/toolsets/aws.yaml +76 -0
holmes/plugins/toolsets/azure_sql/__init__.py +0 -0
holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +600 -0
holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +309 -0
holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +445 -0
holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +251 -0
holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +317 -0
holmes/plugins/toolsets/azure_sql/azure_base_toolset.py +55 -0
holmes/plugins/toolsets/azure_sql/azure_sql_instructions.jinja2 +137 -0
holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +183 -0
holmes/plugins/toolsets/azure_sql/install.md +66 -0
holmes/plugins/toolsets/azure_sql/tools/__init__.py +1 -0
holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +324 -0
holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +243 -0
holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +205 -0
holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +249 -0
holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +373 -0
holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +237 -0
holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +172 -0
holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +170 -0
holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +188 -0
holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +180 -0
holmes/plugins/toolsets/azure_sql/utils.py +83 -0
holmes/plugins/toolsets/bash/__init__.py +0 -0
holmes/plugins/toolsets/bash/bash_instructions.jinja2 +14 -0
holmes/plugins/toolsets/bash/bash_toolset.py +208 -0
holmes/plugins/toolsets/bash/common/bash.py +52 -0
holmes/plugins/toolsets/bash/common/config.py +14 -0
holmes/plugins/toolsets/bash/common/stringify.py +25 -0
holmes/plugins/toolsets/bash/common/validators.py +24 -0
holmes/plugins/toolsets/bash/grep/__init__.py +52 -0
holmes/plugins/toolsets/bash/kubectl/__init__.py +100 -0
holmes/plugins/toolsets/bash/kubectl/constants.py +96 -0
holmes/plugins/toolsets/bash/kubectl/kubectl_describe.py +66 -0
holmes/plugins/toolsets/bash/kubectl/kubectl_events.py +88 -0
holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +108 -0
holmes/plugins/toolsets/bash/kubectl/kubectl_logs.py +20 -0
holmes/plugins/toolsets/bash/kubectl/kubectl_run.py +46 -0
holmes/plugins/toolsets/bash/kubectl/kubectl_top.py +81 -0
holmes/plugins/toolsets/bash/parse_command.py +103 -0
holmes/plugins/toolsets/confluence.yaml +19 -0
holmes/plugins/toolsets/consts.py +5 -0
holmes/plugins/toolsets/coralogix/api.py +158 -0
holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +103 -0
holmes/plugins/toolsets/coralogix/utils.py +181 -0
holmes/plugins/toolsets/datadog.py +153 -0
holmes/plugins/toolsets/docker.yaml +46 -0
holmes/plugins/toolsets/git.py +756 -0
holmes/plugins/toolsets/grafana/__init__.py +0 -0
holmes/plugins/toolsets/grafana/base_grafana_toolset.py +54 -0
holmes/plugins/toolsets/grafana/common.py +68 -0
holmes/plugins/toolsets/grafana/grafana_api.py +31 -0
holmes/plugins/toolsets/grafana/loki_api.py +89 -0
holmes/plugins/toolsets/grafana/tempo_api.py +124 -0
holmes/plugins/toolsets/grafana/toolset_grafana.py +102 -0
holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +102 -0
holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +10 -0
holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +299 -0
holmes/plugins/toolsets/grafana/trace_parser.py +195 -0
holmes/plugins/toolsets/helm.yaml +42 -0
holmes/plugins/toolsets/internet/internet.py +275 -0
holmes/plugins/toolsets/internet/notion.py +137 -0
holmes/plugins/toolsets/kafka.py +638 -0
holmes/plugins/toolsets/kubernetes.yaml +255 -0
holmes/plugins/toolsets/kubernetes_logs.py +426 -0
holmes/plugins/toolsets/kubernetes_logs.yaml +42 -0
holmes/plugins/toolsets/logging_utils/__init__.py +0 -0
holmes/plugins/toolsets/logging_utils/logging_api.py +217 -0
holmes/plugins/toolsets/logging_utils/types.py +0 -0
holmes/plugins/toolsets/mcp/toolset_mcp.py +135 -0
holmes/plugins/toolsets/newrelic.py +222 -0
holmes/plugins/toolsets/opensearch/__init__.py +0 -0
holmes/plugins/toolsets/opensearch/opensearch.py +245 -0
holmes/plugins/toolsets/opensearch/opensearch_logs.py +151 -0
holmes/plugins/toolsets/opensearch/opensearch_traces.py +211 -0
holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +12 -0
holmes/plugins/toolsets/opensearch/opensearch_utils.py +166 -0
holmes/plugins/toolsets/prometheus/prometheus.py +818 -0
holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +38 -0
holmes/plugins/toolsets/rabbitmq/api.py +398 -0
holmes/plugins/toolsets/rabbitmq/rabbitmq_instructions.jinja2 +37 -0
holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +222 -0
holmes/plugins/toolsets/robusta/__init__.py +0 -0
holmes/plugins/toolsets/robusta/robusta.py +235 -0
holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +24 -0
holmes/plugins/toolsets/runbook/__init__.py +0 -0
holmes/plugins/toolsets/runbook/runbook_fetcher.py +78 -0
holmes/plugins/toolsets/service_discovery.py +92 -0
holmes/plugins/toolsets/servicenow/install.md +37 -0
holmes/plugins/toolsets/servicenow/instructions.jinja2 +3 -0
holmes/plugins/toolsets/servicenow/servicenow.py +198 -0
holmes/plugins/toolsets/slab.yaml +20 -0
holmes/plugins/toolsets/utils.py +137 -0
holmes/plugins/utils.py +14 -0
holmes/utils/__init__.py +0 -0
holmes/utils/cache.py +84 -0
holmes/utils/cert_utils.py +40 -0
holmes/utils/default_toolset_installation_guide.jinja2 +44 -0
holmes/utils/definitions.py +13 -0
holmes/utils/env.py +53 -0
holmes/utils/file_utils.py +56 -0
holmes/utils/global_instructions.py +20 -0
holmes/utils/holmes_status.py +22 -0
holmes/utils/holmes_sync_toolsets.py +80 -0
holmes/utils/markdown_utils.py +55 -0
holmes/utils/pydantic_utils.py +54 -0
holmes/utils/robusta.py +10 -0
holmes/utils/tags.py +97 -0
holmesgpt-0.11.5.dist-info/LICENSE.txt +21 -0
holmesgpt-0.11.5.dist-info/METADATA +400 -0
holmesgpt-0.11.5.dist-info/RECORD +183 -0
holmesgpt-0.11.5.dist-info/WHEEL +4 -0
holmesgpt-0.11.5.dist-info/entry_points.txt +3 -0

holmes/plugins/toolsets/kubernetes.yaml ADDED Viewed

@@ -0,0 +1,255 @@
+toolsets:
+  kubernetes/core:
+    description: "Read access to cluster resources (excluding secrets and other sensitive data)"
+    docs_url: "https://docs.robusta.dev/master/configuration/holmesgpt/toolsets/kubernetes.html#core"
+    icon_url: "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRPKA-U9m5BxYQDF1O7atMfj9EMMXEoGu4t0Q&s"
+    tags:
+      - core
+    prerequisites:
+      - command: "kubectl version --client"
+    tools:
+      - name: "kubectl_describe"
+        description: >
+          Run kubectl describe <kind> <name> -n <namespace>,
+          call this when users ask for description,
+          for example when a user asks
+            - 'describe pod xyz-123'
+            - 'show service xyz-123 in namespace my-ns'
+        command: "kubectl describe {{ kind }} {{ name }}{% if namespace %} -n {{ namespace }}{% endif %}"
+      - name: "kubectl_get_by_name"
+        description: "Run `kubectl get <kind> <name> --show-labels`"
+        command: "kubectl get --show-labels -o wide {{ kind }} {{ name }}{% if namespace %} -n {{ namespace }}{% endif %}"
+      - name: "kubectl_get_by_kind_in_namespace"
+        description: "Run `kubectl get <kind> -n <namespace> --show-labels` to get all resources of a given type in namespace"
+        command: "kubectl get --show-labels -o wide {{ kind }} -n {{namespace}}"
+      - name: "kubectl_get_by_kind_in_cluster"
+        description: "Run `kubectl get -A <kind> --show-labels` to get all resources of a given type in the cluster"
+        command: "kubectl get -A --show-labels -o wide {{ kind }}"
+      - name: "kubectl_find_resource"
+        description: "Run `kubectl get {{ kind }} -A --show-labels | grep {{ keyword }}` to find a resource where you know a substring of the name, IP, namespace, or labels"
+        command: "kubectl get -A --show-labels -o wide {{ kind }} | grep {{ keyword }}"
+      - name: "kubectl_get_yaml"
+        description: "Run `kubectl get -o yaml` on a single Kubernetes resource"
+        command: "kubectl get -o yaml {{ kind }} {{ name}}{% if namespace %} -n {{ namespace }}{% endif %}"
+      - name: "kubectl_events"
+        description: "Retrieve the events for a specific Kubernetes resource. `resource_type` can be any kubernetes resource type: 'pod', 'service', 'deployment', 'job', 'node', etc."
+        # NOTE: despite its name, `pod_name` carries the name of whatever resource `resource_type` refers to.
+        command: "kubectl events --for {{resource_type}}/{{ pod_name }} -n {{ namespace }}"
+      - name: "kubectl_memory_requests_all_namespaces"
+        description: "Fetch and display memory requests for all pods across all namespaces in MiB, summing requests across multiple containers where applicable and handling binary, decimal, and millibyte units correctly."
+        # Pipeline stages: kubectl prints one row per pod (namespace, name, memory requests);
+        # awk splits the requests on commas, converts each one to MiB and prints the per-pod sum;
+        # sort orders the rows numerically on the third column, largest first.
+        # NOTE: the awk program below also appears verbatim in kubectl_memory_requests_namespace;
+        # keep the two copies in sync.
+        command: |
+          kubectl get pods --all-namespaces -o custom-columns="NAMESPACE:.metadata.namespace,NAME:.metadata.name,MEMORY_REQUEST:.spec.containers[*].resources.requests.memory" --no-headers | \
+          awk '
+            function convert_to_mib(value) {
+              if (value ~ /^[0-9]+e[0-9]+$/) return (value + 0) / (1024 * 1024); # Scientific notation
+              if (value ~ /m$/) return (value + 0) / (1024^2 * 1000);           # Millibytes (m)
+              if (value ~ /Ei$/) return (value + 0) * 1024^6 / (1024^2);        # Binary units
+              if (value ~ /Pi$/) return (value + 0) * 1024^5 / (1024^2);
+              if (value ~ /Ti$/) return (value + 0) * 1024^4 / (1024^2);
+              if (value ~ /Gi$/) return (value + 0) * 1024^3 / (1024^2);
+              if (value ~ /Mi$/) return (value + 0);
+              if (value ~ /Ki$/) return (value + 0) / 1024;
+              if (value ~ /E$/) return (value + 0) * 1000^6 / (1024^2);         # Decimal units
+              if (value ~ /P$/) return (value + 0) * 1000^5 / (1024^2);
+              if (value ~ /T$/) return (value + 0) * 1000^4 / (1024^2);
+              if (value ~ /G$/) return (value + 0) * 1000^3 / (1024^2);
+              if (value ~ /M$/) return (value + 0) * 1000^2 / (1024^2);
+              if (value ~ /k$/) return (value + 0) * 1000 / (1024^2);
+              return (value + 0) / (1024 * 1024);                               # Default: bytes
+            }
+            function sum_memory(requests) {
+              gsub(/^[ \t]+|[ \t]+$/, "", requests);
+              if (requests == "" || requests == "<none>") return 0;
+              split(requests, arr, ",");
+              total = 0;
+              for (i in arr) {
+                if (arr[i] != "<none>") total += convert_to_mib(arr[i]);
+              }
+              return total;
+            }
+            {
+              namespace = $1;
+              name = $2;
+              requests = $3;
+              for (i=4; i<=NF; i++) {
+                requests = requests " " $i;
+              }
+              print namespace, name, sum_memory(requests) " Mi";
+            }' | sort -k3 -nr
+      - name: "kubectl_memory_requests_namespace"
+        description: "Fetch and display memory requests for all pods in a specified namespace in MiB, summing requests across multiple containers where applicable and handling binary, decimal, and millibyte units correctly."
+        # Same pipeline as kubectl_memory_requests_all_namespaces, except that kubectl is
+        # restricted to the templated `namespace` parameter instead of --all-namespaces.
+        # awk splits the requests column on commas, converts each entry to MiB and prints the
+        # per-pod sum; sort orders the rows numerically on the third column, largest first.
+        # NOTE: the awk program is a verbatim copy of the one in
+        # kubectl_memory_requests_all_namespaces; keep the two copies in sync.
+        command: |
+          kubectl get pods -n {{ namespace }} -o custom-columns="NAMESPACE:.metadata.namespace,NAME:.metadata.name,MEMORY_REQUEST:.spec.containers[*].resources.requests.memory" --no-headers | \
+          awk '
+            function convert_to_mib(value) {
+              if (value ~ /^[0-9]+e[0-9]+$/) return (value + 0) / (1024 * 1024); # Scientific notation
+              if (value ~ /m$/) return (value + 0) / (1024^2 * 1000);           # Millibytes (m)
+              if (value ~ /Ei$/) return (value + 0) * 1024^6 / (1024^2);        # Binary units
+              if (value ~ /Pi$/) return (value + 0) * 1024^5 / (1024^2);
+              if (value ~ /Ti$/) return (value + 0) * 1024^4 / (1024^2);
+              if (value ~ /Gi$/) return (value + 0) * 1024^3 / (1024^2);
+              if (value ~ /Mi$/) return (value + 0);
+              if (value ~ /Ki$/) return (value + 0) / 1024;
+              if (value ~ /E$/) return (value + 0) * 1000^6 / (1024^2);         # Decimal units
+              if (value ~ /P$/) return (value + 0) * 1000^5 / (1024^2);
+              if (value ~ /T$/) return (value + 0) * 1000^4 / (1024^2);
+              if (value ~ /G$/) return (value + 0) * 1000^3 / (1024^2);
+              if (value ~ /M$/) return (value + 0) * 1000^2 / (1024^2);
+              if (value ~ /k$/) return (value + 0) * 1000 / (1024^2);
+              return (value + 0) / (1024 * 1024);                               # Default: bytes
+            }
+            function sum_memory(requests) {
+              gsub(/^[ \t]+|[ \t]+$/, "", requests);
+              if (requests == "" || requests == "<none>") return 0;
+              split(requests, arr, ",");
+              total = 0;
+              for (i in arr) {
+                if (arr[i] != "<none>") total += convert_to_mib(arr[i]);
+              }
+              return total;
+            }
+            {
+              namespace = $1;
+              name = $2;
+              requests = $3;
+              for (i=4; i<=NF; i++) {
+                requests = requests " " $i;
+              }
+              print namespace, name, sum_memory(requests) " Mi";
+            }' | sort -k3 -nr
+      - name: "kubernetes_jq_query"
+        user_description: "Query Kubernetes Resources: kubectl get {{kind}} --all-namespaces -o json | jq -r {{jq_expr}}"
+        description: >
+          Use kubectl to get json for all resources of a specific kind pipe the results to jq to filter them. Do not worry about escaping the jq_expr it will be done by the system on an unescaped expression that you give. e.g. give an expression like .items[] | .spec.containers[].image | select(test("^gcr.io/") | not)
+        command: kubectl get {{ kind }} --all-namespaces -o json | jq -r {{ jq_expr }}
+      - name: "kubernetes_count"
+        user_description: "Count Kubernetes Resources: kubectl get {{kind}} --all-namespaces -o json | jq -c -r {{ jq_expr }}"
+        description: >
+          Use kubectl to get apply a jq filter and then count the results.
+          Use this whenever asked to count kubernetes resources.
+          Use select() to filter objects before extracting properties, e.g. .items[] | select(.metadata.namespace == "test-1") | .metadata.name
+          Do not worry about escaping the jq_expr it will be done by the system on an unescaped expression that you give.
+          e.g. give an expression like .items[] | select(.spec.containers[].image | test("^gcr.io/") | not) | .metadata.name
+        # The script runs kubectl and jq as two separate steps (via a temp file) so that a
+        # failure of either command is reported as an error instead of as "0 results".
+        script: |
+          echo "Command executed: kubectl get {{ kind }} --all-namespaces -o json | jq -c -r {{ jq_expr }}"
+          echo "---"
+          # Run kubectl and jq separately: the exit status of a shell pipeline is that of its
+          # last command only, so a kubectl failure would otherwise be masked by jq succeeding
+          # on empty input.
+          temp_error=$(mktemp)
+          temp_json=$(mktemp)
+          kubectl get {{ kind }} --all-namespaces -o json >"$temp_json" 2>"$temp_error"
+          exit_code=$?
+          if [ $exit_code -eq 0 ]; then
+            matches=$(jq -c -r {{ jq_expr }} <"$temp_json" 2>>"$temp_error")
+            exit_code=$?
+          fi
+          error_output=$(cat "$temp_error")
+          rm -f "$temp_error" "$temp_json"
+          if [ $exit_code -ne 0 ]; then
+            echo "Error executing command (exit code: $exit_code):"
+            echo "$error_output"
+            exit $exit_code
+          else
+            # Show any stderr warnings even if command succeeded
+            if [ -n "$error_output" ]; then
+              echo "Warnings/stderr output:"
+              echo "$error_output"
+              echo "---"
+            fi
+            # Filter out empty lines for accurate count
+            filtered_matches=$(echo "$matches" | grep -v '^$' | grep -v '^null$')
+            if [ -z "$filtered_matches" ]; then
+              count=0
+            else
+              count=$(echo "$filtered_matches" | wc -l)
+            fi
+            preview=$(echo "$filtered_matches" | head -n 10 | cut -c 1-200 | nl)
+            echo "$count results"
+            echo "---"
+            echo "A *preview* of results is shown below (up to 10 results, up to 200 chars):"
+            echo "$preview"
+          fi
+    # NOTE: this is only possible for probes with a healthz endpoint - we do this to avoid giving the LLM generic
+    # http GET capabilities which are more powerful than we want to expose
+    #- name: "check_liveness_probe"
+    #  description: "Run an http Kubernetes liveness probe for a given pod and return the results. Can be used to troubleshoot previous failures of the same probe assuming they fail now in the same manner."
+    #  command: "kubectl get --raw '/api/v1/namespaces/{{pod_namespace}}/pods/{{pod_name}}:{{liveness_probe_port}}/healthz'"
+    #- name: "kubectl_debug_node"
+    #  description: "Run a command on a Kubernetes node"
+    #  command: "kubectl debug node/mynode --image=ubuntu"
+    #- name: "healthcheck_plugin"
+    #  description: "Check why a kubernetes health probe is failing. First call get_healthcheck_details"
+    #  command: "kubectl exec -n {{namespace}} {{ pod_name }} -- wget {{ url }}:{{port}}"
+    # try adding your own tools here!
+    # e.g. to query company-specific data or run your own commands
+  kubernetes/live-metrics:
+    description: "Provides real-time metrics for pods and nodes"
+    docs_url: "https://docs.robusta.dev/master/configuration/holmesgpt/toolsets/kubernetes.html#live-metrics"
+    icon_url: "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRPKA-U9m5BxYQDF1O7atMfj9EMMXEoGu4t0Q&s"
+    tags:
+      - core
+    tools:
+      - name: "kubectl_top_pods"
+        description: "Retrieves real-time CPU and memory usage for each pod in the cluster."
+        command: >
+          kubectl top pods -A
+      - name: "kubectl_top_nodes"
+        description: "Retrieves real-time CPU and memory usage for each node in the cluster."
+        command: >
+          kubectl top nodes
+  kubernetes/kube-prometheus-stack:
+    description: "Fetches prometheus definition"
+    docs_url: "https://docs.robusta.dev/master/configuration/holmesgpt/toolsets/kubernetes.html#prometheus-stack"
+    icon_url: "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRPKA-U9m5BxYQDF1O7atMfj9EMMXEoGu4t0Q&s"
+    tags:
+      - core
+    tools:
+      - name: "get_prometheus_target"
+        description: "Fetch the definition of a Prometheus target"
+        command: 'kubectl get --raw ''/api/v1/namespaces/{{prometheus_namespace}}/services/{{prometheus_service_name}}:9090/proxy/api/v1/targets'' | jq ''.data.activeTargets[] | select(.labels.job == "{{ target_name }}")'''
+  kubernetes/krew-extras: # To make this work, install kube-lineage with krew
+    description: "Fetches children/dependents and parents/dependencies resources using kube-lineage installed via `kubectl krew`"
+    docs_url: "https://docs.robusta.dev/master/configuration/holmesgpt/toolsets/kubernetes.html#resource-lineage-extras-with-krew"
+    icon_url: "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRPKA-U9m5BxYQDF1O7atMfj9EMMXEoGu4t0Q&s"
+    tags:
+      - cli
+    prerequisites:
+      - command: "kubectl version --client && kubectl lineage --version"
+    tools:
+      - name: "kubectl_lineage_children"
+        description: "Get all children/dependents of a Kubernetes resource, recursively, including their status"
+        command: "kubectl lineage {{ kind }} {{ name}} -n {{ namespace }}"
+      - name: "kubectl_lineage_parents"
+        description: "Get all parents/dependencies of a Kubernetes resource, recursively, including their status"
+        command: "kubectl lineage {{ kind }} {{ name}} -n {{ namespace }} -D"
+  kubernetes/kube-lineage-extras: # To make this work, build kube-lineage from source
+    description: "Fetches children/dependents and parents/dependencies resources using kube-lineage"
+    docs_url: "https://docs.robusta.dev/master/configuration/holmesgpt/toolsets/kubernetes.html#resource-lineage-extras"
+    icon_url: "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRPKA-U9m5BxYQDF1O7atMfj9EMMXEoGu4t0Q&s"
+    tags:
+      - cluster
+    prerequisites:
+      - command: "kubectl version --client && kube-lineage --version"
+    tools:
+      - name: "kubectl_lineage_children"
+        description: "Get all children/dependents of a Kubernetes resource, recursively, including their status"
+        command: "kube-lineage {{ kind }} {{ name}} -n {{ namespace }}"
+      - name: "kubectl_lineage_parents"
+        description: "Get all parents/dependencies of a Kubernetes resource, recursively, including their status"
+        command: "kube-lineage {{ kind }} {{ name}} -n {{ namespace }} -D"

holmes/plugins/toolsets/kubernetes_logs.py ADDED Viewed

@@ -0,0 +1,426 @@
+import logging
+import re
+import subprocess
+from typing import Optional, List, Tuple
+from pydantic import BaseModel
+from holmes.common.env_vars import KUBERNETES_LOGS_TIMEOUT_SECONDS
+from holmes.core.tools import (
+    StaticPrerequisite,
+    StructuredToolResult,
+    ToolResultStatus,
+    ToolsetTag,
+)
+from holmes.plugins.toolsets.logging_utils.logging_api import (
+    BasePodLoggingToolset,
+    FetchPodLogsParams,
+    LoggingConfig,
+    PodLoggingTool,
+)
+from holmes.plugins.toolsets.utils import process_timestamps_to_int, to_unix_ms
+# match ISO 8601 format (YYYY-MM-DDTHH:MM:SS[.fffffffff]Z) or (YYYY-MM-DDTHH:MM:SS[.fffffffff]+/-XX:XX)
+# The pattern is anchored at the start of the string and exposes the matched
+# timestamp through the named group `ts`; the fractional-seconds part is optional.
+timestamp_pattern = re.compile(
+    r"^(?P<ts>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:\d{2}))"
+)
+class Pod(BaseModel):
+    """Pydantic model with a single ``containers`` list of strings."""
+    # presumably the container names of a pod — not referenced in the code visible here; TODO confirm
+    containers: list[str]
+class StructuredLog(BaseModel):
+    """One log entry: an optional timestamp, an optional container name and the text content."""
+    # presumably Unix epoch milliseconds — TODO confirm against the parsing code
+    timestamp_ms: Optional[int]
+    container: Optional[str]
+    content: str
+class LogResult(BaseModel):
+    """Outcome of one ``kubectl logs`` invocation as built by ``KubernetesLogsToolset``."""
+    # Failure message; None when the result comes from parsing kubectl output.
+    error: Optional[str]
+    # kubectl exit code; None when the command timed out or could not be executed.
+    return_code: Optional[int]
+    has_multiple_containers: bool
+    logs: list[StructuredLog]
+class KubernetesLogsToolset(BasePodLoggingToolset):
+    """Implementation of the unified logging API for Kubernetes logs using kubectl commands"""
+    def __init__(self):
+        """Register the "kubernetes/logs" toolset and enable it according to ``health_check()``.
+
+        The toolset is first registered with a disabled placeholder prerequisite
+        (reason "Initializing"). Once the base class has been initialised, that
+        prerequisite is updated with the outcome of ``health_check()``.
+        """
+        # Placeholder prerequisite: stays disabled until the health check below has run.
+        prerequisite = StaticPrerequisite(enabled=False, disabled_reason="Initializing")
+        super().__init__(
+            name="kubernetes/logs",
+            description="Read Kubernetes pod logs using a unified API",
+            docs_url="https://docs.robusta.dev/master/configuration/holmesgpt/toolsets/kubernetes.html#logs",
+            icon_url="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRPKA-U9m5BxYQDF1O7atMfj9EMMXEoGu4t0Q&s",
+            prerequisites=[prerequisite],
+            is_default=True,
+            tools=[
+                PodLoggingTool(self),
+            ],
+            tags=[ToolsetTag.CORE],
+        )
+        # Update the prerequisite object that was passed in `prerequisites` above
+        # with the real kubectl availability result.
+        enabled, disabled_reason = self.health_check()
+        prerequisite.enabled = enabled
+        prerequisite.disabled_reason = disabled_reason
+    def health_check(self) -> Tuple[bool, str]:
+        try:
+            # Check if kubectl is available
+            result = subprocess.run(
+                ["kubectl", "version", "--client"],
+                capture_output=True,
+                text=True,
+                timeout=10,
+            )
+            if result.returncode == 0:
+                return True, ""
+            else:
+                return False, f"kubectl command failed: {result.stderr}"
+        except subprocess.TimeoutExpired:
+            return False, "kubectl command timed out"
+        except FileNotFoundError:
+            return False, "kubectl command not found"
+        except Exception as e:
+            return False, f"kubectl health check error: {str(e)}"
+    def get_example_config(self):
+        return LoggingConfig().model_dump()
+    def fetch_pod_logs(self, params: FetchPodLogsParams) -> StructuredToolResult:
+        """Fetch a pod's logs by running kubectl twice and merging the results.
+
+        kubectl is invoked once with ``--previous`` and once without; entries from
+        the ``--previous`` run are placed before those of the current run. The
+        merged entries are then passed through ``filter_logs`` and ``format_logs``.
+
+        Args:
+            params: Request parameters; forwarded to both kubectl invocations and
+                to ``filter_logs``, and echoed back in the result via ``model_dump()``.
+
+        Returns:
+            A ``StructuredToolResult`` whose status is:
+            - ``ERROR`` when no logs were collected and both invocations reported
+              an error (the error text of the current-logs invocation is used),
+              or when any exception is raised inside this method;
+            - ``NO_DATA`` when nothing is left after ``filter_logs``;
+            - ``SUCCESS`` with the formatted logs otherwise.
+        """
+        try:
+            all_logs: list[StructuredLog] = []
+            # Fetch previous logs
+            previous_logs_result = self._fetch_kubectl_logs(
+                params=params,
+                previous=True,
+            )
+            # Fetch current logs
+            current_logs_result = self._fetch_kubectl_logs(
+                params=params,
+                previous=False,
+            )
+            # return_code defaults to the current-logs invocation; it is then overwritten
+            # by whichever invocation produced logs, the current one winning if both did.
+            return_code: Optional[int] = current_logs_result.return_code
+            if previous_logs_result.logs:
+                all_logs.extend(previous_logs_result.logs)
+                return_code = previous_logs_result.return_code
+            if current_logs_result.logs:
+                all_logs.extend(current_logs_result.logs)
+                return_code = current_logs_result.return_code
+            if (
+                not all_logs
+                and previous_logs_result.error
+                and current_logs_result.error
+            ):
+                # Both commands failed - return error from current logs
+                return StructuredToolResult(
+                    status=ToolResultStatus.ERROR,
+                    error=current_logs_result.error,
+                    params=params.model_dump(),
+                    return_code=return_code,
+                )
+            all_logs = filter_logs(all_logs, params)
+            if not all_logs:
+                return StructuredToolResult(
+                    status=ToolResultStatus.NO_DATA,
+                    params=params.model_dump(),
+                    return_code=return_code,
+                )
+            # Container names are shown if either invocation saw more than one container.
+            formatted_logs = format_logs(
+                logs=all_logs,
+                display_container_name=previous_logs_result.has_multiple_containers
+                or current_logs_result.has_multiple_containers,
+            )
+            return StructuredToolResult(
+                status=ToolResultStatus.SUCCESS,
+                data=formatted_logs,
+                params=params.model_dump(),
+                return_code=return_code,
+            )
+        except Exception as e:
+            # Log the traceback and convert the failure into an ERROR result.
+            logging.exception(f"Error fetching logs for pod {params.pod_name}")
+            return StructuredToolResult(
+                status=ToolResultStatus.ERROR,
+                error=f"Error fetching logs: {str(e)}",
+                params=params.model_dump(),
+            )
+    def _fetch_kubectl_logs(
+        self,
+        params: FetchPodLogsParams,
+        previous: bool = False,
+    ) -> LogResult:
+        """Fetch logs using kubectl command"""
+        cmd = [
+            "kubectl",
+            "logs",
+            params.pod_name,
+            "-n",
+            params.namespace,
+            "--all-containers=true",
+            "--timestamps=true",
+            "--prefix=true",
+        ]
+        if previous:
+            cmd.append("--previous")
+        try:
+            result = subprocess.run(
+                cmd,
+                text=True,
+                timeout=KUBERNETES_LOGS_TIMEOUT_SECONDS,
+                check=False,  # do not throw error, we just return the error code
+                stdin=subprocess.DEVNULL,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,
+            )
+            if result.returncode == 0:
+                # Parse the logs - kubectl with --all-containers prefixes lines with container name
+                log_result = self._parse_kubectl_logs(logs=result.stdout)
+                log_result.return_code = result.returncode
+                return log_result
+            else:
+                error_msg = (
+                    result.stdout.strip()
+                    or f"kubectl logs command failed with return code {result.returncode}"
+                )
+                logging.debug(
+                    f"kubectl logs command failed for pod {params.pod_name} "
+                    f"(previous={previous}): {error_msg}"
+                )
+                return LogResult(
+                    logs=[],
+                    error=error_msg,
+                    return_code=result.returncode,
+                    has_multiple_containers=False,
+                )
+        except subprocess.TimeoutExpired:
+            error_msg = f"kubectl logs command timed out after {KUBERNETES_LOGS_TIMEOUT_SECONDS} seconds"
+            logging.warning(
+                f"kubectl logs command timed out for pod {params.pod_name} "
+                f"(previous={previous})"
+            )
+            return LogResult(
+                logs=[],
+                error=error_msg,
+                return_code=None,
+                has_multiple_containers=False,
+            )
+        except Exception as e:
+            error_msg = f"Error executing kubectl: {str(e)}"
+            logging.error(
+                f"Error executing kubectl logs for pod {params.pod_name} "
+                f"(previous={previous}): {str(e)}"
+            )
+            return LogResult(
+                logs=[],
+                error=error_msg,
+                return_code=None,
+                has_multiple_containers=False,
+            )
+    def _parse_kubectl_logs(self, logs: str) -> LogResult:
+        """Parse kubectl logs output with container prefixes"""
+        structured_logs: List[StructuredLog] = []
+        if not logs:
+            return LogResult(
+                logs=structured_logs,
+                error=None,
+                return_code=None,
+                has_multiple_containers=False,
+            )
+        has_multiple_containers = False
+        previous_container: Optional[str] = None
+        for line in logs.strip().split("\n"):
+            if not line:
+                continue
+            # kubectl with --all-containers prefixes lines with [pod/container]
+            # Format: [pod/container] timestamp content
+            container_match = re.match(r"^\[([^/]+)/([^\]]+)\] (.*)$", line)
+            if container_match:
+                pod_name, container_name, rest_of_line = container_match.groups()
+                if not has_multiple_containers and not previous_container:
+                    previous_container = container_name
+                elif (
+                    not has_multiple_containers and previous_container != container_name
+                ):
+                    has_multiple_containers = True
+                # Now extract timestamp from rest_of_line
+                timestamp_match = timestamp_pattern.match(rest_of_line)
+                if timestamp_match:
+                    timestamp_str = timestamp_match.group(0)
+                    try:
+                        log_unix_ts = to_unix_ms(timestamp_str)
+                        prefix_length = len(timestamp_str)
+                        content = rest_of_line[prefix_length:]
+                        # Remove only the single space after timestamp, preserve other whitespaces to
+                        #   keep the indentations of the original logs
+                        if content.startswith(" "):
+                            content = content[1:]
+                        structured_logs.append(
+                            StructuredLog(
+                                timestamp_ms=log_unix_ts,
+                                content=content,
+                                container=container_name,
+                            )
+                        )
+                    except ValueError:
+                        # Keep the line with container info but no timestamp
+                        structured_logs.append(
+                            StructuredLog(
+                                timestamp_ms=None,
+                                content=rest_of_line,
+                                container=container_name,
+                            )
+                        )
+                else:
+                    # No timestamp but has container info
+                    structured_logs.append(
+                        StructuredLog(
+                            timestamp_ms=None,
+                            content=rest_of_line,
+                            container=container_name,
+                        )
+                    )
+            else:
+                # No container prefix - parse as regular log line
+                parsed = parse_logs(line, None)
+                structured_logs.extend(parsed)
+        return LogResult(
+            logs=structured_logs,
+            error=None,
+            return_code=None,
+            has_multiple_containers=has_multiple_containers,
+        )
+def format_logs(logs: List[StructuredLog], display_container_name: bool) -> str:
+    if display_container_name:
+        return "\n".join([f"{log.container or 'N/A'}: {log.content}" for log in logs])
+    else:
+        return "\n".join([log.content for log in logs])
+class TimeFilter(BaseModel):
+    start_ms: int
+    end_ms: int
+def filter_logs(
+    logs: List[StructuredLog], params: FetchPodLogsParams
+) -> List[StructuredLog]:
+    time_filter: Optional[TimeFilter] = None
+    if params.start_time or params.end_time:
+        start, end = process_timestamps_to_int(
+            start=params.start_time,
+            end=params.end_time,
+            default_time_span_seconds=3600,
+        )
+        time_filter = TimeFilter(start_ms=start * 1000, end_ms=end * 1000)
+    filtered_logs = []
+    logs.sort(key=lambda x: x.timestamp_ms or 0)
+    for log in logs:
+        if params.filter and params.filter.lower() not in log.content.lower():
+            # exclude this log
+            continue
+        if (
+            time_filter
+            and log.timestamp_ms
+            and (
+                log.timestamp_ms
+                < time_filter.start_ms  # log is before expected time range
+                or time_filter.end_ms
+                < log.timestamp_ms  # log is after expected time range
+            )
+        ):
+            # exclude this log
+            continue
+        else:
+            filtered_logs.append(log)
+    if params.limit and params.limit < len(filtered_logs):
+        filtered_logs = filtered_logs[-params.limit :]
+    return filtered_logs
+def parse_logs(
+    logs: Optional[str], container_name: Optional[str]
+) -> list[StructuredLog]:
+    structured_logs = []
+    if logs:
+        for log_line in logs.strip().split("\n"):
+            if not isinstance(log_line, str):
+                # defensive code given logs are from an external API
+                structured_logs.append(
+                    StructuredLog(
+                        timestamp_ms=None,
+                        content=str(log_line),
+                        container=container_name,
+                    )
+                )
+                continue
+            match = timestamp_pattern.match(log_line)
+            if match:
+                timestamp_str = match.group(0)
+                try:
+                    log_unix_ts = to_unix_ms(timestamp_str)
+                    prefix_length = len(timestamp_str)
+                    # Remove only the single space after timestamp, preserve other whitespace
+                    line_content = log_line[prefix_length:]
+                    if line_content.startswith(" "):
+                        line_content = line_content[1:]
+                    structured_logs.append(
+                        StructuredLog(
+                            timestamp_ms=log_unix_ts,
+                            content=line_content,
+                            container=container_name,
+                        )
+                    )
+                except ValueError:
+                    # For invalid timestamp formats (when regex matches but date parsing fails)
+                    # keep the original line - this is important for testing and consistency
+                    structured_logs.append(
+                        StructuredLog(
+                            timestamp_ms=None,
+                            content=log_line,
+                            container=container_name,
+                        )
+                    )
+            elif len(structured_logs) > 0:
+                # if a line has no timestamp, assume it is part of a previous line
+                structured_logs[-1].content += "\n" + log_line
+            else:
+                structured_logs.append(
+                    StructuredLog(
+                        timestamp_ms=None, content=log_line, container=container_name
+                    )
+                )
+    return structured_logs