holmesgpt 0.12.6__py3-none-any.whl → 0.13.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.

Potentially problematic release: this version of holmesgpt might be problematic.

Files changed (125)
  1. holmes/__init__.py +1 -1
  2. holmes/clients/robusta_client.py +19 -1
  3. holmes/common/env_vars.py +17 -0
  4. holmes/config.py +69 -9
  5. holmes/core/conversations.py +11 -0
  6. holmes/core/investigation.py +16 -3
  7. holmes/core/investigation_structured_output.py +12 -0
  8. holmes/core/llm.py +13 -1
  9. holmes/core/models.py +9 -1
  10. holmes/core/openai_formatting.py +72 -12
  11. holmes/core/prompt.py +13 -0
  12. holmes/core/supabase_dal.py +3 -0
  13. holmes/core/todo_manager.py +88 -0
  14. holmes/core/tool_calling_llm.py +230 -157
  15. holmes/core/tools.py +10 -1
  16. holmes/core/tools_utils/tool_executor.py +7 -2
  17. holmes/core/tools_utils/toolset_utils.py +7 -2
  18. holmes/core/toolset_manager.py +1 -5
  19. holmes/core/tracing.py +4 -3
  20. holmes/interactive.py +1 -0
  21. holmes/main.py +9 -2
  22. holmes/plugins/prompts/__init__.py +7 -1
  23. holmes/plugins/prompts/_current_date_time.jinja2 +1 -0
  24. holmes/plugins/prompts/_default_log_prompt.jinja2 +4 -2
  25. holmes/plugins/prompts/_fetch_logs.jinja2 +10 -1
  26. holmes/plugins/prompts/_general_instructions.jinja2 +14 -0
  27. holmes/plugins/prompts/_permission_errors.jinja2 +1 -1
  28. holmes/plugins/prompts/_toolsets_instructions.jinja2 +4 -4
  29. holmes/plugins/prompts/generic_ask.jinja2 +4 -3
  30. holmes/plugins/prompts/investigation_procedure.jinja2 +210 -0
  31. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +2 -0
  32. holmes/plugins/runbooks/CLAUDE.md +85 -0
  33. holmes/plugins/runbooks/README.md +24 -0
  34. holmes/plugins/toolsets/__init__.py +19 -6
  35. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +27 -0
  36. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +2 -2
  37. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +2 -1
  38. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -1
  39. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +2 -1
  40. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +2 -1
  41. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +3 -1
  42. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +2 -1
  43. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +2 -1
  44. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +2 -1
  45. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +2 -1
  46. holmes/plugins/toolsets/bash/argocd/__init__.py +65 -0
  47. holmes/plugins/toolsets/bash/argocd/constants.py +120 -0
  48. holmes/plugins/toolsets/bash/aws/__init__.py +66 -0
  49. holmes/plugins/toolsets/bash/aws/constants.py +529 -0
  50. holmes/plugins/toolsets/bash/azure/__init__.py +56 -0
  51. holmes/plugins/toolsets/bash/azure/constants.py +339 -0
  52. holmes/plugins/toolsets/bash/bash_instructions.jinja2 +6 -7
  53. holmes/plugins/toolsets/bash/bash_toolset.py +47 -13
  54. holmes/plugins/toolsets/bash/common/bash_command.py +131 -0
  55. holmes/plugins/toolsets/bash/common/stringify.py +14 -1
  56. holmes/plugins/toolsets/bash/common/validators.py +91 -0
  57. holmes/plugins/toolsets/bash/docker/__init__.py +59 -0
  58. holmes/plugins/toolsets/bash/docker/constants.py +255 -0
  59. holmes/plugins/toolsets/bash/helm/__init__.py +61 -0
  60. holmes/plugins/toolsets/bash/helm/constants.py +92 -0
  61. holmes/plugins/toolsets/bash/kubectl/__init__.py +80 -79
  62. holmes/plugins/toolsets/bash/kubectl/constants.py +0 -14
  63. holmes/plugins/toolsets/bash/kubectl/kubectl_describe.py +38 -56
  64. holmes/plugins/toolsets/bash/kubectl/kubectl_events.py +28 -76
  65. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +39 -99
  66. holmes/plugins/toolsets/bash/kubectl/kubectl_logs.py +34 -15
  67. holmes/plugins/toolsets/bash/kubectl/kubectl_run.py +1 -1
  68. holmes/plugins/toolsets/bash/kubectl/kubectl_top.py +38 -77
  69. holmes/plugins/toolsets/bash/parse_command.py +106 -32
  70. holmes/plugins/toolsets/bash/utilities/__init__.py +0 -0
  71. holmes/plugins/toolsets/bash/utilities/base64_util.py +12 -0
  72. holmes/plugins/toolsets/bash/utilities/cut.py +12 -0
  73. holmes/plugins/toolsets/bash/utilities/grep/__init__.py +10 -0
  74. holmes/plugins/toolsets/bash/utilities/head.py +12 -0
  75. holmes/plugins/toolsets/bash/utilities/jq.py +79 -0
  76. holmes/plugins/toolsets/bash/utilities/sed.py +164 -0
  77. holmes/plugins/toolsets/bash/utilities/sort.py +15 -0
  78. holmes/plugins/toolsets/bash/utilities/tail.py +12 -0
  79. holmes/plugins/toolsets/bash/utilities/tr.py +57 -0
  80. holmes/plugins/toolsets/bash/utilities/uniq.py +12 -0
  81. holmes/plugins/toolsets/bash/utilities/wc.py +12 -0
  82. holmes/plugins/toolsets/coralogix/api.py +6 -6
  83. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +7 -1
  84. holmes/plugins/toolsets/datadog/datadog_api.py +20 -8
  85. holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +8 -1
  86. holmes/plugins/toolsets/datadog/datadog_rds_instructions.jinja2 +82 -0
  87. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +12 -5
  88. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +20 -11
  89. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +735 -0
  90. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +18 -11
  91. holmes/plugins/toolsets/git.py +15 -15
  92. holmes/plugins/toolsets/grafana/grafana_api.py +12 -1
  93. holmes/plugins/toolsets/grafana/toolset_grafana.py +5 -1
  94. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +9 -4
  95. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +12 -5
  96. holmes/plugins/toolsets/internet/internet.py +2 -1
  97. holmes/plugins/toolsets/internet/notion.py +2 -1
  98. holmes/plugins/toolsets/investigator/__init__.py +0 -0
  99. holmes/plugins/toolsets/investigator/core_investigation.py +157 -0
  100. holmes/plugins/toolsets/investigator/investigator_instructions.jinja2 +253 -0
  101. holmes/plugins/toolsets/investigator/model.py +15 -0
  102. holmes/plugins/toolsets/kafka.py +14 -7
  103. holmes/plugins/toolsets/kubernetes_logs.py +454 -25
  104. holmes/plugins/toolsets/logging_utils/logging_api.py +115 -55
  105. holmes/plugins/toolsets/mcp/toolset_mcp.py +1 -1
  106. holmes/plugins/toolsets/newrelic.py +8 -3
  107. holmes/plugins/toolsets/opensearch/opensearch.py +8 -4
  108. holmes/plugins/toolsets/opensearch/opensearch_logs.py +9 -2
  109. holmes/plugins/toolsets/opensearch/opensearch_traces.py +6 -2
  110. holmes/plugins/toolsets/prometheus/prometheus.py +179 -44
  111. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +8 -2
  112. holmes/plugins/toolsets/robusta/robusta.py +4 -4
  113. holmes/plugins/toolsets/runbook/runbook_fetcher.py +6 -5
  114. holmes/plugins/toolsets/servicenow/servicenow.py +18 -3
  115. holmes/plugins/toolsets/utils.py +8 -1
  116. holmes/utils/console/logging.py +6 -1
  117. holmes/utils/llms.py +20 -0
  118. holmes/utils/stream.py +90 -0
  119. {holmesgpt-0.12.6.dist-info → holmesgpt-0.13.1.dist-info}/METADATA +47 -34
  120. {holmesgpt-0.12.6.dist-info → holmesgpt-0.13.1.dist-info}/RECORD +123 -91
  121. holmes/plugins/toolsets/bash/grep/__init__.py +0 -52
  122. holmes/utils/robusta.py +0 -9
  123. {holmesgpt-0.12.6.dist-info → holmesgpt-0.13.1.dist-info}/LICENSE.txt +0 -0
  124. {holmesgpt-0.12.6.dist-info → holmesgpt-0.13.1.dist-info}/WHEEL +0 -0
  125. {holmesgpt-0.12.6.dist-info → holmesgpt-0.13.1.dist-info}/entry_points.txt +0 -0
holmes/core/tools_utils/toolset_utils.py CHANGED
@@ -16,12 +16,17 @@ def filter_out_default_logging_toolset(toolsets: list[Toolset]) -> list[Toolset]:
     All other types of toolsets are included as is.
     """
 
-    logging_toolsets: list[BasePodLoggingToolset] = []
+    logging_toolsets: list[Toolset] = []
     final_toolsets: list[Toolset] = []
 
     for ts in toolsets:
+        toolset_type = (
+            ts.original_toolset_type
+            if hasattr(ts, "original_toolset_type")
+            else type(ts)
+        )
         if (
-            isinstance(ts, BasePodLoggingToolset)
+            issubclass(toolset_type, BasePodLoggingToolset)
            and ts.status == ToolsetStatusEnum.ENABLED
         ):
             logging_toolsets.append(ts)
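
The switch from `isinstance` to resolving `original_toolset_type` suggests the filter must now recognize pod-logging toolsets that are wrapped by another object. A minimal sketch of why, with `KubernetesLogsToolset` and `ToolsetWrapper` as hypothetical stand-ins; the `getattr` lookup is equivalent to the `hasattr` ternary in the hunk above:

```python
class BasePodLoggingToolset:
    pass

class KubernetesLogsToolset(BasePodLoggingToolset):
    pass

class ToolsetWrapper:
    """Stand-in for a proxying toolset; note it is NOT a BasePodLoggingToolset."""
    def __init__(self, wrapped_cls):
        self.original_toolset_type = wrapped_cls  # advertises the wrapped class

wrapped = ToolsetWrapper(KubernetesLogsToolset)

# isinstance() misses the wrapper entirely:
assert not isinstance(wrapped, BasePodLoggingToolset)

# Resolving the original type, as the new code does, recovers it:
toolset_type = getattr(wrapped, "original_toolset_type", type(wrapped))
assert issubclass(toolset_type, BasePodLoggingToolset)
```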
holmes/core/toolset_manager.py CHANGED
@@ -266,11 +266,7 @@ class ToolsetManager:
                 toolset.path = cached_status.get("path", None)
             # check prerequisites for only enabled toolset when the toolset is loaded from cache. When the toolset is
             # not loaded from cache, the prerequisites are checked in the refresh_toolset_status method.
-            if (
-                toolset.enabled
-                and toolset.status == ToolsetStatusEnum.ENABLED
-                and using_cached
-            ):
+            if toolset.enabled and toolset.status == ToolsetStatusEnum.ENABLED:
                 enabled_toolsets_from_cache.append(toolset)
         self.check_toolset_prerequisites(enabled_toolsets_from_cache)
 
holmes/core/tracing.py CHANGED
@@ -91,10 +91,11 @@ class SpanType(Enum):
     """Standard span types for tracing categorization."""
 
     LLM = "llm"
-    TOOL = "tool"
-    TASK = "task"
     SCORE = "score"
+    FUNCTION = "function"
     EVAL = "eval"
+    TASK = "task"
+    TOOL = "tool"
 
 
 class DummySpan:
@@ -119,7 +120,7 @@ class DummySpan:
 class DummyTracer:
     """A no-op tracer implementation for when tracing is disabled."""
 
-    def start_experiment(self, experiment_name=None, metadata=None):
+    def start_experiment(self, experiment_name=None, additional_metadata=None):
         """No-op experiment creation."""
         return None
 
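The rename of `metadata` to `additional_metadata` on `DummyTracer.start_experiment` is a breaking change for keyword callers. A small sketch reproducing the new no-op signature from the hunk above; the argument values are purely illustrative:

```python
class DummyTracer:
    """A no-op tracer implementation for when tracing is disabled."""

    def start_experiment(self, experiment_name=None, additional_metadata=None):
        """No-op experiment creation."""
        return None

tracer = DummyTracer()
# New keyword, as of 0.13.1:
tracer.start_experiment(experiment_name="eval-run", additional_metadata={"run": 1})
# tracer.start_experiment(metadata={})  # old keyword would now raise TypeError
```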
holmes/interactive.py CHANGED
@@ -1002,6 +1002,7 @@ def run_interactive_loop(
         user_input,
         include_files,
         ai.tool_executor,
+        ai.investigation_id,
         runbooks,
         system_prompt_additions,
     )
holmes/main.py CHANGED
@@ -94,7 +94,7 @@ opt_custom_runbooks: Optional[List[Path]] = typer.Option(
     help="Path to a custom runbooks (can specify -r multiple times to add multiple runbooks)",
 )
 opt_max_steps: Optional[int] = typer.Option(
-    10,
+    40,
     "--max-steps",
     help="Advanced. Maximum number of steps the LLM can take to investigate the issue",
 )
@@ -104,6 +104,11 @@ opt_verbose: Optional[List[bool]] = typer.Option(
     "-v",
     help="Verbose output. You can pass multiple times to increase the verbosity. e.g. -v or -vv or -vvv",
 )
+opt_log_costs: bool = typer.Option(
+    False,
+    "--log-costs",
+    help="Show LLM cost information in the output",
+)
 opt_echo_request: bool = typer.Option(
     True,
     "--echo/--no-echo",
@@ -176,6 +181,7 @@ def ask(
     custom_toolsets: Optional[List[Path]] = opt_custom_toolsets,
     max_steps: Optional[int] = opt_max_steps,
     verbose: Optional[List[bool]] = opt_verbose,
+    log_costs: bool = opt_log_costs,
     # semi-common options
     destination: Optional[DestinationType] = opt_destination,
     slack_token: Optional[str] = opt_slack_token,
@@ -219,7 +225,7 @@ def ask(
     """
     Ask any question and answer using available tools
     """
-    console = init_logging(verbose)  # type: ignore
+    console = init_logging(verbose, log_costs)  # type: ignore
     # Detect and read piped input
     piped_data = None
 
@@ -302,6 +308,7 @@ def ask(
         prompt,  # type: ignore
         include_file,
         ai.tool_executor,
+        ai.investigation_id,
         config.get_runbook_catalog(),
         system_prompt_additions,
     )
holmes/plugins/prompts/__init__.py CHANGED
@@ -43,6 +43,12 @@ def load_and_render_prompt(prompt: str, context: Optional[dict] = None) -> str:
         context = {}
 
     now = datetime.now(timezone.utc)
-    context.update({"now": f"{now}", "now_timestamp_seconds": int(now.timestamp())})
+    context.update(
+        {
+            "now": f"{now}",
+            "now_timestamp_seconds": int(now.timestamp()),
+            "current_year": now.year,
+        }
+    )
 
     return template.render(**context)
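
The new `current_year` key is plain Jinja context. A minimal sketch, assuming nothing beyond stock Jinja2 (which `load_and_render_prompt` wraps), of how the three keys render in a template like `_current_date_time.jinja2`:

```python
from datetime import datetime, timezone
from jinja2 import Template

template = Template(
    "Current UTC time: {{ now }} ({{ now_timestamp_seconds }}). "
    "Assume dates without a year mean {{ current_year }}."
)

now = datetime.now(timezone.utc)
print(template.render(
    now=f"{now}",
    now_timestamp_seconds=int(now.timestamp()),
    current_year=now.year,  # the context key added in 0.13.1
))
```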
holmes/plugins/prompts/_current_date_time.jinja2 CHANGED
@@ -1 +1,2 @@
 When querying tools, always query for the relevant time period. The current UTC date and time are {{ now }}. The current UTC timestamp in seconds is {{ now_timestamp_seconds }}.
+When users mention dates without years (e.g., 'March 25th', 'last May', etc.), assume they mean the current year ({{ current_year }}) unless context suggests otherwise.
holmes/plugins/prompts/_default_log_prompt.jinja2 CHANGED
@@ -7,5 +7,7 @@
 * If you have an issue id or finding id, use `fetch_finding_by_id` as it contains time information about the issue (`starts_at`, `updated_at` and `ends_at`).
 ** Then, use `start_time=-300` (5 minutes before `end_time`) and `end_time=<issue start_at time>` when calling `fetch_pod_logs`.
 ** If there are too many logs, or not enough, narrow or widen the timestamps
-** If looking for a specific keyword, use the `filter` argument
-* If you are not provided with time information. Ignore the `start_time` and `end_time`. The tool `fetch_pod_logs` will default to the latest logs.
+* If the user did not explicitly ask about a given timeframe, ignore the `start_time` and `end_time` so it will use the default.
+* IMPORTANT: ALWAYS inform the user about the actual time period fetched (e.g., "Looking at logs from the last <X> days")
+* IMPORTANT: If a limit was applied, ALWAYS tell the user how many logs were shown vs total (e.g., "Showing latest <Y> of <Z> logs")
+* IMPORTANT: If any filters were applied, ALWAYS mention them explicitly
holmes/plugins/prompts/_fetch_logs.jinja2 CHANGED
@@ -4,8 +4,14 @@
 {%- set k8s_yaml_ts = toolsets | selectattr("name", "equalto", "kubernetes/logs") | rejectattr("fetch_pod_logs", "defined") | first -%}
 {%- set opensearch_ts = toolsets | selectattr("name", "equalto", "opensearch/logs") | first -%}
 {%- set datadog_ts = toolsets | selectattr("name", "equalto", "datadog/logs") | first -%}
+{%- set bash_ts = toolsets | selectattr("name", "equalto", "bash") | first -%}
 
 ## Logs
+
+* IMPORTANT: ALWAYS inform the user about what logs you fetched. For example: "Here are pod logs for ..."
+* IMPORTANT: If logs commands have limits mention them. For example: "Showing last 100 lines of logs:"
+* IMPORTANT: If a filter was used, mention the filter. For example: "Logs filtered for 'error':"
+
 {% if loki_ts and loki_ts.status == "enabled" -%}
 * For any logs, including for investigating kubernetes problems, use Loki
 * Use the tool fetch_loki_logs_for_resource to get the logs of any kubernetes pod or node
@@ -15,7 +21,7 @@
 * If you have an issue id or finding id, use `fetch_finding_by_id` as it contains time information about the issue (`starts_at`, `updated_at` and `ends_at`).
 ** Then, defaults to `start_timestamp=-300` (5 minutes before end_timestamp) and `end_timestamp=<issue start_at time>`.
 ** If there are too many logs, or not enough, narrow or widen the timestamps
-* If you are not provided with time information. Ignore start_timestamp and end_timestamp. Loki will default to the latest logs.
+* If you are not provided with time information, ignore start_timestamp and end_timestamp.
 {%- elif coralogix_ts and coralogix_ts.status == "enabled" -%}
 ### coralogix/logs
 #### Coralogix Logs Toolset
@@ -39,6 +45,9 @@ Tools to search and fetch logs from Kubernetes.
 * Check both kubectl_logs and kubectl_previous_logs because a pod restart means kubectl_logs may not have relevant logs
 {%- elif opensearch_ts and opensearch_ts.status == "enabled" -%}
 {% include '_default_log_prompt.jinja2' %}
+{%- elif bash_ts and bash_ts.status == "enabled" -%}
+Use the tool `run_bash_command` to run `kubectl logs` commands and fetch any relevant pod logs.
+DO NOT use `--tail` or `| tail` when calling `kubectl logs` because you may miss critical information.
 {%- else -%}
 * You have not been given access to tools to fetch kubernetes logs for nodes, pods, services or apps. This is likely a misconfiguration.
 * If you need logs to answer questions or investigate issues, tell the user to consult the documentation and enable one of these toolsets:
holmes/plugins/prompts/_general_instructions.jinja2 CHANGED
@@ -1,3 +1,5 @@
+{% include 'investigation_procedure.jinja2' %}
+
 {% include '_ai_safety.jinja2' %}
 
 # In general
@@ -49,6 +51,18 @@
 * For any question, try to make the answer specific to the user's cluster.
 ** For example, if asked to port forward, find out the app or pod port (kubectl describe) and provide a port forward command specific to the user's question
 
+# MANDATORY Task Management
+
+* You MUST use the TodoWrite tool for ANY investigation requiring multiple steps
+* Your FIRST tool call MUST be TodoWrite to create your investigation plan
+* Break down ALL complex problems into smaller, manageable tasks
+* You MUST update task status (pending → in_progress → completed) as you work through your investigation
+* The TodoWrite tool will show you a formatted task list - reference this throughout your investigation
+* Mark tasks as 'in_progress' when you start them, 'completed' when finished
+* Follow ALL tasks in your plan - don't skip any tasks
+* Use task management to ensure you don't miss important investigation steps
+* If you discover additional steps during investigation, add them to your task list using TodoWrite
+
 # Tool/function calls
 
 You are able to make tool calls / function calls. Recognise when a tool has already been called and reuse its result.
holmes/plugins/prompts/_permission_errors.jinja2 CHANGED
@@ -3,4 +3,4 @@
 If during the investigation you encounter a permissions error (e.g., `Error from server (Forbidden):`), **ALWAYS** follow these steps to ensure a thorough resolution:
 1. Analyze the Error Message: Identify the missing resource, API group, and verbs from the error details.
 2. Check which user/service account you're running with and what permissions it has
-3. Report this to the user and refer them to https://robusta-dev.github.io/holmesgpt/data-sources/permissions/
+3. Report this to the user and refer them to https://holmesgpt.dev/data-sources/permissions/
holmes/plugins/prompts/_toolsets_instructions.jinja2 CHANGED
@@ -51,14 +51,14 @@ If you need a toolset to access a system that you don't otherwise have access to
 - If the toolset has `status: disabled`: Ask the user to configure it.
   - Share the setup instructions URL with the user
 - If there are no relevant toolsets in the list above, tell the user that you are missing an integration to access XYZ:
-  You should give an answer similar to "I don't have access to <system>. To add a HolmesGPT integration for <system> you can [connect an MCP server](https://robusta-dev.github.io/holmesgpt/data-sources/remote-mcp-servers/) or add a [custom toolset](https://robusta-dev.github.io/holmesgpt/data-sources/custom-toolsets/)."
+  You should give an answer similar to "I don't have access to <system>. To add a HolmesGPT integration for <system> you can [connect an MCP server](https://holmesgpt.dev/data-sources/remote-mcp-servers/) or add a [custom toolset](https://holmesgpt.dev/data-sources/custom-toolsets/)."
 
 Likewise, if users ask about setting up or configuring integrations (e.g., "How can I give you access to ArgoCD applications?"):
 ALWAYS check if there's a disabled or failed toolset that matches what the user is asking about. If you find one:
 1. If the toolset has a specific documentation URL (toolset.docs_url), ALWAYS direct them to that URL first
 2. If no specific documentation exists, then direct them to the general Holmes documentation:
-   - For all toolset configurations: https://robusta-dev.github.io/holmesgpt/data-sources/
-   - For custom toolsets: https://robusta-dev.github.io/holmesgpt/data-sources/custom-toolsets/
-   - For remote MCP servers: https://robusta-dev.github.io/holmesgpt/data-sources/remote-mcp-servers/
+   - For all toolset configurations: https://holmesgpt.dev/data-sources/
+   - For custom toolsets: https://holmesgpt.dev/data-sources/custom-toolsets/
+   - For remote MCP servers: https://holmesgpt.dev/data-sources/remote-mcp-servers/
 
 When providing configuration guidance, always prefer the specific toolset documentation URL when available.
holmes/plugins/prompts/generic_ask.jinja2 CHANGED
@@ -4,13 +4,14 @@ Ask for multiple tool calls at the same time as it saves time for the user.
 Do not say 'based on the tool output' or explicitly refer to tools at all.
 If you output an answer and then realize you need to call more tools or there are possible next steps, you may do so by calling tools at that point in time.
 If you have a good and concrete suggestion for how the user can fix something, tell them even if not asked explicitly
-{% include '_current_date_time.jinja2' %}
-
-Use conversation history to maintain continuity when appropriate, ensuring efficiency in your responses.
 
 If you are unsure about the answer to the user's request or how to satisfy their request, you should gather more information. This can be done by asking the user for more information.
 Bias towards not asking the user for help if you can find the answer yourself.
 
+{% include '_current_date_time.jinja2' %}
+
+Use conversation history to maintain continuity when appropriate, ensuring efficiency in your responses.
+
 {% include '_general_instructions.jinja2' %}
 
 {% include '_runbook_instructions.jinja2' %}
holmes/plugins/prompts/investigation_procedure.jinja2 ADDED
@@ -0,0 +1,210 @@
+{% if investigation_id %}
+# Investigation ID for this session
+Investigation id: {{ investigation_id }}
+{% endif %}
+
+CLARIFICATION REQUIREMENT: Before starting ANY investigation, if the user's question is ambiguous or lacks critical details, you MUST ask for clarification first. Do NOT create TodoWrite tasks for unclear questions.
+Only proceed with TodoWrite and investigation AFTER you have clear, specific requirements.
+
+CRITICAL: For multi-step questions, you MUST start by calling the TodoWrite tool with a `todos` parameter containing an array of task objects. Each task must have:
+- `id`: unique identifier (string)
+- `content`: specific task description (string)
+- `status`: "pending" for new tasks (string)
+
+MANDATORY Task Status Updates:
+- When starting a task: Call TodoWrite changing that task's status to "in_progress"
+- When completing a task: Call TodoWrite changing that task's status to "completed"
+
+PARALLEL EXECUTION RULES:
+- When possible, work on multiple tasks at a time. If tasks depend on one another, do them one after the other.
+- You MAY execute multiple INDEPENDENT tasks simultaneously
+- Mark multiple tasks as "in_progress" if they don't depend on each other
+- Wait for dependent tasks to complete before starting tasks that need their results
+- Always use a single TodoWrite call to update multiple task statuses
+
+DEPENDENCY ANALYSIS:
+Before marking tasks as "in_progress", determine if they are:
+- ✅ INDEPENDENT: Can run simultaneously (e.g., "Check pod A logs" + "Check pod B logs")
+- ❌ DEPENDENT: One needs results from another (e.g., "Find pod name" → "Get pod logs")
+
+PARALLEL EXECUTION EXAMPLE:
+TodoWrite(todos=[
+  {"id": "1", "content": "Check frontend pod logs", "status": "in_progress"},
+  {"id": "2", "content": "Check backend service config", "status": "in_progress"},
+  {"id": "3", "content": "Analyze network policies", "status": "in_progress"},
+  {"id": "4", "content": "Compare logs from both pods", "status": "pending"}  # Depends on 1,2
+])
+
+
+Examples:
+- Task 1: find the pod name
+  Task 2: get the pod logs
+  Execution Order: Perform Task 2 after Task 1
+- Task 1: get the pod events
+  Task 2: get the pod logs
+  Execution Order: Perform both tasks together
+
+MAXIMIZE PARALLEL TOOL CALLS:
+- When executing multiple in_progress tasks, make ALL their tool calls at once
+- Example: If tasks 1,2,3 are in_progress, call kubectl_logs + kubectl_describe + kubectl_get simultaneously
+
+# CRITICAL: TASK COMPLETION ENFORCEMENT
+
+YOU MUST COMPLETE EVERY SINGLE TASK before providing your final answer. NO EXCEPTIONS.
+
+**BEFORE providing any final answer or conclusion, you MUST:**
+
+1. **Check TodoWrite status**: Verify ALL tasks show "completed" status
+2. **If ANY task is "pending" or "in_progress"**:
+   - DO NOT provide a final answer
+   - Continue working on the next pending task
+   - Use TodoWrite to mark it "in_progress"
+   - Complete the task
+   - Mark it "completed" with TodoWrite
+3. **Only after ALL tasks are "completed"**: Proceed to verification and final answer
+
+**VIOLATION CONSEQUENCES**:
+- Providing answers with pending tasks = INVESTIGATION FAILURE
+- You MUST complete the verification task as the final step before any answer
+- Incomplete investigations are unacceptable and must be continued
+
+**Task Status Check Example:**
+Before final answer, confirm you see something like:
+[✓] completed - Task 1
+[✓] completed - Task 2
+[✓] completed - Task 3
+[✓] completed - Investigation Verification
+
+If you see ANY `[ ] pending` or `[~] in_progress` tasks, DO NOT provide final answer.
+
+Status Update Example:
+# Starting task 2:
+TodoWrite(todos=[
+  {"id": "1", "content": "Check pod status", "status": "completed"},
+  {"id": "2", "content": "Examine logs", "status": "in_progress"},
+  {"id": "3", "content": "Check resources", "status": "pending"}
+])
+
+
+{% if todo_list %}
+{{ todo_list }}
+{% endif %}
+
+# MANDATORY Multi-Phase Investigation Process
+
+For ANY question requiring investigation, you MUST follow this structured approach:
+
+## Phase 1: Initial Investigation
+1. **IMMEDIATELY START with TodoWrite**: Create initial investigation task list
+2. **Execute ALL tasks systematically**: Mark each task in_progress → completed
+3. **Complete EVERY task** in the current list before proceeding
+
+## Phase Evaluation and Continuation
+After completing ALL tasks in the current list, you MUST:
+
+1. **STOP and Evaluate**: Ask yourself these critical questions:
+   - "Do I have enough information to completely answer the user's question?"
+   - "Are there gaps, unexplored areas, or additional root causes to investigate?"
+   - "Have I followed the 'five whys' methodology to the actual root cause?"
+   - "Did my investigation reveal new questions or areas that need exploration?"
+   - "Are there any additional investigation steps I can perform, in order to provide a more accurate solution?"
+
+   If the answer to any of those questions is 'yes' - the investigation is INCOMPLETE!
+
+2. **If Investigation is INCOMPLETE**:
+   - Call TodoWrite to create a NEW task list for the next investigation phase
+   - Label it clearly: "Investigation Phase 2: [specific focus area]"
+   - Focus tasks on the specific gaps/questions discovered in the previous phase
+   - Execute ALL tasks in this new list
+   - Repeat this evaluation process
+
+3. **Continue Creating New Phases** until you can answer "YES" to:
+   - "Do I have enough information to completely answer the user's question?"
+   - "Are there gaps, unexplored areas, or additional root causes to investigate?"
+   - "Have I followed the 'five whys' methodology to the actual root cause?"
+   - "Did my investigation reveal new questions or areas that need exploration?"
+   - "Are there any additional investigation steps I can perform, in order to provide a more accurate solution?"
+   - "I have thoroughly investigated all aspects of this problem"
+   - "I can provide a complete answer with specific, actionable information"
+   - "No additional investigation would improve my answer"
+
+## MANDATORY Final Phase: Final Review
+
+**Before providing final answer, you MUST:**
+- Confirm answer addresses user question completely! This is the most important thing
+- Verify all claims are backed by tool evidence
+- Ensure actionable information is provided
+- If additional investigation steps are required, start a new investigation phase, and create a new task list to gather the missing information.
+
+## CRITICAL ENFORCEMENT RULES
+
+**ABSOLUTE REQUIREMENTS:**
+- NO final answer until the final review phase is 100% completed
+- Each investigation phase must have ALL tasks completed before evaluation
+- You MUST explicitly create new investigation phases when gaps are identified
+- Final Review phase is MANDATORY - never skip it
+
+**EXAMPLES of Phase Progression:**
+
+*Phase 1*: Initial investigation discovers pod crashes
+*Phase 2*: Deep dive into specific pod logs and resource constraints
+*Phase 3*: Investigate upstream services causing the crashes
+*Final Review Phase*: Self-critique and validate the complete solution
+
+*Phase 1*: Initial investigation - check pod health, metrics, logs, traces
+*Phase 2*: Based on data from the traces in Phase 1, investigate another workload in the cluster that seems to be the root cause of the issue. Investigate this workload as well
+*Phase 3*: Based on logs gathered in Phase 2, investigate a 3rd party managed service that seems to be the cause of the whole chain of events.
+*Final Review Phase*: Validate that the chain of events, across the different components, can lead to the investigated scenario.
+
+**VIOLATION CONSEQUENCES:**
+- Providing answers without a Final Review phase = INVESTIGATION FAILURE
+- Skipping investigation phases when gaps exist = INCOMPLETE ANALYSIS
+- Not completing all tasks in a phase = PROCESS VIOLATION
+
+# FINAL REVIEW PHASE EXECUTION GUIDE
+
+When executing the Final Review, you must:
+- Reread the original user question word-by-word
+- Compare against your proposed answer
+- Identify any aspects not addressed
+- Make sure you answer what the user asked!
+- List each claim in your answer
+- Trace each claim back to specific tool outputs
+- Flag any unsupported statements
+- Walk through your "five whys" chain
+- Verify each "why" logically follows from evidence
+- Ensure you reached the actual root cause, not just symptoms
+- Verify exact resource names are provided (not generic examples)
+- Check commands are complete and runnable
+- Ensure steps are specific to the user's environment
+- List any resource names, namespaces, configurations mentioned
+- Verify each was confirmed via tool calls
+- Flag anything assumed without verification
+- Identify potential weaknesses in your investigation
+- Consider alternative explanations not explored
+- Assess if additional investigation would strengthen the answer
+- If there are additional investigation steps that can help the user, start a new phase, and create a new task list to perform these steps
+
+
+# INVESTIGATION PHASE TRANSITION EXAMPLES
+
+**Example 1: Increased Error Rate**
+Phase 1: Check pod status, basic connectivity, logs, traces
+→ Evaluation: From traces, detected that the error is related to an upstream service
+Phase 2: Investigate the upstream service detected in Phase 1
+→ Evaluation: Found the upstream service has errors while connecting to a managed storage service.
+Phase 3: Investigate the external managed storage service found in Phase 2
+→ Evaluation: Complete - found the managed service is down due to an outage
+Verification Phase: Validate the solution addresses the original increased error rate.
+
+**Example 2: Application Performance Issue**
+Phase 1: Check application metrics, resource usage
+→ Evaluation: Found high CPU usage, but root cause unclear
+Phase 2: Investigate database connections, query performance
+→ Evaluation: Complete - found slow database queries causing the CPU spike
+Verification Phase: Confirm the analysis provides actionable database optimization steps
+
+**REMEMBER:** Each evaluation is a decision point:
+- Continue investigating (create new phase) OR
+- Proceed to verification (investigation complete)
+
+Never guess - if unsure whether the investigation is complete, create another phase.
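
The TodoWrite contract spelled out above fixes each task to an `id`, a `content` string, and a `status` cycling pending → in_progress → completed. A hedged sketch of that task shape as a Python model; the real definitions live in `holmes/core/todo_manager.py` and `holmes/plugins/toolsets/investigator/model.py`, which this does not reproduce:

```python
from dataclasses import dataclass
from enum import Enum

class TaskStatus(str, Enum):
    PENDING = "pending"
    IN_PROGRESS = "in_progress"
    COMPLETED = "completed"

@dataclass
class Task:
    id: str       # unique identifier
    content: str  # specific task description
    status: TaskStatus = TaskStatus.PENDING

todos = [
    Task(id="1", content="Check frontend pod logs", status=TaskStatus.IN_PROGRESS),
    Task(id="2", content="Compare logs from both pods"),  # pending until task 1 completes
]
```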
holmes/plugins/prompts/kubernetes_workload_ask.jinja2 CHANGED
@@ -6,6 +6,8 @@ If you output an answer and then realize you need to call more tools or there are possible next steps, you may do so by calling tools at that point in time.
 If the user provides you with extra instructions in a triple single quotes section, ALWAYS perform their instructions and then perform your investigation.
 {% include '_current_date_time.jinja2' %}
 
+{% include 'investigation_procedure.jinja2' %}
+
 {% include '_ai_safety.jinja2' %}
 
 Global Instructions
holmes/plugins/runbooks/CLAUDE.md ADDED
@@ -0,0 +1,85 @@
+You are an expert in automated diagnostics and runbook creation for AI-driven troubleshooting agents. I will provide you with one or more issue descriptions or test scenarios.
+
+Your task is to generate a strictly executable runbook for an AI agent to follow. The runbook should be machine-readable but human-understandable, and must include the following sections:
+
+# Runbook Content Structure
+
+## 1. Goal
+- **Primary Objective:** Clearly define the specific category of issues this runbook addresses (e.g., "diagnose network connectivity problems", "troubleshoot pod startup failures", "investigate performance degradation").
+- **Scope:** Specify the environment, technology stack, or system components covered by this runbook.
+- **Agent Mandate:** Explicitly state that the AI agent must follow the workflow steps sequentially and systematically without deviation to ensure consistent, thorough troubleshooting.
+- **Expected Outcome:** Define what successful completion of this runbook should achieve (root cause identification, issue resolution, or escalation criteria).
+
+## 2. Workflow for [Issue Category] Diagnosis
+- Provide numbered, sequential steps the AI agent must execute in order.
+- Each step should specify:
+  - **Action:** Describe the diagnostic function conceptually (e.g., "retrieve container logs from specified pod", "check service connectivity between components", "examine resource utilization metrics")
+  - **Function Description:** Explain what the function should accomplish rather than naming specific tools (e.g., "query the cluster to list all pods in a namespace and their current status" instead of "kubectl_get_pods()")
+  - **Parameters:** What data/arguments to pass to the function (namespace, pod name, time range, etc.)
+  - **Expected Output:** What information to gather from the result (status codes, error messages, metrics, configurations)
+  - **Success/Failure Criteria:** How to interpret the output and what indicates normal vs. problematic conditions
+- Use conditional logic (IF/ELSE) when branching is required based on findings.
+- Describe functions generically so they can be mapped to available tools (e.g., "execute a command to test network connectivity" rather than "ping_host()")
+- Include verification steps to confirm each diagnostic action was successful.
+
+## 3. Synthesize Findings
+- **Data Correlation:** Describe how the AI agent should combine outputs from multiple workflow steps.
+- **Pattern Recognition:** Specify what patterns, error messages, or metrics indicate specific root causes.
+- **Prioritization Logic:** Provide criteria for ranking potential causes by likelihood or severity.
+- **Evidence Requirements:** Define what evidence is needed to confidently identify each potential root cause.
+- **Example Scenarios:** Include sample synthesis statements showing how findings should be summarized.
+
+## 4. Recommended Remediation Steps
+- **Immediate Actions:** List temporary workarounds or urgent fixes for critical issues.
+- **Permanent Solutions:** Provide step-by-step permanent remediation procedures.
+- **Verification Steps:** Define how to confirm each remediation action was successful.
+- **Documentation References:** Include links to official documentation, best practices, or vendor guidance.
+- **Escalation Criteria:** Specify when and how to escalate if remediation steps fail.
+- **Post-Remediation Monitoring:** Describe what to monitor to prevent recurrence.
+
+# File Organization Guidelines
+
+## Folder Structure
+*Category folders are used to distinguish and categorize different runbooks based on their focus area or technology domain. Each runbook must be placed into a specific category folder under `holmes/plugins/runbooks/` for better organization and discoverability. Create a new category folder if your runbook doesn't fit into existing categories.*
+
+## File Naming
+*Use consistent naming conventions for runbook files:*
+
+- Use descriptive, lowercase names with hyphens: `dns-resolution-troubleshooting.md`
+- Include the issue type or technology: `redis-connection-issues.md`
+- Avoid generic names like `troubleshooting.md` or `debug.md`
+
+### Catalog Registration
+After creating your runbook, you must add an entry to `catalog.json` in the runbooks directory to make it discoverable by AI agents.
+
+**Steps to add a new catalog entry:**
+
+1. **Open** `holmes/plugins/runbooks/catalog.json`
+2. **Add your entry** to the JSON array following this structure:
+```json
+{
+  "name": "Brief, descriptive name of the runbook",
+  "path": "category-folder/your-runbook-filename.md",
+  "description": "Clear description of what issues this runbook addresses",
+  "tags": ["relevant", "tags", "for", "search"]
+}
+```
+
+3. **Ensure proper JSON formatting** - add a comma after the previous entry if needed
+4. **Validate the JSON** is properly formatted before committing
+
+**Field Guidelines:**
+- `name`: Keep concise but descriptive (e.g., "Redis Connection Issues")
+- `path`: Always include the category folder (e.g., "database/redis-connection-issues.md")
+- `description`: Explain what specific problems this runbook solves
+- `tags`: Include technology names, issue types, and relevant keywords
+
+Example catalog entry:
+```json
+{
+  "name": "DNS Resolution Troubleshooting",
+  "path": "networking/dns-resolution-troubleshooting.md",
+  "description": "Comprehensive guide for diagnosing and resolving DNS resolution issues in Kubernetes clusters",
+  "tags": ["dns", "networking", "kubernetes", "troubleshooting"]
+}
+```
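
For the "validate the JSON" step above, a small sketch that checks `catalog.json` parses and that each entry carries the four documented fields; whether the file is a bare array or an object wrapping one is an assumption handled both ways:

```python
import json

with open("holmes/plugins/runbooks/catalog.json") as f:
    catalog = json.load(f)  # raises json.JSONDecodeError on malformed JSON

# The catalog may be a bare array, or an object wrapping one (assumption).
entries = catalog if isinstance(catalog, list) else next(iter(catalog.values()))
for entry in entries:
    missing = {"name", "path", "description", "tags"} - entry.keys()
    assert not missing, f"{entry.get('name', '?')} is missing fields: {missing}"
```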
holmes/plugins/runbooks/README.md CHANGED
@@ -20,3 +20,27 @@ This runbook is mainly used for `holmes investigate`
 
 The catalog specified in [catalog.json](catalog.json) contains a collection of runbooks written in markdown.
 During runtime, the LLM will compare the runbook description with the user question and return the best-matching runbook for investigation. It's possible no runbook is returned when there is no match.
+
+## Generating Runbooks
+
+To ensure all runbooks follow a consistent format and improve troubleshooting accuracy, contributors should use the standardized [runbook format prompt](runbook-format.prompt.md) when creating new runbooks.
+
+### Using the Runbook Format Prompt
+
+1. **Start with the Template**: Use `prompt.md` as your guide when creating new runbooks
+2. **Follow the Structure**: Ensure your runbook includes all required sections:
+   - **Goal**: Clear definition of issues addressed and agent mandate
+   - **Workflow**: Sequential diagnostic steps with detailed function descriptions
+   - **Synthesize Findings**: Logic for combining outputs and identifying root causes
+   - **Recommended Remediation Steps**: Both immediate and permanent solutions
+
+### Benefits of Using the Standard Format
+
+- **Consistency**: All runbooks follow the same structure and terminology
+- **AI Agent Compatibility**: Ensures runbooks are machine-readable and executable by AI agents
+- **Improved Accuracy**: Standardized format reduces ambiguity and improves diagnostic success rates
+- **Maintainability**: Easier to update and maintain runbooks across the project
+
+### Example Usage
+
+When creating a runbook for a new issue category (e.g., storage problems, authentication failures), provide the issue description to an LLM along with the prompt template to generate a properly formatted runbook that follows the established patterns.