PyPI - holmesgpt - Versions diffs - 0.13.2__py3-none-any.whl → 0.18.4__py3-none-any.whl - Mend

holmesgpt 0.13.2-py3-none-any.whl → 0.18.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (188) hide show

holmes/__init__.py +3 -5
holmes/clients/robusta_client.py +20 -6
holmes/common/env_vars.py +58 -3
holmes/common/openshift.py +1 -1
holmes/config.py +123 -148
holmes/core/conversations.py +71 -15
holmes/core/feedback.py +191 -0
holmes/core/investigation.py +31 -39
holmes/core/investigation_structured_output.py +3 -3
holmes/core/issue.py +1 -1
holmes/core/llm.py +508 -88
holmes/core/models.py +108 -4
holmes/core/openai_formatting.py +14 -1
holmes/core/prompt.py +48 -3
holmes/core/runbooks.py +1 -0
holmes/core/safeguards.py +8 -6
holmes/core/supabase_dal.py +295 -100
holmes/core/tool_calling_llm.py +489 -428
holmes/core/tools.py +325 -56
holmes/core/tools_utils/token_counting.py +21 -0
holmes/core/tools_utils/tool_context_window_limiter.py +40 -0
holmes/core/tools_utils/tool_executor.py +0 -13
holmes/core/tools_utils/toolset_utils.py +1 -0
holmes/core/toolset_manager.py +191 -5
holmes/core/tracing.py +19 -3
holmes/core/transformers/__init__.py +23 -0
holmes/core/transformers/base.py +63 -0
holmes/core/transformers/llm_summarize.py +175 -0
holmes/core/transformers/registry.py +123 -0
holmes/core/transformers/transformer.py +32 -0
holmes/core/truncation/compaction.py +94 -0
holmes/core/truncation/dal_truncation_utils.py +23 -0
holmes/core/truncation/input_context_window_limiter.py +219 -0
holmes/interactive.py +228 -31
holmes/main.py +23 -40
holmes/plugins/interfaces.py +2 -1
holmes/plugins/prompts/__init__.py +2 -1
holmes/plugins/prompts/_fetch_logs.jinja2 +31 -6
holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
holmes/plugins/prompts/_runbook_instructions.jinja2 +24 -12
holmes/plugins/prompts/base_user_prompt.jinja2 +7 -0
holmes/plugins/prompts/conversation_history_compaction.jinja2 +89 -0
holmes/plugins/prompts/generic_ask.jinja2 +0 -4
holmes/plugins/prompts/generic_ask_conversation.jinja2 +0 -1
holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +0 -1
holmes/plugins/prompts/generic_investigation.jinja2 +0 -1
holmes/plugins/prompts/investigation_procedure.jinja2 +50 -1
holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +0 -1
holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +0 -1
holmes/plugins/runbooks/__init__.py +145 -17
holmes/plugins/runbooks/catalog.json +2 -0
holmes/plugins/sources/github/__init__.py +4 -2
holmes/plugins/sources/prometheus/models.py +1 -0
holmes/plugins/toolsets/__init__.py +44 -27
holmes/plugins/toolsets/aks-node-health.yaml +46 -0
holmes/plugins/toolsets/aks.yaml +64 -0
holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +38 -47
holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +3 -2
holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +3 -2
holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +3 -1
holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +3 -1
holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +12 -13
holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +15 -12
holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +15 -12
holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +11 -11
holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +11 -9
holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +15 -12
holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +15 -15
holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +11 -8
holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +11 -8
holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +11 -8
holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +11 -8
holmes/plugins/toolsets/azure_sql/utils.py +0 -32
holmes/plugins/toolsets/bash/argocd/__init__.py +3 -3
holmes/plugins/toolsets/bash/aws/__init__.py +4 -4
holmes/plugins/toolsets/bash/azure/__init__.py +4 -4
holmes/plugins/toolsets/bash/bash_toolset.py +11 -15
holmes/plugins/toolsets/bash/common/bash.py +23 -13
holmes/plugins/toolsets/bash/common/bash_command.py +1 -1
holmes/plugins/toolsets/bash/common/stringify.py +1 -1
holmes/plugins/toolsets/bash/kubectl/__init__.py +2 -1
holmes/plugins/toolsets/bash/kubectl/constants.py +0 -1
holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +3 -4
holmes/plugins/toolsets/bash/parse_command.py +12 -13
holmes/plugins/toolsets/cilium.yaml +284 -0
holmes/plugins/toolsets/connectivity_check.py +124 -0
holmes/plugins/toolsets/coralogix/api.py +132 -119
holmes/plugins/toolsets/coralogix/coralogix.jinja2 +14 -0
holmes/plugins/toolsets/coralogix/toolset_coralogix.py +219 -0
holmes/plugins/toolsets/coralogix/utils.py +15 -79
holmes/plugins/toolsets/datadog/datadog_api.py +525 -26
holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +55 -11
holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +3 -3
holmes/plugins/toolsets/datadog/datadog_models.py +59 -0
holmes/plugins/toolsets/datadog/datadog_url_utils.py +213 -0
holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +165 -28
holmes/plugins/toolsets/datadog/toolset_datadog_general.py +417 -241
holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +234 -214
holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +167 -79
holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +374 -363
holmes/plugins/toolsets/elasticsearch/__init__.py +6 -0
holmes/plugins/toolsets/elasticsearch/elasticsearch.py +834 -0
holmes/plugins/toolsets/elasticsearch/opensearch_ppl_query_docs.jinja2 +1616 -0
holmes/plugins/toolsets/elasticsearch/opensearch_query_assist.py +78 -0
holmes/plugins/toolsets/elasticsearch/opensearch_query_assist_instructions.jinja2 +223 -0
holmes/plugins/toolsets/git.py +54 -50
holmes/plugins/toolsets/grafana/base_grafana_toolset.py +16 -4
holmes/plugins/toolsets/grafana/common.py +13 -29
holmes/plugins/toolsets/grafana/grafana_tempo_api.py +455 -0
holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +25 -0
holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +191 -0
holmes/plugins/toolsets/grafana/loki_api.py +4 -0
holmes/plugins/toolsets/grafana/toolset_grafana.py +293 -89
holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +49 -0
holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +820 -292
holmes/plugins/toolsets/grafana/trace_parser.py +4 -3
holmes/plugins/toolsets/internet/internet.py +15 -16
holmes/plugins/toolsets/internet/notion.py +9 -11
holmes/plugins/toolsets/investigator/core_investigation.py +44 -36
holmes/plugins/toolsets/investigator/model.py +3 -1
holmes/plugins/toolsets/json_filter_mixin.py +134 -0
holmes/plugins/toolsets/kafka.py +36 -42
holmes/plugins/toolsets/kubernetes.yaml +317 -113
holmes/plugins/toolsets/kubernetes_logs.py +9 -9
holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
holmes/plugins/toolsets/logging_utils/logging_api.py +94 -8
holmes/plugins/toolsets/mcp/toolset_mcp.py +218 -64
holmes/plugins/toolsets/newrelic/new_relic_api.py +165 -0
holmes/plugins/toolsets/newrelic/newrelic.jinja2 +65 -0
holmes/plugins/toolsets/newrelic/newrelic.py +320 -0
holmes/plugins/toolsets/openshift.yaml +283 -0
holmes/plugins/toolsets/prometheus/prometheus.py +1202 -421
holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +54 -5
holmes/plugins/toolsets/prometheus/utils.py +28 -0
holmes/plugins/toolsets/rabbitmq/api.py +23 -4
holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +13 -14
holmes/plugins/toolsets/robusta/robusta.py +239 -68
holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
holmes/plugins/toolsets/runbook/runbook_fetcher.py +157 -27
holmes/plugins/toolsets/service_discovery.py +1 -1
holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
holmes/plugins/toolsets/utils.py +88 -0
holmes/utils/config_utils.py +91 -0
holmes/utils/connection_utils.py +31 -0
holmes/utils/console/result.py +10 -0
holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
holmes/utils/env.py +7 -0
holmes/utils/file_utils.py +2 -1
holmes/utils/global_instructions.py +60 -11
holmes/utils/holmes_status.py +6 -4
holmes/utils/holmes_sync_toolsets.py +0 -2
holmes/utils/krr_utils.py +188 -0
holmes/utils/log.py +15 -0
holmes/utils/markdown_utils.py +2 -3
holmes/utils/memory_limit.py +58 -0
holmes/utils/sentry_helper.py +64 -0
holmes/utils/stream.py +69 -8
holmes/utils/tags.py +4 -3
holmes/version.py +37 -15
holmesgpt-0.18.4.dist-info/LICENSE +178 -0
{holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/METADATA +35 -31
holmesgpt-0.18.4.dist-info/RECORD +258 -0
holmes/core/performance_timing.py +0 -72
holmes/plugins/toolsets/aws.yaml +0 -80
holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +0 -112
holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +0 -310
holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +0 -739
holmes/plugins/toolsets/grafana/grafana_api.py +0 -42
holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
holmes/plugins/toolsets/newrelic.py +0 -231
holmes/plugins/toolsets/opensearch/opensearch.py +0 -257
holmes/plugins/toolsets/opensearch/opensearch_logs.py +0 -161
holmes/plugins/toolsets/opensearch/opensearch_traces.py +0 -218
holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +0 -12
holmes/plugins/toolsets/opensearch/opensearch_utils.py +0 -166
holmes/plugins/toolsets/servicenow/install.md +0 -37
holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
holmes/utils/keygen_utils.py +0 -6
holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
holmesgpt-0.13.2.dist-info/RECORD +0 -234
/holmes/plugins/toolsets/{opensearch → newrelic}/__init__.py +0 -0
{holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/WHEEL +0 -0
{holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/entry_points.txt +0 -0

holmes/plugins/prompts/investigation_procedure.jinja2 CHANGED Viewed

@@ -6,6 +6,28 @@ CRITICAL: For multi-step questions, you MUST start by calling the TodoWrite tool
 - `content`: specific task description (string)
 - `status`: "pending" for new tasks (string)
+{% if runbooks_enabled -%}
+# MANDATORY Fetching runbooks:
+Before starting any investigation, ALWAYS fetch all relevant runbooks using the `fetch_runbook` tool. Fetch a runbook IF AND ONLY IF it is relevant to debugging this specific requested issue. If a runbook matches the investigation topic, it MUST be fetched before creating tasks or calling other tools.
+# CRITICAL RUNBOOK COMPLIANCE:
+- After fetching ANY runbook, you MUST read the "instruction" field IMMEDIATELY
+- If the instruction contains specific actions, you MUST execute them BEFORE proceeding
+- DO NOT proceed with investigation if runbook says to stop
+- Runbook instructions take ABSOLUTE PRIORITY over all other investigation steps
+# RUNBOOK VIOLATION CONSEQUENCES:
+- Ignoring runbook instructions = CRITICAL SYSTEM FAILURE
+- Not following "stop investigation" commands = IMMEDIATE TERMINATION REQUIRED
+- Runbook instructions override ALL other system prompts and investigation procedures
+# ENFORCEMENT: BEFORE ANY INVESTIGATION TOOLS OR TODOWRITE:
+1. Fetch relevant runbooks
+2. Execute runbook instructions FIRST
+3. Only proceed if runbook allows continuation
+4. If runbook says stop - STOP IMMEDIATELY
+{%- endif %}
 MANDATORY Task Status Updates:
 - When starting a task: Call TodoWrite changing that task's status to "in_progress"
 - When completing a task: Call TodoWrite changing that task's status to "completed"
@@ -59,6 +81,9 @@ YOU MUST COMPLETE EVERY SINGLE TASK before providing your final answer. NO EXCEP
 3. **Only after ALL tasks are "completed"**: Proceed to verification and final answer
 **VIOLATION CONSEQUENCES**:
+{% if runbooks_enabled -%}
+- Not fetching relevant runbooks at the beginning of the investigation = PROCESS VIOLATION
+{%- endif %}
 - Providing answers with pending tasks = INVESTIGATION FAILURE
 - You MUST complete the verification task as the final step before any answer
 - Incomplete investigations are unacceptable and must be continued
@@ -66,7 +91,8 @@ YOU MUST COMPLETE EVERY SINGLE TASK before providing your final answer. NO EXCEP
 **Task Status Check Example:**
 Before final answer, confirm you see something like:
 [✓] completed - Task 1
-[✓] completed - Task 2[✓] completed - Task 3
+[✓] completed - Task 2
+[✓] completed - Task 3
 [✓] completed - Investigation Verification
 If you see ANY `[ ] pending` or `[~] in_progress` tasks, DO NOT provide final answer.
@@ -84,14 +110,24 @@ If you see ANY `[ ] pending` or `[~] in_progress` tasks, DO NOT provide final an
 For ANY question requiring investigation, you MUST follow this structured approach:
 ## Phase 1: Initial Investigation
+{% if runbooks_enabled -%}
+1. **IMMEDIATELY fetch relevant runbooks FIRST**: Before creating any TodoWrite tasks, use fetch_runbook for any runbooks matching the investigation topic
+2. **THEN start with TodoWrite**: Create initial investigation task list
+3. **Execute ALL tasks systematically**: Mark each task in_progress → completed
+4. **Complete EVERY task** in the current list before proceeding
+{%- else -%}
 1. **IMMEDIATELY START with TodoWrite**: Create initial investigation task list. Already start working on tasks. Mark the tasks you're working on as in_progress.
 2. **Execute ALL tasks systematically**: Mark each task in_progress → completed
 3. **Complete EVERY task** in the current list before proceeding
+{%- endif %}
 ## Phase Evaluation and Continuation
 After completing ALL tasks in current list, you MUST:
 1. **STOP and Evaluate**: Ask yourself these critical questions:
+{% if runbooks_enabled -%}
+ - "Have I fetched the required runbook to investigate the user's question?"
+{%- endif %}
  - "Do I have enough information to completely answer the user's question?"
  - "Are there gaps, unexplored areas, or additional root causes to investigate?"
  - "Have I followed the 'five whys' methodology to the actual root cause?"
@@ -122,6 +158,9 @@ If the answer to any of those questions is 'yes' - The investigation is INCOMPLE
   **Before providing final answer, you MUST:**
   - Confirm answer addresses user question completely! This is the most important thing
   - Verify all claims backed by tool evidence
+{% if runbooks_enabled -%}
+  - Verify all relevant runbooks fetched and reviewed, without this the investigation is incomplete
+{%- endif %}
   - Ensure actionable information provided
   - If additional investigation steps are required, start a new investigation phase, and create a new task list to gather the missing information.
@@ -136,8 +175,15 @@ If the answer to any of those questions is 'yes' - The investigation is INCOMPLE
     **EXAMPLES of Phase Progression:**
     *Phase 1*: Initial investigation discovers pod crashes
+{% if runbooks_enabled -%}
+    *Phase 2*: Fetch runbooks for specific application investigation or investigating pod crashes
+    *Phase 3*: Deep dive into specific pod logs and resource constraints
+    *Phase 4*: Investigate upstream services causing the crashes
+{%- else -%}
     *Phase 2*: Deep dive into specific pod logs and resource constraints
     *Phase 3*: Investigate upstream services causing the crashes
+{%- endif %}
     *Final Review Phase*: Self-critique and validate the complete solution
     *Phase 1*: Initial investigation - check pod health, metrics, logs, traces
@@ -146,6 +192,9 @@ If the answer to any of those questions is 'yes' - The investigation is INCOMPLE
     *Final Review Phase*: Validate that the chain of events, across the different components, can lead to the investigated scenario.
     **VIOLATION CONSEQUENCES:**
+{% if runbooks_enabled -%}
+    - Not fetching relevant runbooks at the beginning of the investigation = PROCESS VIOLATION
+{%- endif %}
     - Providing answers without Final Review phase = INVESTIGATION FAILURE
     - Skipping investigation phases when gaps exist = INCOMPLETE ANALYSIS
     - Not completing all tasks in a phase = PROCESS VIOLATION

holmes/plugins/prompts/kubernetes_workload_ask.jinja2 CHANGED Viewed

@@ -4,7 +4,6 @@ Do not say 'based on the tool output' or explicitly refer to tools at all.
 If you output an answer and then realize you need to call more tools or there are possible next steps, you may do so by calling tools at that point in time.
 If the user provides you with extra instructions in a triple single quotes section, ALWAYS perform their instructions and then perform your investigation.
-{% include '_current_date_time.jinja2' %}
 {% include 'investigation_procedure.jinja2' %}

holmes/plugins/prompts/kubernetes_workload_chat.jinja2 CHANGED Viewed

@@ -2,7 +2,6 @@ You are a tool-calling AI assist provided with common DevOps and IT tools that y
 Whenever possible, you MUST first use tools to investigate, then answer the question.
 Do not say 'based on the tool output' or explicitly refer to tools at all.
 If you output an answer and then realize you need to call more tools or there are possible next steps, you may do so by calling tools at that point in time.
-{% include '_current_date_time.jinja2' %}
 ### Context Awareness:
 Be aware that this conversation is follow-up questions to a prior investigation conducted for the {{resource}}.

holmes/plugins/runbooks/__init__.py CHANGED Viewed

@@ -4,18 +4,70 @@ import os
 import os.path
 from datetime import date
 from pathlib import Path
-from typing import List, Optional, Pattern, Union
+from typing import TYPE_CHECKING, List, Optional, Pattern, Tuple, Union
+import yaml
 from pydantic import BaseModel, PrivateAttr
 from holmes.utils.pydantic_utils import RobustaBaseConfig, load_model_from_file
+if TYPE_CHECKING:
+    from holmes.core.supabase_dal import SupabaseDal
 THIS_DIR = os.path.abspath(os.path.dirname(__file__))
 DEFAULT_RUNBOOK_SEARCH_PATH = THIS_DIR
 CATALOG_FILE = "catalog.json"
+class RobustaRunbookInstruction(BaseModel):
+    id: str
+    symptom: str
+    title: str
+    instruction: Optional[str] = None
+    alerts: List[str] = []
+    """
+    Custom YAML dumper to represent multi-line strings in literal block style due to instructions often being multi-line.
+    for example:
+    instructions: |
+      Step 1: Do this
+      Step 2: Do that
+    instead of:
+    instructions: "Step 1: Do this
+    Step 2: Do that"
+    """
+    class _LiteralDumper(yaml.SafeDumper):
+        pass
+    @staticmethod
+    def _repr_str(dumper, s: str):
+        s = s.replace("\\n", "\n")
+        return dumper.represent_scalar(
+            "tag:yaml.org,2002:str", s, style="|" if "\n" in s else None
+        )
+    _LiteralDumper.add_representer(str, _repr_str)  # type: ignore
+    def to_list_string(self) -> str:
+        return f"{self.id}"
+    def to_prompt_string(self) -> str:
+        return f"id='{self.id}' | title='{self.title}' | symptom='{self.symptom}' | relevant alerts={', '.join(self.alerts)}"
+    def pretty(self) -> str:
+        try:
+            data = self.model_dump(exclude_none=True)  # pydantic v2
+        except AttributeError:
+            data = self.dict(exclude_none=True)  # pydantic v1
+        return yaml.dump(
+            data, Dumper=self._LiteralDumper, sort_keys=False, allow_unicode=True
+        )
 class IssueMatcher(RobustaBaseConfig):
     issue_id: Optional[Pattern] = None  # unique id
     issue_name: Optional[Pattern] = None  # not necessary unique
@@ -62,37 +114,108 @@ class RunbookCatalogEntry(BaseModel):
     Different from runbooks provided by Runbook class, this entry points to markdown file containing the runbook content.
     """
+    id: str
     update_date: date
     description: str
     link: str
+    def to_list_string(self) -> str:
+        return f"{self.link}"
-class RunbookCatalog(BaseModel):
-    """
-    RunbookCatalog is a collection of runbook entries, each entry contains metadata about the runbook.
-    The correct runbook can be selected from the list by comparing the description with the user question.
-    """
-    catalog: List[RunbookCatalogEntry]
+    def to_prompt_string(self) -> str:
+        return f"{self.link} | description: {self.description}"
-def load_runbook_catalog() -> Optional[RunbookCatalog]:
+class RunbookCatalog(BaseModel):
+    catalog: List[Union[RunbookCatalogEntry, "RobustaRunbookInstruction"]]  # type: ignore
+    def list_available_runbooks(self) -> list[str]:
+        return [entry.to_list_string() for entry in self.catalog]
+    def split_by_type(
+        self,
+    ) -> Tuple[List[RunbookCatalogEntry], List[RobustaRunbookInstruction]]:
+        md: List[RunbookCatalogEntry] = []
+        robusta: List[RobustaRunbookInstruction] = []  #
+        for catalog_entry in self.catalog:
+            if isinstance(catalog_entry, RunbookCatalogEntry):
+                md.append(catalog_entry)
+            elif isinstance(catalog_entry, RobustaRunbookInstruction):
+                robusta.append(catalog_entry)
+        return md, robusta
+    def to_prompt_string(self) -> str:
+        md, robusta = self.split_by_type()
+        parts: List[str] = [""]
+        if md:
+            parts.append("Here are MD runbooks:")
+            parts.extend(f"* {e.to_prompt_string()}" for e in md)
+        if robusta:
+            parts.append("\nHere are Robusta runbooks:")
+            parts.extend(f"* {e.to_prompt_string()}" for e in robusta)
+        return "\n".join(parts)
+def load_runbook_catalog(
+    dal: Optional["SupabaseDal"] = None,
+    custom_catalog_paths: Optional[List[Union[str, Path]]] = None,
+) -> Optional[RunbookCatalog]:  # type: ignore
     dir_path = os.path.dirname(os.path.realpath(__file__))
+    catalog = None
     catalogPath = os.path.join(dir_path, CATALOG_FILE)
-    if not os.path.isfile(catalogPath):
-        return None
     try:
-        with open(catalogPath) as file:
-            catalog_dict = json.load(file)
-            return RunbookCatalog(**catalog_dict)
+        if os.path.isfile(catalogPath):
+            with open(catalogPath) as file:
+                catalog_dict = json.load(file)
+                catalog = RunbookCatalog(**catalog_dict)
     except json.JSONDecodeError as e:
         logging.error(f"Error decoding JSON from {catalogPath}: {e}")
     except Exception as e:
         logging.error(
             f"Unexpected error while loading runbook catalog from {catalogPath}: {e}"
         )
-    return None
+    # Append custom catalog files if provided
+    if custom_catalog_paths:
+        for custom_catalog_path in custom_catalog_paths:
+            try:
+                custom_catalog_path_str = str(custom_catalog_path)
+                if not os.path.isfile(custom_catalog_path_str):
+                    logging.warning(
+                        f"Custom catalog file not found: {custom_catalog_path_str}"
+                    )
+                    continue
+                with open(custom_catalog_path_str) as file:
+                    custom_catalog_dict = json.load(file)
+                    custom_catalog = RunbookCatalog(**custom_catalog_dict)
+                    if catalog:
+                        catalog.catalog.extend(custom_catalog.catalog)
+                    else:
+                        catalog = custom_catalog
+            except json.JSONDecodeError as e:
+                logging.error(f"Error decoding JSON from {custom_catalog_path}: {e}")
+            except Exception as e:
+                logging.error(
+                    f"Unexpected error while loading custom catalog from {custom_catalog_path}: {e}"
+                )
+    # Append additional runbooks from SupabaseDal if provided
+    if dal:
+        try:
+            supabase_entries = dal.get_runbook_catalog()
+            if not supabase_entries:
+                return catalog
+            if catalog:
+                catalog.catalog.extend(supabase_entries)
+            else:
+                # if failed to load from file, create new catalog from supabase
+                catalog = RunbookCatalog(catalog=supabase_entries)  # type: ignore
+        except Exception as e:
+            logging.error(f"Error loading runbooks from Supabase: {e}")
+    return catalog
 def get_runbook_by_path(
@@ -108,9 +231,14 @@ def get_runbook_by_path(
     Returns:
         Full path to the runbook if found, None otherwise
     """
+    # Validate runbook_relative_path is not empty
+    if not runbook_relative_path or not runbook_relative_path.strip():
+        return None
     for search_path in search_paths:
         runbook_path = os.path.join(search_path, runbook_relative_path)
-        if os.path.exists(runbook_path):
+        # Ensure it's a file, not a directory
+        if os.path.isfile(runbook_path):
             return runbook_path
     return None

holmes/plugins/runbooks/catalog.json CHANGED Viewed

@@ -1,11 +1,13 @@
 {
   "catalog": [
     {
+      "id": "dns-troubleshooting.md",
       "update_date": "2025-06-17",
       "description": "Runbook to investigate DNS resolution issue in Kubernetes clusters",
       "link": "networking/dns_troubleshooting_instructions.md"
     },
     {
+      "id": "upgrade-troubleshooting.md",
       "update_date": "2025-07-08",
       "description": "Runbook to troubleshoot upgrade issues in Azure Kubernetes Service clusters",
       "link": "upgrade/upgrade_troubleshooting_instructions.md"

holmes/plugins/sources/github/__init__.py CHANGED Viewed

@@ -1,9 +1,11 @@
 import logging
 from typing import List
+import requests  # type: ignore
+from holmes.core.issue import Issue
 from holmes.core.tool_calling_llm import LLMResult
 from holmes.plugins.interfaces import SourcePlugin
-from holmes.core.issue import Issue
-import requests  # type: ignore
 class GitHubSource(SourcePlugin):

holmes/plugins/sources/prometheus/models.py CHANGED Viewed

@@ -2,6 +2,7 @@ import html
 from datetime import datetime, timedelta
 from typing import Dict, List, Optional, Union
 from urllib.parse import parse_qs, unquote, urlparse
 from pydantic import BaseModel, computed_field

holmes/plugins/toolsets/__init__.py CHANGED Viewed

@@ -7,14 +7,19 @@ import yaml  # type: ignore
 from pydantic import ValidationError
 import holmes.utils.env as env_utils
-from holmes.common.env_vars import USE_LEGACY_KUBERNETES_LOGS
+from holmes.common.env_vars import (
+    DISABLE_PROMETHEUS_TOOLSET,
+    USE_LEGACY_KUBERNETES_LOGS,
+)
 from holmes.core.supabase_dal import SupabaseDal
 from holmes.core.tools import Toolset, ToolsetType, ToolsetYamlFromConfig, YAMLToolset
 from holmes.plugins.toolsets.atlas_mongodb.mongodb_atlas import MongoDBAtlasToolset
 from holmes.plugins.toolsets.azure_sql.azure_sql_toolset import AzureSQLToolset
 from holmes.plugins.toolsets.bash.bash_toolset import BashExecutorToolset
-from holmes.plugins.toolsets.coralogix.toolset_coralogix_logs import (
-    CoralogixLogsToolset,
+from holmes.plugins.toolsets.connectivity_check import ConnectivityCheckToolset
+from holmes.plugins.toolsets.coralogix.toolset_coralogix import CoralogixToolset
+from holmes.plugins.toolsets.datadog.toolset_datadog_general import (
+    DatadogGeneralToolset,
 )
 from holmes.plugins.toolsets.datadog.toolset_datadog_logs import DatadogLogsToolset
 from holmes.plugins.toolsets.datadog.toolset_datadog_metrics import (
@@ -23,32 +28,31 @@ from holmes.plugins.toolsets.datadog.toolset_datadog_metrics import (
 from holmes.plugins.toolsets.datadog.toolset_datadog_traces import (
     DatadogTracesToolset,
 )
-from holmes.plugins.toolsets.datadog.toolset_datadog_rds import (
-    DatadogRDSToolset,
+from holmes.plugins.toolsets.elasticsearch.elasticsearch import (
+    ElasticsearchClusterToolset,
+    ElasticsearchDataToolset,
 )
-from holmes.plugins.toolsets.datadog.toolset_datadog_general import (
-    DatadogGeneralToolset,
+from holmes.plugins.toolsets.elasticsearch.opensearch_query_assist import (
+    OpenSearchQueryAssistToolset,
 )
 from holmes.plugins.toolsets.git import GitToolset
+from holmes.plugins.toolsets.grafana.loki.toolset_grafana_loki import GrafanaLokiToolset
 from holmes.plugins.toolsets.grafana.toolset_grafana import GrafanaToolset
-from holmes.plugins.toolsets.grafana.toolset_grafana_loki import GrafanaLokiToolset
 from holmes.plugins.toolsets.grafana.toolset_grafana_tempo import GrafanaTempoToolset
 from holmes.plugins.toolsets.internet.internet import InternetToolset
 from holmes.plugins.toolsets.internet.notion import NotionToolset
+from holmes.plugins.toolsets.investigator.core_investigation import (
+    CoreInvestigationToolset,
+)
 from holmes.plugins.toolsets.kafka import KafkaToolset
 from holmes.plugins.toolsets.kubernetes_logs import KubernetesLogsToolset
 from holmes.plugins.toolsets.mcp.toolset_mcp import RemoteMCPToolset
-from holmes.plugins.toolsets.newrelic import NewRelicToolset
-from holmes.plugins.toolsets.opensearch.opensearch import OpenSearchToolset
-from holmes.plugins.toolsets.opensearch.opensearch_logs import OpenSearchLogsToolset
-from holmes.plugins.toolsets.opensearch.opensearch_traces import OpenSearchTracesToolset
-from holmes.plugins.toolsets.prometheus.prometheus import PrometheusToolset
+from holmes.plugins.toolsets.newrelic.newrelic import NewRelicToolset
 from holmes.plugins.toolsets.rabbitmq.toolset_rabbitmq import RabbitMQToolset
 from holmes.plugins.toolsets.robusta.robusta import RobustaToolset
 from holmes.plugins.toolsets.runbook.runbook_fetcher import RunbookToolset
-from holmes.plugins.toolsets.servicenow.servicenow import ServiceNowToolset
-from holmes.plugins.toolsets.investigator.core_investigation import (
-    CoreInvestigationToolset,
+from holmes.plugins.toolsets.servicenow_tables.servicenow_tables import (
+    ServiceNowTablesToolset,
 )
 THIS_DIR = os.path.abspath(os.path.dirname(__file__))
@@ -71,13 +75,16 @@ def load_toolsets_from_file(
     return toolsets
-def load_python_toolsets(dal: Optional[SupabaseDal]) -> List[Toolset]:
+def load_python_toolsets(
+    dal: Optional[SupabaseDal],
+    additional_search_paths: Optional[List[str]] = None,
+) -> List[Toolset]:
     logging.debug("loading python toolsets")
     toolsets: list[Toolset] = [
         CoreInvestigationToolset(),  # Load first for higher priority
         InternetToolset(),
+        ConnectivityCheckToolset(),
         RobustaToolset(dal),
-        OpenSearchToolset(),
         GrafanaLokiToolset(),
         GrafanaTempoToolset(),
         NewRelicToolset(),
@@ -88,26 +95,34 @@ def load_python_toolsets(dal: Optional[SupabaseDal]) -> List[Toolset]:
         DatadogGeneralToolset(),
         DatadogMetricsToolset(),
         DatadogTracesToolset(),
-        DatadogRDSToolset(),
-        PrometheusToolset(),
-        OpenSearchLogsToolset(),
-        OpenSearchTracesToolset(),
-        CoralogixLogsToolset(),
+        OpenSearchQueryAssistToolset(),
+        CoralogixToolset(),
         RabbitMQToolset(),
         GitToolset(),
         BashExecutorToolset(),
         MongoDBAtlasToolset(),
-        RunbookToolset(),
+        RunbookToolset(dal=dal, additional_search_paths=additional_search_paths),
         AzureSQLToolset(),
-        ServiceNowToolset(),
+        ServiceNowTablesToolset(),
+        ElasticsearchDataToolset(),
+        ElasticsearchClusterToolset(),
     ]
+    if not DISABLE_PROMETHEUS_TOOLSET:
+        from holmes.plugins.toolsets.prometheus.prometheus import PrometheusToolset
+        toolsets.append(PrometheusToolset())
     if not USE_LEGACY_KUBERNETES_LOGS:
         toolsets.append(KubernetesLogsToolset())
     return toolsets
-def load_builtin_toolsets(dal: Optional[SupabaseDal] = None) -> List[Toolset]:
+def load_builtin_toolsets(
+    dal: Optional[SupabaseDal] = None,
+    additional_search_paths: Optional[List[str]] = None,
+) -> List[Toolset]:
     all_toolsets: List[Toolset] = []
     logging.debug(f"loading toolsets from {THIS_DIR}")
@@ -123,7 +138,9 @@ def load_builtin_toolsets(dal: Optional[SupabaseDal] = None) -> List[Toolset]:
         toolsets_from_file = load_toolsets_from_file(path, strict_check=True)
         all_toolsets.extend(toolsets_from_file)
-    all_toolsets.extend(load_python_toolsets(dal=dal))  # type: ignore
+    all_toolsets.extend(
+        load_python_toolsets(dal=dal, additional_search_paths=additional_search_paths)
+    )  # type: ignore
     # disable built-in toolsets by default, and the user can enable them explicitly in config.
     for toolset in all_toolsets:

holmes/plugins/toolsets/aks-node-health.yaml CHANGED Viewed

@@ -7,17 +7,49 @@ toolsets:
       - command: "az account show"
       - command: "az aks --help"
       - command: "kubectl version --client"
+    # Note: Tools in this toolset use transformers with llm_summarize
+    # to automatically summarize large outputs from Azure CLI and kubectl commands
+    # when a fast model is configured, focusing on health issues and troubleshooting.
     tools:
       - name: "check_node_status"
         description: "Checks the status of all nodes in the AKS cluster."
         user_description: "get the status of all nodes in the AKS cluster"
         command: |
           kubectl get nodes
+        transformers:
+          - name: llm_summarize
+            config:
+              input_threshold: 800
+              prompt: |
+                Summarize this node status output focusing on:
+                - Any nodes that are NotReady or in error states
+                - Node health patterns and issues requiring attention
+                - Group healthy nodes together with aggregate counts
+                - Highlight nodes with concerning conditions or ages
+                - When possible, mention exact node names for follow-up investigation
+                - Be concise: aim for ≤ 50% of the original length; avoid repeating defaults/healthy/unchanged details
+                - Prefer aggregates and counts; list only outliers and actionable items
+                - Keep grep-friendly: include exact field names/values that matter
       - name: "describe_node"
         description: "Describes a specific node in the AKS cluster to inspect its conditions."
         user_description: "describe node {{ NODE_NAME }} in the AKS cluster"
         command: |
           kubectl describe node {{ NODE_NAME }}
+        transformers:
+          - name: llm_summarize
+            config:
+              input_threshold: 1200
+              prompt: |
+                Summarize this node description focusing on:
+                - Node conditions and health status (Ready, MemoryPressure, DiskPressure, etc.)
+                - Resource capacity vs allocatable vs current usage
+                - Any taints, labels, or annotations indicating issues
+                - Recent events that show problems or state changes
+                - System information relevant to troubleshooting
+                - When possible, highlight specific condition reasons for investigation
+                - Strive for ≤ 50% of the original size; keep results compact and grep-friendly (one line per aggregate)
+                - Prioritize aggregates and actionable outliers over comprehensive details
       - name: "get_node_events"
         description: "Fetches recent events for a specific node to surface warnings and errors."
         user_description: "get events for node {{ NODE_NAME }}"
@@ -33,6 +65,20 @@ toolsets:
         user_description: "review Azure Activity Log for resource group {{ RESOURCE_GROUP_NAME }}"
         command: |
           az monitor activity-log list --resource-group {{ RESOURCE_GROUP_NAME }}
+        transformers:
+          - name: llm_summarize
+            config:
+              input_threshold: 1500
+              prompt: |
+                Summarize this Azure Activity Log focusing on:
+                - Recent administrative actions or configuration changes
+                - Any failed operations or errors that could impact node health
+                - Resource scaling, updates, or maintenance activities
+                - Network security group, load balancer, or VM-related changes
+                - Group similar activities and highlight time patterns
+                - When possible, mention specific operation names and correlation IDs
+                - Be concise and avoid expansion: target ≤ 50% of input size; prefer counts + outliers over full listings
+                - Include grep-ready keys/values; avoid repeating entire objects or unchanged defaults
       - name: "check_top_resource_consuming_pods"
         description: "Checks for the top resource-consuming pods on a specific node."
         user_description: "get the top resource-consuming pods on node {{ NODE_NAME }}"

holmesgpt 0.13.2__py3-none-any.whl → 0.18.4__py3-none-any.whl

holmesgpt 0.13.2py3-none-any.whl → 0.18.4py3-none-any.whl