PyPI - holmesgpt - Versions diffs - 0.11.5__py3-none-any.whl → 0.12.0a0__py3-none-any.whl - Mend

holmesgpt 0.11.5__py3-none-any.whl → 0.12.0a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of holmesgpt might be problematic. Click here for more details.

Files changed (41)

holmes/__init__.py +1 -1
holmes/common/env_vars.py +8 -4
holmes/config.py +54 -14
holmes/core/investigation_structured_output.py +7 -0
holmes/core/llm.py +14 -4
holmes/core/models.py +24 -0
holmes/core/tool_calling_llm.py +48 -6
holmes/core/tools.py +7 -4
holmes/core/toolset_manager.py +24 -5
holmes/core/tracing.py +224 -0
holmes/interactive.py +761 -44
holmes/main.py +59 -127
holmes/plugins/prompts/_fetch_logs.jinja2 +4 -0
holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +2 -10
holmes/plugins/toolsets/__init__.py +10 -2
holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +3 -0
holmes/plugins/toolsets/datadog/datadog_api.py +161 -0
holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +26 -0
holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +310 -0
holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +51 -0
holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +267 -0
holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +488 -0
holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +689 -0
holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +3 -0
holmes/plugins/toolsets/internet/internet.py +1 -1
holmes/plugins/toolsets/logging_utils/logging_api.py +9 -3
holmes/plugins/toolsets/opensearch/opensearch_logs.py +3 -0
holmes/plugins/toolsets/utils.py +6 -2
holmes/utils/cache.py +4 -4
holmes/utils/console/consts.py +2 -0
holmes/utils/console/logging.py +95 -0
holmes/utils/console/result.py +37 -0
holmes/utils/robusta.py +2 -3
{holmesgpt-0.11.5.dist-info → holmesgpt-0.12.0a0.dist-info}/METADATA +3 -4
{holmesgpt-0.11.5.dist-info → holmesgpt-0.12.0a0.dist-info}/RECORD +39 -30
{holmesgpt-0.11.5.dist-info → holmesgpt-0.12.0a0.dist-info}/WHEEL +1 -1
holmes/__init__.py.bak +0 -76
holmes/plugins/toolsets/datadog.py +0 -153
{holmesgpt-0.11.5.dist-info → holmesgpt-0.12.0a0.dist-info}/LICENSE.txt +0 -0
{holmesgpt-0.11.5.dist-info → holmesgpt-0.12.0a0.dist-info}/entry_points.txt +0 -0

holmes/main.py CHANGED Viewed

@@ -1,5 +1,6 @@
 # ruff: noqa: E402
 import os
+import sys
 from holmes.utils.cert_utils import add_custom_certificate
@@ -15,14 +16,11 @@ import json
 import logging
 import socket
 import uuid
-import warnings
-from enum import Enum
+from datetime import datetime
 from pathlib import Path
 from typing import List, Optional
 import typer
-from rich.console import Console
-from rich.logging import RichHandler
 from rich.markdown import Markdown
 from rich.rule import Rule
@@ -35,13 +33,16 @@ from holmes.config import (
 )
 from holmes.core.prompt import build_initial_ask_messages
 from holmes.core.resource_instruction import ResourceInstructionDocument
-from holmes.core.tool_calling_llm import LLMResult
 from holmes.core.tools import pretty_print_toolset_status
+from holmes.core.tracing import SpanType, TracingFactory
 from holmes.interactive import run_interactive_loop
 from holmes.plugins.destinations import DestinationType
 from holmes.plugins.interfaces import Issue
 from holmes.plugins.prompts import load_and_render_prompt
 from holmes.plugins.sources.opsgenie import OPSGENIE_TEAM_INTEGRATION_KEY_HELP
+from holmes.utils.console.consts import system_prompt_help
+from holmes.utils.console.logging import init_logging
+from holmes.utils.console.result import handle_result
 from holmes.utils.file_utils import write_json_file
 app = typer.Typer(add_completion=False, pretty_exceptions_show_locals=False)
@@ -68,94 +69,6 @@ toolset_app = typer.Typer(
 app.add_typer(toolset_app, name="toolset")
-class Verbosity(Enum):
-    NORMAL = 0
-    LOG_QUERIES = 1  # TODO: currently unused
-    VERBOSE = 2
-    VERY_VERBOSE = 3
-def cli_flags_to_verbosity(verbose_flags: List[bool]) -> Verbosity:
-    if verbose_flags is None or len(verbose_flags) == 0:
-        return Verbosity.NORMAL
-    elif len(verbose_flags) == 1:
-        return Verbosity.LOG_QUERIES
-    elif len(verbose_flags) == 2:
-        return Verbosity.VERBOSE
-    else:
-        return Verbosity.VERY_VERBOSE
-def suppress_noisy_logs():
-    # disable INFO logs from OpenAI
-    logging.getLogger("httpx").setLevel(logging.WARNING)
-    # disable INFO logs from LiteLLM
-    logging.getLogger("LiteLLM").setLevel(logging.WARNING)
-    # disable INFO logs from AWS (relevant when using bedrock)
-    logging.getLogger("boto3").setLevel(logging.WARNING)
-    logging.getLogger("botocore").setLevel(logging.WARNING)
-    # when running in --verbose mode we don't want to see DEBUG logs from these libraries
-    logging.getLogger("openai._base_client").setLevel(logging.INFO)
-    logging.getLogger("httpcore").setLevel(logging.INFO)
-    logging.getLogger("markdown_it").setLevel(logging.INFO)
-    # suppress UserWarnings from the slack_sdk module
-    warnings.filterwarnings("ignore", category=UserWarning, module="slack_sdk.*")
-def init_logging(verbose_flags: Optional[List[bool]] = None):
-    verbosity = cli_flags_to_verbosity(verbose_flags)  # type: ignore
-    if verbosity == Verbosity.VERY_VERBOSE:
-        logging.basicConfig(
-            level=logging.DEBUG,
-            format="%(message)s",
-            handlers=[
-                RichHandler(
-                    show_level=False,
-                    markup=True,
-                    show_time=False,
-                    show_path=False,
-                    console=Console(width=None),
-                )
-            ],
-        )
-    elif verbosity == Verbosity.VERBOSE:
-        logging.basicConfig(
-            level=logging.INFO,
-            format="%(message)s",
-            handlers=[
-                RichHandler(
-                    show_level=False,
-                    markup=True,
-                    show_time=False,
-                    show_path=False,
-                    console=Console(width=None),
-                )
-            ],
-        )
-        logging.getLogger().setLevel(logging.DEBUG)
-        suppress_noisy_logs()
-    else:
-        logging.basicConfig(
-            level=logging.INFO,
-            format="%(message)s",
-            handlers=[
-                RichHandler(
-                    show_level=False,
-                    markup=True,
-                    show_time=False,
-                    show_path=False,
-                    console=Console(width=None),
-                )
-            ],
-        )
-        suppress_noisy_logs()
-    logging.debug(f"verbosity is {verbosity}")
-    return Console()
 # Common cli options
 # The defaults for options that are also in the config file MUST be None or else the cli defaults will override settings in the config file
 opt_api_key: Optional[str] = typer.Option(
@@ -231,9 +144,6 @@ opt_documents: Optional[str] = typer.Option(
     help="Additional documents to provide the LLM (typically URLs to runbooks)",
 )
-# Common help texts
-system_prompt_help = "Advanced. System prompt for LLM. Values starting with builtin:// are loaded from holmes/plugins/prompts, values starting with file:// are loaded from the given path, other values are interpreted as a prompt string"
 def parse_documents(documents: Optional[str]) -> List[ResourceInstructionDocument]:
     resource_documents = []
@@ -247,35 +157,6 @@ def parse_documents(documents: Optional[str]) -> List[ResourceInstructionDocumen
     return resource_documents
-def handle_result(
-    result: LLMResult,
-    console: Console,
-    destination: DestinationType,
-    config: Config,
-    issue: Issue,
-    show_tool_output: bool,
-    add_separator: bool,
-):
-    if destination == DestinationType.CLI:
-        if show_tool_output and result.tool_calls:
-            for tool_call in result.tool_calls:
-                console.print("[bold magenta]Used Tool:[/bold magenta]", end="")
-                # we need to print this separately with markup=False because it contains arbitrary text and we don't want console.print to interpret it
-                console.print(
-                    f"{tool_call.description}. Output=\n{tool_call.result}",
-                    markup=False,
-                )
-        console.print("[bold green]AI:[/bold green]", end=" ")
-        console.print(Markdown(result.result))  # type: ignore
-        if add_separator:
-            console.print(Rule())
-    elif destination == DestinationType.SLACK:
-        slack = config.create_slack_destination()
-        slack.send_issue(issue, result)
 # TODO: add streaming output
 @app.command()
 def ask(
@@ -323,11 +204,31 @@ def ask(
         "-i/-n",
         help="Enter interactive mode after the initial question? For scripting, disable this with --no-interactive",
     ),
+    refresh_toolsets: bool = typer.Option(
+        False,
+        "--refresh-toolsets",
+        help="Refresh the toolsets status",
+    ),
+    trace: Optional[str] = typer.Option(
+        None,
+        "--trace",
+        help="Enable tracing to the specified provider (e.g., 'braintrust')",
+    ),
 ):
     """
     Ask any question and answer using available tools
     """
     console = init_logging(verbose)  # type: ignore
+    # Detect and read piped input
+    piped_data = None
+    if not sys.stdin.isatty():
+        piped_data = sys.stdin.read().strip()
+        if interactive:
+            console.print(
+                "[bold yellow]Interactive mode disabled when reading piped input[/bold yellow]"
+            )
+            interactive = False
     config = Config.load_from_file(
         config_file,
         api_key=api_key,
@@ -338,8 +239,17 @@ def ask(
         slack_channel=slack_channel,
     )
+    # Create tracer if trace option is provided
+    tracer = TracingFactory.create_tracer(trace, project="HolmesGPT-CLI")
+    experiment_name = f"holmes-ask-{datetime.now().strftime('%Y%m%d_%H%M%S')}"
+    tracer.start_experiment(
+        experiment_name=experiment_name, metadata={"prompt": prompt or "holmes-ask"}
+    )
     ai = config.create_console_toolcalling_llm(
         dal=None,  # type: ignore
+        refresh_toolsets=refresh_toolsets,  # flag to refresh the toolset status
+        tracer=tracer,
     )
     template_context = {
         "toolsets": ai.tool_executor.toolsets,
@@ -360,11 +270,20 @@ def ask(
         console.print(
             f"[bold yellow]Loaded prompt from file {prompt_file}[/bold yellow]"
         )
-    elif not prompt and not interactive:
+    elif not prompt and not interactive and not piped_data:
         raise typer.BadParameter(
             "Either the 'prompt' argument or the --prompt-file option must be provided (unless using --interactive mode)."
         )
+    # Handle piped data
+    if piped_data:
+        if prompt:
+            # User provided both piped data and a prompt
+            prompt = f"Here's some piped output:\n\n{piped_data}\n\n{prompt}"
+        else:
+            # Only piped data, no prompt - ask what to do with it
+            prompt = f"Here's some piped output:\n\n{piped_data}\n\nWhat can you tell me about this output?"
     if echo_request and not interactive and prompt:
         console.print("[bold yellow]User:[/bold yellow] " + prompt)
@@ -377,6 +296,7 @@ def ask(
             include_file,
             post_processing_prompt,
             show_tool_output,
+            tracer,
         )
         return
@@ -387,7 +307,16 @@ def ask(
         include_file,
     )
-    response = ai.call(messages, post_processing_prompt)
+    with tracer.start_trace(
+        f'holmes ask "{prompt}"', span_type=SpanType.TASK
+    ) as trace_span:
+        trace_span.log(input=prompt, metadata={"type": "user_question"})
+        response = ai.call(messages, post_processing_prompt, trace_span=trace_span)
+        trace_span.log(
+            output=response.result,
+        )
+        trace_url = tracer.get_trace_url()
     messages = response.messages  # type: ignore # Update messages with the full history
     if json_output_file:
@@ -410,6 +339,9 @@ def ask(
         False,  # type: ignore
     )
+    if trace_url:
+        console.print(f"🔍 View trace: {trace_url}")
 @investigate_app.command()
 def alertmanager(

holmes/plugins/prompts/_fetch_logs.jinja2 CHANGED Viewed

@@ -3,6 +3,7 @@
 {%- set k8s_base_ts = toolsets | selectattr("name", "equalto", "kubernetes/logs") | selectattr("fetch_pod_logs", "defined") | first -%}
 {%- set k8s_yaml_ts = toolsets | selectattr("name", "equalto", "kubernetes/logs") | rejectattr("fetch_pod_logs", "defined") | first -%}
 {%- set opensearch_ts = toolsets | selectattr("name", "equalto", "opensearch/logs") | first -%}
+{%- set datadog_ts = toolsets | selectattr("name", "equalto", "datadog/logs") | first -%}
 # Logs
 {% if loki_ts and loki_ts.status == "enabled" -%}
@@ -19,6 +20,8 @@
 {% include '_default_log_prompt.jinja2' %}
 {%- elif k8s_base_ts and k8s_base_ts.status == "enabled" -%}
 {% include '_default_log_prompt.jinja2' %}
+{%- elif datadog_ts and datadog_ts.status == "enabled" -%}
+{% include '_default_log_prompt.jinja2' %}
 {%- elif k8s_yaml_ts and k8s_yaml_ts.status == "enabled" -%}
 * if the user wants to find a specific term in a pod's logs, use kubectl_logs_grep
 * use both kubectl_previous_logs and kubectl_logs when reading logs. Treat the output of both as a single unified logs stream
@@ -33,4 +36,5 @@
 ** 'grafana/loki'
 ** 'opensearch/logs'
 ** 'coralogix/logs'
+** 'datadog/logs'
 {%- endif -%}

holmes/plugins/prompts/kubernetes_workload_ask.jinja2 CHANGED Viewed

@@ -10,7 +10,6 @@ Global Instructions
 You may receive a set of “Global Instructions” that describe how to perform certain tasks, handle certain situations, or apply certain best practices. They are not mandatory for every request, but serve as a reference resource and must be used if the current scenario or user request aligns with one of the described methods or conditions.
 Use these rules when deciding how to apply them:
-* If the user prompt includes Global Instructions, treat them as a reference resource.
 * Some Global Instructions may describe how to handle specific tasks or scenarios. If the user's current request or the instructions in a triple quotes section reference one of these tasks, ALWAYS follow the Global Instruction for that task.
 * Some Global Instructions may define general conditions that always apply if a certain scenario occurs (e.g., "whenever investigating a memory issue, always check resource limits"). If such a condition matches the current situation, apply the Global Instruction accordingly.
 * If user's prompt or the instructions in a triple quotes section direct you to perform a task (e.g., “Find owner”) and there is a Global Instruction on how to do that task, ALWAYS follow the Global Instructions on how to perform it.
@@ -41,10 +40,6 @@ In general:
 * do not give an answer like "Pod's node affinity/selector doesn't match any available nodes" because that doesn't include data on WHICH label doesn't match
 * if investigating an issue on many pods, there is no need to check more than 3 individual pods in the same deployment. pick up to a representative 3 from each deployment if relevant
 * if you find errors and warning in a pods logs and you believe they indicate a real issue. consider the pod as not healthy.
-* if the user says something isn't working, ALWAYS:
-** use kubectl_describe on the owner workload + individual pods and look for any transient issues they might have been referring to
-** check the application aspects by accessing the application logs and other relevant tools
-** look for misconfigured ingresses/services etc
 {% include '_toolsets_instructions.jinja2' %}
@@ -53,9 +48,7 @@ In general:
 Style guide:
 * Be painfully concise.
 * Leave out "the" and filler words when possible.
-* Be terse but not at the expense of leaving out important data like the root cause and how to fix.
-* if asked by Global Instructions or instructions in a triple single quotes section to explicitly include something in the answer, don't leave it out.
-* return a json object with the following schema as a result:
+* your answer should ONLY return a json object with the following schema as a result:
 {
   "type": "object",
   "properties": {
@@ -69,13 +62,12 @@ Style guide:
     }
   },
   "required": [
-    "reasoning",
+    "root_cause_summary",
     "workload_healthy"
   ]
 }
 {% if alerts %}
 Here are issues and configuration changes that happend to this kubernetes workload in recent time. Check if these can help you understand the issue.
 {% for a in alerts %}

holmes/plugins/toolsets/__init__.py CHANGED Viewed

@@ -14,7 +14,13 @@ from holmes.core.tools import Toolset, ToolsetType, ToolsetYamlFromConfig, YAMLT
 from holmes.plugins.toolsets.coralogix.toolset_coralogix_logs import (
     CoralogixLogsToolset,
 )
-from holmes.plugins.toolsets.datadog import DatadogToolset
+from holmes.plugins.toolsets.datadog.toolset_datadog_logs import DatadogLogsToolset
+from holmes.plugins.toolsets.datadog.toolset_datadog_metrics import (
+    DatadogMetricsToolset,
+)
+from holmes.plugins.toolsets.datadog.toolset_datadog_traces import (
+    DatadogTracesToolset,
+)
 from holmes.plugins.toolsets.kubernetes_logs import KubernetesLogsToolset
 from holmes.plugins.toolsets.git import GitToolset
 from holmes.plugins.toolsets.grafana.toolset_grafana import GrafanaToolset
@@ -68,7 +74,9 @@ def load_python_toolsets(dal: Optional[SupabaseDal]) -> List[Toolset]:
         GrafanaToolset(),
         NotionToolset(),
         KafkaToolset(),
-        DatadogToolset(),
+        DatadogLogsToolset(),
+        DatadogMetricsToolset(),
+        DatadogTracesToolset(),
         PrometheusToolset(),
         OpenSearchLogsToolset(),
         OpenSearchTracesToolset(),

holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py CHANGED Viewed

@@ -1,5 +1,4 @@
 from typing import Dict, List
-import pyodbc
 import logging
 import struct
 from azure.core.credentials import TokenCredential
@@ -38,6 +37,8 @@ class AzureSQLAPIClient:
         self, server_name: str, database_name: str, query: str
     ) -> List[Dict]:
         """Execute a T-SQL query against the Azure SQL database."""
+        import pyodbc  # type: ignore
         conn = None
         cursor = None

holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py CHANGED Viewed

@@ -62,6 +62,9 @@ class CoralogixLogsToolset(BasePodLoggingToolset):
     def coralogix_config(self) -> Optional[CoralogixConfig]:
         return self.config
+    def logger_name(self) -> str:
+        return "Coralogix"
     def fetch_pod_logs(self, params: FetchPodLogsParams) -> StructuredToolResult:
         if not self.coralogix_config:
             return StructuredToolResult(

holmes/plugins/toolsets/datadog/datadog_api.py ADDED Viewed

@@ -0,0 +1,161 @@
+import logging
+from typing import Any, Optional, Dict
+import requests  # type: ignore
+from pydantic import AnyUrl, BaseModel
+from requests.structures import CaseInsensitiveDict  # type: ignore
+from tenacity import retry, retry_if_exception, stop_after_attempt, wait_incrementing
+from tenacity.wait import wait_base
+START_RETRY_DELAY = (
+    5.0  # Initial fallback delay if datadog does not return a reset_time
+)
+INCREMENT_RETRY_DELAY = 5.0  # Delay increment after each rate limit, if datadog does not return a reset_time
+MAX_RETRY_COUNT_ON_RATE_LIMIT = 5
+RATE_LIMIT_REMAINING_SECONDS_HEADER = "X-RateLimit-Reset"
+class DatadogBaseConfig(BaseModel):
+    """Base configuration for all Datadog toolsets"""
+    dd_api_key: str
+    dd_app_key: str
+    site_api_url: AnyUrl
+    request_timeout: int = 60
+class DataDogRequestError(Exception):
+    payload: dict
+    status_code: int
+    response_text: str
+    response_headers: CaseInsensitiveDict[str]
+    def __init__(
+        self,
+        payload: dict,
+        status_code: int,
+        response_text: str,
+        response_headers: CaseInsensitiveDict[str],
+    ):
+        super().__init__(f"HTTP error: {status_code} - {response_text}")
+        self.payload = payload
+        self.status_code = status_code
+        self.response_text = response_text
+        self.response_headers = response_headers
+def get_headers(dd_config: DatadogBaseConfig) -> Dict[str, str]:
+    """Get standard headers for Datadog API requests.
+    Args:
+        dd_config: Datadog configuration object
+    Returns:
+        Dictionary of headers for Datadog API requests
+    """
+    return {
+        "Content-Type": "application/json",
+        "DD-API-KEY": dd_config.dd_api_key,
+        "DD-APPLICATION-KEY": dd_config.dd_app_key,
+    }
+def extract_cursor(data: dict) -> Optional[str]:
+    """Extract cursor for paginating through Datadog logs API responses."""
+    if data is None:
+        return None
+    meta = data.get("meta", {})
+    if meta is None:
+        return None
+    page = meta.get("page", {})
+    if page is None:
+        return None
+    return page.get("after", None)
+class retry_if_http_429_error(retry_if_exception):
+    def __init__(self):
+        def is_http_429_error(exception):
+            return (
+                isinstance(exception, DataDogRequestError)
+                and exception.status_code == 429
+            )
+        super().__init__(predicate=is_http_429_error)
+class wait_for_retry_after_header(wait_base):
+    def __init__(self, fallback):
+        self.fallback = fallback
+    def __call__(self, retry_state):
+        if retry_state.outcome:
+            exc = retry_state.outcome.exception()
+            if isinstance(exc, DataDogRequestError) and exc.response_headers.get(
+                RATE_LIMIT_REMAINING_SECONDS_HEADER
+            ):
+                reset_time_header = exc.response_headers.get(
+                    RATE_LIMIT_REMAINING_SECONDS_HEADER
+                )
+                if reset_time_header:
+                    try:
+                        reset_time = int(reset_time_header)
+                        wait_time = max(0, reset_time) + 0.1
+                        return wait_time
+                    except ValueError:
+                        logging.warning(
+                            f"Received invalid {RATE_LIMIT_REMAINING_SECONDS_HEADER} header value from datadog: {reset_time_header}"
+                        )
+        return self.fallback(retry_state)
+@retry(
+    retry=retry_if_http_429_error(),
+    wait=wait_for_retry_after_header(
+        fallback=wait_incrementing(
+            start=START_RETRY_DELAY, increment=INCREMENT_RETRY_DELAY
+        )
+    ),
+    stop=stop_after_attempt(MAX_RETRY_COUNT_ON_RATE_LIMIT),
+    before_sleep=lambda retry_state: logging.warning(
+        f"DataDog API rate limited. Retrying... "
+        f"(attempt {retry_state.attempt_number}/{MAX_RETRY_COUNT_ON_RATE_LIMIT})"
+    ),
+    reraise=True,
+)
+def execute_datadog_http_request(
+    url: str,
+    headers: dict,
+    payload_or_params: dict,
+    timeout: int,
+    method: str = "POST",
+) -> Any:
+    if method == "GET":
+        response = requests.get(
+            url, headers=headers, params=payload_or_params, timeout=timeout
+        )
+    else:
+        response = requests.post(
+            url, headers=headers, json=payload_or_params, timeout=timeout
+        )
+    if response.status_code == 200:
+        response_data = response.json()
+        if method == "POST" and response_data and "data" in response_data:
+            cursor = extract_cursor(response_data)
+            data = response_data.get("data", [])
+            return data, cursor
+        else:
+            return response_data
+    else:
+        raise DataDogRequestError(
+            payload=payload_or_params,
+            status_code=response.status_code,
+            response_text=response.text,
+            response_headers=response.headers,
+        )

holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 ADDED Viewed

@@ -0,0 +1,26 @@
+## Datadog Metrics Tools Usage Guide
+When investigating metrics-related issues:
+1. **Start with `list_active_datadog_metrics`** to discover available metrics
+   - Use filters like `host` or `tag_filter` to narrow results
+   - Default shows metrics from last 24 hours
+2. **Use `query_datadog_metrics`** to fetch actual metric data
+   - Query syntax: `metric_name{tag:value}`
+   - Example: `system.cpu.user{host:myhost}`
+   - Returns timeseries data with timestamps and values
+3. **Use `get_datadog_metric_metadata`** to understand metric properties
+   - Provides metric type (gauge/count/rate), unit, and description
+   - Accepts comma-separated list for batch queries
+### Time Parameters
+- Use RFC3339 format: `2023-03-01T10:30:00Z`
+- Or relative seconds: `-3600` for 1 hour ago
+- Defaults to 1 hour window if not specified
+### Common Patterns
+- CPU investigation: First list metrics with `tag_filter:kube_node_name:nodename`, then query specific metrics
+- Memory issues: Look for `system.mem.*` or `kubernetes.memory.*` metrics
+- Container metrics: Filter by pod/container tags

holmesgpt 0.11.5__py3-none-any.whl → 0.12.0a0__py3-none-any.whl

Potentially problematic release.

holmesgpt 0.11.5__py3-none-any.whl → 0.12.0a0__py3-none-any.whl