holmesgpt 0.16.2a0__py3-none-any.whl → 0.18.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162) hide show
  1. holmes/__init__.py +3 -5
  2. holmes/clients/robusta_client.py +4 -3
  3. holmes/common/env_vars.py +18 -2
  4. holmes/common/openshift.py +1 -1
  5. holmes/config.py +11 -6
  6. holmes/core/conversations.py +30 -13
  7. holmes/core/investigation.py +21 -25
  8. holmes/core/investigation_structured_output.py +3 -3
  9. holmes/core/issue.py +1 -1
  10. holmes/core/llm.py +50 -31
  11. holmes/core/models.py +19 -17
  12. holmes/core/openai_formatting.py +1 -1
  13. holmes/core/prompt.py +47 -2
  14. holmes/core/runbooks.py +1 -0
  15. holmes/core/safeguards.py +4 -2
  16. holmes/core/supabase_dal.py +4 -2
  17. holmes/core/tool_calling_llm.py +102 -141
  18. holmes/core/tools.py +19 -28
  19. holmes/core/tools_utils/token_counting.py +9 -2
  20. holmes/core/tools_utils/tool_context_window_limiter.py +13 -30
  21. holmes/core/tools_utils/tool_executor.py +0 -18
  22. holmes/core/tools_utils/toolset_utils.py +1 -0
  23. holmes/core/toolset_manager.py +37 -2
  24. holmes/core/tracing.py +13 -2
  25. holmes/core/transformers/__init__.py +1 -1
  26. holmes/core/transformers/base.py +1 -0
  27. holmes/core/transformers/llm_summarize.py +3 -2
  28. holmes/core/transformers/registry.py +2 -1
  29. holmes/core/transformers/transformer.py +1 -0
  30. holmes/core/truncation/compaction.py +37 -2
  31. holmes/core/truncation/input_context_window_limiter.py +3 -2
  32. holmes/interactive.py +52 -8
  33. holmes/main.py +17 -37
  34. holmes/plugins/interfaces.py +2 -1
  35. holmes/plugins/prompts/__init__.py +2 -1
  36. holmes/plugins/prompts/_fetch_logs.jinja2 +5 -5
  37. holmes/plugins/prompts/_runbook_instructions.jinja2 +2 -1
  38. holmes/plugins/prompts/base_user_prompt.jinja2 +7 -0
  39. holmes/plugins/prompts/conversation_history_compaction.jinja2 +2 -1
  40. holmes/plugins/prompts/generic_ask.jinja2 +0 -2
  41. holmes/plugins/prompts/generic_ask_conversation.jinja2 +0 -2
  42. holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +0 -2
  43. holmes/plugins/prompts/generic_investigation.jinja2 +0 -2
  44. holmes/plugins/prompts/investigation_procedure.jinja2 +2 -1
  45. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +0 -2
  46. holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +0 -2
  47. holmes/plugins/runbooks/__init__.py +32 -3
  48. holmes/plugins/sources/github/__init__.py +4 -2
  49. holmes/plugins/sources/prometheus/models.py +1 -0
  50. holmes/plugins/toolsets/__init__.py +30 -26
  51. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +13 -12
  52. holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +3 -2
  53. holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
  54. holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +3 -2
  55. holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +3 -1
  56. holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +3 -1
  57. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +12 -12
  58. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +7 -7
  59. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +7 -7
  60. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -5
  61. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +3 -3
  62. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +7 -7
  63. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +6 -8
  64. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +3 -3
  65. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +3 -3
  66. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +3 -3
  67. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +3 -3
  68. holmes/plugins/toolsets/azure_sql/utils.py +0 -32
  69. holmes/plugins/toolsets/bash/argocd/__init__.py +3 -3
  70. holmes/plugins/toolsets/bash/aws/__init__.py +4 -4
  71. holmes/plugins/toolsets/bash/azure/__init__.py +4 -4
  72. holmes/plugins/toolsets/bash/bash_toolset.py +2 -3
  73. holmes/plugins/toolsets/bash/common/bash.py +19 -9
  74. holmes/plugins/toolsets/bash/common/bash_command.py +1 -1
  75. holmes/plugins/toolsets/bash/common/stringify.py +1 -1
  76. holmes/plugins/toolsets/bash/kubectl/__init__.py +2 -1
  77. holmes/plugins/toolsets/bash/kubectl/constants.py +0 -1
  78. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +3 -4
  79. holmes/plugins/toolsets/bash/parse_command.py +12 -13
  80. holmes/plugins/toolsets/connectivity_check.py +124 -0
  81. holmes/plugins/toolsets/coralogix/api.py +132 -119
  82. holmes/plugins/toolsets/coralogix/coralogix.jinja2 +14 -0
  83. holmes/plugins/toolsets/coralogix/toolset_coralogix.py +219 -0
  84. holmes/plugins/toolsets/coralogix/utils.py +15 -79
  85. holmes/plugins/toolsets/datadog/datadog_api.py +36 -3
  86. holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +34 -1
  87. holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +3 -3
  88. holmes/plugins/toolsets/datadog/datadog_models.py +59 -0
  89. holmes/plugins/toolsets/datadog/datadog_url_utils.py +213 -0
  90. holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +165 -28
  91. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +71 -28
  92. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +224 -375
  93. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +67 -36
  94. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +360 -343
  95. holmes/plugins/toolsets/elasticsearch/__init__.py +6 -0
  96. holmes/plugins/toolsets/elasticsearch/elasticsearch.py +834 -0
  97. holmes/plugins/toolsets/git.py +7 -8
  98. holmes/plugins/toolsets/grafana/base_grafana_toolset.py +16 -4
  99. holmes/plugins/toolsets/grafana/common.py +2 -30
  100. holmes/plugins/toolsets/grafana/grafana_tempo_api.py +2 -1
  101. holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +18 -2
  102. holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +92 -18
  103. holmes/plugins/toolsets/grafana/loki_api.py +4 -0
  104. holmes/plugins/toolsets/grafana/toolset_grafana.py +109 -25
  105. holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +22 -0
  106. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +201 -33
  107. holmes/plugins/toolsets/grafana/trace_parser.py +3 -2
  108. holmes/plugins/toolsets/internet/internet.py +10 -10
  109. holmes/plugins/toolsets/internet/notion.py +5 -6
  110. holmes/plugins/toolsets/investigator/core_investigation.py +3 -3
  111. holmes/plugins/toolsets/investigator/model.py +3 -1
  112. holmes/plugins/toolsets/json_filter_mixin.py +134 -0
  113. holmes/plugins/toolsets/kafka.py +12 -7
  114. holmes/plugins/toolsets/kubernetes.yaml +260 -30
  115. holmes/plugins/toolsets/kubernetes_logs.py +3 -3
  116. holmes/plugins/toolsets/logging_utils/logging_api.py +16 -6
  117. holmes/plugins/toolsets/mcp/toolset_mcp.py +88 -60
  118. holmes/plugins/toolsets/newrelic/new_relic_api.py +41 -1
  119. holmes/plugins/toolsets/newrelic/newrelic.jinja2 +24 -0
  120. holmes/plugins/toolsets/newrelic/newrelic.py +212 -55
  121. holmes/plugins/toolsets/prometheus/prometheus.py +358 -102
  122. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +11 -3
  123. holmes/plugins/toolsets/rabbitmq/api.py +23 -4
  124. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +5 -5
  125. holmes/plugins/toolsets/robusta/robusta.py +5 -5
  126. holmes/plugins/toolsets/runbook/runbook_fetcher.py +25 -6
  127. holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +1 -1
  128. holmes/plugins/toolsets/utils.py +1 -1
  129. holmes/utils/config_utils.py +1 -1
  130. holmes/utils/connection_utils.py +31 -0
  131. holmes/utils/console/result.py +10 -0
  132. holmes/utils/file_utils.py +2 -1
  133. holmes/utils/global_instructions.py +10 -26
  134. holmes/utils/holmes_status.py +4 -3
  135. holmes/utils/log.py +15 -0
  136. holmes/utils/markdown_utils.py +2 -3
  137. holmes/utils/memory_limit.py +58 -0
  138. holmes/utils/sentry_helper.py +23 -0
  139. holmes/utils/stream.py +12 -5
  140. holmes/utils/tags.py +4 -3
  141. holmes/version.py +3 -1
  142. {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/METADATA +12 -10
  143. holmesgpt-0.18.4.dist-info/RECORD +258 -0
  144. holmes/plugins/toolsets/aws.yaml +0 -80
  145. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +0 -114
  146. holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +0 -310
  147. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +0 -736
  148. holmes/plugins/toolsets/grafana/grafana_api.py +0 -64
  149. holmes/plugins/toolsets/opensearch/__init__.py +0 -0
  150. holmes/plugins/toolsets/opensearch/opensearch.py +0 -250
  151. holmes/plugins/toolsets/opensearch/opensearch_logs.py +0 -161
  152. holmes/plugins/toolsets/opensearch/opensearch_traces.py +0 -215
  153. holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +0 -12
  154. holmes/plugins/toolsets/opensearch/opensearch_utils.py +0 -166
  155. holmes/utils/keygen_utils.py +0 -6
  156. holmesgpt-0.16.2a0.dist-info/RECORD +0 -258
  157. holmes/plugins/toolsets/{opensearch → elasticsearch}/opensearch_ppl_query_docs.jinja2 +0 -0
  158. holmes/plugins/toolsets/{opensearch → elasticsearch}/opensearch_query_assist.py +2 -2
  159. holmes/plugins/toolsets/{opensearch → elasticsearch}/opensearch_query_assist_instructions.jinja2 +0 -0
  160. {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/LICENSE +0 -0
  161. {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/WHEEL +0 -0
  162. {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/entry_points.txt +0 -0
@@ -1,21 +1,20 @@
1
1
  import base64
2
2
  import logging
3
- import requests # type: ignore
4
3
  import os
5
- from typing import Any, Optional, Dict, List, Tuple
4
+ from typing import Any, Dict, List, Optional, Tuple
5
+
6
+ import requests # type: ignore
6
7
  from pydantic import BaseModel
8
+
7
9
  from holmes.core.tools import (
10
+ CallablePrerequisite,
8
11
  StructuredToolResult,
9
12
  StructuredToolResultStatus,
10
- ToolInvokeContext,
11
- )
12
-
13
- from holmes.core.tools import (
14
- Toolset,
15
13
  Tool,
14
+ ToolInvokeContext,
16
15
  ToolParameter,
16
+ Toolset,
17
17
  ToolsetTag,
18
- CallablePrerequisite,
19
18
  )
20
19
  from holmes.plugins.toolsets.utils import toolset_name_for_one_liner
21
20
 
@@ -1,12 +1,11 @@
1
1
  import logging
2
+ from abc import abstractmethod
2
3
  from typing import Any, ClassVar, Tuple, Type
3
4
 
4
5
  from holmes.core.tools import CallablePrerequisite, Tool, Toolset, ToolsetTag
5
6
  from holmes.plugins.toolsets.consts import TOOLSET_CONFIG_MISSING_ERROR
6
7
  from holmes.plugins.toolsets.grafana.common import GrafanaConfig
7
8
 
8
- from holmes.plugins.toolsets.grafana.grafana_api import grafana_health_check
9
-
10
9
 
11
10
  class BaseGrafanaToolset(Toolset):
12
11
  config_class: ClassVar[Type[GrafanaConfig]] = GrafanaConfig
@@ -39,16 +38,29 @@ class BaseGrafanaToolset(Toolset):
39
38
 
40
39
  try:
41
40
  self._grafana_config = self.config_class(**config)
42
- return grafana_health_check(self._grafana_config)
41
+ return self.health_check()
43
42
 
44
43
  except Exception as e:
45
44
  logging.exception(f"Failed to set up grafana toolset {self.name}")
46
45
  return False, str(e)
47
46
 
47
+ @abstractmethod
48
+ def health_check(self) -> Tuple[bool, str]:
49
+ """
50
+ Check if the toolset is healthy and can connect to its data source.
51
+
52
+ Subclasses must implement this method to verify connectivity.
53
+ This method should NOT raise exceptions - catch them internally
54
+ and return (False, "error message") instead.
55
+
56
+ Returns:
57
+ Tuple[bool, str]: (True, "") on success, (False, "error message") on failure.
58
+ """
59
+ raise NotImplementedError("Subclasses must implement health_check()")
60
+
48
61
  def get_example_config(self):
49
62
  example_config = GrafanaConfig(
50
63
  api_key="YOUR API KEY",
51
64
  url="YOUR GRAFANA URL",
52
- grafana_datasource_uid="UID OF DATASOURCE IN GRAFANA",
53
65
  )
54
66
  return example_config.model_dump()
@@ -1,9 +1,6 @@
1
- import json
2
1
  from typing import Dict, Optional
3
- from pydantic import BaseModel
4
- import datetime
5
2
 
6
- from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
3
+ from pydantic import BaseModel
7
4
 
8
5
 
9
6
  class GrafanaConfig(BaseModel):
@@ -19,7 +16,7 @@ class GrafanaConfig(BaseModel):
19
16
  url: str
20
17
  grafana_datasource_uid: Optional[str] = None
21
18
  external_url: Optional[str] = None
22
- healthcheck: Optional[str] = "ready"
19
+ verify_ssl: bool = True
23
20
 
24
21
 
25
22
  def build_headers(api_key: Optional[str], additional_headers: Optional[Dict[str, str]]):
@@ -36,19 +33,6 @@ def build_headers(api_key: Optional[str], additional_headers: Optional[Dict[str,
36
33
  return headers
37
34
 
38
35
 
39
- def format_log(log: Dict) -> str:
40
- log_str = log.get("log", "")
41
- timestamp_nanoseconds = log.get("timestamp")
42
- if timestamp_nanoseconds:
43
- timestamp_seconds = int(timestamp_nanoseconds) // 1_000_000_000
44
- dt = datetime.datetime.fromtimestamp(timestamp_seconds)
45
- log_str = dt.strftime("%Y-%m-%dT%H:%M:%SZ") + " " + log_str
46
- else:
47
- log_str = json.dumps(log)
48
-
49
- return log_str
50
-
51
-
52
36
  def get_base_url(config: GrafanaConfig) -> str:
53
37
  if config.grafana_datasource_uid:
54
38
  return f"{config.url}/api/datasources/proxy/uid/{config.grafana_datasource_uid}"
@@ -56,18 +40,6 @@ def get_base_url(config: GrafanaConfig) -> str:
56
40
  return config.url
57
41
 
58
42
 
59
- def ensure_grafana_uid_or_return_error_result(
60
- config: GrafanaConfig,
61
- ) -> Optional[StructuredToolResult]:
62
- if not config.grafana_datasource_uid:
63
- return StructuredToolResult(
64
- status=StructuredToolResultStatus.ERROR,
65
- error="This tool only works when the toolset is configued ",
66
- )
67
- else:
68
- return None
69
-
70
-
71
43
  class GrafanaTempoLabelsConfig(BaseModel):
72
44
  pod: str = "k8s.pod.name"
73
45
  namespace: str = "k8s.namespace.name"
@@ -13,7 +13,6 @@ from holmes.plugins.toolsets.grafana.common import (
13
13
  get_base_url,
14
14
  )
15
15
 
16
-
17
16
  logger = logging.getLogger(__name__)
18
17
 
19
18
 
@@ -104,6 +103,7 @@ class GrafanaTempoAPI:
104
103
  headers=self.headers,
105
104
  params=params,
106
105
  timeout=timeout,
106
+ verify=self.config.verify_ssl,
107
107
  )
108
108
  response.raise_for_status()
109
109
  return response.json()
@@ -145,6 +145,7 @@ class GrafanaTempoAPI:
145
145
  url,
146
146
  headers=self.headers,
147
147
  timeout=30,
148
+ verify=self.config.verify_ssl,
148
149
  )
149
150
 
150
151
  # Just check status code, don't try to parse JSON
@@ -1,9 +1,25 @@
1
1
  Grafana Loki is a multi-tenant log aggregation system designed to store and query logs from all your applications and infrastructure.
2
2
 
3
- * Deleted K8s objects don’t delete their past logs. You can still find them by time-bounded queries on stable labels (e.g., namespace + app) or by regex on pod names.
4
- * If you can't find the kubernetes workload in the cluster YOU SHOULD still try to find logs from it using non specific values on labels.
3
+ **IMPORTANT WILDCARD USAGE:**
4
+ - **ALWAYS use wildcards** when searching for pods unless you have the COMPLETE pod name with all suffixes
5
+ - Kubernetes pod names include deployment hash + replica ID (e.g., `nginx-ingress-7b9899-x2km9`, `frontend-5f4d3b2a1-abc123`)
6
+ - When user says "nginx pod" or "frontend pod", search for `nginx-*` or `frontend-*` NOT just `nginx` or `frontend`
7
+ - Loki supports wildcards: `*` matches any characters (e.g., `nginx-*`, `*ingress*`, `*-x2km9`)
8
+ - For partial matches, use wildcards on both sides: `*keyword*` to find logs from any pod containing "keyword"
9
+
10
+ **When user provides what looks like a complete pod name** (e.g., `my-workload-5f9d8b7c4d-x2km9`):
11
+ - Query Loki directly with that exact pod name
12
+ - Do NOT try to verify if the pod exists in Kubernetes first
13
+ - This allows querying historical pods that have been deleted/replaced
14
+
5
15
  * If asked to check for logs, you must always try 1-2 of the best queries you can construct to search for the logs.
6
16
 
7
17
  Loki indexes log lines using labels to help find relevant log lines.
8
18
  For example a default Kubernetes labels setup would look like that
9
19
  {namespace="prod", app="backend-api", container="api", pod="backend-api-68b7d9df9c-xyz12", stream="stdout"}
20
+
21
+
22
+ ### Time Parameters
23
+ - Use RFC3339 format: `2023-03-01T10:30:00Z`
24
+ - Or relative seconds: `-3600` for 1 hour ago
25
+ - If no time range is specified, use the last 4 hours as the default.
@@ -1,7 +1,11 @@
1
- from typing import Dict
2
- import os
3
1
  import json
2
+ import os
3
+ from typing import Dict, Optional, Tuple
4
+ from urllib.parse import quote
5
+
4
6
  from holmes.core.tools import (
7
+ StructuredToolResult,
8
+ StructuredToolResultStatus,
5
9
  Tool,
6
10
  ToolInvokeContext,
7
11
  ToolParameter,
@@ -9,26 +13,85 @@ from holmes.core.tools import (
9
13
  from holmes.plugins.toolsets.consts import (
10
14
  STANDARD_END_DATETIME_TOOL_PARAM_DESCRIPTION,
11
15
  )
12
-
13
- from holmes.plugins.toolsets.grafana.common import get_base_url
14
- from holmes.plugins.toolsets.grafana.toolset_grafana import BaseGrafanaToolset
15
- from holmes.plugins.toolsets.utils import (
16
- process_timestamps_to_rfc3339,
17
- standard_start_datetime_tool_param_description,
16
+ from holmes.plugins.toolsets.grafana.common import GrafanaConfig, get_base_url
17
+ from holmes.plugins.toolsets.grafana.loki_api import (
18
+ execute_loki_query,
18
19
  )
20
+ from holmes.plugins.toolsets.grafana.toolset_grafana import BaseGrafanaToolset
19
21
  from holmes.plugins.toolsets.logging_utils.logging_api import (
20
- DEFAULT_TIME_SPAN_SECONDS,
21
22
  DEFAULT_LOG_LIMIT,
23
+ DEFAULT_TIME_SPAN_SECONDS,
22
24
  )
23
- from holmes.plugins.toolsets.grafana.loki_api import (
24
- execute_loki_query,
25
+ from holmes.plugins.toolsets.utils import (
26
+ process_timestamps_to_rfc3339,
27
+ standard_start_datetime_tool_param_description,
28
+ toolset_name_for_one_liner,
25
29
  )
26
30
 
27
- from holmes.plugins.toolsets.utils import toolset_name_for_one_liner
28
- from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
31
+
32
+ def _build_grafana_loki_explore_url(
33
+ config: GrafanaConfig, query: str, start: str, end: str, limit: int = 100
34
+ ) -> Optional[str]:
35
+ if not config.grafana_datasource_uid:
36
+ return None
37
+ try:
38
+ base_url = config.external_url or config.url
39
+ datasource_uid = config.grafana_datasource_uid or "loki"
40
+
41
+ from_str = start if start else "now-1h"
42
+ to_str = end if end else "now"
43
+
44
+ pane_id = "tmp"
45
+ safe_query = query if query else "{}"
46
+ panes = {
47
+ pane_id: {
48
+ "datasource": datasource_uid,
49
+ "queries": [
50
+ {
51
+ "refId": "A",
52
+ "datasource": {"type": "loki", "uid": datasource_uid},
53
+ "expr": safe_query,
54
+ "queryType": "range",
55
+ "maxLines": limit,
56
+ }
57
+ ],
58
+ "range": {"from": from_str, "to": to_str},
59
+ }
60
+ }
61
+
62
+ panes_encoded = quote(
63
+ json.dumps(panes, separators=(",", ":"), ensure_ascii=False), safe=""
64
+ )
65
+ return f"{base_url}/explore?schemaVersion=1&panes={panes_encoded}&orgId=1"
66
+ except Exception:
67
+ return None
29
68
 
30
69
 
31
70
  class GrafanaLokiToolset(BaseGrafanaToolset):
71
+ def health_check(self) -> Tuple[bool, str]:
72
+ """Test a dummy query to check if service available."""
73
+ (start, end) = process_timestamps_to_rfc3339(
74
+ start_timestamp=-1,
75
+ end_timestamp=None,
76
+ default_time_span_seconds=DEFAULT_TIME_SPAN_SECONDS,
77
+ )
78
+
79
+ c = self._grafana_config
80
+ try:
81
+ _ = execute_loki_query(
82
+ base_url=get_base_url(c),
83
+ api_key=c.api_key,
84
+ headers=c.headers,
85
+ query='{job="test_endpoint"}',
86
+ start=start,
87
+ end=end,
88
+ limit=1,
89
+ verify_ssl=c.verify_ssl,
90
+ )
91
+ except Exception as e:
92
+ return False, f"Unable to connect to Loki.\n{str(e)}"
93
+ return True, ""
94
+
32
95
  def __init__(self):
33
96
  super().__init__(
34
97
  name="grafana/loki",
@@ -68,7 +131,7 @@ class LokiQuery(Tool):
68
131
  required=False,
69
132
  ),
70
133
  "limit": ToolParameter(
71
- description="Maximum number of entries to return (default: 100)",
134
+ description=f"Maximum number of entries to return (default: {DEFAULT_LOG_LIMIT})",
72
135
  type="integer",
73
136
  required=False,
74
137
  ),
@@ -85,28 +148,39 @@ class LokiQuery(Tool):
85
148
  )
86
149
 
87
150
  config = self.toolset._grafana_config
151
+ query_str = params.get("query", '{query="no_query_fallback"}')
88
152
  try:
89
153
  data = execute_loki_query(
90
154
  base_url=get_base_url(config),
91
155
  api_key=config.api_key,
92
156
  headers=config.headers,
93
- query=params.get(
94
- "query", '{query="no_query_fallback"}'
95
- ), # make sure a string returns. fall back to query that will return nothing.
157
+ query=query_str,
96
158
  start=start,
97
159
  end=end,
98
160
  limit=params.get("limit") or DEFAULT_LOG_LIMIT,
161
+ verify_ssl=config.verify_ssl,
99
162
  )
163
+
164
+ explore_url = _build_grafana_loki_explore_url(
165
+ config,
166
+ query_str,
167
+ start,
168
+ end,
169
+ limit=params.get("limit") or DEFAULT_LOG_LIMIT,
170
+ )
171
+
100
172
  if data:
101
173
  return StructuredToolResult(
102
174
  status=StructuredToolResultStatus.SUCCESS,
103
- data=json.dumps(data),
175
+ data=data,
104
176
  params=params,
177
+ url=explore_url,
105
178
  )
106
179
  else:
107
180
  return StructuredToolResult(
108
181
  status=StructuredToolResultStatus.NO_DATA,
109
182
  params=params,
183
+ url=explore_url,
110
184
  )
111
185
  except Exception as e:
112
186
  return StructuredToolResult(
@@ -42,6 +42,7 @@ def execute_loki_query(
42
42
  start: Union[int, str],
43
43
  end: Union[int, str],
44
44
  limit: int,
45
+ verify_ssl: bool = True,
45
46
  ) -> List[Dict]:
46
47
  params = {"query": query, "limit": limit, "start": start, "end": end}
47
48
  try:
@@ -50,6 +51,7 @@ def execute_loki_query(
50
51
  url,
51
52
  headers=build_headers(api_key=api_key, additional_headers=headers),
52
53
  params=params, # type: ignore
54
+ verify=verify_ssl,
53
55
  )
54
56
  response.raise_for_status()
55
57
 
@@ -74,6 +76,7 @@ def query_loki_logs_by_label(
74
76
  label: str,
75
77
  namespace_search_key: str = "namespace",
76
78
  limit: int = 200,
79
+ verify_ssl: bool = True,
77
80
  ) -> List[Dict]:
78
81
  query = f'{{{namespace_search_key}="{namespace}", {label}="{label_value}"}}'
79
82
  if filter:
@@ -86,4 +89,5 @@ def query_loki_logs_by_label(
86
89
  start=start,
87
90
  end=end,
88
91
  limit=limit,
92
+ verify_ssl=verify_ssl,
89
93
  )
@@ -1,29 +1,50 @@
1
1
  import os
2
- from typing import ClassVar, Dict, Optional, Type, cast
3
- from urllib.parse import urljoin
4
2
  from abc import ABC
3
+ from typing import Any, ClassVar, Dict, Optional, Tuple, Type, cast
4
+ from urllib.parse import urlencode, urljoin
5
+
6
+ import requests # type: ignore
7
+
5
8
  from holmes.core.tools import (
6
9
  StructuredToolResult,
10
+ StructuredToolResultStatus,
7
11
  Tool,
8
12
  ToolInvokeContext,
9
13
  ToolParameter,
10
- StructuredToolResultStatus,
11
14
  )
12
15
  from holmes.plugins.toolsets.grafana.base_grafana_toolset import BaseGrafanaToolset
13
- import requests # type: ignore
14
-
15
16
  from holmes.plugins.toolsets.grafana.common import (
16
- get_base_url,
17
17
  GrafanaConfig,
18
18
  build_headers,
19
+ get_base_url,
19
20
  )
21
+ from holmes.plugins.toolsets.json_filter_mixin import JsonFilterMixin
20
22
  from holmes.plugins.toolsets.utils import toolset_name_for_one_liner
21
23
 
22
24
 
23
25
  class GrafanaDashboardConfig(GrafanaConfig):
24
- """Configuration specific to Grafana Dashboard toolset with api/health as default healthcheck"""
25
-
26
- healthcheck: Optional[str] = "api/health"
26
+ """Configuration specific to Grafana Dashboard toolset."""
27
+
28
+ pass
29
+
30
+
31
+ def _build_grafana_dashboard_url(
32
+ config: GrafanaDashboardConfig,
33
+ uid: Optional[str] = None,
34
+ query_params: Optional[Dict[str, Any]] = None,
35
+ ) -> Optional[str]:
36
+ try:
37
+ base_url = config.external_url or config.url
38
+ if uid:
39
+ return f"{base_url.rstrip('/')}/d/{uid}"
40
+ else:
41
+ query_string = urlencode(query_params, doseq=True) if query_params else ""
42
+ if query_string:
43
+ return f"{base_url.rstrip('/')}/dashboards?{query_string}"
44
+ else:
45
+ return f"{base_url.rstrip('/')}/dashboards"
46
+ except Exception:
47
+ return None
27
48
 
28
49
 
29
50
  class GrafanaToolset(BaseGrafanaToolset):
@@ -47,6 +68,15 @@ class GrafanaToolset(BaseGrafanaToolset):
47
68
  os.path.dirname(__file__), "toolset_grafana_dashboard.jinja2"
48
69
  )
49
70
 
71
+ def health_check(self) -> Tuple[bool, str]:
72
+ """Test connectivity by invoking GetDashboardTags tool."""
73
+ tool = GetDashboardTags(self)
74
+ try:
75
+ _ = tool._make_grafana_request("api/dashboards/tags", {})
76
+ return True, ""
77
+ except Exception as e:
78
+ return False, f"Failed to connect to Grafana {str(e)}"
79
+
50
80
  @property
51
81
  def grafana_config(self) -> GrafanaDashboardConfig:
52
82
  return cast(GrafanaDashboardConfig, self._grafana_config)
@@ -76,14 +106,21 @@ class BaseGrafanaTool(Tool, ABC):
76
106
  Returns:
77
107
  StructuredToolResult with the API response data
78
108
  """
79
- url = urljoin(get_base_url(self._toolset.grafana_config), endpoint)
109
+ base_url = get_base_url(self._toolset.grafana_config)
110
+ if not base_url.endswith("/"):
111
+ base_url += "/"
112
+ url = urljoin(base_url, endpoint)
80
113
  headers = build_headers(
81
114
  api_key=self._toolset.grafana_config.api_key,
82
115
  additional_headers=self._toolset.grafana_config.headers,
83
116
  )
84
117
 
85
118
  response = requests.get(
86
- url, headers=headers, params=query_params, timeout=timeout
119
+ url,
120
+ headers=headers,
121
+ params=query_params,
122
+ timeout=timeout,
123
+ verify=self._toolset.grafana_config.verify_ssl,
87
124
  )
88
125
  response.raise_for_status()
89
126
  data = response.json()
@@ -186,46 +223,83 @@ class SearchDashboards(BaseGrafanaTool):
186
223
  if params.get("page"):
187
224
  query_params["page"] = params["page"]
188
225
 
189
- return self._make_grafana_request("/api/search", params, query_params)
226
+ result = self._make_grafana_request("api/search", params, query_params)
227
+
228
+ config = self._toolset.grafana_config
229
+ search_url = _build_grafana_dashboard_url(config, query_params=query_params)
230
+
231
+ if params.get("dashboardUIDs"):
232
+ uids = [
233
+ uid.strip() for uid in params["dashboardUIDs"].split(",") if uid.strip()
234
+ ]
235
+ if len(uids) == 1:
236
+ search_url = _build_grafana_dashboard_url(config, uid=uids[0])
237
+
238
+ return StructuredToolResult(
239
+ status=result.status,
240
+ data=result.data,
241
+ params=result.params,
242
+ url=search_url if search_url else None,
243
+ )
190
244
 
191
245
  def get_parameterized_one_liner(self, params: Dict) -> str:
192
246
  return f"{toolset_name_for_one_liner(self._toolset.name)}: Search Dashboards"
193
247
 
194
248
 
195
- class GetDashboardByUID(BaseGrafanaTool):
249
+ class GetDashboardByUID(JsonFilterMixin, BaseGrafanaTool):
196
250
  def __init__(self, toolset: GrafanaToolset):
197
251
  super().__init__(
198
252
  toolset=toolset,
199
253
  name="grafana_get_dashboard_by_uid",
200
254
  description="Get a dashboard by its UID using the /api/dashboards/uid/:uid endpoint",
201
- parameters={
202
- "uid": ToolParameter(
203
- description="The unique identifier of the dashboard",
204
- type="string",
205
- required=True,
206
- ),
207
- },
255
+ parameters=self.extend_parameters(
256
+ {
257
+ "uid": ToolParameter(
258
+ description="The unique identifier of the dashboard",
259
+ type="string",
260
+ required=True,
261
+ )
262
+ }
263
+ ),
208
264
  )
209
265
 
210
266
  def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
211
267
  uid = params["uid"]
212
- return self._make_grafana_request(f"/api/dashboards/uid/{uid}", params)
268
+ result = self._make_grafana_request(f"api/dashboards/uid/{uid}", params)
269
+
270
+ dashboard_url = _build_grafana_dashboard_url(
271
+ self._toolset.grafana_config, uid=uid
272
+ )
273
+
274
+ filtered_result = self.filter_result(result, params)
275
+ filtered_result.url = dashboard_url if dashboard_url else result.url
276
+ return filtered_result
213
277
 
214
278
  def get_parameterized_one_liner(self, params: Dict) -> str:
215
279
  return f"{toolset_name_for_one_liner(self._toolset.name)}: Get Dashboard {params.get('uid', '')}"
216
280
 
217
281
 
218
- class GetHomeDashboard(BaseGrafanaTool):
282
+ class GetHomeDashboard(JsonFilterMixin, BaseGrafanaTool):
219
283
  def __init__(self, toolset: GrafanaToolset):
220
284
  super().__init__(
221
285
  toolset=toolset,
222
286
  name="grafana_get_home_dashboard",
223
287
  description="Get the home dashboard using the /api/dashboards/home endpoint",
224
- parameters={},
288
+ parameters=self.extend_parameters({}),
225
289
  )
226
290
 
227
291
  def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
228
- return self._make_grafana_request("/api/dashboards/home", params)
292
+ result = self._make_grafana_request("api/dashboards/home", params)
293
+ config = self._toolset.grafana_config
294
+ dashboard_url = None
295
+ if isinstance(result.data, dict):
296
+ uid = result.data.get("dashboard", {}).get("uid")
297
+ if uid:
298
+ dashboard_url = _build_grafana_dashboard_url(config, uid=uid)
299
+
300
+ filtered_result = self.filter_result(result, params)
301
+ filtered_result.url = dashboard_url if dashboard_url else None
302
+ return filtered_result
229
303
 
230
304
  def get_parameterized_one_liner(self, params: Dict) -> str:
231
305
  return f"{toolset_name_for_one_liner(self._toolset.name)}: Get Home Dashboard"
@@ -241,7 +315,17 @@ class GetDashboardTags(BaseGrafanaTool):
241
315
  )
242
316
 
243
317
  def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
244
- return self._make_grafana_request("/api/dashboards/tags", params)
318
+ result = self._make_grafana_request("api/dashboards/tags", params)
319
+
320
+ config = self._toolset.grafana_config
321
+ tags_url = _build_grafana_dashboard_url(config)
322
+
323
+ return StructuredToolResult(
324
+ status=result.status,
325
+ data=result.data,
326
+ params=result.params,
327
+ url=tags_url,
328
+ )
245
329
 
246
330
  def get_parameterized_one_liner(self, params: Dict) -> str:
247
331
  return f"{toolset_name_for_one_liner(self._toolset.name)}: Get Dashboard Tags"
@@ -1,4 +1,26 @@
1
1
  ---
2
+ ## **IMPORTANT: Handling Grafana Dashboard URLs**
3
+
4
+ **When the user provides a Grafana dashboard URL (e.g., http://some-domain.com/d/09ec8aa1e996d6ffcd6817bbaff4db1b/kubernetes-api-server):**
5
+
6
+ 1. **DO NOT use the internet/fetch_webpage tool** - Grafana URLs should always be handled by the grafana dashboards toolset
7
+ 2. **Extract the dashboard parameters from the URL:**
8
+ - Pattern: `/d/{uid}/{dashboard-name}`
9
+ - Example: `/d/09ec8aa1e996d6ffcd6817bbaff4db1b/kubernetes-api-server` → UID is `09ec8aa1e996d6ffcd6817bbaff4db1b`
10
+ 3. **Use `grafana_get_dashboard_by_uid` with the extracted UID** to fetch the dashboard definition
11
+ 4. **Extract queries from the dashboard panels** and execute them with the appropriate toolset (Prometheus, Loki, etc.)
12
+
13
+ ### **Example Workflow:**
14
+ User: "Look at this graph: http://localhost:3000/d/abc123/my-dashboard?from=now-1h&to=now"
15
+
16
+ 1. Recognize this is a Grafana URL (contains /d/)
17
+ 2. Extract UID: abc123
18
+ 3. Call grafana_get_dashboard_by_uid(uid="abc123")
19
+ 4. Analyze dashboard panels and their queries
20
+ 5. Execute relevant queries using the time range from the URL (from=now-1h, to=now)
21
+
22
+ **Key Point:** Always prefer the Grafana toolset for any URL pointing to a Grafana instance. This gives you access to the dashboard structure, panel queries, and metadata - not just HTML content.
23
+
2
24
 
3
25
  ## **Instruction: Prometheus Dashboard Query Execution**
4
26