holmesgpt 0.13.2__py3-none-any.whl → 0.18.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188)
  1. holmes/__init__.py +3 -5
  2. holmes/clients/robusta_client.py +20 -6
  3. holmes/common/env_vars.py +58 -3
  4. holmes/common/openshift.py +1 -1
  5. holmes/config.py +123 -148
  6. holmes/core/conversations.py +71 -15
  7. holmes/core/feedback.py +191 -0
  8. holmes/core/investigation.py +31 -39
  9. holmes/core/investigation_structured_output.py +3 -3
  10. holmes/core/issue.py +1 -1
  11. holmes/core/llm.py +508 -88
  12. holmes/core/models.py +108 -4
  13. holmes/core/openai_formatting.py +14 -1
  14. holmes/core/prompt.py +48 -3
  15. holmes/core/runbooks.py +1 -0
  16. holmes/core/safeguards.py +8 -6
  17. holmes/core/supabase_dal.py +295 -100
  18. holmes/core/tool_calling_llm.py +489 -428
  19. holmes/core/tools.py +325 -56
  20. holmes/core/tools_utils/token_counting.py +21 -0
  21. holmes/core/tools_utils/tool_context_window_limiter.py +40 -0
  22. holmes/core/tools_utils/tool_executor.py +0 -13
  23. holmes/core/tools_utils/toolset_utils.py +1 -0
  24. holmes/core/toolset_manager.py +191 -5
  25. holmes/core/tracing.py +19 -3
  26. holmes/core/transformers/__init__.py +23 -0
  27. holmes/core/transformers/base.py +63 -0
  28. holmes/core/transformers/llm_summarize.py +175 -0
  29. holmes/core/transformers/registry.py +123 -0
  30. holmes/core/transformers/transformer.py +32 -0
  31. holmes/core/truncation/compaction.py +94 -0
  32. holmes/core/truncation/dal_truncation_utils.py +23 -0
  33. holmes/core/truncation/input_context_window_limiter.py +219 -0
  34. holmes/interactive.py +228 -31
  35. holmes/main.py +23 -40
  36. holmes/plugins/interfaces.py +2 -1
  37. holmes/plugins/prompts/__init__.py +2 -1
  38. holmes/plugins/prompts/_fetch_logs.jinja2 +31 -6
  39. holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
  40. holmes/plugins/prompts/_runbook_instructions.jinja2 +24 -12
  41. holmes/plugins/prompts/base_user_prompt.jinja2 +7 -0
  42. holmes/plugins/prompts/conversation_history_compaction.jinja2 +89 -0
  43. holmes/plugins/prompts/generic_ask.jinja2 +0 -4
  44. holmes/plugins/prompts/generic_ask_conversation.jinja2 +0 -1
  45. holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +0 -1
  46. holmes/plugins/prompts/generic_investigation.jinja2 +0 -1
  47. holmes/plugins/prompts/investigation_procedure.jinja2 +50 -1
  48. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +0 -1
  49. holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +0 -1
  50. holmes/plugins/runbooks/__init__.py +145 -17
  51. holmes/plugins/runbooks/catalog.json +2 -0
  52. holmes/plugins/sources/github/__init__.py +4 -2
  53. holmes/plugins/sources/prometheus/models.py +1 -0
  54. holmes/plugins/toolsets/__init__.py +44 -27
  55. holmes/plugins/toolsets/aks-node-health.yaml +46 -0
  56. holmes/plugins/toolsets/aks.yaml +64 -0
  57. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +38 -47
  58. holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +3 -2
  59. holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
  60. holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +3 -2
  61. holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +3 -1
  62. holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +3 -1
  63. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +12 -13
  64. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +15 -12
  65. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +15 -12
  66. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +11 -11
  67. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +11 -9
  68. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +15 -12
  69. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +15 -15
  70. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +11 -8
  71. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +11 -8
  72. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +11 -8
  73. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +11 -8
  74. holmes/plugins/toolsets/azure_sql/utils.py +0 -32
  75. holmes/plugins/toolsets/bash/argocd/__init__.py +3 -3
  76. holmes/plugins/toolsets/bash/aws/__init__.py +4 -4
  77. holmes/plugins/toolsets/bash/azure/__init__.py +4 -4
  78. holmes/plugins/toolsets/bash/bash_toolset.py +11 -15
  79. holmes/plugins/toolsets/bash/common/bash.py +23 -13
  80. holmes/plugins/toolsets/bash/common/bash_command.py +1 -1
  81. holmes/plugins/toolsets/bash/common/stringify.py +1 -1
  82. holmes/plugins/toolsets/bash/kubectl/__init__.py +2 -1
  83. holmes/plugins/toolsets/bash/kubectl/constants.py +0 -1
  84. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +3 -4
  85. holmes/plugins/toolsets/bash/parse_command.py +12 -13
  86. holmes/plugins/toolsets/cilium.yaml +284 -0
  87. holmes/plugins/toolsets/connectivity_check.py +124 -0
  88. holmes/plugins/toolsets/coralogix/api.py +132 -119
  89. holmes/plugins/toolsets/coralogix/coralogix.jinja2 +14 -0
  90. holmes/plugins/toolsets/coralogix/toolset_coralogix.py +219 -0
  91. holmes/plugins/toolsets/coralogix/utils.py +15 -79
  92. holmes/plugins/toolsets/datadog/datadog_api.py +525 -26
  93. holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +55 -11
  94. holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +3 -3
  95. holmes/plugins/toolsets/datadog/datadog_models.py +59 -0
  96. holmes/plugins/toolsets/datadog/datadog_url_utils.py +213 -0
  97. holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +165 -28
  98. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +417 -241
  99. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +234 -214
  100. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +167 -79
  101. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +374 -363
  102. holmes/plugins/toolsets/elasticsearch/__init__.py +6 -0
  103. holmes/plugins/toolsets/elasticsearch/elasticsearch.py +834 -0
  104. holmes/plugins/toolsets/elasticsearch/opensearch_ppl_query_docs.jinja2 +1616 -0
  105. holmes/plugins/toolsets/elasticsearch/opensearch_query_assist.py +78 -0
  106. holmes/plugins/toolsets/elasticsearch/opensearch_query_assist_instructions.jinja2 +223 -0
  107. holmes/plugins/toolsets/git.py +54 -50
  108. holmes/plugins/toolsets/grafana/base_grafana_toolset.py +16 -4
  109. holmes/plugins/toolsets/grafana/common.py +13 -29
  110. holmes/plugins/toolsets/grafana/grafana_tempo_api.py +455 -0
  111. holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +25 -0
  112. holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +191 -0
  113. holmes/plugins/toolsets/grafana/loki_api.py +4 -0
  114. holmes/plugins/toolsets/grafana/toolset_grafana.py +293 -89
  115. holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +49 -0
  116. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
  117. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +820 -292
  118. holmes/plugins/toolsets/grafana/trace_parser.py +4 -3
  119. holmes/plugins/toolsets/internet/internet.py +15 -16
  120. holmes/plugins/toolsets/internet/notion.py +9 -11
  121. holmes/plugins/toolsets/investigator/core_investigation.py +44 -36
  122. holmes/plugins/toolsets/investigator/model.py +3 -1
  123. holmes/plugins/toolsets/json_filter_mixin.py +134 -0
  124. holmes/plugins/toolsets/kafka.py +36 -42
  125. holmes/plugins/toolsets/kubernetes.yaml +317 -113
  126. holmes/plugins/toolsets/kubernetes_logs.py +9 -9
  127. holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
  128. holmes/plugins/toolsets/logging_utils/logging_api.py +94 -8
  129. holmes/plugins/toolsets/mcp/toolset_mcp.py +218 -64
  130. holmes/plugins/toolsets/newrelic/new_relic_api.py +165 -0
  131. holmes/plugins/toolsets/newrelic/newrelic.jinja2 +65 -0
  132. holmes/plugins/toolsets/newrelic/newrelic.py +320 -0
  133. holmes/plugins/toolsets/openshift.yaml +283 -0
  134. holmes/plugins/toolsets/prometheus/prometheus.py +1202 -421
  135. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +54 -5
  136. holmes/plugins/toolsets/prometheus/utils.py +28 -0
  137. holmes/plugins/toolsets/rabbitmq/api.py +23 -4
  138. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +13 -14
  139. holmes/plugins/toolsets/robusta/robusta.py +239 -68
  140. holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
  141. holmes/plugins/toolsets/runbook/runbook_fetcher.py +157 -27
  142. holmes/plugins/toolsets/service_discovery.py +1 -1
  143. holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
  144. holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
  145. holmes/plugins/toolsets/utils.py +88 -0
  146. holmes/utils/config_utils.py +91 -0
  147. holmes/utils/connection_utils.py +31 -0
  148. holmes/utils/console/result.py +10 -0
  149. holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
  150. holmes/utils/env.py +7 -0
  151. holmes/utils/file_utils.py +2 -1
  152. holmes/utils/global_instructions.py +60 -11
  153. holmes/utils/holmes_status.py +6 -4
  154. holmes/utils/holmes_sync_toolsets.py +0 -2
  155. holmes/utils/krr_utils.py +188 -0
  156. holmes/utils/log.py +15 -0
  157. holmes/utils/markdown_utils.py +2 -3
  158. holmes/utils/memory_limit.py +58 -0
  159. holmes/utils/sentry_helper.py +64 -0
  160. holmes/utils/stream.py +69 -8
  161. holmes/utils/tags.py +4 -3
  162. holmes/version.py +37 -15
  163. holmesgpt-0.18.4.dist-info/LICENSE +178 -0
  164. {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/METADATA +35 -31
  165. holmesgpt-0.18.4.dist-info/RECORD +258 -0
  166. holmes/core/performance_timing.py +0 -72
  167. holmes/plugins/toolsets/aws.yaml +0 -80
  168. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +0 -112
  169. holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +0 -310
  170. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +0 -739
  171. holmes/plugins/toolsets/grafana/grafana_api.py +0 -42
  172. holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
  173. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
  174. holmes/plugins/toolsets/newrelic.py +0 -231
  175. holmes/plugins/toolsets/opensearch/opensearch.py +0 -257
  176. holmes/plugins/toolsets/opensearch/opensearch_logs.py +0 -161
  177. holmes/plugins/toolsets/opensearch/opensearch_traces.py +0 -218
  178. holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +0 -12
  179. holmes/plugins/toolsets/opensearch/opensearch_utils.py +0 -166
  180. holmes/plugins/toolsets/servicenow/install.md +0 -37
  181. holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
  182. holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
  183. holmes/utils/keygen_utils.py +0 -6
  184. holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
  185. holmesgpt-0.13.2.dist-info/RECORD +0 -234
  186. /holmes/plugins/toolsets/{opensearch → newrelic}/__init__.py +0 -0
  187. {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/WHEEL +0 -0
  188. {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/entry_points.txt +0 -0
holmes/plugins/toolsets/newrelic/newrelic.jinja2
@@ -0,0 +1,65 @@
+ New Relic provides distributed tracing data along with logs and metrics.
+
+ {% if config.enable_multi_account %}
+ **MULTI-ACCOUNT MODE**: You have access to multiple New Relic accounts in this organization.
+
+ ### Important Multi-Account Workflow
+
+ **Each NRQL query MUST include the account_id parameter.**
+ A New Relic account ID is a numeric identifier, typically a 6–8 digit integer (e.g., 1234567).
+
+ **Here's how to determine which account_id to use:**
+
+ 1. **ALWAYS check context first**: Look for common New Relic labels or tags carrying the account ID or name, such as `nrAccountId`, `accountId`, or `account`, in the provided context
+ (e.g., from alerts, traces, or previous queries). If found, use that value.
+
+ 2. **ALWAYS check whether an account name is provided**: If the user mentions a specific account name (e.g., "Production Account", "Staging"):
+    - YOU MUST first call `newrelic_list_organization_accounts` to get the list of all accounts
+    - Find the matching account by name and use its ID
+
+ 3. **No account specified**: If you can't find any account ID or name in the context of the question:
+    - Use the default account ID of the `newrelic_execute_nrql_query` function.
+    - Let the user know you have used the default account.
+
+ **Important**: The context may contain account IDs in various places - check trace data, alert metadata, or previous query results for `nrAccountId`, `accountId`, `account.id`, or similar fields.
+
+ {% endif %}
+ Assume every application has New Relic tracing data.
+
+ Use `nrql_query` to run a NRQL query.
+
+ **NRQL (New Relic Query Language)** is used to query all telemetry data in New Relic. The main event types are:
+
+ - **Transaction**: High-level APM data (requests, API calls)
+ - **Span**: Distributed tracing data (individual operations)
+ - **Log**: Centralized log data
+ - **Metric**: Time-series metrics data
+
+ ### Usage Workflow
+
+ #### 1. Discovering Available Data
+
+ Start by understanding what's available. Here are some examples:
+ - **ALWAYS** start by getting all the available attribute names for what you are looking for. For example, to get them for Transaction events in the last 24 hours, use: `SELECT keyset() FROM Transaction SINCE 24 hours ago`
+ - After you find the key `appName` in the keyset, you can use it to get the available applications: `SELECT uniques(appName) FROM Transaction SINCE 1 hour ago`
+ Note: Use `SHOW EVENT TYPES` to see all event types in the account, in addition to Transaction, Span, Log, or Metric.
+
+ #### 2. Querying Telemetry Data
+
+ - If you already have an application name, you can query its traces directly
+ - **Time range is recommended**: While not strictly required, most queries should include SINCE for performance
+
+ #### 3. Querying Traces
+ - Always validate first: run the base query without FACET (or with a quick LIMIT) to confirm data exists; if results are empty, adjust filters or time range before proceeding.
+ - Only attempt a FACET after confirming the field has values; if not, either try known alternatives or skip faceting entirely.
+ - When investigating a trace, also look at its attributes
+ - ***When investigating latency, ALWAYS identify the specific component or attribute in the span causing significant latency*** - your investigation is not complete without this
+ - If you need to filter by time, NEVER filter in the WHERE clause using the timestamp field. Instead, ALWAYS use the `SINCE` or `SINCE ... UNTIL ...` syntax - these are the recommended ways to run time-based filters in New Relic. Moreover, even if the user asks you to filter on the timestamp field directly, don't adhere to their request - make the necessary adjustments to translate it into `SINCE` or `SINCE ... UNTIL ...` syntax!
+
+ ### Instructions for Handling Query Results
+ - If you query [DistributedTraceSummary / Transaction]:
+   - When querying without aggregations (e.g. without count(*), average(attribute), sum(attribute), min(attribute), etc.):
+     - ALWAYS start by querying all fields using `SELECT * FROM`. We need as many fields as possible to visualize the traces for the user. However, the trade-off is that we might exceed the context size. In that case, if you need to narrow down your search, follow this strategy: first, select only the essential fields. If that still fails, add the `LIMIT` keyword to the query. `LIMIT` should always be the second option; we prefer to show the user as many traces as we can. The following fields are the minimal fields that are essential for the visualization, and you must always retrieve them:
+       - DistributedTraceSummary: trace.id, spanCount, root.entity.accountId, root.entity.guid, root.entity.name, root.span.name, timestamp, duration.ms
+       - Transaction: traceId, tags.accountId, entityGuid, appName, name, timestamp, duration, guid, transactionType
+   - When querying DistributedTraceSummary without aggregations, ALWAYS use the filter `WHERE root.span.eventType = 'Span'`
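
The template above condenses to a short query sequence. As a minimal sketch of that workflow - the `run_nrql` helper and the example values are hypothetical stand-ins for the `newrelic_execute_nrql_query` tool; only the query patterns come from the template's guidance:

```python
def run_nrql(query: str) -> list:
    # Hypothetical stand-in for the newrelic_execute_nrql_query tool:
    # here it just echoes the query so the sketch runs end to end.
    print(f"NRQL> {query}")
    return []

# 1. Discover available attributes before anything else.
keyset = run_nrql("SELECT keyset() FROM Transaction SINCE 24 hours ago")

# 2. Once `appName` is confirmed in the keyset, enumerate applications.
apps = run_nrql("SELECT uniques(appName) FROM Transaction SINCE 1 hour ago")

# 3. Aggregate + FACET (never the raw faceted attribute in SELECT).
by_type = run_nrql("SELECT count(*) FROM Transaction FACET transactionType")

# 4. Time filters go through SINCE ... UNTIL ..., never WHERE timestamp.
rows = run_nrql(
    "SELECT * FROM Transaction WHERE appName = 'checkout' "
    "SINCE 3 hours ago UNTIL 1 hour ago LIMIT 50"
)
```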
holmes/plugins/toolsets/newrelic/newrelic.py
@@ -0,0 +1,320 @@
+ import base64
+ import json
+ import logging
+ import os
+ from typing import Any, Dict, Optional
+
+ from pydantic import BaseModel
+
+ from holmes.core.tools import (
+     CallablePrerequisite,
+     StructuredToolResult,
+     StructuredToolResultStatus,
+     Tool,
+     ToolInvokeContext,
+     ToolParameter,
+     Toolset,
+     ToolsetTag,
+ )
+ from holmes.plugins.toolsets.newrelic.new_relic_api import NewRelicAPI
+ from holmes.plugins.toolsets.utils import toolset_name_for_one_liner
+
+
+ def _build_newrelic_query_url(
+     base_url: str,
+     account_id: str,
+     nrql_query: str,
+ ) -> Optional[str]:
+     """Build a New Relic query URL for the NRQL query builder.
+
+     Note: URL links to queries are not officially supported by New Relic, so we are using
+     a workaround to open their overlay to the query builder with the query pre-filled.
+     This uses the dashboard launcher with an overlay parameter to open the query builder nerdlet.
+
+     """
+     try:
+         account_id_int = int(account_id) if isinstance(account_id, str) else account_id
+
+         overlay = {
+             "nerdletId": "data-exploration.query-builder",
+             "initialActiveInterface": "nrqlEditor",
+             "initialQueries": [
+                 {
+                     "accountId": account_id_int,
+                     "nrql": nrql_query,
+                 }
+             ],
+         }
+
+         overlay_json = json.dumps(overlay, separators=(",", ":"))
+         overlay_base64 = base64.b64encode(overlay_json.encode("utf-8")).decode("utf-8")
+
+         pane = {
+             "nerdletId": "dashboards.list",
+             "entityDomain": "VIZ",
+             "entityType": "DASHBOARD",
+         }
+         pane_json = json.dumps(pane, separators=(",", ":"))
+         pane_base64 = base64.b64encode(pane_json.encode("utf-8")).decode("utf-8")
+
+         url = (
+             f"{base_url}/launcher/dashboards.launcher"
+             f"?pane={pane_base64}"
+             f"&overlay={overlay_base64}"
+         )
+
+         return url
+     except Exception:
+         return None
+
+
+ class ExecuteNRQLQuery(Tool):
+     def __init__(self, toolset: "NewRelicToolset"):
+         parameters = {
+             "query": ToolParameter(
+                 description="""The NRQL query string to execute.
+
+ MANDATORY: Before querying any event type, ALWAYS run `SELECT keyset() FROM <EventType> SINCE <timeframe>` to discover available attributes. Never use attributes without confirming they exist first. Make sure to remember which fields are stringKeys, numericKeys or booleanKeys as this will be important in subsequent queries.
+
+ Example: Before querying Transactions, run: `SELECT keyset() FROM Transaction SINCE 24 hours ago`
+
+ ### ⚠️ Critical Rule: NRQL `FACET` Usage
+
+ When using **FACET** in NRQL:
+ - Any **non-constant value** in the `SELECT` clause **must be aggregated**.
+ - The attribute you **FACET** on must **not appear in `SELECT`** unless it's wrapped in an aggregation.
+
+ #### ✅ Correct
+ ```nrql
+ -- Aggregated metric + facet
+ SELECT count(*) FROM Transaction FACET transactionType
+
+ -- Multiple aggregations with facet
+ SELECT count(*), average(duration) FROM Transaction FACET transactionType
+ ```
+
+ #### ❌ Incorrect
+ ```nrql
+ -- Not allowed: raw attribute in SELECT
+ SELECT count(*), transactionType FROM Transaction FACET transactionType
+ ```
+ """,
+                 type="string",
+                 required=True,
+             ),
+             "description": ToolParameter(
+                 description="A brief 6-word, human-understandable description of the query you are running.",
+                 type="string",
+                 required=True,
+             ),
+             "query_type": ToolParameter(
+                 description="Either 'Metrics', 'Logs', 'Traces', 'Discover Attributes' or 'Other'.",
+                 type="string",
+                 required=True,
+             ),
+         }
+
+         # Add account_id parameter only in multi-account mode
+         if toolset.enable_multi_account:
+             parameters["account_id"] = ToolParameter(
+                 description=(
+                     f"A New Relic account ID is a numeric identifier, typically a 6-8 digit integer (e.g., 1234567). It contains only digits, has no prefixes or separators, and uniquely identifies a New Relic account. default: {toolset.nr_account_id}"
+                 ),
+                 type="integer",
+                 required=True,
+             )
+
+         super().__init__(
+             name="newrelic_execute_nrql_query",
+             description="Get Traces, APM, Spans, Logs and more by executing a NRQL query in New Relic. "
+             "Returns the result of the NRQL function. "
+             "⚠️ CRITICAL: NRQL silently returns empty results for invalid queries instead of errors. "
+             "If you get empty results, your query likely has issues such as: "
+             "1) Wrong attribute names (use SELECT keyset() first to verify), "
+             "2) Type mismatches (string vs numeric fields), "
+             "3) Wrong event type. "
+             "Always verify attribute names and types before querying.",
+             parameters=parameters,
+         )
+         self._toolset = toolset
+
+     def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
+         if self._toolset.enable_multi_account:
+             account_id = params.get("account_id") or self._toolset.nr_account_id
+             account_id = str(account_id)
+         else:
+             account_id = self._toolset.nr_account_id
+
+         if not account_id:
+             raise ValueError("NewRelic account ID is not configured")
+
+         api = self._toolset.create_api_client(account_id)
+
+         query = params["query"]
+         result = api.execute_nrql_query(query)
+
+         result_with_key = {
+             "query": query,
+             "data": result,
+             "is_eu": self._toolset.is_eu_datacenter,
+         }
+
+         # Build New Relic query URL
+         explore_url = _build_newrelic_query_url(
+             base_url=self._toolset.base_url,
+             account_id=account_id,
+             nrql_query=query,
+         )
+
+         return StructuredToolResult(
+             status=StructuredToolResultStatus.SUCCESS,
+             data=result_with_key,
+             params=params,
+             url=explore_url,
+         )
+
+     def get_parameterized_one_liner(self, params) -> str:
+         description = params.get("description", "")
+         return f"{toolset_name_for_one_liner(self._toolset.name)}: Execute NRQL ({description})"
+
+
+ class ListOrganizationAccounts(Tool):
+     def __init__(self, toolset: "NewRelicToolset"):
+         super().__init__(
+             name="newrelic_list_organization_accounts",
+             description=(
+                 "List all account names and IDs accessible in the New Relic organization. "
+                 "Use this tool to:\n"
+                 "1. Find the account ID when given an account name\n"
+                 "2. Map account names to IDs for running NRQL queries\n"
+                 "Returns a list of accounts with 'id' and 'name' fields."
+             ),
+             parameters={},
+         )
+         self._toolset = toolset
+
+     def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
+         api = self._toolset.create_api_client(
+             account_id="0"
+         )  # organization query does not need account_id
+
+         accounts = api.get_organization_accounts()
+
+         result_with_key = {
+             "accounts": accounts,
+             "total_count": len(accounts),
+             "is_eu": self._toolset.is_eu_datacenter,
+         }
+
+         # Build New Relic accounts URL
+         accounts_url = (
+             f"{self._toolset.base_url}/admin-portal/organizations/organization-detail"
+         )
+
+         return StructuredToolResult(
+             status=StructuredToolResultStatus.SUCCESS,
+             data=result_with_key,
+             params=params,
+             url=accounts_url,
+         )
+
+     def get_parameterized_one_liner(self, params) -> str:
+         return f"{toolset_name_for_one_liner(self._toolset.name)}: List organization accounts"
+
+
+ class NewrelicConfig(BaseModel):
+     nr_api_key: str
+     nr_account_id: str
+     is_eu_datacenter: Optional[bool] = False
+     enable_multi_account: Optional[bool] = False
+
+
+ class NewRelicToolset(Toolset):
+     nr_api_key: Optional[str] = None
+     nr_account_id: Optional[str] = None
+     is_eu_datacenter: bool = False
+     enable_multi_account: bool = False
+
+     @property
+     def base_url(self) -> str:
+         """Get the New Relic base URL based on datacenter region."""
+         return (
+             "https://one.eu.newrelic.com"
+             if self.is_eu_datacenter
+             else "https://one.newrelic.com"
+         )
+
+     def create_api_client(self, account_id: Optional[str] = None) -> NewRelicAPI:
+         """Create a NewRelicAPI client instance.
+
+         Args:
+             account_id: Account ID to use. If None, uses the default from config.
+                 Set to "0" for organization-level queries.
+
+         Returns:
+             Configured NewRelicAPI instance
+
+         Raises:
+             ValueError: If API key is not configured
+         """
+         if not self.nr_api_key:
+             raise ValueError("NewRelic API key is not configured")
+
+         effective_account_id = (
+             account_id if account_id is not None else self.nr_account_id
+         )
+
+         if not effective_account_id:
+             raise ValueError("NewRelic Account id is not configured")
+
+         return NewRelicAPI(
+             api_key=self.nr_api_key,
+             account_id=effective_account_id,
+             is_eu_datacenter=self.is_eu_datacenter,
+         )
+
+     def __init__(self):
+         super().__init__(
+             name="newrelic",
+             description="Toolset for interacting with New Relic to fetch logs, traces, and execute freeform NRQL queries",
+             docs_url="https://holmesgpt.dev/data-sources/builtin-toolsets/newrelic/",
+             icon_url="https://companieslogo.com/img/orig/NEWR-de5fcb2e.png?t=1720244493",
+             prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)],  # type: ignore
+             tools=[],
+             tags=[ToolsetTag.CORE],
+         )
+
+     def prerequisites_callable(
+         self, config: dict[str, Any]
+     ) -> tuple[bool, Optional[str]]:
+         if not config:
+             return False, "No configuration provided"
+
+         try:
+             nr_config = NewrelicConfig(**config)
+             self.nr_account_id = nr_config.nr_account_id
+             self.nr_api_key = nr_config.nr_api_key
+             self.is_eu_datacenter = nr_config.is_eu_datacenter or False
+             self.enable_multi_account = nr_config.enable_multi_account or False
+
+             # Tool availability depends on the enable_multi_account flag.
+             self.tools = [ExecuteNRQLQuery(self)]
+             if self.enable_multi_account:
+                 self.tools.append(ListOrganizationAccounts(self))
+             template_file_path = os.path.abspath(
+                 os.path.join(os.path.dirname(__file__), "newrelic.jinja2")
+             )
+             self._load_llm_instructions(jinja_template=f"file://{template_file_path}")
+
+             return True, None
+         except Exception as e:
+             logging.exception("Failed to set up New Relic toolset")
+             return False, str(e)
+
+     def get_example_config(self) -> Dict[str, Any]:
+         return {
+             "nr_api_key": "NRAK-XXXXXXXXXXXXXXXXXXXXXXXXXX",
+             "nr_account_id": "1234567",
+             "is_eu_datacenter": False,
+             "enable_multi_account": False,
+         }
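
The overlay encoding in `_build_newrelic_query_url` is easier to follow with concrete values. Below is a standalone sketch of the same steps; the account ID and NRQL string are illustrative, while the nerdlet IDs and launcher path are copied from the function above:

```python
import base64
import json


def _encode(obj: dict) -> str:
    # Same steps as _build_newrelic_query_url: compact JSON, then base64.
    return base64.b64encode(
        json.dumps(obj, separators=(",", ":")).encode("utf-8")
    ).decode("utf-8")


overlay = {
    "nerdletId": "data-exploration.query-builder",
    "initialActiveInterface": "nrqlEditor",
    "initialQueries": [
        {"accountId": 1234567, "nrql": "SELECT count(*) FROM Transaction"}
    ],
}
pane = {"nerdletId": "dashboards.list", "entityDomain": "VIZ", "entityType": "DASHBOARD"}

url = (
    "https://one.newrelic.com/launcher/dashboards.launcher"
    f"?pane={_encode(pane)}&overlay={_encode(overlay)}"
)
print(url)  # opens the query builder overlay with the NRQL pre-filled
```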
holmes/plugins/toolsets/openshift.yaml
@@ -0,0 +1,283 @@
+ toolsets:
+   openshift/core:
+     description: "Read access to OpenShift cluster resources including projects, routes, and deployment configs"
+     docs_url: "https://holmesgpt.dev/data-sources/builtin-toolsets/openshift/"
+     tags:
+       - core
+     prerequisites:
+       - command: "oc version --client"
+
+     # Note: Many tools in this toolset use transformers with llm_summarize
+     # to automatically summarize large oc outputs when a fast model is configured.
+     # This reduces context window usage while preserving key information for debugging.
+
+     tools:
+       - name: "oc_describe"
+         description: >
+           Run oc describe <kind> <name> -n <namespace>,
+           call this when users ask for a description,
+           for example when a user asks
+           - 'describe pod xyz-123'
+           - 'show service xyz-123 in namespace my-ns'
+           - 'describe route my-route'
+           - 'show deployment config xyz'
+         command: "oc describe {{ kind }} {{ name }}{% if namespace %} -n {{ namespace }}{% endif %}"
+         transformers:
+           - name: llm_summarize
+             config:
+               input_threshold: 1000
+               prompt: |
+                 Summarize this oc describe output focusing on:
+                 - What needs attention or immediate action
+                 - Resource status and health indicators
+                 - Any errors, warnings, or non-standard states
+                 - Key configuration details that could affect functionality
+                 - OpenShift-specific features like routes, image streams, or security context constraints
+                 - When possible, mention exact field names so the user can grep for specific details
+                 - Be concise: aim for ≤ 50% of the original length; avoid repeating defaults/healthy/unchanged details
+                 - Prefer aggregates and counts; list only outliers and actionable items
+                 - Keep grep-friendly: include exact field names/values that matter
+
+       - name: "oc_get_by_name"
+         description: "Run `oc get <kind> <name> --show-labels`"
+         command: "oc get --show-labels -o wide {{ kind }} {{ name }}{% if namespace %} -n {{ namespace }}{% endif %}"
+
+       - name: "oc_get_by_kind_in_namespace"
+         description: "Run `oc get <kind> -n <namespace> --show-labels` to get all resources of a given type in a namespace"
+         command: "oc get --show-labels -o wide {{ kind }} -n {{ namespace }}"
+         transformers:
+           - name: llm_summarize
+             config:
+               input_threshold: 1000
+               prompt: |
+                 Summarize this oc output focusing on:
+                 - What needs attention or immediate action
+                 - Group similar resources into aggregate descriptions
+                 - Make sure to mention outliers, errors, and non-standard states
+                 - List healthy resources as aggregate descriptions
+                 - When listing unhealthy resources, also try to use aggregate descriptions when possible
+                 - When possible, mention exact keywords so the user can rerun the command with | grep <keyword> and drill down
+                 - Be concise and avoid expansion: target ≤ 50% of input size; prefer counts + outliers over full listings
+
+       - name: "oc_get_by_kind_in_cluster"
+         description: "Run `oc get -A <kind> --show-labels` to get all resources of a given type in the cluster"
+         command: "oc get -A --show-labels -o wide {{ kind }}"
+         transformers:
+           - name: llm_summarize
+             config:
+               input_threshold: 1000
+               prompt: |
+                 Summarize this oc output focusing on:
+                 - What needs attention or immediate action
+                 - Group similar resources into a single line and description
+                 - Make sure to mention outliers, errors, and non-standard states
+                 - List healthy resources as aggregate descriptions
+                 - When listing unhealthy resources, also try to use aggregate descriptions when possible
+                 - When possible, mention exact keywords so the user can rerun the command with | grep <keyword> and drill down on the parts they care about
+                 - Strive for ≤ 50% of the original size; keep results compact and grep-friendly (one line per aggregate)
+
+       - name: "oc_find_resource"
+         description: "Run `oc get {{ kind }} -A --show-labels | grep {{ keyword }}` to find a resource where you know a substring of the name, IP, namespace, or labels"
+         command: "oc get -A --show-labels -o wide {{ kind }} | grep {{ keyword }}"
+
+       - name: "oc_get_yaml"
+         description: "Run `oc get -o yaml` on a single OpenShift resource"
+         command: "oc get -o yaml {{ kind }} {{ name }}{% if namespace %} -n {{ namespace }}{% endif %}"
+
+       - name: "oc_events"
+         description: "Retrieve the events for a specific OpenShift resource. `resource_type` can be any kubernetes resource type: 'pod', 'service', 'deployment', 'deploymentconfig', 'route', etc."
+         command: "oc get events --field-selector involvedObject.kind={{ resource_type }},involvedObject.name={{ resource_name }}{% if namespace %} -n {{ namespace }}{% endif %}"
+
+       - name: "oc_projects"
+         description: "List all projects (namespaces) in the OpenShift cluster"
+         command: "oc get projects"
+
+       - name: "oc_project_current"
+         description: "Show the current project (namespace) context"
+         command: "oc project"
+
+       - name: "oc_routes"
+         description: "List all routes in a specific namespace or cluster-wide"
+         command: "oc get routes{% if namespace %} -n {{ namespace }}{% else %} -A{% endif %} -o wide"
+
+       - name: "oc_route_describe"
+         description: "Describe a specific route to see its configuration and status"
+         command: "oc describe route {{ route_name }}{% if namespace %} -n {{ namespace }}{% endif %}"
+
+       - name: "oc_imagestreams"
+         description: "List image streams in a namespace or cluster-wide"
+         command: "oc get imagestreams{% if namespace %} -n {{ namespace }}{% else %} -A{% endif %} -o wide"
+
+       - name: "oc_deploymentconfigs"
+         description: "List deployment configs in a namespace or cluster-wide"
+         command: "oc get deploymentconfigs{% if namespace %} -n {{ namespace }}{% else %} -A{% endif %} -o wide"
+
+       - name: "oc_buildconfigs"
+         description: "List build configs in a namespace or cluster-wide"
+         command: "oc get buildconfigs{% if namespace %} -n {{ namespace }}{% else %} -A{% endif %} -o wide"
+
+       - name: "oc_builds"
+         description: "List builds in a namespace or cluster-wide"
+         command: "oc get builds{% if namespace %} -n {{ namespace }}{% else %} -A{% endif %} -o wide"
+
+       - name: "oc_adm_openshift_audit_logs"
+         description: "Get OpenShift audit logs from a specified node"
+         command: "oc adm node-logs {{ node_name }} --path=openshift-apiserver/audit.log"
+
+       - name: "oc_adm_openshift_audit_logs_with_filter"
+         description: "Get OpenShift audit logs from a specified node with an applied filter"
+         command: "oc adm node-logs {{ node_name }} --path=openshift-apiserver/audit.log | grep {{ grep_filter }}"
+
+       - name: "oc_build_logs"
+         description: "Get logs from a specific build"
+         command: "oc logs build/{{ build_name }}{% if namespace %} -n {{ namespace }}{% endif %}"
+
+       - name: "openshift_jq_query"
+         user_description: "Query OpenShift Resources: oc get {{kind}} -n {{ namespace }} -o json | jq -r {{jq_expr}}"
+         description: >
+           Use oc to get json for all resources of a specific kind and pipe the results to jq to filter them. Do not worry about escaping the jq_expr; it will be done by the system on an unescaped expression that you give, e.g. give an expression like .items[] | .spec.containers[].image | select(test("^registry.redhat.io/") | not)
+         command: oc get {{ kind }} --all-namespaces -o json | jq -r {{ jq_expr }}
+         transformers:
+           - name: llm_summarize
+             config:
+               input_threshold: 1000
+               prompt: |
+                 Summarize this jq query output focusing on:
+                 - Key patterns and commonalities in the data
+                 - Notable outliers, anomalies, or items that need attention
+                 - Group similar results into aggregate descriptions when possible
+                 - Highlight any empty results, null values, or missing data
+                 - When applicable, mention specific resource names, namespaces, or values that stand out
+                 - Organize findings in a structured way that helps with troubleshooting
+                 - Be concise: aim for ≤ 50% of the original text; prioritize aggregates and actionable outliers
+                 - Include grep-ready keys/values; avoid repeating entire objects or unchanged defaults
+
+   openshift/logs:
+     description: "Read pod logs using the oc command"
+     docs_url: "https://holmesgpt.dev/data-sources/builtin-toolsets/openshift/"
+     tags:
+       - core
+     prerequisites:
+       - command: "oc version --client"
+
+     # Note: Log tools use transformers with llm_summarize to automatically
+     # summarize large log outputs when a fast model is configured. This helps
+     # focus on errors, patterns, and key information while reducing context usage.
+
+     tools:
+       - name: "oc_previous_logs"
+         description: "Run `oc logs --previous` on a single pod. Used to fetch logs for a pod that crashed and see logs from before the crash. Never give a deployment name or a resource that is not a pod."
+         command: "oc logs {{pod_name}} -n {{ namespace }} --previous"
+
+       - name: "oc_previous_logs_all_containers"
+         description: "Run `oc logs --previous` on a single pod. Used to fetch logs for a pod that crashed and see logs from before the crash."
+         command: "oc logs {{pod_name}} -n {{ namespace }} --previous --all-containers"
+
+       - name: "oc_container_previous_logs"
+         description: "Run `oc logs --previous` on a single container of a pod. Used to fetch logs for a pod that crashed and see logs from before the crash."
+         command: "oc logs {{pod_name}} -c {{container_name}} -n {{ namespace }} --previous"
+
+       - name: "oc_logs"
+         description: "Run `oc logs` on a single pod. Never give a deployment name or a resource that is not a pod."
+         command: "oc logs {{pod_name}} -n {{ namespace }}"
+         transformers:
+           - name: llm_summarize
+             config:
+               input_threshold: 1000
+               prompt: |
+                 Summarize these pod logs focusing on:
+                 - Errors, exceptions, and warning messages
+                 - Recent activity patterns and trends
+                 - Any authentication, connection, or startup issues
+                 - Performance indicators (response times, throughput)
+                 - Group similar log entries together
+                 - When possible, mention exact error codes or keywords for easier searching
+                 - Be concise: aim for ≤ 50% of the original text; prioritize aggregates and actionable outliers
+                 - Include grep-ready keys/values; avoid repeating entire logs or unchanged defaults
+
+       - name: "oc_logs_all_containers"
+         description: "Run `oc logs` on all containers within a single pod."
+         command: "oc logs {{pod_name}} -n {{ namespace }} --all-containers"
+         transformers:
+           - name: llm_summarize
+             config:
+               input_threshold: 1000
+               prompt: |
+                 Summarize these multi-container pod logs focusing on:
+                 - Errors, exceptions, and warning messages by container
+                 - Inter-container communication patterns
+                 - Any authentication, connection, or startup issues
+                 - Performance indicators and resource usage patterns
+                 - Group similar log entries together by container
+                 - When possible, mention exact error codes or keywords for easier searching
+                 - Strive for ≤ 50% of the original size; keep results compact and grep-friendly (one line per aggregate)
+                 - Prioritize aggregates and actionable outliers over comprehensive details
+
+       - name: "oc_container_logs"
+         description: "Run `oc logs` on a single container within a pod. This is to get the logs of a specific container in a multi-container pod."
+         command: "oc logs {{pod_name}} -c {{container_name}} -n {{ namespace }}"
+
+       - name: "oc_logs_grep"
+         description: "Search for a specific term in the logs of a single pod. Only provide a pod name, not a deployment or other resource."
+         command: "oc logs {{ pod_name }} -n {{ namespace }} | grep {{ search_term }}"
+
+       - name: "oc_logs_all_containers_grep"
+         description: "Search for a specific term in the logs of a single pod across all of its containers. Only provide a pod name, not a deployment or other resource."
+         command: "oc logs {{pod_name}} -n {{ namespace }} --all-containers | grep {{ search_term }}"
+
+   openshift/live-metrics:
+     description: "Provides real-time metrics for pods and nodes using oc"
+     docs_url: "https://holmesgpt.dev/data-sources/builtin-toolsets/openshift/"
+     llm_instructions: |
+       oc_top_pods and oc_top_nodes do not return time series data or metrics that can be used for graphs.
+       Do NOT use oc_top_pods or oc_top_nodes for graph generation - they only show current snapshot data.
+       oc_top_pods and oc_top_nodes are for current status checks, not historical graphs.
+     tags:
+       - core
+     prerequisites:
+       - command: "oc adm top nodes"
+     tools:
+       - name: "oc_top_pods"
+         description: "Retrieves real-time CPU and memory usage for each pod in the cluster."
+         command: >
+           oc adm top pods -A
+       - name: "oc_top_nodes"
+         description: "Retrieves real-time CPU and memory usage for each node in the cluster."
+         command: >
+           oc adm top nodes
+
+   openshift/security:
+     description: "OpenShift security-related resources and configurations"
+     docs_url: "https://holmesgpt.dev/data-sources/builtin-toolsets/openshift/"
+     tags:
+       - core
+     prerequisites:
+       - command: "oc version --client"
+     tools:
+       - name: "oc_scc"
+         description: "List Security Context Constraints (SCCs) in the cluster"
+         command: "oc get scc{% if scc_name %} {{ scc_name }}{% endif %} -o wide"
+
+       - name: "oc_scc_describe"
+         description: "Describe a specific Security Context Constraint"
+         command: "oc describe scc {{ scc_name }}"
+
+       - name: "oc_policy_who_can"
+         description: "Check who can perform a specific action on a resource"
+         command: "oc policy who-can {{ verb }} {{ resource }}{% if namespace %} -n {{ namespace }}{% endif %}"
+
+       - name: "oc_policy_can_i"
+         description: "Check if the current user can perform a specific action"
+         command: "oc policy can-i {{ verb }} {{ resource }}{% if namespace %} -n {{ namespace }}{% endif %}"
+
+       - name: "oc_serviceaccounts"
+         description: "List service accounts in a namespace or cluster-wide"
+         command: "oc get serviceaccounts{% if namespace %} -n {{ namespace }}{% else %} -A{% endif %} -o wide"
+
+       - name: "oc_rolebindings"
+         description: "List role bindings in a namespace or cluster-wide"
+         command: "oc get rolebindings{% if namespace %} -n {{ namespace }}{% else %} -A{% endif %} -o wide"
+
+       - name: "oc_clusterrolebindings"
+         description: "List cluster role bindings"
+         command: "oc get clusterrolebindings -o wide"
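
The `command` fields above rely on Jinja2 treating an undefined variable as falsy, which is what makes the `-n {{ namespace }}` flag optional. A quick sketch of how one of them renders, assuming tool parameters are passed straight to the template as plain variables (the pod and namespace names are illustrative):

```python
from jinja2 import Template

# Command template copied verbatim from oc_describe above.
cmd = Template(
    "oc describe {{ kind }} {{ name }}"
    "{% if namespace %} -n {{ namespace }}{% endif %}"
)

print(cmd.render(kind="pod", name="web-1", namespace="shop"))
# oc describe pod web-1 -n shop

print(cmd.render(kind="node", name="worker-0"))
# oc describe node worker-0  (undefined `namespace` is falsy, so the flag is omitted)
```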