holmesgpt 0.13.2__py3-none-any.whl → 0.18.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- holmes/__init__.py +3 -5
- holmes/clients/robusta_client.py +20 -6
- holmes/common/env_vars.py +58 -3
- holmes/common/openshift.py +1 -1
- holmes/config.py +123 -148
- holmes/core/conversations.py +71 -15
- holmes/core/feedback.py +191 -0
- holmes/core/investigation.py +31 -39
- holmes/core/investigation_structured_output.py +3 -3
- holmes/core/issue.py +1 -1
- holmes/core/llm.py +508 -88
- holmes/core/models.py +108 -4
- holmes/core/openai_formatting.py +14 -1
- holmes/core/prompt.py +48 -3
- holmes/core/runbooks.py +1 -0
- holmes/core/safeguards.py +8 -6
- holmes/core/supabase_dal.py +295 -100
- holmes/core/tool_calling_llm.py +489 -428
- holmes/core/tools.py +325 -56
- holmes/core/tools_utils/token_counting.py +21 -0
- holmes/core/tools_utils/tool_context_window_limiter.py +40 -0
- holmes/core/tools_utils/tool_executor.py +0 -13
- holmes/core/tools_utils/toolset_utils.py +1 -0
- holmes/core/toolset_manager.py +191 -5
- holmes/core/tracing.py +19 -3
- holmes/core/transformers/__init__.py +23 -0
- holmes/core/transformers/base.py +63 -0
- holmes/core/transformers/llm_summarize.py +175 -0
- holmes/core/transformers/registry.py +123 -0
- holmes/core/transformers/transformer.py +32 -0
- holmes/core/truncation/compaction.py +94 -0
- holmes/core/truncation/dal_truncation_utils.py +23 -0
- holmes/core/truncation/input_context_window_limiter.py +219 -0
- holmes/interactive.py +228 -31
- holmes/main.py +23 -40
- holmes/plugins/interfaces.py +2 -1
- holmes/plugins/prompts/__init__.py +2 -1
- holmes/plugins/prompts/_fetch_logs.jinja2 +31 -6
- holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
- holmes/plugins/prompts/_runbook_instructions.jinja2 +24 -12
- holmes/plugins/prompts/base_user_prompt.jinja2 +7 -0
- holmes/plugins/prompts/conversation_history_compaction.jinja2 +89 -0
- holmes/plugins/prompts/generic_ask.jinja2 +0 -4
- holmes/plugins/prompts/generic_ask_conversation.jinja2 +0 -1
- holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +0 -1
- holmes/plugins/prompts/generic_investigation.jinja2 +0 -1
- holmes/plugins/prompts/investigation_procedure.jinja2 +50 -1
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +0 -1
- holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +0 -1
- holmes/plugins/runbooks/__init__.py +145 -17
- holmes/plugins/runbooks/catalog.json +2 -0
- holmes/plugins/sources/github/__init__.py +4 -2
- holmes/plugins/sources/prometheus/models.py +1 -0
- holmes/plugins/toolsets/__init__.py +44 -27
- holmes/plugins/toolsets/aks-node-health.yaml +46 -0
- holmes/plugins/toolsets/aks.yaml +64 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +38 -47
- holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +3 -2
- holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
- holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +3 -2
- holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +3 -1
- holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +3 -1
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +12 -13
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +15 -12
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +15 -12
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +11 -11
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +11 -9
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +15 -12
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +15 -15
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/utils.py +0 -32
- holmes/plugins/toolsets/bash/argocd/__init__.py +3 -3
- holmes/plugins/toolsets/bash/aws/__init__.py +4 -4
- holmes/plugins/toolsets/bash/azure/__init__.py +4 -4
- holmes/plugins/toolsets/bash/bash_toolset.py +11 -15
- holmes/plugins/toolsets/bash/common/bash.py +23 -13
- holmes/plugins/toolsets/bash/common/bash_command.py +1 -1
- holmes/plugins/toolsets/bash/common/stringify.py +1 -1
- holmes/plugins/toolsets/bash/kubectl/__init__.py +2 -1
- holmes/plugins/toolsets/bash/kubectl/constants.py +0 -1
- holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +3 -4
- holmes/plugins/toolsets/bash/parse_command.py +12 -13
- holmes/plugins/toolsets/cilium.yaml +284 -0
- holmes/plugins/toolsets/connectivity_check.py +124 -0
- holmes/plugins/toolsets/coralogix/api.py +132 -119
- holmes/plugins/toolsets/coralogix/coralogix.jinja2 +14 -0
- holmes/plugins/toolsets/coralogix/toolset_coralogix.py +219 -0
- holmes/plugins/toolsets/coralogix/utils.py +15 -79
- holmes/plugins/toolsets/datadog/datadog_api.py +525 -26
- holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +55 -11
- holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +3 -3
- holmes/plugins/toolsets/datadog/datadog_models.py +59 -0
- holmes/plugins/toolsets/datadog/datadog_url_utils.py +213 -0
- holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +165 -28
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +417 -241
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +234 -214
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +167 -79
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +374 -363
- holmes/plugins/toolsets/elasticsearch/__init__.py +6 -0
- holmes/plugins/toolsets/elasticsearch/elasticsearch.py +834 -0
- holmes/plugins/toolsets/elasticsearch/opensearch_ppl_query_docs.jinja2 +1616 -0
- holmes/plugins/toolsets/elasticsearch/opensearch_query_assist.py +78 -0
- holmes/plugins/toolsets/elasticsearch/opensearch_query_assist_instructions.jinja2 +223 -0
- holmes/plugins/toolsets/git.py +54 -50
- holmes/plugins/toolsets/grafana/base_grafana_toolset.py +16 -4
- holmes/plugins/toolsets/grafana/common.py +13 -29
- holmes/plugins/toolsets/grafana/grafana_tempo_api.py +455 -0
- holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +25 -0
- holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +191 -0
- holmes/plugins/toolsets/grafana/loki_api.py +4 -0
- holmes/plugins/toolsets/grafana/toolset_grafana.py +293 -89
- holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +49 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +820 -292
- holmes/plugins/toolsets/grafana/trace_parser.py +4 -3
- holmes/plugins/toolsets/internet/internet.py +15 -16
- holmes/plugins/toolsets/internet/notion.py +9 -11
- holmes/plugins/toolsets/investigator/core_investigation.py +44 -36
- holmes/plugins/toolsets/investigator/model.py +3 -1
- holmes/plugins/toolsets/json_filter_mixin.py +134 -0
- holmes/plugins/toolsets/kafka.py +36 -42
- holmes/plugins/toolsets/kubernetes.yaml +317 -113
- holmes/plugins/toolsets/kubernetes_logs.py +9 -9
- holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
- holmes/plugins/toolsets/logging_utils/logging_api.py +94 -8
- holmes/plugins/toolsets/mcp/toolset_mcp.py +218 -64
- holmes/plugins/toolsets/newrelic/new_relic_api.py +165 -0
- holmes/plugins/toolsets/newrelic/newrelic.jinja2 +65 -0
- holmes/plugins/toolsets/newrelic/newrelic.py +320 -0
- holmes/plugins/toolsets/openshift.yaml +283 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +1202 -421
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +54 -5
- holmes/plugins/toolsets/prometheus/utils.py +28 -0
- holmes/plugins/toolsets/rabbitmq/api.py +23 -4
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +13 -14
- holmes/plugins/toolsets/robusta/robusta.py +239 -68
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +157 -27
- holmes/plugins/toolsets/service_discovery.py +1 -1
- holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
- holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
- holmes/plugins/toolsets/utils.py +88 -0
- holmes/utils/config_utils.py +91 -0
- holmes/utils/connection_utils.py +31 -0
- holmes/utils/console/result.py +10 -0
- holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
- holmes/utils/env.py +7 -0
- holmes/utils/file_utils.py +2 -1
- holmes/utils/global_instructions.py +60 -11
- holmes/utils/holmes_status.py +6 -4
- holmes/utils/holmes_sync_toolsets.py +0 -2
- holmes/utils/krr_utils.py +188 -0
- holmes/utils/log.py +15 -0
- holmes/utils/markdown_utils.py +2 -3
- holmes/utils/memory_limit.py +58 -0
- holmes/utils/sentry_helper.py +64 -0
- holmes/utils/stream.py +69 -8
- holmes/utils/tags.py +4 -3
- holmes/version.py +37 -15
- holmesgpt-0.18.4.dist-info/LICENSE +178 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/METADATA +35 -31
- holmesgpt-0.18.4.dist-info/RECORD +258 -0
- holmes/core/performance_timing.py +0 -72
- holmes/plugins/toolsets/aws.yaml +0 -80
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +0 -112
- holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +0 -310
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +0 -739
- holmes/plugins/toolsets/grafana/grafana_api.py +0 -42
- holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
- holmes/plugins/toolsets/newrelic.py +0 -231
- holmes/plugins/toolsets/opensearch/opensearch.py +0 -257
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +0 -161
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +0 -218
- holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +0 -12
- holmes/plugins/toolsets/opensearch/opensearch_utils.py +0 -166
- holmes/plugins/toolsets/servicenow/install.md +0 -37
- holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
- holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
- holmes/utils/keygen_utils.py +0 -6
- holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
- holmesgpt-0.13.2.dist-info/RECORD +0 -234
- /holmes/plugins/toolsets/{opensearch → newrelic}/__init__.py +0 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/WHEEL +0 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
New Relic provides distributed tracing data along with logs and metrics.
|
|
2
|
+
|
|
3
|
+
{% if config.enable_multi_account %}
|
|
4
|
+
**MULTI-ACCOUNT MODE**: You have access to multiple New Relic accounts in this organization.
|
|
5
|
+
|
|
6
|
+
### Important Multi-Account Workflow
|
|
7
|
+
|
|
8
|
+
**Each NRQL query MUST include the account_id parameter.**
|
|
9
|
+
1. A New Relic account ID is a numeric identifier, typically a 6–8 digit integer (e.g., 1234567).
|
|
10
|
+
|
|
11
|
+
**Here's how to determine which account_id to use**
|
|
12
|
+
|
|
13
|
+
1. **ALWAYS Check context first**: Look for common new relic labels or tags with the account id or name such as `nrAccountId` `accountId` or `account` in the provided context
|
|
14
|
+
(e.g., from alerts, traces, or previous queries). If found, use that value.
|
|
15
|
+
|
|
16
|
+
2. **ALWAYS CHECK if Account name provided**: If the user mentions a specific account name (e.g., "Production Account", "Staging"):
|
|
17
|
+
- YOU MUST First call `newrelic_list_organization_accounts` to get the list of all accounts
|
|
18
|
+
- Find the matching account by name and use its ID
|
|
19
|
+
|
|
20
|
+
3. **No account specified**: If you can't find any account ID or name based on the context of the question.
|
|
21
|
+
- Use the default account ID value of the `newrelic_execute_nrql_query` function as the account ID.
|
|
22
|
+
- Let the user know you have used the default account.
|
|
23
|
+
|
|
24
|
+
**Important**: The context may contain account IDs in various places - check trace data, alert metadata, or previous query results for `nrAccountId`, `accountId`, `account.id` or similar fields.
|
|
25
|
+
|
|
26
|
+
{% endif %}
|
|
27
|
+
Assume every application has New Relic tracing data.
|
|
28
|
+
|
|
29
|
+
Use `nrql_query` to run a NRQL query.
|
|
30
|
+
|
|
31
|
+
**NRQL (New Relic Query Language)** is used to query all telemetry data in New Relic. The main event types are:
|
|
32
|
+
|
|
33
|
+
- **Transaction**: High-level APM data (requests, API calls)
|
|
34
|
+
- **Span**: Distributed tracing data (individual operations)
|
|
35
|
+
- **Log**: Centralized log data
|
|
36
|
+
- **Metric**: Time-series metrics data.
|
|
37
|
+
|
|
38
|
+
### Usage Workflow
|
|
39
|
+
|
|
40
|
+
#### 1. Discovering Available Data
|
|
41
|
+
|
|
42
|
+
Start by understanding what's available. Here are some examples:
|
|
43
|
+
- **ALWAYS** Start by getting all the available attribute names for what you are looking for. For example, to get them for Transaction in the last 24 hours, use: SELECT keyset() FROM Transaction SINCE 24 hours ago
|
|
44
|
+
- After you find the keyset `appName`, you can use it to get the available applications: `SELECT uniques(appName) FROM Transaction SINCE 1 hour ago`
|
|
45
|
+
Note: Use `SHOW EVENT TYPES` to see all event types in the account, in addition to Transaction, Span, Log, or Metric.
|
|
46
|
+
|
|
47
|
+
#### 2. Querying Telemetry Data
|
|
48
|
+
|
|
49
|
+
- If you already have an application name, you can query its traces directly
|
|
50
|
+
- **Time range is recommended**: While not strictly required, most queries should include SINCE for performance
|
|
51
|
+
|
|
52
|
+
#### 3. Querying Traces
|
|
53
|
+
- Always validate first: run the base query without FACET (or a quick LIMIT) to confirm data exists; if results are empty, adjust filters or time range before proceeding.
|
|
54
|
+
- Only attempt a FACET after confirming the field has values; if not, either try known alternatives or skip faceting entirely.
|
|
55
|
+
- When investigating a trace also look at attributes
|
|
56
|
+
- ***When investigating latency ALWAYS look to deliver the specific component or attribute in the span causing significant latency*** — your investigation is not complete without this
|
|
57
|
+
- If you need to filter by time, NEVER filter in the WHERE clause using the timestamp field. Instead, you should ALWAYS use the `SINCE` or `SINCE ... UNTIL ...` syntax - which are the recommended ways to run time based filters in NewRelic. Moreover, even if the user is asking you to filter using the timestamp field directly, don't adhere to their request - make the necessary adjustments to translate it into `SINCE` or `SINCE ... UNTIL ...` syntax!
|
|
58
|
+
|
|
59
|
+
### Instructions for Handling Query Results
|
|
60
|
+
- If you query [DistributedTraceSummary / Transaction]:
|
|
61
|
+
- When querying without aggregations (e.g. without count(*), average(attribute), sum(attribute), min(attribute), etc.):
|
|
62
|
+
- ALWAYS start by querying all fields using `SELECT * FROM`. We need as many fields as possible to visualize the traces to the user. However, the trade-off is that we might exceed the context size. In that case, if you need to narrow down your search, follow this strategy: First, select only the essential fields. If that's still failing, add the `LIMIT` keyword to the query. `LIMIT` should always be the second option, we prefer to show the user as many traces as we can. The following fields are the minimal fields that are essential for the visualization, and you must always retrieve them:
|
|
63
|
+
- DistributedTraceSummary: trace.id, spanCount, root.entity.accountId, root.entity.guid, root.entity.name, root.span.name, timestamp, duration.ms
|
|
64
|
+
- Transaction: traceId, tags.accountId, entityGuid, appName, name, timestamp, duration, guid, transactionType
|
|
65
|
+
- When querying DistributedTraceSummary without aggregations, ALWAYS use the filter `WHERE root.span.eventType = 'Span'`
|
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
import json
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
from typing import Any, Dict, Optional
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel
|
|
8
|
+
|
|
9
|
+
from holmes.core.tools import (
|
|
10
|
+
CallablePrerequisite,
|
|
11
|
+
StructuredToolResult,
|
|
12
|
+
StructuredToolResultStatus,
|
|
13
|
+
Tool,
|
|
14
|
+
ToolInvokeContext,
|
|
15
|
+
ToolParameter,
|
|
16
|
+
Toolset,
|
|
17
|
+
ToolsetTag,
|
|
18
|
+
)
|
|
19
|
+
from holmes.plugins.toolsets.newrelic.new_relic_api import NewRelicAPI
|
|
20
|
+
from holmes.plugins.toolsets.utils import toolset_name_for_one_liner
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _build_newrelic_query_url(
    base_url: str,
    account_id: str,
    nrql_query: str,
) -> Optional[str]:
    """Assemble a deep link that opens New Relic's NRQL query builder pre-filled.

    New Relic has no officially supported URL scheme for sharing queries, so
    this relies on a workaround: the dashboards launcher plus an ``overlay``
    query parameter that opens the query-builder nerdlet with the given NRQL
    already populated.

    Returns the URL string, or None when it cannot be built (e.g. the account
    id is not numeric).
    """

    def _pack(payload: dict) -> str:
        # New Relic expects compact JSON, base64-encoded, in the query string.
        compact = json.dumps(payload, separators=(",", ":"))
        return base64.b64encode(compact.encode("utf-8")).decode("utf-8")

    try:
        numeric_account = (
            int(account_id) if isinstance(account_id, str) else account_id
        )

        overlay_param = _pack(
            {
                "nerdletId": "data-exploration.query-builder",
                "initialActiveInterface": "nrqlEditor",
                "initialQueries": [
                    {
                        "accountId": numeric_account,
                        "nrql": nrql_query,
                    }
                ],
            }
        )

        pane_param = _pack(
            {
                "nerdletId": "dashboards.list",
                "entityDomain": "VIZ",
                "entityType": "DASHBOARD",
            }
        )

        return (
            f"{base_url}/launcher/dashboards.launcher"
            f"?pane={pane_param}"
            f"&overlay={overlay_param}"
        )
    except Exception:
        # Best-effort helper: a shareable link is a nicety, never a failure.
        return None
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class ExecuteNRQLQuery(Tool):
    """Tool that executes an arbitrary NRQL query against a New Relic account.

    In multi-account mode an extra ``account_id`` parameter is exposed so the
    LLM can target any account in the organization; otherwise the toolset's
    configured default account is always used.
    """

    def __init__(self, toolset: "NewRelicToolset"):
        parameters = {
            "query": ToolParameter(
                description="""The NRQL query string to execute.

MANDATORY: Before querying any event type, ALWAYS run `SELECT keyset() FROM <EventType> SINCE <timeframe>` to discover available attributes. Never use attributes without confirming they exist first. Make sure to remember which fields are stringKeys, numericKeys or booleanKeys as this will be important in subsequent queries.

Example: Before querying Transactions, run: `SELECT keyset() FROM Transaction SINCE 24 hours ago`

### ⚠️ Critical Rule: NRQL `FACET` Usage

When using **FACET** in NRQL:
- Any **non-constant value** in the `SELECT` clause **must be aggregated**.
- The attribute you **FACET** on must **not appear in `SELECT`** unless it's wrapped in an aggregation.

#### ✅ Correct
```nrql
-- Aggregated metric + facet
SELECT count(*) FROM Transaction FACET transactionType

-- Multiple aggregations with facet
SELECT count(*), average(duration) FROM Transaction FACET transactionType
```

#### ❌ Incorrect
```nrql
-- Not allowed: raw attribute in SELECT
SELECT count(*), transactionType FROM Transaction FACET transactionType
```
""",
                type="string",
                required=True,
            ),
            "description": ToolParameter(
                description="A brief 6 word human understandable description of the query you are running.",
                type="string",
                required=True,
            ),
            "query_type": ToolParameter(
                description="Either 'Metrics', 'Logs', 'Traces', 'Discover Attributes' or 'Other'.",
                type="string",
                required=True,
            ),
        }

        # Add account_id parameter only in multi-account mode
        if toolset.enable_multi_account:
            parameters["account_id"] = ToolParameter(
                description=(
                    f"A New Relic account ID is a numeric identifier, typically a 6-8 digit integer (e.g., 1234567). It contains only digits, has no prefixes or separators, and uniquely identifies a New Relic account. default: {toolset.nr_account_id}"
                ),
                type="integer",
                required=True,
            )

        super().__init__(
            name="newrelic_execute_nrql_query",
            description="Get Traces, APM, Spans, Logs and more by executing a NRQL query in New Relic. "
            "Returns the result of the NRQL function. "
            "⚠️ CRITICAL: NRQL silently returns empty results for invalid queries instead of errors. "
            "If you get empty results, your query likely has issues such as: "
            "1) Wrong attribute names (use SELECT keyset() first to verify), "
            "2) Type mismatches (string vs numeric fields), "
            "3) Wrong event type. "
            "Always verify attribute names and types before querying.",
            parameters=parameters,
        )
        self._toolset = toolset

    def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
        """Run the NRQL query and return its rows plus a query-builder deep link.

        Raises:
            ValueError: if no account ID can be resolved from params or config.
        """
        # Resolve the account: explicit param wins in multi-account mode,
        # otherwise fall back to the toolset's configured default.
        if self._toolset.enable_multi_account:
            account_id = params.get("account_id") or self._toolset.nr_account_id
            account_id = str(account_id)
        else:
            account_id = self._toolset.nr_account_id

        if not account_id:
            raise ValueError("NewRelic account ID is not configured")

        api = self._toolset.create_api_client(account_id)

        query = params["query"]
        result = api.execute_nrql_query(query)

        # Echo the query and region flag alongside the data so downstream
        # consumers can render/link the result correctly.
        result_with_key = {
            "query": query,
            "data": result,
            "is_eu": self._toolset.is_eu_datacenter,
        }

        # Build New Relic query URL (best-effort; may be None).
        explore_url = _build_newrelic_query_url(
            base_url=self._toolset.base_url,
            account_id=account_id,
            nrql_query=query,
        )

        return StructuredToolResult(
            status=StructuredToolResultStatus.SUCCESS,
            data=result_with_key,
            params=params,
            url=explore_url,
        )

    def get_parameterized_one_liner(self, params) -> str:
        """One-line human-readable summary of this invocation for UI/log display."""
        description = params.get("description", "")
        return f"{toolset_name_for_one_liner(self._toolset.name)}: Execute NRQL ({description})"
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
class ListOrganizationAccounts(Tool):
    """Tool that enumerates every account visible in the New Relic organization."""

    def __init__(self, toolset: "NewRelicToolset"):
        tool_description = (
            "List all account names and IDs accessible in the New Relic organization. "
            "Use this tool to:\n"
            "1. Find the account ID when given an account name\n"
            "2. Map account names to IDs for running NRQL queries\n"
            "Returns a list of accounts with 'id' and 'name' fields."
        )
        super().__init__(
            name="newrelic_list_organization_accounts",
            description=tool_description,
            parameters={},
        )
        self._toolset = toolset

    def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
        """Fetch the organization's accounts and return them with a portal link."""
        # Organization-level queries are not scoped to a real account, so a
        # placeholder id of "0" is passed when building the API client.
        client = self._toolset.create_api_client(account_id="0")

        account_list = client.get_organization_accounts()

        payload = {
            "accounts": account_list,
            "total_count": len(account_list),
            "is_eu": self._toolset.is_eu_datacenter,
        }

        # Deep link into the admin portal's organization detail view.
        portal_url = (
            f"{self._toolset.base_url}/admin-portal/organizations/organization-detail"
        )

        return StructuredToolResult(
            status=StructuredToolResultStatus.SUCCESS,
            data=payload,
            params=params,
            url=portal_url,
        )

    def get_parameterized_one_liner(self, params) -> str:
        """One-line human-readable summary of this invocation for UI/log display."""
        prefix = toolset_name_for_one_liner(self._toolset.name)
        return f"{prefix}: List organization accounts"
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
class NewrelicConfig(BaseModel):
    """Validated user-supplied configuration for the New Relic toolset."""

    # API key used to authenticate against New Relic (e.g. "NRAK-...").
    nr_api_key: str
    # Default account ID used when a query does not name one explicitly.
    nr_account_id: str
    # True when the account lives in New Relic's EU datacenter (different hosts).
    is_eu_datacenter: Optional[bool] = False
    # Enables cross-account queries and the account-listing tool.
    enable_multi_account: Optional[bool] = False
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
class NewRelicToolset(Toolset):
    """Toolset exposing NRQL querying (and, optionally, account listing) for New Relic.

    The attributes below are populated from user config in
    ``prerequisites_callable``; until that runs they hold their defaults.
    """

    # Populated from NewrelicConfig during prerequisite validation.
    nr_api_key: Optional[str] = None
    nr_account_id: Optional[str] = None
    is_eu_datacenter: bool = False
    enable_multi_account: bool = False

    @property
    def base_url(self) -> str:
        """Get the New Relic base URL based on datacenter region."""
        return (
            "https://one.eu.newrelic.com"
            if self.is_eu_datacenter
            else "https://one.newrelic.com"
        )

    def create_api_client(self, account_id: Optional[str] = None) -> NewRelicAPI:
        """Create a NewRelicAPI client instance.

        Args:
            account_id: Account ID to use. If None, uses the default from config.
                Set to "0" for organization-level queries.

        Returns:
            Configured NewRelicAPI instance

        Raises:
            ValueError: If API key is not configured
        """
        if not self.nr_api_key:
            raise ValueError("NewRelic API key is not configured")

        # Explicit argument wins; otherwise fall back to the configured default.
        effective_account_id = (
            account_id if account_id is not None else self.nr_account_id
        )

        if not effective_account_id:
            raise ValueError("NewRelic Account id is not configured")

        return NewRelicAPI(
            api_key=self.nr_api_key,
            account_id=effective_account_id,
            is_eu_datacenter=self.is_eu_datacenter,
        )

    def __init__(self):
        # Tools are registered later in prerequisites_callable, once the
        # user configuration has been validated.
        super().__init__(
            name="newrelic",
            description="Toolset for interacting with New Relic to fetch logs, traces, and execute freeform NRQL queries",
            docs_url="https://holmesgpt.dev/data-sources/builtin-toolsets/newrelic/",
            icon_url="https://companieslogo.com/img/orig/NEWR-de5fcb2e.png?t=1720244493",
            prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)],  # type: ignore
            tools=[],
            tags=[ToolsetTag.CORE],
        )

    def prerequisites_callable(
        self, config: dict[str, Any]
    ) -> tuple[bool, Optional[str]]:
        """Validate user config, register tools, and load LLM instructions.

        Returns:
            (True, None) on success, or (False, error_message) when the
            configuration is missing or invalid.
        """
        if not config:
            return False, "No configuration provided"

        try:
            # Pydantic validation raises on missing/invalid fields; caught below.
            nr_config = NewrelicConfig(**config)
            self.nr_account_id = nr_config.nr_account_id
            self.nr_api_key = nr_config.nr_api_key
            self.is_eu_datacenter = nr_config.is_eu_datacenter or False
            self.enable_multi_account = nr_config.enable_multi_account or False

            # Tool uses enable_multi_account flag.
            self.tools = [ExecuteNRQLQuery(self)]
            if self.enable_multi_account:
                self.tools.append(ListOrganizationAccounts(self))
            # Load the prompt instructions shipped next to this module.
            template_file_path = os.path.abspath(
                os.path.join(os.path.dirname(__file__), "newrelic.jinja2")
            )
            self._load_llm_instructions(jinja_template=f"file://{template_file_path}")

            return True, None
        except Exception as e:
            # Deliberate broad catch: prerequisite failures must be reported,
            # not raised, so the toolset is simply marked unavailable.
            logging.exception("Failed to set up New Relic toolset")
            return False, str(e)

    def get_example_config(self) -> Dict[str, Any]:
        """Return a sample configuration dict shown to users in docs/UI."""
        return {
            "nr_api_key": "NRAK-XXXXXXXXXXXXXXXXXXXXXXXXXX",
            "nr_account_id": "1234567",
            "is_eu_datacenter": False,
            "enable_multi_account": False,
        }
|
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
toolsets:
|
|
2
|
+
openshift/core:
|
|
3
|
+
description: "Read access to OpenShift cluster resources including projects, routes, and deployment configs"
|
|
4
|
+
docs_url: "https://holmesgpt.dev/data-sources/builtin-toolsets/openshift/"
|
|
5
|
+
tags:
|
|
6
|
+
- core
|
|
7
|
+
prerequisites:
|
|
8
|
+
- command: "oc version --client"
|
|
9
|
+
|
|
10
|
+
# Note: Many tools in this toolset use transformers with llm_summarize
|
|
11
|
+
# to automatically summarize large oc outputs when a fast model is configured.
|
|
12
|
+
# This reduces context window usage while preserving key information for debugging.
|
|
13
|
+
|
|
14
|
+
tools:
|
|
15
|
+
- name: "oc_describe"
|
|
16
|
+
description: >
|
|
17
|
+
Run oc describe <kind> <name> -n <namespace>,
|
|
18
|
+
call this when users ask for description,
|
|
19
|
+
for example when a user asks
|
|
20
|
+
- 'describe pod xyz-123'
|
|
21
|
+
- 'show service xyz-123 in namespace my-ns'
|
|
22
|
+
- 'describe route my-route'
|
|
23
|
+
- 'show deployment config xyz'
|
|
24
|
+
command: "oc describe {{ kind }} {{ name }}{% if namespace %} -n {{ namespace }}{% endif %}"
|
|
25
|
+
transformers:
|
|
26
|
+
- name: llm_summarize
|
|
27
|
+
config:
|
|
28
|
+
input_threshold: 1000
|
|
29
|
+
prompt: |
|
|
30
|
+
Summarize this oc describe output focusing on:
|
|
31
|
+
- What needs attention or immediate action
|
|
32
|
+
- Resource status and health indicators
|
|
33
|
+
- Any errors, warnings, or non-standard states
|
|
34
|
+
- Key configuration details that could affect functionality
|
|
35
|
+
- OpenShift-specific features like routes, image streams, or security context constraints
|
|
36
|
+
- When possible, mention exact field names so the user can grep for specific details
|
|
37
|
+
- Be concise: aim for ≤ 50% of the original length; avoid repeating defaults/healthy/unchanged details
|
|
38
|
+
- Prefer aggregates and counts; list only outliers and actionable items
|
|
39
|
+
- Keep grep-friendly: include exact field names/values that matter
|
|
40
|
+
|
|
41
|
+
- name: "oc_get_by_name"
|
|
42
|
+
description: "Run `oc get <kind> <name> --show-labels`"
|
|
43
|
+
command: "oc get --show-labels -o wide {{ kind }} {{ name }}{% if namespace %} -n {{ namespace }}{% endif %}"
|
|
44
|
+
|
|
45
|
+
- name: "oc_get_by_kind_in_namespace"
|
|
46
|
+
description: "Run `oc get <kind> -n <namespace> --show-labels` to get all resources of a given type in namespace"
|
|
47
|
+
command: "oc get --show-labels -o wide {{ kind }} -n {{ namespace }}"
|
|
48
|
+
transformers:
|
|
49
|
+
- name: llm_summarize
|
|
50
|
+
config:
|
|
51
|
+
input_threshold: 1000
|
|
52
|
+
prompt: |
|
|
53
|
+
Summarize this oc output focusing on:
|
|
54
|
+
- What needs attention or immediate action
|
|
55
|
+
- Group similar resources into aggregate descriptions
|
|
56
|
+
- Make sure to mention outliers, errors, and non-standard states
|
|
57
|
+
- List healthy resources as aggregate descriptions
|
|
58
|
+
- When listing unhealthy resources, also try to use aggregate descriptions when possible
|
|
59
|
+
- When possible, mention exact keywords so the user can rerun the command with | grep <keyword> and drill down
|
|
60
|
+
- Be concise and avoid expansion: target ≤ 50% of input size; prefer counts + outliers over full listings
|
|
61
|
+
|
|
62
|
+
- name: "oc_get_by_kind_in_cluster"
|
|
63
|
+
description: "Run `oc get -A <kind> --show-labels` to get all resources of a given type in the cluster"
|
|
64
|
+
command: "oc get -A --show-labels -o wide {{ kind }}"
|
|
65
|
+
transformers:
|
|
66
|
+
- name: llm_summarize
|
|
67
|
+
config:
|
|
68
|
+
input_threshold: 1000
|
|
69
|
+
prompt: |
|
|
70
|
+
Summarize this oc output focusing on:
|
|
71
|
+
- What needs attention or immediate action
|
|
72
|
+
- Group similar resources into a single line and description
|
|
73
|
+
- Make sure to mention outliers, errors, and non-standard states
|
|
74
|
+
- List healthy resources as aggregate descriptions
|
|
75
|
+
- When listing unhealthy resources, also try to use aggregate descriptions when possible
|
|
76
|
+
- When possible, mention exact keywords so the user can rerun the command with | grep <keyword> and drill down on the parts they care about
|
|
77
|
+
- Strive for ≤ 50% of the original size; keep results compact and grep-friendly (one line per aggregate)
|
|
78
|
+
|
|
79
|
+
- name: "oc_find_resource"
|
|
80
|
+
description: "Run `oc get {{ kind }} -A --show-labels | grep {{ keyword }}` to find a resource where you know a substring of the name, IP, namespace, or labels"
|
|
81
|
+
command: "oc get -A --show-labels -o wide {{ kind }} | grep {{ keyword }}"
|
|
82
|
+
|
|
83
|
+
- name: "oc_get_yaml"
|
|
84
|
+
description: "Run `oc get -o yaml` on a single OpenShift resource"
|
|
85
|
+
command: "oc get -o yaml {{ kind }} {{ name }}{% if namespace %} -n {{ namespace }}{% endif %}"
|
|
86
|
+
|
|
87
|
+
- name: "oc_events"
|
|
88
|
+
description: "Retrieve the events for a specific OpenShift resource. `resource_type` can be any kubernetes resource type: 'pod', 'service', 'deployment', 'deploymentconfig', 'route', etc."
|
|
89
|
+
command: "oc get events --field-selector involvedObject.kind={{ resource_type }},involvedObject.name={{ resource_name }}{% if namespace %} -n {{ namespace }}{% endif %}"
|
|
90
|
+
|
|
91
|
+
- name: "oc_projects"
|
|
92
|
+
description: "List all projects (namespaces) in the OpenShift cluster"
|
|
93
|
+
command: "oc get projects"
|
|
94
|
+
|
|
95
|
+
- name: "oc_project_current"
|
|
96
|
+
description: "Show the current project (namespace) context"
|
|
97
|
+
command: "oc project"
|
|
98
|
+
|
|
99
|
+
- name: "oc_routes"
|
|
100
|
+
description: "List all routes in a specific namespace or cluster-wide"
|
|
101
|
+
command: "oc get routes{% if namespace %} -n {{ namespace }}{% else %} -A{% endif %} -o wide"
|
|
102
|
+
|
|
103
|
+
- name: "oc_route_describe"
|
|
104
|
+
description: "Describe a specific route to see its configuration and status"
|
|
105
|
+
command: "oc describe route {{ route_name }}{% if namespace %} -n {{ namespace }}{% endif %}"
|
|
106
|
+
|
|
107
|
+
- name: "oc_imagestreams"
|
|
108
|
+
description: "List image streams in a namespace or cluster-wide"
|
|
109
|
+
command: "oc get imagestreams{% if namespace %} -n {{ namespace }}{% else %} -A{% endif %} -o wide"
|
|
110
|
+
|
|
111
|
+
- name: "oc_deploymentconfigs"
|
|
112
|
+
description: "List deployment configs in a namespace or cluster-wide"
|
|
113
|
+
command: "oc get deploymentconfigs{% if namespace %} -n {{ namespace }}{% else %} -A{% endif %} -o wide"
|
|
114
|
+
|
|
115
|
+
- name: "oc_buildconfigs"
|
|
116
|
+
description: "List build configs in a namespace or cluster-wide"
|
|
117
|
+
command: "oc get buildconfigs{% if namespace %} -n {{ namespace }}{% else %} -A{% endif %} -o wide"
|
|
118
|
+
|
|
119
|
+
- name: "oc_builds"
|
|
120
|
+
description: "List builds in a namespace or cluster-wide"
|
|
121
|
+
command: "oc get builds{% if namespace %} -n {{ namespace }}{% else %} -A{% endif %} -o wide"
|
|
122
|
+
|
|
123
|
+
- name: "oc_adm_openshift_audit_logs"
|
|
124
|
+
description: "Get OpenShift audit logs from a specified node"
|
|
125
|
+
command: "oc adm node-logs {{ node_name }} --path=openshift-apiserver/audit.log"
|
|
126
|
+
|
|
127
|
+
- name: "oc_adm_openshift_audit_logs_with_filter"
|
|
128
|
+
description: "Get OpenShift audit logs from a specified node with an applied filter"
|
|
129
|
+
command: "oc adm node-logs {{ node_name }} --path=openshift-apiserver/audit.log | grep {{ grep_filter }}"
|
|
130
|
+
|
|
131
|
+
- name: "oc_build_logs"
|
|
132
|
+
description: "Get logs from a specific build"
|
|
133
|
+
command: "oc logs build/{{ build_name }}{% if namespace %} -n {{ namespace }}{% endif %}"
|
|
134
|
+
|
|
135
|
+
- name: "openshift_jq_query"
|
|
136
|
+
user_description: "Query OpenShift Resources: oc get {{kind}} -n {{ namespace }} -o json | jq -r {{jq_expr}}"
|
|
137
|
+
description: >
|
|
138
|
+
Use oc to get json for all resources of a specific kind and pipe the results to jq to filter them. Do not worry about escaping the jq_expr it will be done by the system on an unescaped expression that you give. e.g. give an expression like .items[] | .spec.containers[].image | select(test("^registry.redhat.io/") | not)
|
|
139
|
+
command: oc get {{ kind }} --all-namespaces -o json | jq -r {{ jq_expr }}
|
|
140
|
+
transformers:
|
|
141
|
+
- name: llm_summarize
|
|
142
|
+
config:
|
|
143
|
+
input_threshold: 1000
|
|
144
|
+
prompt: |
|
|
145
|
+
Summarize this jq query output focusing on:
|
|
146
|
+
- Key patterns and commonalities in the data
|
|
147
|
+
- Notable outliers, anomalies, or items that need attention
|
|
148
|
+
- Group similar results into aggregate descriptions when possible
|
|
149
|
+
- Highlight any empty results, null values, or missing data
|
|
150
|
+
- When applicable, mention specific resource names, namespaces, or values that stand out
|
|
151
|
+
- Organize findings in a structured way that helps with troubleshooting
|
|
152
|
+
- Be concise: aim for ≤ 50% of the original text; prioritize aggregates and actionable outliers
|
|
153
|
+
- Include grep-ready keys/values; avoid repeating entire objects or unchanged defaults
|
|
154
|
+
|
|
155
|
+
openshift/logs:
|
|
156
|
+
description: "Read pod logs using oc command"
|
|
157
|
+
docs_url: "https://holmesgpt.dev/data-sources/builtin-toolsets/openshift/"
|
|
158
|
+
tags:
|
|
159
|
+
- core
|
|
160
|
+
prerequisites:
|
|
161
|
+
- command: "oc version --client"
|
|
162
|
+
|
|
163
|
+
# Note: Log tools use transformers with llm_summarize to automatically
|
|
164
|
+
# summarize large log outputs when a fast model is configured. This helps
|
|
165
|
+
# focus on errors, patterns, and key information while reducing context usage.
|
|
166
|
+
|
|
167
|
+
tools:
|
|
168
|
+
- name: "oc_previous_logs"
|
|
169
|
+
description: "Run `oc logs --previous` on a single pod. Used to fetch logs for a pod that crashed and see logs from before the crash. Never give a deployment name or a resource that is not a pod."
|
|
170
|
+
command: "oc logs {{pod_name}} -n {{ namespace }} --previous"
|
|
171
|
+
|
|
172
|
+
- name: "oc_previous_logs_all_containers"
|
|
173
|
+
description: "Run `oc logs --previous` on a single pod. Used to fetch logs for a pod that crashed and see logs from before the crash."
|
|
174
|
+
command: "oc logs {{pod_name}} -n {{ namespace }} --previous --all-containers"
|
|
175
|
+
|
|
176
|
+
- name: "oc_container_previous_logs"
|
|
177
|
+
description: "Run `oc logs --previous` on a single container of a pod. Used to fetch logs for a pod that crashed and see logs from before the crash."
|
|
178
|
+
command: "oc logs {{pod_name}} -c {{container_name}} -n {{ namespace }} --previous"
|
|
179
|
+
|
|
180
|
+
- name: "oc_logs"
|
|
181
|
+
description: "Run `oc logs` on a single pod. Never give a deployment name or a resource that is not a pod."
|
|
182
|
+
command: "oc logs {{pod_name}} -n {{ namespace }}"
|
|
183
|
+
transformers:
|
|
184
|
+
- name: llm_summarize
|
|
185
|
+
config:
|
|
186
|
+
input_threshold: 1000
|
|
187
|
+
prompt: |
|
|
188
|
+
Summarize these pod logs focusing on:
|
|
189
|
+
- Errors, exceptions, and warning messages
|
|
190
|
+
- Recent activity patterns and trends
|
|
191
|
+
- Any authentication, connection, or startup issues
|
|
192
|
+
- Performance indicators (response times, throughput)
|
|
193
|
+
- Group similar log entries together
|
|
194
|
+
- When possible, mention exact error codes or keywords for easier searching
|
|
195
|
+
- Be concise: aim for ≤ 50% of the original text; prioritize aggregates and actionable outliers
|
|
196
|
+
- Include grep-ready keys/values; avoid repeating entire logs or unchanged defaults
|
|
197
|
+
|
|
198
|
+
- name: "oc_logs_all_containers"
|
|
199
|
+
description: "Run `oc logs` on all containers within a single pod."
|
|
200
|
+
command: "oc logs {{pod_name}} -n {{ namespace }} --all-containers"
|
|
201
|
+
transformers:
|
|
202
|
+
- name: llm_summarize
|
|
203
|
+
config:
|
|
204
|
+
input_threshold: 1000
|
|
205
|
+
prompt: |
|
|
206
|
+
Summarize these multi-container pod logs focusing on:
|
|
207
|
+
- Errors, exceptions, and warning messages by container
|
|
208
|
+
- Inter-container communication patterns
|
|
209
|
+
- Any authentication, connection, or startup issues
|
|
210
|
+
- Performance indicators and resource usage patterns
|
|
211
|
+
- Group similar log entries together by container
|
|
212
|
+
- When possible, mention exact error codes or keywords for easier searching
|
|
213
|
+
- Strive for ≤ 50% of the original size; keep results compact and grep-friendly (one line per aggregate)
|
|
214
|
+
- Prioritize aggregates and actionable outliers over comprehensive details
|
|
215
|
+
|
|
216
|
+
- name: "oc_container_logs"
|
|
217
|
+
description: "Run `oc logs` on a single container within a pod. This is to get the logs of a specific container in a multi-container pod."
|
|
218
|
+
command: "oc logs {{pod_name}} -c {{container_name}} -n {{ namespace }}"
|
|
219
|
+
|
|
220
|
+
- name: "oc_logs_grep"
|
|
221
|
+
description: "Search for a specific term in the logs of a single pod. Only provide a pod name, not a deployment or other resource."
|
|
222
|
+
command: "oc logs {{ pod_name }} -n {{ namespace }} | grep {{ search_term }}"
|
|
223
|
+
|
|
224
|
+
- name: "oc_logs_all_containers_grep"
|
|
225
|
+
description: "Search for a specific term in the logs of a single pod across all of its containers. Only provide a pod name, not a deployment or other resource."
|
|
226
|
+
command: "oc logs {{pod_name}} -n {{ namespace }} --all-containers | grep {{ search_term }}"
|
|
227
|
+
|
|
228
|
+
openshift/live-metrics:
|
|
229
|
+
description: "Provides real-time metrics for pods and nodes using oc"
|
|
230
|
+
docs_url: "https://holmesgpt.dev/data-sources/builtin-toolsets/openshift/"
|
|
231
|
+
llm_instructions: |
|
|
232
|
+
The oc_top_pods or oc_top_nodes do not return time series data or metrics that can be used for graphs
|
|
233
|
+
Do NOT use oc_top_pods or oc_top_nodes for graph generation - it only shows current snapshot data
|
|
234
|
+
oc_top_pods or oc_top_nodes are for current status checks, not historical graphs
|
|
235
|
+
tags:
|
|
236
|
+
- core
|
|
237
|
+
prerequisites:
|
|
238
|
+
- command: "oc adm top nodes"
|
|
239
|
+
tools:
|
|
240
|
+
- name: "oc_top_pods"
|
|
241
|
+
description: "Retrieves real-time CPU and memory usage for each pod in the cluster."
|
|
242
|
+
command: >
|
|
243
|
+
oc adm top pods -A
|
|
244
|
+
- name: "oc_top_nodes"
|
|
245
|
+
description: "Retrieves real-time CPU and memory usage for each node in the cluster."
|
|
246
|
+
command: >
|
|
247
|
+
oc adm top nodes
|
|
248
|
+
|
|
249
|
+
openshift/security:
|
|
250
|
+
description: "OpenShift security-related resources and configurations"
|
|
251
|
+
docs_url: "https://holmesgpt.dev/data-sources/builtin-toolsets/openshift/"
|
|
252
|
+
tags:
|
|
253
|
+
- core
|
|
254
|
+
prerequisites:
|
|
255
|
+
- command: "oc version --client"
|
|
256
|
+
tools:
|
|
257
|
+
- name: "oc_scc"
|
|
258
|
+
description: "List Security Context Constraints (SCCs) in the cluster"
|
|
259
|
+
command: "oc get scc{% if scc_name %} {{ scc_name }}{% endif %} -o wide"
|
|
260
|
+
|
|
261
|
+
- name: "oc_scc_describe"
|
|
262
|
+
description: "Describe a specific Security Context Constraint"
|
|
263
|
+
command: "oc describe scc {{ scc_name }}"
|
|
264
|
+
|
|
265
|
+
- name: "oc_policy_who_can"
|
|
266
|
+
description: "Check who can perform a specific action on a resource"
|
|
267
|
+
command: "oc policy who-can {{ verb }} {{ resource }}{% if namespace %} -n {{ namespace }}{% endif %}"
|
|
268
|
+
|
|
269
|
+
- name: "oc_policy_can_i"
|
|
270
|
+
description: "Check if the current user can perform a specific action"
|
|
271
|
+
command: "oc policy can-i {{ verb }} {{ resource }}{% if namespace %} -n {{ namespace }}{% endif %}"
|
|
272
|
+
|
|
273
|
+
- name: "oc_serviceaccounts"
|
|
274
|
+
description: "List service accounts in a namespace or cluster-wide"
|
|
275
|
+
command: "oc get serviceaccounts{% if namespace %} -n {{ namespace }}{% else %} -A{% endif %} -o wide"
|
|
276
|
+
|
|
277
|
+
- name: "oc_rolebindings"
|
|
278
|
+
description: "List role bindings in a namespace or cluster-wide"
|
|
279
|
+
command: "oc get rolebindings{% if namespace %} -n {{ namespace }}{% else %} -A{% endif %} -o wide"
|
|
280
|
+
|
|
281
|
+
- name: "oc_clusterrolebindings"
|
|
282
|
+
description: "List cluster role bindings"
|
|
283
|
+
command: "oc get clusterrolebindings -o wide"
|