holmesgpt 0.13.2__py3-none-any.whl → 0.16.2a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- holmes/__init__.py +1 -1
- holmes/clients/robusta_client.py +17 -4
- holmes/common/env_vars.py +40 -1
- holmes/config.py +114 -144
- holmes/core/conversations.py +53 -14
- holmes/core/feedback.py +191 -0
- holmes/core/investigation.py +18 -22
- holmes/core/llm.py +489 -88
- holmes/core/models.py +103 -1
- holmes/core/openai_formatting.py +13 -0
- holmes/core/prompt.py +1 -1
- holmes/core/safeguards.py +4 -4
- holmes/core/supabase_dal.py +293 -100
- holmes/core/tool_calling_llm.py +423 -323
- holmes/core/tools.py +311 -33
- holmes/core/tools_utils/token_counting.py +14 -0
- holmes/core/tools_utils/tool_context_window_limiter.py +57 -0
- holmes/core/tools_utils/tool_executor.py +13 -8
- holmes/core/toolset_manager.py +155 -4
- holmes/core/tracing.py +6 -1
- holmes/core/transformers/__init__.py +23 -0
- holmes/core/transformers/base.py +62 -0
- holmes/core/transformers/llm_summarize.py +174 -0
- holmes/core/transformers/registry.py +122 -0
- holmes/core/transformers/transformer.py +31 -0
- holmes/core/truncation/compaction.py +59 -0
- holmes/core/truncation/dal_truncation_utils.py +23 -0
- holmes/core/truncation/input_context_window_limiter.py +218 -0
- holmes/interactive.py +177 -24
- holmes/main.py +7 -4
- holmes/plugins/prompts/_fetch_logs.jinja2 +26 -1
- holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
- holmes/plugins/prompts/_runbook_instructions.jinja2 +23 -12
- holmes/plugins/prompts/conversation_history_compaction.jinja2 +88 -0
- holmes/plugins/prompts/generic_ask.jinja2 +2 -4
- holmes/plugins/prompts/generic_ask_conversation.jinja2 +2 -1
- holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +2 -1
- holmes/plugins/prompts/generic_investigation.jinja2 +2 -1
- holmes/plugins/prompts/investigation_procedure.jinja2 +48 -0
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +2 -1
- holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +2 -1
- holmes/plugins/runbooks/__init__.py +117 -18
- holmes/plugins/runbooks/catalog.json +2 -0
- holmes/plugins/toolsets/__init__.py +21 -8
- holmes/plugins/toolsets/aks-node-health.yaml +46 -0
- holmes/plugins/toolsets/aks.yaml +64 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +26 -36
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +0 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +10 -7
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +8 -6
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +8 -6
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +9 -7
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +9 -6
- holmes/plugins/toolsets/bash/bash_toolset.py +10 -13
- holmes/plugins/toolsets/bash/common/bash.py +7 -7
- holmes/plugins/toolsets/cilium.yaml +284 -0
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +5 -3
- holmes/plugins/toolsets/datadog/datadog_api.py +490 -24
- holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +21 -10
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +349 -216
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +190 -19
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +101 -44
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +13 -16
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +25 -31
- holmes/plugins/toolsets/git.py +51 -46
- holmes/plugins/toolsets/grafana/common.py +15 -3
- holmes/plugins/toolsets/grafana/grafana_api.py +46 -24
- holmes/plugins/toolsets/grafana/grafana_tempo_api.py +454 -0
- holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +9 -0
- holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +117 -0
- holmes/plugins/toolsets/grafana/toolset_grafana.py +211 -91
- holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +27 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +653 -293
- holmes/plugins/toolsets/grafana/trace_parser.py +1 -1
- holmes/plugins/toolsets/internet/internet.py +6 -7
- holmes/plugins/toolsets/internet/notion.py +5 -6
- holmes/plugins/toolsets/investigator/core_investigation.py +42 -34
- holmes/plugins/toolsets/kafka.py +25 -36
- holmes/plugins/toolsets/kubernetes.yaml +58 -84
- holmes/plugins/toolsets/kubernetes_logs.py +6 -6
- holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
- holmes/plugins/toolsets/logging_utils/logging_api.py +80 -4
- holmes/plugins/toolsets/mcp/toolset_mcp.py +181 -55
- holmes/plugins/toolsets/newrelic/__init__.py +0 -0
- holmes/plugins/toolsets/newrelic/new_relic_api.py +125 -0
- holmes/plugins/toolsets/newrelic/newrelic.jinja2 +41 -0
- holmes/plugins/toolsets/newrelic/newrelic.py +163 -0
- holmes/plugins/toolsets/opensearch/opensearch.py +10 -17
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +7 -7
- holmes/plugins/toolsets/opensearch/opensearch_ppl_query_docs.jinja2 +1616 -0
- holmes/plugins/toolsets/opensearch/opensearch_query_assist.py +78 -0
- holmes/plugins/toolsets/opensearch/opensearch_query_assist_instructions.jinja2 +223 -0
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +13 -16
- holmes/plugins/toolsets/openshift.yaml +283 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +915 -390
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +43 -2
- holmes/plugins/toolsets/prometheus/utils.py +28 -0
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +9 -10
- holmes/plugins/toolsets/robusta/robusta.py +236 -65
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +137 -26
- holmes/plugins/toolsets/service_discovery.py +1 -1
- holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
- holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
- holmes/plugins/toolsets/utils.py +88 -0
- holmes/utils/config_utils.py +91 -0
- holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
- holmes/utils/env.py +7 -0
- holmes/utils/global_instructions.py +75 -10
- holmes/utils/holmes_status.py +2 -1
- holmes/utils/holmes_sync_toolsets.py +0 -2
- holmes/utils/krr_utils.py +188 -0
- holmes/utils/sentry_helper.py +41 -0
- holmes/utils/stream.py +61 -7
- holmes/version.py +34 -14
- holmesgpt-0.16.2a0.dist-info/LICENSE +178 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/METADATA +29 -27
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/RECORD +126 -102
- holmes/core/performance_timing.py +0 -72
- holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
- holmes/plugins/toolsets/newrelic.py +0 -231
- holmes/plugins/toolsets/servicenow/install.md +0 -37
- holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
- holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
- holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/WHEEL +0 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/entry_points.txt +0 -0
holmes/plugins/toolsets/opensearch/opensearch_query_assist.py (new file)

@@ -0,0 +1,78 @@
+import logging
+import os
+from typing import Any, Dict
+
+from holmes.core.tools import (
+    StructuredToolResult,
+    StructuredToolResultStatus,
+    Tool,
+    ToolParameter,
+    Toolset,
+    ToolsetTag,
+    ToolInvokeContext,
+    ToolsetEnvironmentPrerequisite,
+)
+
+
+class PplQueryAssistTool(Tool):
+    def __init__(self, toolset: "OpenSearchQueryAssistToolset"):
+        super().__init__(
+            name="opensearch_ppl_query_assist",
+            description="Generate valid OpenSearch Piped Processing Language (PPL) queries to suggest to users for execution",
+            parameters={
+                "query": ToolParameter(
+                    description="Valid OpenSearch Piped Processing Language (PPL) query to suggest to users for execution",
+                    type="string",
+                    required=True,
+                ),
+            },
+        )
+        self._toolset = toolset
+
+    def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
+        try:
+            query = params.get("query", "")
+            response_data = {"query": query}
+            return StructuredToolResult(
+                status=StructuredToolResultStatus.SUCCESS,
+                data=response_data,
+                params=params,
+            )
+
+        except Exception as e:
+            logging.exception(f"error using {self.name} tool")
+            return StructuredToolResult(
+                status=StructuredToolResultStatus.ERROR,
+                error=f"Failed to generate PPL query: {str(e)}",
+                params=params,
+            )
+
+    def get_parameterized_one_liner(self, params: Dict) -> str:
+        query = params.get("query", "")
+        return f"OpenSearchQueryToolset: Query ({query})"
+
+
+class OpenSearchQueryAssistToolset(Toolset):
+    """OpenSearch query assist with PPL queries"""
+
+    def __init__(self):
+        super().__init__(
+            name="opensearch/query_assist",
+            description="OpenSearch query assist with PPL queries.",
+            experimental=True,
+            icon_url="https://opensearch.org/assets/brand/PNG/Mark/opensearch_mark_default.png",
+            tools=[PplQueryAssistTool(self)],
+            tags=[ToolsetTag.CORE],
+            prerequisites=[ToolsetEnvironmentPrerequisite(env=["OPENSEARCH_URL"])],
+        )
+
+    def get_example_config(self) -> Dict[str, Any]:
+        return {"opensearch_url": "http://localhost:9200"}
+
+    def _reload_instructions(self):
+        template_file_path = os.path.abspath(
+            os.path.join(
+                os.path.dirname(__file__), "opensearch_query_assist_instructions.jinja2"
+            )
+        )
+        self._load_llm_instructions(jinja_template=f"file://{template_file_path}")
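For orientation, here is a minimal sketch of how the new toolset surface from this file could be exercised directly. It is an assumption-laden example, not taken from the package: it presumes holmesgpt 0.16.2a0 is installed and importable, uses only the constructors and methods visible in this diff, and skips `_invoke` because constructing a `ToolInvokeContext` is not shown here.

```python
# Sketch only: exercises the classes added above, under the assumption that
# holmesgpt 0.16.2a0 is installed. Not the package's documented usage.
import os

from holmes.plugins.toolsets.opensearch.opensearch_query_assist import (
    OpenSearchQueryAssistToolset,
)

# The toolset declares an environment prerequisite on OPENSEARCH_URL.
os.environ.setdefault("OPENSEARCH_URL", "http://localhost:9200")

toolset = OpenSearchQueryAssistToolset()
print(toolset.name)                           # "opensearch/query_assist"
print([tool.name for tool in toolset.tools])  # ["opensearch_ppl_query_assist"]

# get_parameterized_one_liner only formats params, so it is safe to call directly.
ppl_tool = toolset.tools[0]
print(
    ppl_tool.get_parameterized_one_liner(
        {"query": "source=logs-otel-v1* | stats count() by severityText"}
    )
)
```

In normal operation the toolset is presumably wired in through holmes/core/toolset_manager.py (also changed in this release) rather than instantiated by hand.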
holmes/plugins/toolsets/opensearch/opensearch_query_assist_instructions.jinja2 (new file)

@@ -0,0 +1,223 @@
+# Query Generation
+You have access to the opensearch_ppl_query_assist tool to help you generate valid, accurate OpenSearch Piped Processing Language (PPL) queries.
+DO NOT PROVIDE INVALID QUERIES. ALWAYS CHECK YOUR QUERY WITH VALID QUERIES FIRST.
+
+Once a valid query is generated, you MUST provide a concise but informative breakdown of each part of the query structure.
+
+## CRITICAL: Query Intent Detection
+
+ALWAYS check if the user's question is about:
+
+* Log Analysis: Errors, warnings, messages, patterns, tool usage
+* Metrics Analysis: Performance, latency, throughput, resource usage
+* Time-based Analysis: "Last X hours/days", "recent", "today", "since"
+* Aggregation Requests: Count, sum, average, top, frequency
+* Troubleshooting: Issues, problems, failures, debugging
+
+If ANY of the above apply → Generate a PPL query IMMEDIATELY and use the OpenSearch Dashboards Page State
+
+### Example GOOD response:
+I've retrieved your current query from the query bar `source=logs-otel-v1* | STAT count() BY severityText` and it
+appears there is a typo in "STAT", it should be "STATS". Below is the fixed query:
+```
+source=logs-otel-v1* | STATS count() BY severityText
+```
+
+
+## CRITICAL: OpenSearch Dashboards Page State
+The user may be using this agent from OpenSearch Dashboards (OSD), which provides the current page state.
+It may be included in the conversation history as a system message.
+
+IMPORTANT: YOU CAN USE THE CURRENT USER QUERY TO HELP ENHANCE/MODIFY/FIX/SUGGEST A VALID QUERY USING THE SAME INDEX PATTERN
+REFER TO "Core PPL Commands" FOR SYNTAX
+
+```
+## OpenSearch PPL Query Language
+
+### PPL (Piped Processing Language) Overview
+PPL is OpenSearch's query language for analyzing logs, metrics, and traces. It uses a pipe-based syntax similar to Unix commands, processing data through sequential transformations.
+
+### Core PPL Commands
+
+**Data Source & Search:**
+- `source=<index>` or `search source=<index>` - Specify data source
+- `source=<cluster>:<index>` - Cross-cluster search
+- `| where <condition>` - Filter results
+- `| fields <field-list>` - Project specific fields
+- `| fields - <field-list>` - Exclude specific fields
+
+**Data Transformation:**
+- `| stats <aggregation> by <field>` - Aggregate data (count(), sum(), avg(), min(), max())
+- `| eval <field>=<expression>` - Create calculated fields
+- `| sort [+|-] <field>` - Sort results (+ ascending, - descending)
+- `| head <n>` - Return first n results
+- `| tail <n>` - Return last n results
+- `| dedup <field-list>` - Remove duplicates
+
+**Advanced Analysis:**
+- `| top [N] <field>` - Find most common values
+- `| rare [N] <field>` - Find least common values
+- `| parse <field> <regex>` - Extract fields using regex patterns
+- `| grok <field> <pattern>` - Parse using grok patterns
+- `| patterns <field> [SIMPLE_PATTERN|BRAIN]` - Extract log patterns
+
+**Time Series:**
+- `| trendline SMA(<period>, <field>)` - Calculate moving averages
+- `| fillnull with <value> in <fields>` - Replace null values
+
+**Joins & Lookups:**
+- `| join <table>` - Join with another dataset
+- `| lookup <table> <field>` - Enrich with lookup data (requires Calcite)
+
+**Pattern Extraction:**
+- `| patterns message BRAIN` - Semantic log pattern extraction
+- `| patterns new_field='extracted' pattern='[0-9]' message` - Custom regex patterns
+
+### PPL Query Examples for Observability
+
+**Error Analysis:**
+```ppl
+source=ai-agent-logs-*
+| where level="ERROR"
+| stats count() by message
+| sort - count
+```
+
+**Service Latency Analysis:**
+```ppl
+source=traces
+| where service="checkout"
+| stats avg(duration) as avg_latency, max(duration) as max_latency by endpoint
+| where avg_latency > 100
+```
+
+**Log Pattern Detection:**
+```ppl
+source=ai-agent-audit-logs-*
+| patterns message BRAIN
+| stats count() by patterns_field
+| top 10 patterns_field
+```
+
+**Time-based Aggregation:**
+```ppl
+source=metrics
+| eval hour=date_format(timestamp, 'HH')
+| stats avg(cpu_usage) by hour, host
+| sort hour
+```
+
+**Multi-field Correlation:**
+```ppl
+source=ai-agent-logs-*
+| parse message '.*thread_id=(?<tid>[^,]+).*run_id=(?<rid>[^,]+)'
+| stats count() by tid, rid, level
+| where count > 100
+```
+
+**Advanced PPL Query Patterns:**
+
+**Top N Analysis with Filtering:**
+```ppl
+source=ai-agent-logs-*
+| where timestamp >= now() - 1h
+| top 20 message by level
+| where level in ["ERROR", "WARN"]
+```
+
+**Deduplication and Unique Values:**
+```ppl
+source=ai-agent-audit-logs-*
+| dedup thread_id
+| fields thread_id, run_id, timestamp
+| sort - timestamp
+```
+
+**Fillnull for Missing Data Handling:**
+```ppl
+source=ai-agent-metrics-*
+| fillnull with 0 in cpu_usage, memory_usage
+| stats avg(cpu_usage) as avg_cpu, avg(memory_usage) as avg_mem by host
+```
+
+**Rare Events Detection:**
+```ppl
+source=ai-agent-logs-*
+| rare 10 error_code
+| where count < 5
+```
+
+**Field Extraction with Grok:**
+```ppl
+source=ai-agent-logs-*
+| grok message '%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} %{GREEDYDATA:msg}'
+| stats count() by level
+```
+
+**Time Span Aggregations:**
+```ppl
+source=ai-agent-metrics-*
+| stats count() by span(timestamp, 5m) as time_bucket, status
+| where status != 200
+```
+
+**Eval with Conditional Logic:**
+```ppl
+source=ai-agent-logs-*
+| eval severity = case(
+    level = "ERROR", 1,
+    level = "WARN", 2,
+    level = "INFO", 3,
+    else = 4
+  )
+| stats count() by severity
+```
+
+**Join Operations (with Calcite enabled):**
+```ppl
+source=ai-agent-logs-*
+| join left=l right=r on l.thread_id = r.thread_id
+    [ source=ai-agent-audit-logs-* ]
+| fields l.timestamp, l.message, r.tool_name
+```
+
+**Subquery for Complex Filtering:**
+```ppl
+source=ai-agent-logs-*
+| where thread_id in [
+    source=ai-agent-audit-logs-*
+    | where tool_name = "opensearch__search"
+    | fields thread_id
+  ]
+```
+
+**Trendline for Moving Averages:**
+```ppl
+source=ai-agent-metrics-*
+| trendline SMA(5, cpu_usage) as cpu_trend
+| fields timestamp, cpu_usage, cpu_trend
+```
+
+### PPL Best Practices
+
+1. **Index Patterns**: Use wildcards for daily indices: `source=ai-agent-logs-*`
+2. **Field Extraction**: Use `parse` for structured logs, `patterns` for unstructured
+3. **Performance**: Apply `where` filters early in the pipeline
+4. **Aggregations**: Use `stats` before `sort` for better performance
+5. **Null Handling**: Use `fillnull` to handle missing data in calculations
+
+### OpenSearch Index Patterns (Current Environment)
+- `ai-agent-logs-YYYY.MM.DD` - Application logs
+- `ai-agent-audit-logs-YYYY.MM.DD` - Audit logs
+- `ai-agent-metrics-YYYY.MM.DD` - Prometheus metrics
+
+## Query Response Formatting
+You MUST respond with queries in the following format. `ppl` contains the valid PPL query.
+```typescript
+query: {
+  ppl: string,
+}
+```
+
+## More PPL Queries
+{% include "opensearch_ppl_query_docs.jinja2" %}
holmes/plugins/toolsets/opensearch/opensearch_traces.py

@@ -7,6 +7,7 @@ from cachetools import TTLCache  # type: ignore
 from holmes.core.tools import (
     CallablePrerequisite,
     Tool,
+    ToolInvokeContext,
     ToolParameter,
     ToolsetTag,
 )
@@ -18,7 +19,7 @@ from holmes.plugins.toolsets.opensearch.opensearch_utils import (
     add_auth_header,
     get_search_url,
 )
-from holmes.core.tools import StructuredToolResult,
+from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
 from holmes.plugins.toolsets.utils import get_param_or_raise, toolset_name_for_one_liner
 
 TRACES_FIELDS_CACHE_KEY = "cached_traces_fields"
@@ -34,9 +35,7 @@ class GetTracesFields(Tool):
         self._toolset = toolset
         self._cache = None
 
-    def _invoke(
-        self, params: dict, user_approved: bool = False
-    ) -> StructuredToolResult:
+    def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
         try:
             if not self._cache and self._toolset.opensearch_config.fields_ttl_seconds:
                 self._cache = TTLCache(
@@ -48,7 +47,7 @@ class GetTracesFields(Tool):
             if cached_response:
                 logging.debug("traces fields returned from cache")
                 return StructuredToolResult(
-                    status=
+                    status=StructuredToolResultStatus.SUCCESS,
                     data=cached_response,
                     params=params,
                 )
@@ -81,7 +80,7 @@ class GetTracesFields(Tool):
             if self._cache:
                 self._cache[TRACES_FIELDS_CACHE_KEY] = response
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.SUCCESS,
                 data=response,
                 params=params,
             )
@@ -90,21 +89,21 @@ class GetTracesFields(Tool):
                 "Timeout while fetching opensearch traces fields", exc_info=True
             )
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error="Request timed out while fetching opensearch traces fields",
                 params=params,
             )
         except RequestException as e:
             logging.warning("Failed to fetch opensearch traces fields", exc_info=True)
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error=f"Network error while opensearch traces fields: {str(e)}",
                 params=params,
             )
         except Exception as e:
             logging.warning("Failed to process opensearch traces fields", exc_info=True)
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error=f"Unexpected error: {str(e)}",
                 params=params,
             )
@@ -129,9 +128,7 @@ class TracesSearchQuery(Tool):
         self._toolset = toolset
         self._cache = None
 
-    def _invoke(
-        self, params: dict, user_approved: bool = False
-    ) -> StructuredToolResult:
+    def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
         err_msg = ""
         try:
             body = json.loads(get_param_or_raise(params, "query"))
@@ -157,7 +154,7 @@ class TracesSearchQuery(Tool):
 
             logs_response.raise_for_status()
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.SUCCESS,
                 data=json.dumps(logs_response.json()),
                 params=params,
             )
@@ -166,14 +163,14 @@ class TracesSearchQuery(Tool):
                 "Timeout while fetching opensearch traces search", exc_info=True
             )
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error=f"Request timed out while fetching opensearch traces search {err_msg}",
                 params=params,
             )
         except RequestException as e:
             logging.warning("Failed to fetch opensearch traces search", exc_info=True)
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error=f"Network error while opensearch traces search {err_msg} : {str(e)}",
                 params=params,
             )
@@ -182,7 +179,7 @@ class TracesSearchQuery(Tool):
                 "Failed to process opensearch traces search ", exc_info=True
             )
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error=f"Unexpected error {err_msg}: {str(e)}",
                 params=params,
             )
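The recurring change across these hunks is the tool-invocation signature: `_invoke(self, params, user_approved=False)` becomes `_invoke(self, params, context: ToolInvokeContext)`, and every `StructuredToolResult` now carries an explicit `StructuredToolResultStatus`. Below is a minimal sketch of a custom tool written against the new signature; it uses only names that appear in this diff, but the tool itself (`echo_params`) is hypothetical and its body is illustrative rather than copied from the package.

```python
# Illustrative sketch of the 0.16.x _invoke signature; not code from holmesgpt itself.
from typing import Dict

from holmes.core.tools import (
    StructuredToolResult,
    StructuredToolResultStatus,
    Tool,
    ToolInvokeContext,
)


class EchoTool(Tool):
    def __init__(self):
        super().__init__(
            name="echo_params",
            description="Hypothetical tool that echoes its parameters back",
            parameters={},
        )

    # 0.13.x style, removed by these hunks:
    #     def _invoke(self, params: dict, user_approved: bool = False) -> StructuredToolResult:
    # 0.16.x style: a ToolInvokeContext argument replaces user_approved.
    def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
        return StructuredToolResult(
            status=StructuredToolResultStatus.SUCCESS,  # status is now an explicit enum value
            data={"echo": params},
            params=params,
        )

    def get_parameterized_one_liner(self, params: Dict) -> str:
        return f"EchoTool: {params}"
```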