holmesgpt 0.14.2__py3-none-any.whl → 0.14.4a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of holmesgpt might be problematic. Click here for more details.
- holmes/__init__.py +1 -1
- holmes/common/env_vars.py +6 -0
- holmes/config.py +3 -6
- holmes/core/conversations.py +12 -2
- holmes/core/feedback.py +191 -0
- holmes/core/llm.py +16 -12
- holmes/core/models.py +101 -1
- holmes/core/supabase_dal.py +23 -9
- holmes/core/tool_calling_llm.py +197 -15
- holmes/core/tools.py +20 -7
- holmes/core/tools_utils/token_counting.py +13 -0
- holmes/core/tools_utils/tool_context_window_limiter.py +45 -23
- holmes/core/tools_utils/tool_executor.py +11 -6
- holmes/core/toolset_manager.py +5 -1
- holmes/core/truncation/dal_truncation_utils.py +23 -0
- holmes/interactive.py +146 -14
- holmes/plugins/prompts/_fetch_logs.jinja2 +3 -0
- holmes/plugins/runbooks/__init__.py +6 -1
- holmes/plugins/toolsets/__init__.py +11 -4
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +9 -20
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +2 -3
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +2 -3
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +6 -4
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +6 -4
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +2 -3
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +6 -4
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +2 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +2 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +2 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +2 -3
- holmes/plugins/toolsets/bash/bash_toolset.py +4 -7
- holmes/plugins/toolsets/cilium.yaml +284 -0
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +5 -10
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +1 -1
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +6 -13
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +3 -6
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +4 -9
- holmes/plugins/toolsets/git.py +14 -12
- holmes/plugins/toolsets/grafana/grafana_tempo_api.py +23 -42
- holmes/plugins/toolsets/grafana/toolset_grafana.py +2 -3
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +18 -36
- holmes/plugins/toolsets/internet/internet.py +2 -3
- holmes/plugins/toolsets/internet/notion.py +2 -3
- holmes/plugins/toolsets/investigator/core_investigation.py +7 -9
- holmes/plugins/toolsets/kafka.py +7 -18
- holmes/plugins/toolsets/logging_utils/logging_api.py +79 -3
- holmes/plugins/toolsets/mcp/toolset_mcp.py +2 -3
- holmes/plugins/toolsets/newrelic/__init__.py +0 -0
- holmes/plugins/toolsets/newrelic/new_relic_api.py +125 -0
- holmes/plugins/toolsets/newrelic/newrelic.jinja2 +41 -0
- holmes/plugins/toolsets/newrelic/newrelic.py +211 -0
- holmes/plugins/toolsets/opensearch/opensearch.py +5 -12
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +3 -6
- holmes/plugins/toolsets/prometheus/prometheus.py +135 -98
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +3 -6
- holmes/plugins/toolsets/robusta/robusta.py +4 -9
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +93 -13
- holmes/plugins/toolsets/servicenow/servicenow.py +5 -10
- holmes/utils/sentry_helper.py +1 -1
- holmes/utils/stream.py +22 -7
- holmes/version.py +34 -14
- {holmesgpt-0.14.2.dist-info → holmesgpt-0.14.4a0.dist-info}/METADATA +6 -8
- {holmesgpt-0.14.2.dist-info → holmesgpt-0.14.4a0.dist-info}/RECORD +66 -60
- holmes/core/tools_utils/data_types.py +0 -81
- holmes/plugins/toolsets/newrelic.py +0 -231
- {holmesgpt-0.14.2.dist-info → holmesgpt-0.14.4a0.dist-info}/LICENSE.txt +0 -0
- {holmesgpt-0.14.2.dist-info → holmesgpt-0.14.4a0.dist-info}/WHEEL +0 -0
- {holmesgpt-0.14.2.dist-info → holmesgpt-0.14.4a0.dist-info}/entry_points.txt +0 -0
|
@@ -15,11 +15,14 @@ from holmes.core.tools import (
|
|
|
15
15
|
CallablePrerequisite,
|
|
16
16
|
StructuredToolResult,
|
|
17
17
|
Tool,
|
|
18
|
+
ToolInvokeContext,
|
|
18
19
|
ToolParameter,
|
|
19
20
|
StructuredToolResultStatus,
|
|
20
21
|
Toolset,
|
|
21
22
|
ToolsetTag,
|
|
22
23
|
)
|
|
24
|
+
from holmes.core.tools_utils.token_counting import count_tool_response_tokens
|
|
25
|
+
from holmes.core.tools_utils.tool_context_window_limiter import get_pct_token_count
|
|
23
26
|
from holmes.plugins.toolsets.consts import STANDARD_END_DATETIME_TOOL_PARAM_DESCRIPTION
|
|
24
27
|
from holmes.plugins.toolsets.prometheus.utils import parse_duration_to_seconds
|
|
25
28
|
from holmes.plugins.toolsets.service_discovery import PrometheusDiscovery
|
|
@@ -42,8 +45,6 @@ PROMETHEUS_METADATA_API_LIMIT = 100 # Default limit for Prometheus metadata API
|
|
|
42
45
|
# Default timeout values for PromQL queries
|
|
43
46
|
DEFAULT_QUERY_TIMEOUT_SECONDS = 20
|
|
44
47
|
MAX_QUERY_TIMEOUT_SECONDS = 180
|
|
45
|
-
# Default character limit for query responses to prevent token limit issues
|
|
46
|
-
DEFAULT_QUERY_RESPONSE_SIZE_LIMIT = 30000
|
|
47
48
|
# Default timeout for metadata API calls (discovery endpoints)
|
|
48
49
|
DEFAULT_METADATA_TIMEOUT_SECONDS = 20
|
|
49
50
|
MAX_METADATA_TIMEOUT_SECONDS = 60
|
|
@@ -91,9 +92,12 @@ class PrometheusConfig(BaseModel):
|
|
|
91
92
|
rules_cache_duration_seconds: Optional[int] = 1800 # 30 minutes
|
|
92
93
|
additional_labels: Optional[Dict[str, str]] = None
|
|
93
94
|
prometheus_ssl_enabled: bool = True
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
95
|
+
|
|
96
|
+
# Custom limit to the max number of tokens that a query result can take to proactively
|
|
97
|
+
# prevent token limit issues. Expressed in % of the model's context window.
|
|
98
|
+
# This limit only overrides the global limit for all tools (TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT)
|
|
99
|
+
# if it is lower.
|
|
100
|
+
query_response_size_limit_pct: Optional[int] = None
|
|
97
101
|
|
|
98
102
|
@field_validator("prometheus_url")
|
|
99
103
|
def ensure_trailing_slash(cls, v: Optional[str]) -> Optional[str]:
|
|
@@ -318,7 +322,7 @@ def add_prometheus_auth(prometheus_auth_header: Optional[str]) -> Dict[str, Any]
|
|
|
318
322
|
|
|
319
323
|
|
|
320
324
|
def create_data_summary_for_large_result(
|
|
321
|
-
result_data: Dict, query: str,
|
|
325
|
+
result_data: Dict, query: str, data_size_tokens: int, is_range_query: bool = False
|
|
322
326
|
) -> Dict[str, Any]:
|
|
323
327
|
"""
|
|
324
328
|
Create a summary for large Prometheus results instead of returning full data.
|
|
@@ -326,7 +330,7 @@ def create_data_summary_for_large_result(
|
|
|
326
330
|
Args:
|
|
327
331
|
result_data: The Prometheus data result
|
|
328
332
|
query: The original PromQL query
|
|
329
|
-
|
|
333
|
+
data_size_tokens: Size of the data in tokens
|
|
330
334
|
is_range_query: Whether this is a range query (vs instant query)
|
|
331
335
|
|
|
332
336
|
Returns:
|
|
@@ -361,10 +365,10 @@ def create_data_summary_for_large_result(
|
|
|
361
365
|
)
|
|
362
366
|
|
|
363
367
|
return {
|
|
364
|
-
"message": f"Data too large to return ({
|
|
368
|
+
"message": f"Data too large to return ({data_size_tokens:,} tokens). Query returned {num_items} time series with {total_points:,} total data points.",
|
|
365
369
|
"series_count": num_items,
|
|
366
370
|
"total_data_points": total_points,
|
|
367
|
-
"
|
|
371
|
+
"data_size_tokens": data_size_tokens,
|
|
368
372
|
"label_cardinality": label_summary,
|
|
369
373
|
"suggestion": f'Consider using topk({min(5, num_items)}, {query}) to limit results to the top {min(5, num_items)} series. To also capture remaining data as \'other\': topk({min(5, num_items)}, {query}) or label_replace((sum({query}) - sum(topk({min(5, num_items)}, {query}))), "pod", "other", "", "")',
|
|
370
374
|
}
|
|
@@ -394,15 +398,46 @@ def create_data_summary_for_large_result(
|
|
|
394
398
|
)
|
|
395
399
|
|
|
396
400
|
return {
|
|
397
|
-
"message": f"Data too large to return ({
|
|
401
|
+
"message": f"Data too large to return ({data_size_tokens:,} tokens). Query returned {num_items} results.",
|
|
398
402
|
"result_count": num_items,
|
|
399
403
|
"result_type": result_type,
|
|
400
|
-
"
|
|
404
|
+
"data_size_tokens": data_size_tokens,
|
|
401
405
|
"label_cardinality": label_summary,
|
|
402
406
|
"suggestion": f'Consider using topk({min(5, num_items)}, {query}) to limit results. To also capture remaining data as \'other\': topk({min(5, num_items)}, {query}) or label_replace((sum({query}) - sum(topk({min(5, num_items)}, {query}))), "instance", "other", "", "")',
|
|
403
407
|
}
|
|
404
408
|
|
|
405
409
|
|
|
410
|
+
class MetricsBasedResponse(BaseModel):
|
|
411
|
+
status: str
|
|
412
|
+
error_message: Optional[str] = None
|
|
413
|
+
data: Optional[str] = None
|
|
414
|
+
random_key: str
|
|
415
|
+
tool_name: str
|
|
416
|
+
description: str
|
|
417
|
+
query: str
|
|
418
|
+
start: Optional[str] = None
|
|
419
|
+
end: Optional[str] = None
|
|
420
|
+
step: Optional[float] = None
|
|
421
|
+
output_type: Optional[str] = None
|
|
422
|
+
data_summary: Optional[dict[str, Any]] = None
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
def create_structured_tool_result(
|
|
426
|
+
params: dict, response: MetricsBasedResponse
|
|
427
|
+
) -> StructuredToolResult:
|
|
428
|
+
status = StructuredToolResultStatus.SUCCESS
|
|
429
|
+
if response.error_message or response.status.lower() in ("failed", "error"):
|
|
430
|
+
status = StructuredToolResultStatus.ERROR
|
|
431
|
+
elif not response.data:
|
|
432
|
+
status = StructuredToolResultStatus.NO_DATA
|
|
433
|
+
|
|
434
|
+
return StructuredToolResult(
|
|
435
|
+
status=status,
|
|
436
|
+
data=response.model_dump_json(indent=2),
|
|
437
|
+
params=params,
|
|
438
|
+
)
|
|
439
|
+
|
|
440
|
+
|
|
406
441
|
class ListPrometheusRules(BasePrometheusTool):
|
|
407
442
|
def __init__(self, toolset: "PrometheusToolset"):
|
|
408
443
|
super().__init__(
|
|
@@ -413,9 +448,7 @@ class ListPrometheusRules(BasePrometheusTool):
|
|
|
413
448
|
)
|
|
414
449
|
self._cache = None
|
|
415
450
|
|
|
416
|
-
def _invoke(
|
|
417
|
-
self, params: dict, user_approved: bool = False
|
|
418
|
-
) -> StructuredToolResult:
|
|
451
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
419
452
|
if not self.toolset.config or not self.toolset.config.prometheus_url:
|
|
420
453
|
return StructuredToolResult(
|
|
421
454
|
status=StructuredToolResultStatus.ERROR,
|
|
@@ -533,9 +566,7 @@ class GetMetricNames(BasePrometheusTool):
|
|
|
533
566
|
toolset=toolset,
|
|
534
567
|
)
|
|
535
568
|
|
|
536
|
-
def _invoke(
|
|
537
|
-
self, params: dict, user_approved: bool = False
|
|
538
|
-
) -> StructuredToolResult:
|
|
569
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
539
570
|
if not self.toolset.config or not self.toolset.config.prometheus_url:
|
|
540
571
|
return StructuredToolResult(
|
|
541
572
|
status=StructuredToolResultStatus.ERROR,
|
|
@@ -654,9 +685,7 @@ class GetLabelValues(BasePrometheusTool):
|
|
|
654
685
|
toolset=toolset,
|
|
655
686
|
)
|
|
656
687
|
|
|
657
|
-
def _invoke(
|
|
658
|
-
self, params: dict, user_approved: bool = False
|
|
659
|
-
) -> StructuredToolResult:
|
|
688
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
660
689
|
if not self.toolset.config or not self.toolset.config.prometheus_url:
|
|
661
690
|
return StructuredToolResult(
|
|
662
691
|
status=StructuredToolResultStatus.ERROR,
|
|
@@ -770,9 +799,7 @@ class GetAllLabels(BasePrometheusTool):
|
|
|
770
799
|
toolset=toolset,
|
|
771
800
|
)
|
|
772
801
|
|
|
773
|
-
def _invoke(
|
|
774
|
-
self, params: dict, user_approved: bool = False
|
|
775
|
-
) -> StructuredToolResult:
|
|
802
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
776
803
|
if not self.toolset.config or not self.toolset.config.prometheus_url:
|
|
777
804
|
return StructuredToolResult(
|
|
778
805
|
status=StructuredToolResultStatus.ERROR,
|
|
@@ -877,9 +904,7 @@ class GetSeries(BasePrometheusTool):
|
|
|
877
904
|
toolset=toolset,
|
|
878
905
|
)
|
|
879
906
|
|
|
880
|
-
def _invoke(
|
|
881
|
-
self, params: dict, user_approved: bool = False
|
|
882
|
-
) -> StructuredToolResult:
|
|
907
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
883
908
|
if not self.toolset.config or not self.toolset.config.prometheus_url:
|
|
884
909
|
return StructuredToolResult(
|
|
885
910
|
status=StructuredToolResultStatus.ERROR,
|
|
@@ -981,9 +1006,7 @@ class GetMetricMetadata(BasePrometheusTool):
|
|
|
981
1006
|
toolset=toolset,
|
|
982
1007
|
)
|
|
983
1008
|
|
|
984
|
-
def _invoke(
|
|
985
|
-
self, params: dict, user_approved: bool = False
|
|
986
|
-
) -> StructuredToolResult:
|
|
1009
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
987
1010
|
if not self.toolset.config or not self.toolset.config.prometheus_url:
|
|
988
1011
|
return StructuredToolResult(
|
|
989
1012
|
status=StructuredToolResultStatus.ERROR,
|
|
@@ -1072,9 +1095,7 @@ class ExecuteInstantQuery(BasePrometheusTool):
|
|
|
1072
1095
|
toolset=toolset,
|
|
1073
1096
|
)
|
|
1074
1097
|
|
|
1075
|
-
def _invoke(
|
|
1076
|
-
self, params: dict, user_approved: bool = False
|
|
1077
|
-
) -> StructuredToolResult:
|
|
1098
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
1078
1099
|
if not self.toolset.config or not self.toolset.config.prometheus_url:
|
|
1079
1100
|
return StructuredToolResult(
|
|
1080
1101
|
status=StructuredToolResultStatus.ERROR,
|
|
@@ -1120,56 +1141,64 @@ class ExecuteInstantQuery(BasePrometheusTool):
|
|
|
1120
1141
|
error_message = (
|
|
1121
1142
|
"The prometheus query returned no result. Is the query correct?"
|
|
1122
1143
|
)
|
|
1123
|
-
response_data =
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1144
|
+
response_data = MetricsBasedResponse(
|
|
1145
|
+
status=status,
|
|
1146
|
+
error_message=error_message,
|
|
1147
|
+
random_key=generate_random_key(),
|
|
1148
|
+
tool_name=self.name,
|
|
1149
|
+
description=description,
|
|
1150
|
+
query=query,
|
|
1151
|
+
)
|
|
1152
|
+
structured_tool_result: StructuredToolResult
|
|
1132
1153
|
# Check if data should be included based on size
|
|
1133
1154
|
if self.toolset.config.tool_calls_return_data:
|
|
1134
1155
|
result_data = data.get("data", {})
|
|
1156
|
+
response_data.data = result_data
|
|
1157
|
+
|
|
1158
|
+
structured_tool_result = create_structured_tool_result(
|
|
1159
|
+
params=params, response=response_data
|
|
1160
|
+
)
|
|
1161
|
+
token_count = count_tool_response_tokens(
|
|
1162
|
+
llm=context.llm, structured_tool_result=structured_tool_result
|
|
1163
|
+
)
|
|
1135
1164
|
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1165
|
+
token_limit = context.max_token_count
|
|
1166
|
+
if self.toolset.config.query_response_size_limit_pct:
|
|
1167
|
+
custom_token_limit = get_pct_token_count(
|
|
1168
|
+
percent_of_total_context_window=self.toolset.config.query_response_size_limit_pct,
|
|
1169
|
+
llm=context.llm,
|
|
1170
|
+
)
|
|
1171
|
+
if custom_token_limit < token_limit:
|
|
1172
|
+
token_limit = custom_token_limit
|
|
1139
1173
|
|
|
1140
1174
|
# Provide summary if data is too large
|
|
1141
|
-
if
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
> self.toolset.config.query_response_size_limit
|
|
1145
|
-
):
|
|
1146
|
-
response_data["data_summary"] = (
|
|
1175
|
+
if token_count > token_limit:
|
|
1176
|
+
response_data.data = None
|
|
1177
|
+
response_data.data_summary = (
|
|
1147
1178
|
create_data_summary_for_large_result(
|
|
1148
1179
|
result_data,
|
|
1149
1180
|
query,
|
|
1150
|
-
|
|
1181
|
+
token_count,
|
|
1151
1182
|
is_range_query=False,
|
|
1152
1183
|
)
|
|
1153
1184
|
)
|
|
1154
1185
|
logging.info(
|
|
1155
1186
|
f"Prometheus instant query returned large dataset: "
|
|
1156
|
-
f"{response_data
|
|
1157
|
-
f"{
|
|
1187
|
+
f"{response_data.data_summary.get('result_count', 0)} results, "
|
|
1188
|
+
f"{token_count:,} tokens (limit: {token_limit:,}). "
|
|
1158
1189
|
f"Returning summary instead of full data."
|
|
1159
1190
|
)
|
|
1160
|
-
# Also add
|
|
1161
|
-
response_data
|
|
1162
|
-
f"Data size: {
|
|
1191
|
+
# Also add token info to the summary for debugging
|
|
1192
|
+
response_data.data_summary["_debug_info"] = (
|
|
1193
|
+
f"Data size: {token_count:,} tokens exceeded limit of {token_limit:,} tokens"
|
|
1163
1194
|
)
|
|
1164
1195
|
else:
|
|
1165
|
-
response_data
|
|
1196
|
+
response_data.data = result_data
|
|
1166
1197
|
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
status=StructuredToolResultStatus.SUCCESS,
|
|
1170
|
-
data=data_str,
|
|
1171
|
-
params=params,
|
|
1198
|
+
structured_tool_result = create_structured_tool_result(
|
|
1199
|
+
params=params, response=response_data
|
|
1172
1200
|
)
|
|
1201
|
+
return structured_tool_result
|
|
1173
1202
|
|
|
1174
1203
|
# Handle known Prometheus error status codes
|
|
1175
1204
|
error_msg = "Unknown error occurred"
|
|
@@ -1280,9 +1309,7 @@ class ExecuteRangeQuery(BasePrometheusTool):
|
|
|
1280
1309
|
toolset=toolset,
|
|
1281
1310
|
)
|
|
1282
1311
|
|
|
1283
|
-
def _invoke(
|
|
1284
|
-
self, params: dict, user_approved: bool = False
|
|
1285
|
-
) -> StructuredToolResult:
|
|
1312
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
1286
1313
|
if not self.toolset.config or not self.toolset.config.prometheus_url:
|
|
1287
1314
|
return StructuredToolResult(
|
|
1288
1315
|
status=StructuredToolResultStatus.ERROR,
|
|
@@ -1352,59 +1379,69 @@ class ExecuteRangeQuery(BasePrometheusTool):
|
|
|
1352
1379
|
error_message = (
|
|
1353
1380
|
"The prometheus query returned no result. Is the query correct?"
|
|
1354
1381
|
)
|
|
1355
|
-
response_data =
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
|
|
1360
|
-
|
|
1361
|
-
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
|
|
1365
|
-
|
|
1366
|
-
|
|
1382
|
+
response_data = MetricsBasedResponse(
|
|
1383
|
+
status=status,
|
|
1384
|
+
error_message=error_message,
|
|
1385
|
+
random_key=generate_random_key(),
|
|
1386
|
+
tool_name=self.name,
|
|
1387
|
+
description=description,
|
|
1388
|
+
query=query,
|
|
1389
|
+
start=start,
|
|
1390
|
+
end=end,
|
|
1391
|
+
step=step,
|
|
1392
|
+
output_type=output_type,
|
|
1393
|
+
)
|
|
1394
|
+
|
|
1395
|
+
structured_tool_result: StructuredToolResult
|
|
1367
1396
|
|
|
1368
1397
|
# Check if data should be included based on size
|
|
1369
1398
|
if self.toolset.config.tool_calls_return_data:
|
|
1370
1399
|
result_data = data.get("data", {})
|
|
1400
|
+
response_data.data = result_data
|
|
1401
|
+
structured_tool_result = create_structured_tool_result(
|
|
1402
|
+
params=params, response=response_data
|
|
1403
|
+
)
|
|
1404
|
+
|
|
1405
|
+
token_count = count_tool_response_tokens(
|
|
1406
|
+
llm=context.llm, structured_tool_result=structured_tool_result
|
|
1407
|
+
)
|
|
1371
1408
|
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
|
|
1409
|
+
token_limit = context.max_token_count
|
|
1410
|
+
if self.toolset.config.query_response_size_limit_pct:
|
|
1411
|
+
custom_token_limit = get_pct_token_count(
|
|
1412
|
+
percent_of_total_context_window=self.toolset.config.query_response_size_limit_pct,
|
|
1413
|
+
llm=context.llm,
|
|
1414
|
+
)
|
|
1415
|
+
if custom_token_limit < token_limit:
|
|
1416
|
+
token_limit = custom_token_limit
|
|
1375
1417
|
|
|
1376
1418
|
# Provide summary if data is too large
|
|
1377
|
-
if
|
|
1378
|
-
|
|
1379
|
-
|
|
1380
|
-
> self.toolset.config.query_response_size_limit
|
|
1381
|
-
):
|
|
1382
|
-
response_data["data_summary"] = (
|
|
1419
|
+
if token_count > token_limit:
|
|
1420
|
+
response_data.data = None
|
|
1421
|
+
response_data.data_summary = (
|
|
1383
1422
|
create_data_summary_for_large_result(
|
|
1384
|
-
result_data, query,
|
|
1423
|
+
result_data, query, token_count, is_range_query=True
|
|
1385
1424
|
)
|
|
1386
1425
|
)
|
|
1387
1426
|
logging.info(
|
|
1388
1427
|
f"Prometheus range query returned large dataset: "
|
|
1389
|
-
f"{response_data
|
|
1390
|
-
f"{
|
|
1428
|
+
f"{response_data.data_summary.get('series_count', 0)} series, "
|
|
1429
|
+
f"{token_count:,} tokens (limit: {token_limit:,}). "
|
|
1391
1430
|
f"Returning summary instead of full data."
|
|
1392
1431
|
)
|
|
1393
1432
|
# Also add character info to the summary for debugging
|
|
1394
|
-
response_data
|
|
1395
|
-
f"Data size: {
|
|
1433
|
+
response_data.data_summary["_debug_info"] = (
|
|
1434
|
+
f"Data size: {token_count:,} tokens exceeded limit of {token_limit:,} tokens"
|
|
1396
1435
|
)
|
|
1397
1436
|
else:
|
|
1398
|
-
response_data
|
|
1399
|
-
|
|
1400
|
-
data_str = json.dumps(response_data, indent=2)
|
|
1437
|
+
response_data.data = result_data
|
|
1401
1438
|
|
|
1402
|
-
|
|
1403
|
-
|
|
1404
|
-
data=data_str,
|
|
1405
|
-
params=params,
|
|
1439
|
+
structured_tool_result = create_structured_tool_result(
|
|
1440
|
+
params=params, response=response_data
|
|
1406
1441
|
)
|
|
1407
1442
|
|
|
1443
|
+
return structured_tool_result
|
|
1444
|
+
|
|
1408
1445
|
error_msg = "Unknown error occurred"
|
|
1409
1446
|
if response.status_code in [400, 429]:
|
|
1410
1447
|
try:
|
|
@@ -7,6 +7,7 @@ from holmes.core.tools import (
|
|
|
7
7
|
CallablePrerequisite,
|
|
8
8
|
StructuredToolResult,
|
|
9
9
|
Tool,
|
|
10
|
+
ToolInvokeContext,
|
|
10
11
|
ToolParameter,
|
|
11
12
|
StructuredToolResultStatus,
|
|
12
13
|
Toolset,
|
|
@@ -63,9 +64,7 @@ class ListConfiguredClusters(BaseRabbitMQTool):
|
|
|
63
64
|
toolset=toolset,
|
|
64
65
|
)
|
|
65
66
|
|
|
66
|
-
def _invoke(
|
|
67
|
-
self, params: dict, user_approved: bool = False
|
|
68
|
-
) -> StructuredToolResult:
|
|
67
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
69
68
|
if not self.toolset.config:
|
|
70
69
|
raise ValueError("RabbitMQ is not configured.")
|
|
71
70
|
|
|
@@ -103,9 +102,7 @@ class GetRabbitMQClusterStatus(BaseRabbitMQTool):
|
|
|
103
102
|
toolset=toolset,
|
|
104
103
|
)
|
|
105
104
|
|
|
106
|
-
def _invoke(
|
|
107
|
-
self, params: dict, user_approved: bool = False
|
|
108
|
-
) -> StructuredToolResult:
|
|
105
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
109
106
|
try:
|
|
110
107
|
# Fetch node details which include partition info
|
|
111
108
|
cluster_config = self._get_cluster_config(
|
|
@@ -7,6 +7,7 @@ from holmes.core.supabase_dal import SupabaseDal
|
|
|
7
7
|
from holmes.core.tools import (
|
|
8
8
|
StaticPrerequisite,
|
|
9
9
|
Tool,
|
|
10
|
+
ToolInvokeContext,
|
|
10
11
|
ToolParameter,
|
|
11
12
|
Toolset,
|
|
12
13
|
ToolsetTag,
|
|
@@ -45,9 +46,7 @@ class FetchRobustaFinding(Tool):
|
|
|
45
46
|
logging.error(error)
|
|
46
47
|
return {"error": error}
|
|
47
48
|
|
|
48
|
-
def _invoke(
|
|
49
|
-
self, params: dict, user_approved: bool = False
|
|
50
|
-
) -> StructuredToolResult:
|
|
49
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
51
50
|
finding_id = params[PARAM_FINDING_ID]
|
|
52
51
|
try:
|
|
53
52
|
finding = self._fetch_finding(finding_id)
|
|
@@ -115,9 +114,7 @@ class FetchResourceRecommendation(Tool):
|
|
|
115
114
|
)
|
|
116
115
|
return None
|
|
117
116
|
|
|
118
|
-
def _invoke(
|
|
119
|
-
self, params: dict, user_approved: bool = False
|
|
120
|
-
) -> StructuredToolResult:
|
|
117
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
121
118
|
try:
|
|
122
119
|
recommendations = self._resource_recommendation(params)
|
|
123
120
|
if recommendations:
|
|
@@ -175,9 +172,7 @@ class FetchConfigurationChanges(Tool):
|
|
|
175
172
|
)
|
|
176
173
|
return None
|
|
177
174
|
|
|
178
|
-
def _invoke(
|
|
179
|
-
self, params: dict, user_approved: bool = False
|
|
180
|
-
) -> StructuredToolResult:
|
|
175
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
181
176
|
try:
|
|
182
177
|
changes = self._fetch_change_history(params)
|
|
183
178
|
if changes:
|
|
@@ -1,17 +1,23 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
import os
|
|
2
3
|
import textwrap
|
|
3
4
|
from typing import Any, Dict, List, Optional
|
|
4
5
|
|
|
5
6
|
from holmes.core.tools import (
|
|
6
7
|
StructuredToolResult,
|
|
7
8
|
Tool,
|
|
9
|
+
ToolInvokeContext,
|
|
8
10
|
ToolParameter,
|
|
9
11
|
StructuredToolResultStatus,
|
|
10
12
|
Toolset,
|
|
11
13
|
ToolsetTag,
|
|
12
14
|
)
|
|
13
15
|
|
|
14
|
-
from holmes.plugins.runbooks import
|
|
16
|
+
from holmes.plugins.runbooks import (
|
|
17
|
+
get_runbook_by_path,
|
|
18
|
+
load_runbook_catalog,
|
|
19
|
+
DEFAULT_RUNBOOK_SEARCH_PATH,
|
|
20
|
+
)
|
|
15
21
|
from holmes.plugins.toolsets.utils import toolset_name_for_one_liner
|
|
16
22
|
|
|
17
23
|
|
|
@@ -19,30 +25,104 @@ from holmes.plugins.toolsets.utils import toolset_name_for_one_liner
|
|
|
19
25
|
# runbooks from external sources as well.
|
|
20
26
|
class RunbookFetcher(Tool):
|
|
21
27
|
toolset: "RunbookToolset"
|
|
28
|
+
available_runbooks: List[str] = []
|
|
29
|
+
additional_search_paths: Optional[List[str]] = None
|
|
30
|
+
|
|
31
|
+
def __init__(
|
|
32
|
+
self,
|
|
33
|
+
toolset: "RunbookToolset",
|
|
34
|
+
additional_search_paths: Optional[List[str]] = None,
|
|
35
|
+
):
|
|
36
|
+
catalog = load_runbook_catalog()
|
|
37
|
+
available_runbooks = []
|
|
38
|
+
if catalog:
|
|
39
|
+
available_runbooks = [entry.link for entry in catalog.catalog]
|
|
40
|
+
|
|
41
|
+
# If additional search paths are configured (e.g., for testing), also scan those for .md files
|
|
42
|
+
if additional_search_paths:
|
|
43
|
+
for search_path in additional_search_paths:
|
|
44
|
+
if not os.path.isdir(search_path):
|
|
45
|
+
continue
|
|
46
|
+
|
|
47
|
+
for file in os.listdir(search_path):
|
|
48
|
+
if file.endswith(".md") and file not in available_runbooks:
|
|
49
|
+
available_runbooks.append(file)
|
|
50
|
+
|
|
51
|
+
# Build description with available runbooks
|
|
52
|
+
runbook_list = ", ".join([f'"{rb}"' for rb in available_runbooks])
|
|
22
53
|
|
|
23
|
-
def __init__(self, toolset: "RunbookToolset"):
|
|
24
54
|
super().__init__(
|
|
25
55
|
name="fetch_runbook",
|
|
26
56
|
description="Get runbook content by runbook link. Use this to get troubleshooting steps for incidents",
|
|
27
57
|
parameters={
|
|
28
|
-
# use link as a more generic term for runbook path, considering we may have external links in the future
|
|
29
58
|
"link": ToolParameter(
|
|
30
|
-
description="The link to the runbook",
|
|
59
|
+
description=f"The link to the runbook (non-empty string required). Must be one of: {runbook_list}",
|
|
31
60
|
type="string",
|
|
32
61
|
required=True,
|
|
33
62
|
),
|
|
34
63
|
},
|
|
35
|
-
toolset=toolset, # type: ignore
|
|
64
|
+
toolset=toolset, # type: ignore[call-arg]
|
|
65
|
+
available_runbooks=available_runbooks, # type: ignore[call-arg]
|
|
66
|
+
additional_search_paths=additional_search_paths, # type: ignore[call-arg]
|
|
36
67
|
)
|
|
37
68
|
|
|
38
|
-
def _invoke(
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
link
|
|
69
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
70
|
+
link: str = params.get("link", "")
|
|
71
|
+
# Validate link is not empty
|
|
72
|
+
if not link or not link.strip():
|
|
73
|
+
err_msg = (
|
|
74
|
+
"Runbook link cannot be empty. Please provide a valid runbook path."
|
|
75
|
+
)
|
|
76
|
+
logging.error(err_msg)
|
|
77
|
+
return StructuredToolResult(
|
|
78
|
+
status=StructuredToolResultStatus.ERROR,
|
|
79
|
+
error=err_msg,
|
|
80
|
+
params=params,
|
|
81
|
+
)
|
|
42
82
|
|
|
83
|
+
# Build list of allowed search paths
|
|
43
84
|
search_paths = [DEFAULT_RUNBOOK_SEARCH_PATH]
|
|
44
|
-
if self.
|
|
45
|
-
search_paths.extend(self.
|
|
85
|
+
if self.additional_search_paths:
|
|
86
|
+
search_paths.extend(self.additional_search_paths)
|
|
87
|
+
|
|
88
|
+
# Validate link is in the available runbooks list OR is a valid path within allowed directories
|
|
89
|
+
if link not in self.available_runbooks:
|
|
90
|
+
# For links not in the catalog, perform strict path validation
|
|
91
|
+
if not link.endswith(".md"):
|
|
92
|
+
err_msg = f"Invalid runbook link '{link}'. Must end with .md extension."
|
|
93
|
+
logging.error(err_msg)
|
|
94
|
+
return StructuredToolResult(
|
|
95
|
+
status=StructuredToolResultStatus.ERROR,
|
|
96
|
+
error=err_msg,
|
|
97
|
+
params=params,
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
# Check if the link would resolve to a valid path within allowed directories
|
|
101
|
+
# This prevents path traversal attacks like ../../secret.md
|
|
102
|
+
is_valid_path = False
|
|
103
|
+
for search_path in search_paths:
|
|
104
|
+
candidate_path = os.path.join(search_path, link)
|
|
105
|
+
# Canonicalize both paths to resolve any .. or . components
|
|
106
|
+
real_search_path = os.path.realpath(search_path)
|
|
107
|
+
real_candidate_path = os.path.realpath(candidate_path)
|
|
108
|
+
|
|
109
|
+
# Check if the resolved path is within the allowed directory
|
|
110
|
+
if (
|
|
111
|
+
real_candidate_path.startswith(real_search_path + os.sep)
|
|
112
|
+
or real_candidate_path == real_search_path
|
|
113
|
+
):
|
|
114
|
+
if os.path.isfile(real_candidate_path):
|
|
115
|
+
is_valid_path = True
|
|
116
|
+
break
|
|
117
|
+
|
|
118
|
+
if not is_valid_path:
|
|
119
|
+
err_msg = f"Invalid runbook link '{link}'. Must be one of: {', '.join(self.available_runbooks) if self.available_runbooks else 'No runbooks available'}"
|
|
120
|
+
logging.error(err_msg)
|
|
121
|
+
return StructuredToolResult(
|
|
122
|
+
status=StructuredToolResultStatus.ERROR,
|
|
123
|
+
error=err_msg,
|
|
124
|
+
params=params,
|
|
125
|
+
)
|
|
46
126
|
|
|
47
127
|
runbook_path = get_runbook_by_path(link, search_paths)
|
|
48
128
|
|
|
@@ -116,7 +196,7 @@ class RunbookFetcher(Tool):
|
|
|
116
196
|
|
|
117
197
|
class RunbookToolset(Toolset):
|
|
118
198
|
def __init__(self, additional_search_paths: Optional[List[str]] = None):
|
|
119
|
-
# Store additional search paths in config
|
|
199
|
+
# Store additional search paths in config for RunbookFetcher to access
|
|
120
200
|
config = {}
|
|
121
201
|
if additional_search_paths:
|
|
122
202
|
config["additional_search_paths"] = additional_search_paths
|
|
@@ -126,7 +206,7 @@ class RunbookToolset(Toolset):
|
|
|
126
206
|
description="Fetch runbooks",
|
|
127
207
|
icon_url="https://platform.robusta.dev/demos/runbook.svg",
|
|
128
208
|
tools=[
|
|
129
|
-
RunbookFetcher(self),
|
|
209
|
+
RunbookFetcher(self, additional_search_paths),
|
|
130
210
|
],
|
|
131
211
|
docs_url="https://holmesgpt.dev/data-sources/",
|
|
132
212
|
tags=[
|