holmesgpt 0.14.2__py3-none-any.whl → 0.14.3a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of holmesgpt might be problematic. Click here for more details.
- holmes/__init__.py +1 -1
- holmes/common/env_vars.py +6 -0
- holmes/config.py +3 -6
- holmes/core/conversations.py +12 -2
- holmes/core/feedback.py +191 -0
- holmes/core/llm.py +16 -12
- holmes/core/models.py +101 -1
- holmes/core/supabase_dal.py +23 -9
- holmes/core/tool_calling_llm.py +197 -15
- holmes/core/tools.py +20 -7
- holmes/core/tools_utils/token_counting.py +13 -0
- holmes/core/tools_utils/tool_context_window_limiter.py +45 -23
- holmes/core/tools_utils/tool_executor.py +11 -6
- holmes/core/toolset_manager.py +5 -1
- holmes/core/truncation/dal_truncation_utils.py +23 -0
- holmes/interactive.py +146 -14
- holmes/plugins/prompts/_fetch_logs.jinja2 +3 -0
- holmes/plugins/runbooks/__init__.py +6 -1
- holmes/plugins/toolsets/__init__.py +11 -4
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +9 -20
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +2 -3
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +2 -3
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +6 -4
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +6 -4
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +2 -3
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +6 -4
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +2 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +2 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +2 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +2 -3
- holmes/plugins/toolsets/bash/bash_toolset.py +4 -7
- holmes/plugins/toolsets/cilium.yaml +284 -0
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +5 -10
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +1 -1
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +6 -13
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +3 -6
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +4 -9
- holmes/plugins/toolsets/git.py +14 -12
- holmes/plugins/toolsets/grafana/grafana_tempo_api.py +23 -42
- holmes/plugins/toolsets/grafana/toolset_grafana.py +2 -3
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +18 -36
- holmes/plugins/toolsets/internet/internet.py +2 -3
- holmes/plugins/toolsets/internet/notion.py +2 -3
- holmes/plugins/toolsets/investigator/core_investigation.py +7 -9
- holmes/plugins/toolsets/kafka.py +7 -18
- holmes/plugins/toolsets/logging_utils/logging_api.py +79 -3
- holmes/plugins/toolsets/mcp/toolset_mcp.py +2 -3
- holmes/plugins/toolsets/newrelic/__init__.py +0 -0
- holmes/plugins/toolsets/newrelic/new_relic_api.py +125 -0
- holmes/plugins/toolsets/newrelic/newrelic.jinja2 +41 -0
- holmes/plugins/toolsets/newrelic/newrelic.py +211 -0
- holmes/plugins/toolsets/opensearch/opensearch.py +5 -12
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +3 -6
- holmes/plugins/toolsets/prometheus/prometheus.py +131 -97
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +3 -6
- holmes/plugins/toolsets/robusta/robusta.py +4 -9
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +93 -13
- holmes/plugins/toolsets/servicenow/servicenow.py +5 -10
- holmes/utils/sentry_helper.py +1 -1
- holmes/utils/stream.py +22 -7
- holmes/version.py +34 -14
- {holmesgpt-0.14.2.dist-info → holmesgpt-0.14.3a0.dist-info}/METADATA +6 -8
- {holmesgpt-0.14.2.dist-info → holmesgpt-0.14.3a0.dist-info}/RECORD +66 -60
- holmes/core/tools_utils/data_types.py +0 -81
- holmes/plugins/toolsets/newrelic.py +0 -231
- {holmesgpt-0.14.2.dist-info → holmesgpt-0.14.3a0.dist-info}/LICENSE.txt +0 -0
- {holmesgpt-0.14.2.dist-info → holmesgpt-0.14.3a0.dist-info}/WHEEL +0 -0
- {holmesgpt-0.14.2.dist-info → holmesgpt-0.14.3a0.dist-info}/entry_points.txt +0 -0
|
@@ -15,11 +15,14 @@ from holmes.core.tools import (
|
|
|
15
15
|
CallablePrerequisite,
|
|
16
16
|
StructuredToolResult,
|
|
17
17
|
Tool,
|
|
18
|
+
ToolInvokeContext,
|
|
18
19
|
ToolParameter,
|
|
19
20
|
StructuredToolResultStatus,
|
|
20
21
|
Toolset,
|
|
21
22
|
ToolsetTag,
|
|
22
23
|
)
|
|
24
|
+
from holmes.core.tools_utils.token_counting import count_tool_response_tokens
|
|
25
|
+
from holmes.core.tools_utils.tool_context_window_limiter import get_pct_token_count
|
|
23
26
|
from holmes.plugins.toolsets.consts import STANDARD_END_DATETIME_TOOL_PARAM_DESCRIPTION
|
|
24
27
|
from holmes.plugins.toolsets.prometheus.utils import parse_duration_to_seconds
|
|
25
28
|
from holmes.plugins.toolsets.service_discovery import PrometheusDiscovery
|
|
@@ -42,8 +45,6 @@ PROMETHEUS_METADATA_API_LIMIT = 100 # Default limit for Prometheus metadata API
|
|
|
42
45
|
# Default timeout values for PromQL queries
|
|
43
46
|
DEFAULT_QUERY_TIMEOUT_SECONDS = 20
|
|
44
47
|
MAX_QUERY_TIMEOUT_SECONDS = 180
|
|
45
|
-
# Default character limit for query responses to prevent token limit issues
|
|
46
|
-
DEFAULT_QUERY_RESPONSE_SIZE_LIMIT = 30000
|
|
47
48
|
# Default timeout for metadata API calls (discovery endpoints)
|
|
48
49
|
DEFAULT_METADATA_TIMEOUT_SECONDS = 20
|
|
49
50
|
MAX_METADATA_TIMEOUT_SECONDS = 60
|
|
@@ -91,8 +92,8 @@ class PrometheusConfig(BaseModel):
|
|
|
91
92
|
rules_cache_duration_seconds: Optional[int] = 1800 # 30 minutes
|
|
92
93
|
additional_labels: Optional[Dict[str, str]] = None
|
|
93
94
|
prometheus_ssl_enabled: bool = True
|
|
94
|
-
|
|
95
|
-
|
|
95
|
+
query_response_size_limit_pct: Optional[int] = (
|
|
96
|
+
2 # Limit the max number of tokens that a query result can take to proactively prevent token limit issues. Expressed in % of the model's context window
|
|
96
97
|
)
|
|
97
98
|
|
|
98
99
|
@field_validator("prometheus_url")
|
|
@@ -318,7 +319,7 @@ def add_prometheus_auth(prometheus_auth_header: Optional[str]) -> Dict[str, Any]
|
|
|
318
319
|
|
|
319
320
|
|
|
320
321
|
def create_data_summary_for_large_result(
|
|
321
|
-
result_data: Dict, query: str,
|
|
322
|
+
result_data: Dict, query: str, data_size_tokens: int, is_range_query: bool = False
|
|
322
323
|
) -> Dict[str, Any]:
|
|
323
324
|
"""
|
|
324
325
|
Create a summary for large Prometheus results instead of returning full data.
|
|
@@ -326,7 +327,7 @@ def create_data_summary_for_large_result(
|
|
|
326
327
|
Args:
|
|
327
328
|
result_data: The Prometheus data result
|
|
328
329
|
query: The original PromQL query
|
|
329
|
-
|
|
330
|
+
data_size_tokens: Size of the data in tokens
|
|
330
331
|
is_range_query: Whether this is a range query (vs instant query)
|
|
331
332
|
|
|
332
333
|
Returns:
|
|
@@ -361,10 +362,10 @@ def create_data_summary_for_large_result(
|
|
|
361
362
|
)
|
|
362
363
|
|
|
363
364
|
return {
|
|
364
|
-
"message": f"Data too large to return ({
|
|
365
|
+
"message": f"Data too large to return ({data_size_tokens:,} tokens). Query returned {num_items} time series with {total_points:,} total data points.",
|
|
365
366
|
"series_count": num_items,
|
|
366
367
|
"total_data_points": total_points,
|
|
367
|
-
"
|
|
368
|
+
"data_size_tokens": data_size_tokens,
|
|
368
369
|
"label_cardinality": label_summary,
|
|
369
370
|
"suggestion": f'Consider using topk({min(5, num_items)}, {query}) to limit results to the top {min(5, num_items)} series. To also capture remaining data as \'other\': topk({min(5, num_items)}, {query}) or label_replace((sum({query}) - sum(topk({min(5, num_items)}, {query}))), "pod", "other", "", "")',
|
|
370
371
|
}
|
|
@@ -394,15 +395,46 @@ def create_data_summary_for_large_result(
|
|
|
394
395
|
)
|
|
395
396
|
|
|
396
397
|
return {
|
|
397
|
-
"message": f"Data too large to return ({
|
|
398
|
+
"message": f"Data too large to return ({data_size_tokens:,} tokens). Query returned {num_items} results.",
|
|
398
399
|
"result_count": num_items,
|
|
399
400
|
"result_type": result_type,
|
|
400
|
-
"
|
|
401
|
+
"data_size_tokens": data_size_tokens,
|
|
401
402
|
"label_cardinality": label_summary,
|
|
402
403
|
"suggestion": f'Consider using topk({min(5, num_items)}, {query}) to limit results. To also capture remaining data as \'other\': topk({min(5, num_items)}, {query}) or label_replace((sum({query}) - sum(topk({min(5, num_items)}, {query}))), "instance", "other", "", "")',
|
|
403
404
|
}
|
|
404
405
|
|
|
405
406
|
|
|
407
|
+
class MetricsBasedResponse(BaseModel):
|
|
408
|
+
status: str
|
|
409
|
+
error_message: Optional[str] = None
|
|
410
|
+
data: Optional[str] = None
|
|
411
|
+
random_key: str
|
|
412
|
+
tool_name: str
|
|
413
|
+
description: str
|
|
414
|
+
query: str
|
|
415
|
+
start: Optional[str] = None
|
|
416
|
+
end: Optional[str] = None
|
|
417
|
+
step: Optional[float] = None
|
|
418
|
+
output_type: Optional[str] = None
|
|
419
|
+
data_summary: Optional[dict[str, Any]] = None
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
def create_structured_tool_result(
|
|
423
|
+
params: dict, response: MetricsBasedResponse
|
|
424
|
+
) -> StructuredToolResult:
|
|
425
|
+
status = StructuredToolResultStatus.SUCCESS
|
|
426
|
+
if response.error_message or response.status.lower() in ("failed", "error"):
|
|
427
|
+
status = StructuredToolResultStatus.ERROR
|
|
428
|
+
elif not response.data:
|
|
429
|
+
status = StructuredToolResultStatus.NO_DATA
|
|
430
|
+
|
|
431
|
+
return StructuredToolResult(
|
|
432
|
+
status=status,
|
|
433
|
+
data=response.model_dump_json(indent=2),
|
|
434
|
+
params=params,
|
|
435
|
+
)
|
|
436
|
+
|
|
437
|
+
|
|
406
438
|
class ListPrometheusRules(BasePrometheusTool):
|
|
407
439
|
def __init__(self, toolset: "PrometheusToolset"):
|
|
408
440
|
super().__init__(
|
|
@@ -413,9 +445,7 @@ class ListPrometheusRules(BasePrometheusTool):
|
|
|
413
445
|
)
|
|
414
446
|
self._cache = None
|
|
415
447
|
|
|
416
|
-
def _invoke(
|
|
417
|
-
self, params: dict, user_approved: bool = False
|
|
418
|
-
) -> StructuredToolResult:
|
|
448
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
419
449
|
if not self.toolset.config or not self.toolset.config.prometheus_url:
|
|
420
450
|
return StructuredToolResult(
|
|
421
451
|
status=StructuredToolResultStatus.ERROR,
|
|
@@ -533,9 +563,7 @@ class GetMetricNames(BasePrometheusTool):
|
|
|
533
563
|
toolset=toolset,
|
|
534
564
|
)
|
|
535
565
|
|
|
536
|
-
def _invoke(
|
|
537
|
-
self, params: dict, user_approved: bool = False
|
|
538
|
-
) -> StructuredToolResult:
|
|
566
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
539
567
|
if not self.toolset.config or not self.toolset.config.prometheus_url:
|
|
540
568
|
return StructuredToolResult(
|
|
541
569
|
status=StructuredToolResultStatus.ERROR,
|
|
@@ -654,9 +682,7 @@ class GetLabelValues(BasePrometheusTool):
|
|
|
654
682
|
toolset=toolset,
|
|
655
683
|
)
|
|
656
684
|
|
|
657
|
-
def _invoke(
|
|
658
|
-
self, params: dict, user_approved: bool = False
|
|
659
|
-
) -> StructuredToolResult:
|
|
685
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
660
686
|
if not self.toolset.config or not self.toolset.config.prometheus_url:
|
|
661
687
|
return StructuredToolResult(
|
|
662
688
|
status=StructuredToolResultStatus.ERROR,
|
|
@@ -770,9 +796,7 @@ class GetAllLabels(BasePrometheusTool):
|
|
|
770
796
|
toolset=toolset,
|
|
771
797
|
)
|
|
772
798
|
|
|
773
|
-
def _invoke(
|
|
774
|
-
self, params: dict, user_approved: bool = False
|
|
775
|
-
) -> StructuredToolResult:
|
|
799
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
776
800
|
if not self.toolset.config or not self.toolset.config.prometheus_url:
|
|
777
801
|
return StructuredToolResult(
|
|
778
802
|
status=StructuredToolResultStatus.ERROR,
|
|
@@ -877,9 +901,7 @@ class GetSeries(BasePrometheusTool):
|
|
|
877
901
|
toolset=toolset,
|
|
878
902
|
)
|
|
879
903
|
|
|
880
|
-
def _invoke(
|
|
881
|
-
self, params: dict, user_approved: bool = False
|
|
882
|
-
) -> StructuredToolResult:
|
|
904
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
883
905
|
if not self.toolset.config or not self.toolset.config.prometheus_url:
|
|
884
906
|
return StructuredToolResult(
|
|
885
907
|
status=StructuredToolResultStatus.ERROR,
|
|
@@ -981,9 +1003,7 @@ class GetMetricMetadata(BasePrometheusTool):
|
|
|
981
1003
|
toolset=toolset,
|
|
982
1004
|
)
|
|
983
1005
|
|
|
984
|
-
def _invoke(
|
|
985
|
-
self, params: dict, user_approved: bool = False
|
|
986
|
-
) -> StructuredToolResult:
|
|
1006
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
987
1007
|
if not self.toolset.config or not self.toolset.config.prometheus_url:
|
|
988
1008
|
return StructuredToolResult(
|
|
989
1009
|
status=StructuredToolResultStatus.ERROR,
|
|
@@ -1072,9 +1092,7 @@ class ExecuteInstantQuery(BasePrometheusTool):
|
|
|
1072
1092
|
toolset=toolset,
|
|
1073
1093
|
)
|
|
1074
1094
|
|
|
1075
|
-
def _invoke(
|
|
1076
|
-
self, params: dict, user_approved: bool = False
|
|
1077
|
-
) -> StructuredToolResult:
|
|
1095
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
1078
1096
|
if not self.toolset.config or not self.toolset.config.prometheus_url:
|
|
1079
1097
|
return StructuredToolResult(
|
|
1080
1098
|
status=StructuredToolResultStatus.ERROR,
|
|
@@ -1120,56 +1138,64 @@ class ExecuteInstantQuery(BasePrometheusTool):
|
|
|
1120
1138
|
error_message = (
|
|
1121
1139
|
"The prometheus query returned no result. Is the query correct?"
|
|
1122
1140
|
)
|
|
1123
|
-
response_data =
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1141
|
+
response_data = MetricsBasedResponse(
|
|
1142
|
+
status=status,
|
|
1143
|
+
error_message=error_message,
|
|
1144
|
+
random_key=generate_random_key(),
|
|
1145
|
+
tool_name=self.name,
|
|
1146
|
+
description=description,
|
|
1147
|
+
query=query,
|
|
1148
|
+
)
|
|
1149
|
+
structured_tool_result: StructuredToolResult
|
|
1132
1150
|
# Check if data should be included based on size
|
|
1133
1151
|
if self.toolset.config.tool_calls_return_data:
|
|
1134
1152
|
result_data = data.get("data", {})
|
|
1153
|
+
response_data.data = result_data
|
|
1135
1154
|
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1155
|
+
structured_tool_result = create_structured_tool_result(
|
|
1156
|
+
params=params, response=response_data
|
|
1157
|
+
)
|
|
1158
|
+
token_count = count_tool_response_tokens(
|
|
1159
|
+
llm=context.llm, structured_tool_result=structured_tool_result
|
|
1160
|
+
)
|
|
1161
|
+
|
|
1162
|
+
token_limit = context.max_token_count
|
|
1163
|
+
if self.toolset.config.query_response_size_limit_pct:
|
|
1164
|
+
custom_token_limit = get_pct_token_count(
|
|
1165
|
+
percent_of_total_context_window=self.toolset.config.query_response_size_limit_pct,
|
|
1166
|
+
llm=context.llm,
|
|
1167
|
+
)
|
|
1168
|
+
if custom_token_limit < token_limit:
|
|
1169
|
+
token_limit = custom_token_limit
|
|
1139
1170
|
|
|
1140
1171
|
# Provide summary if data is too large
|
|
1141
|
-
if
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
> self.toolset.config.query_response_size_limit
|
|
1145
|
-
):
|
|
1146
|
-
response_data["data_summary"] = (
|
|
1172
|
+
if token_count > token_limit:
|
|
1173
|
+
response_data.data = None
|
|
1174
|
+
response_data.data_summary = (
|
|
1147
1175
|
create_data_summary_for_large_result(
|
|
1148
1176
|
result_data,
|
|
1149
1177
|
query,
|
|
1150
|
-
|
|
1178
|
+
token_count,
|
|
1151
1179
|
is_range_query=False,
|
|
1152
1180
|
)
|
|
1153
1181
|
)
|
|
1154
1182
|
logging.info(
|
|
1155
1183
|
f"Prometheus instant query returned large dataset: "
|
|
1156
|
-
f"{response_data
|
|
1157
|
-
f"{
|
|
1184
|
+
f"{response_data.data_summary.get('result_count', 0)} results, "
|
|
1185
|
+
f"{token_count:,} tokens (limit: {token_limit:,}). "
|
|
1158
1186
|
f"Returning summary instead of full data."
|
|
1159
1187
|
)
|
|
1160
|
-
# Also add
|
|
1161
|
-
response_data
|
|
1162
|
-
f"Data size: {
|
|
1188
|
+
# Also add token info to the summary for debugging
|
|
1189
|
+
response_data.data_summary["_debug_info"] = (
|
|
1190
|
+
f"Data size: {token_count:,} tokens exceeded limit of {token_limit:,} tokens"
|
|
1163
1191
|
)
|
|
1164
1192
|
else:
|
|
1165
|
-
response_data
|
|
1193
|
+
response_data.data = result_data
|
|
1166
1194
|
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
status=StructuredToolResultStatus.SUCCESS,
|
|
1170
|
-
data=data_str,
|
|
1171
|
-
params=params,
|
|
1195
|
+
structured_tool_result = create_structured_tool_result(
|
|
1196
|
+
params=params, response=response_data
|
|
1172
1197
|
)
|
|
1198
|
+
return structured_tool_result
|
|
1173
1199
|
|
|
1174
1200
|
# Handle known Prometheus error status codes
|
|
1175
1201
|
error_msg = "Unknown error occurred"
|
|
@@ -1280,9 +1306,7 @@ class ExecuteRangeQuery(BasePrometheusTool):
|
|
|
1280
1306
|
toolset=toolset,
|
|
1281
1307
|
)
|
|
1282
1308
|
|
|
1283
|
-
def _invoke(
|
|
1284
|
-
self, params: dict, user_approved: bool = False
|
|
1285
|
-
) -> StructuredToolResult:
|
|
1309
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
1286
1310
|
if not self.toolset.config or not self.toolset.config.prometheus_url:
|
|
1287
1311
|
return StructuredToolResult(
|
|
1288
1312
|
status=StructuredToolResultStatus.ERROR,
|
|
@@ -1352,59 +1376,69 @@ class ExecuteRangeQuery(BasePrometheusTool):
|
|
|
1352
1376
|
error_message = (
|
|
1353
1377
|
"The prometheus query returned no result. Is the query correct?"
|
|
1354
1378
|
)
|
|
1355
|
-
response_data =
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
|
|
1360
|
-
|
|
1361
|
-
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
|
|
1365
|
-
|
|
1366
|
-
|
|
1379
|
+
response_data = MetricsBasedResponse(
|
|
1380
|
+
status=status,
|
|
1381
|
+
error_message=error_message,
|
|
1382
|
+
random_key=generate_random_key(),
|
|
1383
|
+
tool_name=self.name,
|
|
1384
|
+
description=description,
|
|
1385
|
+
query=query,
|
|
1386
|
+
start=start,
|
|
1387
|
+
end=end,
|
|
1388
|
+
step=step,
|
|
1389
|
+
output_type=output_type,
|
|
1390
|
+
)
|
|
1391
|
+
|
|
1392
|
+
structured_tool_result: StructuredToolResult
|
|
1367
1393
|
|
|
1368
1394
|
# Check if data should be included based on size
|
|
1369
1395
|
if self.toolset.config.tool_calls_return_data:
|
|
1370
1396
|
result_data = data.get("data", {})
|
|
1397
|
+
response_data.data = result_data
|
|
1398
|
+
structured_tool_result = create_structured_tool_result(
|
|
1399
|
+
params=params, response=response_data
|
|
1400
|
+
)
|
|
1371
1401
|
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
|
|
1402
|
+
token_count = count_tool_response_tokens(
|
|
1403
|
+
llm=context.llm, structured_tool_result=structured_tool_result
|
|
1404
|
+
)
|
|
1405
|
+
|
|
1406
|
+
token_limit = context.max_token_count
|
|
1407
|
+
if self.toolset.config.query_response_size_limit_pct:
|
|
1408
|
+
custom_token_limit = get_pct_token_count(
|
|
1409
|
+
percent_of_total_context_window=self.toolset.config.query_response_size_limit_pct,
|
|
1410
|
+
llm=context.llm,
|
|
1411
|
+
)
|
|
1412
|
+
if custom_token_limit < token_limit:
|
|
1413
|
+
token_limit = custom_token_limit
|
|
1375
1414
|
|
|
1376
1415
|
# Provide summary if data is too large
|
|
1377
|
-
if
|
|
1378
|
-
|
|
1379
|
-
|
|
1380
|
-
> self.toolset.config.query_response_size_limit
|
|
1381
|
-
):
|
|
1382
|
-
response_data["data_summary"] = (
|
|
1416
|
+
if token_count > token_limit:
|
|
1417
|
+
response_data.data = None
|
|
1418
|
+
response_data.data_summary = (
|
|
1383
1419
|
create_data_summary_for_large_result(
|
|
1384
|
-
result_data, query,
|
|
1420
|
+
result_data, query, token_count, is_range_query=True
|
|
1385
1421
|
)
|
|
1386
1422
|
)
|
|
1387
1423
|
logging.info(
|
|
1388
1424
|
f"Prometheus range query returned large dataset: "
|
|
1389
|
-
f"{response_data
|
|
1390
|
-
f"{
|
|
1425
|
+
f"{response_data.data_summary.get('series_count', 0)} series, "
|
|
1426
|
+
f"{token_count:,} tokens (limit: {token_limit:,}). "
|
|
1391
1427
|
f"Returning summary instead of full data."
|
|
1392
1428
|
)
|
|
1393
1429
|
# Also add token info to the summary for debugging
|
|
1394
|
-
response_data
|
|
1395
|
-
f"Data size: {
|
|
1430
|
+
response_data.data_summary["_debug_info"] = (
|
|
1431
|
+
f"Data size: {token_count:,} tokens exceeded limit of {token_limit:,} tokens"
|
|
1396
1432
|
)
|
|
1397
1433
|
else:
|
|
1398
|
-
response_data
|
|
1399
|
-
|
|
1400
|
-
data_str = json.dumps(response_data, indent=2)
|
|
1434
|
+
response_data.data = result_data
|
|
1401
1435
|
|
|
1402
|
-
|
|
1403
|
-
|
|
1404
|
-
data=data_str,
|
|
1405
|
-
params=params,
|
|
1436
|
+
structured_tool_result = create_structured_tool_result(
|
|
1437
|
+
params=params, response=response_data
|
|
1406
1438
|
)
|
|
1407
1439
|
|
|
1440
|
+
return structured_tool_result
|
|
1441
|
+
|
|
1408
1442
|
error_msg = "Unknown error occurred"
|
|
1409
1443
|
if response.status_code in [400, 429]:
|
|
1410
1444
|
try:
|
|
@@ -7,6 +7,7 @@ from holmes.core.tools import (
|
|
|
7
7
|
CallablePrerequisite,
|
|
8
8
|
StructuredToolResult,
|
|
9
9
|
Tool,
|
|
10
|
+
ToolInvokeContext,
|
|
10
11
|
ToolParameter,
|
|
11
12
|
StructuredToolResultStatus,
|
|
12
13
|
Toolset,
|
|
@@ -63,9 +64,7 @@ class ListConfiguredClusters(BaseRabbitMQTool):
|
|
|
63
64
|
toolset=toolset,
|
|
64
65
|
)
|
|
65
66
|
|
|
66
|
-
def _invoke(
|
|
67
|
-
self, params: dict, user_approved: bool = False
|
|
68
|
-
) -> StructuredToolResult:
|
|
67
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
69
68
|
if not self.toolset.config:
|
|
70
69
|
raise ValueError("RabbitMQ is not configured.")
|
|
71
70
|
|
|
@@ -103,9 +102,7 @@ class GetRabbitMQClusterStatus(BaseRabbitMQTool):
|
|
|
103
102
|
toolset=toolset,
|
|
104
103
|
)
|
|
105
104
|
|
|
106
|
-
def _invoke(
|
|
107
|
-
self, params: dict, user_approved: bool = False
|
|
108
|
-
) -> StructuredToolResult:
|
|
105
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
109
106
|
try:
|
|
110
107
|
# Fetch node details which include partition info
|
|
111
108
|
cluster_config = self._get_cluster_config(
|
|
@@ -7,6 +7,7 @@ from holmes.core.supabase_dal import SupabaseDal
|
|
|
7
7
|
from holmes.core.tools import (
|
|
8
8
|
StaticPrerequisite,
|
|
9
9
|
Tool,
|
|
10
|
+
ToolInvokeContext,
|
|
10
11
|
ToolParameter,
|
|
11
12
|
Toolset,
|
|
12
13
|
ToolsetTag,
|
|
@@ -45,9 +46,7 @@ class FetchRobustaFinding(Tool):
|
|
|
45
46
|
logging.error(error)
|
|
46
47
|
return {"error": error}
|
|
47
48
|
|
|
48
|
-
def _invoke(
|
|
49
|
-
self, params: dict, user_approved: bool = False
|
|
50
|
-
) -> StructuredToolResult:
|
|
49
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
51
50
|
finding_id = params[PARAM_FINDING_ID]
|
|
52
51
|
try:
|
|
53
52
|
finding = self._fetch_finding(finding_id)
|
|
@@ -115,9 +114,7 @@ class FetchResourceRecommendation(Tool):
|
|
|
115
114
|
)
|
|
116
115
|
return None
|
|
117
116
|
|
|
118
|
-
def _invoke(
|
|
119
|
-
self, params: dict, user_approved: bool = False
|
|
120
|
-
) -> StructuredToolResult:
|
|
117
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
121
118
|
try:
|
|
122
119
|
recommendations = self._resource_recommendation(params)
|
|
123
120
|
if recommendations:
|
|
@@ -175,9 +172,7 @@ class FetchConfigurationChanges(Tool):
|
|
|
175
172
|
)
|
|
176
173
|
return None
|
|
177
174
|
|
|
178
|
-
def _invoke(
|
|
179
|
-
self, params: dict, user_approved: bool = False
|
|
180
|
-
) -> StructuredToolResult:
|
|
175
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
181
176
|
try:
|
|
182
177
|
changes = self._fetch_change_history(params)
|
|
183
178
|
if changes:
|
|
@@ -1,17 +1,23 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
import os
|
|
2
3
|
import textwrap
|
|
3
4
|
from typing import Any, Dict, List, Optional
|
|
4
5
|
|
|
5
6
|
from holmes.core.tools import (
|
|
6
7
|
StructuredToolResult,
|
|
7
8
|
Tool,
|
|
9
|
+
ToolInvokeContext,
|
|
8
10
|
ToolParameter,
|
|
9
11
|
StructuredToolResultStatus,
|
|
10
12
|
Toolset,
|
|
11
13
|
ToolsetTag,
|
|
12
14
|
)
|
|
13
15
|
|
|
14
|
-
from holmes.plugins.runbooks import
|
|
16
|
+
from holmes.plugins.runbooks import (
|
|
17
|
+
get_runbook_by_path,
|
|
18
|
+
load_runbook_catalog,
|
|
19
|
+
DEFAULT_RUNBOOK_SEARCH_PATH,
|
|
20
|
+
)
|
|
15
21
|
from holmes.plugins.toolsets.utils import toolset_name_for_one_liner
|
|
16
22
|
|
|
17
23
|
|
|
@@ -19,30 +25,104 @@ from holmes.plugins.toolsets.utils import toolset_name_for_one_liner
|
|
|
19
25
|
# runbooks from external sources as well.
|
|
20
26
|
class RunbookFetcher(Tool):
|
|
21
27
|
toolset: "RunbookToolset"
|
|
28
|
+
available_runbooks: List[str] = []
|
|
29
|
+
additional_search_paths: Optional[List[str]] = None
|
|
30
|
+
|
|
31
|
+
def __init__(
|
|
32
|
+
self,
|
|
33
|
+
toolset: "RunbookToolset",
|
|
34
|
+
additional_search_paths: Optional[List[str]] = None,
|
|
35
|
+
):
|
|
36
|
+
catalog = load_runbook_catalog()
|
|
37
|
+
available_runbooks = []
|
|
38
|
+
if catalog:
|
|
39
|
+
available_runbooks = [entry.link for entry in catalog.catalog]
|
|
40
|
+
|
|
41
|
+
# If additional search paths are configured (e.g., for testing), also scan those for .md files
|
|
42
|
+
if additional_search_paths:
|
|
43
|
+
for search_path in additional_search_paths:
|
|
44
|
+
if not os.path.isdir(search_path):
|
|
45
|
+
continue
|
|
46
|
+
|
|
47
|
+
for file in os.listdir(search_path):
|
|
48
|
+
if file.endswith(".md") and file not in available_runbooks:
|
|
49
|
+
available_runbooks.append(file)
|
|
50
|
+
|
|
51
|
+
# Build description with available runbooks
|
|
52
|
+
runbook_list = ", ".join([f'"{rb}"' for rb in available_runbooks])
|
|
22
53
|
|
|
23
|
-
def __init__(self, toolset: "RunbookToolset"):
|
|
24
54
|
super().__init__(
|
|
25
55
|
name="fetch_runbook",
|
|
26
56
|
description="Get runbook content by runbook link. Use this to get troubleshooting steps for incidents",
|
|
27
57
|
parameters={
|
|
28
|
-
# use link as a more generic term for runbook path, considering we may have external links in the future
|
|
29
58
|
"link": ToolParameter(
|
|
30
|
-
description="The link to the runbook",
|
|
59
|
+
description=f"The link to the runbook (non-empty string required). Must be one of: {runbook_list}",
|
|
31
60
|
type="string",
|
|
32
61
|
required=True,
|
|
33
62
|
),
|
|
34
63
|
},
|
|
35
|
-
toolset=toolset, # type: ignore
|
|
64
|
+
toolset=toolset, # type: ignore[call-arg]
|
|
65
|
+
available_runbooks=available_runbooks, # type: ignore[call-arg]
|
|
66
|
+
additional_search_paths=additional_search_paths, # type: ignore[call-arg]
|
|
36
67
|
)
|
|
37
68
|
|
|
38
|
-
def _invoke(
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
link
|
|
69
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
70
|
+
link: str = params.get("link", "")
|
|
71
|
+
# Validate link is not empty
|
|
72
|
+
if not link or not link.strip():
|
|
73
|
+
err_msg = (
|
|
74
|
+
"Runbook link cannot be empty. Please provide a valid runbook path."
|
|
75
|
+
)
|
|
76
|
+
logging.error(err_msg)
|
|
77
|
+
return StructuredToolResult(
|
|
78
|
+
status=StructuredToolResultStatus.ERROR,
|
|
79
|
+
error=err_msg,
|
|
80
|
+
params=params,
|
|
81
|
+
)
|
|
42
82
|
|
|
83
|
+
# Build list of allowed search paths
|
|
43
84
|
search_paths = [DEFAULT_RUNBOOK_SEARCH_PATH]
|
|
44
|
-
if self.
|
|
45
|
-
search_paths.extend(self.
|
|
85
|
+
if self.additional_search_paths:
|
|
86
|
+
search_paths.extend(self.additional_search_paths)
|
|
87
|
+
|
|
88
|
+
# Validate link is in the available runbooks list OR is a valid path within allowed directories
|
|
89
|
+
if link not in self.available_runbooks:
|
|
90
|
+
# For links not in the catalog, perform strict path validation
|
|
91
|
+
if not link.endswith(".md"):
|
|
92
|
+
err_msg = f"Invalid runbook link '{link}'. Must end with .md extension."
|
|
93
|
+
logging.error(err_msg)
|
|
94
|
+
return StructuredToolResult(
|
|
95
|
+
status=StructuredToolResultStatus.ERROR,
|
|
96
|
+
error=err_msg,
|
|
97
|
+
params=params,
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
# Check if the link would resolve to a valid path within allowed directories
|
|
101
|
+
# This prevents path traversal attacks like ../../secret.md
|
|
102
|
+
is_valid_path = False
|
|
103
|
+
for search_path in search_paths:
|
|
104
|
+
candidate_path = os.path.join(search_path, link)
|
|
105
|
+
# Canonicalize both paths to resolve any .. or . components
|
|
106
|
+
real_search_path = os.path.realpath(search_path)
|
|
107
|
+
real_candidate_path = os.path.realpath(candidate_path)
|
|
108
|
+
|
|
109
|
+
# Check if the resolved path is within the allowed directory
|
|
110
|
+
if (
|
|
111
|
+
real_candidate_path.startswith(real_search_path + os.sep)
|
|
112
|
+
or real_candidate_path == real_search_path
|
|
113
|
+
):
|
|
114
|
+
if os.path.isfile(real_candidate_path):
|
|
115
|
+
is_valid_path = True
|
|
116
|
+
break
|
|
117
|
+
|
|
118
|
+
if not is_valid_path:
|
|
119
|
+
err_msg = f"Invalid runbook link '{link}'. Must be one of: {', '.join(self.available_runbooks) if self.available_runbooks else 'No runbooks available'}"
|
|
120
|
+
logging.error(err_msg)
|
|
121
|
+
return StructuredToolResult(
|
|
122
|
+
status=StructuredToolResultStatus.ERROR,
|
|
123
|
+
error=err_msg,
|
|
124
|
+
params=params,
|
|
125
|
+
)
|
|
46
126
|
|
|
47
127
|
runbook_path = get_runbook_by_path(link, search_paths)
|
|
48
128
|
|
|
@@ -116,7 +196,7 @@ class RunbookFetcher(Tool):
|
|
|
116
196
|
|
|
117
197
|
class RunbookToolset(Toolset):
|
|
118
198
|
def __init__(self, additional_search_paths: Optional[List[str]] = None):
|
|
119
|
-
# Store additional search paths in config
|
|
199
|
+
# Store additional search paths in config for RunbookFetcher to access
|
|
120
200
|
config = {}
|
|
121
201
|
if additional_search_paths:
|
|
122
202
|
config["additional_search_paths"] = additional_search_paths
|
|
@@ -126,7 +206,7 @@ class RunbookToolset(Toolset):
|
|
|
126
206
|
description="Fetch runbooks",
|
|
127
207
|
icon_url="https://platform.robusta.dev/demos/runbook.svg",
|
|
128
208
|
tools=[
|
|
129
|
-
RunbookFetcher(self),
|
|
209
|
+
RunbookFetcher(self, additional_search_paths),
|
|
130
210
|
],
|
|
131
211
|
docs_url="https://holmesgpt.dev/data-sources/",
|
|
132
212
|
tags=[
|