holmesgpt 0.13.2__py3-none-any.whl → 0.18.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- holmes/__init__.py +3 -5
- holmes/clients/robusta_client.py +20 -6
- holmes/common/env_vars.py +58 -3
- holmes/common/openshift.py +1 -1
- holmes/config.py +123 -148
- holmes/core/conversations.py +71 -15
- holmes/core/feedback.py +191 -0
- holmes/core/investigation.py +31 -39
- holmes/core/investigation_structured_output.py +3 -3
- holmes/core/issue.py +1 -1
- holmes/core/llm.py +508 -88
- holmes/core/models.py +108 -4
- holmes/core/openai_formatting.py +14 -1
- holmes/core/prompt.py +48 -3
- holmes/core/runbooks.py +1 -0
- holmes/core/safeguards.py +8 -6
- holmes/core/supabase_dal.py +295 -100
- holmes/core/tool_calling_llm.py +489 -428
- holmes/core/tools.py +325 -56
- holmes/core/tools_utils/token_counting.py +21 -0
- holmes/core/tools_utils/tool_context_window_limiter.py +40 -0
- holmes/core/tools_utils/tool_executor.py +0 -13
- holmes/core/tools_utils/toolset_utils.py +1 -0
- holmes/core/toolset_manager.py +191 -5
- holmes/core/tracing.py +19 -3
- holmes/core/transformers/__init__.py +23 -0
- holmes/core/transformers/base.py +63 -0
- holmes/core/transformers/llm_summarize.py +175 -0
- holmes/core/transformers/registry.py +123 -0
- holmes/core/transformers/transformer.py +32 -0
- holmes/core/truncation/compaction.py +94 -0
- holmes/core/truncation/dal_truncation_utils.py +23 -0
- holmes/core/truncation/input_context_window_limiter.py +219 -0
- holmes/interactive.py +228 -31
- holmes/main.py +23 -40
- holmes/plugins/interfaces.py +2 -1
- holmes/plugins/prompts/__init__.py +2 -1
- holmes/plugins/prompts/_fetch_logs.jinja2 +31 -6
- holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
- holmes/plugins/prompts/_runbook_instructions.jinja2 +24 -12
- holmes/plugins/prompts/base_user_prompt.jinja2 +7 -0
- holmes/plugins/prompts/conversation_history_compaction.jinja2 +89 -0
- holmes/plugins/prompts/generic_ask.jinja2 +0 -4
- holmes/plugins/prompts/generic_ask_conversation.jinja2 +0 -1
- holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +0 -1
- holmes/plugins/prompts/generic_investigation.jinja2 +0 -1
- holmes/plugins/prompts/investigation_procedure.jinja2 +50 -1
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +0 -1
- holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +0 -1
- holmes/plugins/runbooks/__init__.py +145 -17
- holmes/plugins/runbooks/catalog.json +2 -0
- holmes/plugins/sources/github/__init__.py +4 -2
- holmes/plugins/sources/prometheus/models.py +1 -0
- holmes/plugins/toolsets/__init__.py +44 -27
- holmes/plugins/toolsets/aks-node-health.yaml +46 -0
- holmes/plugins/toolsets/aks.yaml +64 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +38 -47
- holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +3 -2
- holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
- holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +3 -2
- holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +3 -1
- holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +3 -1
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +12 -13
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +15 -12
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +15 -12
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +11 -11
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +11 -9
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +15 -12
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +15 -15
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/utils.py +0 -32
- holmes/plugins/toolsets/bash/argocd/__init__.py +3 -3
- holmes/plugins/toolsets/bash/aws/__init__.py +4 -4
- holmes/plugins/toolsets/bash/azure/__init__.py +4 -4
- holmes/plugins/toolsets/bash/bash_toolset.py +11 -15
- holmes/plugins/toolsets/bash/common/bash.py +23 -13
- holmes/plugins/toolsets/bash/common/bash_command.py +1 -1
- holmes/plugins/toolsets/bash/common/stringify.py +1 -1
- holmes/plugins/toolsets/bash/kubectl/__init__.py +2 -1
- holmes/plugins/toolsets/bash/kubectl/constants.py +0 -1
- holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +3 -4
- holmes/plugins/toolsets/bash/parse_command.py +12 -13
- holmes/plugins/toolsets/cilium.yaml +284 -0
- holmes/plugins/toolsets/connectivity_check.py +124 -0
- holmes/plugins/toolsets/coralogix/api.py +132 -119
- holmes/plugins/toolsets/coralogix/coralogix.jinja2 +14 -0
- holmes/plugins/toolsets/coralogix/toolset_coralogix.py +219 -0
- holmes/plugins/toolsets/coralogix/utils.py +15 -79
- holmes/plugins/toolsets/datadog/datadog_api.py +525 -26
- holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +55 -11
- holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +3 -3
- holmes/plugins/toolsets/datadog/datadog_models.py +59 -0
- holmes/plugins/toolsets/datadog/datadog_url_utils.py +213 -0
- holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +165 -28
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +417 -241
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +234 -214
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +167 -79
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +374 -363
- holmes/plugins/toolsets/elasticsearch/__init__.py +6 -0
- holmes/plugins/toolsets/elasticsearch/elasticsearch.py +834 -0
- holmes/plugins/toolsets/elasticsearch/opensearch_ppl_query_docs.jinja2 +1616 -0
- holmes/plugins/toolsets/elasticsearch/opensearch_query_assist.py +78 -0
- holmes/plugins/toolsets/elasticsearch/opensearch_query_assist_instructions.jinja2 +223 -0
- holmes/plugins/toolsets/git.py +54 -50
- holmes/plugins/toolsets/grafana/base_grafana_toolset.py +16 -4
- holmes/plugins/toolsets/grafana/common.py +13 -29
- holmes/plugins/toolsets/grafana/grafana_tempo_api.py +455 -0
- holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +25 -0
- holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +191 -0
- holmes/plugins/toolsets/grafana/loki_api.py +4 -0
- holmes/plugins/toolsets/grafana/toolset_grafana.py +293 -89
- holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +49 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +820 -292
- holmes/plugins/toolsets/grafana/trace_parser.py +4 -3
- holmes/plugins/toolsets/internet/internet.py +15 -16
- holmes/plugins/toolsets/internet/notion.py +9 -11
- holmes/plugins/toolsets/investigator/core_investigation.py +44 -36
- holmes/plugins/toolsets/investigator/model.py +3 -1
- holmes/plugins/toolsets/json_filter_mixin.py +134 -0
- holmes/plugins/toolsets/kafka.py +36 -42
- holmes/plugins/toolsets/kubernetes.yaml +317 -113
- holmes/plugins/toolsets/kubernetes_logs.py +9 -9
- holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
- holmes/plugins/toolsets/logging_utils/logging_api.py +94 -8
- holmes/plugins/toolsets/mcp/toolset_mcp.py +218 -64
- holmes/plugins/toolsets/newrelic/new_relic_api.py +165 -0
- holmes/plugins/toolsets/newrelic/newrelic.jinja2 +65 -0
- holmes/plugins/toolsets/newrelic/newrelic.py +320 -0
- holmes/plugins/toolsets/openshift.yaml +283 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +1202 -421
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +54 -5
- holmes/plugins/toolsets/prometheus/utils.py +28 -0
- holmes/plugins/toolsets/rabbitmq/api.py +23 -4
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +13 -14
- holmes/plugins/toolsets/robusta/robusta.py +239 -68
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +157 -27
- holmes/plugins/toolsets/service_discovery.py +1 -1
- holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
- holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
- holmes/plugins/toolsets/utils.py +88 -0
- holmes/utils/config_utils.py +91 -0
- holmes/utils/connection_utils.py +31 -0
- holmes/utils/console/result.py +10 -0
- holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
- holmes/utils/env.py +7 -0
- holmes/utils/file_utils.py +2 -1
- holmes/utils/global_instructions.py +60 -11
- holmes/utils/holmes_status.py +6 -4
- holmes/utils/holmes_sync_toolsets.py +0 -2
- holmes/utils/krr_utils.py +188 -0
- holmes/utils/log.py +15 -0
- holmes/utils/markdown_utils.py +2 -3
- holmes/utils/memory_limit.py +58 -0
- holmes/utils/sentry_helper.py +64 -0
- holmes/utils/stream.py +69 -8
- holmes/utils/tags.py +4 -3
- holmes/version.py +37 -15
- holmesgpt-0.18.4.dist-info/LICENSE +178 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/METADATA +35 -31
- holmesgpt-0.18.4.dist-info/RECORD +258 -0
- holmes/core/performance_timing.py +0 -72
- holmes/plugins/toolsets/aws.yaml +0 -80
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +0 -112
- holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +0 -310
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +0 -739
- holmes/plugins/toolsets/grafana/grafana_api.py +0 -42
- holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
- holmes/plugins/toolsets/newrelic.py +0 -231
- holmes/plugins/toolsets/opensearch/opensearch.py +0 -257
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +0 -161
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +0 -218
- holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +0 -12
- holmes/plugins/toolsets/opensearch/opensearch_utils.py +0 -166
- holmes/plugins/toolsets/servicenow/install.md +0 -37
- holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
- holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
- holmes/utils/keygen_utils.py +0 -6
- holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
- holmesgpt-0.13.2.dist-info/RECORD +0 -234
- /holmes/plugins/toolsets/{opensearch → newrelic}/__init__.py +0 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/WHEEL +0 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/entry_points.txt +0 -0

holmes/plugins/toolsets/logging_utils/logging_api.py (+94 -8)

@@ -1,27 +1,36 @@
-from abc import ABC, abstractmethod
-from datetime import datetime, timedelta
 import logging
-from
+from abc import ABC, abstractmethod
+from datetime import datetime, timedelta, timezone
 from enum import Enum
+from math import ceil
+from typing import Optional, Set
 
 from pydantic import BaseModel, field_validator
-
+
+from holmes.core.llm import LLM
 from holmes.core.tools import (
     StructuredToolResult,
     Tool,
+    ToolInvokeContext,
     ToolParameter,
     Toolset,
 )
+from holmes.core.tools_utils.token_counting import count_tool_response_tokens
 from holmes.plugins.toolsets.utils import get_param_or_raise
 
 # Default values for log fetching
 DEFAULT_LOG_LIMIT = 100
 SECONDS_PER_DAY = 24 * 60 * 60
 DEFAULT_TIME_SPAN_SECONDS = 7 * SECONDS_PER_DAY  # 1 week in seconds
-DEFAULT_GRAPH_TIME_SPAN_SECONDS = 1 *
+DEFAULT_GRAPH_TIME_SPAN_SECONDS = 1 * 60 * 60  # 1 hour in seconds
 
 POD_LOGGING_TOOL_NAME = "fetch_pod_logs"
 
+TRUNCATION_PROMPT_PREFIX = "[... PREVIOUS LOGS ABOVE THIS LINE HAVE BEEN TRUNCATED]"
+MIN_NUMBER_OF_CHARACTERS_TO_TRUNCATE: int = (
+    50 + len(TRUNCATION_PROMPT_PREFIX)
+)  # prevents the truncation algorithm from going too slow once the actual token count gets close to the expected limit
+
 
 class LoggingCapability(str, Enum):
     """Optional advanced logging capabilities"""
@@ -74,6 +83,76 @@ class BasePodLoggingToolset(Toolset, ABC):
         return ""
 
 
+def truncate_logs(
+    logging_structured_tool_result: StructuredToolResult,
+    llm: LLM,
+    token_limit: int,
+    structured_params: FetchPodLogsParams,
+    tool_call_id: str,
+    tool_name: str,
+):
+    original_token_count = count_tool_response_tokens(
+        llm=llm,
+        structured_tool_result=logging_structured_tool_result,
+        tool_call_id=tool_call_id,
+        tool_name=tool_name,
+    )
+    token_count = original_token_count
+    text = None
+    while token_count > token_limit:
+        # Loop because we are counting tokens but trimming characters. This means we try to trim a number of
+        # characters proportional to the number of tokens but we may still have too many tokens
+        if not text:
+            text = logging_structured_tool_result.get_stringified_data()
+        if not text:
+            # Weird scenario where the result exceeds the token allowance but there is not data.
+            # Exit and do nothing because I don't know how to handle such scenario.
+            logging.warning(
+                f"The calculated token count for logs is {token_count} but the limit is {token_limit}. However the data field is empty so there are no logs to truncate."
+            )
+            return
+        ratio = token_count / token_limit
+        character_count = len(text)
+        number_of_characters_to_truncate = character_count - ceil(
+            character_count / ratio
+        )
+        number_of_characters_to_truncate = max(
+            MIN_NUMBER_OF_CHARACTERS_TO_TRUNCATE, number_of_characters_to_truncate
+        )
+
+        if len(text) <= number_of_characters_to_truncate:
+            logging.warning(
+                f"The calculated token count for logs is {token_count} (max allowed tokens={token_limit}) but the logs are only {len(text)} characters which is below the intended truncation of {number_of_characters_to_truncate} characters. Logs will no longer be truncated"
+            )
+            return
+        else:
+            linefeed_truncation_offset = max(
+                text[number_of_characters_to_truncate:].find("\n"), 0
+            )  # keep log lines atomic
+
+            # Tentatively add the truncation prefix.
+            # When counting tokens, we want to include the TRUNCATION_PROMPT_PREFIX because it will be part of the tool response.
+            # Because we're truncating based on character counts but ultimately checking tokens count,
+            # it is possible that the character truncation is incorrect and more need to be truncated.
+            # This will be caught in the next iteration and the truncation prefix will be truncated
+            # because MIN_NUMBER_OF_CHARACTERS_TO_TRUNCATE cannot be smaller than TRUNCATION_PROMPT_PREFIX
+            text = (
+                TRUNCATION_PROMPT_PREFIX
+                + text[number_of_characters_to_truncate + linefeed_truncation_offset :]
+            )
+            logging_structured_tool_result.data = text
+            token_count = count_tool_response_tokens(
+                llm=llm,
+                structured_tool_result=logging_structured_tool_result,
+                tool_call_id=tool_call_id,
+                tool_name=tool_name,
+            )
+    if token_count < original_token_count:
+        logging.info(
+            f"Logs for pod {structured_params.pod_name}/{structured_params.namespace} have been truncated from {original_token_count} tokens down to {token_count} tokens."
+        )
+
+
 class PodLoggingTool(Tool):
     """Common tool for fetching pod logs across different logging backends"""
 
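
The truncate_logs loop above trims characters while the limit is expressed in tokens, so it proportionally over-trims and then re-counts until the result fits. A minimal standalone sketch of the same ratio-based idea, with a hypothetical count_tokens() (roughly four characters per token) standing in for count_tool_response_tokens():

    from math import ceil

    PREFIX = "[... PREVIOUS LOGS ABOVE THIS LINE HAVE BEEN TRUNCATED]"

    def count_tokens(text: str) -> int:
        # Stand-in tokenizer (~4 chars/token); the real code delegates to the LLM.
        return ceil(len(text) / 4)

    def truncate_to_token_limit(text: str, token_limit: int) -> str:
        tokens = count_tokens(text)
        while tokens > token_limit:
            # Trim characters in proportion to the token overshoot, then re-count.
            ratio = tokens / token_limit
            cut = max(len(text) - ceil(len(text) / ratio), 50 + len(PREFIX))
            if len(text) <= cut:
                return text  # nothing meaningful left to trim
            cut += max(text[cut:].find("\n"), 0)  # keep log lines atomic
            text = PREFIX + text[cut:]
            tokens = count_tokens(text)
        return text

    print(len(truncate_to_token_limit("log line\n" * 5_000, token_limit=500)))

The floor on the cut size (MIN_NUMBER_OF_CHARACTERS_TO_TRUNCATE above) guarantees forward progress when the token estimate hovers just above the limit, since each pass then removes more characters than the re-added prefix contributes.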
@@ -175,9 +254,7 @@ If you hit the log limit and see lots of repetitive INFO logs, use exclude_filte
 
         return params
 
-    def _invoke(
-        self, params: dict, user_approved: bool = False
-    ) -> StructuredToolResult:
+    def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
         structured_params = FetchPodLogsParams(
             namespace=get_param_or_raise(params, "namespace"),
             pod_name=get_param_or_raise(params, "pod_name"),

@@ -192,6 +269,15 @@ If you hit the log limit and see lots of repetitive INFO logs, use exclude_filte
             params=structured_params,
         )
 
+        truncate_logs(
+            logging_structured_tool_result=result,
+            llm=context.llm,
+            token_limit=context.max_token_count,
+            structured_params=structured_params,
+            tool_call_id=context.tool_call_id,
+            tool_name=context.tool_name,
+        )
+
         return result
 
     def get_parameterized_one_liner(self, params: dict) -> str:
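
Note the signature change: _invoke now receives a ToolInvokeContext instead of a bare user_approved flag. Judging only from the call above, the context carries at least the active LLM handle, a per-response token budget, and the identity of the tool call; a hypothetical minimal shape (the real class lives in holmes/core/tools.py and may carry more fields):

    from dataclasses import dataclass

    @dataclass
    class ToolInvokeContext:  # illustrative sketch; fields inferred from the call above
        llm: "LLM"            # used by count_tool_response_tokens
        max_token_count: int  # budget for this tool response
        tool_call_id: str
        tool_name: str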

holmes/plugins/toolsets/mcp/toolset_mcp.py (+218 -64)

@@ -1,71 +1,168 @@
+import asyncio
+import json
+import logging
+import threading
+from contextlib import asynccontextmanager
+from enum import Enum
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+from mcp.client.session import ClientSession
+from mcp.client.sse import sse_client
+from mcp.client.stdio import StdioServerParameters, stdio_client
+from mcp.client.streamable_http import streamablehttp_client
+from mcp.types import Tool as MCP_Tool
+from pydantic import AnyUrl, BaseModel, Field, model_validator
+
+from holmes.common.env_vars import SSE_READ_TIMEOUT
 from holmes.core.tools import (
-
+    CallablePrerequisite,
+    StructuredToolResult,
+    StructuredToolResultStatus,
     Tool,
+    ToolInvokeContext,
     ToolParameter,
-
-    ToolResultStatus,
-    CallablePrerequisite,
+    Toolset,
 )
 
-
-
-
+# Lock per MCP server URL to serialize calls to the same server
+_server_locks: Dict[str, threading.Lock] = {}
+_locks_lock = threading.Lock()
 
-from mcp.types import Tool as MCP_Tool
-from mcp.types import CallToolResult
 
-
-
-
-
+def get_server_lock(url: str) -> threading.Lock:
+    """Get or create a lock for a specific MCP server URL."""
+    with _locks_lock:
+        if url not in _server_locks:
+            _server_locks[url] = threading.Lock()
+        return _server_locks[url]
 
 
-class
-
+class MCPMode(str, Enum):
+    SSE = "sse"
+    STREAMABLE_HTTP = "streamable-http"
+    STDIO = "stdio"
+
+
+class MCPConfig(BaseModel):
+    url: AnyUrl
+    mode: MCPMode = MCPMode.SSE
     headers: Optional[Dict[str, str]] = None
 
-    def
-        self
-
+    def get_lock_string(self) -> str:
+        return str(self.url)
+
+
+class StdioMCPConfig(BaseModel):
+    mode: MCPMode = MCPMode.STDIO
+    command: str
+    args: Optional[List[str]] = None
+    env: Optional[Dict[str, str]] = None
+
+    def get_lock_string(self) -> str:
+        return str(self.command)
+
+
+@asynccontextmanager
+async def get_initialized_mcp_session(toolset: "RemoteMCPToolset"):
+    if toolset._mcp_config is None:
+        raise ValueError("MCP config is not initialized")
+
+    if isinstance(toolset._mcp_config, StdioMCPConfig):
+        server_params = StdioServerParameters(
+            command=toolset._mcp_config.command,
+            args=toolset._mcp_config.args or [],
+            env=toolset._mcp_config.env,
+        )
+        async with stdio_client(server_params) as (
+            read_stream,
+            write_stream,
+        ):
+            async with ClientSession(read_stream, write_stream) as session:
+                _ = await session.initialize()
+                yield session
+    elif toolset._mcp_config.mode == MCPMode.SSE:
+        url = str(toolset._mcp_config.url)
+        async with sse_client(
+            url, toolset._mcp_config.headers, sse_read_timeout=SSE_READ_TIMEOUT
+        ) as (
+            read_stream,
+            write_stream,
+        ):
+            async with ClientSession(read_stream, write_stream) as session:
+                _ = await session.initialize()
+                yield session
+    else:
+        url = str(toolset._mcp_config.url)
+        async with streamablehttp_client(
+            url, headers=toolset._mcp_config.headers, sse_read_timeout=SSE_READ_TIMEOUT
+        ) as (
+            read_stream,
+            write_stream,
+            _,
+        ):
+            async with ClientSession(read_stream, write_stream) as session:
+                _ = await session.initialize()
+                yield session
+
+
+class RemoteMCPTool(Tool):
+    toolset: "RemoteMCPToolset" = Field(exclude=True)
+
+    def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
         try:
-
+            # Serialize calls to the same MCP server to prevent SSE conflicts
+            # Different servers can still run in parallel
+            if not self.toolset._mcp_config:
+                raise ValueError("MCP config not initialized")
+
+            lock = get_server_lock(str(self.toolset._mcp_config.get_lock_string()))
+            with lock:
+                return asyncio.run(self._invoke_async(params))
         except Exception as e:
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error=str(e.args),
                 params=params,
                 invocation=f"MCPtool {self.name} with params {params}",
             )
 
+    @staticmethod
+    def _is_content_error(content: str) -> bool:
+        try:  # aws mcp sometimes returns an error in content - status code != 200
+            json_content: dict = json.loads(content)
+            status_code = json_content.get("response", {}).get("status_code", 200)
+            return status_code >= 300
+        except Exception:
+            return False
+
     async def _invoke_async(self, params: Dict) -> StructuredToolResult:
-        async with
-
-            _ = await session.initialize()
-            tool_result: CallToolResult = await session.call_tool(self.name, params)
+        async with get_initialized_mcp_session(self.toolset) as session:
+            tool_result = await session.call_tool(self.name, params)
 
-
-
-
-
-
-
-
-
-
-
-
-            invocation=f"MCPtool {self.name} with params {params}",
-        )
+        merged_text = " ".join(c.text for c in tool_result.content if c.type == "text")
+        return StructuredToolResult(
+            status=(
+                StructuredToolResultStatus.ERROR
+                if (tool_result.isError or self._is_content_error(merged_text))
+                else StructuredToolResultStatus.SUCCESS
+            ),
+            data=merged_text,
+            params=params,
+            invocation=f"MCPtool {self.name} with params {params}",
+        )
 
     @classmethod
-    def create(
+    def create(
+        cls,
+        tool: MCP_Tool,
+        toolset: "RemoteMCPToolset",
+    ):
         parameters = cls.parse_input_schema(tool.inputSchema)
         return cls(
-            url=url,
             name=tool.name,
             description=tool.description or "",
             parameters=parameters,
-
+            toolset=toolset,
        )
 
     @classmethod
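
The lock-per-server pattern above (a dict of locks guarded by its own lock) is what serializes calls to one MCP server while letting different servers proceed in parallel. A self-contained demo of the pattern, independent of the MCP code:

    import threading
    from concurrent.futures import ThreadPoolExecutor

    _locks: dict[str, threading.Lock] = {}
    _locks_guard = threading.Lock()

    def lock_for(key: str) -> threading.Lock:
        # The guard makes lock creation atomic; afterwards callers contend
        # only with other callers that share the same key.
        with _locks_guard:
            if key not in _locks:
                _locks[key] = threading.Lock()
            return _locks[key]

    def call_server(url: str) -> str:
        with lock_for(url):  # serialized per URL, parallel across URLs
            return f"called {url}"

    with ThreadPoolExecutor(max_workers=4) as pool:
        print(list(pool.map(call_server, ["http://a/sse", "http://b/sse", "http://a/sse"])))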
@@ -85,53 +182,110 @@ class RemoteMCPTool(Tool):
         return parameters
 
     def get_parameterized_one_liner(self, params: Dict) -> str:
-
+        # AWS MCP cli_command
+        if params and params.get("cli_command"):
+            return f"{params.get('cli_command')}"
+
+        # gcloud MCP run_gcloud_command
+        if self.name == "run_gcloud_command" and params and "args" in params:
+            args = params.get("args", [])
+            if isinstance(args, list):
+                return f"gcloud {' '.join(str(arg) for arg in args)}"
+
+        return f"{self.toolset.name}: {self.name} {params}"
 
 
 class RemoteMCPToolset(Toolset):
-    url: AnyUrl
     tools: List[RemoteMCPTool] = Field(default_factory=list)  # type: ignore
     icon_url: str = "https://registry.npmmirror.com/@lobehub/icons-static-png/1.46.0/files/light/mcp.png"
+    _mcp_config: Optional[Union[MCPConfig, StdioMCPConfig]] = None
 
     def model_post_init(self, __context: Any) -> None:
-        self.prerequisites = [
+        self.prerequisites = [
+            CallablePrerequisite(callable=self.prerequisites_callable)
+        ]
 
-
-
+    @model_validator(mode="before")
+    @classmethod
+    def migrate_url_to_config(cls, values: dict[str, Any]) -> dict[str, Any]:
+        """
+        Migrates url from field parameter to config object.
+        If url is passed as a parameter, it's moved to config (or config is created if it doesn't exist).
+        """
+        if not isinstance(values, dict) or "url" not in values:
+            return values
+
+        url_value = values.pop("url")
+        if url_value is None:
+            return values
+
+        config = values.get("config")
+        if config is None:
+            config = {}
+            values["config"] = config
+
+        toolset_name = values.get("name", "unknown")
+        if "url" in config:
+            logging.warning(
+                f"Toolset {toolset_name}: has two urls defined, remove the 'url' field from the toolset configuration and keep the 'url' in the config section."
+            )
+            return values
 
-
-
-
-
-
+        logging.warning(
+            f"Toolset {toolset_name}: 'url' field has been migrated to config. "
+            "Please move 'url' to the config section."
+        )
+        config["url"] = url_value
+        return values
 
-
-    def init_server_tools(self, config: dict[str, Any]) -> Tuple[bool, str]:
+    def prerequisites_callable(self, config) -> Tuple[bool, str]:
         try:
+            if not config:
+                return (False, f"Config is required for {self.name}")
+
+            mode_value = config.get("mode", MCPMode.SSE.value)
+            allowed_modes = [e.value for e in MCPMode]
+            if mode_value not in allowed_modes:
+                return (
+                    False,
+                    f'Invalid mode "{mode_value}", allowed modes are {", ".join(allowed_modes)}',
+                )
+
+            if mode_value == MCPMode.STDIO.value:
+                self._mcp_config = StdioMCPConfig(**config)
+            else:
+                self._mcp_config = MCPConfig(**config)
+                clean_url_str = str(self._mcp_config.url).rstrip("/")
+
+                if self._mcp_config.mode == MCPMode.SSE and not clean_url_str.endswith(
+                    "/sse"
+                ):
+                    self._mcp_config.url = AnyUrl(clean_url_str + "/sse")
+
             tools_result = asyncio.run(self._get_server_tools())
+
             self.tools = [
-                RemoteMCPTool.create(
-                for tool in tools_result.tools
+                RemoteMCPTool.create(tool, self) for tool in tools_result.tools
             ]
 
             if not self.tools:
                 logging.warning(f"mcp server {self.name} loaded 0 tools.")
+
             return (True, "")
         except Exception as e:
-            # using e.args, the asyncio wrapper could stack another exception this helps printing them all.
             return (
                 False,
-                f"Failed to load mcp server {self.name} {
+                f"Failed to load mcp server {self.name}: {str(e)}",
             )
 
     async def _get_server_tools(self):
-        async with
-
-            write_stream,
-        ):
-            async with ClientSession(read_stream, write_stream) as session:
-                _ = await session.initialize()
-                return await session.list_tools()
+        async with get_initialized_mcp_session(self) as session:
+            return await session.list_tools()
 
     def get_example_config(self) -> Dict[str, Any]:
-
+        example_config = MCPConfig(
+            url=AnyUrl("http://example.com:8000/mcp/messages"),
+            mode=MCPMode.STREAMABLE_HTTP,
+            headers={"Authorization": "Bearer YOUR_TOKEN"},
+        )
+        return example_config.model_dump()
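
The migrate_url_to_config validator keeps old-style toolset definitions working: a top-level url is moved into the config section before pydantic validation runs. A standalone re-implementation of just that transformation, for illustration only:

    def migrate(values: dict) -> dict:
        # Mirrors migrate_url_to_config without the pydantic and logging plumbing.
        if "url" not in values:
            return values
        url = values.pop("url")
        if url is None:
            return values
        config = values.setdefault("config", {})
        if "url" not in config:  # a url already present in config wins
            config["url"] = url
        return values

    print(migrate({"name": "my-mcp", "url": "http://mcp.example.com:8000"}))
    # {'name': 'my-mcp', 'config': {'url': 'http://mcp.example.com:8000'}}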

holmes/plugins/toolsets/newrelic/new_relic_api.py (new file, +165 -0)

@@ -0,0 +1,165 @@
+"""NewRelic API wrapper for executing NRQL queries via GraphQL."""
+
+import logging
+from typing import Any, Dict
+
+import requests  # type: ignore
+
+logger = logging.getLogger(__name__)
+
+
+class NewRelicAPI:
+    """Python wrapper for NewRelic GraphQL API.
+
+    This class provides a clean interface to execute NRQL queries via the NewRelic GraphQL API,
+    supporting both US and EU datacenters.
+    """
+
+    def __init__(self, api_key: str, account_id: str, is_eu_datacenter: bool = False):
+        """Initialize the NewRelic API wrapper.
+
+        Args:
+            api_key: NewRelic API key
+            account_id: NewRelic account ID
+            is_eu_datacenter: If True, use EU datacenter URL. Defaults to False (US).
+        """
+        self.api_key = api_key
+        # Validate account_id is numeric to prevent injection
+        try:
+            self.account_id = int(account_id)
+        except ValueError:
+            raise ValueError(f"Invalid account_id: must be numeric, got '{account_id}'")
+        self.is_eu_datacenter = is_eu_datacenter
+
+    def _get_api_url(self) -> str:
+        """Get the appropriate API URL based on datacenter location.
+
+        Returns:
+            str: The GraphQL API endpoint URL
+        """
+        if self.is_eu_datacenter:
+            return "https://api.eu.newrelic.com/graphql"
+        return "https://api.newrelic.com/graphql"
+
+    def _make_request(
+        self, graphql_query: Dict[str, Any], timeout: int = 30
+    ) -> Dict[str, Any]:
+        """Make HTTP POST request to NewRelic GraphQL API.
+
+        Args:
+            graphql_query: The GraphQL query as a dictionary
+            timeout: Request timeout in seconds
+
+        Returns:
+            JSON response from the API
+
+        Raises:
+            requests.exceptions.HTTPError: If the request fails
+            Exception: If GraphQL returns errors
+        """
+        url = self._get_api_url()
+        headers = {
+            "Content-Type": "application/json",
+            "Api-Key": self.api_key,
+        }
+
+        response = requests.post(
+            url,
+            headers=headers,
+            json=graphql_query,
+            timeout=timeout,
+        )
+        response.raise_for_status()
+
+        # Parse JSON response
+        data = response.json()
+
+        # Check for GraphQL errors even on 200 responses
+        if "errors" in data and data["errors"]:
+            error_msg = data["errors"][0].get("message", "Unknown GraphQL error")
+            raise Exception(f"NewRelic GraphQL error: {error_msg}")
+
+        return data
+
+    def execute_nrql_query(self, nrql_query: str) -> list:
+        """Execute an NRQL query via the NewRelic GraphQL API.
+
+        Args:
+            nrql_query: The NRQL query string to execute
+
+        Returns:
+            list: The query results from NewRelic (extracted from the nested response)
+
+        Raises:
+            requests.exceptions.HTTPError: If the API request fails
+            Exception: If GraphQL returns errors
+        """
+        # Build the GraphQL query using variables to prevent injection
+        # Note: New Relic's GraphQL API requires the account ID to be inline, but we can use variables for the NRQL query
+        graphql_query = {
+            "query": f"""
+            query ExecuteNRQL($nrqlQuery: Nrql!) {{
+                actor {{
+                    account(id: {self.account_id}) {{
+                        nrql(query: $nrqlQuery) {{
+                            results
+                        }}
+                    }}
+                }}
+            }}
+            """,
+            "variables": {"nrqlQuery": nrql_query},
+        }
+
+        logger.info(f"Executing NRQL query: {nrql_query}")
+        response = self._make_request(graphql_query)
+
+        # Extract just the results array from the nested response
+        try:
+            results = response["data"]["actor"]["account"]["nrql"]["results"]
+            return results
+        except (KeyError, TypeError) as e:
+            raise Exception(
+                f"Failed to extract results from NewRelic response: {e}"
+            ) from e
+
+    def get_organization_accounts(self) -> list:
+        """Get all accounts accessible in the organization.
+
+        Returns:
+            list: List of account dictionaries with id and name
+
+        Raises:
+            requests.exceptions.HTTPError: If the API request fails
+            Exception: If GraphQL returns errors
+        """
+        graphql_query = {
+            "query": """
+            query GetOrganizationAccounts {
+                actor {
+                    organization {
+                        accountManagement {
+                            managedAccounts {
+                                id
+                                name
+                            }
+                        }
+                    }
+                }
+            }
+            """
+        }
+
+        logger.info("Querying organization accounts")
+        response = self._make_request(graphql_query)
+
+        # Extract accounts from the nested response
+        try:
+            accounts = response["data"]["actor"]["organization"]["accountManagement"][
+                "managedAccounts"
+            ]
+            return accounts
+        except (KeyError, TypeError) as e:
+            raise Exception(
+                f"Failed to extract accounts from NewRelic response: {e}"
+            ) from e
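
A usage sketch for the class above; the API key and account ID are placeholders, not real credentials:

    from holmes.plugins.toolsets.newrelic.new_relic_api import NewRelicAPI

    api = NewRelicAPI(api_key="NRAK-...", account_id="1234567")

    # Runs the NRQL through the GraphQL wrapper and returns the inner "results" list.
    rows = api.execute_nrql_query(
        "SELECT count(*) FROM TransactionError FACET appName SINCE 1 hour ago"
    )
    for row in rows:
        print(row)

    # Enumerate accounts visible to this key (EU keys need is_eu_datacenter=True).
    for account in api.get_organization_accounts():
        print(account["id"], account["name"])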