holmesgpt 0.13.2__py3-none-any.whl → 0.18.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188)
  1. holmes/__init__.py +3 -5
  2. holmes/clients/robusta_client.py +20 -6
  3. holmes/common/env_vars.py +58 -3
  4. holmes/common/openshift.py +1 -1
  5. holmes/config.py +123 -148
  6. holmes/core/conversations.py +71 -15
  7. holmes/core/feedback.py +191 -0
  8. holmes/core/investigation.py +31 -39
  9. holmes/core/investigation_structured_output.py +3 -3
  10. holmes/core/issue.py +1 -1
  11. holmes/core/llm.py +508 -88
  12. holmes/core/models.py +108 -4
  13. holmes/core/openai_formatting.py +14 -1
  14. holmes/core/prompt.py +48 -3
  15. holmes/core/runbooks.py +1 -0
  16. holmes/core/safeguards.py +8 -6
  17. holmes/core/supabase_dal.py +295 -100
  18. holmes/core/tool_calling_llm.py +489 -428
  19. holmes/core/tools.py +325 -56
  20. holmes/core/tools_utils/token_counting.py +21 -0
  21. holmes/core/tools_utils/tool_context_window_limiter.py +40 -0
  22. holmes/core/tools_utils/tool_executor.py +0 -13
  23. holmes/core/tools_utils/toolset_utils.py +1 -0
  24. holmes/core/toolset_manager.py +191 -5
  25. holmes/core/tracing.py +19 -3
  26. holmes/core/transformers/__init__.py +23 -0
  27. holmes/core/transformers/base.py +63 -0
  28. holmes/core/transformers/llm_summarize.py +175 -0
  29. holmes/core/transformers/registry.py +123 -0
  30. holmes/core/transformers/transformer.py +32 -0
  31. holmes/core/truncation/compaction.py +94 -0
  32. holmes/core/truncation/dal_truncation_utils.py +23 -0
  33. holmes/core/truncation/input_context_window_limiter.py +219 -0
  34. holmes/interactive.py +228 -31
  35. holmes/main.py +23 -40
  36. holmes/plugins/interfaces.py +2 -1
  37. holmes/plugins/prompts/__init__.py +2 -1
  38. holmes/plugins/prompts/_fetch_logs.jinja2 +31 -6
  39. holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
  40. holmes/plugins/prompts/_runbook_instructions.jinja2 +24 -12
  41. holmes/plugins/prompts/base_user_prompt.jinja2 +7 -0
  42. holmes/plugins/prompts/conversation_history_compaction.jinja2 +89 -0
  43. holmes/plugins/prompts/generic_ask.jinja2 +0 -4
  44. holmes/plugins/prompts/generic_ask_conversation.jinja2 +0 -1
  45. holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +0 -1
  46. holmes/plugins/prompts/generic_investigation.jinja2 +0 -1
  47. holmes/plugins/prompts/investigation_procedure.jinja2 +50 -1
  48. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +0 -1
  49. holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +0 -1
  50. holmes/plugins/runbooks/__init__.py +145 -17
  51. holmes/plugins/runbooks/catalog.json +2 -0
  52. holmes/plugins/sources/github/__init__.py +4 -2
  53. holmes/plugins/sources/prometheus/models.py +1 -0
  54. holmes/plugins/toolsets/__init__.py +44 -27
  55. holmes/plugins/toolsets/aks-node-health.yaml +46 -0
  56. holmes/plugins/toolsets/aks.yaml +64 -0
  57. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +38 -47
  58. holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +3 -2
  59. holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
  60. holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +3 -2
  61. holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +3 -1
  62. holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +3 -1
  63. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +12 -13
  64. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +15 -12
  65. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +15 -12
  66. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +11 -11
  67. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +11 -9
  68. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +15 -12
  69. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +15 -15
  70. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +11 -8
  71. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +11 -8
  72. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +11 -8
  73. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +11 -8
  74. holmes/plugins/toolsets/azure_sql/utils.py +0 -32
  75. holmes/plugins/toolsets/bash/argocd/__init__.py +3 -3
  76. holmes/plugins/toolsets/bash/aws/__init__.py +4 -4
  77. holmes/plugins/toolsets/bash/azure/__init__.py +4 -4
  78. holmes/plugins/toolsets/bash/bash_toolset.py +11 -15
  79. holmes/plugins/toolsets/bash/common/bash.py +23 -13
  80. holmes/plugins/toolsets/bash/common/bash_command.py +1 -1
  81. holmes/plugins/toolsets/bash/common/stringify.py +1 -1
  82. holmes/plugins/toolsets/bash/kubectl/__init__.py +2 -1
  83. holmes/plugins/toolsets/bash/kubectl/constants.py +0 -1
  84. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +3 -4
  85. holmes/plugins/toolsets/bash/parse_command.py +12 -13
  86. holmes/plugins/toolsets/cilium.yaml +284 -0
  87. holmes/plugins/toolsets/connectivity_check.py +124 -0
  88. holmes/plugins/toolsets/coralogix/api.py +132 -119
  89. holmes/plugins/toolsets/coralogix/coralogix.jinja2 +14 -0
  90. holmes/plugins/toolsets/coralogix/toolset_coralogix.py +219 -0
  91. holmes/plugins/toolsets/coralogix/utils.py +15 -79
  92. holmes/plugins/toolsets/datadog/datadog_api.py +525 -26
  93. holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +55 -11
  94. holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +3 -3
  95. holmes/plugins/toolsets/datadog/datadog_models.py +59 -0
  96. holmes/plugins/toolsets/datadog/datadog_url_utils.py +213 -0
  97. holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +165 -28
  98. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +417 -241
  99. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +234 -214
  100. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +167 -79
  101. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +374 -363
  102. holmes/plugins/toolsets/elasticsearch/__init__.py +6 -0
  103. holmes/plugins/toolsets/elasticsearch/elasticsearch.py +834 -0
  104. holmes/plugins/toolsets/elasticsearch/opensearch_ppl_query_docs.jinja2 +1616 -0
  105. holmes/plugins/toolsets/elasticsearch/opensearch_query_assist.py +78 -0
  106. holmes/plugins/toolsets/elasticsearch/opensearch_query_assist_instructions.jinja2 +223 -0
  107. holmes/plugins/toolsets/git.py +54 -50
  108. holmes/plugins/toolsets/grafana/base_grafana_toolset.py +16 -4
  109. holmes/plugins/toolsets/grafana/common.py +13 -29
  110. holmes/plugins/toolsets/grafana/grafana_tempo_api.py +455 -0
  111. holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +25 -0
  112. holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +191 -0
  113. holmes/plugins/toolsets/grafana/loki_api.py +4 -0
  114. holmes/plugins/toolsets/grafana/toolset_grafana.py +293 -89
  115. holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +49 -0
  116. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
  117. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +820 -292
  118. holmes/plugins/toolsets/grafana/trace_parser.py +4 -3
  119. holmes/plugins/toolsets/internet/internet.py +15 -16
  120. holmes/plugins/toolsets/internet/notion.py +9 -11
  121. holmes/plugins/toolsets/investigator/core_investigation.py +44 -36
  122. holmes/plugins/toolsets/investigator/model.py +3 -1
  123. holmes/plugins/toolsets/json_filter_mixin.py +134 -0
  124. holmes/plugins/toolsets/kafka.py +36 -42
  125. holmes/plugins/toolsets/kubernetes.yaml +317 -113
  126. holmes/plugins/toolsets/kubernetes_logs.py +9 -9
  127. holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
  128. holmes/plugins/toolsets/logging_utils/logging_api.py +94 -8
  129. holmes/plugins/toolsets/mcp/toolset_mcp.py +218 -64
  130. holmes/plugins/toolsets/newrelic/new_relic_api.py +165 -0
  131. holmes/plugins/toolsets/newrelic/newrelic.jinja2 +65 -0
  132. holmes/plugins/toolsets/newrelic/newrelic.py +320 -0
  133. holmes/plugins/toolsets/openshift.yaml +283 -0
  134. holmes/plugins/toolsets/prometheus/prometheus.py +1202 -421
  135. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +54 -5
  136. holmes/plugins/toolsets/prometheus/utils.py +28 -0
  137. holmes/plugins/toolsets/rabbitmq/api.py +23 -4
  138. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +13 -14
  139. holmes/plugins/toolsets/robusta/robusta.py +239 -68
  140. holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
  141. holmes/plugins/toolsets/runbook/runbook_fetcher.py +157 -27
  142. holmes/plugins/toolsets/service_discovery.py +1 -1
  143. holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
  144. holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
  145. holmes/plugins/toolsets/utils.py +88 -0
  146. holmes/utils/config_utils.py +91 -0
  147. holmes/utils/connection_utils.py +31 -0
  148. holmes/utils/console/result.py +10 -0
  149. holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
  150. holmes/utils/env.py +7 -0
  151. holmes/utils/file_utils.py +2 -1
  152. holmes/utils/global_instructions.py +60 -11
  153. holmes/utils/holmes_status.py +6 -4
  154. holmes/utils/holmes_sync_toolsets.py +0 -2
  155. holmes/utils/krr_utils.py +188 -0
  156. holmes/utils/log.py +15 -0
  157. holmes/utils/markdown_utils.py +2 -3
  158. holmes/utils/memory_limit.py +58 -0
  159. holmes/utils/sentry_helper.py +64 -0
  160. holmes/utils/stream.py +69 -8
  161. holmes/utils/tags.py +4 -3
  162. holmes/version.py +37 -15
  163. holmesgpt-0.18.4.dist-info/LICENSE +178 -0
  164. {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/METADATA +35 -31
  165. holmesgpt-0.18.4.dist-info/RECORD +258 -0
  166. holmes/core/performance_timing.py +0 -72
  167. holmes/plugins/toolsets/aws.yaml +0 -80
  168. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +0 -112
  169. holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +0 -310
  170. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +0 -739
  171. holmes/plugins/toolsets/grafana/grafana_api.py +0 -42
  172. holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
  173. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
  174. holmes/plugins/toolsets/newrelic.py +0 -231
  175. holmes/plugins/toolsets/opensearch/opensearch.py +0 -257
  176. holmes/plugins/toolsets/opensearch/opensearch_logs.py +0 -161
  177. holmes/plugins/toolsets/opensearch/opensearch_traces.py +0 -218
  178. holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +0 -12
  179. holmes/plugins/toolsets/opensearch/opensearch_utils.py +0 -166
  180. holmes/plugins/toolsets/servicenow/install.md +0 -37
  181. holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
  182. holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
  183. holmes/utils/keygen_utils.py +0 -6
  184. holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
  185. holmesgpt-0.13.2.dist-info/RECORD +0 -234
  186. /holmes/plugins/toolsets/{opensearch → newrelic}/__init__.py +0 -0
  187. {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/WHEEL +0 -0
  188. {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/entry_points.txt +0 -0
@@ -1,27 +1,36 @@
-from abc import ABC, abstractmethod
-from datetime import datetime, timedelta
 import logging
-from typing import Optional, Set
+from abc import ABC, abstractmethod
+from datetime import datetime, timedelta, timezone
 from enum import Enum
+from math import ceil
+from typing import Optional, Set
 
 from pydantic import BaseModel, field_validator
-from datetime import timezone
+
+from holmes.core.llm import LLM
 from holmes.core.tools import (
     StructuredToolResult,
     Tool,
+    ToolInvokeContext,
     ToolParameter,
     Toolset,
 )
+from holmes.core.tools_utils.token_counting import count_tool_response_tokens
 from holmes.plugins.toolsets.utils import get_param_or_raise
 
 # Default values for log fetching
 DEFAULT_LOG_LIMIT = 100
 SECONDS_PER_DAY = 24 * 60 * 60
 DEFAULT_TIME_SPAN_SECONDS = 7 * SECONDS_PER_DAY  # 1 week in seconds
-DEFAULT_GRAPH_TIME_SPAN_SECONDS = 1 * SECONDS_PER_DAY  # 1 day in seconds
+DEFAULT_GRAPH_TIME_SPAN_SECONDS = 1 * 60 * 60  # 1 hour in seconds
 
 POD_LOGGING_TOOL_NAME = "fetch_pod_logs"
 
+TRUNCATION_PROMPT_PREFIX = "[... PREVIOUS LOGS ABOVE THIS LINE HAVE BEEN TRUNCATED]"
+MIN_NUMBER_OF_CHARACTERS_TO_TRUNCATE: int = (
+    50 + len(TRUNCATION_PROMPT_PREFIX)
+)  # prevents the truncation loop from converging too slowly once the actual token count gets close to the expected limit
+
 
 class LoggingCapability(str, Enum):
     """Optional advanced logging capabilities"""
@@ -74,6 +83,76 @@ class BasePodLoggingToolset(Toolset, ABC):
         return ""
 
 
+def truncate_logs(
+    logging_structured_tool_result: StructuredToolResult,
+    llm: LLM,
+    token_limit: int,
+    structured_params: FetchPodLogsParams,
+    tool_call_id: str,
+    tool_name: str,
+):
+    original_token_count = count_tool_response_tokens(
+        llm=llm,
+        structured_tool_result=logging_structured_tool_result,
+        tool_call_id=tool_call_id,
+        tool_name=tool_name,
+    )
+    token_count = original_token_count
+    text = None
+    while token_count > token_limit:
+        # Loop because we are counting tokens but trimming characters. We trim a number of
+        # characters proportional to the token overage, but may still end up with too many tokens.
+        if not text:
+            text = logging_structured_tool_result.get_stringified_data()
+        if not text:
+            # Edge case where the result exceeds the token allowance but the data field is empty.
+            # Exit and do nothing because there are no logs to truncate.
+            logging.warning(
+                f"The calculated token count for logs is {token_count} but the limit is {token_limit}. However, the data field is empty so there are no logs to truncate."
+            )
+            return
+        ratio = token_count / token_limit
+        character_count = len(text)
+        number_of_characters_to_truncate = character_count - ceil(
+            character_count / ratio
+        )
+        number_of_characters_to_truncate = max(
+            MIN_NUMBER_OF_CHARACTERS_TO_TRUNCATE, number_of_characters_to_truncate
+        )
+
+        if len(text) <= number_of_characters_to_truncate:
+            logging.warning(
+                f"The calculated token count for logs is {token_count} (max allowed tokens={token_limit}) but the logs are only {len(text)} characters, which is below the intended truncation of {number_of_characters_to_truncate} characters. Logs will no longer be truncated."
+            )
+            return
+        else:
+            linefeed_truncation_offset = max(
+                text[number_of_characters_to_truncate:].find("\n"), 0
+            )  # keep log lines atomic
+
+            # Tentatively add the truncation prefix.
+            # When counting tokens, we want to include the TRUNCATION_PROMPT_PREFIX because it will be part of the tool response.
+            # Because we're truncating based on character counts but ultimately checking the token count,
+            # it is possible that the character truncation falls short and more needs to be truncated.
+            # This will be caught in the next iteration, and the truncation prefix itself will be truncated
+            # because MIN_NUMBER_OF_CHARACTERS_TO_TRUNCATE cannot be smaller than len(TRUNCATION_PROMPT_PREFIX).
+            text = (
+                TRUNCATION_PROMPT_PREFIX
+                + text[number_of_characters_to_truncate + linefeed_truncation_offset :]
+            )
+            logging_structured_tool_result.data = text
+            token_count = count_tool_response_tokens(
+                llm=llm,
+                structured_tool_result=logging_structured_tool_result,
+                tool_call_id=tool_call_id,
+                tool_name=tool_name,
+            )
+    if token_count < original_token_count:
+        logging.info(
+            f"Logs for pod {structured_params.pod_name}/{structured_params.namespace} have been truncated from {original_token_count} tokens down to {token_count} tokens."
+        )
+
+
 class PodLoggingTool(Tool):
     """Common tool for fetching pod logs across different logging backends"""
 
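To make the character-budget arithmetic in truncate_logs above concrete, here is a minimal standalone sketch with hypothetical numbers (the real counts come from count_tool_response_tokens):

from math import ceil

# Hypothetical numbers for one loop iteration: the stringified logs are
# 40,000 characters long and count as 12,000 tokens against an 8,000-token limit.
token_count, token_limit = 12_000, 8_000
character_count = 40_000

# Keep roughly character_count / ratio characters; trim the rest from the top.
ratio = token_count / token_limit  # 1.5
to_truncate = character_count - ceil(character_count / ratio)
print(to_truncate)  # 13333 characters trimmed, then the tokens are recounted
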
@@ -175,9 +254,7 @@ If you hit the log limit and see lots of repetitive INFO logs, use exclude_filte
 
         return params
 
-    def _invoke(
-        self, params: dict, user_approved: bool = False
-    ) -> StructuredToolResult:
+    def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
         structured_params = FetchPodLogsParams(
             namespace=get_param_or_raise(params, "namespace"),
             pod_name=get_param_or_raise(params, "pod_name"),
@@ -192,6 +269,15 @@ If you hit the log limit and see lots of repetitive INFO logs, use exclude_filte
             params=structured_params,
         )
 
+        truncate_logs(
+            logging_structured_tool_result=result,
+            llm=context.llm,
+            token_limit=context.max_token_count,
+            structured_params=structured_params,
+            tool_call_id=context.tool_call_id,
+            tool_name=context.tool_name,
+        )
+
         return result
 
     def get_parameterized_one_liner(self, params: dict) -> str:
@@ -1,71 +1,168 @@
+import asyncio
+import json
+import logging
+import threading
+from contextlib import asynccontextmanager
+from enum import Enum
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+from mcp.client.session import ClientSession
+from mcp.client.sse import sse_client
+from mcp.client.stdio import StdioServerParameters, stdio_client
+from mcp.client.streamable_http import streamablehttp_client
+from mcp.types import Tool as MCP_Tool
+from pydantic import AnyUrl, BaseModel, Field, model_validator
+
+from holmes.common.env_vars import SSE_READ_TIMEOUT
 from holmes.core.tools import (
-    Toolset,
+    CallablePrerequisite,
+    StructuredToolResult,
+    StructuredToolResultStatus,
     Tool,
+    ToolInvokeContext,
     ToolParameter,
-    StructuredToolResult,
-    ToolResultStatus,
-    CallablePrerequisite,
+    Toolset,
 )
 
-from typing import Dict, Any, List, Optional
-from mcp.client.session import ClientSession
-from mcp.client.sse import sse_client
+# Lock per MCP server URL to serialize calls to the same server
+_server_locks: Dict[str, threading.Lock] = {}
+_locks_lock = threading.Lock()
 
-from mcp.types import Tool as MCP_Tool
-from mcp.types import CallToolResult
 
-import asyncio
-from pydantic import Field, AnyUrl, field_validator
-from typing import Tuple
-import logging
+def get_server_lock(url: str) -> threading.Lock:
+    """Get or create a lock for a specific MCP server URL."""
+    with _locks_lock:
+        if url not in _server_locks:
+            _server_locks[url] = threading.Lock()
+        return _server_locks[url]
 
 
-class RemoteMCPTool(Tool):
-    url: str
+class MCPMode(str, Enum):
+    SSE = "sse"
+    STREAMABLE_HTTP = "streamable-http"
+    STDIO = "stdio"
+
+
+class MCPConfig(BaseModel):
+    url: AnyUrl
+    mode: MCPMode = MCPMode.SSE
     headers: Optional[Dict[str, str]] = None
 
-    def _invoke(
-        self, params: dict, user_approved: bool = False
-    ) -> StructuredToolResult:
+    def get_lock_string(self) -> str:
+        return str(self.url)
+
+
+class StdioMCPConfig(BaseModel):
+    mode: MCPMode = MCPMode.STDIO
+    command: str
+    args: Optional[List[str]] = None
+    env: Optional[Dict[str, str]] = None
+
+    def get_lock_string(self) -> str:
+        return str(self.command)
+
+
+@asynccontextmanager
+async def get_initialized_mcp_session(toolset: "RemoteMCPToolset"):
+    if toolset._mcp_config is None:
+        raise ValueError("MCP config is not initialized")
+
+    if isinstance(toolset._mcp_config, StdioMCPConfig):
+        server_params = StdioServerParameters(
+            command=toolset._mcp_config.command,
+            args=toolset._mcp_config.args or [],
+            env=toolset._mcp_config.env,
+        )
+        async with stdio_client(server_params) as (
+            read_stream,
+            write_stream,
+        ):
+            async with ClientSession(read_stream, write_stream) as session:
+                _ = await session.initialize()
+                yield session
+    elif toolset._mcp_config.mode == MCPMode.SSE:
+        url = str(toolset._mcp_config.url)
+        async with sse_client(
+            url, toolset._mcp_config.headers, sse_read_timeout=SSE_READ_TIMEOUT
+        ) as (
+            read_stream,
+            write_stream,
+        ):
+            async with ClientSession(read_stream, write_stream) as session:
+                _ = await session.initialize()
+                yield session
+    else:
+        url = str(toolset._mcp_config.url)
+        async with streamablehttp_client(
+            url, headers=toolset._mcp_config.headers, sse_read_timeout=SSE_READ_TIMEOUT
+        ) as (
+            read_stream,
+            write_stream,
+            _,
+        ):
+            async with ClientSession(read_stream, write_stream) as session:
+                _ = await session.initialize()
+                yield session
+
+
+class RemoteMCPTool(Tool):
+    toolset: "RemoteMCPToolset" = Field(exclude=True)
+
+    def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
         try:
-            return asyncio.run(self._invoke_async(params))
+            # Serialize calls to the same MCP server to prevent SSE conflicts.
+            # Different servers can still run in parallel.
+            if not self.toolset._mcp_config:
+                raise ValueError("MCP config not initialized")
+
+            lock = get_server_lock(str(self.toolset._mcp_config.get_lock_string()))
+            with lock:
+                return asyncio.run(self._invoke_async(params))
         except Exception as e:
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
+                status=StructuredToolResultStatus.ERROR,
                 error=str(e.args),
                 params=params,
                 invocation=f"MCPtool {self.name} with params {params}",
             )
 
+    @staticmethod
+    def _is_content_error(content: str) -> bool:
+        try:  # aws mcp sometimes returns an error in content - status code != 200
+            json_content: dict = json.loads(content)
+            status_code = json_content.get("response", {}).get("status_code", 200)
+            return status_code >= 300
+        except Exception:
+            return False
+
     async def _invoke_async(self, params: Dict) -> StructuredToolResult:
-        async with sse_client(self.url, self.headers) as (read_stream, write_stream):
-            async with ClientSession(read_stream, write_stream) as session:
-                _ = await session.initialize()
-                tool_result: CallToolResult = await session.call_tool(self.name, params)
+        async with get_initialized_mcp_session(self.toolset) as session:
+            tool_result = await session.call_tool(self.name, params)
 
-                merged_text = " ".join(
-                    c.text for c in tool_result.content if c.type == "text"
-                )
-                return StructuredToolResult(
-                    status=(
-                        ToolResultStatus.ERROR
-                        if tool_result.isError
-                        else ToolResultStatus.SUCCESS
-                    ),
-                    data=merged_text,
-                    params=params,
-                    invocation=f"MCPtool {self.name} with params {params}",
-                )
+        merged_text = " ".join(c.text for c in tool_result.content if c.type == "text")
+        return StructuredToolResult(
+            status=(
+                StructuredToolResultStatus.ERROR
+                if (tool_result.isError or self._is_content_error(merged_text))
+                else StructuredToolResultStatus.SUCCESS
+            ),
+            data=merged_text,
+            params=params,
+            invocation=f"MCPtool {self.name} with params {params}",
+        )
 
     @classmethod
-    def create(cls, url: str, tool: MCP_Tool, headers: Optional[Dict[str, str]] = None):
+    def create(
+        cls,
+        tool: MCP_Tool,
+        toolset: "RemoteMCPToolset",
+    ):
         parameters = cls.parse_input_schema(tool.inputSchema)
         return cls(
-            url=url,
             name=tool.name,
             description=tool.description or "",
            parameters=parameters,
-            headers=headers,
+            toolset=toolset,
         )
 
     @classmethod
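
A quick sketch of the per-server lock semantics introduced above: the same lock string always yields the same threading.Lock, so concurrent tool calls against one MCP server are serialized while calls to different servers proceed in parallel (standalone reimplementation of get_server_lock, for illustration only):

import threading

_server_locks: dict[str, threading.Lock] = {}
_locks_lock = threading.Lock()

def get_server_lock(url: str) -> threading.Lock:
    # _locks_lock guards the registry; the returned lock guards the server.
    with _locks_lock:
        if url not in _server_locks:
            _server_locks[url] = threading.Lock()
        return _server_locks[url]

assert get_server_lock("http://a:8000/sse") is get_server_lock("http://a:8000/sse")
assert get_server_lock("http://a:8000/sse") is not get_server_lock("http://b:8000/sse")
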
@@ -85,53 +182,110 @@ class RemoteMCPTool(Tool):
         return parameters
 
     def get_parameterized_one_liner(self, params: Dict) -> str:
-        return f"Call MCP Server ({self.url} - {self.name})"
+        # AWS MCP cli_command
+        if params and params.get("cli_command"):
+            return f"{params.get('cli_command')}"
+
+        # gcloud MCP run_gcloud_command
+        if self.name == "run_gcloud_command" and params and "args" in params:
+            args = params.get("args", [])
+            if isinstance(args, list):
+                return f"gcloud {' '.join(str(arg) for arg in args)}"
+
+        return f"{self.toolset.name}: {self.name} {params}"
 
 
 class RemoteMCPToolset(Toolset):
-    url: AnyUrl
     tools: List[RemoteMCPTool] = Field(default_factory=list)  # type: ignore
     icon_url: str = "https://registry.npmmirror.com/@lobehub/icons-static-png/1.46.0/files/light/mcp.png"
+    _mcp_config: Optional[Union[MCPConfig, StdioMCPConfig]] = None
 
     def model_post_init(self, __context: Any) -> None:
-        self.prerequisites = [CallablePrerequisite(callable=self.init_server_tools)]
+        self.prerequisites = [
+            CallablePrerequisite(callable=self.prerequisites_callable)
+        ]
 
-    def get_headers(self) -> Optional[Dict[str, str]]:
-        return self.config and self.config.get("headers")
+    @model_validator(mode="before")
+    @classmethod
+    def migrate_url_to_config(cls, values: dict[str, Any]) -> dict[str, Any]:
+        """
+        Migrates url from a top-level field to the config object.
+        If url is passed as a field, it is moved into config (config is created if it doesn't exist).
+        """
+        if not isinstance(values, dict) or "url" not in values:
+            return values
+
+        url_value = values.pop("url")
+        if url_value is None:
+            return values
+
+        config = values.get("config")
+        if config is None:
+            config = {}
+            values["config"] = config
+
+        toolset_name = values.get("name", "unknown")
+        if "url" in config:
+            logging.warning(
+                f"Toolset {toolset_name} has two urls defined; remove the 'url' field from the toolset configuration and keep the 'url' in the config section."
+            )
+            return values
 
-    @field_validator("url", mode="before")
-    def append_sse_if_missing(cls, v):
-        if isinstance(v, str) and not v.rstrip("/").endswith("/sse"):
-            v = v.rstrip("/") + "/sse"
-        return v
+        logging.warning(
+            f"Toolset {toolset_name}: 'url' field has been migrated to config. "
+            "Please move 'url' to the config section."
+        )
+        config["url"] = url_value
+        return values
 
-    # used as a CallablePrerequisite, config added for that case.
-    def init_server_tools(self, config: dict[str, Any]) -> Tuple[bool, str]:
+    def prerequisites_callable(self, config) -> Tuple[bool, str]:
         try:
+            if not config:
+                return (False, f"Config is required for {self.name}")
+
+            mode_value = config.get("mode", MCPMode.SSE.value)
+            allowed_modes = [e.value for e in MCPMode]
+            if mode_value not in allowed_modes:
+                return (
+                    False,
+                    f'Invalid mode "{mode_value}", allowed modes are {", ".join(allowed_modes)}',
+                )
+
+            if mode_value == MCPMode.STDIO.value:
+                self._mcp_config = StdioMCPConfig(**config)
+            else:
+                self._mcp_config = MCPConfig(**config)
+                clean_url_str = str(self._mcp_config.url).rstrip("/")
+
+                if self._mcp_config.mode == MCPMode.SSE and not clean_url_str.endswith(
+                    "/sse"
+                ):
+                    self._mcp_config.url = AnyUrl(clean_url_str + "/sse")
+
             tools_result = asyncio.run(self._get_server_tools())
+
             self.tools = [
-                RemoteMCPTool.create(str(self.url), tool, self.get_headers())
-                for tool in tools_result.tools
+                RemoteMCPTool.create(tool, self) for tool in tools_result.tools
             ]
 
             if not self.tools:
                 logging.warning(f"mcp server {self.name} loaded 0 tools.")
+
             return (True, "")
         except Exception as e:
-            # using e.args, the asyncio wrapper could stack another exception this helps printing them all.
             return (
                 False,
-                f"Failed to load mcp server {self.name} {self.url} {str(e.args)}",
+                f"Failed to load mcp server {self.name}: {str(e)}",
             )
 
     async def _get_server_tools(self):
-        async with sse_client(str(self.url), headers=self.get_headers()) as (
-            read_stream,
-            write_stream,
-        ):
-            async with ClientSession(read_stream, write_stream) as session:
-                _ = await session.initialize()
-                return await session.list_tools()
+        async with get_initialized_mcp_session(self) as session:
+            return await session.list_tools()
 
     def get_example_config(self) -> Dict[str, Any]:
-        return {}
+        example_config = MCPConfig(
+            url=AnyUrl("http://example.com:8000/mcp/messages"),
+            mode=MCPMode.STREAMABLE_HTTP,
+            headers={"Authorization": "Bearer YOUR_TOKEN"},
+        )
+        return example_config.model_dump()
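
For reference, a hedged sketch of the three config shapes that prerequisites_callable accepts, built with the models defined in this file (the hosts, command, and token below are placeholders):

from holmes.plugins.toolsets.mcp.toolset_mcp import MCPConfig, MCPMode, StdioMCPConfig

# SSE (the default mode); prerequisites_callable appends the /sse suffix if missing.
sse = MCPConfig(url="http://mcp.example.com:8000")

# Streamable HTTP with auth headers.
http = MCPConfig(
    url="http://mcp.example.com:8000/mcp/messages",
    mode=MCPMode.STREAMABLE_HTTP,
    headers={"Authorization": "Bearer YOUR_TOKEN"},
)

# Stdio: spawn a local MCP server process.
stdio = StdioMCPConfig(command="uvx", args=["some-mcp-server"], env={"DEBUG": "1"})
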
@@ -0,0 +1,165 @@
+"""NewRelic API wrapper for executing NRQL queries via GraphQL."""
+
+import logging
+from typing import Any, Dict
+
+import requests  # type: ignore
+
+logger = logging.getLogger(__name__)
+
+
+class NewRelicAPI:
+    """Python wrapper for NewRelic GraphQL API.
+
+    This class provides a clean interface to execute NRQL queries via the NewRelic GraphQL API,
+    supporting both US and EU datacenters.
+    """
+
+    def __init__(self, api_key: str, account_id: str, is_eu_datacenter: bool = False):
+        """Initialize the NewRelic API wrapper.
+
+        Args:
+            api_key: NewRelic API key
+            account_id: NewRelic account ID
+            is_eu_datacenter: If True, use EU datacenter URL. Defaults to False (US).
+        """
+        self.api_key = api_key
+        # Validate account_id is numeric to prevent injection
+        try:
+            self.account_id = int(account_id)
+        except ValueError:
+            raise ValueError(f"Invalid account_id: must be numeric, got '{account_id}'")
+        self.is_eu_datacenter = is_eu_datacenter
+
+    def _get_api_url(self) -> str:
+        """Get the appropriate API URL based on datacenter location.
+
+        Returns:
+            str: The GraphQL API endpoint URL
+        """
+        if self.is_eu_datacenter:
+            return "https://api.eu.newrelic.com/graphql"
+        return "https://api.newrelic.com/graphql"
+
+    def _make_request(
+        self, graphql_query: Dict[str, Any], timeout: int = 30
+    ) -> Dict[str, Any]:
+        """Make an HTTP POST request to the NewRelic GraphQL API.
+
+        Args:
+            graphql_query: The GraphQL query as a dictionary
+            timeout: Request timeout in seconds
+
+        Returns:
+            JSON response from the API
+
+        Raises:
+            requests.exceptions.HTTPError: If the request fails
+            Exception: If GraphQL returns errors
+        """
+        url = self._get_api_url()
+        headers = {
+            "Content-Type": "application/json",
+            "Api-Key": self.api_key,
+        }
+
+        response = requests.post(
+            url,
+            headers=headers,
+            json=graphql_query,
+            timeout=timeout,
+        )
+        response.raise_for_status()
+
+        # Parse JSON response
+        data = response.json()
+
+        # Check for GraphQL errors even on 200 responses
+        if "errors" in data and data["errors"]:
+            error_msg = data["errors"][0].get("message", "Unknown GraphQL error")
+            raise Exception(f"NewRelic GraphQL error: {error_msg}")
+
+        return data
+
+    def execute_nrql_query(self, nrql_query: str) -> list:
+        """Execute an NRQL query via the NewRelic GraphQL API.
+
+        Args:
+            nrql_query: The NRQL query string to execute
+
+        Returns:
+            list: The query results from NewRelic (extracted from the nested response)
+
+        Raises:
+            requests.exceptions.HTTPError: If the API request fails
+            Exception: If GraphQL returns errors
+        """
+        # Build the GraphQL query using variables to prevent injection
+        # Note: New Relic's GraphQL API requires the account ID to be inline, but we can use variables for the NRQL query
+        graphql_query = {
+            "query": f"""
+            query ExecuteNRQL($nrqlQuery: Nrql!) {{
+                actor {{
+                    account(id: {self.account_id}) {{
+                        nrql(query: $nrqlQuery) {{
+                            results
+                        }}
+                    }}
+                }}
+            }}
+            """,
+            "variables": {"nrqlQuery": nrql_query},
+        }
+
+        logger.info(f"Executing NRQL query: {nrql_query}")
+        response = self._make_request(graphql_query)
+
+        # Extract just the results array from the nested response
+        try:
+            results = response["data"]["actor"]["account"]["nrql"]["results"]
+            return results
+        except (KeyError, TypeError) as e:
+            raise Exception(
+                f"Failed to extract results from NewRelic response: {e}"
+            ) from e
+
+    def get_organization_accounts(self) -> list:
+        """Get all accounts accessible in the organization.
+
+        Returns:
+            list: List of account dictionaries with id and name
+
+        Raises:
+            requests.exceptions.HTTPError: If the API request fails
+            Exception: If GraphQL returns errors
+        """
+        graphql_query = {
+            "query": """
+            query GetOrganizationAccounts {
+                actor {
+                    organization {
+                        accountManagement {
+                            managedAccounts {
+                                id
+                                name
+                            }
+                        }
+                    }
+                }
+            }
+            """
+        }
+
+        logger.info("Querying organization accounts")
+        response = self._make_request(graphql_query)
+
+        # Extract accounts from the nested response
+        try:
+            accounts = response["data"]["actor"]["organization"]["accountManagement"][
+                "managedAccounts"
+            ]
+            return accounts
+        except (KeyError, TypeError) as e:
+            raise Exception(
+                f"Failed to extract accounts from NewRelic response: {e}"
+            ) from e
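
A hedged usage sketch for the wrapper above (the key, account ID, and NRQL string are placeholders):

from holmes.plugins.toolsets.newrelic.new_relic_api import NewRelicAPI

api = NewRelicAPI(api_key="NRAK-...", account_id="1234567", is_eu_datacenter=False)
results = api.execute_nrql_query(
    "SELECT count(*) FROM TransactionError SINCE 1 hour ago"
)
for row in results:
    print(row)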