mcp-hydrolix 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_hydrolix/__init__.py +13 -0
- mcp_hydrolix/auth/__init__.py +29 -0
- mcp_hydrolix/auth/credentials.py +63 -0
- mcp_hydrolix/auth/mcp_providers.py +137 -0
- mcp_hydrolix/log/__init__.py +6 -0
- mcp_hydrolix/log/log.py +60 -0
- mcp_hydrolix/log/log.yaml +40 -0
- mcp_hydrolix/log/utils.py +56 -0
- mcp_hydrolix/main.py +77 -0
- mcp_hydrolix/mcp_env.py +324 -0
- mcp_hydrolix/mcp_server.py +321 -0
- mcp_hydrolix/utils.py +70 -0
- mcp_hydrolix-0.1.6.dist-info/METADATA +314 -0
- mcp_hydrolix-0.1.6.dist-info/RECORD +17 -0
- mcp_hydrolix-0.1.6.dist-info/WHEEL +4 -0
- mcp_hydrolix-0.1.6.dist-info/entry_points.txt +2 -0
- mcp_hydrolix-0.1.6.dist-info/licenses/LICENSE +201 -0
mcp_hydrolix/mcp_env.py
ADDED
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
"""Environment configuration for the MCP Hydrolix server.
|
|
2
|
+
|
|
3
|
+
This module handles all environment variable configuration with sensible defaults
|
|
4
|
+
and type conversion.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from enum import Enum
|
|
10
|
+
from typing import Optional
|
|
11
|
+
|
|
12
|
+
from mcp_hydrolix.auth.credentials import HydrolixCredential, ServiceAccountToken, UsernamePassword
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class TransportType(str, Enum):
    """Transports over which the MCP server can be exposed.

    Members are also plain strings, so they compare equal to their values.
    """

    STDIO = "stdio"
    HTTP = "http"
    SSE = "sse"

    @classmethod
    def values(cls) -> list[str]:
        """Return the string value of every member, in declaration order."""
        return list(map(lambda member: member.value, cls))
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class HydrolixConfig:
    """Configuration for Hydrolix connection settings, read from the environment.

    The default credential is resolved once, in ``__init__``. Every other
    setting is exposed as a property that reads its environment variable on
    each access and converts it to the documented type.

    Required environment variables:
        HYDROLIX_HOST: The hostname of the Hydrolix server

    Optional environment variables (with defaults):
        HYDROLIX_TOKEN: Service account token to the Hydrolix Server (this or user+password is required)
        HYDROLIX_USER: The username for authentication (this or token is required)
        HYDROLIX_PASSWORD: The password for authentication (this or token is required)
        HYDROLIX_PORT: The port number (default: 8088)
        HYDROLIX_VERIFY: Verify SSL certificates (default: true)
        HYDROLIX_SECURE: Use a secured connection (default: true)
        HYDROLIX_CONNECT_TIMEOUT: Connection timeout in seconds (default: 30)
        HYDROLIX_SEND_RECEIVE_TIMEOUT: Send/receive timeout in seconds (default: 300)
        HYDROLIX_QUERIES_POOL_SIZE: Size of the query connection/executor pool (default: 100)
        HYDROLIX_QUERY_TIMEOUT_SECS: Per-query execution timeout in seconds (default: 30)
        HYDROLIX_DATABASE: Default database to use (default: None)
        HYDROLIX_PROXY_PATH: Path to be added to the host URL. For instance, for servers behind an HTTP proxy (default: None)
        HYDROLIX_MCP_SERVER_TRANSPORT: MCP server transport method - "stdio", "http", or "sse" (default: stdio)
        HYDROLIX_MCP_BIND_HOST: Host to bind the MCP server to when using HTTP or SSE transport (default: 127.0.0.1)
        HYDROLIX_MCP_BIND_PORT: Port to bind the MCP server to when using HTTP or SSE transport (default: 8000)
        HYDROLIX_MCP_REQUEST_TIMEOUT: Request timeout in seconds (default: 120)
        HYDROLIX_MCP_WORKERS: Number of worker processes (default: 1)
        HYDROLIX_MCP_WORKER_CONNECTIONS: Max concurrent requests per worker (default: 100)
        HYDROLIX_MCP_MAX_REQUESTS: Requests handled by a worker before it is restarted (default: 10000)
        HYDROLIX_MCP_MAX_REQUESTS_JITTER: Jitter applied to the max-requests restart threshold (default: 1000)
        HYDROLIX_MCP_MAX_KEEPALIVE: Seconds idle keep-alive connections are kept open (default: 10)
    """

    def __init__(self) -> None:
        """Initialize the configuration from environment variables.

        Raises:
            ValueError: If HYDROLIX_HOST is missing, or if only one of
                HYDROLIX_USER / HYDROLIX_PASSWORD is set.
        """
        self._validate_required_vars()
        # Credential to use for clickhouse connections when no per-request credential is provided
        self._default_credential: Optional[HydrolixCredential] = None

        token = os.environ.get("HYDROLIX_TOKEN")
        username = os.environ.get("HYDROLIX_USER")
        password = os.environ.get("HYDROLIX_PASSWORD")

        if token is not None:
            # A service account token takes precedence over username/password.
            self._default_credential = ServiceAccountToken(token, f"https://{self.host}/config")
        elif username is not None and password is not None:
            # No global service account available: fall back to the
            # username/password pair from the environment.
            self._default_credential = UsernamePassword(username, password)

    @staticmethod
    def _env_int(name: str, default: int) -> int:
        """Return environment variable *name* as an int, or *default* when unset.

        Raises:
            ValueError: If the variable is set but is not a valid integer. The
                message names the offending variable so misconfiguration is
                easy to trace.
        """
        raw = os.environ.get(name)
        if raw is None:
            return default
        try:
            return int(raw)
        except ValueError as err:
            raise ValueError(
                f"Environment variable {name} must be an integer, got {raw!r}"
            ) from err

    def creds_with(self, request_credential: Optional[HydrolixCredential]) -> HydrolixCredential:
        """Pick the credential to use for a request.

        Args:
            request_credential: Credential supplied with the request, if any.

        Returns:
            The request credential when given, otherwise the default credential
            resolved from the environment at construction time.

        Raises:
            ValueError: If neither a request credential nor a default credential
                is available.
        """
        if request_credential is not None:
            return request_credential
        if self._default_credential is not None:
            return self._default_credential
        raise ValueError(
            "No credentials available for Hydrolix connection. "
            "Please provide credentials either through HYDROLIX_TOKEN or "
            "HYDROLIX_USER/HYDROLIX_PASSWORD environment variables, "
            "or pass credentials explicitly via the creds parameter."
        )

    @property
    def host(self) -> str:
        """Get the Hydrolix host. Called during __init__"""
        return os.environ["HYDROLIX_HOST"]

    @property
    def port(self) -> int:
        """Get the Hydrolix port.

        Defaults to 8088.
        Can be overridden by HYDROLIX_PORT environment variable.
        """
        return self._env_int("HYDROLIX_PORT", 8088)

    @property
    def database(self) -> Optional[str]:
        """Get the default database name if set."""
        return os.getenv("HYDROLIX_DATABASE")

    @property
    def verify(self) -> bool:
        """Get whether SSL certificate verification is enabled.

        Only the value "true" (case-insensitive) enables verification; any
        other value disables it.

        Default: True
        """
        return os.getenv("HYDROLIX_VERIFY", "true").lower() == "true"

    @property
    def secure(self) -> bool:
        """Get whether to use a secured connection.

        Only the value "true" (case-insensitive) enables it; any other value
        disables it.

        Default: True
        """
        return os.getenv("HYDROLIX_SECURE", "true").lower() == "true"

    @property
    def connect_timeout(self) -> int:
        """Get the connection timeout in seconds.

        Default: 30
        """
        return self._env_int("HYDROLIX_CONNECT_TIMEOUT", 30)

    @property
    def send_receive_timeout(self) -> int:
        """Get the send/receive timeout in seconds.

        Default: 300 (Hydrolix default)
        """
        return self._env_int("HYDROLIX_SEND_RECEIVE_TIMEOUT", 300)

    @property
    def query_pool_size(self) -> int:
        """Get the size of the pool used for queries.

        Read from HYDROLIX_QUERIES_POOL_SIZE.
        Default: 100
        """
        return self._env_int("HYDROLIX_QUERIES_POOL_SIZE", 100)

    @property
    def query_timeout_sec(self) -> int:
        """Get the per-query execution timeout in seconds.

        Read from HYDROLIX_QUERY_TIMEOUT_SECS.
        Default: 30
        """
        return self._env_int("HYDROLIX_QUERY_TIMEOUT_SECS", 30)

    @property
    def proxy_path(self) -> Optional[str]:
        """Get the path to append to the host URL, if HYDROLIX_PROXY_PATH is set."""
        return os.getenv("HYDROLIX_PROXY_PATH")

    @property
    def mcp_server_transport(self) -> str:
        """Get the MCP server transport method.

        Valid options: "stdio", "http", "sse"
        Default: "stdio"

        Raises:
            ValueError: If HYDROLIX_MCP_SERVER_TRANSPORT is not a valid option.
        """
        transport = os.getenv("HYDROLIX_MCP_SERVER_TRANSPORT", TransportType.STDIO.value).lower()

        # Validate transport type
        if transport not in TransportType.values():
            valid_options = ", ".join(f'"{t}"' for t in TransportType.values())
            raise ValueError(f"Invalid transport '{transport}'. Valid options: {valid_options}")
        return transport

    @property
    def mcp_bind_host(self) -> str:
        """Get the host to bind the MCP server to.

        Only used when transport is "http" or "sse".
        Default: "127.0.0.1"
        """
        return os.getenv("HYDROLIX_MCP_BIND_HOST", "127.0.0.1")

    @property
    def mcp_bind_port(self) -> int:
        """Get the port to bind the MCP server to.

        Only used when transport is "http" or "sse".
        Default: 8000
        """
        return self._env_int("HYDROLIX_MCP_BIND_PORT", 8000)

    @property
    def mcp_timeout(self) -> int:
        """Get the request timeout in seconds.

        Only used when transport is "http" or "sse".
        Default: 120
        """
        return self._env_int("HYDROLIX_MCP_REQUEST_TIMEOUT", 120)

    @property
    def mcp_workers(self) -> int:
        """Get the number of worker processes.

        Only used when transport is "http" or "sse".
        Default: 1
        """
        return self._env_int("HYDROLIX_MCP_WORKERS", 1)

    @property
    def mcp_worker_connections(self) -> int:
        """Get the max number of concurrent requests per worker.

        Only used when transport is "http" or "sse".
        Default: 100
        """
        return self._env_int("HYDROLIX_MCP_WORKER_CONNECTIONS", 100)

    @property
    def mcp_max_requests_jitter(self) -> int:
        """Get the jitter used to randomize when a worker is reloaded after max_requests.

        Only used when transport is "http" or "sse".
        Default: 1000
        """
        return self._env_int("HYDROLIX_MCP_MAX_REQUESTS_JITTER", 1000)

    @property
    def mcp_max_requests(self) -> int:
        """Get the max number of requests handled by a worker before it is restarted.

        Only used when transport is "http" or "sse".
        Default: 10000
        """
        return self._env_int("HYDROLIX_MCP_MAX_REQUESTS", 10000)

    @property
    def mcp_keepalive(self) -> int:
        """Get the number of seconds idle keep-alive connections are kept open.

        Only used when transport is "http" or "sse".
        Default: 10
        """
        return self._env_int("HYDROLIX_MCP_MAX_KEEPALIVE", 10)

    def get_client_config(self, request_credential: Optional[HydrolixCredential]) -> dict:
        """
        Get the configuration dictionary for clickhouse_connect client.

        Args:
            request_credential: Optional credentials to use for this request. If not provided,
                falls back to the default credential for this HydrolixConfig

        Returns:
            dict: Configuration ready to be passed to clickhouse_connect.get_client()

        Raises:
            ValueError: If no credentials could be inferred for the request (either from
                the startup environment or provided in the request)
        """
        config = {
            "host": self.host,
            "port": self.port,
            "secure": self.secure,
            "verify": self.verify,
            "connect_timeout": self.connect_timeout,
            "send_receive_timeout": self.send_receive_timeout,
            "executor_threads": self.query_pool_size,
            "client_name": "mcp_hydrolix",
        }

        # Optional entries are only added when set, so the client's own
        # defaults apply otherwise.
        if self.database:
            config["database"] = self.database

        if self.proxy_path:
            config["proxy_path"] = self.proxy_path

        # Add credentials
        config |= self.creds_with(request_credential).clickhouse_config_entries()

        return config

    def _validate_required_vars(self) -> None:
        """Validate that all required environment variables are set. Called during __init__.

        Raises:
            ValueError: If any required environment variable is missing, or if
                only one of HYDROLIX_USER / HYDROLIX_PASSWORD is set.
        """
        required_vars = ["HYDROLIX_HOST"]
        missing_vars = [var for var in required_vars if var not in os.environ]

        # HYDROLIX_USER and HYDROLIX_PASSWORD must either be both present or both absent
        if ("HYDROLIX_USER" in os.environ) != ("HYDROLIX_PASSWORD" in os.environ):
            raise ValueError(
                "User/password authentication is only partially configured: set both HYDROLIX_USER and HYDROLIX_PASSWORD"
            )

        if missing_vars:
            raise ValueError(f"Missing required environment variables: {', '.join(missing_vars)}")
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
# Module-level cache backing get_config(); stays None until the first call.
_CONFIG_INSTANCE = None


def get_config():
    """
    Return the shared HydrolixConfig, building it lazily on first use.

    Later calls return the same object without re-reading the environment
    for credential selection.
    """
    global _CONFIG_INSTANCE
    if _CONFIG_INSTANCE is not None:
        return _CONFIG_INSTANCE
    # Construction is deferred to the first call so that load_dotenv() has likely run
    _CONFIG_INSTANCE = HydrolixConfig()
    return _CONFIG_INSTANCE
|
|
@@ -0,0 +1,321 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
import signal
|
|
4
|
+
from collections.abc import Sequence
|
|
5
|
+
from dataclasses import asdict, is_dataclass
|
|
6
|
+
from typing import Any, Final, Optional, List, cast, TypedDict
|
|
7
|
+
|
|
8
|
+
import clickhouse_connect
|
|
9
|
+
from clickhouse_connect import common
|
|
10
|
+
from clickhouse_connect.driver import httputil
|
|
11
|
+
from clickhouse_connect.driver.binding import format_query_value
|
|
12
|
+
from dotenv import load_dotenv
|
|
13
|
+
from fastmcp import FastMCP
|
|
14
|
+
from fastmcp.exceptions import ToolError
|
|
15
|
+
from fastmcp.server.dependencies import get_access_token
|
|
16
|
+
from pydantic import Field
|
|
17
|
+
from pydantic.dataclasses import dataclass
|
|
18
|
+
from starlette.requests import Request
|
|
19
|
+
from starlette.responses import PlainTextResponse
|
|
20
|
+
|
|
21
|
+
from mcp_hydrolix.auth import (
|
|
22
|
+
AccessToken,
|
|
23
|
+
HydrolixCredential,
|
|
24
|
+
HydrolixCredentialChain,
|
|
25
|
+
ServiceAccountToken,
|
|
26
|
+
UsernamePassword,
|
|
27
|
+
)
|
|
28
|
+
from mcp_hydrolix.mcp_env import HydrolixConfig, get_config
|
|
29
|
+
from mcp_hydrolix.utils import with_serializer
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# Metadata for one table column. Built by result_to_column() from rows whose
# column names match these field names (see the system.columns query in
# list_tables). `dataclass` here is pydantic.dataclasses.dataclass.
@dataclass
class Column:
    database: str
    table: str
    name: str
    column_type: str  # selected as `type AS column_type` in list_tables
    default_kind: Optional[str]
    default_expression: Optional[str]
    comment: Optional[str]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# Metadata for one table. Built by result_to_table() from rows whose column
# names match these field names (see the system.tables query in list_tables).
# `dataclass` here is pydantic.dataclasses.dataclass.
@dataclass
class Table:
    database: str
    name: str
    engine: str
    create_table_query: str
    dependencies_database: List[str]
    dependencies_table: List[str]
    engine_full: str
    sorting_key: str
    primary_key: str
    total_rows: Optional[int]
    total_bytes: Optional[int]
    total_bytes_uncompressed: Optional[int]
    parts: Optional[int]
    active_parts: Optional[int]
    total_marks: Optional[int]
    # Not part of the system.tables query; list_tables assigns it afterwards.
    # NOTE(review): default is a list literal wrapped in pydantic Field --
    # presumably copied per instance by pydantic; confirm.
    columns: Optional[List[Column]] = Field([])
    comment: Optional[str] = None
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# Shape of a query result as returned by execute_query(): column names plus
# the result rows. Callers index it like a dict (result["columns"]).
# NOTE(review): the pydantic @dataclass decorator is stacked on a TypedDict,
# which is unusual -- confirm it is intentional and has no runtime effect.
@dataclass
class HdxQueryResult(TypedDict):
    columns: List[str]
    rows: List[List[Any]]
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
# Name shared by the FastMCP server instance and this module's logger.
MCP_SERVER_NAME = "mcp-hydrolix"
logger = logging.getLogger(MCP_SERVER_NAME)

# Must run before get_config() below, which reads its settings from
# os.environ (presumably this loads a .env file into the environment).
load_dotenv()

# Resolved once at import time; HydrolixConfig raises ValueError when the
# required environment variables are missing.
HYDROLIX_CONFIG: Final[HydrolixConfig] = get_config()

# Server instance that the tool and route decorators below register on.
mcp = FastMCP(
    name=MCP_SERVER_NAME,
    auth=HydrolixCredentialChain(f"https://{HYDROLIX_CONFIG.host}/config"),
)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def get_request_credential() -> Optional[HydrolixCredential]:
    """Return the credential carried by the current request, if any.

    Returns:
        The credential derived from the request's access token, or None when
        the request has no access token.

    Raises:
        ValueError: If an access token is present but is not a Hydrolix
            AccessToken.
    """
    token = get_access_token()
    if token is None:
        return None
    if not isinstance(token, AccessToken):
        raise ValueError(
            "Found non-hydrolix access token on request -- this should be impossible!"
        )
    return token.as_credential()
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
async def create_hydrolix_client(pool_mgr, request_credential: Optional[HydrolixCredential]):
    """
    Build an async client for operations against query-head.

    The client is created eagerly and its `server_version` is read right away
    as a connection test, so this function may raise. Any failure is logged
    and re-raised unchanged.

    INV: clients returned by this method MUST NOT be reused across sessions, because they can
    close over per-session credentials.

    Args:
        pool_mgr: Pool manager handed to clickhouse_connect for the client's connections.
        request_credential: Per-request credential, or None to use the configured default.
    """
    creds = HYDROLIX_CONFIG.creds_with(request_credential)
    # Describe who we connect as, for the log line only.
    if isinstance(creds, UsernamePassword):
        auth_info = f"as {creds.username}"
    else:
        auth_info = f"using service account {cast(ServiceAccountToken, creds).service_account_id}"

    logger.info(
        f"Creating Hydrolix client connection to {HYDROLIX_CONFIG.host}:{HYDROLIX_CONFIG.port} "
        f"{auth_info} "
        f"(connect_timeout={HYDROLIX_CONFIG.connect_timeout}s, "
        f"send_receive_timeout={HYDROLIX_CONFIG.send_receive_timeout}s)"
    )

    try:
        client_options = HYDROLIX_CONFIG.get_client_config(request_credential)
        client = await clickhouse_connect.get_async_client(pool_mgr=pool_mgr, **client_options)
        # Test the connection
        version = client.client.server_version
        logger.info(f"Successfully connected to Hydrolix compatible with ClickHouse {version}")
        return client
    except Exception as e:
        logger.error(f"Failed to connect to Hydrolix: {str(e)}")
        raise
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
# allow custom hydrolix settings in CH client
common.set_setting("invalid_setting_action", "send")
common.set_setting("autogenerate_session_id", False)
# Single pool manager shared by every client this module creates; its size
# comes from HYDROLIX_QUERIES_POOL_SIZE via HYDROLIX_CONFIG.query_pool_size.
client_shared_pool = httputil.get_pool_manager(maxsize=HYDROLIX_CONFIG.query_pool_size, num_pools=1)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def term(*args, **kwargs):
    """Signal handler that clears the shared connection pool manager.

    Accepts and ignores any arguments, so it can be installed with
    ``signal.signal`` (which calls handlers with ``(signum, frame)``).
    """
    client_shared_pool.clear()


# Clear the shared pool on termination signals.
# NOTE(review): installing `term` replaces the previous handlers and `term`
# itself does not stop the process -- confirm that the server runner installs
# its own shutdown handling.
signal.signal(signal.SIGTERM, term)
signal.signal(signal.SIGINT, term)
# SIGQUIT is not defined on every platform (e.g. Windows); registering it
# unconditionally would make importing this module fail there.
if hasattr(signal, "SIGQUIT"):
    signal.signal(signal.SIGQUIT, term)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
async def execute_query(query: str) -> HdxQueryResult:
    """Run *query* against Hydrolix and return its column names and rows.

    A new client is created for each call on the shared pool, using the
    current request's credential (or the configured default). The query is
    sent with read-only and resource-limit settings.

    Args:
        query: SQL text to execute.

    Returns:
        HdxQueryResult with ``columns`` (column names) and ``rows`` (result rows).

    Raises:
        ToolError: If creating the client or running the query fails; the
            original exception is attached as the cause.
    """
    try:
        async with await create_hydrolix_client(
            client_shared_pool, get_request_credential()
        ) as client:
            res = await client.query(
                query,
                settings={
                    "readonly": 1,
                    "hdx_query_max_execution_time": HYDROLIX_CONFIG.query_timeout_sec,
                    "hdx_query_max_attempts": 1,
                    "hdx_query_max_result_rows": 100_000,
                    "hdx_query_max_memory_usage": 2 * 1024 * 1024 * 1024,  # 2GiB
                    "hdx_query_admin_comment": f"User: {MCP_SERVER_NAME}",
                },
            )
            logger.info(f"Query returned {len(res.result_rows)} rows")
            return HdxQueryResult(columns=res.column_names, rows=res.result_rows)
    except Exception as err:
        logger.error(f"Error executing query: {err}")
        # Chain the cause so the original traceback is preserved for debugging.
        raise ToolError(f"Query execution failed: {str(err)}") from err
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
async def execute_cmd(query: str):
    """Run *query* as a command against Hydrolix and return the raw result.

    A new client is created for each call on the shared pool, using the
    current request's credential (or the configured default).

    Args:
        query: Command text to execute.

    Returns:
        Whatever the client's ``command`` call returns.

    Raises:
        ToolError: If creating the client or running the command fails; the
            original exception is attached as the cause.
    """
    try:
        async with await create_hydrolix_client(
            client_shared_pool, get_request_credential()
        ) as client:
            res = await client.command(query)
            logger.info("Command returned executed.")
            return res
    except Exception as err:
        logger.error(f"Error executing command: {err}")
        # Chain the cause so the original traceback is preserved for debugging.
        raise ToolError(f"Command execution failed: {str(err)}") from err
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
@mcp.custom_route("/health", methods=["GET"])
async def health_check(request: Request) -> PlainTextResponse:
    """Report whether the server can reach Hydrolix.

    Responds with an OK message including the server version when a client
    connection can be created, and with HTTP 503 plus the error text otherwise.
    """
    try:
        # Creating a client doubles as the connectivity probe to query-head.
        connection = await create_hydrolix_client(client_shared_pool, get_request_credential())
        async with connection as client:
            server_version = client.client.server_version
            message = f"OK - Connected to Hydrolix compatible with ClickHouse {server_version}"
            return PlainTextResponse(message)
    except Exception as e:
        # 503 Service Unavailable signals that Hydrolix could not be reached.
        failure = f"ERROR - Cannot connect to Hydrolix: {str(e)}"
        return PlainTextResponse(failure, status_code=503)
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def result_to_table(query_columns, result) -> List[Table]:
    """Turn result rows into Table objects, pairing each value with its column name."""
    tables = []
    for row in result:
        field_values = dict(zip(query_columns, row))
        tables.append(Table(**field_values))
    return tables
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def result_to_column(query_columns, result) -> List[Column]:
    """Turn result rows into Column objects, pairing each value with its column name."""
    columns = []
    for row in result:
        field_values = dict(zip(query_columns, row))
        columns.append(Column(**field_values))
    return columns
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def to_json(obj: Any) -> str:
    """Serialize a dataclass to a JSON string; pass other values through.

    Despite the ``str`` annotation, only dataclasses yield a string:
    - dataclass -> JSON text of its ``asdict`` form
    - list -> list with each item converted recursively
    - dict -> dict with each value converted recursively
    - anything else -> returned unchanged

    The non-str returns exist because this function is also used as the
    ``default=`` hook of ``json.dumps``; type checking is suppressed for them.
    """
    if is_dataclass(obj):
        payload = asdict(obj)
        return json.dumps(payload, default=to_json)
    if isinstance(obj, list):
        return list(map(to_json, obj))  # type: ignore[return-value]
    if isinstance(obj, dict):
        converted = {}
        for key, value in obj.items():
            converted[key] = to_json(value)
        return converted  # type: ignore[return-value]
    return obj  # type: ignore[return-value]
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
@mcp.tool()
async def list_databases() -> List[str]:
    """List available Hydrolix databases"""
    # The docstring above is kept verbatim: it is presumably surfaced to MCP
    # clients as the tool description.
    logger.info("Listing all databases")
    result = await execute_cmd("SHOW DATABASES")

    if isinstance(result, str):
        # Newline-separated names; drop blank entries so an empty result
        # yields [] rather than [""].
        databases = [name for db in result.strip().split("\n") if (name := db.strip())]
    elif isinstance(result, (list, tuple)):
        # The command may also hand back a sequence of values; flatten it
        # instead of nesting it inside another list.
        databases = [str(db).strip() for db in result]
    else:
        databases = [result]

    logger.info(f"Found {len(databases)} databases")
    return databases
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
@mcp.tool()
async def list_tables(
    database: str, like: Optional[str] = None, not_like: Optional[str] = None
) -> List[Table]:
    """List available Hydrolix tables in a database, including schema, comment,
    row count, and column count."""
    # The docstring above is kept verbatim: it is presumably surfaced to MCP
    # clients as the tool description.
    logger.info(f"Listing tables in database '{database}'")
    # User-supplied values are quoted with format_query_value rather than
    # interpolated raw into the SQL text.
    query = f"""
        SELECT database, name, engine, create_table_query, dependencies_database,
        dependencies_table, engine_full, sorting_key, primary_key, total_rows, total_bytes,
        total_bytes_uncompressed, parts, active_parts, total_marks, comment
        FROM system.tables WHERE database = {format_query_value(database)}"""
    if like:
        query += f" AND name LIKE {format_query_value(like)}"

    if not_like:
        query += f" AND name NOT LIKE {format_query_value(not_like)}"

    result = await execute_query(query)

    # Deserialize result as Table dataclass instances
    tables = result_to_table(result["columns"], result["rows"])

    # One extra query per table fetches its column metadata.
    for table in tables:
        column_data_query = f"""
            SELECT database, table, name, type AS column_type, default_kind, default_expression, comment
            FROM system.columns
            WHERE database = {format_query_value(database)} AND table = {format_query_value(table.name)}"""
        column_data_query_result = await execute_query(column_data_query)
        # result_to_column already returns a fresh list; no extra copy needed.
        table.columns = result_to_column(
            column_data_query_result["columns"],
            column_data_query_result["rows"],
        )

    logger.info(f"Found {len(tables)} tables")
    return tables
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
@mcp.tool()
@with_serializer
async def run_select_query(query: str) -> dict[str, tuple | Sequence[str | Sequence[Any]]]:
    """Run a SELECT query in a Hydrolix time-series database using the Clickhouse SQL dialect.
    Queries run using this tool will timeout after 30 seconds.

    The primary key on tables queried this way is always a timestamp. Queries should include either
    a LIMIT clause or a filter based on the primary key as a performance guard to ensure they return
    in a reasonable amount of time. Queries should select specific fields and avoid the use of
    SELECT * to avoid performance issues. The performance guard used for the query should be clearly
    communicated with the user, and the user should be informed that the query may take a long time
    to run if the performance guard is not used. When choosing a performance guard, the user's
    preference should be requested and used if available. When using aggregations, the performance
    guard should take form of a primary key filter, or else the LIMIT should be applied in a
    subquery before applying the aggregations.

    When matching columns based on substrings, prefix or suffix matches should be used instead of
    full-text search whenever possible. When searching for substrings, the syntax `column LIKE
    '%suffix'` or `column LIKE 'prefix%'` should be used.

    Example query. Purpose: get logs from the `application.logs` table. Primary key: `timestamp`.
    Performance guard: 10 minute recency filter.

    `SELECT message, timestamp FROM application.logs WHERE timestamp > now() - INTERVAL 10 MINUTES`

    Example query. Purpose: get the median humidity from the `weather.measurements` table. Primary
    key: `date`. Performance guard: 1000 row limit, applied before aggregation.

    `SELECT median(humidity) FROM (SELECT humidity FROM weather.measurements LIMIT 1000)`

    Example query. Purpose: get the lowest temperature from the `weather.measurements` table over
    the last 10 years. Primary key: `date`. Performance guard: date range filter.

    `SELECT min(temperature) FROM weather.measurements WHERE date > now() - INTERVAL 10 YEARS`

    Example query. Purpose: get the app name with the most log messages from the `application.logs`
    table in the window between new year and valentine's day of 2024. Primary key: `timestamp`.
    Performance guard: date range filter.
    `SELECT app, count(*) FROM application.logs WHERE timestamp > '2024-01-01' AND timestamp < '2024-02-14' GROUP BY app ORDER BY count(*) DESC LIMIT 1`
    """
    # The docstring above is kept verbatim: it is presumably surfaced to MCP
    # clients as the tool description.
    logger.info(f"Executing SELECT query: {query}")
    try:
        result = await execute_query(query=query)
        return result
    except Exception as e:
        logger.error(f"Unexpected error in run_select_query: {str(e)}")
        # Chain the cause so the underlying failure stays attached to the ToolError.
        raise ToolError(f"Unexpected error during query execution: {str(e)}") from e
|