mcp-hydrolix 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,154 +1,285 @@
1
+ import json
1
2
  import logging
2
- from typing import Sequence
3
- import concurrent.futures
4
- import atexit
3
+ import signal
4
+ from collections.abc import Sequence
5
+ from dataclasses import asdict, is_dataclass
6
+ from typing import Any, Final, Optional, List, cast, TypedDict
5
7
 
6
8
  import clickhouse_connect
7
- from clickhouse_connect.driver.binding import quote_identifier, format_query_value
9
+ from clickhouse_connect import common
10
+ from clickhouse_connect.driver import httputil
11
+ from clickhouse_connect.driver.binding import format_query_value
8
12
  from dotenv import load_dotenv
9
- from mcp.server.fastmcp import FastMCP
13
+ from fastmcp import FastMCP
14
+ from fastmcp.exceptions import ToolError
15
+ from fastmcp.server.dependencies import get_access_token
16
+ from pydantic import Field
17
+ from pydantic.dataclasses import dataclass
18
+ from starlette.requests import Request
19
+ from starlette.responses import PlainTextResponse
20
+
21
+ from .auth import (
22
+ AccessToken,
23
+ HydrolixCredential,
24
+ HydrolixCredentialChain,
25
+ ServiceAccountToken,
26
+ UsernamePassword,
27
+ )
28
+ from .mcp_env import HydrolixConfig, get_config
29
+ from .utils import with_serializer
30
+
31
+
32
+ @dataclass
33
+ class Column:
34
+ database: str
35
+ table: str
36
+ name: str
37
+ column_type: str
38
+ default_kind: Optional[str]
39
+ default_expression: Optional[str]
40
+ comment: Optional[str]
41
+
42
+
43
+ @dataclass
44
+ class Table:
45
+ database: str
46
+ name: str
47
+ engine: str
48
+ create_table_query: str
49
+ dependencies_database: List[str]
50
+ dependencies_table: List[str]
51
+ engine_full: str
52
+ sorting_key: str
53
+ primary_key: str
54
+ total_rows: Optional[int]
55
+ total_bytes: Optional[int]
56
+ total_bytes_uncompressed: Optional[int]
57
+ parts: Optional[int]
58
+ active_parts: Optional[int]
59
+ total_marks: Optional[int]
60
+ columns: Optional[List[Column]] = Field([])
61
+ comment: Optional[str] = None
62
+
63
+
64
+ @dataclass
65
+ class HdxQueryResult(TypedDict):
66
+ columns: List[str]
67
+ rows: List[List[Any]]
10
68
 
11
- from mcp_hydrolix.mcp_env import get_config
12
69
 
13
70
  MCP_SERVER_NAME = "mcp-hydrolix"
71
+ logger = logging.getLogger(MCP_SERVER_NAME)
72
+
73
+ load_dotenv()
74
+
75
+ HYDROLIX_CONFIG: Final[HydrolixConfig] = get_config()
14
76
 
15
- # Configure logging
16
- logging.basicConfig(
17
- level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
77
+ mcp = FastMCP(
78
+ name=MCP_SERVER_NAME,
79
+ dependencies=[
80
+ "clickhouse-connect",
81
+ "python-dotenv",
82
+ "pip-system-certs",
83
+ ],
84
+ auth=HydrolixCredentialChain(f"https://{HYDROLIX_CONFIG.host}/config"),
18
85
  )
19
- logger = logging.getLogger(MCP_SERVER_NAME)
20
86
 
21
- QUERY_EXECUTOR = concurrent.futures.ThreadPoolExecutor(max_workers=10)
22
- atexit.register(lambda: QUERY_EXECUTOR.shutdown(wait=True))
23
- SELECT_QUERY_TIMEOUT_SECS = 30
24
87
 
25
- load_dotenv()
88
+ def get_request_credential() -> Optional[HydrolixCredential]:
89
+ if (token := get_access_token()) is not None:
90
+ if isinstance(token, AccessToken):
91
+ return token.as_credential()
92
+ else:
93
+ raise ValueError(
94
+ "Found non-hydrolix access token on request -- this should be impossible!"
95
+ )
96
+ return None
97
+
98
+
99
+ async def create_hydrolix_client(pool_mgr, request_credential: Optional[HydrolixCredential]):
100
+ """
101
+ Create a client for operations against query-head. Note that this eagerly issues requests for initialization
102
+ of properties like `server_version`, and so may throw exceptions.
103
+ INV: clients returned by this method MUST NOT be reused across sessions, because they can close over per-session
104
+ credentials.
105
+ """
106
+ creds = HYDROLIX_CONFIG.creds_with(request_credential)
107
+ auth_info = (
108
+ f"as {creds.username}"
109
+ if isinstance(creds, UsernamePassword)
110
+ else f"using service account {cast(ServiceAccountToken, creds).service_account_id}"
111
+ )
112
+ logger.info(
113
+ f"Creating Hydrolix client connection to {HYDROLIX_CONFIG.host}:{HYDROLIX_CONFIG.port} "
114
+ f"{auth_info} "
115
+ f"(connect_timeout={HYDROLIX_CONFIG.connect_timeout}s, "
116
+ f"send_receive_timeout={HYDROLIX_CONFIG.send_receive_timeout}s)"
117
+ )
118
+
119
+ try:
120
+ client = await clickhouse_connect.get_async_client(
121
+ pool_mgr=pool_mgr, **HYDROLIX_CONFIG.get_client_config(request_credential)
122
+ )
123
+ # Test the connection
124
+ version = client.client.server_version
125
+ logger.info(f"Successfully connected to Hydrolix compatible with ClickHouse {version}")
126
+ return client
127
+ except Exception as e:
128
+ logger.error(f"Failed to connect to Hydrolix: {str(e)}")
129
+ raise
130
+
131
+
132
+ # allow custom hydrolix settings in CH client
133
+ common.set_setting("invalid_setting_action", "send")
134
+ common.set_setting("autogenerate_session_id", False)
135
+ client_shared_pool = httputil.get_pool_manager(maxsize=HYDROLIX_CONFIG.query_pool_size, num_pools=1)
136
+
137
+
138
+ def term(*args, **kwargs):
139
+ client_shared_pool.clear()
140
+
141
+
142
+ signal.signal(signal.SIGTERM, term)
143
+ signal.signal(signal.SIGINT, term)
144
+ signal.signal(signal.SIGQUIT, term)
145
+
146
+
147
+ async def execute_query(query: str) -> HdxQueryResult:
148
+ try:
149
+ async with await create_hydrolix_client(
150
+ client_shared_pool, get_request_credential()
151
+ ) as client:
152
+ res = await client.query(
153
+ query,
154
+ settings={
155
+ "readonly": 1,
156
+ "hdx_query_max_execution_time": HYDROLIX_CONFIG.query_timeout_sec,
157
+ "hdx_query_max_attempts": 1,
158
+ "hdx_query_max_result_rows": 100_000,
159
+ "hdx_query_max_memory_usage": 2 * 1024 * 1024 * 1024, # 2GiB
160
+ "hdx_query_admin_comment": f"User: {MCP_SERVER_NAME}",
161
+ },
162
+ )
163
+ logger.info(f"Query returned {len(res.result_rows)} rows")
164
+ return HdxQueryResult(columns=res.column_names, rows=res.result_rows)
165
+ except Exception as err:
166
+ logger.error(f"Error executing query: {err}")
167
+ raise ToolError(f"Query execution failed: {str(err)}")
168
+
169
+
170
+ async def execute_cmd(query: str):
171
+ try:
172
+ async with await create_hydrolix_client(
173
+ client_shared_pool, get_request_credential()
174
+ ) as client:
175
+ res = await client.command(query)
176
+ logger.info("Command returned executed.")
177
+ return res
178
+ except Exception as err:
179
+ logger.error(f"Error executing command: {err}")
180
+ raise ToolError(f"Command execution failed: {str(err)}")
181
+
182
+
183
+ @mcp.custom_route("/health", methods=["GET"])
184
+ async def health_check(request: Request) -> PlainTextResponse:
185
+ """Health check endpoint for monitoring server status.
186
+
187
+ Returns OK if the server is running and can connect to Hydrolix.
188
+ """
189
+ try:
190
+ # Try to create a client connection to verify query-head connectivity
191
+ async with await create_hydrolix_client(
192
+ client_shared_pool, get_request_credential()
193
+ ) as client:
194
+ version = client.client.server_version
195
+ return PlainTextResponse(f"OK - Connected to Hydrolix compatible with ClickHouse {version}")
196
+ except Exception as e:
197
+ # Return 503 Service Unavailable if we can't connect to Hydrolix
198
+ return PlainTextResponse(f"ERROR - Cannot connect to Hydrolix: {str(e)}", status_code=503)
199
+
26
200
 
27
- deps = [
28
- "clickhouse-connect",
29
- "python-dotenv",
30
- "uvicorn",
31
- "pip-system-certs",
32
- ]
201
+ def result_to_table(query_columns, result) -> List[Table]:
202
+ return [Table(**dict(zip(query_columns, row))) for row in result]
33
203
 
34
- mcp = FastMCP(MCP_SERVER_NAME, dependencies=deps)
204
+
205
+ def result_to_column(query_columns, result) -> List[Column]:
206
+ return [Column(**dict(zip(query_columns, row))) for row in result]
207
+
208
+
209
+ def to_json(obj: Any) -> str:
210
+ # This function technically returns different types:
211
+ # - str for dataclasses (the primary use case)
212
+ # - list/dict/Any for recursive processing during serialization
213
+ # Type checking is suppressed for non-str returns as they're only used internally by json.dumps
214
+ if is_dataclass(obj):
215
+ return json.dumps(asdict(obj), default=to_json)
216
+ elif isinstance(obj, list):
217
+ return [to_json(item) for item in obj] # type: ignore[return-value]
218
+ elif isinstance(obj, dict):
219
+ return {key: to_json(value) for key, value in obj.items()} # type: ignore[return-value]
220
+ return obj # type: ignore[return-value]
35
221
 
36
222
 
37
223
  @mcp.tool()
38
- def list_databases():
224
+ async def list_databases() -> List[str]:
39
225
  """List available Hydrolix databases"""
40
226
  logger.info("Listing all databases")
41
- client = create_hydrolix_client()
42
- result = client.command("SHOW DATABASES")
43
- logger.info(f"Found {len(result) if isinstance(result, list) else 1} databases")
44
- return result
227
+ result = await execute_cmd("SHOW DATABASES")
228
+
229
+ # Convert newline-separated string to list and trim whitespace
230
+ if isinstance(result, str):
231
+ databases = [db.strip() for db in result.strip().split("\n")]
232
+ else:
233
+ databases = [result]
234
+
235
+ logger.info(f"Found {len(databases)} databases")
236
+ return databases
45
237
 
46
238
 
47
239
  @mcp.tool()
48
- def list_tables(database: str, like: str = None):
49
- """List available Hydrolix tables in a database"""
240
+ async def list_tables(
241
+ database: str, like: Optional[str] = None, not_like: Optional[str] = None
242
+ ) -> List[Table]:
243
+ """List available Hydrolix tables in a database, including schema, comment,
244
+ row count, and column count."""
50
245
  logger.info(f"Listing tables in database '{database}'")
51
- client = create_hydrolix_client()
52
- query = f"SHOW TABLES FROM {quote_identifier(database)}"
246
+ query = f"""
247
+ SELECT database, name, engine, create_table_query, dependencies_database,
248
+ dependencies_table, engine_full, sorting_key, primary_key, total_rows, total_bytes,
249
+ total_bytes_uncompressed, parts, active_parts, total_marks, comment
250
+ FROM system.tables WHERE database = {format_query_value(database)}"""
53
251
  if like:
54
- query += f" LIKE {format_query_value(like)}"
55
- result = client.command(query)
56
-
57
- # Get all table comments in one query
58
- table_comments_query = (
59
- f"SELECT name, comment, primary_key FROM system.tables WHERE database = {format_query_value(database)} and engine = 'TurbineStorage' and total_rows > 0"
60
- )
61
- table_comments_result = client.query(table_comments_query)
62
- table_comments = {row[0]: row[1] for row in table_comments_result.result_rows}
63
- primary_keys = {row[0]: row[2] for row in table_comments_result.result_rows}
64
-
65
- # Get all column comments in one query
66
- column_comments_query = f"SELECT table, name, comment FROM system.columns WHERE database = {format_query_value(database)}"
67
- column_comments_result = client.query(column_comments_query)
68
- column_comments = {}
69
- for row in column_comments_result.result_rows:
70
- table, col_name, comment = row
71
- if table not in column_comments:
72
- column_comments[table] = {}
73
- column_comments[table][col_name] = comment
74
-
75
- def get_table_info(table):
76
- logger.info(f"Getting schema info for table {database}.{table}")
77
- schema_query = f"DESCRIBE TABLE {quote_identifier(database)}.{quote_identifier(table)}"
78
- schema_result = client.query(schema_query)
79
-
80
- columns = []
81
- column_names = schema_result.column_names
82
- for row in schema_result.result_rows:
83
- column_dict = {}
84
- for i, col_name in enumerate(column_names):
85
- column_dict[col_name] = row[i]
86
- # Add comment from our pre-fetched comments
87
- if table in column_comments and column_dict["name"] in column_comments[table]:
88
- column_dict["comment"] = column_comments[table][column_dict["name"]]
89
- else:
90
- column_dict["comment"] = None
91
- columns.append(column_dict)
92
-
93
- create_table_query = f"SHOW CREATE TABLE {database}.`{table}`"
94
- create_table_result = client.command(create_table_query)
95
-
96
- return {
97
- "database": database,
98
- "name": table,
99
- "comment": table_comments.get(table),
100
- "columns": columns,
101
- "create_table_query": create_table_result,
102
- "primary_key": primary_keys.get(table)
103
- }
104
-
105
- tables = []
106
- if isinstance(result, str):
107
- # Single table result
108
- for table in (t.strip() for t in result.split()):
109
- if table:
110
- tables.append(get_table_info(table))
111
- elif isinstance(result, Sequence):
112
- # Multiple table results
113
- for table in result:
114
- tables.append(get_table_info(table))
252
+ query += f" AND name LIKE {format_query_value(like)}"
253
+
254
+ if not_like:
255
+ query += f" AND name NOT LIKE {format_query_value(not_like)}"
256
+
257
+ result = await execute_query(query)
258
+
259
+ # Deserialize result as Table dataclass instances
260
+ tables = result_to_table(result["columns"], result["rows"])
261
+
262
+ for table in tables:
263
+ column_data_query = f"""
264
+ SELECT database, table, name, type AS column_type, default_kind, default_expression, comment
265
+ FROM system.columns
266
+ WHERE database = {format_query_value(database)} AND table = {format_query_value(table.name)}"""
267
+ column_data_query_result = await execute_query(column_data_query)
268
+ table.columns = [
269
+ c
270
+ for c in result_to_column(
271
+ column_data_query_result["columns"],
272
+ column_data_query_result["rows"],
273
+ )
274
+ ]
115
275
 
116
276
  logger.info(f"Found {len(tables)} tables")
117
277
  return tables
118
278
 
119
279
 
120
- def execute_query(query: str):
121
- client = create_hydrolix_client()
122
- try:
123
- res = client.query(
124
- query,
125
- settings={
126
- "readonly": 1,
127
- "hdx_query_max_execution_time": SELECT_QUERY_TIMEOUT_SECS,
128
- "hdx_query_max_attempts": 1,
129
- "hdx_query_max_result_rows": 100_000,
130
- "hdx_query_max_memory_usage": 2 * 1024 * 1024 * 1024, # 2GiB
131
- "hdx_query_admin_comment": f"User: {MCP_SERVER_NAME}",
132
- },
133
- )
134
- column_names = res.column_names
135
- rows = []
136
- for row in res.result_rows:
137
- row_dict = {}
138
- for i, col_name in enumerate(column_names):
139
- row_dict[col_name] = row[i]
140
- rows.append(row_dict)
141
- logger.info(f"Query returned {len(rows)} rows")
142
- return rows
143
- except Exception as err:
144
- logger.error(f"Error executing query: {err}")
145
- # Return a structured dictionary rather than a string to ensure proper serialization
146
- # by the MCP protocol. String responses for errors can cause BrokenResourceError.
147
- return {"error": str(err)}
148
-
149
-
150
280
  @mcp.tool()
151
- def run_select_query(query: str):
281
+ @with_serializer
282
+ async def run_select_query(query: str) -> dict[str, tuple | Sequence[str | Sequence[Any]]]:
152
283
  """Run a SELECT query in a Hydrolix time-series database using the Clickhouse SQL dialect.
153
284
  Queries run using this tool will timeout after 30 seconds.
154
285
 
@@ -188,47 +319,8 @@ def run_select_query(query: str):
188
319
  """
189
320
  logger.info(f"Executing SELECT query: {query}")
190
321
  try:
191
- future = QUERY_EXECUTOR.submit(execute_query, query)
192
- try:
193
- result = future.result(timeout=SELECT_QUERY_TIMEOUT_SECS)
194
- # Check if we received an error structure from execute_query
195
- if isinstance(result, dict) and "error" in result:
196
- logger.warning(f"Query failed: {result['error']}")
197
- # MCP requires structured responses; string error messages can cause
198
- # serialization issues leading to BrokenResourceError
199
- return {"status": "error", "message": f"Query failed: {result['error']}"}
200
- return result
201
- except concurrent.futures.TimeoutError:
202
- logger.warning(f"Query timed out after {SELECT_QUERY_TIMEOUT_SECS} seconds: {query}")
203
- future.cancel()
204
- # Return a properly structured response for timeout errors
205
- return {
206
- "status": "error",
207
- "message": f"Query timed out after {SELECT_QUERY_TIMEOUT_SECS} seconds",
208
- }
322
+ result = await execute_query(query=query)
323
+ return result
209
324
  except Exception as e:
210
325
  logger.error(f"Unexpected error in run_select_query: {str(e)}")
211
- # Catch all other exceptions and return them in a structured format
212
- # to prevent MCP serialization failures
213
- return {"status": "error", "message": f"Unexpected error: {str(e)}"}
214
-
215
-
216
- def create_hydrolix_client():
217
- client_config = get_config().get_client_config()
218
- logger.info(
219
- f"Creating Hydrolix client connection to {client_config['host']}:{client_config['port']} "
220
- f"as {client_config['username']} "
221
- f"(secure={client_config['secure']}, verify={client_config['verify']}, "
222
- f"connect_timeout={client_config['connect_timeout']}s, "
223
- f"send_receive_timeout={client_config['send_receive_timeout']}s)"
224
- )
225
-
226
- try:
227
- client = clickhouse_connect.get_client(**client_config)
228
- # Test the connection
229
- version = client.server_version
230
- logger.info(f"Successfully connected to Hydrolix server version {version}")
231
- return client
232
- except Exception as e:
233
- logger.error(f"Failed to connect to Hydrolix: {str(e)}")
234
- raise
326
+ raise ToolError(f"Unexpected error during query execution: {str(e)}")
mcp_hydrolix/utils.py ADDED
@@ -0,0 +1,70 @@
1
+ import inspect
2
+ import ipaddress
3
+ import json
4
+ from datetime import datetime, time
5
+ from decimal import Decimal
6
+ from functools import wraps
7
+
8
+ import fastmcp.utilities.types
9
+ from fastmcp.tools.tool import ToolResult
10
+
11
+
12
+ class ExtendedEncoder(json.JSONEncoder):
13
+ """Extends JSONEncoder to apply custom serialization of CH data types."""
14
+
15
+ def default(self, obj):
16
+ if isinstance(obj, ipaddress.IPv4Address):
17
+ return str(obj)
18
+ if isinstance(obj, datetime):
19
+ return obj.time()
20
+ if isinstance(obj, time):
21
+ return obj.hour * 3600 + obj.minute * 60 + obj.second + obj.microsecond / 1_000_000
22
+ if isinstance(obj, bytes):
23
+ return obj.decode()
24
+ if isinstance(obj, Decimal):
25
+ return str(obj)
26
+ return super().default(obj)
27
+
28
+
29
+ def with_serializer(fn):
30
+ """
31
+ Decorator to apply custom serialization to CH query tool result.
32
+ Should be applied as a first decorator of the tool function.
33
+
34
+ :returns: sync/async wrapper of mcp tool function
35
+ """
36
+
37
+ @wraps(fn)
38
+ def wrapper(*args, **kwargs):
39
+ """
40
+ Sync wrapper of mcp tool `fn` function.
41
+ Function should return a dict or None.
42
+
43
+ :returns: ToolResult object with text-serialized and structured content.
44
+ """
45
+ result = fn(*args, **kwargs)
46
+ if not isinstance(result, dict):
47
+ result = {"result": result}
48
+ enc = json.dumps(result, cls=ExtendedEncoder)
49
+ return ToolResult(content=enc, structured_content=json.loads(enc))
50
+
51
+ @wraps(fn)
52
+ async def async_wrapper(*args, **kwargs):
53
+ """
54
+ Async wrapper of mcp tool `fn` function.
55
+ Function should return a dict or None.
56
+
57
+ :returns: ToolResult object with text-serialized and structured content.
58
+ """
59
+ result = await fn(*args, **kwargs)
60
+ if not isinstance(result, dict):
61
+ result = {"result": result}
62
+ enc = json.dumps(result, cls=ExtendedEncoder)
63
+ return ToolResult(content=enc, structured_content=json.loads(enc))
64
+
65
+ # TODO: remove next signature fix code when a new fastmcp released (https://github.com/jlowin/fastmcp/issues/2524)
66
+ new_fn = fastmcp.utilities.types.create_function_without_params(fn, ["ctx"])
67
+ sig = inspect.signature(new_fn)
68
+ async_wrapper.__signature__ = sig
69
+ wrapper.__signature__ = sig
70
+ return async_wrapper if inspect.iscoroutinefunction(fn) else wrapper