mcp-hydrolix 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,324 @@
1
+ """Environment configuration for the MCP Hydrolix server.
2
+
3
+ This module handles all environment variable configuration with sensible defaults
4
+ and type conversion.
5
+ """
6
+
7
+ import os
8
+ from dataclasses import dataclass
9
+ from enum import Enum
10
+ from typing import Optional
11
+
12
+ from mcp_hydrolix.auth.credentials import HydrolixCredential, ServiceAccountToken, UsernamePassword
13
+
14
+
15
class TransportType(str, Enum):
    """Transport mechanisms the MCP server can be started with."""

    STDIO = "stdio"
    HTTP = "http"
    SSE = "sse"

    @classmethod
    def values(cls) -> list[str]:
        """Return the string value of every member, in definition order."""
        collected = []
        for member in cls:
            collected.append(member.value)
        return collected
26
+
27
+
28
@dataclass
class HydrolixConfig:
    """Configuration for Hydrolix connection settings.

    Values are read from environment variables each time a property is accessed, with
    defaults and type conversion applied. Only the default credential is captured once,
    in ``__init__``.

    Required environment variables:
        HYDROLIX_HOST: The hostname of the Hydrolix server

    Optional environment variables (with defaults):
        HYDROLIX_TOKEN: Service account token to the Hydrolix Server (this or user+password is required)
        HYDROLIX_USER: The username for authentication (this or token is required)
        HYDROLIX_PASSWORD: The password for authentication (this or token is required)
        HYDROLIX_PORT: The port number (default: 8088)
        HYDROLIX_VERIFY: Verify SSL certificates (default: true)
        HYDROLIX_SECURE: Use a secured connection (default: true)
        HYDROLIX_CONNECT_TIMEOUT: Connection timeout in seconds (default: 30)
        HYDROLIX_SEND_RECEIVE_TIMEOUT: Send/receive timeout in seconds (default: 300)
        HYDROLIX_QUERY_TIMEOUT_SECS: Value exposed as ``query_timeout_sec`` (default: 30)
        HYDROLIX_DATABASE: Default database to use (default: None)
        HYDROLIX_PROXY_PATH: Path to be added to the host URL. For instance, for servers behind an HTTP proxy (default: None)
        HYDROLIX_MCP_SERVER_TRANSPORT: MCP server transport method - "stdio", "http", or "sse" (default: stdio)
        HYDROLIX_MCP_BIND_HOST: Host to bind the MCP server to when using HTTP or SSE transport (default: 127.0.0.1)
        HYDROLIX_MCP_BIND_PORT: Port to bind the MCP server to when using HTTP or SSE transport (default: 8000)
        HYDROLIX_QUERIES_POOL_SIZE: Value exposed as ``query_pool_size`` and passed to the client as ``executor_threads`` (default: 100)
        HYDROLIX_MCP_REQUEST_TIMEOUT: Request timeout in seconds (default: 120)
        HYDROLIX_MCP_WORKERS: Number of worker processes (default: 1)
        HYDROLIX_MCP_WORKER_CONNECTIONS: Max concurrent requests per worker (default: 100)
        HYDROLIX_MCP_MAX_REQUESTS: Requests handled by a worker before it is restarted (default: 10000)
        HYDROLIX_MCP_MAX_REQUESTS_JITTER: Jitter applied to the max-requests restart threshold (default: 1000)
        HYDROLIX_MCP_MAX_KEEPALIVE: Seconds idle keepalive connections are kept alive (default: 10)
    """

    def __init__(self) -> None:
        """Initialize the configuration from environment variables.

        Raises:
            ValueError: If a required variable is missing or user/password
                authentication is only partially configured.
        """
        self._validate_required_vars()
        # Credential to use for clickhouse connections when no per-request credential is provided
        self._default_credential: Optional[HydrolixCredential] = None

        # Set the default credential to the service account from the environment, if available
        if (global_service_account := os.environ.get("HYDROLIX_TOKEN")) is not None:
            self._default_credential = ServiceAccountToken(
                global_service_account, f"https://{self.host}/config"
            )
        elif (global_username := os.environ.get("HYDROLIX_USER")) is not None and (
            global_password := os.environ.get("HYDROLIX_PASSWORD")
        ) is not None:
            # No global service account available. Set the default credential to the username/password
            # from the environment, if available
            self._default_credential = UsernamePassword(global_username, global_password)

    @staticmethod
    def _env_int(name: str, default: int) -> int:
        """Read an integer environment variable, falling back to *default* when unset.

        Raises:
            ValueError: If the variable is set but is not a valid integer. The
                message names the offending variable.
        """
        raw = os.environ.get(name)
        if raw is None:
            return default
        try:
            return int(raw)
        except ValueError as err:
            raise ValueError(
                f"Environment variable {name} must be an integer, got {raw!r}"
            ) from err

    @staticmethod
    def _env_flag(name: str, default: str = "true") -> bool:
        """Return True only when the variable (or *default*) equals "true", ignoring case."""
        return os.getenv(name, default).lower() == "true"

    def creds_with(self, request_credential: Optional[HydrolixCredential]) -> HydrolixCredential:
        """Pick the credential to use for a request.

        Args:
            request_credential: Per-request credential; takes precedence when not None.

        Returns:
            The request credential if given, otherwise the default credential
            captured from the environment at construction time.

        Raises:
            ValueError: If neither a request credential nor a default credential exists.
        """
        if request_credential is not None:
            return request_credential
        elif self._default_credential is not None:
            return self._default_credential
        else:
            raise ValueError(
                "No credentials available for Hydrolix connection. "
                "Please provide credentials either through HYDROLIX_TOKEN or "
                "HYDROLIX_USER/HYDROLIX_PASSWORD environment variables, "
                "or pass credentials explicitly via the creds parameter."
            )

    @property
    def host(self) -> str:
        """Get the Hydrolix host. Called during __init__"""
        return os.environ["HYDROLIX_HOST"]

    @property
    def port(self) -> int:
        """Get the Hydrolix port.

        Defaults to 8088.
        Can be overridden by HYDROLIX_PORT environment variable.
        """
        return self._env_int("HYDROLIX_PORT", 8088)

    @property
    def database(self) -> Optional[str]:
        """Get the default database name if set."""
        return os.getenv("HYDROLIX_DATABASE")

    @property
    def verify(self) -> bool:
        """Get whether SSL certificate verification is enabled.

        Only the value "true" (any case) enables it; any other value disables it.
        Default: True
        """
        return self._env_flag("HYDROLIX_VERIFY")

    @property
    def secure(self) -> bool:
        """Get whether to use a secured connection.

        Only the value "true" (any case) enables it; any other value disables it.
        Default: True
        """
        return self._env_flag("HYDROLIX_SECURE")

    @property
    def connect_timeout(self) -> int:
        """Get the connection timeout in seconds.

        Default: 30
        """
        return self._env_int("HYDROLIX_CONNECT_TIMEOUT", 30)

    @property
    def send_receive_timeout(self) -> int:
        """Get the send/receive timeout in seconds.

        Default: 300 (Hydrolix default)
        """
        return self._env_int("HYDROLIX_SEND_RECEIVE_TIMEOUT", 300)

    @property
    def query_pool_size(self) -> int:
        """Get the query pool size (HYDROLIX_QUERIES_POOL_SIZE).

        Passed to the client configuration as ``executor_threads``.
        Default: 100
        """
        return self._env_int("HYDROLIX_QUERIES_POOL_SIZE", 100)

    @property
    def query_timeout_sec(self) -> int:
        """Get the query timeout in seconds (HYDROLIX_QUERY_TIMEOUT_SECS).

        Default: 30
        """
        return self._env_int("HYDROLIX_QUERY_TIMEOUT_SECS", 30)

    @property
    def proxy_path(self) -> Optional[str]:
        """Get the proxy path (HYDROLIX_PROXY_PATH) if set."""
        return os.getenv("HYDROLIX_PROXY_PATH")

    @property
    def mcp_server_transport(self) -> str:
        """Get the MCP server transport method.

        Valid options: "stdio", "http", "sse"
        Default: "stdio"

        Raises:
            ValueError: If the configured transport is not one of the valid options.
        """
        transport = os.getenv("HYDROLIX_MCP_SERVER_TRANSPORT", TransportType.STDIO.value).lower()

        # Validate transport type
        if transport not in TransportType.values():
            valid_options = ", ".join(f'"{t}"' for t in TransportType.values())
            raise ValueError(f"Invalid transport '{transport}'. Valid options: {valid_options}")
        return transport

    @property
    def mcp_bind_host(self) -> str:
        """Get the host to bind the MCP server to.

        Only used when transport is "http" or "sse".
        Default: "127.0.0.1"
        """
        return os.getenv("HYDROLIX_MCP_BIND_HOST", "127.0.0.1")

    @property
    def mcp_bind_port(self) -> int:
        """Get the port to bind the MCP server to.

        Only used when transport is "http" or "sse".
        Default: 8000
        """
        return self._env_int("HYDROLIX_MCP_BIND_PORT", 8000)

    @property
    def mcp_timeout(self) -> int:
        """Get the request timeout in seconds.

        Only used when transport is "http" or "sse".
        Default: 120
        """
        return self._env_int("HYDROLIX_MCP_REQUEST_TIMEOUT", 120)

    @property
    def mcp_workers(self) -> int:
        """Get the number of worker processes.

        Only used when transport is "http" or "sse".
        Default: 1
        """
        return self._env_int("HYDROLIX_MCP_WORKERS", 1)

    @property
    def mcp_worker_connections(self) -> int:
        """Get the max number of concurrent requests per worker.

        Only used when transport is "http" or "sse".
        Default: 100
        """
        return self._env_int("HYDROLIX_MCP_WORKER_CONNECTIONS", 100)

    @property
    def mcp_max_requests_jitter(self) -> int:
        """Get the jitter used to randomize when a process is reloaded after max_requests.

        Only used when transport is "http" or "sse".
        Default: 1000
        """
        return self._env_int("HYDROLIX_MCP_MAX_REQUESTS_JITTER", 1000)

    @property
    def mcp_max_requests(self) -> int:
        """Get the max number of requests handled by a worker before it is restarted.

        Only used when transport is "http" or "sse".
        Default: 10000
        """
        return self._env_int("HYDROLIX_MCP_MAX_REQUESTS", 10000)

    @property
    def mcp_keepalive(self) -> int:
        """Get the number of seconds idle keepalive connections are kept alive.

        Only used when transport is "http" or "sse".
        Default: 10
        """
        return self._env_int("HYDROLIX_MCP_MAX_KEEPALIVE", 10)

    def get_client_config(self, request_credential: Optional[HydrolixCredential]) -> dict:
        """
        Get the configuration dictionary for clickhouse_connect client.

        Args:
            request_credential: Optional credentials to use for this request. If not provided,
                falls back to the default credential for this HydrolixConfig

        Returns:
            dict: Configuration ready to be passed to clickhouse_connect.get_client()

        Raises:
            ValueError: If no credentials could be inferred for the request (either from
                the startup environment or provided in the request)
        """
        config = {
            "host": self.host,
            "port": self.port,
            "secure": self.secure,
            "verify": self.verify,
            "connect_timeout": self.connect_timeout,
            "send_receive_timeout": self.send_receive_timeout,
            "executor_threads": self.query_pool_size,
            "client_name": "mcp_hydrolix",
        }

        # Add optional database if set
        if self.database:
            config["database"] = self.database

        if self.proxy_path:
            config["proxy_path"] = self.proxy_path

        # Add credentials
        config |= self.creds_with(request_credential).clickhouse_config_entries()

        return config

    def _validate_required_vars(self) -> None:
        """Validate that all required environment variables are set. Called during __init__.

        Raises:
            ValueError: If any required environment variable is missing, or if only
                one of HYDROLIX_USER / HYDROLIX_PASSWORD is set.
        """
        required_vars = ["HYDROLIX_HOST"]
        missing_vars = [var for var in required_vars if var not in os.environ]

        # HYDROLIX_USER and HYDROLIX_PASSWORD must either be both present or both absent.
        # This check is reported before missing required variables.
        if ("HYDROLIX_USER" in os.environ) != ("HYDROLIX_PASSWORD" in os.environ):
            raise ValueError(
                "User/password authentication is only partially configured: set both HYDROLIX_USER and HYDROLIX_PASSWORD"
            )

        if missing_vars:
            raise ValueError(f"Missing required environment variables: {', '.join(missing_vars)}")
309
+
310
+
311
# Module-level slot holding the shared HydrolixConfig; filled lazily by get_config().
_CONFIG_INSTANCE = None


def get_config():
    """
    Return the shared HydrolixConfig instance.

    The instance is built on the first call and the same object is returned
    on every later call.
    """
    global _CONFIG_INSTANCE
    if _CONFIG_INSTANCE is not None:
        return _CONFIG_INSTANCE
    # Construction is deferred to the first call so that load_dotenv() has likely run by then.
    _CONFIG_INSTANCE = HydrolixConfig()
    return _CONFIG_INSTANCE
@@ -0,0 +1,321 @@
1
+ import json
2
+ import logging
3
+ import signal
4
+ from collections.abc import Sequence
5
+ from dataclasses import asdict, is_dataclass
6
+ from typing import Any, Final, Optional, List, cast, TypedDict
7
+
8
+ import clickhouse_connect
9
+ from clickhouse_connect import common
10
+ from clickhouse_connect.driver import httputil
11
+ from clickhouse_connect.driver.binding import format_query_value
12
+ from dotenv import load_dotenv
13
+ from fastmcp import FastMCP
14
+ from fastmcp.exceptions import ToolError
15
+ from fastmcp.server.dependencies import get_access_token
16
+ from pydantic import Field
17
+ from pydantic.dataclasses import dataclass
18
+ from starlette.requests import Request
19
+ from starlette.responses import PlainTextResponse
20
+
21
+ from mcp_hydrolix.auth import (
22
+ AccessToken,
23
+ HydrolixCredential,
24
+ HydrolixCredentialChain,
25
+ ServiceAccountToken,
26
+ UsernamePassword,
27
+ )
28
+ from mcp_hydrolix.mcp_env import HydrolixConfig, get_config
29
+ from mcp_hydrolix.utils import with_serializer
30
+
31
+
32
+ @dataclass
33
+ class Column:
34
+ database: str
35
+ table: str
36
+ name: str
37
+ column_type: str
38
+ default_kind: Optional[str]
39
+ default_expression: Optional[str]
40
+ comment: Optional[str]
41
+
42
+
43
# One table description; field names mirror the columns selected from system.tables.
@dataclass
class Table:
    database: str
    name: str
    engine: str
    create_table_query: str
    dependencies_database: list[str]
    dependencies_table: list[str]
    engine_full: str
    sorting_key: str
    primary_key: str
    # Size/part statistics are required fields but may be None.
    total_rows: int | None
    total_bytes: int | None
    total_bytes_uncompressed: int | None
    parts: int | None
    active_parts: int | None
    total_marks: int | None
    # Not selected by the table query; filled in afterwards with the table's columns.
    columns: list[Column] | None = Field([])
    comment: str | None = None
62
+
63
+
64
class HdxQueryResult(TypedDict):
    """Shape of a query result: column names plus the result rows.

    This is a plain TypedDict, so ``HdxQueryResult(columns=..., rows=...)`` builds an
    ordinary ``dict`` accessed with ``result["columns"]`` / ``result["rows"]``.
    It is intentionally not decorated with ``@dataclass``: a TypedDict call always
    produces a dict, so the decorator had no effect on instances.
    """

    columns: List[str]
    rows: List[List[Any]]
68
+
69
+
70
# Name shared by the FastMCP server instance and this module's logger.
MCP_SERVER_NAME = "mcp-hydrolix"
logger = logging.getLogger(MCP_SERVER_NAME)

# Runs before get_config() below, so values from a .env file are in place
# when the configuration reads os.environ.
load_dotenv()

# Process-wide configuration singleton.
HYDROLIX_CONFIG: Final[HydrolixConfig] = get_config()

# The MCP server; its auth provider is pointed at the configured host's /config URL.
mcp = FastMCP(
    name=MCP_SERVER_NAME,
    auth=HydrolixCredentialChain(f"https://{HYDROLIX_CONFIG.host}/config"),
)
81
+
82
+
83
def get_request_credential() -> Optional[HydrolixCredential]:
    """Return the credential carried by the current request's access token, if any.

    Returns:
        The credential derived from the access token, or None when there is no token.

    Raises:
        ValueError: If a token is present but is not a Hydrolix AccessToken.
    """
    token = get_access_token()
    if token is None:
        return None
    if not isinstance(token, AccessToken):
        raise ValueError(
            "Found non-hydrolix access token on request -- this should be impossible!"
        )
    return token.as_credential()
92
+
93
+
94
async def create_hydrolix_client(pool_mgr, request_credential: Optional[HydrolixCredential]):
    """
    Build an async client for operations against query-head.

    The client eagerly issues requests to initialize properties such as
    `server_version`, so this coroutine may raise. Any failure is logged and re-raised.

    INV: clients returned by this method MUST NOT be reused across sessions, because
    they can close over per-session credentials.
    """
    creds = HYDROLIX_CONFIG.creds_with(request_credential)
    # Describe the identity for the log line without logging any secret.
    if isinstance(creds, UsernamePassword):
        auth_info = f"as {creds.username}"
    else:
        auth_info = f"using service account {cast(ServiceAccountToken, creds).service_account_id}"
    logger.info(
        f"Creating Hydrolix client connection to {HYDROLIX_CONFIG.host}:{HYDROLIX_CONFIG.port} "
        f"{auth_info} "
        f"(connect_timeout={HYDROLIX_CONFIG.connect_timeout}s, "
        f"send_receive_timeout={HYDROLIX_CONFIG.send_receive_timeout}s)"
    )

    try:
        client_kwargs = HYDROLIX_CONFIG.get_client_config(request_credential)
        client = await clickhouse_connect.get_async_client(pool_mgr=pool_mgr, **client_kwargs)
        # Reading the server version doubles as a connection test.
        version = client.client.server_version
        logger.info(f"Successfully connected to Hydrolix compatible with ClickHouse {version}")
        return client
    except Exception as e:
        logger.error(f"Failed to connect to Hydrolix: {str(e)}")
        raise
125
+
126
+
127
# allow custom hydrolix settings in CH client
common.set_setting("invalid_setting_action", "send")
common.set_setting("autogenerate_session_id", False)
# Pool manager shared by the clients created in this module; its maxsize comes from
# the configured query pool size.
client_shared_pool = httputil.get_pool_manager(maxsize=HYDROLIX_CONFIG.query_pool_size, num_pools=1)
131
+
132
+
133
def term(*args, **kwargs):
    """Signal handler that clears the shared HTTP connection pool.

    Accepts and ignores whatever arguments the signal machinery passes.
    """
    # NOTE(review): this handler replaces the default SIGINT/SIGTERM behaviour and does
    # not itself stop the process -- confirm shutdown is driven elsewhere.
    client_shared_pool.clear()


# SIGQUIT is not defined on every platform (e.g. Windows), so signals are looked up by
# name and only the ones that exist are registered. This keeps the module importable there.
for _signal_name in ("SIGTERM", "SIGINT", "SIGQUIT"):
    _signum = getattr(signal, _signal_name, None)
    if _signum is not None:
        signal.signal(_signum, term)
140
+
141
+
142
async def execute_query(query: str) -> HdxQueryResult:
    """Run a read-only query and return its column names and rows.

    A fresh client is created for the call using the current request's credential
    and the shared connection pool.

    Args:
        query: SQL text to execute.

    Returns:
        HdxQueryResult with ``columns`` (column names) and ``rows`` (result rows).

    Raises:
        ToolError: If creating the client or running the query fails; the original
            exception is attached as the cause.
    """
    try:
        async with await create_hydrolix_client(
            client_shared_pool, get_request_credential()
        ) as client:
            res = await client.query(
                query,
                settings={
                    "readonly": 1,
                    "hdx_query_max_execution_time": HYDROLIX_CONFIG.query_timeout_sec,
                    "hdx_query_max_attempts": 1,
                    "hdx_query_max_result_rows": 100_000,
                    "hdx_query_max_memory_usage": 2 * 1024 * 1024 * 1024,  # 2GiB
                    "hdx_query_admin_comment": f"User: {MCP_SERVER_NAME}",
                },
            )
            logger.info(f"Query returned {len(res.result_rows)} rows")
            return HdxQueryResult(columns=res.column_names, rows=res.result_rows)
    except Exception as err:
        logger.error(f"Error executing query: {err}")
        # Chain explicitly so the underlying failure is kept as __cause__.
        raise ToolError(f"Query execution failed: {str(err)}") from err
163
+
164
+
165
async def execute_cmd(query: str):
    """Run a command through a freshly created client and return its raw result.

    Args:
        query: Command text to execute.

    Returns:
        Whatever ``client.command`` returns for the command.

    Raises:
        ToolError: If creating the client or running the command fails; the original
            exception is attached as the cause.
    """
    try:
        async with await create_hydrolix_client(
            client_shared_pool, get_request_credential()
        ) as client:
            res = await client.command(query)
            logger.info("Command returned executed.")
            return res
    except Exception as err:
        logger.error(f"Error executing command: {err}")
        # Chain explicitly so the underlying failure is kept as __cause__.
        raise ToolError(f"Command execution failed: {str(err)}") from err
176
+
177
+
178
@mcp.custom_route("/health", methods=["GET"])
async def health_check(request: Request) -> PlainTextResponse:
    """Health check endpoint for monitoring server status.

    Responds with an OK message including the server version when a client can be
    created, and with HTTP 503 and the error text otherwise.
    """
    try:
        # Creating a client verifies query-head connectivity.
        hydrolix_client = await create_hydrolix_client(
            client_shared_pool, get_request_credential()
        )
        async with hydrolix_client as client:
            version = client.client.server_version
            ok_message = f"OK - Connected to Hydrolix compatible with ClickHouse {version}"
            return PlainTextResponse(ok_message)
    except Exception as exc:
        # 503 Service Unavailable when Hydrolix cannot be reached.
        failure_message = f"ERROR - Cannot connect to Hydrolix: {str(exc)}"
        return PlainTextResponse(failure_message, status_code=503)
194
+
195
+
196
def result_to_table(query_columns, result) -> List[Table]:
    """Build one Table per result row, pairing column names with the row's values."""
    tables = []
    for row in result:
        fields = dict(zip(query_columns, row))
        tables.append(Table(**fields))
    return tables
198
+
199
+
200
def result_to_column(query_columns, result) -> List[Column]:
    """Build one Column per result row, pairing column names with the row's values."""
    columns = []
    for row in result:
        fields = dict(zip(query_columns, row))
        columns.append(Column(**fields))
    return columns
202
+
203
+
204
def to_json(obj: Any) -> str:
    """Serialize a dataclass to a JSON string; recurse through lists and dicts.

    The annotated return type covers the primary use case only:
    - dataclass input -> JSON string
    - list / dict input -> list / dict with each element or value converted
    - anything else -> returned unchanged
    The non-str returns exist for recursive use and as the ``default`` hook of
    json.dumps, hence the suppressed type checks.
    """
    if is_dataclass(obj):
        return json.dumps(asdict(obj), default=to_json)
    if isinstance(obj, list):
        return list(map(to_json, obj))  # type: ignore[return-value]
    if isinstance(obj, dict):
        converted = {}
        for key, value in obj.items():
            converted[key] = to_json(value)
        return converted  # type: ignore[return-value]
    return obj  # type: ignore[return-value]
216
+
217
+
218
@mcp.tool()
async def list_databases() -> List[str]:
    """List available Hydrolix databases"""
    logger.info("Listing all databases")
    result = await execute_cmd("SHOW DATABASES")

    if not isinstance(result, str):
        # Non-string results are wrapped as a single entry.
        databases = [result]
    else:
        # A string result is newline-separated; split it and trim each name.
        databases = []
        for line in result.strip().split("\n"):
            databases.append(line.strip())

    logger.info(f"Found {len(databases)} databases")
    return databases
232
+
233
+
234
@mcp.tool()
async def list_tables(
    database: str, like: Optional[str] = None, not_like: Optional[str] = None
) -> List[Table]:
    """List available Hydrolix tables in a database, including schema, comment,
    row count, and column count."""
    logger.info(f"Listing tables in database '{database}'")
    query = f"""
        SELECT database, name, engine, create_table_query, dependencies_database,
        dependencies_table, engine_full, sorting_key, primary_key, total_rows, total_bytes,
        total_bytes_uncompressed, parts, active_parts, total_marks, comment
        FROM system.tables WHERE database = {format_query_value(database)}"""
    # Optional name filters; values are escaped through format_query_value.
    if like:
        query = query + f" AND name LIKE {format_query_value(like)}"
    if not_like:
        query = query + f" AND name NOT LIKE {format_query_value(not_like)}"

    table_rows = await execute_query(query)

    # Deserialize result as Table dataclass instances
    tables = result_to_table(table_rows["columns"], table_rows["rows"])

    # One follow-up query per table to attach its column descriptions.
    for table in tables:
        column_data_query = f"""
            SELECT database, table, name, type AS column_type, default_kind, default_expression, comment
            FROM system.columns
            WHERE database = {format_query_value(database)} AND table = {format_query_value(table.name)}"""
        column_rows = await execute_query(column_data_query)
        table.columns = list(
            result_to_column(column_rows["columns"], column_rows["rows"])
        )

    logger.info(f"Found {len(tables)} tables")
    return tables
273
+
274
+
275
@mcp.tool()
@with_serializer
async def run_select_query(query: str) -> dict[str, tuple | Sequence[str | Sequence[Any]]]:
    """Run a SELECT query in a Hydrolix time-series database using the Clickhouse SQL dialect.
    Queries run using this tool will timeout after 30 seconds.

    The primary key on tables queried this way is always a timestamp. Queries should include either
    a LIMIT clause or a filter based on the primary key as a performance guard to ensure they return
    in a reasonable amount of time. Queries should select specific fields and avoid the use of
    SELECT * to avoid performance issues. The performance guard used for the query should be clearly
    communicated with the user, and the user should be informed that the query may take a long time
    to run if the performance guard is not used. When choosing a performance guard, the user's
    preference should be requested and used if available. When using aggregations, the performance
    guard should take form of a primary key filter, or else the LIMIT should be applied in a
    subquery before applying the aggregations.

    When matching columns based on substrings, prefix or suffix matches should be used instead of
    full-text search whenever possible. When searching for substrings, the syntax `column LIKE
    '%suffix'` or `column LIKE 'prefix%'` should be used.

    Example query. Purpose: get logs from the `application.logs` table. Primary key: `timestamp`.
    Performance guard: 10 minute recency filter.

    `SELECT message, timestamp FROM application.logs WHERE timestamp > now() - INTERVAL 10 MINUTES`

    Example query. Purpose: get the median humidity from the `weather.measurements` table. Primary
    key: `date`. Performance guard: 1000 row limit, applied before aggregation.

    `SELECT median(humidity) FROM (SELECT humidity FROM weather.measurements LIMIT 1000)`

    Example query. Purpose: get the lowest temperature from the `weather.measurements` table over
    the last 10 years. Primary key: `date`. Performance guard: date range filter.

    `SELECT min(temperature) FROM weather.measurements WHERE date > now() - INTERVAL 10 YEARS`

    Example query. Purpose: get the app name with the most log messages from the `application.logs`
    table in the window between new year and valentine's day of 2024. Primary key: `timestamp`.
    Performance guard: date range filter.
    `SELECT app, count(*) FROM application.logs WHERE timestamp > '2024-01-01' AND timestamp < '2024-02-14' GROUP BY app ORDER BY count(*) DESC LIMIT 1`
    """
    # The docstring above is kept verbatim because it is the tool description.
    logger.info(f"Executing SELECT query: {query}")
    try:
        result = await execute_query(query=query)
        return result
    except Exception as e:
        # NOTE(review): execute_query already raises ToolError, so the message is wrapped
        # twice here -- confirm whether that is intended.
        logger.error(f"Unexpected error in run_select_query: {str(e)}")
        # Chain explicitly so the underlying failure is kept as __cause__.
        raise ToolError(f"Unexpected error during query execution: {str(e)}") from e