mcp-hydrolix 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,154 +1,285 @@
1
+ import json
1
2
  import logging
2
- from typing import Sequence
3
- import concurrent.futures
4
- import atexit
3
+ import signal
4
+ from collections.abc import Sequence
5
+ from dataclasses import asdict, is_dataclass
6
+ from typing import Any, Final, Optional, List, cast, TypedDict
5
7
 
6
8
  import clickhouse_connect
7
- from clickhouse_connect.driver.binding import quote_identifier, format_query_value
9
+ from clickhouse_connect import common
10
+ from clickhouse_connect.driver import httputil
11
+ from clickhouse_connect.driver.binding import format_query_value
8
12
  from dotenv import load_dotenv
9
- from mcp.server.fastmcp import FastMCP
13
+ from fastmcp import FastMCP
14
+ from fastmcp.exceptions import ToolError
15
+ from fastmcp.server.dependencies import get_access_token
16
+ from pydantic import Field
17
+ from pydantic.dataclasses import dataclass
18
+ from starlette.requests import Request
19
+ from starlette.responses import PlainTextResponse
20
+
21
+ from .auth import (
22
+ AccessToken,
23
+ HydrolixCredential,
24
+ HydrolixCredentialChain,
25
+ ServiceAccountToken,
26
+ UsernamePassword,
27
+ )
28
+ from .mcp_env import HydrolixConfig, get_config
29
+ from .utils import with_serializer
30
+
31
+
32
+ @dataclass
33
+ class Column:
34
+ database: str
35
+ table: str
36
+ name: str
37
+ column_type: str
38
+ default_kind: Optional[str]
39
+ default_expression: Optional[str]
40
+ comment: Optional[str]
41
+
42
+
43
+ @dataclass
44
+ class Table:
45
+ database: str
46
+ name: str
47
+ engine: str
48
+ create_table_query: str
49
+ dependencies_database: List[str]
50
+ dependencies_table: List[str]
51
+ engine_full: str
52
+ sorting_key: str
53
+ primary_key: str
54
+ total_rows: Optional[int]
55
+ total_bytes: Optional[int]
56
+ total_bytes_uncompressed: Optional[int]
57
+ parts: Optional[int]
58
+ active_parts: Optional[int]
59
+ total_marks: Optional[int]
60
+ columns: Optional[List[Column]] = Field([])
61
+ comment: Optional[str] = None
62
+
63
+
64
+ @dataclass
65
+ class HdxQueryResult(TypedDict):
66
+ columns: List[str]
67
+ rows: List[List[Any]]
10
68
 
11
- from mcp_hydrolix.mcp_env import get_config
12
69
 
13
70
  MCP_SERVER_NAME = "mcp-hydrolix"
71
+ logger = logging.getLogger(MCP_SERVER_NAME)
72
+
73
+ load_dotenv()
74
+
75
+ HYDROLIX_CONFIG: Final[HydrolixConfig] = get_config()
14
76
 
15
- # Configure logging
16
- logging.basicConfig(
17
- level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
77
+ mcp = FastMCP(
78
+ name=MCP_SERVER_NAME,
79
+ dependencies=[
80
+ "clickhouse-connect",
81
+ "python-dotenv",
82
+ "pip-system-certs",
83
+ ],
84
+ auth=HydrolixCredentialChain(f"https://{HYDROLIX_CONFIG.host}/config"),
18
85
  )
19
- logger = logging.getLogger(MCP_SERVER_NAME)
20
86
 
21
- QUERY_EXECUTOR = concurrent.futures.ThreadPoolExecutor(max_workers=10)
22
- atexit.register(lambda: QUERY_EXECUTOR.shutdown(wait=True))
23
- SELECT_QUERY_TIMEOUT_SECS = 30
24
87
 
25
- load_dotenv()
88
+ def get_request_credential() -> Optional[HydrolixCredential]:
89
+ if (token := get_access_token()) is not None:
90
+ if isinstance(token, AccessToken):
91
+ return token.as_credential()
92
+ else:
93
+ raise ValueError(
94
+ "Found non-hydrolix access token on request -- this should be impossible!"
95
+ )
96
+ return None
97
+
98
+
99
+ async def create_hydrolix_client(pool_mgr, request_credential: Optional[HydrolixCredential]):
100
+ """
101
+ Create a client for operations against query-head. Note that this eagerly issues requests for initialization
102
+ of properties like `server_version`, and so may throw exceptions.
103
+ INV: clients returned by this method MUST NOT be reused across sessions, because they can close over per-session
104
+ credentials.
105
+ """
106
+ creds = HYDROLIX_CONFIG.creds_with(request_credential)
107
+ auth_info = (
108
+ f"as {creds.username}"
109
+ if isinstance(creds, UsernamePassword)
110
+ else f"using service account {cast(ServiceAccountToken, creds).service_account_id}"
111
+ )
112
+ logger.info(
113
+ f"Creating Hydrolix client connection to {HYDROLIX_CONFIG.host}:{HYDROLIX_CONFIG.port} "
114
+ f"{auth_info} "
115
+ f"(connect_timeout={HYDROLIX_CONFIG.connect_timeout}s, "
116
+ f"send_receive_timeout={HYDROLIX_CONFIG.send_receive_timeout}s)"
117
+ )
118
+
119
+ try:
120
+ client = await clickhouse_connect.get_async_client(
121
+ pool_mgr=pool_mgr, **HYDROLIX_CONFIG.get_client_config(request_credential)
122
+ )
123
+ # Test the connection
124
+ version = client.client.server_version
125
+ logger.info(f"Successfully connected to Hydrolix compatible with ClickHouse {version}")
126
+ return client
127
+ except Exception as e:
128
+ logger.error(f"Failed to connect to Hydrolix: {str(e)}")
129
+ raise
130
+
131
+
132
+ # allow custom hydrolix settings in CH client
133
+ common.set_setting("invalid_setting_action", "send")
134
+ common.set_setting("autogenerate_session_id", False)
135
+ client_shared_pool = httputil.get_pool_manager(maxsize=HYDROLIX_CONFIG.query_pool_size, num_pools=1)
136
+
137
+
138
+ def term(*args, **kwargs):
139
+ client_shared_pool.clear()
140
+
141
+
142
+ signal.signal(signal.SIGTERM, term)
143
+ signal.signal(signal.SIGINT, term)
144
+ signal.signal(signal.SIGQUIT, term)
145
+
146
+
147
+ async def execute_query(query: str) -> HdxQueryResult:
148
+ try:
149
+ async with await create_hydrolix_client(
150
+ client_shared_pool, get_request_credential()
151
+ ) as client:
152
+ res = await client.query(
153
+ query,
154
+ settings={
155
+ "readonly": 1,
156
+ "hdx_query_max_execution_time": HYDROLIX_CONFIG.query_timeout_sec,
157
+ "hdx_query_max_attempts": 1,
158
+ "hdx_query_max_result_rows": 100_000,
159
+ "hdx_query_max_memory_usage": 2 * 1024 * 1024 * 1024, # 2GiB
160
+ "hdx_query_admin_comment": f"User: {MCP_SERVER_NAME}",
161
+ },
162
+ )
163
+ logger.info(f"Query returned {len(res.result_rows)} rows")
164
+ return HdxQueryResult(columns=res.column_names, rows=res.result_rows)
165
+ except Exception as err:
166
+ logger.error(f"Error executing query: {err}")
167
+ raise ToolError(f"Query execution failed: {str(err)}")
168
+
169
+
170
+ async def execute_cmd(query: str):
171
+ try:
172
+ async with await create_hydrolix_client(
173
+ client_shared_pool, get_request_credential()
174
+ ) as client:
175
+ res = await client.command(query)
176
+ logger.info("Command returned executed.")
177
+ return res
178
+ except Exception as err:
179
+ logger.error(f"Error executing command: {err}")
180
+ raise ToolError(f"Command execution failed: {str(err)}")
181
+
182
+
183
+ @mcp.custom_route("/health", methods=["GET"])
184
+ async def health_check(request: Request) -> PlainTextResponse:
185
+ """Health check endpoint for monitoring server status.
186
+
187
+ Returns OK if the server is running and can connect to Hydrolix.
188
+ """
189
+ try:
190
+ # Try to create a client connection to verify query-head connectivity
191
+ async with await create_hydrolix_client(
192
+ client_shared_pool, get_request_credential()
193
+ ) as client:
194
+ version = client.client.server_version
195
+ return PlainTextResponse(f"OK - Connected to Hydrolix compatible with ClickHouse {version}")
196
+ except Exception as e:
197
+ # Return 503 Service Unavailable if we can't connect to Hydrolix
198
+ return PlainTextResponse(f"ERROR - Cannot connect to Hydrolix: {str(e)}", status_code=503)
199
+
26
200
 
27
- deps = [
28
- "clickhouse-connect",
29
- "python-dotenv",
30
- "uvicorn",
31
- "pip-system-certs",
32
- ]
201
+ def result_to_table(query_columns, result) -> List[Table]:
202
+ return [Table(**dict(zip(query_columns, row))) for row in result]
33
203
 
34
- mcp = FastMCP(MCP_SERVER_NAME, dependencies=deps)
204
+
205
+ def result_to_column(query_columns, result) -> List[Column]:
206
+ return [Column(**dict(zip(query_columns, row))) for row in result]
207
+
208
+
209
+ def to_json(obj: Any) -> str:
210
+ # This function technically returns different types:
211
+ # - str for dataclasses (the primary use case)
212
+ # - list/dict/Any for recursive processing during serialization
213
+ # Type checking is suppressed for non-str returns as they're only used internally by json.dumps
214
+ if is_dataclass(obj):
215
+ return json.dumps(asdict(obj), default=to_json)
216
+ elif isinstance(obj, list):
217
+ return [to_json(item) for item in obj] # type: ignore[return-value]
218
+ elif isinstance(obj, dict):
219
+ return {key: to_json(value) for key, value in obj.items()} # type: ignore[return-value]
220
+ return obj # type: ignore[return-value]
35
221
 
36
222
 
37
223
  @mcp.tool()
38
- def list_databases():
224
+ async def list_databases() -> List[str]:
39
225
  """List available Hydrolix databases"""
40
226
  logger.info("Listing all databases")
41
- client = create_hydrolix_client()
42
- result = client.command("SHOW DATABASES")
43
- logger.info(f"Found {len(result) if isinstance(result, list) else 1} databases")
44
- return result
227
+ result = await execute_cmd("SHOW DATABASES")
228
+
229
+ # Convert newline-separated string to list and trim whitespace
230
+ if isinstance(result, str):
231
+ databases = [db.strip() for db in result.strip().split("\n")]
232
+ else:
233
+ databases = [result]
234
+
235
+ logger.info(f"Found {len(databases)} databases")
236
+ return databases
45
237
 
46
238
 
47
239
  @mcp.tool()
48
- def list_tables(database: str, like: str = None):
49
- """List available Hydrolix tables in a database"""
240
+ async def list_tables(
241
+ database: str, like: Optional[str] = None, not_like: Optional[str] = None
242
+ ) -> List[Table]:
243
+ """List available Hydrolix tables in a database, including schema, comment,
244
+ row count, and column count."""
50
245
  logger.info(f"Listing tables in database '{database}'")
51
- client = create_hydrolix_client()
52
- query = f"SHOW TABLES FROM {quote_identifier(database)}"
246
+ query = f"""
247
+ SELECT database, name, engine, create_table_query, dependencies_database,
248
+ dependencies_table, engine_full, sorting_key, primary_key, total_rows, total_bytes,
249
+ total_bytes_uncompressed, parts, active_parts, total_marks, comment
250
+ FROM system.tables WHERE database = {format_query_value(database)}"""
53
251
  if like:
54
- query += f" LIKE {format_query_value(like)}"
55
- result = client.command(query)
56
-
57
- # Get all table comments in one query
58
- table_comments_query = (
59
- f"SELECT name, comment, primary_key FROM system.tables WHERE database = {format_query_value(database)} and engine = 'TurbineStorage' and total_rows > 0"
60
- )
61
- table_comments_result = client.query(table_comments_query)
62
- table_comments = {row[0]: row[1] for row in table_comments_result.result_rows}
63
- primary_keys = {row[0]: row[2] for row in table_comments_result.result_rows}
64
-
65
- # Get all column comments in one query
66
- column_comments_query = f"SELECT table, name, comment FROM system.columns WHERE database = {format_query_value(database)}"
67
- column_comments_result = client.query(column_comments_query)
68
- column_comments = {}
69
- for row in column_comments_result.result_rows:
70
- table, col_name, comment = row
71
- if table not in column_comments:
72
- column_comments[table] = {}
73
- column_comments[table][col_name] = comment
74
-
75
- def get_table_info(table):
76
- logger.info(f"Getting schema info for table {database}.{table}")
77
- schema_query = f"DESCRIBE TABLE {quote_identifier(database)}.{quote_identifier(table)}"
78
- schema_result = client.query(schema_query)
79
-
80
- columns = []
81
- column_names = schema_result.column_names
82
- for row in schema_result.result_rows:
83
- column_dict = {}
84
- for i, col_name in enumerate(column_names):
85
- column_dict[col_name] = row[i]
86
- # Add comment from our pre-fetched comments
87
- if table in column_comments and column_dict["name"] in column_comments[table]:
88
- column_dict["comment"] = column_comments[table][column_dict["name"]]
89
- else:
90
- column_dict["comment"] = None
91
- columns.append(column_dict)
92
-
93
- create_table_query = f"SHOW CREATE TABLE {database}.`{table}`"
94
- create_table_result = client.command(create_table_query)
95
-
96
- return {
97
- "database": database,
98
- "name": table,
99
- "comment": table_comments.get(table),
100
- "columns": columns,
101
- "create_table_query": create_table_result,
102
- "primary_key": primary_keys.get(table)
103
- }
104
-
105
- tables = []
106
- if isinstance(result, str):
107
- # Single table result
108
- for table in (t.strip() for t in result.split()):
109
- if table:
110
- tables.append(get_table_info(table))
111
- elif isinstance(result, Sequence):
112
- # Multiple table results
113
- for table in result:
114
- tables.append(get_table_info(table))
252
+ query += f" AND name LIKE {format_query_value(like)}"
253
+
254
+ if not_like:
255
+ query += f" AND name NOT LIKE {format_query_value(not_like)}"
256
+
257
+ result = await execute_query(query)
258
+
259
+ # Deserialize result as Table dataclass instances
260
+ tables = result_to_table(result["columns"], result["rows"])
261
+
262
+ for table in tables:
263
+ column_data_query = f"""
264
+ SELECT database, table, name, type AS column_type, default_kind, default_expression, comment
265
+ FROM system.columns
266
+ WHERE database = {format_query_value(database)} AND table = {format_query_value(table.name)}"""
267
+ column_data_query_result = await execute_query(column_data_query)
268
+ table.columns = [
269
+ c
270
+ for c in result_to_column(
271
+ column_data_query_result["columns"],
272
+ column_data_query_result["rows"],
273
+ )
274
+ ]
115
275
 
116
276
  logger.info(f"Found {len(tables)} tables")
117
277
  return tables
118
278
 
119
279
 
120
- def execute_query(query: str):
121
- client = create_hydrolix_client()
122
- try:
123
- res = client.query(
124
- query,
125
- settings={
126
- "readonly": 1,
127
- "hdx_query_max_execution_time": SELECT_QUERY_TIMEOUT_SECS,
128
- "hdx_query_max_attempts": 1,
129
- "hdx_query_max_result_rows": 100_000,
130
- "hdx_query_max_memory_usage": 2 * 1024 * 1024 * 1024, # 2GiB
131
- "hdx_query_admin_comment": f"User: {MCP_SERVER_NAME}",
132
- },
133
- )
134
- column_names = res.column_names
135
- rows = []
136
- for row in res.result_rows:
137
- row_dict = {}
138
- for i, col_name in enumerate(column_names):
139
- row_dict[col_name] = row[i]
140
- rows.append(row_dict)
141
- logger.info(f"Query returned {len(rows)} rows")
142
- return rows
143
- except Exception as err:
144
- logger.error(f"Error executing query: {err}")
145
- # Return a structured dictionary rather than a string to ensure proper serialization
146
- # by the MCP protocol. String responses for errors can cause BrokenResourceError.
147
- return {"error": str(err)}
148
-
149
-
150
280
  @mcp.tool()
151
- def run_select_query(query: str):
281
+ @with_serializer
282
+ async def run_select_query(query: str) -> dict[str, tuple | Sequence[str | Sequence[Any]]]:
152
283
  """Run a SELECT query in a Hydrolix time-series database using the Clickhouse SQL dialect.
153
284
  Queries run using this tool will timeout after 30 seconds.
154
285
 
@@ -188,47 +319,8 @@ def run_select_query(query: str):
188
319
  """
189
320
  logger.info(f"Executing SELECT query: {query}")
190
321
  try:
191
- future = QUERY_EXECUTOR.submit(execute_query, query)
192
- try:
193
- result = future.result(timeout=SELECT_QUERY_TIMEOUT_SECS)
194
- # Check if we received an error structure from execute_query
195
- if isinstance(result, dict) and "error" in result:
196
- logger.warning(f"Query failed: {result['error']}")
197
- # MCP requires structured responses; string error messages can cause
198
- # serialization issues leading to BrokenResourceError
199
- return {"status": "error", "message": f"Query failed: {result['error']}"}
200
- return result
201
- except concurrent.futures.TimeoutError:
202
- logger.warning(f"Query timed out after {SELECT_QUERY_TIMEOUT_SECS} seconds: {query}")
203
- future.cancel()
204
- # Return a properly structured response for timeout errors
205
- return {
206
- "status": "error",
207
- "message": f"Query timed out after {SELECT_QUERY_TIMEOUT_SECS} seconds",
208
- }
322
+ result = await execute_query(query=query)
323
+ return result
209
324
  except Exception as e:
210
325
  logger.error(f"Unexpected error in run_select_query: {str(e)}")
211
- # Catch all other exceptions and return them in a structured format
212
- # to prevent MCP serialization failures
213
- return {"status": "error", "message": f"Unexpected error: {str(e)}"}
214
-
215
-
216
- def create_hydrolix_client():
217
- client_config = get_config().get_client_config()
218
- logger.info(
219
- f"Creating Hydrolix client connection to {client_config['host']}:{client_config['port']} "
220
- f"as {client_config['username']} "
221
- f"(secure={client_config['secure']}, verify={client_config['verify']}, "
222
- f"connect_timeout={client_config['connect_timeout']}s, "
223
- f"send_receive_timeout={client_config['send_receive_timeout']}s)"
224
- )
225
-
226
- try:
227
- client = clickhouse_connect.get_client(**client_config)
228
- # Test the connection
229
- version = client.server_version
230
- logger.info(f"Successfully connected to Hydrolix server version {version}")
231
- return client
232
- except Exception as e:
233
- logger.error(f"Failed to connect to Hydrolix: {str(e)}")
234
- raise
326
+ raise ToolError(f"Unexpected error during query execution: {str(e)}")
mcp_hydrolix/utils.py ADDED
@@ -0,0 +1,70 @@
1
+ import inspect
2
+ import ipaddress
3
+ import json
4
+ from datetime import datetime, time
5
+ from decimal import Decimal
6
+ from functools import wraps
7
+
8
+ import fastmcp.utilities.types
9
+ from fastmcp.tools.tool import ToolResult
10
+
11
+
12
+ class ExtendedEncoder(json.JSONEncoder):
13
+ """Extends JSONEncoder to apply custom serialization of CH data types."""
14
+
15
+ def default(self, obj):
16
+ if isinstance(obj, ipaddress.IPv4Address):
17
+ return str(obj)
18
+ if isinstance(obj, datetime):
19
+ return obj.time()
20
+ if isinstance(obj, time):
21
+ return obj.hour * 3600 + obj.minute * 60 + obj.second + obj.microsecond / 1_000_000
22
+ if isinstance(obj, bytes):
23
+ return obj.decode()
24
+ if isinstance(obj, Decimal):
25
+ return str(obj)
26
+ return super().default(obj)
27
+
28
+
29
+ def with_serializer(fn):
30
+ """
31
+ Decorator to apply custom serialization to CH query tool result.
32
+ Should be applied as a first decorator of the tool function.
33
+
34
+ :returns: sync/async wrapper of mcp tool function
35
+ """
36
+
37
+ @wraps(fn)
38
+ def wrapper(*args, **kwargs):
39
+ """
40
+ Sync wrapper of mcp tool `fn` function.
41
+ Function should return a dict or None.
42
+
43
+ :returns: ToolResult object with text-serialized and structured content.
44
+ """
45
+ result = fn(*args, **kwargs)
46
+ if not isinstance(result, dict):
47
+ result = {"result": result}
48
+ enc = json.dumps(result, cls=ExtendedEncoder)
49
+ return ToolResult(content=enc, structured_content=json.loads(enc))
50
+
51
+ @wraps(fn)
52
+ async def async_wrapper(*args, **kwargs):
53
+ """
54
+ Async wrapper of mcp tool `fn` function.
55
+ Function should return a dict or None.
56
+
57
+ :returns: ToolResult object with text-serialized and structured content.
58
+ """
59
+ result = await fn(*args, **kwargs)
60
+ if not isinstance(result, dict):
61
+ result = {"result": result}
62
+ enc = json.dumps(result, cls=ExtendedEncoder)
63
+ return ToolResult(content=enc, structured_content=json.loads(enc))
64
+
65
+ # TODO: remove next signature fix code when a new fastmcp released (https://github.com/jlowin/fastmcp/issues/2524)
66
+ new_fn = fastmcp.utilities.types.create_function_without_params(fn, ["ctx"])
67
+ sig = inspect.signature(new_fn)
68
+ async_wrapper.__signature__ = sig
69
+ wrapper.__signature__ = sig
70
+ return async_wrapper if inspect.iscoroutinefunction(fn) else wrapper