mcp-hydrolix 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,29 @@
1
+ """Authentication package for MCP Hydrolix.
2
+
3
+ This package contains authentication-related types used to define Hydrolix auth
4
+ in terms of FastMCP infrastructure.
5
+ """
6
+
7
+ from .credentials import (
8
+ HydrolixCredential,
9
+ ServiceAccountToken,
10
+ UsernamePassword,
11
+ )
12
+ from .mcp_providers import (
13
+ TOKEN_PARAM,
14
+ AccessToken,
15
+ ChainedAuthBackend,
16
+ GetParamAuthBackend,
17
+ HydrolixCredentialChain,
18
+ )
19
+
20
+ __all__ = [
21
+ "HydrolixCredential",
22
+ "ServiceAccountToken",
23
+ "UsernamePassword",
24
+ "AccessToken",
25
+ "ChainedAuthBackend",
26
+ "GetParamAuthBackend",
27
+ "HydrolixCredentialChain",
28
+ "TOKEN_PARAM",
29
+ ]
@@ -0,0 +1,63 @@
1
+ """Hydrolix credential types for authentication."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from dataclasses import dataclass
5
+ from typing import Optional
6
+ import jwt
7
+
8
+
9
class HydrolixCredential(ABC):
    """Abstract base for credentials that can be placed into a ClickHouse client config."""

    @abstractmethod
    def clickhouse_config_entries(self) -> dict:
        """
        Build the ClickHouse client config entries required to use this credential.

        Implementations typically contribute `access_token`, or both `username` and `password`.
        """
17
+
18
+
19
@dataclass
class ServiceAccountToken(HydrolixCredential):
    """Hydrolix credentials using a service account token (a JWT)."""

    # NB `@dataclass` leaves an explicitly defined `__init__` in place, so the fields below only
    # drive the generated `__repr__` / `__eq__`; they are populated by the `__init__` that follows.

    # The encoded JWT exactly as supplied by the caller.
    token: str
    # Value of the token's `sub` claim.
    service_account_id: str
    # Value of the token's `iat` claim.
    issued_at: int
    # Value of the token's `exp` claim.
    expires_at: int

    def __init__(self, token: str, expected_iss: Optional[str]):
        """
        Initialize a ServiceAccountToken from a token JWT (or raise an error if the claims are invalid).

        NB the token's signature is NOT checked by this function -- these validations MUST NOT be
        considered authoritative.

        Args:
            token: The encoded JWT.
            expected_iss: Forwarded to `jwt.decode` as `issuer`, with `verify_iss` enabled.

        Raises:
            KeyError: If the decoded claims lack `sub`, `iat` or `exp`.

        Any exception raised by `jwt.decode` for a malformed token or failed claim check propagates
        unchanged.
        """
        claims = jwt.decode(
            token,
            key="",  # NB service account signing key is not publicly-hosted, so we can't verify the signature
            options={
                "verify_signature": False,
                "verify_iss": True,
                "verify_iat": True,
                "verify_exp": True,
            },
            issuer=expected_iss,
        )
        self.token = token
        self.service_account_id = claims["sub"]
        # `iat` is the issued-at timestamp; `iss` is the issuer and must not be stored here.
        self.issued_at = claims["iat"]
        self.expires_at = claims["exp"]

    def clickhouse_config_entries(self) -> dict:
        """Return the ClickHouse config entry that carries the token as `access_token`."""
        return {"access_token": self.token}
53
+
54
+
55
@dataclass
class UsernamePassword(HydrolixCredential):
    """Hydrolix credentials made of a username and a password."""

    username: str
    password: str

    def clickhouse_config_entries(self) -> dict:
        """Return the `username` / `password` entries for a ClickHouse client config."""
        entries = {"username": self.username, "password": self.password}
        return entries
@@ -0,0 +1,137 @@
1
+ """Authentication backends and providers for MCP Hydrolix server."""
2
+
3
+ import time
4
+ from abc import abstractmethod, ABC
5
+ from typing import List, ClassVar, Final, Optional
6
+
7
+ from fastmcp.server.auth import AccessToken as FastMCPAccessToken, AuthProvider
8
+ from mcp.server.auth.middleware.auth_context import (
9
+ AuthContextMiddleware as McpAuthContextMiddleware,
10
+ )
11
+ from mcp.server.auth.middleware.bearer_auth import (
12
+ AuthenticatedUser as McpAuthenticatedUser,
13
+ BearerAuthBackend,
14
+ )
15
+ from mcp.server.auth.provider import TokenVerifier as McpTokenVerifier
16
+ from starlette.authentication import AuthCredentials, AuthenticationBackend
17
+ from starlette.middleware import Middleware
18
+ from starlette.middleware.authentication import AuthenticationMiddleware
19
+ from starlette.requests import HTTPConnection, Request
20
+
21
+ from .credentials import HydrolixCredential, ServiceAccountToken
22
+
23
+ TOKEN_PARAM: Final[str] = "token"
24
+
25
+
26
class ChainedAuthBackend(AuthenticationBackend):
    """
    Generic authentication backend that tries multiple backends in order. Returns the first successful
    authentication result. Only tries an auth method once all previous auth methods have failed.
    """

    def __init__(self, backends: List[AuthenticationBackend]):
        """
        Args:
            backends: Backends to consult, in order of precedence (first entry wins).
        """
        self.backends = backends

    async def authenticate(self, conn: HTTPConnection):
        """
        Return the first non-None result produced by the chained backends, or None if every
        backend returns None.

        Backends after the first success are never awaited. An exception raised by a backend
        propagates to the caller and stops the chain.
        """
        # A plain loop with an early return gives the same short-circuit behaviour as pulling the
        # first item from an async generator, without leaving a suspended generator un-closed.
        for backend in self.backends:
            result = await backend.authenticate(conn)
            if result is not None:
                return result
        return None
45
+
46
+
47
class GetParamAuthBackend(AuthenticationBackend):
    """
    Authentication backend that validates tokens from a query parameter
    """

    def __init__(self, token_verifier: McpTokenVerifier, token_get_param: str):
        """
        Args:
            token_verifier: Verifier whose `verify_token` is awaited with the raw token.
            token_get_param: Name of the query parameter that carries the token.
        """
        self.token_verifier = token_verifier
        self.token_get_param = token_get_param

    async def authenticate(self, conn: HTTPConnection):
        """
        Authenticate the connection from its query string.

        Returns:
            None when the parameter is absent, the verifier rejects the token, or the verified
            token is already expired; otherwise an `(AuthCredentials, McpAuthenticatedUser)` pair.
        """
        # HTTPConnection already exposes the parsed query string; wrapping the scope in a fresh
        # Request is unnecessary and Request only accepts "http" scopes.
        token = conn.query_params.get(self.token_get_param)
        if token is None:
            return None

        # Validate the token with the verifier
        auth_info = await self.token_verifier.verify_token(token)
        if not auth_info:
            return None

        # A falsy expires_at (e.g. None) is treated as "no expiry".
        if auth_info.expires_at and auth_info.expires_at < int(time.time()):
            return None

        return AuthCredentials(auth_info.scopes), McpAuthenticatedUser(auth_info)
72
+
73
+
74
class AccessToken(FastMCPAccessToken, ABC):
    """FastMCP access token that can additionally be converted into a HydrolixCredential."""

    @abstractmethod
    def as_credential(self) -> HydrolixCredential:
        """Return the Hydrolix credential represented by this access token."""
        ...
77
+
78
+
79
class HydrolixCredentialChain(AuthProvider):
    """
    AuthProvider that authenticates with the following precedence (highest to lowest):

    1. Per-request Bearer token: Service account token via Authorization: Bearer <token> header
    2. Per-request GET parameter: Service account token via ?token=<token> query parameter

    NB MCP-standard oAuth is not currently implemented
    NB all per-request credentials take precedence over all environment-variable-supplied credentials
    """

    class ServiceAccountAccess(AccessToken):
        """Access token wrapping a service account token, with placeholder client id and scope."""

        FAKE_CLIENT_ID: ClassVar[Final[str]] = "MCP_CLIENT_VIA_SERVICE_ACCOUNT"
        FAKE_SCOPE: ClassVar[Final[str]] = "MCP_SERVICE_ACCOUNT_SCOPE"

        # Issuer handed to ServiceAccountToken when this access token is turned into a credential.
        expected_issuer: Optional[str] = None

        def as_credential(self) -> ServiceAccountToken:
            """Build a ServiceAccountToken from the stored token and expected issuer."""
            return ServiceAccountToken(self.token, self.expected_issuer)

    def __init__(self, expected_issuer: Optional[str]):
        """
        Initialize HydrolixCredentialChain.

        Args:
            expected_issuer: The issuer URL that must be used (mitigates credential-stuffing)
        """
        super().__init__()
        self.expected_issuer = expected_issuer

    async def verify_token(self, token: str) -> AccessToken | None:
        """
        Wrap `token` in a ServiceAccountAccess carrying the placeholder client id / scope and this
        provider's expected issuer.

        See ChainedAuthBackend for how the token is obtained in the first place.
        Authorization is performed by individual endpoints via `fastmcp.server.dependencies.get_access_token`
        """
        access_cls = HydrolixCredentialChain.ServiceAccountAccess
        return access_cls(
            token=token,
            client_id=access_cls.FAKE_CLIENT_ID,
            scopes=[access_cls.FAKE_SCOPE],
            expires_at=None,
            resource=None,
            claims={},
            expected_issuer=self.expected_issuer,
        )

    def get_middleware(self) -> list:
        """Return the Starlette middleware stack: chained authentication, then the MCP auth context."""
        # Order matters: the Bearer header is consulted before the query parameter.
        chained_backend = ChainedAuthBackend(
            [
                BearerAuthBackend(self),
                GetParamAuthBackend(self, TOKEN_PARAM),
            ]
        )
        authentication = Middleware(AuthenticationMiddleware, backend=chained_backend)
        auth_context = Middleware(McpAuthContextMiddleware)
        return [authentication, auth_context]
@@ -0,0 +1,6 @@
1
+ from .log import JsonFormatter, setup_logging
2
+
3
+ __all__ = [
4
+ "setup_logging",
5
+ "JsonFormatter",
6
+ ]
@@ -0,0 +1,60 @@
1
+ import json
2
+ import logging
3
+ import logging.config
4
+ import os
5
+ from pathlib import Path
6
+
7
+ import yaml
8
+
9
+
10
class JsonFormatter(logging.Formatter):
    """
    Formatter that serialises every log record as one JSON object.
    """

    def format(self, record: logging.LogRecord) -> str:
        """Return *record* rendered as a JSON string."""
        payload = {
            "timestamp": self.formatTime(record, self.datefmt),
            "level": record.levelname,
            "component": "mcp-hydrolix",
            "logger": record.name,
            # Dict messages are embedded as a JSON-encoded string; everything else goes through
            # the standard %-interpolation of the record.
            "message": (
                json.dumps(record.msg) if isinstance(record.msg, dict) else record.getMessage()
            ),
        }

        if record.exc_info:
            payload["exception"] = self.formatException(record.exc_info)

        return json.dumps(payload)
33
+
34
+
35
+ def setup_logging(config_path: str | None, log_level: str, log_format: str) -> dict | None:
36
+ """
37
+ Configures logging from a YAML file and overrides level/format.
38
+ """
39
+ if config_path is None:
40
+ # print(f"Warning: Logging config file not provided at '{config_path}'. Using basic config.")
41
+ config_path = f"{os.path.dirname(__file__)}/log.yaml"
42
+
43
+ config_file = Path(config_path)
44
+ if not config_file.is_file():
45
+ logging.basicConfig(level=log_level.upper())
46
+ return None
47
+
48
+ with open(config_file) as f:
49
+ config = yaml.safe_load(f)
50
+
51
+ # Override level and formatter based on function arguments
52
+ config["root"]["level"] = log_level.upper()
53
+ if "loggers" in config and isinstance(config["loggers"], dict):
54
+ for logger in config["loggers"].keys():
55
+ config["loggers"][logger]["level"] = log_level.upper()
56
+
57
+ config["handlers"]["default"]["formatter"] = log_format
58
+
59
+ # logging.config.dictConfig(config)
60
+ return config
@@ -0,0 +1,40 @@
1
+ version: 1
2
+ disable_existing_loggers: false
3
+
4
+ filters:
5
+ token_filter:
6
+ '()': mcp_hydrolix.log.utils.AccessLogTokenRedactingFilter
7
+
8
+ # This section defines the format of your log messages
9
+ formatters:
10
+ default:
11
+ format: "%(levelname)s: %(name)s - %(message)s"
12
+ json:
13
+ # This special '()' key points to the custom JsonFormatter class exported by the mcp_hydrolix.log package
14
+ '()': mcp_hydrolix.log.JsonFormatter
15
+
16
+ # This section defines where the logs are sent (e.g., console)
17
+ handlers:
18
+ default:
19
+ # The formatter used by this handler will be set dynamically in the code
20
+ formatter: default
21
+ class: logging.StreamHandler
22
+ stream: ext://sys.stderr
23
+ filters:
24
+ - token_filter
25
+
26
+ root:
27
+ level: INFO
28
+ handlers: [ default ]
29
+ propagate: false
30
+
31
+ # This section defines the loggers for different parts of the application
32
+ loggers:
33
+ uvicorn:
34
+ handlers: [ default ]
35
+ level: INFO
36
+ propagate: false
37
+ gunicorn:
38
+ handlers: [ default ]
39
+ level: INFO
40
+ propagate: false
@@ -0,0 +1,56 @@
1
+ """Logging utilities for redacting sensitive information from logs."""
2
+
3
+ import logging
4
+ import re
5
+
6
+ from ..auth import TOKEN_PARAM
7
+
8
+
9
class AccessLogTokenRedactingFilter(logging.Filter):
    """
    Filter that redacts token query parameters from log records (e.g. uvicorn access logs).

    Both the record's message and its arguments are scanned, and every `<TOKEN_PARAM>=<value>`
    occurrence has its value replaced with [REDACTED]. The filter never drops a record.
    """

    # Regex pattern to match token=<value> in query strings
    # Matches: token=<anything except & or whitespace>
    TOKEN_PATTERN = re.compile(rf"{re.escape(TOKEN_PARAM)}=[^&\s]+")

    # Literal text substituted for every match.
    _REPLACEMENT = f"{TOKEN_PARAM}=[REDACTED]"

    @classmethod
    def _redact(cls, value):
        """Return *value* with token values masked; values that are not str/bytes pass through unchanged."""
        if isinstance(value, bytes):
            # Bytes are rendered as text, as before. Undecodable bytes are replaced instead of
            # raising, because a logging filter must not fail on odd input.
            value = value.decode("utf-8", errors="replace")
        if isinstance(value, str):
            # A callable replacement keeps the substituted text literal (no template escapes).
            return cls.TOKEN_PATTERN.sub(lambda _match: cls._REPLACEMENT, value)
        return value

    def filter(self, record: logging.LogRecord) -> bool:
        """
        Filter method that redacts tokens from the log message.

        Args:
            record: The log record to filter

        Returns:
            True (side-effect only)
        """
        if isinstance(record.msg, str):
            record.msg = self._redact(record.msg)

        # Arguments are interpolated into the message later, so they must be redacted as well.
        args = record.args
        if isinstance(args, tuple):
            record.args = tuple(self._redact(arg) for arg in args)
        elif isinstance(args, dict):
            # Mapping-style arguments, as used with "%(name)s" placeholders.
            record.args = {key: self._redact(value) for key, value in args.items()}

        return True
mcp_hydrolix/main.py CHANGED
@@ -1,8 +1,76 @@
1
+ import logging.config as lconfig
2
+
3
+ from fastmcp.server.http import StarletteWithLifespan
4
+ from gunicorn.app.base import BaseApplication
5
+
6
+ from .log import setup_logging
7
+ from .mcp_env import TransportType, get_config
1
8
  from .mcp_server import mcp
2
9
 
3
10
 
11
class CoreApplication(BaseApplication):
    """Gunicorn application wrapper around an already-constructed ASGI app."""

    def __init__(self, app: StarletteWithLifespan, options: dict = None) -> None:
        """Store the app and Gunicorn options, then run the base-class initialisation."""
        # Both attributes are assigned before super().__init__(), as in the original ordering.
        # NOTE(review): presumably the base initialiser triggers load_config() -- confirm.
        self.options = options or {}
        self.app = app
        super().__init__()

    def load_config(self) -> None:
        """Apply every non-None option whose key is a known Gunicorn setting."""
        known_settings = self.cfg.settings
        applicable = {
            name: setting
            for name, setting in self.options.items()
            if setting is not None and name in known_settings
        }
        for name, setting in applicable.items():
            self.cfg.set(name.lower(), setting)

    def load(self) -> BaseApplication:
        """Return the application object handed to the constructor."""
        return self.app
33
+
34
+
4
35
def main():
    """
    Start the MCP server using the transport selected in the environment configuration.

    - HTTP / SSE transport with exactly one worker: served through `mcp.run` on the configured
      bind host and port.
    - HTTP / SSE transport with any other worker count: served by Gunicorn with Uvicorn workers
      via `CoreApplication`.
    - Any other transport (stdio): `mcp.run` without host or port.
    """
    config = get_config()
    transport = config.mcp_server_transport

    http_transports = [TransportType.HTTP.value, TransportType.SSE.value]
    if transport not in http_transports:
        # For stdio transport, no host or port is needed
        mcp.run(transport=transport)
        return

    # setup_logging returns None when the logging config file cannot be found (it has then already
    # fallen back to logging.basicConfig); dictConfig must not be called with None.
    log_dict_config = setup_logging(None, "INFO", "json")
    if log_dict_config is not None:
        lconfig.dictConfig(log_dict_config)

    if config.mcp_workers == 1:
        # Only override uvicorn's logging when a config was actually loaded.
        uvicorn_config = {} if log_dict_config is None else {"log_config": log_dict_config}
        mcp.run(
            transport=transport,
            host=config.mcp_bind_host,
            port=config.mcp_bind_port,
            uvicorn_config=uvicorn_config,
        )
        return

    # CoreApplication.load_config skips options whose value is None, so a missing logging config
    # simply leaves Gunicorn's logging untouched.
    options = {
        "bind": f"{config.mcp_bind_host}:{config.mcp_bind_port}",
        "timeout": config.mcp_timeout,
        "workers": config.mcp_workers,
        "worker_class": "uvicorn.workers.UvicornWorker",
        "worker_connections": config.mcp_worker_connections,
        "max_requests": config.mcp_max_requests,
        "max_requests_jitter": config.mcp_max_requests_jitter,
        "keepalive": config.mcp_keepalive,
        "logconfig_dict": log_dict_config,
    }
    CoreApplication(
        mcp.http_app(path="/mcp", stateless_http=True, transport=transport), options
    ).run()
6
74
 
7
75
 
8
76
  if __name__ == "__main__":