pggm-mcp-snowflake-server 0.1.0 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pggm_mcp_snowflake_server/__init__.py +3 -0
- pggm_mcp_snowflake_server/server.py +623 -0
- pggm_mcp_snowflake_server/write_detector.py +36 -0
- pggm_mcp_snowflake_server-0.1.0.dist-info/METADATA +34 -0
- pggm_mcp_snowflake_server-0.1.0.dist-info/RECORD +8 -0
- pggm_mcp_snowflake_server-0.1.0.dist-info/WHEEL +5 -0
- pggm_mcp_snowflake_server-0.1.0.dist-info/entry_points.txt +2 -0
- pggm_mcp_snowflake_server-0.1.0.dist-info/top_level.txt +1 -0
pggm_mcp_snowflake_server/server.py (new file, +623 lines):

```python
import importlib.metadata
import json
import logging
import os
import time
import uuid
from functools import wraps
from typing import Any, Callable

import mcp.server.stdio
import mcp.types as types
import yaml
from mcp.server import NotificationOptions, Server
from mcp.server.models import InitializationOptions
from pydantic import AnyUrl, BaseModel
from snowflake.snowpark import Session

from .write_detector import SQLWriteDetector

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    handlers=[logging.StreamHandler()],
)
logger = logging.getLogger("pggm_mcp_snowflake_server")


def data_to_yaml(data: Any) -> str:
    return yaml.dump(data, indent=2, sort_keys=False)


class SnowflakeDB:
    # Re-create the session once it is older than 30 minutes
    AUTH_EXPIRATION_TIME = 1800

    def __init__(self, connection_config: dict):
        self.connection_config = connection_config
        self.session = None
        self.insights: list[str] = []
        self.auth_time = 0

    def _init_database(self):
        """Initialize connection to the Snowflake database"""
        try:
            # Create session without setting a specific database or schema
            self.session = Session.builder.configs(self.connection_config).create()

            # Set the initial warehouse if provided, but don't set database or schema.
            # .collect() is required here: Snowpark SQL is lazy, so without it the
            # USE WAREHOUSE statement would never actually execute.
            if "warehouse" in self.connection_config:
                self.session.sql(
                    f"USE WAREHOUSE {self.connection_config['warehouse'].upper()}"
                ).collect()

            self.auth_time = time.time()
        except Exception as e:
            raise ValueError(f"Failed to connect to Snowflake database: {e}")

    def execute_query(self, query: str) -> tuple[list[dict[str, Any]], str]:
        """Execute a SQL query and return results as a list of dictionaries"""
        if not self.session or time.time() - self.auth_time > self.AUTH_EXPIRATION_TIME:
            self._init_database()

        logger.debug(f"Executing query: {query}")
        try:
            result = self.session.sql(query).to_pandas()
            result_rows = result.to_dict(orient="records")
            data_id = str(uuid.uuid4())

            return result_rows, data_id

        except Exception as e:
            logger.error(f'Database error executing "{query}": {e}')
            raise

    def add_insight(self, insight: str) -> None:
        """Add a new insight to the collection"""
        self.insights.append(insight)

    def get_memo(self) -> str:
        """Generate a formatted memo from collected insights"""
        if not self.insights:
            return "No data insights have been discovered yet."

        memo = "📊 Data Intelligence Memo 📊\n\n"
        memo += "Key Insights Discovered:\n\n"
        memo += "\n".join(f"- {insight}" for insight in self.insights)

        if len(self.insights) > 1:
            memo += f"\n\nSummary:\nAnalysis has revealed {len(self.insights)} key data insights that suggest opportunities for strategic optimization and growth."

        return memo


def handle_tool_errors(func: Callable) -> Callable:
    """Decorator to standardize tool error handling"""

    @wraps(func)
    async def wrapper(*args, **kwargs) -> list[types.TextContent]:
        try:
            return await func(*args, **kwargs)
        except Exception as e:
            logger.error(f"Error in {func.__name__}: {str(e)}")
            return [types.TextContent(type="text", text=f"Error: {str(e)}")]

    return wrapper


class Tool(BaseModel):
    name: str
    description: str
    input_schema: dict[str, Any]
    handler: Callable[
        [str, dict[str, Any] | None],
        list[types.TextContent | types.ImageContent | types.EmbeddedResource],
    ]
    tags: list[str] = []


# Tool handlers
async def handle_list_databases(arguments, db, *_, exclusion_config=None):
    query = "SELECT DATABASE_NAME FROM INFORMATION_SCHEMA.DATABASES"
    data, data_id = db.execute_query(query)

    # Filter out excluded databases (case-insensitive substring match)
    if exclusion_config and exclusion_config.get("databases"):
        filtered_data = []
        for item in data:
            db_name = item.get("DATABASE_NAME", "")
            exclude = False
            for pattern in exclusion_config["databases"]:
                if pattern.lower() in db_name.lower():
                    exclude = True
                    break
            if not exclude:
                filtered_data.append(item)
        data = filtered_data

    output = {
        "type": "data",
        "data_id": data_id,
        "data": data,
    }
    yaml_output = data_to_yaml(output)
    json_output = json.dumps(output)
    return [
        types.TextContent(type="text", text=yaml_output),
        types.EmbeddedResource(
            type="resource",
            resource=types.TextResourceContents(
                uri=f"data://{data_id}", text=json_output, mimeType="application/json"
            ),
        ),
    ]


async def handle_list_schemas(arguments, db, *_, exclusion_config=None):
    if not arguments or "database" not in arguments:
        raise ValueError("Missing required 'database' parameter")

    database = arguments["database"]
    query = f"SELECT SCHEMA_NAME FROM {database.upper()}.INFORMATION_SCHEMA.SCHEMATA"
    data, data_id = db.execute_query(query)

    # Filter out excluded schemas (case-insensitive substring match)
    if exclusion_config and exclusion_config.get("schemas"):
        filtered_data = []
        for item in data:
            schema_name = item.get("SCHEMA_NAME", "")
            exclude = False
            for pattern in exclusion_config["schemas"]:
                if pattern.lower() in schema_name.lower():
                    exclude = True
                    break
            if not exclude:
                filtered_data.append(item)
        data = filtered_data

    output = {
        "type": "data",
        "data_id": data_id,
        "database": database,
        "data": data,
    }
    yaml_output = data_to_yaml(output)
    json_output = json.dumps(output)
    return [
        types.TextContent(type="text", text=yaml_output),
        types.EmbeddedResource(
            type="resource",
            resource=types.TextResourceContents(
                uri=f"data://{data_id}", text=json_output, mimeType="application/json"
            ),
        ),
    ]


async def handle_list_tables(arguments, db, *_, exclusion_config=None):
    if not arguments or "database" not in arguments or "schema" not in arguments:
        raise ValueError("Missing required 'database' and 'schema' parameters")

    database = arguments["database"]
    schema = arguments["schema"]

    # Note: database and schema are interpolated into the SQL unescaped;
    # inputs come from the MCP client.
    query = f"""
        SELECT table_catalog, table_schema, table_name, comment
        FROM {database}.information_schema.tables
        WHERE table_schema = '{schema.upper()}'
    """
    data, data_id = db.execute_query(query)

    # Filter out excluded tables (case-insensitive substring match)
    if exclusion_config and exclusion_config.get("tables"):
        filtered_data = []
        for item in data:
            table_name = item.get("TABLE_NAME", "")
            exclude = False
            for pattern in exclusion_config["tables"]:
                if pattern.lower() in table_name.lower():
                    exclude = True
                    break
            if not exclude:
                filtered_data.append(item)
        data = filtered_data

    output = {
        "type": "data",
        "data_id": data_id,
        "database": database,
        "schema": schema,
        "data": data,
    }
    yaml_output = data_to_yaml(output)
    json_output = json.dumps(output)
    return [
        types.TextContent(type="text", text=yaml_output),
        types.EmbeddedResource(
            type="resource",
            resource=types.TextResourceContents(
                uri=f"data://{data_id}", text=json_output, mimeType="application/json"
            ),
        ),
    ]


async def handle_describe_table(arguments, db, *_):
    if not arguments or "table_name" not in arguments:
        raise ValueError("Missing table_name argument")

    table_spec = arguments["table_name"]
    split_identifier = table_spec.split(".")

    # Parse the fully qualified table name
    if len(split_identifier) < 3:
        raise ValueError("Table name must be fully qualified as 'database.schema.table'")

    database_name = split_identifier[0].upper()
    schema_name = split_identifier[1].upper()
    table_name = split_identifier[2].upper()

    query = f"""
        SELECT column_name, column_default, is_nullable, data_type, comment
        FROM {database_name}.information_schema.columns
        WHERE table_schema = '{schema_name}' AND table_name = '{table_name}'
    """
    data, data_id = db.execute_query(query)

    output = {
        "type": "data",
        "data_id": data_id,
        "database": database_name,
        "schema": schema_name,
        "table": table_name,
        "data": data,
    }
    yaml_output = data_to_yaml(output)
    json_output = json.dumps(output)
    return [
        types.TextContent(type="text", text=yaml_output),
        types.EmbeddedResource(
            type="resource",
            resource=types.TextResourceContents(
                uri=f"data://{data_id}", text=json_output, mimeType="application/json"
            ),
        ),
    ]


async def handle_read_query(arguments, db, write_detector, *_):
    if not arguments or "query" not in arguments:
        raise ValueError("Missing query argument")

    if write_detector.analyze_query(arguments["query"])["contains_write"]:
        raise ValueError("Calls to read_query should not contain write operations")

    data, data_id = db.execute_query(arguments["query"])
    output = {
        "type": "data",
        "data_id": data_id,
        "data": data,
    }
    yaml_output = data_to_yaml(output)
    json_output = json.dumps(output)
    return [
        types.TextContent(type="text", text=yaml_output),
        types.EmbeddedResource(
            type="resource",
            resource=types.TextResourceContents(
                uri=f"data://{data_id}", text=json_output, mimeType="application/json"
            ),
        ),
    ]


async def handle_append_insight(arguments, db, _, __, server):
    if not arguments or "insight" not in arguments:
        raise ValueError("Missing insight argument")

    db.add_insight(arguments["insight"])
    await server.request_context.session.send_resource_updated(AnyUrl("memo://insights"))
    return [types.TextContent(type="text", text="Insight added to memo")]


async def handle_write_query(arguments, db, _, allow_write, __):
    if not allow_write:
        raise ValueError("Write operations are not allowed for this data connection")
    if arguments["query"].strip().upper().startswith("SELECT"):
        raise ValueError("SELECT queries are not allowed for write_query")

    results, data_id = db.execute_query(arguments["query"])
    return [types.TextContent(type="text", text=str(results))]


async def handle_create_table(arguments, db, _, allow_write, __):
    if not allow_write:
        raise ValueError("Write operations are not allowed for this data connection")
    if not arguments["query"].strip().upper().startswith("CREATE TABLE"):
        raise ValueError("Only CREATE TABLE statements are allowed")

    results, data_id = db.execute_query(arguments["query"])
    return [types.TextContent(type="text", text=f"Table created successfully. data_id = {data_id}")]


async def main(
    allow_write: bool = False,
    connection_args: dict | None = None,
    log_dir: str | None = None,
    log_level: str = "INFO",
    exclude_tools: list[str] | None = None,
    config_file: str = "runtime_config.json",
    exclude_patterns: dict | None = None,
):
    # Normalize instead of using a mutable default argument
    exclude_tools = exclude_tools or []

    # Set up logging
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)
        logger.handlers.append(
            logging.FileHandler(os.path.join(log_dir, "pggm_mcp_snowflake_server.log"))
        )
    if log_level:
        logger.setLevel(log_level)

    logger.info("Starting Snowflake MCP Server")
    logger.info("Allow write operations: %s", allow_write)
    logger.info("Excluded tools: %s", exclude_tools)

    # Load configuration from file if provided
    config = {}
    if config_file:
        try:
            with open(config_file, "r") as f:
                config = json.load(f)
            logger.info(f"Loaded configuration from {config_file}")
        except Exception as e:
            logger.error(f"Error loading configuration file: {e}")

    # Merge exclude_patterns from parameters with those from the config file
    exclusion_config = config.get("exclude_patterns", {})
    if exclude_patterns:
        for key, patterns in exclude_patterns.items():
            if key in exclusion_config:
                exclusion_config[key].extend(patterns)
            else:
                exclusion_config[key] = patterns

    # Set default patterns if none are specified
    if not exclusion_config:
        exclusion_config = {"databases": [], "schemas": [], "tables": []}

    # Ensure all keys exist in the exclusion config
    for key in ["databases", "schemas", "tables"]:
        if key not in exclusion_config:
            exclusion_config[key] = []

    logger.info(f"Exclusion patterns: {exclusion_config}")

    db = SnowflakeDB(connection_args)
    server = Server("snowflake-manager")
    write_detector = SQLWriteDetector()

    tables_info = {}
    tables_brief = ""

    all_tools = [
        Tool(
            name="list_databases",
            description="List all available databases in Snowflake",
            input_schema={
                "type": "object",
                "properties": {},
            },
            handler=handle_list_databases,
        ),
        Tool(
            name="list_schemas",
            description="List all schemas in a database",
            input_schema={
                "type": "object",
                "properties": {
                    "database": {
                        "type": "string",
                        "description": "Database name to list schemas from",
                    },
                },
                "required": ["database"],
            },
            handler=handle_list_schemas,
        ),
        Tool(
            name="list_tables",
            description="List all tables in a specific database and schema",
            input_schema={
                "type": "object",
                "properties": {
                    "database": {"type": "string", "description": "Database name"},
                    "schema": {"type": "string", "description": "Schema name"},
                },
                "required": ["database", "schema"],
            },
            handler=handle_list_tables,
        ),
        Tool(
            name="describe_table",
            description="Get the schema information for a specific table",
            input_schema={
                "type": "object",
                "properties": {
                    "table_name": {
                        "type": "string",
                        "description": "Fully qualified table name in the format 'database.schema.table'",
                    },
                },
                "required": ["table_name"],
            },
            handler=handle_describe_table,
        ),
        Tool(
            name="read_query",
            description="Execute a SELECT query.",
            input_schema={
                "type": "object",
                "properties": {
                    "query": {"type": "string", "description": "SELECT SQL query to execute"}
                },
                "required": ["query"],
            },
            handler=handle_read_query,
        ),
        Tool(
            name="append_insight",
            description="Add a data insight to the memo",
            input_schema={
                "type": "object",
                "properties": {
                    "insight": {
                        "type": "string",
                        "description": "Data insight discovered from analysis",
                    }
                },
                "required": ["insight"],
            },
            handler=handle_append_insight,
            tags=["resource_based"],
        ),
        Tool(
            name="write_query",
            description="Execute an INSERT, UPDATE, or DELETE query on the Snowflake database",
            input_schema={
                "type": "object",
                "properties": {"query": {"type": "string", "description": "SQL query to execute"}},
                "required": ["query"],
            },
            handler=handle_write_query,
            tags=["write"],
        ),
        Tool(
            name="create_table",
            description="Create a new table in the Snowflake database",
            input_schema={
                "type": "object",
                "properties": {
                    "query": {"type": "string", "description": "CREATE TABLE SQL statement"}
                },
                "required": ["query"],
            },
            handler=handle_create_table,
            tags=["write"],
        ),
    ]

    exclude_tags = []
    if not allow_write:
        exclude_tags.append("write")
    allowed_tools = [
        tool
        for tool in all_tools
        if tool.name not in exclude_tools and not any(tag in exclude_tags for tag in tool.tags)
    ]

    logger.info("Allowed tools: %s", [tool.name for tool in allowed_tools])

    # Register handlers
    @server.list_resources()
    async def handle_list_resources() -> list[types.Resource]:
        resources = [
            types.Resource(
                uri=AnyUrl("memo://insights"),
                name="Data Insights Memo",
                description="A living document of discovered data insights",
                mimeType="text/plain",
            )
        ]
        table_brief_resources = [
            types.Resource(
                uri=AnyUrl(f"context://table/{table_name}"),
                name=f"{table_name} table",
                description=f"Description of the {table_name} table",
                mimeType="text/plain",
            )
            for table_name in tables_info.keys()
        ]
        resources += table_brief_resources
        return resources

    @server.read_resource()
    async def handle_read_resource(uri: AnyUrl) -> str:
        if str(uri) == "memo://insights":
            return db.get_memo()
        elif str(uri).startswith("context://table"):
            table_name = str(uri).split("/")[-1]
            if table_name in tables_info:
                return data_to_yaml(tables_info[table_name])
            else:
                raise ValueError(f"Unknown table: {table_name}")
        else:
            raise ValueError(f"Unknown resource: {uri}")

    @server.list_prompts()
    async def handle_list_prompts() -> list[types.Prompt]:
        return []

    @server.get_prompt()
    async def handle_get_prompt(
        name: str, arguments: dict[str, str] | None
    ) -> types.GetPromptResult:
        raise ValueError(f"Unknown prompt: {name}")

    @server.call_tool()
    @handle_tool_errors
    async def handle_call_tool(
        name: str, arguments: dict[str, Any] | None
    ) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
        if name in exclude_tools:
            return [
                types.TextContent(
                    type="text", text=f"Tool {name} is excluded from this data connection"
                )
            ]

        handler = next((tool.handler for tool in allowed_tools if tool.name == name), None)
        if not handler:
            raise ValueError(f"Unknown tool: {name}")

        # Pass exclusion_config to the handler if it is a listing function
        if name in ["list_databases", "list_schemas", "list_tables"]:
            return await handler(
                arguments,
                db,
                write_detector,
                allow_write,
                server,
                exclusion_config=exclusion_config,
            )
        else:
            return await handler(arguments, db, write_detector, allow_write, server)

    @server.list_tools()
    async def handle_list_tools() -> list[types.Tool]:
        logger.info("Listing tools")
        logger.debug("Allowed tools: %s", allowed_tools)
        tools = [
            types.Tool(
                name=tool.name,
                description=tool.description,
                inputSchema=tool.input_schema,
            )
            for tool in allowed_tools
        ]
        return tools

    # Start server
    async with mcp.server.stdio.stdio_server() as (read_stream, write_stream):
        logger.info("Server running with stdio transport")
        await server.run(
            read_stream,
            write_stream,
            InitializationOptions(
                server_name="snowflake",
                server_version=importlib.metadata.version("pggm_mcp_snowflake_server"),
                capabilities=server.get_capabilities(
                    notification_options=NotificationOptions(),
                    experimental_capabilities={},
                ),
            ),
        )
```
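For orientation, here is a minimal sketch of launching this server programmatically. `main()`'s parameters are taken from the listing above; the Snowflake connection values are illustrative placeholders, and the wheel's actual console entry point is not shown in this diff (its `entry_points.txt` content is omitted).

```python
# Hypothetical launcher -- connection values are placeholders, not anything
# shipped with the package.
import asyncio

from pggm_mcp_snowflake_server.server import main

asyncio.run(
    main(
        allow_write=False,  # keeps the "write"-tagged tools out of the tool list
        connection_args={
            "account": "my_account",  # placeholder Snowflake account
            "user": "my_user",        # placeholder user
            "password": "***",        # or another Snowpark auth option
            "warehouse": "MY_WH",     # optional; activated in _init_database()
        },
        exclude_patterns={"databases": ["temp"]},  # substring filters for listings
    )
)
```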
pggm_mcp_snowflake_server/write_detector.py (new file, +36 lines):

```python
class SQLWriteDetector:
    """Utility class to detect write operations in SQL queries."""

    def __init__(self):
        self.write_keywords = [
            "INSERT", "UPDATE", "DELETE", "DROP", "CREATE", "ALTER", "TRUNCATE",
            "GRANT", "REVOKE", "MERGE", "UPSERT", "REPLACE",
        ]

    def analyze_query(self, query: str) -> dict:
        """
        Analyze an SQL query to determine if it contains write operations.

        Args:
            query: SQL query string to analyze

        Returns:
            Dictionary with analysis results
        """
        result = {
            "contains_write": False,
            "write_operations": [],
            "query_type": "READ",
        }

        # Convert to uppercase for case-insensitive comparison
        upper_query = query.upper()

        # Check for write keywords. Note: matching on whitespace-split tokens
        # means a keyword fused to punctuation (e.g. ";DELETE") is not detected.
        for keyword in self.write_keywords:
            if keyword in upper_query.split():
                result["contains_write"] = True
                result["write_operations"].append(keyword)
                result["query_type"] = "WRITE"

        return result
```
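A quick illustration of the detector's behavior, derived from the listing above; the last case shows the whitespace-tokenization limit noted in the code.

```python
from pggm_mcp_snowflake_server.write_detector import SQLWriteDetector

detector = SQLWriteDetector()

print(detector.analyze_query("SELECT * FROM orders"))
# {'contains_write': False, 'write_operations': [], 'query_type': 'READ'}

print(detector.analyze_query("delete from orders where id = 1"))
# {'contains_write': True, 'write_operations': ['DELETE'], 'query_type': 'WRITE'}

# ";DELETE" is a single whitespace token, so the keyword check misses it:
print(detector.analyze_query("SELECT 1;DELETE FROM orders"))
# {'contains_write': False, 'write_operations': [], 'query_type': 'READ'}
```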
pggm_mcp_snowflake_server-0.1.0.dist-info/METADATA (new file, +34 lines):

````text
Metadata-Version: 2.4
Name: pggm-mcp-snowflake-server
Version: 0.1.0
Summary: Custom Model Context Protocol server for Snowflake
Classifier: Programming Language :: Python :: 3
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Requires-Python: >=3.8
Description-Content-Type: text/markdown
Requires-Dist: mcp
Requires-Dist: pydantic
Requires-Dist: snowflake-snowpark-python
Requires-Dist: pyyaml

# PGGM MCP Snowflake Server

A customized Model Context Protocol (MCP) server for Snowflake integration, allowing AI assistants to interact with Snowflake databases.

## Features

- Connect to Snowflake databases and execute queries
- Support for various SQL operations and schema exploration
- Data insights collection
- Customized filters and configurations

## Installation

```bash
pip install pggm-mcp-snowflake-server
```

## Usage

This package is designed to be used with MCP-compatible AI assistants for database interactions.
````
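The "customized filters and configurations" feature in the README maps to the `exclude_patterns` key that `main()` in server.py reads from `runtime_config.json`. A sketch of producing a config file the loader would accept (only `exclude_patterns` is consumed; the example values are assumptions):

```python
# Generate a runtime_config.json for main() in server.py; patterns are
# matched as case-insensitive substrings in the list_* tool handlers.
import json

config = {
    "exclude_patterns": {
        "databases": ["snowflake_sample"],  # hide sample databases
        "schemas": ["scratch"],             # hide scratch schemas
        "tables": ["_tmp"],                 # hide temp tables
    }
}

with open("runtime_config.json", "w") as f:
    json.dump(config, f, indent=2)
```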
pggm_mcp_snowflake_server-0.1.0.dist-info/RECORD (new file, +8 lines):

```text
pggm_mcp_snowflake_server/__init__.py,sha256=bq16COjhclXSrjQP18TV21vWwJV24hIBrf4I7OfwZkE,48
pggm_mcp_snowflake_server/server.py,sha256=4_Jlr_o5i4v_39-LuT9CFcqNwGMlNCXSZNgiiSp2vwg,21976
pggm_mcp_snowflake_server/write_detector.py,sha256=Zli_U5tnIlCzpVoSYT7jPDBlDUSv4nyS1PnIrXpwZYc,1204
pggm_mcp_snowflake_server-0.1.0.dist-info/METADATA,sha256=B6Pgk0AKQdZgmgWK19TZ-Z6jAH1g4YiKkGskqp-skXg,1015
pggm_mcp_snowflake_server-0.1.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
pggm_mcp_snowflake_server-0.1.0.dist-info/entry_points.txt,sha256=kbUmsaZT0hYi6naFrGUO-mIKlwqHi_HKO6HNQKdTJE4,84
pggm_mcp_snowflake_server-0.1.0.dist-info/top_level.txt,sha256=ouamdLwMWx5aSlAHI_mhoPvm9PEBtovD3qbDvR7x284,26
pggm_mcp_snowflake_server-0.1.0.dist-info/RECORD,,
```
pggm_mcp_snowflake_server-0.1.0.dist-info/top_level.txt (new file, +1 line):

```text
pggm_mcp_snowflake_server
```