recce-nightly 1.9.0.20250623__py3-none-any.whl → 1.25.0.20251112a2066__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- recce/VERSION +1 -1
- recce/__init__.py +5 -0
- recce/adapter/dbt_adapter/__init__.py +318 -240
- recce/artifact.py +76 -3
- recce/cli.py +703 -71
- recce/config.py +3 -3
- recce/connect_to_cloud.py +138 -0
- recce/core.py +3 -3
- recce/data/404.html +1 -22
- recce/data/__next.__PAGE__.txt +10 -0
- recce/data/__next._full.txt +23 -0
- recce/data/__next._index.txt +8 -0
- recce/data/__next._tree.txt +12 -0
- recce/data/_next/static/6LypcDXgyuSaiSCrsmUub/_buildManifest.js +11 -0
- recce/data/_next/static/6LypcDXgyuSaiSCrsmUub/_clientMiddlewareManifest.json +1 -0
- recce/data/_next/static/chunks/0a2b2dd4b57049c2.js +1 -0
- recce/data/_next/static/chunks/19c10d219a6a21ff.js +1 -0
- recce/data/_next/static/chunks/24fd885c7180a612.js +1 -0
- recce/data/_next/static/chunks/27e66b2eab4adc32.js +19 -0
- recce/data/_next/static/chunks/71f88fcc615bf282.js +1 -0
- recce/data/_next/static/chunks/917619ab62a32388.js +1 -0
- recce/data/_next/static/chunks/93ba5a62932b704f.js +4 -0
- recce/data/_next/static/chunks/a43a2a5e06d5a92b.js +1 -0
- recce/data/_next/static/chunks/a6c78b24bd8b84fc.js +1 -0
- recce/data/_next/static/chunks/b2610ba997ff8c4f.js +110 -0
- recce/data/_next/static/chunks/ba2d87265a68599d.css +2 -0
- recce/data/_next/static/chunks/c117fd1c1382dd83.js +11 -0
- recce/data/_next/static/chunks/c9425ca46eebdde9.js +1 -0
- recce/data/_next/static/chunks/cc8a9eadba012be0.css +6 -0
- recce/data/_next/static/chunks/e124bccf574a3361.css +1 -0
- recce/data/_next/static/chunks/e392ad92847c3e17.js +1 -0
- recce/data/_next/static/chunks/e4ce95efe88dae79.js +11 -0
- recce/data/_next/static/chunks/e69c777814fea6ed.js +2 -0
- recce/data/_next/static/chunks/turbopack-21cfd73037ff57ab.js +3 -0
- recce/data/_next/static/media/favicon.a8d38d84.ico +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.d80d830d.woff2 +0 -0
- recce/data/_next/static/media/{montserrat-cyrillic-800-normal.bd5c9f50.woff → montserrat-cyrillic-800-normal.f9d58125.woff} +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.076c2a93.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.cde454cc.woff2 +0 -0
- recce/data/_next/static/media/{montserrat-latin-800-normal.fc315020.woff → montserrat-latin-800-normal.d5761935.woff} +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.40ec0659.woff2 +0 -0
- recce/data/_next/static/media/{montserrat-latin-ext-800-normal.2e5381b2.woff → montserrat-latin-ext-800-normal.b671449b.woff} +0 -0
- recce/data/_next/static/media/{montserrat-vietnamese-800-normal.20c545e6.woff → montserrat-vietnamese-800-normal.9f7b8541.woff} +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.f9eb854e.woff2 +0 -0
- recce/data/_not-found/__next._full.txt +17 -0
- recce/data/_not-found/__next._index.txt +8 -0
- recce/data/_not-found/__next._not-found.__PAGE__.txt +5 -0
- recce/data/_not-found/__next._not-found.txt +4 -0
- recce/data/_not-found/__next._tree.txt +10 -0
- recce/data/_not-found.html +1 -0
- recce/data/_not-found.txt +17 -0
- recce/data/auth_callback.html +68 -0
- recce/data/index.html +1 -27
- recce/data/index.txt +23 -8
- recce/event/__init__.py +9 -8
- recce/event/collector.py +6 -2
- recce/event/track.py +10 -0
- recce/github.py +1 -1
- recce/mcp_server.py +632 -0
- recce/models/types.py +23 -2
- recce/pull_request.py +1 -1
- recce/run.py +23 -16
- recce/server.py +194 -19
- recce/state/__init__.py +31 -0
- recce/state/cloud.py +632 -0
- recce/state/const.py +26 -0
- recce/state/local.py +56 -0
- recce/state/state.py +119 -0
- recce/state/state_loader.py +174 -0
- recce/summary.py +2 -1
- recce/tasks/dataframe.py +59 -2
- recce/tasks/rowcount.py +4 -1
- recce/tasks/schema.py +4 -1
- recce/tasks/valuediff.py +1 -1
- recce/util/api_token.py +11 -2
- recce/util/breaking.py +9 -0
- recce/util/cll.py +1 -2
- recce/util/io.py +2 -2
- recce/util/lineage.py +19 -18
- recce/util/perf_tracking.py +85 -0
- recce/util/recce_cloud.py +229 -5
- recce/yaml/__init__.py +2 -2
- recce_cloud/__init__.py +15 -0
- recce_cloud/api/__init__.py +17 -0
- recce_cloud/api/base.py +104 -0
- recce_cloud/api/client.py +150 -0
- recce_cloud/api/exceptions.py +26 -0
- recce_cloud/api/factory.py +63 -0
- recce_cloud/api/github.py +72 -0
- recce_cloud/api/gitlab.py +78 -0
- recce_cloud/artifact.py +57 -0
- recce_cloud/ci_providers/__init__.py +9 -0
- recce_cloud/ci_providers/base.py +82 -0
- recce_cloud/ci_providers/detector.py +147 -0
- recce_cloud/ci_providers/github_actions.py +136 -0
- recce_cloud/ci_providers/gitlab_ci.py +130 -0
- recce_cloud/cli.py +303 -0
- recce_cloud/upload.py +213 -0
- {recce_nightly-1.9.0.20250623.dist-info → recce_nightly-1.25.0.20251112a2066.dist-info}/METADATA +31 -27
- recce_nightly-1.25.0.20251112a2066.dist-info/RECORD +178 -0
- {recce_nightly-1.9.0.20250623.dist-info → recce_nightly-1.25.0.20251112a2066.dist-info}/top_level.txt +1 -0
- tests/adapter/dbt_adapter/test_dbt_cll.py +412 -79
- tests/recce_cloud/__init__.py +0 -0
- tests/recce_cloud/test_ci_providers.py +351 -0
- tests/recce_cloud/test_cli.py +372 -0
- tests/recce_cloud/test_client.py +273 -0
- tests/recce_cloud/test_platform_clients.py +279 -0
- tests/test_cli.py +106 -3
- tests/test_cli_mcp_optional.py +45 -0
- tests/test_cloud_listing_cli.py +324 -0
- tests/test_connect_to_cloud.py +82 -0
- tests/test_core.py +148 -3
- tests/test_mcp_server.py +332 -0
- tests/test_server.py +6 -6
- tests/test_summary.py +14 -6
- recce/data/_next/static/WrRUb3nV8BhAZG_R8kVma/_buildManifest.js +0 -1
- recce/data/_next/static/chunks/181-acc61ddada3bc0ca.js +0 -43
- recce/data/_next/static/chunks/1bff33f1-1ef85cf5e658a751.js +0 -1
- recce/data/_next/static/chunks/217-879a84d70f7a907c.js +0 -2
- recce/data/_next/static/chunks/29e3cc0d-60045b2e47aa3916.js +0 -1
- recce/data/_next/static/chunks/36e1c10d-8e7be4a6c1f6ab2d.js +0 -1
- recce/data/_next/static/chunks/3998a672-03adacad07b346ac.js +0 -1
- recce/data/_next/static/chunks/3a92ee20-1081c360214f9602.js +0 -1
- recce/data/_next/static/chunks/42-cd3c06533f5fd47c.js +0 -9
- recce/data/_next/static/chunks/450c323b-fd94e7ffaa4a5efa.js +0 -1
- recce/data/_next/static/chunks/47d8844f-929aed9b1c73a905.js +0 -1
- recce/data/_next/static/chunks/608-3b079b544e5d5f5e.js +0 -15
- recce/data/_next/static/chunks/6dc81886-adbfa45836061d79.js +0 -1
- recce/data/_next/static/chunks/7a8a3e83-edf6dc64b5d5f0a5.js +0 -1
- recce/data/_next/static/chunks/7f27ae6c-d5f0438edd5c2a5b.js +0 -1
- recce/data/_next/static/chunks/86730205-cfb14e3f051bab35.js +0 -1
- recce/data/_next/static/chunks/8d700b6a.8bb140898499c512.js +0 -1
- recce/data/_next/static/chunks/92-7ab55ae02606193c.js +0 -1
- recce/data/_next/static/chunks/9746af58-a42b7d169cacadf0.js +0 -1
- recce/data/_next/static/chunks/a30376cd-de84559016d7e133.js +0 -1
- recce/data/_next/static/chunks/app/_not-found/page-01ed58b7f971d311.js +0 -1
- recce/data/_next/static/chunks/app/layout-177a410a97e0d018.js +0 -1
- recce/data/_next/static/chunks/app/page-59241c42b7dd4fcf.js +0 -1
- recce/data/_next/static/chunks/b63b1b3f-4282bdcf459e075c.js +0 -1
- recce/data/_next/static/chunks/bbda5537-9ec25eb1dd62348a.js +0 -1
- recce/data/_next/static/chunks/c132bf7d-08cb668a789d6afd.js +0 -1
- recce/data/_next/static/chunks/ce84277d-2e5d1d46910cf052.js +0 -1
- recce/data/_next/static/chunks/febdd86e-c6b525341634b860.js +0 -54
- recce/data/_next/static/chunks/fee69bc6-2dbccaf9b90474e6.js +0 -1
- recce/data/_next/static/chunks/framework-ded83d71b51ce901.js +0 -1
- recce/data/_next/static/chunks/main-app-39061b0166c47f55.js +0 -1
- recce/data/_next/static/chunks/main-b5b3ae20a1405261.js +0 -1
- recce/data/_next/static/chunks/pages/_app-437c455677d62394.js +0 -1
- recce/data/_next/static/chunks/pages/_error-e7650df18ca04bde.js +0 -1
- recce/data/_next/static/chunks/webpack-7b49d5ba7e3a434d.js +0 -1
- recce/data/_next/static/css/17a96168e3a9db13.css +0 -1
- recce/data/_next/static/css/1b121dc4d36aeb4d.css +0 -3
- recce/data/_next/static/css/35c6679a098e1e34.css +0 -1
- recce/data/_next/static/css/951e2e0eea2d4a5b.css +0 -14
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.22628180.woff2 +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.94a63aea.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.6f8fa298.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.013b84f9.woff2 +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.c0035377.woff2 +0 -0
- recce/state.py +0 -785
- recce_nightly-1.9.0.20250623.dist-info/RECORD +0 -151
- tests/test_state.py +0 -134
- /recce/data/_next/static/{WrRUb3nV8BhAZG_R8kVma → 6LypcDXgyuSaiSCrsmUub}/_ssgManifest.js +0 -0
- /recce/data/_next/static/chunks/{polyfills-42372ed130431b0a.js → a6dad97d9634a72d.js} +0 -0
- /recce/data/_next/static/media/{montserrat-cyrillic-ext-800-normal.e6e0d8d0.woff → montserrat-cyrillic-ext-800-normal.a4fa76b5.woff} +0 -0
- /recce/data/_next/static/media/{reload-image.79aabb7d.svg → reload-image.7aa931c7.svg} +0 -0
- {recce_nightly-1.9.0.20250623.dist-info → recce_nightly-1.25.0.20251112a2066.dist-info}/WHEEL +0 -0
- {recce_nightly-1.9.0.20250623.dist-info → recce_nightly-1.25.0.20251112a2066.dist-info}/entry_points.txt +0 -0
- {recce_nightly-1.9.0.20250623.dist-info → recce_nightly-1.25.0.20251112a2066.dist-info}/licenses/LICENSE +0 -0
recce/mcp_server.py
ADDED
|
@@ -0,0 +1,632 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Recce MCP (Model Context Protocol) Server
|
|
3
|
+
|
|
4
|
+
This module implements a stdio-based MCP server that provides tools for
|
|
5
|
+
interacting with Recce's data validation capabilities.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import asyncio
|
|
9
|
+
import json
|
|
10
|
+
import logging
|
|
11
|
+
import os
|
|
12
|
+
import textwrap
|
|
13
|
+
import time
|
|
14
|
+
from datetime import datetime, timezone
|
|
15
|
+
from typing import Any, Dict, List, Optional
|
|
16
|
+
|
|
17
|
+
from mcp.server import Server
|
|
18
|
+
from mcp.server.stdio import stdio_server
|
|
19
|
+
from mcp.types import TextContent, Tool
|
|
20
|
+
|
|
21
|
+
from recce.core import RecceContext, load_context
|
|
22
|
+
from recce.server import RecceServerMode
|
|
23
|
+
from recce.tasks.dataframe import DataFrame
|
|
24
|
+
from recce.tasks.profile import ProfileDiffTask
|
|
25
|
+
from recce.tasks.query import QueryDiffTask, QueryTask
|
|
26
|
+
from recce.tasks.rowcount import RowCountDiffTask
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger(__name__)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _truncate_strings(obj: Any, max_length: int = 200) -> Any:
|
|
32
|
+
"""Recursively truncate strings longer than max_length in nested dicts and lists"""
|
|
33
|
+
if isinstance(obj, dict):
|
|
34
|
+
return {k: _truncate_strings(v, max_length) for k, v in obj.items()}
|
|
35
|
+
elif isinstance(obj, list):
|
|
36
|
+
return [_truncate_strings(item, max_length) for item in obj]
|
|
37
|
+
elif isinstance(obj, str) and len(obj) > max_length:
|
|
38
|
+
return obj[:max_length] + "..."
|
|
39
|
+
return obj
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class MCPLogger:
    """JSON-lines logger for MCP server request/response logging.

    When ``debug`` is false every logging method is a no-op and no file is
    touched. When it is true, the log file is truncated on construction and
    one JSON object per line is appended for each logged event.
    """

    def __init__(self, debug: bool = False, log_file: str = "logs/recce-mcp.json"):
        """Remember the settings and, in debug mode, prepare an empty log file.

        Args:
            debug: Enables file logging when true.
            log_file: Path of the JSON-lines log file.
        """
        self.debug = debug
        self.log_file = log_file

        if not self.debug:
            return

        # Directory creation and truncation share one best-effort guard so that an
        # unwritable location only produces a warning instead of aborting construction.
        try:
            log_dir = os.path.dirname(log_file)
            if log_dir:
                os.makedirs(log_dir, exist_ok=True)

            # Opening in "w" mode is enough to clear any existing content.
            with open(log_file, "w", encoding="utf-8"):
                pass
        except Exception as e:
            logger.warning(f"Failed to initialize log file {log_file}: {e}")

    @staticmethod
    def _timestamp() -> str:
        """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ``."""
        return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    def _write_log(self, log_entry: Dict[str, Any]) -> None:
        """Append ``log_entry`` as one JSON line; failures are only warned about."""
        if not self.debug:
            return

        try:
            with open(self.log_file, "a", encoding="utf-8") as f:
                f.write(json.dumps(log_entry) + "\n")
        except Exception as e:
            logger.warning(f"Failed to write to log file {self.log_file}: {e}")

    def log_list_tools(self, tools: List["Tool"]) -> None:
        """Log a list_tools call, recording only the tool names."""
        self._write_log(
            {
                "timestamp": self._timestamp(),
                "type": "list_tools",
                "tools": [tool.name for tool in tools],
            }
        )

    def log_tool_call(
        self,
        tool_name: str,
        arguments: Dict[str, Any],
        response: Dict[str, Any],
        duration_ms: float,
        error: Optional[str] = None,
    ) -> None:
        """Log one tool call with its request and either its error or its response.

        Args:
            tool_name: Name of the invoked tool.
            arguments: Arguments the tool was called with (logged as-is).
            response: Tool result; long strings are truncated before logging.
            duration_ms: Call duration in milliseconds (rounded to 2 decimals).
            error: Error text for a failed call, or ``None`` for a successful one.
        """
        log_entry = {
            "timestamp": self._timestamp(),
            "type": "call_tool",
            "tool": tool_name,
            "request": arguments,
            "duration_ms": round(duration_ms, 2),
        }

        # Compare against None: an exception without a message stringifies to "",
        # which must still be recorded as a failure rather than as a response.
        if error is not None:
            log_entry["error"] = error
        else:
            log_entry["response"] = _truncate_strings(response)

        self._write_log(log_entry)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class RecceMCPServer:
|
|
109
|
+
"""MCP Server for Recce data validation tools"""
|
|
110
|
+
|
|
111
|
+
def __init__(
    self,
    context: RecceContext,
    mode: Optional[RecceServerMode] = None,
    debug: bool = False,
    log_file: str = "logs/recce-mcp.json",
):
    """Create the MCP server wrapper and register its tool handlers.

    Args:
        context: Recce context the tools operate on.
        mode: Server mode; falls back to ``RecceServerMode.server`` when not given.
        debug: Passed to ``MCPLogger`` to enable request/response logging.
        log_file: Passed to ``MCPLogger`` as the log file path.
    """
    self.context = context
    self.mode = mode if mode else RecceServerMode.server
    self.server = Server("recce")
    self.mcp_logger = MCPLogger(debug=debug, log_file=log_file)

    # Registered last: the handlers read the attributes assigned above.
    self._setup_handlers()
|
|
123
|
+
|
|
124
|
+
def _setup_handlers(self):
    """Register the ``list_tools`` and ``call_tool`` handlers on the MCP server."""

    @self.server.list_tools()
    async def list_tools() -> List[Tool]:
        """Return the tools exposed for the current server mode."""
        # These two tools are offered in every mode.
        tools: List[Tool] = [
            Tool(
                name="lineage_diff",
                description=textwrap.dedent(
                    """
                    Get the lineage diff between production(base) and session(current) for changed models.
                    Returns nodes, parent_map (node dependencies), and change_status/impacted information in compact dataframe format.

                    In parent_map: key is a node index, value is list of parent node indices
                    Nodes dataframe includes: idx, id, name, resource_type, materialized, change_status, impacted.

                    Rendering guidance for Mermaid diagram:
                    Use graph LR and apply these styles based on change_status and impacted:
                    - change_status="added": fill:#d4edda, stroke:#28a745, color:#000000
                    - change_status="removed": fill:#f8d7da, stroke:#dc3545, color:#000000
                    - change_status="modified" AND impacted=true: fill:#fff3cd, stroke:#ffc107, color:#000000
                    - change_status=null AND impacted=true: fill:#ffffff, stroke:#ffc107, color:#000000
                    - change_status=null AND impacted=false: fill:#ffffff, stroke:#d3d3d3, color:#999999
                    """
                ).strip(),
                inputSchema={
                    "type": "object",
                    "properties": {
                        "select": {
                            "type": "string",
                            "description": "dbt selector syntax to filter models (optional)",
                        },
                        "exclude": {
                            "type": "string",
                            "description": "dbt selector syntax to exclude models (optional)",
                        },
                        "packages": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "List of packages to filter (optional)",
                        },
                        "view_mode": {
                            "type": "string",
                            "enum": ["changed_models", "all"],
                            "default": "changed_models",
                            "description": "View mode: 'changed_models' for only changed models (default), 'all' for all models",
                        },
                    },
                },
            ),
            Tool(
                name="schema_diff",
                description="Get the schema diff (column changes) between base and current environments. "
                "Shows added, removed, and type-changed columns in compact dataframe format.",
                inputSchema={
                    "type": "object",
                    "properties": {
                        "select": {
                            "type": "string",
                            "description": "dbt selector syntax to filter models (optional)",
                        },
                        "exclude": {
                            "type": "string",
                            "description": "dbt selector syntax to exclude models (optional)",
                        },
                        "packages": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "List of packages to filter (optional)",
                        },
                    },
                },
            ),
        ]

        # The remaining tools are offered only when the mode is RecceServerMode.server.
        if self.mode == RecceServerMode.server:
            tools += [
                Tool(
                    name="row_count_diff",
                    description="Compare row counts between base and current environments for specified models.",
                    inputSchema={
                        "type": "object",
                        "properties": {
                            "node_names": {
                                "type": "array",
                                "items": {"type": "string"},
                                "description": "List of model names to check row counts (optional)",
                            },
                            "node_ids": {
                                "type": "array",
                                "items": {"type": "string"},
                                "description": "List of node IDs to check row counts (optional)",
                            },
                            "select": {
                                "type": "string",
                                "description": "dbt selector syntax to filter models (optional)",
                            },
                            "exclude": {
                                "type": "string",
                                "description": "dbt selector syntax to exclude models (optional)",
                            },
                        },
                    },
                ),
                Tool(
                    name="query",
                    description="Execute a SQL query on the current environment. "
                    "Supports Jinja templates with dbt macros like {{ ref('model_name') }}.",
                    inputSchema={
                        "type": "object",
                        "properties": {
                            "sql_template": {
                                "type": "string",
                                "description": "SQL query template with optional Jinja syntax",
                            },
                            "base": {
                                "type": "boolean",
                                "description": "Whether to run on base environment (default: false)",
                                "default": False,
                            },
                        },
                        "required": ["sql_template"],
                    },
                ),
                Tool(
                    name="query_diff",
                    description="Execute SQL queries on both base and current environments and compare results. "
                    "Supports primary keys for row-level comparison.",
                    inputSchema={
                        "type": "object",
                        "properties": {
                            "sql_template": {
                                "type": "string",
                                "description": "SQL query template for current environment",
                            },
                            "base_sql_template": {
                                "type": "string",
                                "description": "SQL query template for base environment (optional, defaults to sql_template)",
                            },
                            "primary_keys": {
                                "type": "array",
                                "items": {"type": "string"},
                                "description": "List of primary key columns for row comparison (optional)",
                            },
                        },
                        "required": ["sql_template"],
                    },
                ),
                Tool(
                    name="profile_diff",
                    description="Generate and compare statistical profiles (min, max, avg, distinct count, etc.) "
                    "for columns in a model between base and current environments.",
                    inputSchema={
                        "type": "object",
                        "properties": {
                            "model": {
                                "type": "string",
                                "description": "Model name to profile",
                            },
                            "columns": {
                                "type": "array",
                                "items": {"type": "string"},
                                "description": "List of column names to profile (optional, profiles all columns if not specified)",
                            },
                        },
                        "required": ["model"],
                    },
                ),
            ]

        self.mcp_logger.log_list_tools(tools)

        return tools

    @self.server.call_tool()
    async def call_tool(name: str, arguments: Dict[str, Any]) -> List[TextContent]:
        """Run the named tool and return its result (or its error) as JSON text."""
        started = time.perf_counter()

        def elapsed_ms() -> float:
            return (time.perf_counter() - started) * 1000

        try:
            # Reject tools that are not offered outside server mode.
            server_only_tools = {"row_count_diff", "query", "query_diff", "profile_diff"}
            if name in server_only_tools and self.mode != RecceServerMode.server:
                raise ValueError(
                    f"Tool '{name}' is not available in {self.mode.value} mode. "
                    "Only 'lineage_diff' and 'schema_diff' are available in this mode."
                )

            handlers = {
                "lineage_diff": self._tool_lineage_diff,
                "schema_diff": self._tool_schema_diff,
                "row_count_diff": self._tool_row_count_diff,
                "query": self._tool_query,
                "query_diff": self._tool_query_diff,
                "profile_diff": self._tool_profile_diff,
            }
            handler = handlers.get(name)
            if handler is None:
                raise ValueError(f"Unknown tool: {name}")

            result = await handler(arguments)

            self.mcp_logger.log_tool_call(name, arguments, result, elapsed_ms())

            return [TextContent(type="text", text=json.dumps(result, indent=2))]
        except Exception as e:
            # Failures are logged and reported to the client as a JSON error payload.
            self.mcp_logger.log_tool_call(name, arguments, {}, elapsed_ms(), error=str(e))
            logger.exception(f"Error executing tool {name}")
            return [TextContent(type="text", text=json.dumps({"error": str(e)}, indent=2))]
|
|
345
|
+
|
|
346
|
+
async def _tool_lineage_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Build a compact lineage diff between the base and current environments.

    Args:
        arguments: Tool arguments. Reads the optional ``select``, ``exclude`` and
            ``packages`` filters and ``view_mode`` (defaults to ``"changed_models"``).

    Returns:
        A dict with ``nodes`` (JSON dump of a DataFrame with the columns idx, id,
        name, resource_type, materialized, change_status, impacted) and
        ``parent_map`` (node idx -> list of parent idxs, limited to returned nodes).

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    try:
        # Extract filter arguments
        select = arguments.get("select")
        exclude = arguments.get("exclude")
        packages = arguments.get("packages")
        view_mode = arguments.get("view_mode", "changed_models")

        # Get lineage diff from adapter (returns a Pydantic LineageDiff model)
        lineage_diff = self.context.get_lineage_diff().model_dump(mode="json")

        # Nodes to report, and nodes matched by the "state:modified+" selector
        selected_node_ids = self.context.adapter.select_nodes(
            select=select,
            exclude=exclude,
            packages=packages,
            view_mode=view_mode,
        )
        impacted_node_ids = self.context.adapter.select_nodes(
            select="state:modified+",
        )

        # A `.get(key, {})` default is not used when the key is present with a null
        # value, so every optional mapping below falls back through `or` instead.
        diff_info = lineage_diff.get("diff") or {}

        parent_map = {}
        nodes = {}

        # Merge parent_map and nodes: base first, then current overrides
        for env_key in ("base", "current"):
            env_data = lineage_diff.get(env_key) or {}

            for node_id, parents in (env_data.get("parent_map") or {}).items():
                if node_id in selected_node_ids:
                    parent_map[node_id] = parents or []

            for node_id, node_info in (env_data.get("nodes") or {}).items():
                if node_id not in selected_node_ids:
                    continue

                summary = {
                    "name": node_info.get("name"),
                    "resource_type": node_info.get("resource_type"),
                }
                materialized = (node_info.get("config") or {}).get("materialized")
                if materialized is not None:
                    summary["materialized"] = materialized
                nodes[node_id] = summary

        # Compact integer index per node, in insertion order
        id_to_idx = {node_id: idx for idx, node_id in enumerate(nodes)}

        nodes_data = [
            [
                id_to_idx[node_id],
                node_id,
                node_info.get("name"),
                node_info.get("resource_type"),
                node_info.get("materialized"),
                (diff_info.get(node_id) or {}).get("change_status"),
                node_id in impacted_node_ids,
            ]
            for node_id, node_info in nodes.items()
        ]

        nodes_df = DataFrame.from_data(
            columns={
                "idx": "integer",
                "id": "text",
                "name": "text",
                "resource_type": "text",
                "materialized": "text",
                "change_status": "text",
                "impacted": "boolean",
            },
            data=nodes_data,
        )

        # Re-key parent_map by index; parents outside the returned nodes are dropped
        parent_map_indexed = {
            id_to_idx[node_id]: [id_to_idx[p] for p in parents if p in id_to_idx]
            for node_id, parents in parent_map.items()
            if node_id in id_to_idx
        }

        return {"nodes": nodes_df.model_dump(mode="json"), "parent_map": parent_map_indexed}

    except Exception:
        logger.exception("Error getting lineage diff")
        raise
|
|
449
|
+
|
|
450
|
+
async def _tool_schema_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Report column-level schema changes between the base and current environments.

    Only nodes present in both environments are compared. Each change is a row
    ``[node_id, column, change_status]`` where the status is ``"added"``,
    ``"removed"`` or ``"modified"`` (the column ``type`` differs). Rows are
    produced in sorted node/column order so that the 100-row cut-off always keeps
    the same rows for the same input.

    Args:
        arguments: Tool arguments. Reads the optional ``select``, ``exclude`` and
            ``packages`` filters.

    Returns:
        JSON dump of a DataFrame with the columns node_id, column, change_status,
        limited to 100 rows, with ``more`` set when rows were cut off.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    try:
        # Extract filter arguments
        select = arguments.get("select")
        exclude = arguments.get("exclude")
        packages = arguments.get("packages")

        # Get lineage diff from adapter
        lineage_diff = self.context.get_lineage_diff().model_dump(mode="json")

        # `or {}` also covers keys that are present with a null value.
        current_nodes = (lineage_diff.get("current") or {}).get("nodes") or {}
        base_nodes = (lineage_diff.get("base") or {}).get("nodes") or {}

        # Only nodes that exist in both base and current (excludes added/removed nodes)
        nodes_to_compare = set(current_nodes) & set(base_nodes)

        # Apply filtering if arguments provided
        if select or exclude or packages:
            selected_node_ids = self.context.adapter.select_nodes(
                select=select,
                exclude=exclude,
                packages=packages,
            )
            nodes_to_compare = nodes_to_compare & selected_node_ids

        schema_changes = []

        # Sets have no defined order; sort so the output (and the truncation below)
        # is deterministic.
        for node_id in sorted(nodes_to_compare):
            base_columns = (base_nodes.get(node_id) or {}).get("columns") or {}
            current_columns = (current_nodes.get(node_id) or {}).get("columns") or {}

            base_col_names = set(base_columns)
            current_col_names = set(current_columns)

            # Added columns (in current but not in base)
            for col_name in sorted(current_col_names - base_col_names):
                schema_changes.append([node_id, col_name, "added"])

            # Removed columns (in base but not in current)
            for col_name in sorted(base_col_names - current_col_names):
                schema_changes.append([node_id, col_name, "removed"])

            # Modified columns (in both but with different types)
            for col_name in sorted(base_col_names & current_col_names):
                base_col_type = (base_columns[col_name] or {}).get("type")
                current_col_type = (current_columns[col_name] or {}).get("type")
                if base_col_type != current_col_type:
                    schema_changes.append([node_id, col_name, "modified"])

        # Cap the payload at 100 rows and flag when something was left out
        limit = 100
        has_more = len(schema_changes) > limit

        diff_df = DataFrame.from_data(
            columns={
                "node_id": "text",
                "column": "text",
                "change_status": "text",
            },
            data=schema_changes[:limit],
            limit=limit,
            more=has_more,
        )
        return diff_df.model_dump(mode="json")

    except Exception:
        logger.exception("Error getting schema diff")
        raise
|
|
529
|
+
|
|
530
|
+
async def _tool_row_count_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Run a ``RowCountDiffTask`` built from ``arguments`` and return its result.

    The task's ``execute`` is synchronous, so it is run in the event loop's
    default executor. Failures are logged and re-raised.
    """
    try:
        row_count_task = RowCountDiffTask(params=arguments)
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, row_count_task.execute)
    except Exception:
        logger.exception("Error executing row count diff")
        raise
|
|
542
|
+
|
|
543
|
+
async def _tool_query(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Run a ``QueryTask`` for ``arguments["sql_template"]`` and return its result.

    ``arguments["base"]`` (default ``False``) is stored on the task as ``is_base``.
    The synchronous ``execute`` runs in the event loop's default executor; a result
    exposing ``model_dump`` is converted with ``model_dump(mode="json")``.
    Failures are logged and re-raised.
    """
    try:
        query_task = QueryTask(params={"sql_template": arguments.get("sql_template")})
        query_task.is_base = arguments.get("base", False)

        loop = asyncio.get_running_loop()
        outcome = await loop.run_in_executor(None, query_task.execute)

        # Model-like results are dumped to plain JSON data; anything else passes through.
        return outcome.model_dump(mode="json") if hasattr(outcome, "model_dump") else outcome
    except Exception:
        logger.exception("Error executing query")
        raise
|
|
563
|
+
|
|
564
|
+
async def _tool_query_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Run a query diff task and return its result.

    Args:
        arguments: Tool arguments, passed unchanged as the task ``params``.

    Returns:
        The task result. If the result exposes ``model_dump`` it is converted
        with ``model_dump(mode="json")``; otherwise it is returned unchanged.

    Raises:
        Exception: Any error raised while building or executing the task is
            logged with its traceback and then re-raised.
    """
    try:
        task = QueryDiffTask(params=arguments)

        # task.execute is a blocking call, so hand it to the default executor
        # instead of running it on the event loop. get_running_loop() is the
        # supported way to obtain the loop from inside a coroutine.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, task.execute)

        # Model-like results are converted to a JSON-compatible dict.
        if hasattr(result, "model_dump"):
            return result.model_dump(mode="json")
        return result
    except Exception:
        logger.exception("Error executing query diff")
        raise
|
|
579
|
+
|
|
580
|
+
async def _tool_profile_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Run a profile diff task and return its result.

    Args:
        arguments: Tool arguments, passed unchanged as the task ``params``.

    Returns:
        The task result. If the result exposes ``model_dump`` it is converted
        with ``model_dump(mode="json")``; otherwise it is returned unchanged.

    Raises:
        Exception: Any error raised while building or executing the task is
            logged with its traceback and then re-raised.
    """
    try:
        task = ProfileDiffTask(params=arguments)

        # task.execute is a blocking call, so hand it to the default executor
        # instead of running it on the event loop. get_running_loop() is the
        # supported way to obtain the loop from inside a coroutine.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, task.execute)

        # Model-like results are converted to a JSON-compatible dict.
        if hasattr(result, "model_dump"):
            return result.model_dump(mode="json")
        return result
    except Exception:
        logger.exception("Error executing profile diff")
        raise
|
|
595
|
+
|
|
596
|
+
async def run(self):
    """Open the stdio transport and run the MCP server on its streams."""
    async with stdio_server() as streams:
        read_stream, write_stream = streams
        init_options = self.server.create_initialization_options()
        await self.server.run(read_stream, write_stream, init_options)
|
|
600
|
+
|
|
601
|
+
|
|
602
|
+
async def run_mcp_server(**kwargs):
    """
    Configure logging, load the Recce context, and run the MCP server.

    Args:
        **kwargs: Forwarded unchanged to ``load_context``. Two keys are also
            read here:
            - ``mode``: optional string converted to ``RecceServerMode``; an
              unrecognised value is logged as a warning and ``None`` is
              passed to the server instead.
            - ``debug``: optional flag (default ``False``) passed to the
              server.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )

    context = load_context(**kwargs)

    # Translate the optional mode string into the enum; fall back to None
    # when it is absent, empty, or not a valid RecceServerMode value.
    mode = None
    mode_str = kwargs.get("mode")
    if mode_str:
        try:
            mode = RecceServerMode(mode_str)
        except ValueError:
            logger.warning(f"Invalid mode '{mode_str}', using default server mode")

    server = RecceMCPServer(context, mode=mode, debug=kwargs.get("debug", False))
    await server.run()
|
recce/models/types.py
CHANGED
|
@@ -36,8 +36,6 @@ class RunStatus(Enum):
|
|
|
36
36
|
FAILED = "failed"
|
|
37
37
|
CANCELLED = "cancelled"
|
|
38
38
|
RUNNING = "running"
|
|
39
|
-
# This is a special status only in v0.36.0. Replaced by FINISHED. To be removed in the future.
|
|
40
|
-
SUCCESSFUL = "successful"
|
|
41
39
|
|
|
42
40
|
|
|
43
41
|
class Run(BaseModel):
|
|
@@ -151,6 +149,29 @@ class CllNode(BaseModel):
|
|
|
151
149
|
# Column to column dependencies
|
|
152
150
|
columns: Dict[str, CllColumn] = Field(default_factory=dict)
|
|
153
151
|
|
|
152
|
+
# If the node is impacted. Only used if option 'change_analysis' is set
|
|
153
|
+
impacted: Optional[bool] = None
|
|
154
|
+
|
|
155
|
+
@classmethod
def build_cll_node(cls, manifest, resource_key, node_id) -> Optional["CllNode"]:
    """Create a CllNode for ``node_id`` from one resource map of ``manifest``.

    Args:
        manifest: Object exposing the resource map as an attribute named
            ``resource_key``.
        resource_key: Name of the manifest attribute to look in. The values
            ``"nodes"`` and ``"sources"`` get extra handling below.
        node_id: Key of the resource inside that map.

    Returns:
        The populated node, or ``None`` when ``node_id`` is absent or when a
        ``"nodes"`` entry is not a model, seed, or snapshot.
    """
    resources = getattr(manifest, resource_key)
    if node_id not in resources:
        return None

    resource = resources[node_id]
    from_nodes = resource_key == "nodes"
    if from_nodes and resource.resource_type not in ("model", "seed", "snapshot"):
        return None

    node = CllNode(
        id=resource.unique_id,
        name=resource.name,
        package_name=resource.package_name,
        resource_type=resource.resource_type,
    )

    # Sources carry their source name; nodes carry their raw code.
    if resource_key == "sources":
        node.source_name = resource.source_name
    elif from_nodes:
        node.raw_code = resource.raw_code
    return node
|
|
174
|
+
|
|
154
175
|
|
|
155
176
|
class CllData(BaseModel):
|
|
156
177
|
nodes: Dict[str, CllNode] = Field(default_factory=dict)
|
recce/pull_request.py
CHANGED
|
@@ -83,7 +83,7 @@ def fetch_pr_metadata_from_event_path() -> Optional[dict]:
|
|
|
83
83
|
github_repository = os.getenv("GITHUB_REPOSITORY")
|
|
84
84
|
if event_path:
|
|
85
85
|
try:
|
|
86
|
-
with open(event_path, "r") as event_file:
|
|
86
|
+
with open(event_path, "r", encoding="utf-8") as event_file:
|
|
87
87
|
event_data = json.load(event_file)
|
|
88
88
|
|
|
89
89
|
pr_id = event_data["number"]
|