recce-nightly 1.10.0.20250629__py3-none-any.whl → 1.25.0.20251112a2066__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. recce/VERSION +1 -1
  2. recce/__init__.py +5 -0
  3. recce/adapter/dbt_adapter/__init__.py +116 -74
  4. recce/artifact.py +76 -3
  5. recce/cli.py +665 -69
  6. recce/config.py +2 -2
  7. recce/connect_to_cloud.py +1 -1
  8. recce/core.py +3 -3
  9. recce/data/404.html +1 -22
  10. recce/data/__next.__PAGE__.txt +10 -0
  11. recce/data/__next._full.txt +23 -0
  12. recce/data/__next._index.txt +8 -0
  13. recce/data/__next._tree.txt +12 -0
  14. recce/data/_next/static/6LypcDXgyuSaiSCrsmUub/_buildManifest.js +11 -0
  15. recce/data/_next/static/6LypcDXgyuSaiSCrsmUub/_clientMiddlewareManifest.json +1 -0
  16. recce/data/_next/static/chunks/0a2b2dd4b57049c2.js +1 -0
  17. recce/data/_next/static/chunks/19c10d219a6a21ff.js +1 -0
  18. recce/data/_next/static/chunks/24fd885c7180a612.js +1 -0
  19. recce/data/_next/static/chunks/27e66b2eab4adc32.js +19 -0
  20. recce/data/_next/static/chunks/71f88fcc615bf282.js +1 -0
  21. recce/data/_next/static/chunks/917619ab62a32388.js +1 -0
  22. recce/data/_next/static/chunks/93ba5a62932b704f.js +4 -0
  23. recce/data/_next/static/chunks/a43a2a5e06d5a92b.js +1 -0
  24. recce/data/_next/static/chunks/a6c78b24bd8b84fc.js +1 -0
  25. recce/data/_next/static/chunks/b2610ba997ff8c4f.js +110 -0
  26. recce/data/_next/static/chunks/ba2d87265a68599d.css +2 -0
  27. recce/data/_next/static/chunks/c117fd1c1382dd83.js +11 -0
  28. recce/data/_next/static/chunks/c9425ca46eebdde9.js +1 -0
  29. recce/data/_next/static/chunks/cc8a9eadba012be0.css +6 -0
  30. recce/data/_next/static/chunks/e124bccf574a3361.css +1 -0
  31. recce/data/_next/static/chunks/e392ad92847c3e17.js +1 -0
  32. recce/data/_next/static/chunks/e4ce95efe88dae79.js +11 -0
  33. recce/data/_next/static/chunks/e69c777814fea6ed.js +2 -0
  34. recce/data/_next/static/chunks/turbopack-21cfd73037ff57ab.js +3 -0
  35. recce/data/_next/static/media/favicon.a8d38d84.ico +0 -0
  36. recce/data/_next/static/media/montserrat-cyrillic-800-normal.d80d830d.woff2 +0 -0
  37. recce/data/_next/static/media/{montserrat-cyrillic-800-normal.bd5c9f50.woff → montserrat-cyrillic-800-normal.f9d58125.woff} +0 -0
  38. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.076c2a93.woff2 +0 -0
  39. recce/data/_next/static/media/montserrat-latin-800-normal.cde454cc.woff2 +0 -0
  40. recce/data/_next/static/media/{montserrat-latin-800-normal.fc315020.woff → montserrat-latin-800-normal.d5761935.woff} +0 -0
  41. recce/data/_next/static/media/montserrat-latin-ext-800-normal.40ec0659.woff2 +0 -0
  42. recce/data/_next/static/media/{montserrat-latin-ext-800-normal.2e5381b2.woff → montserrat-latin-ext-800-normal.b671449b.woff} +0 -0
  43. recce/data/_next/static/media/{montserrat-vietnamese-800-normal.20c545e6.woff → montserrat-vietnamese-800-normal.9f7b8541.woff} +0 -0
  44. recce/data/_next/static/media/montserrat-vietnamese-800-normal.f9eb854e.woff2 +0 -0
  45. recce/data/_not-found/__next._full.txt +17 -0
  46. recce/data/_not-found/__next._index.txt +8 -0
  47. recce/data/_not-found/__next._not-found.__PAGE__.txt +5 -0
  48. recce/data/_not-found/__next._not-found.txt +4 -0
  49. recce/data/_not-found/__next._tree.txt +10 -0
  50. recce/data/_not-found.html +1 -0
  51. recce/data/_not-found.txt +17 -0
  52. recce/data/auth_callback.html +1 -1
  53. recce/data/index.html +1 -27
  54. recce/data/index.txt +23 -8
  55. recce/event/__init__.py +9 -8
  56. recce/event/collector.py +6 -2
  57. recce/event/track.py +10 -0
  58. recce/github.py +1 -1
  59. recce/mcp_server.py +632 -0
  60. recce/models/types.py +23 -2
  61. recce/pull_request.py +1 -1
  62. recce/run.py +23 -16
  63. recce/server.py +165 -11
  64. recce/state/__init__.py +31 -0
  65. recce/state/cloud.py +632 -0
  66. recce/state/const.py +26 -0
  67. recce/state/local.py +56 -0
  68. recce/state/state.py +119 -0
  69. recce/state/state_loader.py +174 -0
  70. recce/summary.py +2 -1
  71. recce/tasks/dataframe.py +59 -2
  72. recce/tasks/rowcount.py +4 -1
  73. recce/tasks/schema.py +4 -1
  74. recce/tasks/valuediff.py +1 -1
  75. recce/util/api_token.py +11 -2
  76. recce/util/breaking.py +9 -0
  77. recce/util/cll.py +1 -2
  78. recce/util/io.py +2 -2
  79. recce/util/lineage.py +14 -18
  80. recce/util/perf_tracking.py +85 -0
  81. recce/util/recce_cloud.py +229 -5
  82. recce/yaml/__init__.py +2 -2
  83. recce_cloud/__init__.py +15 -0
  84. recce_cloud/api/__init__.py +17 -0
  85. recce_cloud/api/base.py +104 -0
  86. recce_cloud/api/client.py +150 -0
  87. recce_cloud/api/exceptions.py +26 -0
  88. recce_cloud/api/factory.py +63 -0
  89. recce_cloud/api/github.py +72 -0
  90. recce_cloud/api/gitlab.py +78 -0
  91. recce_cloud/artifact.py +57 -0
  92. recce_cloud/ci_providers/__init__.py +9 -0
  93. recce_cloud/ci_providers/base.py +82 -0
  94. recce_cloud/ci_providers/detector.py +147 -0
  95. recce_cloud/ci_providers/github_actions.py +136 -0
  96. recce_cloud/ci_providers/gitlab_ci.py +130 -0
  97. recce_cloud/cli.py +303 -0
  98. recce_cloud/upload.py +213 -0
  99. {recce_nightly-1.10.0.20250629.dist-info → recce_nightly-1.25.0.20251112a2066.dist-info}/METADATA +31 -27
  100. recce_nightly-1.25.0.20251112a2066.dist-info/RECORD +178 -0
  101. {recce_nightly-1.10.0.20250629.dist-info → recce_nightly-1.25.0.20251112a2066.dist-info}/top_level.txt +1 -0
  102. tests/adapter/dbt_adapter/test_dbt_cll.py +68 -17
  103. tests/recce_cloud/__init__.py +0 -0
  104. tests/recce_cloud/test_ci_providers.py +351 -0
  105. tests/recce_cloud/test_cli.py +372 -0
  106. tests/recce_cloud/test_client.py +273 -0
  107. tests/recce_cloud/test_platform_clients.py +279 -0
  108. tests/test_cli.py +106 -3
  109. tests/test_cli_mcp_optional.py +45 -0
  110. tests/test_cloud_listing_cli.py +324 -0
  111. tests/test_core.py +147 -0
  112. tests/test_mcp_server.py +332 -0
  113. tests/test_server.py +6 -6
  114. tests/test_summary.py +14 -6
  115. recce/data/_next/static/Mrb9CZ3toH6Q8xrzNzCrg/_buildManifest.js +0 -1
  116. recce/data/_next/static/chunks/181-acc61ddada3bc0ca.js +0 -43
  117. recce/data/_next/static/chunks/1bff33f1-1ef85cf5e658a751.js +0 -1
  118. recce/data/_next/static/chunks/217-879a84d70f7a907c.js +0 -2
  119. recce/data/_next/static/chunks/29e3cc0d-60045b2e47aa3916.js +0 -1
  120. recce/data/_next/static/chunks/36e1c10d-8e7be4a6c1f6ab2d.js +0 -1
  121. recce/data/_next/static/chunks/3998a672-03adacad07b346ac.js +0 -1
  122. recce/data/_next/static/chunks/3a92ee20-1081c360214f9602.js +0 -1
  123. recce/data/_next/static/chunks/41-f30276c289169376.js +0 -9
  124. recce/data/_next/static/chunks/450c323b-fd94e7ffaa4a5efa.js +0 -1
  125. recce/data/_next/static/chunks/47d8844f-929aed9b1c73a905.js +0 -1
  126. recce/data/_next/static/chunks/608-3b079b544e5d5f5e.js +0 -15
  127. recce/data/_next/static/chunks/6dc81886-adbfa45836061d79.js +0 -1
  128. recce/data/_next/static/chunks/7a8a3e83-edf6dc64b5d5f0a5.js +0 -1
  129. recce/data/_next/static/chunks/7f27ae6c-d5f0438edd5c2a5b.js +0 -1
  130. recce/data/_next/static/chunks/86730205-cfb14e3f051bab35.js +0 -1
  131. recce/data/_next/static/chunks/8d700b6a.8bb140898499c512.js +0 -1
  132. recce/data/_next/static/chunks/92-68460b15fe448f33.js +0 -1
  133. recce/data/_next/static/chunks/9746af58-a42b7d169cacadf0.js +0 -1
  134. recce/data/_next/static/chunks/a30376cd-de84559016d7e133.js +0 -1
  135. recce/data/_next/static/chunks/app/_not-found/page-01ed58b7f971d311.js +0 -1
  136. recce/data/_next/static/chunks/app/layout-292f035bb0d2a98e.js +0 -1
  137. recce/data/_next/static/chunks/app/page-598f8acc82179d01.js +0 -1
  138. recce/data/_next/static/chunks/b63b1b3f-4282bdcf459e075c.js +0 -1
  139. recce/data/_next/static/chunks/bbda5537-9ec25eb1dd62348a.js +0 -1
  140. recce/data/_next/static/chunks/c132bf7d-08cb668a789d6afd.js +0 -1
  141. recce/data/_next/static/chunks/ce84277d-2e5d1d46910cf052.js +0 -1
  142. recce/data/_next/static/chunks/febdd86e-c6b525341634b860.js +0 -54
  143. recce/data/_next/static/chunks/fee69bc6-2dbccaf9b90474e6.js +0 -1
  144. recce/data/_next/static/chunks/framework-ded83d71b51ce901.js +0 -1
  145. recce/data/_next/static/chunks/main-app-39061b0166c47f55.js +0 -1
  146. recce/data/_next/static/chunks/main-b5b3ae20a1405261.js +0 -1
  147. recce/data/_next/static/chunks/pages/_app-437c455677d62394.js +0 -1
  148. recce/data/_next/static/chunks/pages/_error-e7650df18ca04bde.js +0 -1
  149. recce/data/_next/static/chunks/webpack-7b49d5ba7e3a434d.js +0 -1
  150. recce/data/_next/static/css/17a96168e3a9db13.css +0 -1
  151. recce/data/_next/static/css/35c6679a098e1e34.css +0 -1
  152. recce/data/_next/static/css/951e2e0eea2d4a5b.css +0 -14
  153. recce/data/_next/static/css/a2b12b4ba4227f0a.css +0 -3
  154. recce/data/_next/static/media/montserrat-cyrillic-800-normal.22628180.woff2 +0 -0
  155. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.94a63aea.woff2 +0 -0
  156. recce/data/_next/static/media/montserrat-latin-800-normal.6f8fa298.woff2 +0 -0
  157. recce/data/_next/static/media/montserrat-latin-ext-800-normal.013b84f9.woff2 +0 -0
  158. recce/data/_next/static/media/montserrat-vietnamese-800-normal.c0035377.woff2 +0 -0
  159. recce/state.py +0 -786
  160. recce_nightly-1.10.0.20250629.dist-info/RECORD +0 -154
  161. tests/test_state.py +0 -134
  162. /recce/data/_next/static/{Mrb9CZ3toH6Q8xrzNzCrg → 6LypcDXgyuSaiSCrsmUub}/_ssgManifest.js +0 -0
  163. /recce/data/_next/static/chunks/{polyfills-42372ed130431b0a.js → a6dad97d9634a72d.js} +0 -0
  164. /recce/data/_next/static/media/{montserrat-cyrillic-ext-800-normal.e6e0d8d0.woff → montserrat-cyrillic-ext-800-normal.a4fa76b5.woff} +0 -0
  165. /recce/data/_next/static/media/{reload-image.79aabb7d.svg → reload-image.7aa931c7.svg} +0 -0
  166. {recce_nightly-1.10.0.20250629.dist-info → recce_nightly-1.25.0.20251112a2066.dist-info}/WHEEL +0 -0
  167. {recce_nightly-1.10.0.20250629.dist-info → recce_nightly-1.25.0.20251112a2066.dist-info}/entry_points.txt +0 -0
  168. {recce_nightly-1.10.0.20250629.dist-info → recce_nightly-1.25.0.20251112a2066.dist-info}/licenses/LICENSE +0 -0
recce/mcp_server.py ADDED
@@ -0,0 +1,632 @@
1
+ """
2
+ Recce MCP (Model Context Protocol) Server
3
+
4
+ This module implements a stdio-based MCP server that provides tools for
5
+ interacting with Recce's data validation capabilities.
6
+ """
7
+
8
+ import asyncio
9
+ import json
10
+ import logging
11
+ import os
12
+ import textwrap
13
+ import time
14
+ from datetime import datetime, timezone
15
+ from typing import Any, Dict, List, Optional
16
+
17
+ from mcp.server import Server
18
+ from mcp.server.stdio import stdio_server
19
+ from mcp.types import TextContent, Tool
20
+
21
+ from recce.core import RecceContext, load_context
22
+ from recce.server import RecceServerMode
23
+ from recce.tasks.dataframe import DataFrame
24
+ from recce.tasks.profile import ProfileDiffTask
25
+ from recce.tasks.query import QueryDiffTask, QueryTask
26
+ from recce.tasks.rowcount import RowCountDiffTask
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
def _truncate_strings(obj: Any, max_length: int = 200) -> Any:
    """Shorten every over-long string found in a nested dict/list structure.

    Dict values and list items are visited recursively (dict keys are kept
    as they are). A string longer than ``max_length`` is cut down to its first
    ``max_length`` characters followed by ``"..."``. Any other value, including
    tuples and their contents, is returned unchanged.
    """
    if isinstance(obj, str):
        if len(obj) <= max_length:
            return obj
        return obj[:max_length] + "..."

    if isinstance(obj, list):
        return [_truncate_strings(element, max_length) for element in obj]

    if isinstance(obj, dict):
        shortened = {}
        for key, value in obj.items():
            shortened[key] = _truncate_strings(value, max_length)
        return shortened

    return obj
40
+
41
+
42
class MCPLogger:
    """JSON-lines logger for MCP server request/response logging.

    When ``debug`` is false every method is a no-op and no file is touched.
    When ``debug`` is true the log file is emptied on construction and each
    entry is appended as one JSON object per line. All file errors are reported
    through the module logger as warnings; they never propagate to the caller.
    """

    def __init__(self, debug: bool = False, log_file: str = "logs/recce-mcp.json"):
        """Remember the settings and, in debug mode, start with an empty log file.

        Args:
            debug: Enable file logging.
            log_file: Path of the JSON-lines file to write.
        """
        self.debug = debug
        self.log_file = log_file

        if not self.debug:
            return

        try:
            # Create the parent directory if it doesn't exist. This lives inside
            # the try so an unwritable location degrades to a warning instead of
            # preventing the server from starting.
            log_dir = os.path.dirname(log_file)
            if log_dir:
                os.makedirs(log_dir, exist_ok=True)

            # Opening in "w" mode truncates any log left over from a previous run.
            with open(log_file, "w", encoding="utf-8"):
                pass
        except Exception as e:
            logger.warning(f"Failed to initialize log file {log_file}: {e}")

    @staticmethod
    def _utc_timestamp() -> str:
        """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ``."""
        return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    def _write_log(self, log_entry: Dict[str, Any]) -> None:
        """Append ``log_entry`` to the log file as one JSON line (debug mode only)."""
        if not self.debug:
            return

        try:
            with open(self.log_file, "a", encoding="utf-8") as f:
                f.write(json.dumps(log_entry) + "\n")
        except Exception as e:
            # Covers both I/O failures and entries json cannot serialize.
            logger.warning(f"Failed to write to log file {self.log_file}: {e}")

    def log_list_tools(self, tools: "List[Tool]") -> None:
        """Log a list_tools call, recording only the tool names."""
        log_entry = {
            "timestamp": self._utc_timestamp(),
            "type": "list_tools",
            "tools": [tool.name for tool in tools],
        }
        self._write_log(log_entry)

    def log_tool_call(
        self,
        tool_name: str,
        arguments: Dict[str, Any],
        response: Dict[str, Any],
        duration_ms: float,
        error: Optional[str] = None,
    ) -> None:
        """Log a tool call with its request and either its response or its error.

        Args:
            tool_name: Name of the invoked tool.
            arguments: Arguments the tool was called with (logged verbatim).
            response: Tool result; long strings are truncated before logging.
                Ignored when ``error`` is given.
            duration_ms: Wall-clock duration, rounded to two decimals in the log.
            error: Error message if the call failed, ``None`` if it succeeded.
        """
        log_entry = {
            "timestamp": self._utc_timestamp(),
            "type": "call_tool",
            "tool": tool_name,
            "request": arguments,
            "duration_ms": round(duration_ms, 2),
        }

        # Compare against None: an exception with no message stringifies to "",
        # which must still be recorded as a failure rather than as a response.
        if error is not None:
            log_entry["error"] = error
        else:
            log_entry["response"] = _truncate_strings(response)

        self._write_log(log_entry)
106
+
107
+
108
class RecceMCPServer:
    """MCP Server for Recce data validation tools.

    Registers ``list_tools`` and ``call_tool`` handlers on an MCP ``Server``
    named "recce". ``lineage_diff`` and ``schema_diff`` are offered in every
    mode; ``row_count_diff``, ``query``, ``query_diff`` and ``profile_diff`` are
    only listed, and only callable, in server mode.
    """

    # Tools that are refused when the server is not running in server mode.
    _SERVER_ONLY_TOOLS = frozenset({"row_count_diff", "query", "query_diff", "profile_diff"})

    # Tool name -> name of the method implementing it. The method is looked up
    # with getattr at call time, so a handler replaced on the instance is used.
    _TOOL_HANDLERS = {
        "lineage_diff": "_tool_lineage_diff",
        "schema_diff": "_tool_schema_diff",
        "row_count_diff": "_tool_row_count_diff",
        "query": "_tool_query",
        "query_diff": "_tool_query_diff",
        "profile_diff": "_tool_profile_diff",
    }

    def __init__(
        self,
        context: RecceContext,
        mode: Optional[RecceServerMode] = None,
        debug: bool = False,
        log_file: str = "logs/recce-mcp.json",
    ):
        """Create the MCP server and register its handlers.

        Args:
            context: Loaded Recce context used by the lineage and schema tools.
            mode: Server mode; ``None`` (or any falsy value) means server mode.
            debug: Enable JSON request/response logging to ``log_file``.
            log_file: Path of the debug log file.
        """
        self.context = context
        self.mode = mode or RecceServerMode.server
        self.server = Server("recce")
        self.mcp_logger = MCPLogger(debug=debug, log_file=log_file)
        self._setup_handlers()

    def _setup_handlers(self):
        """Register all tool handlers"""

        @self.server.list_tools()
        async def list_tools() -> List[Tool]:
            """List all available tools based on server mode"""
            tools = self._common_tools()

            # Diff tools only available in server mode, not in preview or read-only mode
            if self.mode == RecceServerMode.server:
                tools.extend(self._server_mode_tools())

            self.mcp_logger.log_list_tools(tools)

            return tools

        @self.server.call_tool()
        async def call_tool(name: str, arguments: Dict[str, Any]) -> List[TextContent]:
            """Handle tool calls.

            Any failure is logged and returned to the client as a JSON
            ``{"error": ...}`` text payload instead of being raised.
            """
            start_time = time.perf_counter()

            try:
                # Check if tool is blocked in non-server mode
                if self.mode != RecceServerMode.server and name in self._SERVER_ONLY_TOOLS:
                    raise ValueError(
                        f"Tool '{name}' is not available in {self.mode.value} mode. "
                        "Only 'lineage_diff' and 'schema_diff' are available in this mode."
                    )

                handler_name = self._TOOL_HANDLERS.get(name)
                if handler_name is None:
                    raise ValueError(f"Unknown tool: {name}")
                result = await getattr(self, handler_name)(arguments)

                duration_ms = (time.perf_counter() - start_time) * 1000
                self.mcp_logger.log_tool_call(name, arguments, result, duration_ms)

                return [TextContent(type="text", text=json.dumps(result, indent=2))]
            except Exception as e:
                duration_ms = (time.perf_counter() - start_time) * 1000
                self.mcp_logger.log_tool_call(name, arguments, {}, duration_ms, error=str(e))
                logger.exception(f"Error executing tool {name}")
                return [TextContent(type="text", text=json.dumps({"error": str(e)}, indent=2))]

    @staticmethod
    def _common_tools() -> List[Tool]:
        """Build the tool definitions that are offered in every server mode."""
        return [
            Tool(
                name="lineage_diff",
                description=textwrap.dedent(
                    """
                    Get the lineage diff between production(base) and session(current) for changed models.
                    Returns nodes, parent_map (node dependencies), and change_status/impacted information in compact dataframe format.

                    In parent_map: key is a node index, value is list of parent node indices
                    Nodes dataframe includes: idx, id, name, resource_type, materialized, change_status, impacted.

                    Rendering guidance for Mermaid diagram:
                    Use graph LR and apply these styles based on change_status and impacted:
                    - change_status="added": fill:#d4edda, stroke:#28a745, color:#000000
                    - change_status="removed": fill:#f8d7da, stroke:#dc3545, color:#000000
                    - change_status="modified" AND impacted=true: fill:#fff3cd, stroke:#ffc107, color:#000000
                    - change_status=null AND impacted=true: fill:#ffffff, stroke:#ffc107, color:#000000
                    - change_status=null AND impacted=false: fill:#ffffff, stroke:#d3d3d3, color:#999999
                    """
                ).strip(),
                inputSchema={
                    "type": "object",
                    "properties": {
                        "select": {
                            "type": "string",
                            "description": "dbt selector syntax to filter models (optional)",
                        },
                        "exclude": {
                            "type": "string",
                            "description": "dbt selector syntax to exclude models (optional)",
                        },
                        "packages": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "List of packages to filter (optional)",
                        },
                        "view_mode": {
                            "type": "string",
                            "enum": ["changed_models", "all"],
                            "default": "changed_models",
                            "description": "View mode: 'changed_models' for only changed models (default), 'all' for all models",
                        },
                    },
                },
            ),
            Tool(
                name="schema_diff",
                description="Get the schema diff (column changes) between base and current environments. "
                "Shows added, removed, and type-changed columns in compact dataframe format.",
                inputSchema={
                    "type": "object",
                    "properties": {
                        "select": {
                            "type": "string",
                            "description": "dbt selector syntax to filter models (optional)",
                        },
                        "exclude": {
                            "type": "string",
                            "description": "dbt selector syntax to exclude models (optional)",
                        },
                        "packages": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "List of packages to filter (optional)",
                        },
                    },
                },
            ),
        ]

    @staticmethod
    def _server_mode_tools() -> List[Tool]:
        """Build the tool definitions that are only offered in server mode."""
        return [
            Tool(
                name="row_count_diff",
                description="Compare row counts between base and current environments for specified models.",
                inputSchema={
                    "type": "object",
                    "properties": {
                        "node_names": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "List of model names to check row counts (optional)",
                        },
                        "node_ids": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "List of node IDs to check row counts (optional)",
                        },
                        "select": {
                            "type": "string",
                            "description": "dbt selector syntax to filter models (optional)",
                        },
                        "exclude": {
                            "type": "string",
                            "description": "dbt selector syntax to exclude models (optional)",
                        },
                    },
                },
            ),
            Tool(
                name="query",
                description="Execute a SQL query on the current environment. "
                "Supports Jinja templates with dbt macros like {{ ref('model_name') }}.",
                inputSchema={
                    "type": "object",
                    "properties": {
                        "sql_template": {
                            "type": "string",
                            "description": "SQL query template with optional Jinja syntax",
                        },
                        "base": {
                            "type": "boolean",
                            "description": "Whether to run on base environment (default: false)",
                            "default": False,
                        },
                    },
                    "required": ["sql_template"],
                },
            ),
            Tool(
                name="query_diff",
                description="Execute SQL queries on both base and current environments and compare results. "
                "Supports primary keys for row-level comparison.",
                inputSchema={
                    "type": "object",
                    "properties": {
                        "sql_template": {
                            "type": "string",
                            "description": "SQL query template for current environment",
                        },
                        "base_sql_template": {
                            "type": "string",
                            "description": "SQL query template for base environment (optional, defaults to sql_template)",
                        },
                        "primary_keys": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "List of primary key columns for row comparison (optional)",
                        },
                    },
                    "required": ["sql_template"],
                },
            ),
            Tool(
                name="profile_diff",
                description="Generate and compare statistical profiles (min, max, avg, distinct count, etc.) "
                "for columns in a model between base and current environments.",
                inputSchema={
                    "type": "object",
                    "properties": {
                        "model": {
                            "type": "string",
                            "description": "Model name to profile",
                        },
                        "columns": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "List of column names to profile (optional, profiles all columns if not specified)",
                        },
                    },
                    "required": ["model"],
                },
            ),
        ]

    @staticmethod
    async def _execute_task(task: Any) -> Any:
        """Run ``task.execute`` in the default executor and return a JSON-ready result.

        ``task.execute`` is a synchronous call, so it is moved off the event
        loop. ``get_running_loop`` is used because this always runs inside a
        coroutine. A result exposing ``model_dump`` is dumped in JSON mode;
        anything else is returned as is.
        """
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, task.execute)
        if hasattr(result, "model_dump"):
            return result.model_dump(mode="json")
        return result

    async def _tool_lineage_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get lineage diff between base and current.

        Args:
            arguments: Optional ``select``, ``exclude``, ``packages`` and
                ``view_mode`` (defaults to ``"changed_models"``) filters.

        Returns:
            ``{"nodes": <dumped dataframe>, "parent_map": {idx: [parent idx, ...]}}``
            where ``idx`` is the row index assigned to each selected node.
        """
        try:
            # Extract filter arguments
            select = arguments.get("select")
            exclude = arguments.get("exclude")
            packages = arguments.get("packages")
            view_mode = arguments.get("view_mode", "changed_models")

            # Get lineage diff from adapter (returns a Pydantic LineageDiff model)
            lineage_diff = self.context.get_lineage_diff().model_dump(mode="json")

            # Apply node selection filtering if arguments provided
            selected_node_ids = self.context.adapter.select_nodes(
                select=select,
                exclude=exclude,
                packages=packages,
                view_mode=view_mode,
            )
            impacted_node_ids = self.context.adapter.select_nodes(
                select="state:modified+",
            )

            # Get diff information for change_status
            diff_info = lineage_diff.get("diff", {})

            parent_map = {}
            nodes = {}

            # Merge parent_map and nodes: base first, then current overrides
            for env_key in ["base", "current"]:
                if env_key not in lineage_diff:
                    continue

                env_data = lineage_diff[env_key]

                # Merge parent_map (filtering by selected nodes)
                if "parent_map" in env_data:
                    for node_id, parents in env_data["parent_map"].items():
                        if node_id in selected_node_ids:
                            parent_map[node_id] = parents

                # Merge nodes (filtering by selected nodes)
                if "nodes" in env_data:
                    for node_id, node_info in env_data["nodes"].items():
                        if node_id not in selected_node_ids:
                            continue

                        nodes[node_id] = {
                            "name": node_info.get("name"),
                            "resource_type": node_info.get("resource_type"),
                        }

                        # "config" may be present but null in the dumped model.
                        materialized = (node_info.get("config") or {}).get("materialized")
                        if materialized is not None:
                            nodes[node_id]["materialized"] = materialized

            # Create id to idx mapping
            id_to_idx = {node_id: idx for idx, node_id in enumerate(nodes)}

            # Prepare node data for DataFrame
            nodes_data = [
                [
                    id_to_idx[node_id],
                    node_id,
                    node_info.get("name"),
                    node_info.get("resource_type"),
                    node_info.get("materialized"),
                    diff_info.get(node_id, {}).get("change_status"),
                    node_id in impacted_node_ids,
                ]
                for node_id, node_info in nodes.items()
            ]

            # Create nodes DataFrame using from_data with simple dict format
            nodes_df = DataFrame.from_data(
                columns={
                    "idx": "integer",
                    "id": "text",
                    "name": "text",
                    "resource_type": "text",
                    "materialized": "text",
                    "change_status": "text",
                    "impacted": "boolean",
                },
                data=nodes_data,
            )

            # Map parent_map IDs to indices; parents outside the selection are dropped
            parent_map_indexed = {
                id_to_idx[node_id]: [id_to_idx[p] for p in parents if p in id_to_idx]
                for node_id, parents in parent_map.items()
                if node_id in id_to_idx
            }

            return {"nodes": nodes_df.model_dump(mode="json"), "parent_map": parent_map_indexed}

        except Exception:
            logger.exception("Error getting lineage diff")
            raise

    async def _tool_schema_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get schema diff (column changes) between base and current.

        Only nodes present in both environments are compared. Rows are emitted
        in sorted node/column order so that the output, and which rows survive
        the 100-row limit, are stable between runs.

        Args:
            arguments: Optional ``select``, ``exclude`` and ``packages`` filters.

        Returns:
            Dumped dataframe with ``node_id``, ``column`` and ``change_status``
            (``"added"``, ``"removed"`` or ``"modified"``) columns.
        """
        try:
            # Extract filter arguments
            select = arguments.get("select")
            exclude = arguments.get("exclude")
            packages = arguments.get("packages")

            # Get lineage diff from adapter
            lineage_diff = self.context.get_lineage_diff().model_dump(mode="json")

            # Get all nodes from current environment
            current_nodes = {}
            if "current" in lineage_diff and "nodes" in lineage_diff["current"]:
                current_nodes = lineage_diff["current"]["nodes"]

            # Filter to only nodes that exist in both base and current (exclude added nodes)
            base_nodes = lineage_diff.get("base", {}).get("nodes", {})
            nodes_to_compare = set(current_nodes.keys()) & set(base_nodes.keys())

            # Apply filtering if arguments provided
            if select or exclude or packages:
                selected_node_ids = self.context.adapter.select_nodes(
                    select=select,
                    exclude=exclude,
                    packages=packages,
                )
                nodes_to_compare = nodes_to_compare & selected_node_ids

            # Build schema changes
            schema_changes = []

            for node_id in sorted(nodes_to_compare):
                # "columns" may be present but null in the dumped model.
                base_columns = base_nodes.get(node_id, {}).get("columns") or {}
                current_columns = current_nodes.get(node_id, {}).get("columns") or {}

                base_col_names = set(base_columns.keys())
                current_col_names = set(current_columns.keys())

                # Find added columns (in current but not in base)
                for col_name in sorted(current_col_names - base_col_names):
                    schema_changes.append([node_id, col_name, "added"])

                # Find removed columns (in base but not in current)
                for col_name in sorted(base_col_names - current_col_names):
                    schema_changes.append([node_id, col_name, "removed"])

                # Find modified columns (in both but with different types)
                for col_name in sorted(base_col_names & current_col_names):
                    base_col_type = base_columns[col_name].get("type")
                    current_col_type = current_columns[col_name].get("type")
                    if base_col_type != current_col_type:
                        schema_changes.append([node_id, col_name, "modified"])

            # Check if there are more than 100 rows
            limit = 100
            has_more = len(schema_changes) > limit
            limited_schema_changes = schema_changes[:limit]

            # Convert schema changes to dataframe format using DataFrame.from_data()
            diff_df = DataFrame.from_data(
                columns={
                    "node_id": "text",
                    "column": "text",
                    "change_status": "text",
                },
                data=limited_schema_changes,
                limit=limit,
                more=has_more,
            )
            return diff_df.model_dump(mode="json")

        except Exception:
            logger.exception("Error getting schema diff")
            raise

    async def _tool_row_count_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute row count diff task with ``arguments`` as the task params."""
        try:
            return await self._execute_task(RowCountDiffTask(params=arguments))
        except Exception:
            logger.exception("Error executing row count diff")
            raise

    async def _tool_query(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute a query.

        Args:
            arguments: ``sql_template`` (the query) and optional ``base`` flag
                (default ``False``) that is copied onto the task's ``is_base``.
        """
        try:
            sql_template = arguments.get("sql_template")
            is_base = arguments.get("base", False)

            task = QueryTask(params={"sql_template": sql_template})
            task.is_base = is_base

            return await self._execute_task(task)
        except Exception:
            logger.exception("Error executing query")
            raise

    async def _tool_query_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute query diff task with ``arguments`` as the task params."""
        try:
            return await self._execute_task(QueryDiffTask(params=arguments))
        except Exception:
            logger.exception("Error executing query diff")
            raise

    async def _tool_profile_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute profile diff task with ``arguments`` as the task params."""
        try:
            return await self._execute_task(ProfileDiffTask(params=arguments))
        except Exception:
            logger.exception("Error executing profile diff")
            raise

    async def run(self):
        """Run the MCP server over the stdio transport."""
        async with stdio_server() as (read_stream, write_stream):
            await self.server.run(read_stream, write_stream, self.server.create_initialization_options())
600
+
601
+
602
async def run_mcp_server(**kwargs):
    """
    Configure logging, load the Recce context and run the MCP server.

    Args:
        **kwargs: Passed unchanged to ``load_context`` (dbt options, etc.).
            ``mode``: optional server mode value; an unrecognised value is
                reported with a warning and the default server mode is used.
            ``debug``: optional flag enabling MCP request/response logging
                (defaults to ``False``).
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )

    recce_context = load_context(**kwargs)

    # Translate the requested mode into the RecceServerMode enum; leaving it as
    # None lets RecceMCPServer fall back to its default.
    server_mode = None
    requested_mode = kwargs.get("mode")
    if requested_mode:
        try:
            server_mode = RecceServerMode(requested_mode)
        except ValueError:
            logger.warning(f"Invalid mode '{requested_mode}', using default server mode")

    debug_enabled = kwargs.get("debug", False)

    mcp_server = RecceMCPServer(recce_context, mode=server_mode, debug=debug_enabled)
    await mcp_server.run()
recce/models/types.py CHANGED
@@ -36,8 +36,6 @@ class RunStatus(Enum):
36
36
  FAILED = "failed"
37
37
  CANCELLED = "cancelled"
38
38
  RUNNING = "running"
39
- # This is a special status only in v0.36.0. Replaced by FINISHED. To be removed in the future.
40
- SUCCESSFUL = "successful"
41
39
 
42
40
 
43
41
  class Run(BaseModel):
@@ -151,6 +149,29 @@ class CllNode(BaseModel):
151
149
  # Column to column dependencies
152
150
  columns: Dict[str, CllColumn] = Field(default_factory=dict)
153
151
 
152
+ # If the node is impacted. Only used if option 'change_analysis' is set
153
+ impacted: Optional[bool] = None
154
+
155
@classmethod
def build_cll_node(cls, manifest, resource_key, node_id) -> Optional["CllNode"]:
    """Build a ``CllNode`` for ``node_id`` from ``manifest.<resource_key>``.

    Returns ``None`` when ``node_id`` is not in that collection, or when
    ``resource_key`` is ``"nodes"`` and the entry's resource type is not one of
    model, seed or snapshot. For ``"sources"`` the source name is copied onto
    the result; for ``"nodes"`` the raw code is copied.
    """
    collection = getattr(manifest, resource_key)
    if node_id not in collection:
        return None

    entry = collection[node_id]
    from_nodes = resource_key == "nodes"
    if from_nodes and entry.resource_type not in ["model", "seed", "snapshot"]:
        return None

    built = CllNode(
        id=entry.unique_id,
        name=entry.name,
        package_name=entry.package_name,
        resource_type=entry.resource_type,
    )
    if from_nodes:
        built.raw_code = entry.raw_code
    elif resource_key == "sources":
        built.source_name = entry.source_name
    return built
174
+
154
175
 
155
176
  class CllData(BaseModel):
156
177
  nodes: Dict[str, CllNode] = Field(default_factory=dict)
recce/pull_request.py CHANGED
@@ -83,7 +83,7 @@ def fetch_pr_metadata_from_event_path() -> Optional[dict]:
83
83
  github_repository = os.getenv("GITHUB_REPOSITORY")
84
84
  if event_path:
85
85
  try:
86
- with open(event_path, "r") as event_file:
86
+ with open(event_path, "r", encoding="utf-8") as event_file:
87
87
  event_data = json.load(event_file)
88
88
 
89
89
  pr_id = event_data["number"]