recce-nightly 1.2.0.20250506__py3-none-any.whl → 1.26.0.20251124__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recce-nightly might be problematic. Click here for more details.

Files changed (213) hide show
  1. recce/VERSION +1 -1
  2. recce/__init__.py +27 -22
  3. recce/adapter/base.py +11 -14
  4. recce/adapter/dbt_adapter/__init__.py +810 -480
  5. recce/adapter/dbt_adapter/dbt_version.py +3 -0
  6. recce/adapter/sqlmesh_adapter.py +24 -35
  7. recce/apis/check_api.py +39 -28
  8. recce/apis/check_func.py +33 -27
  9. recce/apis/run_api.py +25 -19
  10. recce/apis/run_func.py +29 -23
  11. recce/artifact.py +119 -51
  12. recce/cli.py +1299 -323
  13. recce/config.py +42 -33
  14. recce/connect_to_cloud.py +138 -0
  15. recce/core.py +55 -47
  16. recce/data/404.html +1 -1
  17. recce/data/__next.__PAGE__.txt +10 -0
  18. recce/data/__next._full.txt +23 -0
  19. recce/data/__next._head.txt +8 -0
  20. recce/data/__next._index.txt +8 -0
  21. recce/data/__next._tree.txt +5 -0
  22. recce/data/_next/static/52aV_JrNUZU6dMFgvTQEO/_buildManifest.js +11 -0
  23. recce/data/_next/static/52aV_JrNUZU6dMFgvTQEO/_clientMiddlewareManifest.json +1 -0
  24. recce/data/_next/static/chunks/02b996c7f6a29a06.js +4 -0
  25. recce/data/_next/static/chunks/19c10d219a6a21ff.js +1 -0
  26. recce/data/_next/static/chunks/2df9ec28a061971d.js +11 -0
  27. recce/data/_next/static/chunks/3098c987393bda15.js +1 -0
  28. recce/data/_next/static/chunks/393dc43e483f717a.css +2 -0
  29. recce/data/_next/static/chunks/399e8d91a7e45073.js +2 -0
  30. recce/data/_next/static/chunks/4d0186f631230245.js +1 -0
  31. recce/data/_next/static/chunks/5794ba9e10a9c060.js +11 -0
  32. recce/data/_next/static/chunks/715761c929a3f28b.js +110 -0
  33. recce/data/_next/static/chunks/71f88fcc615bf282.js +1 -0
  34. recce/data/_next/static/chunks/80d2a95eaf1201ea.js +1 -0
  35. recce/data/_next/static/chunks/9979c6109bbbee35.js +1 -0
  36. recce/data/_next/static/chunks/99d638224186c118.js +1 -0
  37. recce/data/_next/static/chunks/d003eb36240e92f3.js +1 -0
  38. recce/data/_next/static/chunks/d3167cdfec4fc351.js +1 -0
  39. recce/data/_next/static/chunks/e124bccf574a3361.css +1 -0
  40. recce/data/_next/static/chunks/f40141db1bdb46f0.css +6 -0
  41. recce/data/_next/static/chunks/fcc53a88741a52f9.js +1 -0
  42. recce/data/_next/static/chunks/turbopack-b1920d28cfb1f28d.js +3 -0
  43. recce/data/_next/static/media/favicon.a8d38d84.ico +0 -0
  44. recce/data/_next/static/media/montserrat-cyrillic-800-normal.d80d830d.woff2 +0 -0
  45. recce/data/_next/static/media/montserrat-cyrillic-800-normal.f9d58125.woff +0 -0
  46. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.076c2a93.woff2 +0 -0
  47. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.a4fa76b5.woff +0 -0
  48. recce/data/_next/static/media/montserrat-latin-800-normal.cde454cc.woff2 +0 -0
  49. recce/data/_next/static/media/montserrat-latin-800-normal.d5761935.woff +0 -0
  50. recce/data/_next/static/media/montserrat-latin-ext-800-normal.40ec0659.woff2 +0 -0
  51. recce/data/_next/static/media/montserrat-latin-ext-800-normal.b671449b.woff +0 -0
  52. recce/data/_next/static/media/montserrat-vietnamese-800-normal.9f7b8541.woff +0 -0
  53. recce/data/_next/static/media/montserrat-vietnamese-800-normal.f9eb854e.woff2 +0 -0
  54. recce/data/_next/static/media/reload-image.7aa931c7.svg +4 -0
  55. recce/data/_not-found/__next._full.txt +17 -0
  56. recce/data/_not-found/__next._head.txt +8 -0
  57. recce/data/_not-found/__next._index.txt +8 -0
  58. recce/data/_not-found/__next._not-found.__PAGE__.txt +5 -0
  59. recce/data/_not-found/__next._not-found.txt +4 -0
  60. recce/data/_not-found/__next._tree.txt +3 -0
  61. recce/data/_not-found.html +1 -0
  62. recce/data/_not-found.txt +17 -0
  63. recce/data/auth_callback.html +68 -0
  64. recce/data/imgs/reload-image.svg +4 -0
  65. recce/data/index.html +1 -27
  66. recce/data/index.txt +23 -7
  67. recce/diff.py +6 -12
  68. recce/event/__init__.py +86 -74
  69. recce/event/collector.py +33 -22
  70. recce/event/track.py +49 -27
  71. recce/exceptions.py +1 -1
  72. recce/git.py +7 -7
  73. recce/github.py +57 -53
  74. recce/mcp_server.py +716 -0
  75. recce/models/__init__.py +4 -1
  76. recce/models/check.py +6 -7
  77. recce/models/run.py +1 -0
  78. recce/models/types.py +131 -28
  79. recce/pull_request.py +27 -25
  80. recce/run.py +165 -121
  81. recce/server.py +303 -111
  82. recce/state/__init__.py +31 -0
  83. recce/state/cloud.py +632 -0
  84. recce/state/const.py +26 -0
  85. recce/state/local.py +56 -0
  86. recce/state/state.py +119 -0
  87. recce/state/state_loader.py +174 -0
  88. recce/summary.py +188 -143
  89. recce/tasks/__init__.py +19 -3
  90. recce/tasks/core.py +11 -13
  91. recce/tasks/dataframe.py +82 -18
  92. recce/tasks/histogram.py +69 -34
  93. recce/tasks/lineage.py +2 -2
  94. recce/tasks/profile.py +152 -86
  95. recce/tasks/query.py +139 -87
  96. recce/tasks/rowcount.py +37 -31
  97. recce/tasks/schema.py +18 -15
  98. recce/tasks/top_k.py +35 -35
  99. recce/tasks/valuediff.py +216 -152
  100. recce/util/__init__.py +3 -0
  101. recce/util/api_token.py +80 -0
  102. recce/util/breaking.py +87 -85
  103. recce/util/cll.py +274 -219
  104. recce/util/io.py +22 -17
  105. recce/util/lineage.py +65 -16
  106. recce/util/logger.py +1 -1
  107. recce/util/onboarding_state.py +45 -0
  108. recce/util/perf_tracking.py +85 -0
  109. recce/util/recce_cloud.py +322 -72
  110. recce/util/singleton.py +4 -4
  111. recce/yaml/__init__.py +7 -10
  112. recce_cloud/__init__.py +24 -0
  113. recce_cloud/api/__init__.py +17 -0
  114. recce_cloud/api/base.py +111 -0
  115. recce_cloud/api/client.py +150 -0
  116. recce_cloud/api/exceptions.py +26 -0
  117. recce_cloud/api/factory.py +63 -0
  118. recce_cloud/api/github.py +76 -0
  119. recce_cloud/api/gitlab.py +82 -0
  120. recce_cloud/artifact.py +57 -0
  121. recce_cloud/ci_providers/__init__.py +9 -0
  122. recce_cloud/ci_providers/base.py +82 -0
  123. recce_cloud/ci_providers/detector.py +147 -0
  124. recce_cloud/ci_providers/github_actions.py +136 -0
  125. recce_cloud/ci_providers/gitlab_ci.py +130 -0
  126. recce_cloud/cli.py +245 -0
  127. recce_cloud/upload.py +214 -0
  128. {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/METADATA +68 -37
  129. recce_nightly-1.26.0.20251124.dist-info/RECORD +180 -0
  130. {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/WHEEL +1 -1
  131. {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/top_level.txt +1 -0
  132. tests/adapter/dbt_adapter/conftest.py +9 -5
  133. tests/adapter/dbt_adapter/dbt_test_helper.py +37 -22
  134. tests/adapter/dbt_adapter/test_dbt_adapter.py +0 -15
  135. tests/adapter/dbt_adapter/test_dbt_cll.py +656 -41
  136. tests/adapter/dbt_adapter/test_selector.py +22 -21
  137. tests/recce_cloud/__init__.py +0 -0
  138. tests/recce_cloud/test_ci_providers.py +351 -0
  139. tests/recce_cloud/test_cli.py +372 -0
  140. tests/recce_cloud/test_client.py +273 -0
  141. tests/recce_cloud/test_platform_clients.py +333 -0
  142. tests/tasks/conftest.py +1 -1
  143. tests/tasks/test_histogram.py +58 -66
  144. tests/tasks/test_lineage.py +36 -23
  145. tests/tasks/test_preset_checks.py +45 -31
  146. tests/tasks/test_profile.py +339 -15
  147. tests/tasks/test_query.py +46 -46
  148. tests/tasks/test_row_count.py +65 -46
  149. tests/tasks/test_schema.py +65 -42
  150. tests/tasks/test_top_k.py +22 -18
  151. tests/tasks/test_valuediff.py +43 -32
  152. tests/test_cli.py +174 -60
  153. tests/test_cli_mcp_optional.py +45 -0
  154. tests/test_cloud_listing_cli.py +324 -0
  155. tests/test_config.py +7 -9
  156. tests/test_connect_to_cloud.py +82 -0
  157. tests/test_core.py +151 -4
  158. tests/test_dbt.py +7 -7
  159. tests/test_mcp_server.py +332 -0
  160. tests/test_pull_request.py +1 -1
  161. tests/test_server.py +25 -19
  162. tests/test_summary.py +29 -17
  163. recce/data/_next/static/Kcbs3GEIyH2LxgLYat0es/_buildManifest.js +0 -1
  164. recce/data/_next/static/chunks/1f229bf6-d9fe92e56db8d93b.js +0 -1
  165. recce/data/_next/static/chunks/29e3cc0d-8c150e37dff9631b.js +0 -1
  166. recce/data/_next/static/chunks/368-7587b306577df275.js +0 -65
  167. recce/data/_next/static/chunks/36e1c10d-bb0210cbd6573a8d.js +0 -1
  168. recce/data/_next/static/chunks/3998a672-eaad84bdd88cc73e.js +0 -1
  169. recce/data/_next/static/chunks/3a92ee20-3b5d922d4157af5e.js +0 -1
  170. recce/data/_next/static/chunks/450c323b-1bb5db526e54435a.js +0 -1
  171. recce/data/_next/static/chunks/47d8844f-79a1b53c66a7d7ec.js +0 -1
  172. recce/data/_next/static/chunks/6dc81886-c94b9b91bc2c3caf.js +0 -1
  173. recce/data/_next/static/chunks/6ef81909-694dc38134099299.js +0 -1
  174. recce/data/_next/static/chunks/700-3b65fc3666820d00.js +0 -2
  175. recce/data/_next/static/chunks/7a8a3e83-d7fa409d97b38b2b.js +0 -1
  176. recce/data/_next/static/chunks/7f27ae6c-413f6b869a04183a.js +0 -1
  177. recce/data/_next/static/chunks/8d700b6a-f0b1f6b9e0d97ce2.js +0 -1
  178. recce/data/_next/static/chunks/9746af58-d74bef4d03eea6ab.js +0 -1
  179. recce/data/_next/static/chunks/a30376cd-7d806e1602f2dc3a.js +0 -1
  180. recce/data/_next/static/chunks/app/_not-found/page-8a886fa0855c3105.js +0 -1
  181. recce/data/_next/static/chunks/app/layout-9102e22cb73f74d6.js +0 -1
  182. recce/data/_next/static/chunks/app/page-cee661090afbd6aa.js +0 -1
  183. recce/data/_next/static/chunks/b63b1b3f-7395c74e11a14e95.js +0 -1
  184. recce/data/_next/static/chunks/c132bf7d-8102037f9ccf372a.js +0 -1
  185. recce/data/_next/static/chunks/c1ceaa8b-a1e442154d23515e.js +0 -1
  186. recce/data/_next/static/chunks/cd9f8d63-cf0d5a7b0f7a92e8.js +0 -54
  187. recce/data/_next/static/chunks/ce84277d-f42c2c58049cea2d.js +0 -1
  188. recce/data/_next/static/chunks/e24bf851-0f8cbc99656833e7.js +0 -1
  189. recce/data/_next/static/chunks/fee69bc6-f17d36c080742e74.js +0 -1
  190. recce/data/_next/static/chunks/framework-ded83d71b51ce901.js +0 -1
  191. recce/data/_next/static/chunks/main-a0859f1f36d0aa6c.js +0 -1
  192. recce/data/_next/static/chunks/main-app-0225a2255968e566.js +0 -1
  193. recce/data/_next/static/chunks/pages/_app-d5672bf3d8b6371b.js +0 -1
  194. recce/data/_next/static/chunks/pages/_error-ed75be3f25588548.js +0 -1
  195. recce/data/_next/static/chunks/webpack-567d72f0bc0820d5.js +0 -1
  196. recce/data/_next/static/css/c9ecb46a4b21c126.css +0 -14
  197. recce/data/_next/static/media/montserrat-cyrillic-800-normal.22628180.woff2 +0 -0
  198. recce/data/_next/static/media/montserrat-cyrillic-800-normal.31d693bb.woff +0 -0
  199. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.7e2c1e62.woff +0 -0
  200. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.94a63aea.woff2 +0 -0
  201. recce/data/_next/static/media/montserrat-latin-800-normal.6f8fa298.woff2 +0 -0
  202. recce/data/_next/static/media/montserrat-latin-800-normal.97e20d5e.woff +0 -0
  203. recce/data/_next/static/media/montserrat-latin-ext-800-normal.013b84f9.woff2 +0 -0
  204. recce/data/_next/static/media/montserrat-latin-ext-800-normal.aff52ab0.woff +0 -0
  205. recce/data/_next/static/media/montserrat-vietnamese-800-normal.5f21869b.woff +0 -0
  206. recce/data/_next/static/media/montserrat-vietnamese-800-normal.c0035377.woff2 +0 -0
  207. recce/state.py +0 -753
  208. recce_nightly-1.2.0.20250506.dist-info/RECORD +0 -142
  209. tests/test_state.py +0 -123
  210. /recce/data/_next/static/{Kcbs3GEIyH2LxgLYat0es → 52aV_JrNUZU6dMFgvTQEO}/_ssgManifest.js +0 -0
  211. /recce/data/_next/static/chunks/{polyfills-42372ed130431b0a.js → a6dad97d9634a72d.js} +0 -0
  212. {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/entry_points.txt +0 -0
  213. {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/licenses/LICENSE +0 -0
recce/mcp_server.py ADDED
@@ -0,0 +1,716 @@
1
+ """
2
+ Recce MCP (Model Context Protocol) Server
3
+
4
+ This module implements a stdio-based MCP server that provides tools for
5
+ interacting with Recce's data validation capabilities.
6
+ """
7
+
8
+ import asyncio
9
+ import json
10
+ import logging
11
+ import os
12
+ import textwrap
13
+ import time
14
+ from datetime import datetime, timezone
15
+ from typing import Any, Dict, List, Optional
16
+
17
+ from mcp.server import Server
18
+ from mcp.server.stdio import stdio_server
19
+ from mcp.types import TextContent, Tool
20
+
21
+ from recce.core import RecceContext, load_context
22
+ from recce.server import RecceServerMode
23
+ from recce.tasks.dataframe import DataFrame
24
+ from recce.tasks.profile import ProfileDiffTask
25
+ from recce.tasks.query import QueryDiffTask, QueryTask
26
+ from recce.tasks.rowcount import RowCountDiffTask
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
def _truncate_strings(obj: Any, max_length: int = 200) -> Any:
    """Return a copy of ``obj`` with every over-long string shortened.

    Dicts and lists are walked recursively and rebuilt; any string longer
    than ``max_length`` is cut to ``max_length`` characters and suffixed
    with "...". Every other value (including tuples and dict keys) is
    returned untouched.
    """
    if isinstance(obj, str):
        if len(obj) <= max_length:
            return obj
        return obj[:max_length] + "..."

    if isinstance(obj, list):
        return [_truncate_strings(element, max_length) for element in obj]

    if isinstance(obj, dict):
        shortened = {}
        for key, value in obj.items():
            shortened[key] = _truncate_strings(value, max_length)
        return shortened

    return obj
40
+
41
+
42
class MCPLogger:
    """JSON-lines logger for MCP server request/response logging.

    When ``debug`` is false every logging method is a no-op and nothing is
    written to disk. When it is true, the log file is truncated at
    construction time and one JSON object per event is appended afterwards.
    All file access is best-effort: failures are reported through the module
    logger as warnings and never raised to the caller.
    """

    def __init__(self, debug: bool = False, log_file: str = "logs/recce-mcp.json"):
        """Store the settings and, in debug mode, create/truncate the log file.

        Args:
            debug: Enable file logging.
            log_file: Path of the JSON-lines log file.
        """
        self.debug = debug
        self.log_file = log_file

        if not self.debug:
            return

        try:
            # Directory creation lives inside the try block so an unwritable
            # location degrades to a warning instead of aborting start-up.
            log_dir = os.path.dirname(log_file)
            if log_dir:
                os.makedirs(log_dir, exist_ok=True)

            # Opening in "w" mode is enough to clear any previous content.
            with open(log_file, "w", encoding="utf-8"):
                pass
        except Exception as e:
            logger.warning(f"Failed to initialize log file {log_file}: {e}")

    @staticmethod
    def _timestamp() -> str:
        """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
        return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    def _write_log(self, log_entry: Dict[str, Any]) -> None:
        """Append ``log_entry`` as one JSON line; does nothing unless debug is on."""
        if not self.debug:
            return

        try:
            with open(self.log_file, "a", encoding="utf-8") as f:
                f.write(json.dumps(log_entry) + "\n")
        except Exception as e:
            # Also covers entries that are not JSON-serializable.
            logger.warning(f"Failed to write to log file {self.log_file}: {e}")

    def log_list_tools(self, tools: "List[Tool]") -> None:
        """Log a list_tools call, recording only the tool names."""
        self._write_log(
            {
                "timestamp": self._timestamp(),
                "type": "list_tools",
                "tools": [tool.name for tool in tools],
            }
        )

    def log_tool_call(
        self,
        tool_name: str,
        arguments: Dict[str, Any],
        response: Dict[str, Any],
        duration_ms: float,
        error: Optional[str] = None,
    ) -> None:
        """Log a tool call with its request and either its response or its error.

        Args:
            tool_name: Name of the tool that was called.
            arguments: Arguments the tool was called with (logged as-is).
            response: Tool result; long strings are truncated before logging.
            duration_ms: Elapsed time in milliseconds, rounded to 2 decimals.
            error: Error message. Any value other than ``None`` marks the call
                as failed, in which case ``response`` is not logged.
        """
        log_entry = {
            "timestamp": self._timestamp(),
            "type": "call_tool",
            "tool": tool_name,
            "request": arguments,
            "duration_ms": round(duration_ms, 2),
        }

        # Compare against None rather than truthiness: str(exc) is "" for an
        # exception raised without a message, and that is still a failure.
        if error is not None:
            log_entry["error"] = error
        else:
            log_entry["response"] = _truncate_strings(response)

        self._write_log(log_entry)
106
+
107
+
108
+ class RecceMCPServer:
109
+ """MCP Server for Recce data validation tools"""
110
+
111
def __init__(
    self,
    context: RecceContext,
    mode: Optional[RecceServerMode] = None,
    debug: bool = False,
    log_file: str = "logs/recce-mcp.json",
):
    """Create the MCP server wrapper and register its tool handlers.

    Args:
        context: Recce context the tools read lineage and adapters from.
        mode: Server mode; falls back to ``RecceServerMode.server`` when
            not given.
        debug: Forwarded to ``MCPLogger`` to enable JSON file logging.
        log_file: Forwarded to ``MCPLogger`` as the log file path.
    """
    self.context = context
    self.mode = mode if mode else RecceServerMode.server
    self.mcp_logger = MCPLogger(debug=debug, log_file=log_file)
    self.server = Server("recce")
    # Handlers close over self.server, self.mode and self.mcp_logger, so
    # they are registered only after all three exist.
    self._setup_handlers()
123
+
124
def _setup_handlers(self):
    """Register the ``list_tools`` and ``call_tool`` handlers on ``self.server``."""

    def string_prop(description: str) -> Dict[str, Any]:
        # A fresh dict per call, so no schema fragment is shared between tools.
        return {"type": "string", "description": description}

    def string_array_prop(description: str) -> Dict[str, Any]:
        return {"type": "array", "items": {"type": "string"}, "description": description}

    select_text = "dbt selector syntax to filter models (optional)"
    exclude_text = "dbt selector syntax to exclude models (optional)"
    packages_text = "List of packages to filter (optional)"

    @self.server.list_tools()
    async def list_tools() -> List[Tool]:
        """Return the tools exposed for the current server mode."""
        logger.info(f"[MCP] list_tools called (mode: {self.mode.value if self.mode else 'server'})")

        lineage_description = textwrap.dedent(
            """
            Get the lineage diff between production(base) and session(current) for changed models.
            Returns nodes, parent_map (node dependencies), and change_status/impacted information in compact dataframe format.

            In parent_map: key is a node index, value is list of parent node indices
            Nodes dataframe includes: idx, id, name, resource_type, materialized, change_status, impacted.

            Rendering guidance for Mermaid diagram:
            Use graph LR and apply these styles based on change_status and impacted:
            - change_status="added": fill:#d4edda, stroke:#28a745, color:#000000
            - change_status="removed": fill:#f8d7da, stroke:#dc3545, color:#000000
            - change_status="modified" AND impacted=true: fill:#fff3cd, stroke:#ffc107, color:#000000
            - change_status=null AND impacted=true: fill:#ffffff, stroke:#ffc107, color:#000000
            - change_status=null AND impacted=false: fill:#ffffff, stroke:#d3d3d3, color:#999999
            """
        ).strip()

        # These two tools are offered in every mode.
        tools = [
            Tool(
                name="lineage_diff",
                description=lineage_description,
                inputSchema={
                    "type": "object",
                    "properties": {
                        "select": string_prop(select_text),
                        "exclude": string_prop(exclude_text),
                        "packages": string_array_prop(packages_text),
                        "view_mode": {
                            "type": "string",
                            "enum": ["changed_models", "all"],
                            "default": "changed_models",
                            "description": "View mode: 'changed_models' for only changed models (default), 'all' for all models",
                        },
                    },
                },
            ),
            Tool(
                name="schema_diff",
                description="Get the schema diff (column changes) between base and current environments. "
                "Shows added, removed, and type-changed columns in compact dataframe format.",
                inputSchema={
                    "type": "object",
                    "properties": {
                        "select": string_prop(select_text),
                        "exclude": string_prop(exclude_text),
                        "packages": string_array_prop(packages_text),
                    },
                },
            ),
        ]

        # The remaining tools are offered only in server mode, not in
        # preview or read-only mode.
        if self.mode == RecceServerMode.server:
            tools += [
                Tool(
                    name="row_count_diff",
                    description="Compare row counts between base and current environments for specified models.",
                    inputSchema={
                        "type": "object",
                        "properties": {
                            "node_names": string_array_prop("List of model names to check row counts (optional)"),
                            "node_ids": string_array_prop("List of node IDs to check row counts (optional)"),
                            "select": string_prop(select_text),
                            "exclude": string_prop(exclude_text),
                        },
                    },
                ),
                Tool(
                    name="query",
                    description="Execute a SQL query on the current environment. "
                    "Supports Jinja templates with dbt macros like {{ ref('model_name') }}.",
                    inputSchema={
                        "type": "object",
                        "properties": {
                            "sql_template": string_prop("SQL query template with optional Jinja syntax"),
                            "base": {
                                "type": "boolean",
                                "description": "Whether to run on base environment (default: false)",
                                "default": False,
                            },
                        },
                        "required": ["sql_template"],
                    },
                ),
                Tool(
                    name="query_diff",
                    description="Execute SQL queries on both base and current environments and compare results. "
                    "Supports primary keys for row-level comparison.",
                    inputSchema={
                        "type": "object",
                        "properties": {
                            "sql_template": string_prop("SQL query template for current environment"),
                            "base_sql_template": string_prop(
                                "SQL query template for base environment (optional, defaults to sql_template)"
                            ),
                            "primary_keys": string_array_prop(
                                "List of primary key columns for row comparison (optional)"
                            ),
                        },
                        "required": ["sql_template"],
                    },
                ),
                Tool(
                    name="profile_diff",
                    description="Generate and compare statistical profiles (min, max, avg, distinct count, etc.) "
                    "for columns in a model between base and current environments.",
                    inputSchema={
                        "type": "object",
                        "properties": {
                            "model": string_prop("Model name to profile"),
                            "columns": string_array_prop(
                                "List of column names to profile (optional, profiles all columns if not specified)"
                            ),
                        },
                        "required": ["model"],
                    },
                ),
            ]

        self.mcp_logger.log_list_tools(tools)

        # Mirror the tool list on the console logger.
        names = [tool.name for tool in tools]
        logger.info(f"[MCP] Returning {len(tools)} tools: {', '.join(names)}")

        return tools

    @self.server.call_tool()
    async def call_tool(name: str, arguments: Dict[str, Any]) -> List[TextContent]:
        """Dispatch a tool call and return its JSON result (or JSON error) as text."""
        started = time.perf_counter()

        logger.info(f"[MCP] Tool call received: {name}")
        logger.info(f"[MCP] Arguments: {json.dumps(arguments, indent=2)}")

        try:
            # These tools are refused outside server mode.
            server_only = {"row_count_diff", "query", "query_diff", "profile_diff"}
            if name in server_only and self.mode != RecceServerMode.server:
                raise ValueError(
                    f"Tool '{name}' is not available in {self.mode.value} mode. "
                    "Only 'lineage_diff' and 'schema_diff' are available in this mode."
                )

            handlers = {
                "lineage_diff": self._tool_lineage_diff,
                "schema_diff": self._tool_schema_diff,
                "row_count_diff": self._tool_row_count_diff,
                "query": self._tool_query,
                "query_diff": self._tool_query_diff,
                "profile_diff": self._tool_profile_diff,
            }
            handler = handlers.get(name)
            if handler is None:
                raise ValueError(f"Unknown tool: {name}")
            result = await handler(arguments)

            duration_ms = (time.perf_counter() - started) * 1000
            self.mcp_logger.log_tool_call(name, arguments, result, duration_ms)

            response_json = json.dumps(result, indent=2)
            logger.info(f"[MCP] Tool response for {name} ({duration_ms:.2f}ms):")
            # Keep console output readable: long payloads are cut at 1000 chars.
            if len(response_json) <= 1000:
                logger.debug(f"[MCP] {response_json}")
            else:
                logger.debug(f"[MCP] {response_json[:1000]}... (truncated, {len(response_json)} chars total)")

            return [TextContent(type="text", text=response_json)]
        except Exception as e:
            duration_ms = (time.perf_counter() - started) * 1000
            self.mcp_logger.log_tool_call(name, arguments, {}, duration_ms, error=str(e))
            logger.error(f"[MCP] Error executing tool {name} ({duration_ms:.2f}ms): {str(e)}")
            logger.exception("[MCP] Full traceback:")
            # Errors are returned to the client as a JSON payload, not raised.
            error_response = json.dumps({"error": str(e)}, indent=2)
            return [TextContent(type="text", text=error_response)]
365
+
366
async def _tool_lineage_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Build a compact lineage diff between base and current.

    Args:
        arguments: Tool arguments. The optional keys ``select``, ``exclude``,
            ``packages`` and ``view_mode`` (default "changed_models") are
            passed to the adapter's ``select_nodes``.

    Returns:
        A dict with ``nodes`` (a dumped DataFrame with columns idx, id, name,
        resource_type, materialized, change_status, impacted) and
        ``parent_map`` (node idx -> list of parent idx, restricted to the
        selected nodes).

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    try:
        select = arguments.get("select")
        exclude = arguments.get("exclude")
        packages = arguments.get("packages")
        view_mode = arguments.get("view_mode", "changed_models")

        # The dumped model is a plain JSON-compatible dict.
        lineage_diff = self.context.get_lineage_diff().model_dump(mode="json")

        selected_node_ids = self.context.adapter.select_nodes(
            select=select,
            exclude=exclude,
            packages=packages,
            view_mode=view_mode,
        )
        impacted_node_ids = self.context.adapter.select_nodes(
            select="state:modified+",
        )

        # `.get(key) or {}` (rather than `.get(key, {})`) also covers keys
        # that are present with an explicit null value in the dumped model.
        diff_info = lineage_diff.get("diff") or {}

        parent_map = {}
        nodes = {}

        # Merge base first, then current, so current wins on conflicts.
        for env_key in ("base", "current"):
            env_data = lineage_diff.get(env_key) or {}

            for node_id, parents in (env_data.get("parent_map") or {}).items():
                if node_id in selected_node_ids:
                    parent_map[node_id] = parents

            for node_id, node_info in (env_data.get("nodes") or {}).items():
                if node_id not in selected_node_ids:
                    continue

                summary = {
                    "name": node_info.get("name"),
                    "resource_type": node_info.get("resource_type"),
                }
                materialized = (node_info.get("config") or {}).get("materialized")
                if materialized is not None:
                    summary["materialized"] = materialized
                nodes[node_id] = summary

        # Integer indices replace the long node ids in the parent map.
        id_to_idx = {node_id: idx for idx, node_id in enumerate(nodes)}

        nodes_data = [
            (
                id_to_idx[node_id],
                node_id,
                node_info.get("name"),
                node_info.get("resource_type"),
                node_info.get("materialized"),
                (diff_info.get(node_id) or {}).get("change_status"),
                node_id in impacted_node_ids,
            )
            for node_id, node_info in nodes.items()
        ]

        nodes_df = DataFrame.from_data(
            columns={
                "idx": "integer",
                "id": "text",
                "name": "text",
                "resource_type": "text",
                "materialized": "text",
                "change_status": "text",
                "impacted": "boolean",
            },
            data=nodes_data,
        )

        # Parents outside the selection have no index and are dropped.
        parent_map_indexed = {}
        for node_id, parents in parent_map.items():
            if node_id in id_to_idx:
                parent_map_indexed[id_to_idx[node_id]] = [id_to_idx[p] for p in (parents or []) if p in id_to_idx]

        return {"nodes": nodes_df.model_dump(mode="json"), "parent_map": parent_map_indexed}

    except Exception:
        logger.exception("Error getting lineage diff")
        raise
469
+
470
async def _tool_schema_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Report column-level schema changes between base and current.

    Only nodes present in both environments are compared. Each change is a
    ``(node_id, column, change_status)`` row, where change_status is
    "added", "removed" or "modified" (column type differs). At most 100 rows
    are returned.

    Args:
        arguments: Tool arguments. If any of the optional keys ``select``,
            ``exclude`` or ``packages`` is set, the compared nodes are
            narrowed through the adapter's ``select_nodes``.

    Returns:
        The dumped DataFrame of schema changes.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    try:
        select = arguments.get("select")
        exclude = arguments.get("exclude")
        packages = arguments.get("packages")

        lineage_diff = self.context.get_lineage_diff().model_dump(mode="json")

        # `or {}` also covers keys present with an explicit null value.
        current_nodes = (lineage_diff.get("current") or {}).get("nodes") or {}
        base_nodes = (lineage_diff.get("base") or {}).get("nodes") or {}

        # Nodes that exist on one side only are not schema changes.
        nodes_to_compare = set(current_nodes) & set(base_nodes)

        if select or exclude or packages:
            selected_node_ids = self.context.adapter.select_nodes(
                select=select,
                exclude=exclude,
                packages=packages,
            )
            nodes_to_compare = nodes_to_compare.intersection(selected_node_ids)

        schema_changes = []

        # Iterate in sorted order: set order is arbitrary, and because the
        # result is truncated below, an unsorted walk would make the set of
        # returned rows differ from run to run.
        for node_id in sorted(nodes_to_compare):
            base_columns = (base_nodes.get(node_id) or {}).get("columns") or {}
            current_columns = (current_nodes.get(node_id) or {}).get("columns") or {}

            base_col_names = set(base_columns)
            current_col_names = set(current_columns)

            for col_name in sorted(current_col_names - base_col_names):
                schema_changes.append((node_id, col_name, "added"))

            for col_name in sorted(base_col_names - current_col_names):
                schema_changes.append((node_id, col_name, "removed"))

            for col_name in sorted(base_col_names & current_col_names):
                base_col_type = base_columns[col_name].get("type")
                current_col_type = current_columns[col_name].get("type")
                if base_col_type != current_col_type:
                    schema_changes.append((node_id, col_name, "modified"))

        # Cap the payload and flag whether rows were dropped.
        limit = 100
        has_more = len(schema_changes) > limit
        limited_schema_changes = schema_changes[:limit]

        diff_df = DataFrame.from_data(
            columns={
                "node_id": "text",
                "column": "text",
                "change_status": "text",
            },
            data=limited_schema_changes,
            limit=limit,
            more=has_more,
        )
        return diff_df.model_dump(mode="json")

    except Exception:
        logger.exception("Error getting schema diff")
        raise
549
+
550
async def _tool_row_count_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Run a ``RowCountDiffTask`` with the tool arguments as its params.

    Args:
        arguments: Passed unchanged as the task's ``params``.

    Returns:
        Whatever the task's ``execute`` returns.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    try:
        task = RowCountDiffTask(params=arguments)

        # execute() is a blocking call, so it runs in the default executor
        # to keep the event loop free. get_running_loop() is the documented
        # way to obtain the loop from inside a coroutine.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, task.execute)
    except Exception:
        logger.exception("Error executing row count diff")
        raise
562
+
563
async def _tool_query(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Run a ``QueryTask`` for the given SQL template.

    Args:
        arguments: Tool arguments. ``sql_template`` is forwarded as the
            task's only param; ``base`` (default False) is assigned to the
            task's ``is_base`` attribute.

    Returns:
        The task result, dumped with ``model_dump(mode="json")`` when the
        result object offers that method, otherwise returned as-is.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    try:
        sql_template = arguments.get("sql_template")
        is_base = arguments.get("base", False)

        task = QueryTask(params={"sql_template": sql_template})
        task.is_base = is_base

        # execute() is a blocking call, so it runs in the default executor.
        # get_running_loop() is the documented way to obtain the loop from
        # inside a coroutine.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, task.execute)

        if hasattr(result, "model_dump"):
            return result.model_dump(mode="json")
        return result
    except Exception:
        logger.exception("Error executing query")
        raise
583
+
584
async def _tool_query_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Run a query diff task and return its result.

    Args:
        arguments: Tool arguments, passed unchanged as ``params`` to
            ``QueryDiffTask``.

    Returns:
        ``result.model_dump(mode="json")`` when the task result exposes
        ``model_dump``; otherwise the raw result.

    Raises:
        Exception: Any error raised while building or executing the task is
            logged with its traceback and then re-raised.
    """
    try:
        task = QueryDiffTask(params=arguments)

        # Run the synchronous task in the default executor. Inside a
        # coroutine, get_running_loop() is preferred over get_event_loop().
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, task.execute)

        # Results that expose model_dump are converted to a JSON-friendly dict.
        if hasattr(result, "model_dump"):
            return result.model_dump(mode="json")
        return result
    except Exception:
        logger.exception("Error executing query diff")
        raise
599
+
600
async def _tool_profile_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Run a profile diff task and return its result.

    Args:
        arguments: Tool arguments, passed unchanged as ``params`` to
            ``ProfileDiffTask``.

    Returns:
        ``result.model_dump(mode="json")`` when the task result exposes
        ``model_dump``; otherwise the raw result.

    Raises:
        Exception: Any error raised while building or executing the task is
            logged with its traceback and then re-raised.
    """
    try:
        task = ProfileDiffTask(params=arguments)

        # Run the synchronous task in the default executor. Inside a
        # coroutine, get_running_loop() is preferred over get_event_loop().
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, task.execute)

        # Results that expose model_dump are converted to a JSON-friendly dict.
        if hasattr(result, "model_dump"):
            return result.model_dump(mode="json")
        return result
    except Exception:
        logger.exception("Error executing profile diff")
        raise
615
+
616
async def run(self):
    """Serve the MCP server over stdio.

    Opens the stdio transport and runs ``self.server`` on its read/write
    streams with the server's own initialization options.
    """
    async with stdio_server() as streams:
        reader, writer = streams
        await self.server.run(reader, writer, self.server.create_initialization_options())
620
+
621
async def run_sse(self, host: str = "localhost", port: int = 8000):
    """Serve the MCP server over HTTP using Server-Sent Events (SSE).

    Builds a Starlette application with a health check (``GET /health``),
    the SSE connection endpoint (``GET /sse``) and a mount at ``/`` that
    receives client POST messages, then serves it with uvicorn.

    Args:
        host: Host to bind to (default: localhost)
        port: Port to bind to (default: 8000)
    """
    import uvicorn
    from mcp.server.sse import SseServerTransport
    from starlette.applications import Starlette
    from starlette.requests import Request
    from starlette.responses import Response
    from starlette.routing import Mount, Route

    # SSE transport; "/" is the endpoint clients POST their messages to.
    transport = SseServerTransport("/")

    async def handle_sse_request(request: Request):
        """Serve one SSE connection (GET /sse) for the lifetime of the MCP session."""
        if request.client:
            client_info = f"{request.client.host}:{request.client.port}"
        else:
            client_info = "unknown"
        logger.info(f"[MCP HTTP] SSE connection established from {client_info}")
        try:
            async with transport.connect_sse(request.scope, request.receive, request._send) as streams:
                reader, writer = streams[0], streams[1]
                await self.server.run(reader, writer, self.server.create_initialization_options())
        finally:
            logger.info(f"[MCP HTTP] SSE connection closed from {client_info}")
        # An explicit Response is returned so the route does not yield None.
        return Response()

    async def handle_post_message(scope, receive, send):
        """Forward a client POST (POST /) to the SSE transport."""
        # The query string is logged for debugging (it carries the session_id).
        raw_query = scope.get("query_string", b"")
        query_string = raw_query.decode("utf-8")
        logger.debug(f"[MCP HTTP] POST message received with query: {query_string}")
        await transport.handle_post_message(scope, receive, send)

    async def handle_health_check(request: Request):
        """Answer the health check (GET /health) with a static JSON body."""
        return Response(content='{"status":"ok"}', media_type="application/json")

    # Assemble the Starlette application.
    debug_enabled = self.mcp_logger.debug
    routes = [
        Route("/health", endpoint=handle_health_check, methods=["GET"]),
        Route("/sse", endpoint=handle_sse_request, methods=["GET"]),
        Mount("/", app=handle_post_message),
    ]
    app = Starlette(debug=debug_enabled, routes=routes)

    # Announce the endpoint, then hand control to uvicorn.
    logger.info(f"Starting Recce MCP Server in HTTP mode on {host}:{port}")
    logger.info(f"Connection URL: http://{host}:{port}/sse")
    uvicorn_config = uvicorn.Config(app, host=host, port=port, log_level="info")
    await uvicorn.Server(uvicorn_config).serve()
676
+
677
+
678
async def run_mcp_server(sse: bool = False, host: str = "localhost", port: int = 8000, **kwargs):
    """
    Start the Recce MCP server in stdio or HTTP/SSE mode.

    Configures logging, loads the Recce context from ``kwargs``, builds a
    ``RecceMCPServer`` and runs it until the chosen transport finishes.

    Args:
        sse: Whether to run in HTTP/SSE mode (default: False for stdio mode)
        host: Host to bind to in SSE mode (default: localhost)
        port: Port to bind to in SSE mode (default: 8000)
        **kwargs: Arguments for loading RecceContext (dbt options, etc.)
                 Optionally includes 'mode' for server mode (server, preview, read-only)
                 Optionally includes 'debug' flag for enabling MCP logging
    """
    # Configure logging before anything else emits log records.
    logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")

    # All keyword arguments are forwarded to the context loader.
    context = load_context(**kwargs)

    # Convert the optional 'mode' string to a RecceServerMode. A missing or
    # invalid value leaves mode as None (an invalid one also logs a warning).
    mode = None
    mode_str = kwargs.get("mode")
    if mode_str:
        try:
            mode = RecceServerMode(mode_str)
        except ValueError:
            logger.warning(f"Invalid mode '{mode_str}', using default server mode")

    server = RecceMCPServer(context, mode=mode, debug=kwargs.get("debug", False))

    # stdio is the default transport; HTTP/SSE is used only when requested.
    if not sse:
        await server.run()
        return
    await server.run_sse(host=host, port=port)