recce-nightly 0.62.0.20250417__py3-none-any.whl → 1.30.0.20251221__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recce-nightly might be problematic. Click here for more details.

Files changed (245) hide show
  1. recce/VERSION +1 -1
  2. recce/__init__.py +27 -22
  3. recce/adapter/base.py +11 -14
  4. recce/adapter/dbt_adapter/__init__.py +845 -461
  5. recce/adapter/dbt_adapter/dbt_version.py +3 -0
  6. recce/adapter/sqlmesh_adapter.py +24 -35
  7. recce/apis/check_api.py +59 -42
  8. recce/apis/check_events_api.py +353 -0
  9. recce/apis/check_func.py +41 -35
  10. recce/apis/run_api.py +25 -19
  11. recce/apis/run_func.py +64 -25
  12. recce/artifact.py +119 -51
  13. recce/cli.py +1301 -324
  14. recce/config.py +43 -34
  15. recce/connect_to_cloud.py +138 -0
  16. recce/core.py +55 -47
  17. recce/data/404/index.html +2 -0
  18. recce/data/404.html +2 -1
  19. recce/data/__next.@lineage.!KHNsb3Qp.__PAGE__.txt +7 -0
  20. recce/data/__next.@lineage.!KHNsb3Qp.txt +4 -0
  21. recce/data/__next.__PAGE__.txt +6 -0
  22. recce/data/__next._full.txt +32 -0
  23. recce/data/__next._head.txt +8 -0
  24. recce/data/__next._index.txt +14 -0
  25. recce/data/__next._tree.txt +8 -0
  26. recce/data/_next/static/chunks/025a7e3e3f9f40ae.js +1 -0
  27. recce/data/_next/static/chunks/0ce56d67ef5779ca.js +4 -0
  28. recce/data/_next/static/chunks/1a6a78780155dac7.js +48 -0
  29. recce/data/_next/static/chunks/1de8485918b9182a.css +2 -0
  30. recce/data/_next/static/chunks/1e4b1b50d1e34993.js +1 -0
  31. recce/data/_next/static/chunks/206d5d181e4c738e.js +1 -0
  32. recce/data/_next/static/chunks/2c357efc34c5b859.js +25 -0
  33. recce/data/_next/static/chunks/2e9d95d2d48c479c.js +1 -0
  34. recce/data/_next/static/chunks/2f016dc4a3edad2e.js +2 -0
  35. recce/data/_next/static/chunks/313251962d698f7c.js +1 -0
  36. recce/data/_next/static/chunks/3a9f021f38eb5574.css +1 -0
  37. recce/data/_next/static/chunks/40079da8d2b8f651.js +1 -0
  38. recce/data/_next/static/chunks/4599182bffb64661.js +38 -0
  39. recce/data/_next/static/chunks/4e62f6e184173580.js +1 -0
  40. recce/data/_next/static/chunks/5c4dfb0d09eaa401.js +1 -0
  41. recce/data/_next/static/chunks/69e4f06ccfdfc3ac.js +1 -0
  42. recce/data/_next/static/chunks/6b206cb4707d6bee.js +1 -0
  43. recce/data/_next/static/chunks/6d8557f062aa4386.css +1 -0
  44. recce/data/_next/static/chunks/7fbe3650bd83b6b5.js +1 -0
  45. recce/data/_next/static/chunks/83fa823a825674f6.js +1 -0
  46. recce/data/_next/static/chunks/848a6c9b5f55f7ed.js +1 -0
  47. recce/data/_next/static/chunks/859462b0858aef88.css +2 -0
  48. recce/data/_next/static/chunks/923964f18c87d0f1.css +1 -0
  49. recce/data/_next/static/chunks/939390f911895d7c.js +48 -0
  50. recce/data/_next/static/chunks/99a9817237a07f43.js +1 -0
  51. recce/data/_next/static/chunks/9fed8b4b2b924054.js +5 -0
  52. recce/data/_next/static/chunks/b6949f6c5892110c.js +1 -0
  53. recce/data/_next/static/chunks/b851a1d3f8149828.js +1 -0
  54. recce/data/_next/static/chunks/c734f9ad957de0b4.js +1 -0
  55. recce/data/_next/static/chunks/cdde321b0ec75717.js +2 -0
  56. recce/data/_next/static/chunks/d0f91117d77ff844.css +1 -0
  57. recce/data/_next/static/chunks/d6c8667911c2500f.js +1 -0
  58. recce/data/_next/static/chunks/da8dab68c02752cf.js +74 -0
  59. recce/data/_next/static/chunks/dc074049c9d12d97.js +109 -0
  60. recce/data/_next/static/chunks/ee7f1a8227342421.js +1 -0
  61. recce/data/_next/static/chunks/fa2f4e56c2fccc73.js +1 -0
  62. recce/data/_next/static/chunks/turbopack-1fad664f62979b93.js +3 -0
  63. recce/data/_next/static/media/favicon.a8d38d84.ico +0 -0
  64. recce/data/_next/static/media/montserrat-cyrillic-800-normal.d80d830d.woff2 +0 -0
  65. recce/data/_next/static/media/montserrat-cyrillic-800-normal.f9d58125.woff +0 -0
  66. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.076c2a93.woff2 +0 -0
  67. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.a4fa76b5.woff +0 -0
  68. recce/data/_next/static/media/montserrat-latin-800-normal.cde454cc.woff2 +0 -0
  69. recce/data/_next/static/media/montserrat-latin-800-normal.d5761935.woff +0 -0
  70. recce/data/_next/static/media/montserrat-latin-ext-800-normal.40ec0659.woff2 +0 -0
  71. recce/data/_next/static/media/montserrat-latin-ext-800-normal.b671449b.woff +0 -0
  72. recce/data/_next/static/media/montserrat-vietnamese-800-normal.9f7b8541.woff +0 -0
  73. recce/data/_next/static/media/montserrat-vietnamese-800-normal.f9eb854e.woff2 +0 -0
  74. recce/data/_next/static/nX-Uz0AH6Tc6hIQUFGqaB/_buildManifest.js +11 -0
  75. recce/data/_next/static/nX-Uz0AH6Tc6hIQUFGqaB/_clientMiddlewareManifest.json +1 -0
  76. recce/data/_not-found/__next._full.txt +24 -0
  77. recce/data/_not-found/__next._head.txt +8 -0
  78. recce/data/_not-found/__next._index.txt +13 -0
  79. recce/data/_not-found/__next._not-found.__PAGE__.txt +5 -0
  80. recce/data/_not-found/__next._not-found.txt +4 -0
  81. recce/data/_not-found/__next._tree.txt +6 -0
  82. recce/data/_not-found/index.html +2 -0
  83. recce/data/_not-found/index.txt +24 -0
  84. recce/data/auth_callback.html +68 -0
  85. recce/data/checks/__next.@lineage.__DEFAULT__.txt +7 -0
  86. recce/data/checks/__next._full.txt +39 -0
  87. recce/data/checks/__next._head.txt +8 -0
  88. recce/data/checks/__next._index.txt +14 -0
  89. recce/data/checks/__next._tree.txt +8 -0
  90. recce/data/checks/__next.checks.__PAGE__.txt +10 -0
  91. recce/data/checks/__next.checks.txt +4 -0
  92. recce/data/checks/index.html +2 -0
  93. recce/data/checks/index.txt +39 -0
  94. recce/data/imgs/reload-image.svg +4 -0
  95. recce/data/index.html +2 -27
  96. recce/data/index.txt +32 -7
  97. recce/data/lineage/__next.@lineage.__DEFAULT__.txt +7 -0
  98. recce/data/lineage/__next._full.txt +39 -0
  99. recce/data/lineage/__next._head.txt +8 -0
  100. recce/data/lineage/__next._index.txt +14 -0
  101. recce/data/lineage/__next._tree.txt +8 -0
  102. recce/data/lineage/__next.lineage.__PAGE__.txt +10 -0
  103. recce/data/lineage/__next.lineage.txt +4 -0
  104. recce/data/lineage/index.html +2 -0
  105. recce/data/lineage/index.txt +39 -0
  106. recce/data/query/__next.@lineage.__DEFAULT__.txt +7 -0
  107. recce/data/query/__next._full.txt +37 -0
  108. recce/data/query/__next._head.txt +8 -0
  109. recce/data/query/__next._index.txt +14 -0
  110. recce/data/query/__next._tree.txt +8 -0
  111. recce/data/query/__next.query.__PAGE__.txt +9 -0
  112. recce/data/query/__next.query.txt +4 -0
  113. recce/data/query/index.html +2 -0
  114. recce/data/query/index.txt +37 -0
  115. recce/diff.py +6 -12
  116. recce/event/CONFIG.bak +1 -0
  117. recce/event/__init__.py +86 -74
  118. recce/event/collector.py +33 -22
  119. recce/event/track.py +49 -27
  120. recce/exceptions.py +1 -1
  121. recce/git.py +7 -7
  122. recce/github.py +57 -53
  123. recce/mcp_server.py +725 -0
  124. recce/models/__init__.py +4 -1
  125. recce/models/check.py +438 -21
  126. recce/models/run.py +1 -0
  127. recce/models/types.py +134 -28
  128. recce/pull_request.py +27 -25
  129. recce/run.py +179 -122
  130. recce/server.py +394 -104
  131. recce/state/__init__.py +31 -0
  132. recce/state/cloud.py +644 -0
  133. recce/state/const.py +26 -0
  134. recce/state/local.py +56 -0
  135. recce/state/state.py +119 -0
  136. recce/state/state_loader.py +174 -0
  137. recce/summary.py +196 -149
  138. recce/tasks/__init__.py +19 -3
  139. recce/tasks/core.py +11 -13
  140. recce/tasks/dataframe.py +82 -18
  141. recce/tasks/histogram.py +69 -34
  142. recce/tasks/lineage.py +2 -2
  143. recce/tasks/profile.py +152 -86
  144. recce/tasks/query.py +180 -89
  145. recce/tasks/rowcount.py +37 -31
  146. recce/tasks/schema.py +18 -15
  147. recce/tasks/top_k.py +35 -35
  148. recce/tasks/utils.py +147 -0
  149. recce/tasks/valuediff.py +247 -155
  150. recce/util/__init__.py +3 -0
  151. recce/util/api_token.py +80 -0
  152. recce/util/breaking.py +105 -100
  153. recce/util/cll.py +274 -219
  154. recce/util/cloud/__init__.py +15 -0
  155. recce/util/cloud/base.py +115 -0
  156. recce/util/cloud/check_events.py +190 -0
  157. recce/util/cloud/checks.py +242 -0
  158. recce/util/io.py +22 -17
  159. recce/util/lineage.py +65 -16
  160. recce/util/logger.py +1 -1
  161. recce/util/onboarding_state.py +45 -0
  162. recce/util/perf_tracking.py +85 -0
  163. recce/util/recce_cloud.py +347 -72
  164. recce/util/singleton.py +4 -4
  165. recce/util/startup_perf.py +121 -0
  166. recce/yaml/__init__.py +7 -10
  167. recce_nightly-1.30.0.20251221.dist-info/METADATA +195 -0
  168. recce_nightly-1.30.0.20251221.dist-info/RECORD +183 -0
  169. {recce_nightly-0.62.0.20250417.dist-info → recce_nightly-1.30.0.20251221.dist-info}/WHEEL +1 -2
  170. recce/data/_next/static/chunks/1f229bf6-d9fe92e56db8d93b.js +0 -1
  171. recce/data/_next/static/chunks/29e3cc0d-8c150e37dff9631b.js +0 -1
  172. recce/data/_next/static/chunks/36e1c10d-bb0210cbd6573a8d.js +0 -1
  173. recce/data/_next/static/chunks/3998a672-eaad84bdd88cc73e.js +0 -1
  174. recce/data/_next/static/chunks/450c323b-1bb5db526e54435a.js +0 -1
  175. recce/data/_next/static/chunks/47d8844f-79a1b53c66a7d7ec.js +0 -1
  176. recce/data/_next/static/chunks/500-e51c92a025a51234.js +0 -65
  177. recce/data/_next/static/chunks/6dc81886-c94b9b91bc2c3caf.js +0 -1
  178. recce/data/_next/static/chunks/700-3b65fc3666820d00.js +0 -2
  179. recce/data/_next/static/chunks/7a8a3e83-d7fa409d97b38b2b.js +0 -1
  180. recce/data/_next/static/chunks/7f27ae6c-413f6b869a04183a.js +0 -1
  181. recce/data/_next/static/chunks/9746af58-d74bef4d03eea6ab.js +0 -1
  182. recce/data/_next/static/chunks/a30376cd-7d806e1602f2dc3a.js +0 -1
  183. recce/data/_next/static/chunks/app/_not-found/page-8a886fa0855c3105.js +0 -1
  184. recce/data/_next/static/chunks/app/layout-9102e22cb73f74d6.js +0 -1
  185. recce/data/_next/static/chunks/app/page-9adc25782272ed2e.js +0 -1
  186. recce/data/_next/static/chunks/b63b1b3f-7395c74e11a14e95.js +0 -1
  187. recce/data/_next/static/chunks/c132bf7d-8102037f9ccf372a.js +0 -1
  188. recce/data/_next/static/chunks/c1ceaa8b-a1e442154d23515e.js +0 -1
  189. recce/data/_next/static/chunks/cd9f8d63-cf0d5a7b0f7a92e8.js +0 -54
  190. recce/data/_next/static/chunks/ce84277d-f42c2c58049cea2d.js +0 -1
  191. recce/data/_next/static/chunks/e24bf851-0f8cbc99656833e7.js +0 -1
  192. recce/data/_next/static/chunks/fee69bc6-f17d36c080742e74.js +0 -1
  193. recce/data/_next/static/chunks/framework-ded83d71b51ce901.js +0 -1
  194. recce/data/_next/static/chunks/main-a0859f1f36d0aa6c.js +0 -1
  195. recce/data/_next/static/chunks/main-app-0225a2255968e566.js +0 -1
  196. recce/data/_next/static/chunks/pages/_app-d5672bf3d8b6371b.js +0 -1
  197. recce/data/_next/static/chunks/pages/_error-ed75be3f25588548.js +0 -1
  198. recce/data/_next/static/chunks/webpack-567d72f0bc0820d5.js +0 -1
  199. recce/data/_next/static/css/c9ecb46a4b21c126.css +0 -14
  200. recce/data/_next/static/media/montserrat-cyrillic-800-normal.22628180.woff2 +0 -0
  201. recce/data/_next/static/media/montserrat-cyrillic-800-normal.31d693bb.woff +0 -0
  202. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.7e2c1e62.woff +0 -0
  203. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.94a63aea.woff2 +0 -0
  204. recce/data/_next/static/media/montserrat-latin-800-normal.6f8fa298.woff2 +0 -0
  205. recce/data/_next/static/media/montserrat-latin-800-normal.97e20d5e.woff +0 -0
  206. recce/data/_next/static/media/montserrat-latin-ext-800-normal.013b84f9.woff2 +0 -0
  207. recce/data/_next/static/media/montserrat-latin-ext-800-normal.aff52ab0.woff +0 -0
  208. recce/data/_next/static/media/montserrat-vietnamese-800-normal.5f21869b.woff +0 -0
  209. recce/data/_next/static/media/montserrat-vietnamese-800-normal.c0035377.woff2 +0 -0
  210. recce/data/_next/static/qiyFlux77VkhxiceAJe_F/_buildManifest.js +0 -1
  211. recce/state.py +0 -753
  212. recce_nightly-0.62.0.20250417.dist-info/METADATA +0 -311
  213. recce_nightly-0.62.0.20250417.dist-info/RECORD +0 -139
  214. recce_nightly-0.62.0.20250417.dist-info/top_level.txt +0 -2
  215. tests/__init__.py +0 -0
  216. tests/adapter/__init__.py +0 -0
  217. tests/adapter/dbt_adapter/__init__.py +0 -0
  218. tests/adapter/dbt_adapter/conftest.py +0 -13
  219. tests/adapter/dbt_adapter/dbt_test_helper.py +0 -283
  220. tests/adapter/dbt_adapter/test_dbt_adapter.py +0 -40
  221. tests/adapter/dbt_adapter/test_dbt_cll.py +0 -102
  222. tests/adapter/dbt_adapter/test_selector.py +0 -177
  223. tests/tasks/__init__.py +0 -0
  224. tests/tasks/conftest.py +0 -4
  225. tests/tasks/test_histogram.py +0 -137
  226. tests/tasks/test_lineage.py +0 -42
  227. tests/tasks/test_preset_checks.py +0 -50
  228. tests/tasks/test_profile.py +0 -73
  229. tests/tasks/test_query.py +0 -151
  230. tests/tasks/test_row_count.py +0 -116
  231. tests/tasks/test_schema.py +0 -99
  232. tests/tasks/test_top_k.py +0 -73
  233. tests/tasks/test_valuediff.py +0 -74
  234. tests/test_cli.py +0 -122
  235. tests/test_config.py +0 -45
  236. tests/test_core.py +0 -27
  237. tests/test_dbt.py +0 -36
  238. tests/test_pull_request.py +0 -130
  239. tests/test_server.py +0 -98
  240. tests/test_state.py +0 -123
  241. tests/test_summary.py +0 -57
  242. /recce/data/_next/static/chunks/{polyfills-42372ed130431b0a.js → a6dad97d9634a72d.js} +0 -0
  243. /recce/data/_next/static/{qiyFlux77VkhxiceAJe_F → nX-Uz0AH6Tc6hIQUFGqaB}/_ssgManifest.js +0 -0
  244. {recce_nightly-0.62.0.20250417.dist-info → recce_nightly-1.30.0.20251221.dist-info}/entry_points.txt +0 -0
  245. {recce_nightly-0.62.0.20250417.dist-info → recce_nightly-1.30.0.20251221.dist-info}/licenses/LICENSE +0 -0
recce/mcp_server.py ADDED
@@ -0,0 +1,725 @@
1
+ """
2
+ Recce MCP (Model Context Protocol) Server
3
+
4
+ This module implements a stdio-based MCP server that provides tools for
5
+ interacting with Recce's data validation capabilities.
6
+ """
7
+
8
+ import asyncio
9
+ import json
10
+ import logging
11
+ import os
12
+ import textwrap
13
+ import time
14
+ from datetime import datetime, timezone
15
+ from typing import Any, Dict, List, Optional
16
+
17
+ from mcp.server import Server
18
+ from mcp.server.stdio import stdio_server
19
+ from mcp.types import TextContent, Tool
20
+
21
+ from recce.core import RecceContext, load_context
22
+ from recce.server import RecceServerMode
23
+ from recce.tasks.dataframe import DataFrame
24
+ from recce.tasks.profile import ProfileDiffTask
25
+ from recce.tasks.query import QueryDiffTask, QueryTask
26
+ from recce.tasks.rowcount import RowCountDiffTask
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
+ def _truncate_strings(obj: Any, max_length: int = 200) -> Any:
32
+ """Recursively truncate strings longer than max_length in nested dicts and lists"""
33
+ if isinstance(obj, dict):
34
+ return {k: _truncate_strings(v, max_length) for k, v in obj.items()}
35
+ elif isinstance(obj, list):
36
+ return [_truncate_strings(item, max_length) for item in obj]
37
+ elif isinstance(obj, str) and len(obj) > max_length:
38
+ return obj[:max_length] + "..."
39
+ return obj
40
+
41
+
42
class MCPLogger:
    """JSON-lines logger for MCP server request/response logging.

    Nothing is written unless ``debug`` is true. When enabled, the log file is
    emptied on construction and one JSON object per line is appended for each
    logged event. All file-system failures are reported through the module
    logger as warnings and never raised to the caller.
    """

    def __init__(self, debug: bool = False, log_file: str = "logs/recce-mcp.json"):
        """Remember the settings and, in debug mode, prepare an empty log file.

        Args:
            debug: Enables writing log entries when true.
            log_file: Path of the JSON-lines file to write.
        """
        self.debug = debug
        self.log_file = log_file

        if not self.debug:
            return

        # Logging is best-effort: neither an uncreatable directory nor an
        # unwritable file should stop the server from starting.
        try:
            log_dir = os.path.dirname(log_file)
            if log_dir:
                os.makedirs(log_dir, exist_ok=True)

            # Opening in "w" mode discards whatever a previous run left behind.
            with open(log_file, "w") as f:
                f.write("")
        except Exception as e:
            logger.warning(f"Failed to initialize log file {log_file}: {e}")

    @staticmethod
    def _timestamp() -> str:
        """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ``."""
        return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    def _write_log(self, log_entry: Dict[str, Any]) -> None:
        """Append ``log_entry`` to the log file as a single JSON line.

        Does nothing when debug logging is disabled. Serialization and I/O
        errors are logged as warnings instead of being raised.
        """
        if not self.debug:
            return

        try:
            with open(self.log_file, "a") as f:
                f.write(json.dumps(log_entry) + "\n")
        except Exception as e:
            logger.warning(f"Failed to write to log file {self.log_file}: {e}")

    def log_list_tools(self, tools: "List[Tool]") -> None:
        """Log a ``list_tools`` call, recording only the tool names."""
        log_entry = {
            "timestamp": self._timestamp(),
            "type": "list_tools",
            "tools": [tool.name for tool in tools],
        }
        self._write_log(log_entry)

    def log_tool_call(
        self,
        tool_name: str,
        arguments: Dict[str, Any],
        response: Dict[str, Any],
        duration_ms: float,
        error: Optional[str] = None,
    ) -> None:
        """Log one tool invocation with its request and its outcome.

        Args:
            tool_name: Name of the tool that was called.
            arguments: Arguments the tool was called with (logged verbatim).
            response: Tool result; long strings are truncated before logging.
                Ignored when ``error`` is given.
            duration_ms: Call duration in milliseconds (rounded to 2 places).
            error: Error message of a failed call, or ``None`` on success.
        """
        log_entry = {
            "timestamp": self._timestamp(),
            "type": "call_tool",
            "tool": tool_name,
            "request": arguments,
            "duration_ms": round(duration_ms, 2),
        }

        # Compare against None rather than truthiness: an exception raised
        # without a message stringifies to "" and must still count as an error.
        if error is not None:
            log_entry["error"] = error
        else:
            log_entry["response"] = _truncate_strings(response)

        self._write_log(log_entry)
106
+
107
+
108
+ class RecceMCPServer:
109
+ """MCP Server for Recce data validation tools"""
110
+
111
def __init__(
    self,
    context: RecceContext,
    mode: Optional[RecceServerMode] = None,
    debug: bool = False,
    log_file: str = "logs/recce-mcp.json",
):
    """Build the MCP server around an already-loaded Recce context.

    Args:
        context: Recce context that the tool handlers read from.
        mode: Server mode; a missing (falsy) value means ``RecceServerMode.server``.
        debug: Passed to ``MCPLogger`` to enable JSON request logging.
        log_file: Passed to ``MCPLogger`` as the JSON log path.
    """
    # Same truthiness fallback as `mode or RecceServerMode.server`.
    effective_mode = mode if mode else RecceServerMode.server

    self.context = context
    self.mode = effective_mode
    self.server = Server("recce")
    self.mcp_logger = MCPLogger(debug=debug, log_file=log_file)

    # The handlers close over `self`, so register them once all attributes exist.
    self._setup_handlers()
123
+
124
def _setup_handlers(self):
    """Register the ``list_tools`` and ``call_tool`` handlers on ``self.server``.

    ``list_tools`` advertises ``lineage_diff`` and ``schema_diff`` in every
    mode and the query/diff tools only in server mode. ``call_tool`` routes a
    call to the matching ``_tool_*`` coroutine, logs it, and returns the
    result (or an ``{"error": ...}`` object) as JSON text.
    """

    def text_prop(description: str) -> Dict[str, Any]:
        # JSON-schema fragment for a plain string argument.
        return {"type": "string", "description": description}

    def text_list_prop(description: str) -> Dict[str, Any]:
        # JSON-schema fragment for an array-of-strings argument.
        return {"type": "array", "items": {"type": "string"}, "description": description}

    def selector_props() -> Dict[str, Any]:
        # Fresh select/exclude fragments shared by several tool schemas.
        return {
            "select": text_prop("dbt selector syntax to filter models (optional)"),
            "exclude": text_prop("dbt selector syntax to exclude models (optional)"),
        }

    def object_schema(properties: Dict[str, Any], required: Optional[List[str]] = None) -> Dict[str, Any]:
        # Top-level input schema; "required" is emitted only when given.
        schema: Dict[str, Any] = {"type": "object", "properties": properties}
        if required is not None:
            schema["required"] = required
        return schema

    @self.server.list_tools()
    async def list_tools() -> List[Tool]:
        """Return the tools that are available in the current server mode."""
        logger.info(f"[MCP] list_tools called (mode: {self.mode.value if self.mode else 'server'})")

        lineage_props = selector_props()
        lineage_props["packages"] = text_list_prop("List of packages to filter (optional)")
        lineage_props["view_mode"] = {
            "type": "string",
            "enum": ["changed_models", "all"],
            "default": "changed_models",
            "description": "View mode: 'changed_models' for only changed models (default), 'all' for all models",
        }

        schema_props = selector_props()
        schema_props["packages"] = text_list_prop("List of packages to filter (optional)")

        # These two tools are offered regardless of mode.
        available = [
            Tool(
                name="lineage_diff",
                description=textwrap.dedent(
                    """
                    Get the lineage diff between production(base) and session(current) for changed models.
                    Returns nodes and edges (node dependencies) in compact dataframe format.

                    Nodes dataframe includes: idx, id, name, resource_type, materialized, change_status, impacted.
                    Edges dataframe includes: from (parent node idx), to (child node idx).

                    Rendering guidance for Mermaid diagram:
                    Use graph LR and apply these styles based on change_status and impacted:
                    - change_status="added": fill:#d4edda, stroke:#28a745, color:#000000
                    - change_status="removed": fill:#f8d7da, stroke:#dc3545, color:#000000
                    - change_status="modified" AND impacted=true: fill:#fff3cd, stroke:#ffc107, color:#000000
                    - change_status=null AND impacted=true: fill:#ffffff, stroke:#ffc107, color:#000000
                    - change_status=null AND impacted=false: fill:#ffffff, stroke:#d3d3d3, color:#999999
                    """
                ).strip(),
                inputSchema=object_schema(lineage_props),
            ),
            Tool(
                name="schema_diff",
                description="Get the schema diff (column changes) between base and current environments. "
                "Shows added, removed, and type-changed columns in compact dataframe format.",
                inputSchema=object_schema(schema_props),
            ),
        ]

        # Diff tools only available in server mode, not in preview or read-only mode
        if self.mode == RecceServerMode.server:
            row_count_props = {
                "node_names": text_list_prop("List of model names to check row counts (optional)"),
                "node_ids": text_list_prop("List of node IDs to check row counts (optional)"),
            }
            row_count_props.update(selector_props())

            available += [
                Tool(
                    name="row_count_diff",
                    description="Compare row counts between base and current environments for specified models.",
                    inputSchema=object_schema(row_count_props),
                ),
                Tool(
                    name="query",
                    description="Execute a SQL query on the current environment. "
                    "Supports Jinja templates with dbt macros like {{ ref('model_name') }}.",
                    inputSchema=object_schema(
                        {
                            "sql_template": text_prop("SQL query template with optional Jinja syntax"),
                            "base": {
                                "type": "boolean",
                                "description": "Whether to run on base environment (default: false)",
                                "default": False,
                            },
                        },
                        required=["sql_template"],
                    ),
                ),
                Tool(
                    name="query_diff",
                    description="Execute SQL queries on both base and current environments and compare results. "
                    "Supports primary keys for row-level comparison.",
                    inputSchema=object_schema(
                        {
                            "sql_template": text_prop("SQL query template for current environment"),
                            "base_sql_template": text_prop(
                                "SQL query template for base environment (optional, defaults to sql_template)"
                            ),
                            "primary_keys": text_list_prop(
                                "List of primary key columns for row comparison (optional)"
                            ),
                        },
                        required=["sql_template"],
                    ),
                ),
                Tool(
                    name="profile_diff",
                    description="Generate and compare statistical profiles (min, max, avg, distinct count, etc.) "
                    "for columns in a model between base and current environments.",
                    inputSchema=object_schema(
                        {
                            "model": text_prop("Model name to profile"),
                            "columns": text_list_prop(
                                "List of column names to profile (optional, profiles all columns if not specified)"
                            ),
                        },
                        required=["model"],
                    ),
                ),
            ]

        self.mcp_logger.log_list_tools(available)

        # Mirror the advertised tool names on the console log.
        joined_names = ", ".join([tool.name for tool in available])
        logger.info(f"[MCP] Returning {len(available)} tools: {joined_names}")

        return available

    @self.server.call_tool()
    async def call_tool(name: str, arguments: Dict[str, Any]) -> List[TextContent]:
        """Run the named tool and return its JSON-encoded result or error."""
        started_at = time.perf_counter()

        def elapsed_ms() -> float:
            return (time.perf_counter() - started_at) * 1000

        # Log incoming request
        logger.info(f"[MCP] Tool call received: {name}")
        logger.info(f"[MCP] Arguments: {json.dumps(arguments, indent=2)}")

        try:
            # Tools that query the warehouse are refused outside server mode.
            blocked_tools_in_non_server = {"row_count_diff", "query", "query_diff", "profile_diff"}
            if self.mode != RecceServerMode.server and name in blocked_tools_in_non_server:
                raise ValueError(
                    f"Tool '{name}' is not available in {self.mode.value} mode. "
                    "Only 'lineage_diff' and 'schema_diff' are available in this mode."
                )

            dispatch = {
                "lineage_diff": self._tool_lineage_diff,
                "schema_diff": self._tool_schema_diff,
                "row_count_diff": self._tool_row_count_diff,
                "query": self._tool_query,
                "query_diff": self._tool_query_diff,
                "profile_diff": self._tool_profile_diff,
            }
            handler = dispatch.get(name)
            if handler is None:
                raise ValueError(f"Unknown tool: {name}")
            result = await handler(arguments)

            duration_ms = elapsed_ms()
            self.mcp_logger.log_tool_call(name, arguments, result, duration_ms)

            # Log outgoing response
            response_json = json.dumps(result, indent=2)
            logger.info(f"[MCP] Tool response for {name} ({duration_ms:.2f}ms):")
            # Only the first 1000 characters of a large response go to the console.
            if len(response_json) <= 1000:
                preview = response_json
            else:
                preview = f"{response_json[:1000]}... (truncated, {len(response_json)} chars total)"
            logger.debug(f"[MCP] {preview}")

            return [TextContent(type="text", text=response_json)]
        except Exception as e:
            # Failures are reported to the client as a JSON error object.
            duration_ms = elapsed_ms()
            self.mcp_logger.log_tool_call(name, arguments, {}, duration_ms, error=str(e))
            logger.error(f"[MCP] Error executing tool {name} ({duration_ms:.2f}ms): {str(e)}")
            logger.exception("[MCP] Full traceback:")
            error_response = json.dumps({"error": str(e)}, indent=2)
            return [TextContent(type="text", text=error_response)]
365
+
366
async def _tool_lineage_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Get the lineage diff between base and current as two compact dataframes.

    Args:
        arguments: Tool arguments. Reads the optional keys ``select``,
            ``exclude``, ``packages`` and ``view_mode`` (defaults to
            ``"changed_models"``), which are forwarded to the adapter's
            ``select_nodes``.

    Returns:
        ``{"nodes": ..., "edges": ...}`` where both values are JSON dumps of
        ``DataFrame`` objects. Node rows are ``(idx, id, name, resource_type,
        materialized, change_status, impacted)``; edge rows are
        ``(parent idx, child idx)``.

    Raises:
        Exception: Any failure is logged with its traceback and re-raised.
    """
    try:
        # Extract filter arguments
        select = arguments.get("select")
        exclude = arguments.get("exclude")
        packages = arguments.get("packages")
        view_mode = arguments.get("view_mode", "changed_models")

        # Dump the lineage diff model into plain JSON-compatible dicts
        lineage_diff = self.context.get_lineage_diff().model_dump(mode="json")

        # Nodes to report, and nodes downstream of a modification
        selected_node_ids = self.context.adapter.select_nodes(
            select=select,
            exclude=exclude,
            packages=packages,
            view_mode=view_mode,
        )
        impacted_node_ids = self.context.adapter.select_nodes(
            select="state:modified+",
        )

        # `or {}` (not a .get default) so a key that is present but null in
        # the dumped model does not break the lookups below.
        diff_info = lineage_diff.get("diff") or {}

        parent_map = {}
        nodes = {}

        # Merge parent_map and nodes: base first, then current overrides
        for env_key in ("base", "current"):
            env_data = lineage_diff.get(env_key) or {}

            for node_id, parents in (env_data.get("parent_map") or {}).items():
                if node_id in selected_node_ids:
                    parent_map[node_id] = parents

            for node_id, node_info in (env_data.get("nodes") or {}).items():
                if node_id not in selected_node_ids:
                    continue

                summary = {
                    "name": node_info.get("name"),
                    "resource_type": node_info.get("resource_type"),
                }
                materialized = (node_info.get("config") or {}).get("materialized")
                if materialized is not None:
                    summary["materialized"] = materialized
                nodes[node_id] = summary

        # Compact integer index per node, used by the edge rows
        id_to_idx = {node_id: idx for idx, node_id in enumerate(nodes)}

        nodes_data = [
            (
                id_to_idx[node_id],
                node_id,
                node_info.get("name"),
                node_info.get("resource_type"),
                node_info.get("materialized"),
                (diff_info.get(node_id) or {}).get("change_status"),
                node_id in impacted_node_ids,
            )
            for node_id, node_info in nodes.items()
        ]

        nodes_df = DataFrame.from_data(
            columns={
                "idx": "integer",
                "id": "text",
                "name": "text",
                "resource_type": "text",
                "materialized": "text",
                "change_status": "text",
                "impacted": "boolean",
            },
            data=nodes_data,
        )

        # An edge is kept only when both of its endpoints are reported nodes
        edges_data = []
        for node_id, parents in parent_map.items():
            if node_id not in id_to_idx:
                continue
            for parent_id in parents or ():
                if parent_id in id_to_idx:
                    edges_data.append((id_to_idx[parent_id], id_to_idx[node_id]))

        edges_df = DataFrame.from_data(
            columns={
                "from": "integer",
                "to": "integer",
            },
            data=edges_data,
        )

        return {"nodes": nodes_df.model_dump(mode="json"), "edges": edges_df.model_dump(mode="json")}

    except Exception:
        logger.exception("Error getting lineage diff")
        raise
478
+
479
async def _tool_schema_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Get schema diff (column changes) between base and current.

    Only nodes present in both environments are compared. A column is
    reported as ``"added"`` or ``"removed"`` when it exists on one side only,
    and as ``"modified"`` when its ``type`` differs between the two sides.

    Args:
        arguments: Tool arguments. Reads the optional keys ``select``,
            ``exclude`` and ``packages``; when any is truthy the compared
            nodes are narrowed to the adapter's ``select_nodes`` result.

    Returns:
        JSON dump of a ``DataFrame`` with columns ``node_id``, ``column`` and
        ``change_status``, holding at most 100 rows; ``more`` is passed as
        true when further changes were cut off.

    Raises:
        Exception: Any failure is logged with its traceback and re-raised.
    """
    try:
        # Extract filter arguments
        select = arguments.get("select")
        exclude = arguments.get("exclude")
        packages = arguments.get("packages")

        lineage_diff = self.context.get_lineage_diff().model_dump(mode="json")

        # `or {}` also covers keys that are present but null in the dump
        current_nodes = (lineage_diff.get("current") or {}).get("nodes") or {}
        base_nodes = (lineage_diff.get("base") or {}).get("nodes") or {}

        # Nodes in both environments (added and removed nodes are excluded)
        nodes_to_compare = set(current_nodes) & set(base_nodes)

        # Apply filtering if arguments provided
        if select or exclude or packages:
            selected_node_ids = self.context.adapter.select_nodes(
                select=select,
                exclude=exclude,
                packages=packages,
            )
            # intersection_update accepts any iterable, not only a set
            nodes_to_compare.intersection_update(selected_node_ids)

        schema_changes = []

        # Sort nodes and columns so the row order, and therefore which rows
        # survive the limit below, is the same on every run.
        for node_id in sorted(nodes_to_compare):
            base_columns = (base_nodes.get(node_id) or {}).get("columns") or {}
            current_columns = (current_nodes.get(node_id) or {}).get("columns") or {}

            base_col_names = set(base_columns)
            current_col_names = set(current_columns)

            # In current but not in base
            for col_name in sorted(current_col_names - base_col_names):
                schema_changes.append((node_id, col_name, "added"))

            # In base but not in current
            for col_name in sorted(base_col_names - current_col_names):
                schema_changes.append((node_id, col_name, "removed"))

            # In both, but with different types
            for col_name in sorted(base_col_names & current_col_names):
                base_col_type = (base_columns[col_name] or {}).get("type")
                current_col_type = (current_columns[col_name] or {}).get("type")
                if base_col_type != current_col_type:
                    schema_changes.append((node_id, col_name, "modified"))

        # Return at most `limit` rows and flag whether anything was cut off
        limit = 100
        has_more = len(schema_changes) > limit
        limited_schema_changes = schema_changes[:limit]

        diff_df = DataFrame.from_data(
            columns={
                "node_id": "text",
                "column": "text",
                "change_status": "text",
            },
            data=limited_schema_changes,
            limit=limit,
            more=has_more,
        )
        return diff_df.model_dump(mode="json")

    except Exception:
        logger.exception("Error getting schema diff")
        raise
558
+
559
async def _tool_row_count_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Run a row count diff task and return its result.

    Args:
        arguments: Tool arguments, passed unchanged as the task's params.

    Returns:
        Whatever the task's ``execute`` returns.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    try:
        row_count_task = RowCountDiffTask(params=arguments)

        # execute() is a plain blocking call, so run it on the loop's
        # default executor instead of blocking the event loop.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, row_count_task.execute)
    except Exception:
        logger.exception("Error executing row count diff")
        raise
571
+
572
async def _tool_query(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Run a query task built from the given SQL template.

    Args:
        arguments: Tool arguments. "sql_template" supplies the query text;
            the optional "base" flag (default False) is copied onto the
            task's ``is_base`` attribute.

    Returns:
        The task result, dumped in JSON mode when it exposes
        ``model_dump``, otherwise returned as-is.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    try:
        query_task = QueryTask(params={"sql_template": arguments.get("sql_template")})
        query_task.is_base = arguments.get("base", False)

        # Run the blocking execute() on the loop's default executor
        loop = asyncio.get_running_loop()
        outcome = await loop.run_in_executor(None, query_task.execute)

        # Models are converted to plain dicts; anything else passes through
        return outcome.model_dump(mode="json") if hasattr(outcome, "model_dump") else outcome
    except Exception:
        logger.exception("Error executing query")
        raise
592
+
593
async def _tool_query_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Run a query diff task and return its result.

    Args:
        arguments: Tool arguments, passed unchanged as the task's params.

    Returns:
        The task result, dumped in JSON mode when it exposes
        ``model_dump``, otherwise returned as-is.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    try:
        diff_task = QueryDiffTask(params=arguments)

        # Run the blocking execute() on the loop's default executor
        loop = asyncio.get_running_loop()
        outcome = await loop.run_in_executor(None, diff_task.execute)

        # Models are converted to plain dicts; anything else passes through
        if not hasattr(outcome, "model_dump"):
            return outcome
        return outcome.model_dump(mode="json")
    except Exception:
        logger.exception("Error executing query diff")
        raise
608
+
609
async def _tool_profile_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Run a profile diff task and return its result.

    Args:
        arguments: Tool arguments, passed unchanged as the task's params.

    Returns:
        The task result, dumped in JSON mode when it exposes
        ``model_dump``, otherwise returned as-is.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    try:
        profile_task = ProfileDiffTask(params=arguments)

        # Run the blocking execute() on the loop's default executor
        loop = asyncio.get_running_loop()
        outcome = await loop.run_in_executor(None, profile_task.execute)

        # Models are converted to plain dicts; anything else passes through
        if not hasattr(outcome, "model_dump"):
            return outcome
        return outcome.model_dump(mode="json")
    except Exception:
        logger.exception("Error executing profile diff")
        raise
624
+
625
async def run(self):
    """Serve the MCP server over stdio until the server's run call returns."""
    async with stdio_server() as streams:
        reader, writer = streams
        init_options = self.server.create_initialization_options()
        await self.server.run(reader, writer, init_options)
629
+
630
async def run_sse(self, host: str = "localhost", port: int = 8000):
    """Serve the MCP server over HTTP using Server-Sent Events (SSE).

    Builds a Starlette app with three routes -- GET /health, GET /sse and a
    mount at / that receives client POST messages -- and serves it with
    uvicorn until the server stops.

    Args:
        host: Host to bind to (default: localhost)
        port: Port to bind to (default: 8000)
    """
    import uvicorn
    from mcp.server.sse import SseServerTransport
    from starlette.applications import Starlette
    from starlette.requests import Request
    from starlette.responses import Response
    from starlette.routing import Mount, Route

    # SSE transport; "/" is the endpoint where clients POST messages
    transport = SseServerTransport("/")

    async def handle_sse_request(request: Request):
        """Serve one SSE connection (GET /sse) for its whole lifetime."""
        if request.client:
            client_info = f"{request.client.host}:{request.client.port}"
        else:
            client_info = "unknown"
        logger.info(f"[MCP HTTP] SSE connection established from {client_info}")
        try:
            async with transport.connect_sse(request.scope, request.receive, request._send) as streams:
                read_stream, write_stream = streams[0], streams[1]
                await self.server.run(read_stream, write_stream, self.server.create_initialization_options())
        finally:
            logger.info(f"[MCP HTTP] SSE connection closed from {client_info}")
        # Required to avoid NoneType error
        return Response()

    async def handle_post_message(scope, receive, send):
        """Forward a client POST (POST /) to the SSE transport."""
        # session_id will be in the query params, so log them for debugging
        raw_query = scope.get("query_string", b"")
        query_string = raw_query.decode("utf-8")
        logger.debug(f"[MCP HTTP] POST message received with query: {query_string}")
        await transport.handle_post_message(scope, receive, send)

    async def handle_health_check(request: Request):
        """Answer the health check endpoint (GET /health)."""
        return Response(content='{"status":"ok"}', media_type="application/json")

    # Assemble the Starlette app
    debug_enabled = self.mcp_logger.debug
    routes = [
        Route("/health", endpoint=handle_health_check, methods=["GET"]),
        Route("/sse", endpoint=handle_sse_request, methods=["GET"]),
        Mount("/", app=handle_post_message),
    ]
    app = Starlette(debug=debug_enabled, routes=routes)

    # Serve with uvicorn
    logger.info(f"Starting Recce MCP Server in HTTP mode on {host}:{port}")
    logger.info(f"Connection URL: http://{host}:{port}/sse")
    uvicorn_server = uvicorn.Server(uvicorn.Config(app, host=host, port=port, log_level="info"))
    await uvicorn_server.serve()
685
+
686
+
687
async def run_mcp_server(sse: bool = False, host: str = "localhost", port: int = 8000, **kwargs):
    """
    Entry point for running the MCP server

    Configures logging, loads the Recce context, builds a RecceMCPServer and
    runs it over stdio or HTTP/SSE.

    Args:
        sse: Whether to run in HTTP/SSE mode (default: False for stdio mode)
        host: Host to bind to in SSE mode (default: localhost)
        port: Port to bind to in SSE mode (default: 8000)
        **kwargs: Arguments for loading RecceContext (dbt options, etc.)
                 Optionally includes 'mode' for server mode (server, preview, read-only)
                 Optionally includes 'debug' flag for enabling MCP logging
    """
    logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")

    # All kwargs (including 'mode' and 'debug') go to the context loader
    context = load_context(**kwargs)

    # Translate the optional mode string into the enum; an unknown value is
    # reported and the mode is left as None.
    mode = None
    mode_str = kwargs.get("mode")
    if mode_str:
        try:
            mode = RecceServerMode(mode_str)
        except ValueError:
            logger.warning(f"Invalid mode '{mode_str}', using default server mode")

    server = RecceMCPServer(context, mode=mode, debug=kwargs.get("debug", False))

    # stdio is the default transport; SSE only when explicitly requested
    if not sse:
        await server.run()
        return
    await server.run_sse(host=host, port=port)