iflow-mcp_niclasolofsson-dbt-core-mcp 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbt_core_mcp/__init__.py +18 -0
- dbt_core_mcp/__main__.py +436 -0
- dbt_core_mcp/context.py +459 -0
- dbt_core_mcp/cte_generator.py +601 -0
- dbt_core_mcp/dbt/__init__.py +1 -0
- dbt_core_mcp/dbt/bridge_runner.py +1361 -0
- dbt_core_mcp/dbt/manifest.py +781 -0
- dbt_core_mcp/dbt/runner.py +67 -0
- dbt_core_mcp/dependencies.py +50 -0
- dbt_core_mcp/server.py +381 -0
- dbt_core_mcp/tools/__init__.py +77 -0
- dbt_core_mcp/tools/analyze_impact.py +78 -0
- dbt_core_mcp/tools/build_models.py +190 -0
- dbt_core_mcp/tools/demo/__init__.py +1 -0
- dbt_core_mcp/tools/demo/hello.html +267 -0
- dbt_core_mcp/tools/demo/ui_demo.py +41 -0
- dbt_core_mcp/tools/get_column_lineage.py +1988 -0
- dbt_core_mcp/tools/get_lineage.py +89 -0
- dbt_core_mcp/tools/get_project_info.py +96 -0
- dbt_core_mcp/tools/get_resource_info.py +134 -0
- dbt_core_mcp/tools/install_deps.py +102 -0
- dbt_core_mcp/tools/list_resources.py +84 -0
- dbt_core_mcp/tools/load_seeds.py +179 -0
- dbt_core_mcp/tools/query_database.py +459 -0
- dbt_core_mcp/tools/run_models.py +234 -0
- dbt_core_mcp/tools/snapshot_models.py +120 -0
- dbt_core_mcp/tools/test_models.py +238 -0
- dbt_core_mcp/utils/__init__.py +1 -0
- dbt_core_mcp/utils/env_detector.py +186 -0
- dbt_core_mcp/utils/process_check.py +130 -0
- dbt_core_mcp/utils/tool_utils.py +411 -0
- dbt_core_mcp/utils/warehouse_adapter.py +82 -0
- dbt_core_mcp/utils/warehouse_databricks.py +297 -0
- iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/METADATA +784 -0
- iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/RECORD +38 -0
- iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/WHEEL +4 -0
- iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/entry_points.txt +2 -0
- iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/licenses/LICENSE +21 -0
dbt_core_mcp/utils/warehouse_databricks.py
@@ -0,0 +1,297 @@
+"""
+Databricks Warehouse Adapter.
+
+Provides pre-warming capabilities for Databricks serverless SQL warehouses
+to eliminate cold-start delays before dbt command execution.
+"""
+
+import asyncio
+import logging
+from pathlib import Path
+from typing import Any, Callable
+
+import requests
+import yaml
+
+logger = logging.getLogger(__name__)
+
+
+class DatabricksProfileError(Exception):
+    """Raised when Databricks profile configuration is invalid or missing."""
+
+    pass
+
+
+class DatabricksWarehouseAdapter:
+    """
+    Warehouse adapter for Databricks serverless SQL warehouses.
+
+    This adapter pre-warms Databricks warehouses by starting them via API
+    and polling until they reach RUNNING state. This eliminates the ~30s
+    cold-start delay that dbt would otherwise experience.
+
+    The adapter reads connection info from dbt profiles (profiles.yml) and
+    uses the Databricks SQL Warehouses API for control operations.
+    """
+
+    def __init__(self, project_dir: Path):
+        """
+        Initialize Databricks warehouse adapter.
+
+        Args:
+            project_dir: Path to the dbt project directory
+        """
+        self.project_dir = project_dir
+        self._connection_info = None  # Lazy-loaded connection details
+        self._is_running = False  # Track if we've already started the warehouse
+
+    async def prewarm(self, progress_callback: Callable[[int, int, str], Any] | None = None) -> None:
+        """
+        Pre-warm the Databricks serverless warehouse.
+
+        Starts the warehouse if not already running and waits for it to reach
+        RUNNING state. This operation is idempotent - calling it multiple times
+        is safe and won't cause issues.
+
+        Args:
+            progress_callback: Optional callback for progress updates (current, total, message)
+
+        Raises:
+            DatabricksProfileError: If profile configuration is invalid
+            RuntimeError: If warehouse fails to start or times out
+        """
+        # If we've already started the warehouse in this session, skip
+        if self._is_running:
+            logger.debug("Warehouse already pre-warmed in this session, skipping")
+            return
+
+        logger.info("Pre-warming Databricks serverless warehouse...")
+
+        # Report initial progress
+        logger.info(f"Progress callback is: {progress_callback}")
+        if progress_callback:
+            logger.info("Invoking initial progress callback: 'Initializing warehouse...'")
+            try:
+                result = progress_callback(0, 1, "Initializing warehouse...")
+                logger.info(f"Progress callback result type: {type(result)}")
+                if asyncio.iscoroutine(result):
+                    await result
+                logger.info("Initial progress callback completed")
+            except Exception as e:
+                logger.warning(f"Progress callback error: {e}")
+        else:
+            logger.warning("No progress callback provided to prewarm")
+
+        # Get connection info from dbt profile
+        try:
+            instance, token, warehouse_id = await self._get_connection_info()
+        except DatabricksProfileError as e:
+            logger.error(f"Failed to get Databricks connection info: {e}")
+            raise
+
+        headers = {"Authorization": f"Bearer {token}"}
+        warehouse_url = f"https://{instance}/api/2.0/sql/warehouses/{warehouse_id}"
+
+        # Check current warehouse state
+        try:
+            resp = await asyncio.to_thread(requests.get, warehouse_url, headers=headers)
+            if resp.status_code != 200:
+                raise RuntimeError(f"Failed to fetch warehouse info: {resp.text}")
+
+            warehouse = resp.json()
+            current_state = warehouse.get("state")
+            warehouse_name = warehouse.get("name", warehouse_id)
+
+            logger.info(f"Warehouse '{warehouse_name}' current state: {current_state}")
+
+            # If already running, we're done
+            if current_state == "RUNNING":
+                logger.info("Warehouse already running, no pre-warming needed")
+                self._is_running = True
+                return
+
+            # Verify it's a serverless warehouse
+            is_serverless = warehouse.get("warehouse_type") == "PRO" and warehouse.get("enable_serverless_compute", False)
+            if not is_serverless:
+                logger.warning("Warehouse is not serverless, pre-warming may not be beneficial")
+
+            # Start the warehouse
+            start_url = f"https://{instance}/api/2.0/sql/warehouses/{warehouse_id}/start"
+            start_resp = await asyncio.to_thread(requests.post, start_url, headers=headers)
+
+            if start_resp.status_code != 200:
+                raise RuntimeError(f"Failed to start warehouse: {start_resp.text}")
+
+            logger.info(f"Started warehouse '{warehouse_name}', waiting for RUNNING state...")
+
+            # Poll for RUNNING state with progress reporting
+            max_wait = 300  # 5 minutes
+            poll_interval = 5  # seconds
+            waited = 0
+
+            while waited < max_wait:
+                await asyncio.sleep(poll_interval)
+                waited += poll_interval
+
+                # Report progress
+                if progress_callback:
+                    try:
+                        result = progress_callback(waited, max_wait, f"Pre-warming warehouse '{warehouse_name}'... ({waited}s)")
+                        if asyncio.iscoroutine(result):
+                            await result
+                    except Exception as e:
+                        logger.warning(f"Progress callback error: {e}")
+
+                state_resp = await asyncio.to_thread(requests.get, warehouse_url, headers=headers)
+                if state_resp.status_code == 200:
+                    state = state_resp.json().get("state")
+                    logger.info(f"Warehouse state after {waited}s: {state}")
+
+                    if state == "RUNNING":
+                        logger.info(f"Warehouse is RUNNING after {waited}s")
+                        # Final progress update
+                        if progress_callback:
+                            try:
+                                result = progress_callback(max_wait, max_wait, f"Warehouse '{warehouse_name}' ready")
+                                if asyncio.iscoroutine(result):
+                                    await result
+                            except Exception as e:
+                                logger.warning(f"Progress callback error: {e}")
+                        self._is_running = True
+                        return
+                else:
+                    logger.warning(f"Failed to get warehouse state: {state_resp.text}")
+
+            # Timeout
+            raise RuntimeError(f"Timed out waiting for warehouse to start after {max_wait}s")
+
+        except requests.RequestException as e:
+            logger.error(f"Network error during warehouse pre-warming: {e}")
+            raise RuntimeError(f"Failed to pre-warm warehouse: {e}")
+
+    async def _get_connection_info(self) -> tuple[str, str, str]:
+        """
+        Extract Databricks connection info from dbt profile.
+
+        Returns:
+            Tuple of (instance, token, warehouse_id)
+
+        Raises:
+            DatabricksProfileError: If required configuration is missing
+        """
+        if self._connection_info:
+            return self._connection_info
+
+        # Get dbt profile configuration
+        profile = await self._get_dbt_profile()
+
+        # Extract connection details
+        instance = profile.get("host", "").replace("https://", "").replace("/", "")
+        token = profile.get("token")
+        warehouse_id = profile.get("http_path", "").replace("/sql/1.0/warehouses/", "")
+
+        if not instance or not token:
+            raise DatabricksProfileError("Could not find Databricks instance or token in dbt profile")
+
+        if not warehouse_id:
+            raise DatabricksProfileError("No warehouse ID found in dbt profile config (http_path)")
+
+        logger.debug(f"Using Databricks instance: {instance}, warehouse: {warehouse_id}")
+
+        # Cache for future calls
+        self._connection_info = (instance, token, warehouse_id)
+        return self._connection_info
+
+    async def _get_dbt_profile(self) -> dict[str, Any]:
+        """
+        Load dbt profile configuration from profiles.yml.
+
+        Searches for profiles.yml in:
+        1. Project directory (profiles.yml)
+        2. User home directory (~/.dbt/profiles.yml)
+
+        Returns:
+            Dictionary with dbt profile target configuration
+
+        Raises:
+            DatabricksProfileError: If profiles not found or invalid
+        """
+        # First check project directory for profiles.yml
+        local_profiles_path = self.project_dir / "profiles.yml"
+        if local_profiles_path.exists():
+            profiles_path = local_profiles_path
+            logger.debug(f"Using local profiles.yml at {profiles_path}")
+        else:
+            # Fall back to ~/.dbt/profiles.yml
+            dbt_dir = Path.home() / ".dbt"
+            profiles_path = dbt_dir / "profiles.yml"
+            if not profiles_path.exists():
+                raise DatabricksProfileError(f"Could not find profiles.yml at {profiles_path}")
+            logger.debug(f"Using user profiles.yml at {profiles_path}")
+
+        # Load profiles.yml
+        try:
+            with open(profiles_path) as f:
+                profiles = yaml.safe_load(f)
+        except Exception as e:
+            raise DatabricksProfileError(f"Failed to parse profiles.yml: {e}")
+
+        # Get profile name from dbt_project.yml
+        project_yml_path = self.project_dir / "dbt_project.yml"
+        if not project_yml_path.exists():
+            raise DatabricksProfileError(f"Could not find dbt_project.yml at {project_yml_path}")
+
+        try:
+            with open(project_yml_path) as f:
+                project = yaml.safe_load(f)
+        except Exception as e:
+            raise DatabricksProfileError(f"Failed to parse dbt_project.yml: {e}")
+
+        profile_name = project.get("profile")
+        if not profile_name:
+            raise DatabricksProfileError("No 'profile' key found in dbt_project.yml")
+
+        # Get profile
+        profile = profiles.get(profile_name)
+        if not profile:
+            raise DatabricksProfileError(f"Profile '{profile_name}' not found in profiles.yml")
+
+        # Get target
+        target_name = profile.get("target", "default")
+        target = profile.get("outputs", {}).get(target_name)
+
+        if not target:
+            raise DatabricksProfileError(f"Target '{target_name}' not found in profile '{profile_name}'")
+
+        logger.debug(f"Using profile '{profile_name}', target '{target_name}'")
+        return target
+
+        # Get profile name from dbt_project.yml
+        project_yml_path = self.project_dir / "dbt_project.yml"
+        if not project_yml_path.exists():
+            raise DatabricksProfileError(f"Could not find dbt_project.yml at {project_yml_path}")
+
+        try:
+            with open(project_yml_path) as f:
+                project = yaml.safe_load(f)
+        except Exception as e:
+            raise DatabricksProfileError(f"Failed to parse dbt_project.yml: {e}")
+
+        profile_name = project.get("profile")
+        if not profile_name:
+            raise DatabricksProfileError("No 'profile' key found in dbt_project.yml")
+
+        # Get profile
+        profile = profiles.get(profile_name)
+        if not profile:
+            raise DatabricksProfileError(f"Profile '{profile_name}' not found in profiles.yml")
+
+        # Get target
+        target_name = profile.get("target", "default")
+        target = profile.get("outputs", {}).get(target_name)
+
+        if not target:
+            raise DatabricksProfileError(f"Target '{target_name}' not found in profile '{profile_name}'")
+
+        logger.debug(f"Using profile '{profile_name}', target '{target_name}'")
+        return target