iflow-mcp_niclasolofsson-dbt-core-mcp 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38):
  1. dbt_core_mcp/__init__.py +18 -0
  2. dbt_core_mcp/__main__.py +436 -0
  3. dbt_core_mcp/context.py +459 -0
  4. dbt_core_mcp/cte_generator.py +601 -0
  5. dbt_core_mcp/dbt/__init__.py +1 -0
  6. dbt_core_mcp/dbt/bridge_runner.py +1361 -0
  7. dbt_core_mcp/dbt/manifest.py +781 -0
  8. dbt_core_mcp/dbt/runner.py +67 -0
  9. dbt_core_mcp/dependencies.py +50 -0
  10. dbt_core_mcp/server.py +381 -0
  11. dbt_core_mcp/tools/__init__.py +77 -0
  12. dbt_core_mcp/tools/analyze_impact.py +78 -0
  13. dbt_core_mcp/tools/build_models.py +190 -0
  14. dbt_core_mcp/tools/demo/__init__.py +1 -0
  15. dbt_core_mcp/tools/demo/hello.html +267 -0
  16. dbt_core_mcp/tools/demo/ui_demo.py +41 -0
  17. dbt_core_mcp/tools/get_column_lineage.py +1988 -0
  18. dbt_core_mcp/tools/get_lineage.py +89 -0
  19. dbt_core_mcp/tools/get_project_info.py +96 -0
  20. dbt_core_mcp/tools/get_resource_info.py +134 -0
  21. dbt_core_mcp/tools/install_deps.py +102 -0
  22. dbt_core_mcp/tools/list_resources.py +84 -0
  23. dbt_core_mcp/tools/load_seeds.py +179 -0
  24. dbt_core_mcp/tools/query_database.py +459 -0
  25. dbt_core_mcp/tools/run_models.py +234 -0
  26. dbt_core_mcp/tools/snapshot_models.py +120 -0
  27. dbt_core_mcp/tools/test_models.py +238 -0
  28. dbt_core_mcp/utils/__init__.py +1 -0
  29. dbt_core_mcp/utils/env_detector.py +186 -0
  30. dbt_core_mcp/utils/process_check.py +130 -0
  31. dbt_core_mcp/utils/tool_utils.py +411 -0
  32. dbt_core_mcp/utils/warehouse_adapter.py +82 -0
  33. dbt_core_mcp/utils/warehouse_databricks.py +297 -0
  34. iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/METADATA +784 -0
  35. iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/RECORD +38 -0
  36. iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/WHEEL +4 -0
  37. iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/entry_points.txt +2 -0
  38. iflow_mcp_niclasolofsson_dbt_core_mcp-1.7.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,297 @@
1
+ """
2
+ Databricks Warehouse Adapter.
3
+
4
+ Provides pre-warming capabilities for Databricks serverless SQL warehouses
5
+ to eliminate cold-start delays before dbt command execution.
6
+ """
7
+
8
+ import asyncio
9
+ import logging
10
+ from pathlib import Path
11
+ from typing import Any, Callable
12
+
13
+ import requests
14
+ import yaml
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class DatabricksProfileError(Exception):
    """
    Raised when Databricks profile configuration is invalid or missing.

    Used for every failure to locate, parse, or validate the dbt
    configuration files (profiles.yml / dbt_project.yml) and the Databricks
    connection settings read from them.
    """
23
+
24
+
25
+ class DatabricksWarehouseAdapter:
26
+ """
27
+ Warehouse adapter for Databricks serverless SQL warehouses.
28
+
29
+ This adapter pre-warms Databricks warehouses by starting them via API
30
+ and polling until they reach RUNNING state. This eliminates the ~30s
31
+ cold-start delay that dbt would otherwise experience.
32
+
33
+ The adapter reads connection info from dbt profiles (profiles.yml) and
34
+ uses the Databricks SQL Warehouses API for control operations.
35
+ """
36
+
37
+ def __init__(self, project_dir: Path):
38
+ """
39
+ Initialize Databricks warehouse adapter.
40
+
41
+ Args:
42
+ project_dir: Path to the dbt project directory
43
+ """
44
+ self.project_dir = project_dir
45
+ self._connection_info = None # Lazy-loaded connection details
46
+ self._is_running = False # Track if we've already started the warehouse
47
+
48
+ async def prewarm(self, progress_callback: Callable[[int, int, str], Any] | None = None) -> None:
49
+ """
50
+ Pre-warm the Databricks serverless warehouse.
51
+
52
+ Starts the warehouse if not already running and waits for it to reach
53
+ RUNNING state. This operation is idempotent - calling it multiple times
54
+ is safe and won't cause issues.
55
+
56
+ Args:
57
+ progress_callback: Optional callback for progress updates (current, total, message)
58
+
59
+ Raises:
60
+ DatabricksProfileError: If profile configuration is invalid
61
+ RuntimeError: If warehouse fails to start or times out
62
+ """
63
+ # If we've already started the warehouse in this session, skip
64
+ if self._is_running:
65
+ logger.debug("Warehouse already pre-warmed in this session, skipping")
66
+ return
67
+
68
+ logger.info("Pre-warming Databricks serverless warehouse...")
69
+
70
+ # Report initial progress
71
+ logger.info(f"Progress callback is: {progress_callback}")
72
+ if progress_callback:
73
+ logger.info("Invoking initial progress callback: 'Initializing warehouse...'")
74
+ try:
75
+ result = progress_callback(0, 1, "Initializing warehouse...")
76
+ logger.info(f"Progress callback result type: {type(result)}")
77
+ if asyncio.iscoroutine(result):
78
+ await result
79
+ logger.info("Initial progress callback completed")
80
+ except Exception as e:
81
+ logger.warning(f"Progress callback error: {e}")
82
+ else:
83
+ logger.warning("No progress callback provided to prewarm")
84
+
85
+ # Get connection info from dbt profile
86
+ try:
87
+ instance, token, warehouse_id = await self._get_connection_info()
88
+ except DatabricksProfileError as e:
89
+ logger.error(f"Failed to get Databricks connection info: {e}")
90
+ raise
91
+
92
+ headers = {"Authorization": f"Bearer {token}"}
93
+ warehouse_url = f"https://{instance}/api/2.0/sql/warehouses/{warehouse_id}"
94
+
95
+ # Check current warehouse state
96
+ try:
97
+ resp = await asyncio.to_thread(requests.get, warehouse_url, headers=headers)
98
+ if resp.status_code != 200:
99
+ raise RuntimeError(f"Failed to fetch warehouse info: {resp.text}")
100
+
101
+ warehouse = resp.json()
102
+ current_state = warehouse.get("state")
103
+ warehouse_name = warehouse.get("name", warehouse_id)
104
+
105
+ logger.info(f"Warehouse '{warehouse_name}' current state: {current_state}")
106
+
107
+ # If already running, we're done
108
+ if current_state == "RUNNING":
109
+ logger.info("Warehouse already running, no pre-warming needed")
110
+ self._is_running = True
111
+ return
112
+
113
+ # Verify it's a serverless warehouse
114
+ is_serverless = warehouse.get("warehouse_type") == "PRO" and warehouse.get("enable_serverless_compute", False)
115
+ if not is_serverless:
116
+ logger.warning("Warehouse is not serverless, pre-warming may not be beneficial")
117
+
118
+ # Start the warehouse
119
+ start_url = f"https://{instance}/api/2.0/sql/warehouses/{warehouse_id}/start"
120
+ start_resp = await asyncio.to_thread(requests.post, start_url, headers=headers)
121
+
122
+ if start_resp.status_code != 200:
123
+ raise RuntimeError(f"Failed to start warehouse: {start_resp.text}")
124
+
125
+ logger.info(f"Started warehouse '{warehouse_name}', waiting for RUNNING state...")
126
+
127
+ # Poll for RUNNING state with progress reporting
128
+ max_wait = 300 # 5 minutes
129
+ poll_interval = 5 # seconds
130
+ waited = 0
131
+
132
+ while waited < max_wait:
133
+ await asyncio.sleep(poll_interval)
134
+ waited += poll_interval
135
+
136
+ # Report progress
137
+ if progress_callback:
138
+ try:
139
+ result = progress_callback(waited, max_wait, f"Pre-warming warehouse '{warehouse_name}'... ({waited}s)")
140
+ if asyncio.iscoroutine(result):
141
+ await result
142
+ except Exception as e:
143
+ logger.warning(f"Progress callback error: {e}")
144
+
145
+ state_resp = await asyncio.to_thread(requests.get, warehouse_url, headers=headers)
146
+ if state_resp.status_code == 200:
147
+ state = state_resp.json().get("state")
148
+ logger.info(f"Warehouse state after {waited}s: {state}")
149
+
150
+ if state == "RUNNING":
151
+ logger.info(f"Warehouse is RUNNING after {waited}s")
152
+ # Final progress update
153
+ if progress_callback:
154
+ try:
155
+ result = progress_callback(max_wait, max_wait, f"Warehouse '{warehouse_name}' ready")
156
+ if asyncio.iscoroutine(result):
157
+ await result
158
+ except Exception as e:
159
+ logger.warning(f"Progress callback error: {e}")
160
+ self._is_running = True
161
+ return
162
+ else:
163
+ logger.warning(f"Failed to get warehouse state: {state_resp.text}")
164
+
165
+ # Timeout
166
+ raise RuntimeError(f"Timed out waiting for warehouse to start after {max_wait}s")
167
+
168
+ except requests.RequestException as e:
169
+ logger.error(f"Network error during warehouse pre-warming: {e}")
170
+ raise RuntimeError(f"Failed to pre-warm warehouse: {e}")
171
+
172
+ async def _get_connection_info(self) -> tuple[str, str, str]:
173
+ """
174
+ Extract Databricks connection info from dbt profile.
175
+
176
+ Returns:
177
+ Tuple of (instance, token, warehouse_id)
178
+
179
+ Raises:
180
+ DatabricksProfileError: If required configuration is missing
181
+ """
182
+ if self._connection_info:
183
+ return self._connection_info
184
+
185
+ # Get dbt profile configuration
186
+ profile = await self._get_dbt_profile()
187
+
188
+ # Extract connection details
189
+ instance = profile.get("host", "").replace("https://", "").replace("/", "")
190
+ token = profile.get("token")
191
+ warehouse_id = profile.get("http_path", "").replace("/sql/1.0/warehouses/", "")
192
+
193
+ if not instance or not token:
194
+ raise DatabricksProfileError("Could not find Databricks instance or token in dbt profile")
195
+
196
+ if not warehouse_id:
197
+ raise DatabricksProfileError("No warehouse ID found in dbt profile config (http_path)")
198
+
199
+ logger.debug(f"Using Databricks instance: {instance}, warehouse: {warehouse_id}")
200
+
201
+ # Cache for future calls
202
+ self._connection_info = (instance, token, warehouse_id)
203
+ return self._connection_info
204
+
205
+ async def _get_dbt_profile(self) -> dict[str, Any]:
206
+ """
207
+ Load dbt profile configuration from profiles.yml.
208
+
209
+ Searches for profiles.yml in:
210
+ 1. Project directory (profiles.yml)
211
+ 2. User home directory (~/.dbt/profiles.yml)
212
+
213
+ Returns:
214
+ Dictionary with dbt profile target configuration
215
+
216
+ Raises:
217
+ DatabricksProfileError: If profiles not found or invalid
218
+ """
219
+ # First check project directory for profiles.yml
220
+ local_profiles_path = self.project_dir / "profiles.yml"
221
+ if local_profiles_path.exists():
222
+ profiles_path = local_profiles_path
223
+ logger.debug(f"Using local profiles.yml at {profiles_path}")
224
+ else:
225
+ # Fall back to ~/.dbt/profiles.yml
226
+ dbt_dir = Path.home() / ".dbt"
227
+ profiles_path = dbt_dir / "profiles.yml"
228
+ if not profiles_path.exists():
229
+ raise DatabricksProfileError(f"Could not find profiles.yml at {profiles_path}")
230
+ logger.debug(f"Using user profiles.yml at {profiles_path}")
231
+
232
+ # Load profiles.yml
233
+ try:
234
+ with open(profiles_path) as f:
235
+ profiles = yaml.safe_load(f)
236
+ except Exception as e:
237
+ raise DatabricksProfileError(f"Failed to parse profiles.yml: {e}")
238
+
239
+ # Get profile name from dbt_project.yml
240
+ project_yml_path = self.project_dir / "dbt_project.yml"
241
+ if not project_yml_path.exists():
242
+ raise DatabricksProfileError(f"Could not find dbt_project.yml at {project_yml_path}")
243
+
244
+ try:
245
+ with open(project_yml_path) as f:
246
+ project = yaml.safe_load(f)
247
+ except Exception as e:
248
+ raise DatabricksProfileError(f"Failed to parse dbt_project.yml: {e}")
249
+
250
+ profile_name = project.get("profile")
251
+ if not profile_name:
252
+ raise DatabricksProfileError("No 'profile' key found in dbt_project.yml")
253
+
254
+ # Get profile
255
+ profile = profiles.get(profile_name)
256
+ if not profile:
257
+ raise DatabricksProfileError(f"Profile '{profile_name}' not found in profiles.yml")
258
+
259
+ # Get target
260
+ target_name = profile.get("target", "default")
261
+ target = profile.get("outputs", {}).get(target_name)
262
+
263
+ if not target:
264
+ raise DatabricksProfileError(f"Target '{target_name}' not found in profile '{profile_name}'")
265
+
266
+ logger.debug(f"Using profile '{profile_name}', target '{target_name}'")
267
+ return target
268
+
269
+ # Get profile name from dbt_project.yml
270
+ project_yml_path = self.project_dir / "dbt_project.yml"
271
+ if not project_yml_path.exists():
272
+ raise DatabricksProfileError(f"Could not find dbt_project.yml at {project_yml_path}")
273
+
274
+ try:
275
+ with open(project_yml_path) as f:
276
+ project = yaml.safe_load(f)
277
+ except Exception as e:
278
+ raise DatabricksProfileError(f"Failed to parse dbt_project.yml: {e}")
279
+
280
+ profile_name = project.get("profile")
281
+ if not profile_name:
282
+ raise DatabricksProfileError("No 'profile' key found in dbt_project.yml")
283
+
284
+ # Get profile
285
+ profile = profiles.get(profile_name)
286
+ if not profile:
287
+ raise DatabricksProfileError(f"Profile '{profile_name}' not found in profiles.yml")
288
+
289
+ # Get target
290
+ target_name = profile.get("target", "default")
291
+ target = profile.get("outputs", {}).get(target_name)
292
+
293
+ if not target:
294
+ raise DatabricksProfileError(f"Target '{target_name}' not found in profile '{profile_name}'")
295
+
296
+ logger.debug(f"Using profile '{profile_name}', target '{target_name}'")
297
+ return target