signalpilot-ai-internal 0.10.0__py3-none-any.whl → 0.10.22__py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- signalpilot_ai_internal/__init__.py +1 -0
- signalpilot_ai_internal/_version.py +1 -1
- signalpilot_ai_internal/databricks_schema_service.py +902 -0
- signalpilot_ai_internal/handlers.py +72 -2
- signalpilot_ai_internal/mcp_handlers.py +508 -0
- signalpilot_ai_internal/mcp_server_manager.py +298 -0
- signalpilot_ai_internal/mcp_service.py +1303 -0
- signalpilot_ai_internal/schema_search_service.py +62 -1
- signalpilot_ai_internal/test_dbt_mcp_server.py +180 -0
- {signalpilot_ai_internal-0.10.0.data → signalpilot_ai_internal-0.10.22.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/package.json +2 -2
- {signalpilot_ai_internal-0.10.0.data → signalpilot_ai_internal-0.10.22.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/schemas/signalpilot-ai-internal/package.json.orig +1 -1
- {signalpilot_ai_internal-0.10.0.data → signalpilot_ai_internal-0.10.22.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/schemas/signalpilot-ai-internal/plugin.json +7 -1
- signalpilot_ai_internal-0.10.0.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/786.770dc7bcab77e14cc135.js → signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/110.224e83db03814fd03955.js +2 -2
- signalpilot_ai_internal-0.10.0.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/57.e9acd2e1f9739037f1ab.js → signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/57.c4232851631fb2e7e59a.js +1 -1
- signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/726.318e4e791edb63cc788f.js +1 -0
- signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/880.d9914229e4f120e7e9e4.js +1 -0
- signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/936.d80de1e4da5b520d2f3b.js +1 -0
- signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/remoteEntry.b63c429ca81e743b403c.js +1 -0
- {signalpilot_ai_internal-0.10.0.data → signalpilot_ai_internal-0.10.22.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/third-party-licenses.json +18 -0
- {signalpilot_ai_internal-0.10.0.dist-info → signalpilot_ai_internal-0.10.22.dist-info}/METADATA +3 -2
- signalpilot_ai_internal-0.10.22.dist-info/RECORD +56 -0
- signalpilot_ai_internal-0.10.0.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/330.af2e9cb5def5ae2b84d5.js +0 -1
- signalpilot_ai_internal-0.10.0.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/880.25ddd15aca09421d3765.js +0 -1
- signalpilot_ai_internal-0.10.0.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/remoteEntry.b05b2f0c9617ba28370d.js +0 -1
- signalpilot_ai_internal-0.10.0.dist-info/RECORD +0 -50
- {signalpilot_ai_internal-0.10.0.data → signalpilot_ai_internal-0.10.22.data}/data/etc/jupyter/jupyter_server_config.d/signalpilot_ai.json +0 -0
- {signalpilot_ai_internal-0.10.0.data → signalpilot_ai_internal-0.10.22.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/install.json +0 -0
- {signalpilot_ai_internal-0.10.0.data → signalpilot_ai_internal-0.10.22.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/122.e2dadf63dc64d7b5f1ee.js +0 -0
- {signalpilot_ai_internal-0.10.0.data → signalpilot_ai_internal-0.10.22.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/220.328403b5545f268b95c6.js +0 -0
- {signalpilot_ai_internal-0.10.0.data → signalpilot_ai_internal-0.10.22.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/262.726e1da31a50868cb297.js +0 -0
- {signalpilot_ai_internal-0.10.0.data → signalpilot_ai_internal-0.10.22.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/353.972abe1d2d66f083f9cc.js +0 -0
- {signalpilot_ai_internal-0.10.0.data → signalpilot_ai_internal-0.10.22.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/364.dbec4c2dc12e7b050dcc.js +0 -0
- {signalpilot_ai_internal-0.10.0.data → signalpilot_ai_internal-0.10.22.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/384.fa432bdb7fb6b1c95ad6.js +0 -0
- {signalpilot_ai_internal-0.10.0.data → signalpilot_ai_internal-0.10.22.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/439.37e271d7a80336daabe2.js +0 -0
- {signalpilot_ai_internal-0.10.0.data → signalpilot_ai_internal-0.10.22.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/476.ad22ccddd74ee306fb56.js +0 -0
- {signalpilot_ai_internal-0.10.0.data → signalpilot_ai_internal-0.10.22.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/481.73c7a9290b7d35a8b9c1.js +0 -0
- {signalpilot_ai_internal-0.10.0.data → signalpilot_ai_internal-0.10.22.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/512.b58fc0093d080b8ee61c.js +0 -0
- {signalpilot_ai_internal-0.10.0.data → signalpilot_ai_internal-0.10.22.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/553.b4042a795c91d9ff71ef.js +0 -0
- {signalpilot_ai_internal-0.10.0.data → signalpilot_ai_internal-0.10.22.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/553.b4042a795c91d9ff71ef.js.LICENSE.txt +0 -0
- {signalpilot_ai_internal-0.10.0.data → signalpilot_ai_internal-0.10.22.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/635.9720593ee20b768da3ca.js +0 -0
- {signalpilot_ai_internal-0.10.0.data → signalpilot_ai_internal-0.10.22.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/713.8e6edc9a965bdd578ca7.js +0 -0
- {signalpilot_ai_internal-0.10.0.data → signalpilot_ai_internal-0.10.22.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/741.dc49867fafb03ea2ba4d.js +0 -0
- {signalpilot_ai_internal-0.10.0.data → signalpilot_ai_internal-0.10.22.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/742.91e7b516c8699eea3373.js +0 -0
- {signalpilot_ai_internal-0.10.0.data → signalpilot_ai_internal-0.10.22.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/785.2d75de1a8d2c3131a8db.js +0 -0
- {signalpilot_ai_internal-0.10.0.data → signalpilot_ai_internal-0.10.22.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/801.ca9e114a30896b669a3c.js +0 -0
- {signalpilot_ai_internal-0.10.0.data → signalpilot_ai_internal-0.10.22.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/888.34054db17bcf6e87ec95.js +0 -0
- {signalpilot_ai_internal-0.10.0.data → signalpilot_ai_internal-0.10.22.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/style.js +0 -0
- {signalpilot_ai_internal-0.10.0.dist-info → signalpilot_ai_internal-0.10.22.dist-info}/WHEEL +0 -0
- {signalpilot_ai_internal-0.10.0.dist-info → signalpilot_ai_internal-0.10.22.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,902 @@
+"""
+Databricks schema service handlers for SignalPilot AI.
+Provides REST API handlers for Databricks SQL Warehouse schema retrieval and query execution.
+
+Supports two authentication methods:
+- Personal Access Token (PAT): User pastes token directly
+- Service Principal: OAuth client credentials flow with automatic token refresh
+
+Uses Unity Catalog with 3-level namespace: catalog.schema.table
+"""
+
+import json
+import os
+import subprocess
+import sys
+import time
+from typing import Any, Dict, Optional, List, Tuple
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from functools import lru_cache
+import threading
+
+from jupyter_server.base.handlers import APIHandler
+import tornado
+
+
+# In-memory token cache for Service Principal OAuth tokens
+# Key: connection_id or hash of client credentials
+# Value: {"access_token": str, "expires_at": float}
+_sp_token_cache: Dict[str, Dict[str, Any]] = {}
+
+
+class DatabricksSchemaHandler(APIHandler):
+    """Handler for Databricks schema operations"""
+
+    def _setup_databricks_environment(self):
+        """Install required Databricks packages if not available"""
+        def install_package(package_name):
+            try:
+                subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])
+                return True
+            except subprocess.CalledProcessError:
+                return False
+
+        missing_packages = []
+
+        try:
+            from databricks import sql as databricks_sql
+        except ImportError:
+            if install_package("databricks-sql-connector"):
+                try:
+                    from databricks import sql as databricks_sql
+                except ImportError as e:
+                    missing_packages.append(f"databricks-sql-connector: {str(e)}")
+            else:
+                missing_packages.append("databricks-sql-connector: installation failed")
+
+        if missing_packages:
+            raise ImportError("Required modules could not be installed: " + ", ".join(missing_packages))
+
+        from databricks import sql as databricks_sql
+        return databricks_sql
+
+    def _get_databricks_config(self, provided_config: Optional[Dict] = None) -> Optional[Dict]:
+        """Get Databricks configuration from request or environment variables"""
+        if provided_config:
+            return provided_config
+
+        # Look for Databricks database configuration in the environment
+        for key, value in os.environ.items():
+            if key.endswith('_CONNECTION_JSON'):
+                try:
+                    config = json.loads(value)
+                    if config.get('type') == 'databricks':
+                        return config
+                except Exception as e:
+                    print(f"[DatabricksSchemaHandler] Error parsing database config {key}: {e}")
+                    continue
+
+        return None
+
+    def _get_access_token(self, config: Dict) -> str:
+        """Get access token for authentication.
+
+        For PAT: returns the token directly
+        For Service Principal: obtains OAuth token via client credentials flow
+        """
+        auth_type = config.get('authType', 'pat')
+
+        if auth_type == 'pat':
+            # Personal Access Token - use directly
+            token = config.get('accessToken')
+            if not token:
+                raise ValueError("Personal Access Token is required for PAT authentication")
+            return token
+
+        elif auth_type == 'service_principal':
+            # Service Principal - OAuth client credentials flow
+            return self._get_sp_access_token(config)
+
+        else:
+            raise ValueError(f"Unknown authentication type: {auth_type}")
+
+    def _get_sp_access_token(self, config: Dict) -> str:
+        """Get access token via Service Principal OAuth client credentials flow."""
+        client_id = config.get('clientId')
+        client_secret = config.get('clientSecret')
+
+        if not client_id or not client_secret:
+            raise ValueError("Client ID and Client Secret are required for Service Principal authentication")
+
+        # Create cache key from client credentials
+        cache_key = f"{client_id}:{hash(client_secret)}"
+
+        # Check cache for valid token
+        cached = _sp_token_cache.get(cache_key)
+        if cached:
+            # Refresh if within 60 seconds of expiry
+            if cached.get('expires_at', 0) > time.time() + 60:
+                return cached['access_token']
+
+        # Get OAuth token URL
+        # Default to Azure AD endpoint if not specified
+        token_url = config.get('oauthTokenUrl')
+        if not token_url:
+            # Try to derive from workspace URL for Azure
+            workspace_url = config.get('connectionUrl', '')
+            if 'azuredatabricks.net' in workspace_url:
+                # Azure Databricks - use Azure AD
+                tenant_id = config.get('tenantId', 'common')
+                token_url = f"https://login.microsoftonline.com/{tenant_id}/oauth2/v2.0/token"
+            else:
+                # AWS/GCP - use Databricks OAuth endpoint
+                # Extract host from workspace URL
+                import re
+                match = re.match(r'https?://([^/]+)', workspace_url)
+                if match:
+                    host = match.group(1)
+                    token_url = f"https://{host}/oidc/v1/token"
+                else:
+                    raise ValueError("Cannot determine OAuth token URL. Please provide oauthTokenUrl in config.")
+
+        # Request new token
+        import urllib.request
+        import urllib.parse
+
+        # Prepare token request
+        scopes = config.get('scopes', ['2ff814a6-3304-4ab8-85cb-cd0e6f879c1d/.default'])
+        if isinstance(scopes, str):
+            scopes = [scopes]
+
+        data = {
+            'grant_type': 'client_credentials',
+            'client_id': client_id,
+            'client_secret': client_secret,
+            'scope': ' '.join(scopes)
+        }
+
+        encoded_data = urllib.parse.urlencode(data).encode('utf-8')
+
+        req = urllib.request.Request(
+            token_url,
+            data=encoded_data,
+            headers={
+                'Content-Type': 'application/x-www-form-urlencoded'
+            }
+        )
+
+        try:
+            with urllib.request.urlopen(req, timeout=30) as response:
+                result = json.loads(response.read().decode('utf-8'))
+
+            access_token = result.get('access_token')
+            expires_in = result.get('expires_in', 3600)
+
+            if not access_token:
+                raise ValueError("No access_token in OAuth response")
+
+            # Cache the token
+            _sp_token_cache[cache_key] = {
+                'access_token': access_token,
+                'expires_at': time.time() + expires_in
+            }
+
+            return access_token
+
+        except urllib.error.HTTPError as e:
+            error_body = e.read().decode('utf-8') if e.fp else str(e)
+            raise ValueError(f"OAuth token request failed: {e.code} - {error_body}")
+        except Exception as e:
+            raise ValueError(f"Failed to obtain OAuth token: {str(e)}")
+
+    def _get_connection_params(self, config: Dict) -> Dict[str, Any]:
+        """Build Databricks connection parameters from configuration"""
+        import re
+
+        # Extract host from connectionUrl
+        connection_url = config.get('connectionUrl', '')
+        if not connection_url:
+            raise ValueError("connectionUrl (workspace URL) is required for Databricks")
+
+        # Extract host from URL
+        url_match = re.match(r'https?://([^/]+)', connection_url)
+        if not url_match:
+            raise ValueError(f"Invalid Databricks connectionUrl format: {connection_url}")
+
+        server_hostname = url_match.group(1)
+
+        # Get HTTP path for SQL warehouse
+        http_path = config.get('warehouseHttpPath') or config.get('httpPath')
+        if not http_path:
+            warehouse_id = config.get('warehouseId')
+            if warehouse_id:
+                http_path = f"/sql/1.0/warehouses/{warehouse_id}"
+            else:
+                raise ValueError("Either warehouseHttpPath or warehouseId is required")
+
+        # Get access token
+        access_token = self._get_access_token(config)
+
+        conn_params = {
+            'server_hostname': server_hostname,
+            'http_path': http_path,
+            'access_token': access_token,
+        }
+
+        # Optional catalog (Unity Catalog)
+        catalog = config.get('catalog')
+        if catalog:
+            conn_params['catalog'] = catalog
+
+        # Optional schema
+        schema = config.get('schema')
+        if schema:
+            conn_params['schema'] = schema
+
+        return conn_params
+
+    def _list_catalogs(self, cursor) -> List[str]:
+        """List all accessible catalogs"""
+        cursor.execute("SHOW CATALOGS")
+        rows = cursor.fetchall()
+        return [row[0] for row in rows if row[0] not in ('system', 'samples')]
+
+    def _list_schemas(self, cursor, catalog: str) -> List[str]:
+        """List all schemas in a catalog"""
+        cursor.execute(f"SHOW SCHEMAS IN `{catalog}`")
+        rows = cursor.fetchall()
+        return [row[0] for row in rows if row[0] not in ('information_schema',)]
+
+    def _list_tables(self, cursor, catalog: str, schema: str) -> List[Dict]:
+        """List all tables in a schema with their type"""
+        cursor.execute(f"SHOW TABLES IN `{catalog}`.`{schema}`")
+        rows = cursor.fetchall()
+        tables = []
+        for row in rows:
+            # SHOW TABLES returns: database, tableName, isTemporary
+            table_name = row[1] if len(row) > 1 else row[0]
+            tables.append({
+                'table_name': table_name,
+                'table_type': 'TABLE'
+            })
+        return tables
+
+    def _get_table_columns(self, cursor, catalog: str, schema: str, table: str) -> List[Dict]:
+        """Get column information for a table"""
+        try:
+            cursor.execute(f"DESCRIBE TABLE `{catalog}`.`{schema}`.`{table}`")
+            rows = cursor.fetchall()
+            columns = []
+            for row in rows:
+                # DESCRIBE TABLE returns: col_name, data_type, comment
+                col_name = row[0]
+
+                # Skip metadata rows (partition info, etc.)
+                if col_name.startswith('#') or not col_name.strip():
+                    continue
+
+                columns.append({
+                    'column_name': col_name,
+                    'data_type': row[1] if len(row) > 1 else 'unknown',
+                    'is_nullable': 'YES', # Databricks doesn't always expose this
+                    'column_default': None,
+                    'description': row[2] if len(row) > 2 and row[2] else None
+                })
+            return columns
+        except Exception as e:
+            print(f"[DatabricksSchemaHandler] Error getting columns for {catalog}.{schema}.{table}: {e}")
+            return []
+
+    def _fetch_table_with_columns(self, databricks_sql, conn_params: Dict, catalog: str, schema: str, table_info: Dict) -> Dict:
+        """Fetch a single table with its columns using a new connection (for parallel execution)"""
+        connection = None
+        try:
+            connection = databricks_sql.connect(**conn_params)
+            cursor = connection.cursor()
+
+            table_name = table_info['table_name']
+            columns = self._get_table_columns(cursor, catalog, schema, table_name)
+
+            cursor.close()
+
+            return {
+                'catalog': catalog,
+                'schema': schema,
+                'table': table_name,
+                'type': table_info.get('table_type', 'TABLE'),
+                'columns': columns,
+                'error': None
+            }
+        except Exception as e:
+            print(f"[DatabricksSchemaHandler] Error fetching table {catalog}.{schema}.{table_info.get('table_name', 'unknown')}: {e}")
+            return {
+                'catalog': catalog,
+                'schema': schema,
+                'table': table_info.get('table_name', 'unknown'),
+                'type': table_info.get('table_type', 'TABLE'),
+                'columns': [],
+                'error': str(e)
+            }
+        finally:
+            if connection:
+                try:
+                    connection.close()
+                except:
+                    pass
+
+    def _fetch_schema_tables(self, databricks_sql, conn_params: Dict, catalog: str, schema: str) -> Dict:
+        """Fetch all tables for a schema with parallel column fetching"""
+        connection = None
+        try:
+            # Get list of tables first
+            connection = databricks_sql.connect(**conn_params)
+            cursor = connection.cursor()
+
+            tables = self._list_tables(cursor, catalog, schema)
+            cursor.close()
+            connection.close()
+            connection = None
+
+            print(f" Schema {schema}: {len(tables)} tables - fetching in parallel...")
+
+            # Fetch table details in parallel
+            schema_obj = {
+                'schema': schema,
+                'tables': [],
+                'error': None
+            }
+
+            if not tables:
+                return schema_obj
+
+            # Use ThreadPoolExecutor for parallel table fetching
+            max_workers = min(10, len(tables)) # Limit concurrent connections
+            with ThreadPoolExecutor(max_workers=max_workers) as executor:
+                future_to_table = {
+                    executor.submit(
+                        self._fetch_table_with_columns,
+                        databricks_sql,
+                        conn_params,
+                        catalog,
+                        schema,
+                        table_info
+                    ): table_info for table_info in tables
+                }
+
+                for future in as_completed(future_to_table):
+                    try:
+                        table_obj = future.result()
+                        schema_obj['tables'].append(table_obj)
+                    except Exception as e:
+                        table_info = future_to_table[future]
+                        print(f" Error processing table {table_info.get('table_name')}: {e}")
+
+            return schema_obj
+
+        except Exception as e:
+            print(f" Error processing schema {schema}: {e}")
+            return {
+                'schema': schema,
+                'tables': [],
+                'error': str(e)
+            }
+        finally:
+            if connection:
+                try:
+                    connection.close()
+                except:
+                    pass
+
+    def _build_catalog(self, databricks_sql, conn_params: Dict, specified_catalog: Optional[str] = None, specified_schema: Optional[str] = None) -> Dict:
+        """Build complete catalog structure with parallel processing"""
+        connection = databricks_sql.connect(**conn_params)
+        cursor = connection.cursor()
+
+        try:
+            catalog_data = []
+
+            # Get catalogs to process
+            if specified_catalog:
+                catalogs = [specified_catalog]
+            else:
+                catalogs = self._list_catalogs(cursor)
+
+            print(f"[DatabricksSchemaHandler] Processing {len(catalogs)} catalogs with parallel optimization...")
+
+            for catalog in catalogs:
+                print(f" Processing catalog: {catalog}")
+                catalog_obj = {
+                    'catalog': catalog,
+                    'schemas': []
+                }
+
+                try:
+                    schemas_list = self._list_schemas(cursor, catalog)
+
+                    # Filter schemas if specified_schema is provided
+                    if specified_schema:
+                        schemas = [s for s in schemas_list if s == specified_schema]
+                    else:
+                        schemas = schemas_list
+
+                    print(f" Found {len(schemas)} schemas - processing in parallel...")
+
+                    if not schemas:
+                        catalog_data.append(catalog_obj)
+                        continue
+
+                    # Process schemas in parallel
+                    max_workers = min(5, len(schemas)) # Limit concurrent schema processing
+                    with ThreadPoolExecutor(max_workers=max_workers) as executor:
+                        future_to_schema = {
+                            executor.submit(
+                                self._fetch_schema_tables,
+                                databricks_sql,
+                                conn_params,
+                                catalog,
+                                schema
+                            ): schema for schema in schemas
+                        }
+
+                        for future in as_completed(future_to_schema):
+                            try:
+                                schema_obj = future.result()
+                                catalog_obj['schemas'].append(schema_obj)
+                            except Exception as e:
+                                schema = future_to_schema[future]
+                                print(f" Error processing schema {schema}: {e}")
+                                catalog_obj['schemas'].append({
+                                    'schema': schema,
+                                    'tables': [],
+                                    'error': str(e)
+                                })
+
+                except Exception as e:
+                    print(f" Error processing catalog {catalog}: {e}")
+                    catalog_obj['schemas'].append({
+                        'schema': 'default',
+                        'tables': [],
+                        'error': str(e)
+                    })
+
+                catalog_data.append(catalog_obj)
+
+            return {'catalogs': catalog_data}
+
+        finally:
+            cursor.close()
+            connection.close()
+
+    def _format_catalog_as_markdown(self, catalog_data: Dict) -> Tuple[str, Dict]:
+        """Format the catalog as markdown and build table_schemas dict"""
+        lines = ["# Databricks Database Schema\n"]
+        table_schemas = {}
+
+        total_tables = 0
+        for cat in catalog_data.get('catalogs', []):
+            for sch in cat.get('schemas', []):
+                total_tables += len(sch.get('tables', []))
+
+        lines.append(f"Found **{total_tables}** table(s)\n")
+
+        for cat in catalog_data.get('catalogs', []):
+            catalog_name = cat['catalog']
+
+            for sch in cat.get('schemas', []):
+                schema_name = sch['schema']
+
+                if sch.get('error'):
+                    lines.append(f"\n## {catalog_name}.{schema_name}\n")
+                    lines.append(f"Error: {sch['error']}\n")
+                    continue
+
+                for table in sch.get('tables', []):
+                    table_name = table['table']
+                    full_name = f"{catalog_name}.{schema_name}.{table_name}"
+
+                    lines.append(f"\n## {full_name}\n")
+
+                    columns = table.get('columns', [])
+                    lines.append(f"\n### Columns ({len(columns)})\n")
+
+                    for col in columns:
+                        col_name = col.get('column_name', 'unknown')
+                        data_type = col.get('data_type', 'unknown')
+                        description = col.get('description')
+
+                        if description:
+                            lines.append(f"- **{col_name}**: {data_type} - {description}\n")
+                        else:
+                            lines.append(f"- **{col_name}**: {data_type}\n")
+
+                    # Store in table_schemas
+                    table_schemas[full_name] = {
+                        'catalog': catalog_name,
+                        'schema': schema_name,
+                        'table_name': table_name,
+                        'full_name': full_name,
+                        'columns': [dict(col) for col in columns],
+                        'primary_keys': [], # Databricks doesn't always expose PK info
+                        'foreign_keys': [],
+                        'indices': []
+                    }
+
+                    lines.append("\n---\n")
+
+        return ''.join(lines).strip(), table_schemas
+
+    @tornado.web.authenticated
+    def post(self):
+        """Get Databricks database schema information"""
+        try:
+            # Parse request body
+            try:
+                body = json.loads(self.request.body.decode('utf-8'))
+            except json.JSONDecodeError:
+                self.set_status(400)
+                self.finish(json.dumps({
+                    "error": "Invalid JSON in request body"
+                }))
+                return
+
+            # Get Databricks configuration from request or environment
+            config = self._get_databricks_config(body.get('config'))
+
+            if not config:
+                self.set_status(400)
+                self.finish(json.dumps({
+                    "error": "No Databricks configuration provided and no Databricks configurations found in environment"
+                }))
+                return
+
+            # Setup Databricks environment
+            try:
+                databricks_sql = self._setup_databricks_environment()
+            except ImportError as e:
+                self.set_status(500)
+                self.finish(json.dumps({
+                    "error": str(e)
+                }))
+                return
+
+            # Get database schema
+            try:
+                conn_params = self._get_connection_params(config)
+                specified_catalog = config.get('catalog')
+                specified_schema = config.get('schema')
+
+                print(f"[DatabricksSchemaHandler] Connecting to {conn_params['server_hostname']}")
+                if specified_catalog:
+                    print(f"[DatabricksSchemaHandler] Filtering to catalog: {specified_catalog}")
+                if specified_schema:
+                    print(f"[DatabricksSchemaHandler] Filtering to schema: {specified_schema}")
+
+                catalog_data = self._build_catalog(
+                    databricks_sql,
+                    conn_params,
+                    specified_catalog=specified_catalog,
+                    specified_schema=specified_schema
+                )
+
+                markdown_result, table_schemas = self._format_catalog_as_markdown(catalog_data)
+
+                self.finish(json.dumps({
+                    "result": markdown_result,
+                    "table_schemas": table_schemas,
+                    "catalogs": catalog_data.get('catalogs', [])
+                }))
+
+            except Exception as e:
+                error_msg = str(e)
+                # Provide helpful error messages
+                if 'PAT' in error_msg.upper() or 'token' in error_msg.lower():
+                    error_msg = f"Authentication failed: {error_msg}. If PATs are disabled in your workspace, try Service Principal authentication."
+                elif 'warehouse' in error_msg.lower():
+                    error_msg = f"SQL Warehouse error: {error_msg}. Ensure your warehouse is running and accessible."
+
+                self.set_status(500)
+                self.finish(json.dumps({
+                    "error": f"Error connecting to Databricks: {error_msg}"
+                }))
+
+        except Exception as e:
+            self.set_status(500)
+            self.finish(json.dumps({
+                "error": "Internal server error",
+                "message": str(e)
+            }))
+
+
+class DatabricksQueryHandler(APIHandler):
+    """Handler for Databricks query execution"""
+
+    def _setup_databricks_environment(self):
+        """Install required Databricks packages if not available"""
+        def install_package(package_name):
+            try:
+                subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])
+                return True
+            except subprocess.CalledProcessError:
+                return False
+
+        missing_packages = []
+
+        try:
+            from databricks import sql as databricks_sql
+        except ImportError:
+            if install_package("databricks-sql-connector"):
+                try:
+                    from databricks import sql as databricks_sql
+                except ImportError as e:
+                    missing_packages.append(f"databricks-sql-connector: {str(e)}")
+            else:
+                missing_packages.append("databricks-sql-connector: installation failed")
+
+        if missing_packages:
+            raise ImportError("Required modules could not be installed: " + ", ".join(missing_packages))
+
+        from databricks import sql as databricks_sql
+        return databricks_sql
+
+    def _get_databricks_config(self, provided_config: Optional[Dict] = None) -> Optional[Dict]:
+        """Get Databricks configuration from request or environment variables"""
+        if provided_config:
+            return provided_config
+
+        # Look for Databricks database configuration in the environment
+        for key, value in os.environ.items():
+            if key.endswith('_CONNECTION_JSON'):
+                try:
+                    config = json.loads(value)
+                    if config.get('type') == 'databricks':
+                        return config
+                except Exception as e:
+                    print(f"[DatabricksQueryHandler] Error parsing database config {key}: {e}")
+                    continue
+
+        return None
+
+    def _get_access_token(self, config: Dict) -> str:
+        """Get access token for authentication - delegates to schema handler logic"""
+        # Reuse the schema handler's token logic
+        handler = DatabricksSchemaHandler(self.application, self.request)
+        return handler._get_access_token(config)
+
+    def _get_connection_params(self, config: Dict) -> Dict[str, Any]:
+        """Build Databricks connection parameters from configuration"""
+        import re
+
+        connection_url = config.get('connectionUrl', '')
+        if not connection_url:
+            raise ValueError("connectionUrl (workspace URL) is required for Databricks")
+
+        url_match = re.match(r'https?://([^/]+)', connection_url)
+        if not url_match:
+            raise ValueError(f"Invalid Databricks connectionUrl format: {connection_url}")
+
+        server_hostname = url_match.group(1)
+
+        http_path = config.get('warehouseHttpPath') or config.get('httpPath')
+        if not http_path:
+            warehouse_id = config.get('warehouseId')
+            if warehouse_id:
+                http_path = f"/sql/1.0/warehouses/{warehouse_id}"
+            else:
+                raise ValueError("Either warehouseHttpPath or warehouseId is required")
+
+        access_token = self._get_access_token(config)
+
+        conn_params = {
+            'server_hostname': server_hostname,
+            'http_path': http_path,
+            'access_token': access_token,
+        }
+
+        catalog = config.get('catalog')
+        if catalog:
+            conn_params['catalog'] = catalog
+
+        schema = config.get('schema')
+        if schema:
+            conn_params['schema'] = schema
+
+        return conn_params
+
+    @tornado.web.authenticated
+    def post(self):
+        """Execute a read-only SQL query on Databricks"""
+        try:
+            # Parse request body
+            try:
+                body = json.loads(self.request.body.decode('utf-8'))
+            except json.JSONDecodeError:
+                self.set_status(400)
+                self.finish(json.dumps({
+                    "error": "Invalid JSON in request body"
+                }))
+                return
+
+            # Get query from request
+            query = body.get('query')
+            if not query:
+                self.set_status(400)
+                self.finish(json.dumps({
+                    "error": "Missing 'query' field in request body"
+                }))
+                return
+
+            # Basic validation for read-only queries
+            normalized_query = query.strip().upper()
+            allowed_starts = ['SELECT', 'WITH', 'SHOW', 'DESCRIBE', 'EXPLAIN']
+
+            if not any(normalized_query.startswith(start) for start in allowed_starts):
+                self.set_status(400)
+                self.finish(json.dumps({
+                    "error": f"Only {', '.join(allowed_starts)} statements are allowed for read queries."
+                }))
+                return
+
+            # Get Databricks configuration from request or environment
+            config = self._get_databricks_config(body.get('config'))
+
+            if not config:
+                self.set_status(400)
+                self.finish(json.dumps({
+                    "error": "No Databricks configuration provided and no Databricks configurations found in environment"
+                }))
+                return
+
+            # Setup Databricks environment
+            try:
+                databricks_sql = self._setup_databricks_environment()
+            except ImportError as e:
+                self.set_status(500)
+                self.finish(json.dumps({
+                    "error": str(e)
+                }))
+                return
+
+            # Execute query
+            try:
+                conn_params = self._get_connection_params(config)
+
+                # Allow specifying a specific catalog for the query
+                catalog = body.get('catalog')
+                if catalog:
+                    conn_params['catalog'] = catalog
+
+                connection = databricks_sql.connect(**conn_params)
+                cursor = connection.cursor()
+
+                try:
+                    cursor.execute(query)
+
+                    # Get column names from cursor description
+                    columns = [desc[0] for desc in cursor.description] if cursor.description else []
+
+                    # Fetch all results
+                    rows = cursor.fetchall()
+
+                    # Convert result to list of dictionaries
+                    result_rows = [
+                        {columns[i]: row[i] for i in range(len(columns))}
+                        for row in rows
+                    ]
+
+                    self.finish(json.dumps({
+                        "result": result_rows
+                    }))
+
+                finally:
+                    cursor.close()
+                    connection.close()
+
+            except Exception as e:
+                self.set_status(500)
+                self.finish(json.dumps({
+                    "error": f"Databricks query failed: {str(e)}"
+                }))
+
+        except Exception as e:
+            self.set_status(500)
+            self.finish(json.dumps({
+                "error": "Internal server error",
+                "message": str(e)
+            }))
+
+
+class DatabricksTestHandler(APIHandler):
+    """Handler for testing Databricks connection"""
+
+    @tornado.web.authenticated
+    def post(self):
+        """Test Databricks connection and return status"""
+        try:
+            # Parse request body
+            try:
+                body = json.loads(self.request.body.decode('utf-8'))
+            except json.JSONDecodeError:
+                self.set_status(400)
+                self.finish(json.dumps({
+                    "error": "Invalid JSON in request body"
+                }))
+                return
+
+            config = body.get('config')
+            if not config:
+                self.set_status(400)
+                self.finish(json.dumps({
+                    "error": "No configuration provided"
+                }))
+                return
+
+            # Setup environment
+            schema_handler = DatabricksSchemaHandler(self.application, self.request)
+            try:
+                databricks_sql = schema_handler._setup_databricks_environment()
+            except ImportError as e:
+                self.set_status(500)
+                self.finish(json.dumps({
+                    "ok": False,
+                    "error": str(e)
+                }))
+                return
+
+            # Test connection
+            try:
+                import time
+                start_time = time.time()
+
+                conn_params = schema_handler._get_connection_params(config)
+                connection = databricks_sql.connect(**conn_params)
+                cursor = connection.cursor()
+
+                try:
+                    # Test basic query
+                    cursor.execute("SELECT 1 as test")
+                    cursor.fetchall()
+
+                    sql_latency = int((time.time() - start_time) * 1000)
+
+                    # Try to get current user
+                    identity_info = {"type": "unknown", "name": "unknown"}
+                    try:
+                        cursor.execute("SELECT current_user() as user")
+                        user_row = cursor.fetchone()
+                        if user_row:
+                            auth_type = config.get('authType', 'pat')
+                            identity_info = {
+                                "type": "user" if auth_type == 'pat' else "service_principal",
+                                "name": user_row[0]
+                            }
+                    except Exception:
+                        pass
+
+                    self.finish(json.dumps({
+                        "ok": True,
+                        "identity": identity_info,
+                        "sql": {"ok": True, "latency_ms": sql_latency},
+                        "api": {"ok": True}
+                    }))
+
+                finally:
+                    cursor.close()
+                    connection.close()
+
+            except Exception as e:
+                error_msg = str(e)
+                self.finish(json.dumps({
+                    "ok": False,
+                    "error": error_msg,
+                    "identity": None,
+                    "sql": {"ok": False, "error": error_msg},
+                    "api": {"ok": False}
+                }))
+
+        except Exception as e:
+            self.set_status(500)
+            self.finish(json.dumps({
+                "ok": False,
+                "error": "Internal server error",
+                "message": str(e)
+            }))
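For reference, the handlers in this new module resolve their connection settings either from the request body (`config`) or from any environment variable whose name ends in `_CONNECTION_JSON` and whose JSON payload has `type: "databricks"`. The sketch below shows the two configuration shapes the code reads (PAT and Service Principal). The field names are taken directly from the handler code above; the environment-variable prefix, workspace URL, warehouse ID, and credential placeholders are illustrative assumptions, not values shipped with the package.

import json
import os

# Personal Access Token (PAT) configuration, as read by _get_access_token()
pat_config = {
    "type": "databricks",                                       # required for environment discovery
    "connectionUrl": "https://adb-123.4.azuredatabricks.net",   # workspace URL (placeholder)
    "authType": "pat",
    "accessToken": "<personal-access-token>",
    "warehouseId": "abc123def456",      # or "warehouseHttpPath": "/sql/1.0/warehouses/abc123def456"
    "catalog": "main",                  # optional Unity Catalog defaults
    "schema": "default",
}

# Service Principal configuration: OAuth client-credentials flow with in-memory token caching
sp_config = {
    "type": "databricks",
    "connectionUrl": "https://adb-123.4.azuredatabricks.net",
    "authType": "service_principal",
    "clientId": "<application-id>",
    "clientSecret": "<client-secret>",
    "tenantId": "<azure-tenant-id>",    # used to build the Azure AD token URL for azuredatabricks.net hosts
    # "oauthTokenUrl": "...",           # optional override; otherwise derived from connectionUrl
    "warehouseHttpPath": "/sql/1.0/warehouses/abc123def456",
}

# The handlers discover this automatically because the key ends in _CONNECTION_JSON
# (the "MY_DATABRICKS" prefix is arbitrary and only illustrative).
os.environ["MY_DATABRICKS_CONNECTION_JSON"] = json.dumps(sp_config)

Requests that include a `config` object in the POST body bypass this environment lookup entirely; requests that omit it fall back to the first matching `*_CONNECTION_JSON` variable.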