signalpilot-ai-internal 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. signalpilot_ai_internal/_version.py +1 -1
  2. signalpilot_ai_internal/cache_handlers.py +383 -0
  3. signalpilot_ai_internal/cache_service.py +549 -0
  4. signalpilot_ai_internal/handlers.py +35 -915
  5. signalpilot_ai_internal/snowflake_schema_service.py +671 -0
  6. signalpilot_ai_internal/unified_database_schema_service.py +742 -0
  7. {signalpilot_ai_internal-0.3.2.data → signalpilot_ai_internal-0.3.4.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/package.json +3 -2
  8. {signalpilot_ai_internal-0.3.2.data → signalpilot_ai_internal-0.3.4.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/schemas/signalpilot-ai-internal/package.json.orig +2 -1
  9. signalpilot_ai_internal-0.3.4.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/384.fa432bdb7fb6b1c95ad6.js +1 -0
  10. signalpilot_ai_internal-0.3.4.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/447.d8bc4aeaf8ddeacb2486.js +1 -0
  11. signalpilot_ai_internal-0.3.4.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/839.694baa59818fdf19fba9.js +1 -0
  12. signalpilot_ai_internal-0.3.4.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/remoteEntry.a9d6eb0edda396db6779.js +1 -0
  13. {signalpilot_ai_internal-0.3.2.data → signalpilot_ai_internal-0.3.4.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/third-party-licenses.json +6 -0
  14. {signalpilot_ai_internal-0.3.2.dist-info → signalpilot_ai_internal-0.3.4.dist-info}/METADATA +1 -1
  15. signalpilot_ai_internal-0.3.4.dist-info/RECORD +45 -0
  16. signalpilot_ai_internal-0.3.2.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/447.8d3d5d0480ba7396f2f5.js +0 -1
  17. signalpilot_ai_internal-0.3.2.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/839.5a362da0c4b891e005b3.js +0 -1
  18. signalpilot_ai_internal-0.3.2.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/remoteEntry.57019ad0ad044a0f8ad8.js +0 -1
  19. signalpilot_ai_internal-0.3.2.dist-info/RECORD +0 -40
  20. {signalpilot_ai_internal-0.3.2.data → signalpilot_ai_internal-0.3.4.data}/data/etc/jupyter/jupyter_server_config.d/signalpilot_ai.json +0 -0
  21. {signalpilot_ai_internal-0.3.2.data → signalpilot_ai_internal-0.3.4.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/install.json +0 -0
  22. {signalpilot_ai_internal-0.3.2.data → signalpilot_ai_internal-0.3.4.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/schemas/signalpilot-ai-internal/plugin.json +0 -0
  23. {signalpilot_ai_internal-0.3.2.data → signalpilot_ai_internal-0.3.4.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/104.04e170724f369fcbaf19.js +0 -0
  24. {signalpilot_ai_internal-0.3.2.data → signalpilot_ai_internal-0.3.4.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/104.04e170724f369fcbaf19.js.LICENSE.txt +0 -0
  25. {signalpilot_ai_internal-0.3.2.data → signalpilot_ai_internal-0.3.4.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/122.e2dadf63dc64d7b5f1ee.js +0 -0
  26. {signalpilot_ai_internal-0.3.2.data → signalpilot_ai_internal-0.3.4.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/220.328403b5545f268b95c6.js +0 -0
  27. {signalpilot_ai_internal-0.3.2.data → signalpilot_ai_internal-0.3.4.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/262.726e1da31a50868cb297.js +0 -0
  28. {signalpilot_ai_internal-0.3.2.data → signalpilot_ai_internal-0.3.4.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/280.35d8c8b68815702a5238.js +0 -0
  29. {signalpilot_ai_internal-0.3.2.data → signalpilot_ai_internal-0.3.4.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/280.35d8c8b68815702a5238.js.LICENSE.txt +0 -0
  30. {signalpilot_ai_internal-0.3.2.data → signalpilot_ai_internal-0.3.4.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/353.72484b768a04f89bd3dd.js +0 -0
  31. {signalpilot_ai_internal-0.3.2.data → signalpilot_ai_internal-0.3.4.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/364.dbec4c2dc12e7b050dcc.js +0 -0
  32. {signalpilot_ai_internal-0.3.2.data → signalpilot_ai_internal-0.3.4.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/439.37e271d7a80336daabe2.js +0 -0
  33. {signalpilot_ai_internal-0.3.2.data → signalpilot_ai_internal-0.3.4.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/476.9b4f05a99f5003f82094.js +0 -0
  34. {signalpilot_ai_internal-0.3.2.data → signalpilot_ai_internal-0.3.4.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/481.73c7a9290b7d35a8b9c1.js +0 -0
  35. {signalpilot_ai_internal-0.3.2.data → signalpilot_ai_internal-0.3.4.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/512.b58fc0093d080b8ee61c.js +0 -0
  36. {signalpilot_ai_internal-0.3.2.data → signalpilot_ai_internal-0.3.4.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/553.b4042a795c91d9ff71ef.js +0 -0
  37. {signalpilot_ai_internal-0.3.2.data → signalpilot_ai_internal-0.3.4.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/553.b4042a795c91d9ff71ef.js.LICENSE.txt +0 -0
  38. {signalpilot_ai_internal-0.3.2.data → signalpilot_ai_internal-0.3.4.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/606.90aaaae46b73dc3c08fb.js +0 -0
  39. {signalpilot_ai_internal-0.3.2.data → signalpilot_ai_internal-0.3.4.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/635.9720593ee20b768da3ca.js +0 -0
  40. {signalpilot_ai_internal-0.3.2.data → signalpilot_ai_internal-0.3.4.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/713.8e6edc9a965bdd578ca7.js +0 -0
  41. {signalpilot_ai_internal-0.3.2.data → signalpilot_ai_internal-0.3.4.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/742.91e7b516c8699eea3373.js +0 -0
  42. {signalpilot_ai_internal-0.3.2.data → signalpilot_ai_internal-0.3.4.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/785.3aa564fc148b37d1d719.js +0 -0
  43. {signalpilot_ai_internal-0.3.2.data → signalpilot_ai_internal-0.3.4.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/888.34054db17bcf6e87ec95.js +0 -0
  44. {signalpilot_ai_internal-0.3.2.data → signalpilot_ai_internal-0.3.4.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/style.js +0 -0
  45. {signalpilot_ai_internal-0.3.2.dist-info → signalpilot_ai_internal-0.3.4.dist-info}/WHEEL +0 -0
  46. {signalpilot_ai_internal-0.3.2.dist-info → signalpilot_ai_internal-0.3.4.dist-info}/licenses/LICENSE +0 -0
signalpilot_ai_internal/snowflake_schema_service.py
@@ -0,0 +1,671 @@
+"""
+Snowflake schema service handlers for SignalPilot AI.
+Provides REST API handlers for Snowflake database schema retrieval and query execution.
+Supports multiple databases within a single Snowflake connection.
+
+Behavior:
+- If a warehouse is specified in the config, it will be used directly.
+- Otherwise, picks the smallest RUNNING warehouse.
+- If none running, resumes the smallest SUSPENDED warehouse.
+- If none exist, attempts to CREATE a tiny warehouse (requires privilege).
+- If a database is specified in the config, only that database will be processed.
+- Otherwise, all accessible databases will be processed.
+- Builds a catalog with parallel schema processing for performance.
+- For each table, includes detailed column information: name, type, ordinal position,
+  nullable, description, default value, and type-specific attributes.
+"""
+
+import json
+import os
+import re
+import subprocess
+import sys
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from typing import Any, Dict, Optional, List
+
+from jupyter_server.base.handlers import APIHandler
+import tornado
+
+SIZE_ORDER = ["XSMALL", "SMALL", "MEDIUM", "LARGE", "XLARGE", "XXLARGE", "XXXLARGE", "X4LARGE", "X5LARGE", "X6LARGE"]
+
+
+class SnowflakeSchemaHandler(APIHandler):
+    """Handler for Snowflake schema operations"""
+
+    def _setup_snowflake_environment(self):
+        """Install required Snowflake packages if not available"""
+        def install_package(package_name):
+            try:
+                subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])
+                return True
+            except subprocess.CalledProcessError:
+                return False
+
+        missing_packages = []
+
+        try:
+            import snowflake.connector
+        except ImportError:
+            if install_package("snowflake-connector-python"):
+                try:
+                    import snowflake.connector
+                except ImportError as e:
+                    missing_packages.append(f"snowflake-connector-python: {str(e)}")
+            else:
+                missing_packages.append("snowflake-connector-python: installation failed")
+
+        if missing_packages:
+            raise ImportError("Required modules could not be installed: " + ", ".join(missing_packages))
+
+        import snowflake.connector
+        return snowflake.connector
+
+    def _get_snowflake_config(self, provided_config: Optional[Dict] = None) -> Optional[Dict]:
+        """Get Snowflake configuration from request or environment variables"""
+        if provided_config:
+            return provided_config
+
+        # Look for Snowflake database configuration in the environment
+        for key, value in os.environ.items():
+            if key.endswith('_CONNECTION_JSON'):
+                try:
+                    config = json.loads(value)
+                    if config.get('type') == 'snowflake':
+                        return config
+                except Exception as e:
+                    print(f"[SnowflakeSchemaHandler] Error parsing database config {key}: {e}")
+                    continue
+
+        return None
+
+    def _get_connection_params(self, config: Dict) -> Dict[str, Any]:
+        """Build Snowflake connection parameters from configuration"""
+        # Extract account from connectionUrl
+        connection_url = config.get('connectionUrl', '')
+        if not connection_url:
+            raise ValueError("connectionUrl is required for Snowflake")
+
+        # Extract the account identifier from the connectionUrl
+        url_match = re.match(r'https?://([^/]+)', connection_url)
+        if not url_match:
+            raise ValueError(f"Invalid Snowflake connectionUrl format: {connection_url}")
+
+        account = url_match.group(1)
+        # Strip .snowflakecomputing.com if present
+        account_identifier = account.replace('.snowflakecomputing.com', '')
+
+        conn_params = {
+            'account': account_identifier,
+            'user': config['username'],
+            'password': config['password'],
+        }
+
+        warehouse = config.get('warehouse')
+        database = config.get('database')
+        role = config.get('role')
+
+        if warehouse:
+            conn_params['warehouse'] = warehouse
+        if database:
+            conn_params['database'] = database
+        if role:
+            conn_params['role'] = role
+
+        return conn_params
+
+    def _fetch_result_scan(self, cur, sql: str, name_col: str = "name") -> List[str]:
+        """Execute SQL and fetch results using RESULT_SCAN"""
+        cur.execute(sql)
+        cur.execute(f'SELECT "{name_col}" FROM TABLE(RESULT_SCAN(LAST_QUERY_ID()))')
+        rows = cur.fetchall()
+        # Handle both DictCursor (returns dicts) and regular cursor (returns tuples)
+        if rows and isinstance(rows[0], dict):
+            return [r[name_col] for r in rows]
+        return [r[0] for r in rows]
+
+    def _get_warehouses(self, cur) -> List[Dict]:
+        """Get all warehouses with their state and size"""
+        cur.execute("SHOW WAREHOUSES")
+        cur.execute("""
+            SELECT "name","state","size","auto_suspend","auto_resume"
+            FROM TABLE(RESULT_SCAN(LAST_QUERY_ID()))
+        """)
+        rows = cur.fetchall()
+        # Handle DictCursor (returns dicts) and regular cursor (returns tuples)
+        if rows and isinstance(rows[0], dict):
+            # DictCursor: normalize keys to lowercase
+            return [{k.lower(): v for k, v in row.items()} for row in rows]
+        else:
+            # Regular cursor: manually create dicts
+            cols = [d[0].lower() for d in cur.description]
+            return [dict(zip(cols, row)) for row in rows]
+
+    def _size_rank(self, sz: str) -> int:
+        """Get the numeric rank of a warehouse size"""
+        s = (sz or "").upper()
+        return SIZE_ORDER.index(s) if s in SIZE_ORDER else len(SIZE_ORDER) + 1
+
+    def _choose_smallest_running(self, warehouses: List[Dict]) -> Optional[str]:
+        """Choose the smallest running warehouse"""
+        running = [w for w in warehouses if (w.get("state") or "").upper() == "STARTED"]
+        if not running:
+            return None
+        running.sort(key=lambda w: self._size_rank(w.get("size")))
+        return running[0]["name"]
+
+    def _choose_smallest_suspended(self, warehouses: List[Dict]) -> Optional[str]:
+        """Choose the smallest suspended warehouse"""
+        suspended = [w for w in warehouses if (w.get("state") or "").upper() in ("SUSPENDED", "RESIZING")]
+        if not suspended:
+            return None
+        suspended.sort(key=lambda w: self._size_rank(w.get("size")))
+        return suspended[0]["name"]
+
+    def _resume_warehouse(self, cur, name: str) -> None:
+        """Resume a suspended warehouse"""
+        cur.execute(f'ALTER WAREHOUSE "{name}" RESUME')
+
+    def _create_tiny_warehouse(self, cur, name: str = "SPAI_TINY_WH") -> str:
+        """Create a tiny warehouse (requires proper privilege)"""
+        cur.execute(f'''
+            CREATE WAREHOUSE IF NOT EXISTS "{name}"
+            WITH WAREHOUSE_SIZE = XSMALL
+            AUTO_SUSPEND = 60
+            AUTO_RESUME = TRUE
+            INITIALLY_SUSPENDED = TRUE
+        ''')
+        # Start it
+        cur.execute(f'ALTER WAREHOUSE "{name}" RESUME')
+        return name
+
+    def _ensure_warehouse(self, cur, preferred: Optional[str]) -> str:
+        """Ensure a warehouse is available and running"""
+        # Respect an explicitly provided warehouse first, if any
+        if preferred:
+            try:
+                cur.execute(f'SHOW WAREHOUSES LIKE \'{preferred}\'')
+                cur.execute('SELECT "name","state","size" FROM TABLE(RESULT_SCAN(LAST_QUERY_ID()))')
+                row = cur.fetchone()
+                if row:
+                    # Handle DictCursor vs regular cursor
+                    state = (row["state"] if isinstance(row, dict) else row[1] or "").upper()
+                    if state != "STARTED":
+                        self._resume_warehouse(cur, preferred)
+                    return preferred
+            except Exception as e:
+                # Fall back to discovery below
+                print(f"Note: preferred warehouse '{preferred}' not available or cannot be resumed ({e}). Falling back.")
+
+        warehouses = self._get_warehouses(cur)
+        name = self._choose_smallest_running(warehouses)
+        if name:
+            return name
+
+        name = self._choose_smallest_suspended(warehouses)
+        if name:
+            self._resume_warehouse(cur, name)
+            return name
+
+        # None exist → create tiny one
+        return self._create_tiny_warehouse(cur)
+
+    def _list_databases(self, cur) -> List[str]:
+        """List all databases"""
+        return self._fetch_result_scan(cur, "SHOW DATABASES", "name")
+
+    def _list_schemas_for_db(self, cur, db: str) -> List[str]:
+        """List all schemas for a database (excluding INFORMATION_SCHEMA)"""
+        cur.execute(f'USE DATABASE "{db}"')
+        schemas = self._fetch_result_scan(cur, "SHOW SCHEMAS", "name")
+        return [s for s in schemas if s.upper() != "INFORMATION_SCHEMA"]
+
+    def _list_tables_with_columns_for_schema(self, connector, conn, db: str, schema: str, limit: int = 5000) -> List[Dict]:
+        """Get tables and their columns for a schema using optimized bulk query."""
+        cur = conn.cursor(connector.DictCursor)
+        try:
+            cur.execute(f'USE DATABASE "{db}"')
+            cur.execute(f'USE SCHEMA "{schema}"')
+
+            # Get all tables and columns in one query for better performance
+            cur.execute("""
+                SELECT
+                    t.TABLE_SCHEMA,
+                    t.TABLE_NAME,
+                    t.TABLE_TYPE,
+                    c.COLUMN_NAME,
+                    c.DATA_TYPE,
+                    c.ORDINAL_POSITION,
+                    c.IS_NULLABLE,
+                    c.COLUMN_DEFAULT,
+                    c.CHARACTER_MAXIMUM_LENGTH,
+                    c.NUMERIC_PRECISION,
+                    c.NUMERIC_SCALE,
+                    c.COMMENT
+                FROM INFORMATION_SCHEMA.TABLES t
+                LEFT JOIN INFORMATION_SCHEMA.COLUMNS c
+                    ON t.TABLE_SCHEMA = c.TABLE_SCHEMA
+                    AND t.TABLE_NAME = c.TABLE_NAME
+                WHERE t.TABLE_SCHEMA = %s
+                ORDER BY t.TABLE_NAME, c.ORDINAL_POSITION
+                LIMIT 50000
+            """, (schema,))
+            rows = cur.fetchall()
+
+            # Group by table
+            tables_dict = {}
+            for r in rows:
+                if isinstance(r, dict):
+                    table_key = r["TABLE_NAME"]
+                    if table_key not in tables_dict:
+                        tables_dict[table_key] = {
+                            "schema": r["TABLE_SCHEMA"],
+                            "table": r["TABLE_NAME"],
+                            "type": r["TABLE_TYPE"],
+                            "columns": []
+                        }
+
+                    if r.get("COLUMN_NAME"):
+                        col = {
+                            "name": r["COLUMN_NAME"],
+                            "type": r["DATA_TYPE"],
+                            "ordinal": r["ORDINAL_POSITION"],
+                            "nullable": r["IS_NULLABLE"] == "YES",
+                        }
+                        if r.get("COMMENT"):
+                            col["description"] = r["COMMENT"]
+                        if r.get("COLUMN_DEFAULT"):
+                            col["default"] = r["COLUMN_DEFAULT"]
+                        if r.get("CHARACTER_MAXIMUM_LENGTH"):
+                            col["max_length"] = r["CHARACTER_MAXIMUM_LENGTH"]
+                        if r.get("NUMERIC_PRECISION"):
+                            col["precision"] = r["NUMERIC_PRECISION"]
+                        if r.get("NUMERIC_SCALE") is not None:
+                            col["scale"] = r["NUMERIC_SCALE"]
+                        tables_dict[table_key]["columns"].append(col)
+                else:
+                    table_key = r[1]
+                    if table_key not in tables_dict:
+                        tables_dict[table_key] = {
+                            "schema": r[0],
+                            "table": r[1],
+                            "type": r[2],
+                            "columns": []
+                        }
+
+                    if r[3]:  # COLUMN_NAME
+                        col = {
+                            "name": r[3],
+                            "type": r[4],
+                            "ordinal": r[5],
+                            "nullable": r[6] == "YES",
+                        }
+                        if r[11]:  # COMMENT
+                            col["description"] = r[11]
+                        if r[7]:  # COLUMN_DEFAULT
+                            col["default"] = r[7]
+                        if r[8]:  # CHARACTER_MAXIMUM_LENGTH
+                            col["max_length"] = r[8]
+                        if r[9]:  # NUMERIC_PRECISION
+                            col["precision"] = r[9]
+                        if r[10] is not None:  # NUMERIC_SCALE
+                            col["scale"] = r[10]
+                        tables_dict[table_key]["columns"].append(col)
+
+            return list(tables_dict.values())[:limit]
+        finally:
+            cur.close()
+
+    def _process_schema(self, connector, conn, db: str, schema: str) -> Dict:
+        """Process a single schema with its tables and columns."""
+        try:
+            tables = self._list_tables_with_columns_for_schema(connector, conn, db, schema)
+            return {"schema": schema, "tables": tables, "error": None}
+        except Exception as e:
+            print(f"Warning: Error processing schema {db}.{schema}: {e}", file=sys.stderr)
+            return {"schema": schema, "tables": [], "error": str(e)}
+
+    def _build_catalog(self, connector, conn, max_workers: int = 5, specified_database: Optional[str] = None, specified_warehouse: Optional[str] = None) -> Dict:
+        """Build complete catalog with parallel schema processing
+
+        Args:
+            connector: Snowflake connector module
+            conn: Active Snowflake connection
+            max_workers: Number of parallel workers for schema processing
+            specified_database: If provided, only process this database
+            specified_warehouse: If provided, use this warehouse (don't auto-select)
+        """
+        cur = conn.cursor(connector.DictCursor)
+        try:
+            # 1) Handle warehouse selection
+            if specified_warehouse:
+                # Use the explicitly specified warehouse
+                wh = specified_warehouse
+                cur.execute(f'USE WAREHOUSE "{wh}"')
+            else:
+                # Auto-select a warehouse using existing logic
+                preferred_wh = None
+                # Extract warehouse from conn if available
+                try:
+                    cur.execute("SELECT CURRENT_WAREHOUSE()")
+                    row = cur.fetchone()
+                    if row:
+                        preferred_wh = row[0] if isinstance(row, tuple) else row.get("CURRENT_WAREHOUSE()")
+                except:
+                    pass
+
+                wh = self._ensure_warehouse(cur, preferred_wh)
+                cur.execute(f'USE WAREHOUSE "{wh}"')
+
+            # 2) Handle database selection
+            if specified_database:
+                # Only process the specified database
+                dbs = [specified_database]
+            else:
+                # List all databases
+                dbs = self._list_databases(cur)
+            cur.close()
+
+            catalog = []
+            print(f"Processing {len(dbs)} databases...", file=sys.stderr)
+
+            for db in dbs:
+                print(f"  Processing database: {db}", file=sys.stderr)
+                cur = conn.cursor(connector.DictCursor)
+                try:
+                    schemas = self._list_schemas_for_db(cur, db)
+                    print(f"    Found {len(schemas)} schemas", file=sys.stderr)
+                finally:
+                    cur.close()
+
+                # Process schemas in parallel for this database
+                schema_objs = []
+                with ThreadPoolExecutor(max_workers=max_workers) as executor:
+                    future_to_schema = {
+                        executor.submit(self._process_schema, connector, conn, db, s): s
+                        for s in schemas
+                    }
+
+                    for future in as_completed(future_to_schema):
+                        schema_name = future_to_schema[future]
+                        try:
+                            result = future.result()
+                            schema_objs.append(result)
+                            print(f"    Completed schema: {schema_name} ({len(result['tables'])} tables)", file=sys.stderr)
+                        except Exception as e:
+                            print(f"    Error with schema {schema_name}: {e}", file=sys.stderr)
+                            schema_objs.append({"schema": schema_name, "tables": [], "error": str(e)})
+
+                catalog.append({"database": db, "schemas": schema_objs})
+
+            return {"warehouse": wh, "databases": catalog}
+        finally:
+            if not cur.is_closed():
+                cur.close()
+
+    def _format_catalog_as_json(self, catalog: Dict) -> Dict:
+        """Format the catalog for JSON response"""
+        return catalog
+
+    @tornado.web.authenticated
+    def post(self):
+        """Get Snowflake database schema information"""
+        try:
+            # Parse request body
+            try:
+                body = json.loads(self.request.body.decode('utf-8'))
+            except json.JSONDecodeError:
+                self.set_status(400)
+                self.finish(json.dumps({
+                    "error": "Invalid JSON in request body"
+                }))
+                return
+
+            # Get Snowflake configuration from request or environment
+            config = self._get_snowflake_config(body.get('config'))
+
+            if not config:
+                self.set_status(400)
+                self.finish(json.dumps({
+                    "error": "No Snowflake configuration provided and no Snowflake configurations found in environment"
+                }))
+                return
+
+            # Setup Snowflake environment
+            try:
+                connector = self._setup_snowflake_environment()
+            except ImportError as e:
+                self.set_status(500)
+                self.finish(json.dumps({
+                    "error": str(e)
+                }))
+                return
+
+            # Get database schema using optimized catalog building
+            try:
+                conn_params = self._get_connection_params(config)
+                max_workers = int(body.get('max_workers', 5))
+
+                # Extract database and warehouse from config for filtering
+                specified_database = config.get('database')
+                specified_warehouse = config.get('warehouse')
+
+                print(f"[SnowflakeSchemaHandler] Connecting with account={conn_params['account']}, user={conn_params['user']}, warehouse={conn_params.get('warehouse')}, database={conn_params.get('database')}, role={conn_params.get('role')}")
+
+                connection = connector.connect(**conn_params, client_session_keep_alive=False)
+
+                try:
+                    catalog = self._build_catalog(
+                        connector,
+                        connection,
+                        max_workers=max_workers,
+                        specified_database=specified_database,
+                        specified_warehouse=specified_warehouse
+                    )
+                    result = self._format_catalog_as_json(catalog)
+                    self.finish(json.dumps(result, indent=2))
+                finally:
+                    connection.close()
+
+            except Exception as e:
+                self.set_status(500)
+                self.finish(json.dumps({
+                    "error": f"Error connecting to Snowflake: {str(e)}"
+                }))
+
+        except Exception as e:
+            self.set_status(500)
+            self.finish(json.dumps({
+                "error": "Internal server error",
+                "message": str(e)
+            }))
+
+
+class SnowflakeQueryHandler(APIHandler):
+    """Handler for Snowflake query execution"""
+
+    def _setup_snowflake_environment(self):
+        """Install required Snowflake packages if not available"""
+        def install_package(package_name):
+            try:
+                subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])
+                return True
+            except subprocess.CalledProcessError:
+                return False
+
+        missing_packages = []
+
+        try:
+            import snowflake.connector
+        except ImportError:
+            if install_package("snowflake-connector-python"):
+                try:
+                    import snowflake.connector
+                except ImportError as e:
+                    missing_packages.append(f"snowflake-connector-python: {str(e)}")
+            else:
+                missing_packages.append("snowflake-connector-python: installation failed")
+
+        if missing_packages:
+            raise ImportError("Required modules could not be installed: " + ", ".join(missing_packages))
+
+        import snowflake.connector
+        return snowflake.connector
+
+    def _get_snowflake_config(self, provided_config: Optional[Dict] = None) -> Optional[Dict]:
+        """Get Snowflake configuration from request or environment variables"""
+        if provided_config:
+            return provided_config
+
+        # Look for Snowflake database configuration in the environment
+        for key, value in os.environ.items():
+            if key.endswith('_CONNECTION_JSON'):
+                try:
+                    config = json.loads(value)
+                    if config.get('type') == 'snowflake':
+                        return config
+                except Exception as e:
+                    print(f"[SnowflakeQueryHandler] Error parsing database config {key}: {e}")
+                    continue
+
+        return None
+
+    def _get_connection_params(self, config: Dict) -> Dict[str, Any]:
+        """Build Snowflake connection parameters from configuration"""
+        # Extract account from connectionUrl
+        connection_url = config.get('connectionUrl', '')
+        if not connection_url:
+            raise ValueError("connectionUrl is required for Snowflake")
+
+        # Extract the account identifier from the connectionUrl
+        # Expected format: https://account.snowflakecomputing.com or https://account-region.snowflakecomputing.com
+        import re
+        url_match = re.match(r'https?://([^/]+)', connection_url)
+        if not url_match:
+            raise ValueError(f"Invalid Snowflake connectionUrl format: {connection_url}")
+
+        account = url_match.group(1)
+        # Strip .snowflakecomputing.com if present
+        account_identifier = account.replace('.snowflakecomputing.com', '')
+
+        conn_params = {
+            'account': account_identifier,
+            'user': config['username'],
+            'password': config['password'],
+        }
+
+        warehouse = config.get('warehouse')
+        database = config.get('database')
+        role = config.get('role')
+
+        if warehouse:
+            conn_params['warehouse'] = warehouse
+        if database:
+            conn_params['database'] = database
+        if role:
+            conn_params['role'] = role
+
+        return conn_params
+
+    @tornado.web.authenticated
+    def post(self):
+        """Execute a read-only SQL query on Snowflake"""
+        try:
+            # Parse request body
+            try:
+                body = json.loads(self.request.body.decode('utf-8'))
+            except json.JSONDecodeError:
+                self.set_status(400)
+                self.finish(json.dumps({
+                    "error": "Invalid JSON in request body"
+                }))
+                return
+
+            # Get query from request
+            query = body.get('query')
+            if not query:
+                self.set_status(400)
+                self.finish(json.dumps({
+                    "error": "Missing 'query' field in request body"
+                }))
+                return
+
+            # Basic validation for read-only queries
+            normalized_query = query.strip().upper()
+            if not normalized_query.startswith('SELECT') and not normalized_query.startswith('WITH') and not normalized_query.startswith('SHOW') and not normalized_query.startswith('DESCRIBE'):
+                self.set_status(400)
+                self.finish(json.dumps({
+                    "error": "Only SELECT, WITH, SHOW, or DESCRIBE statements are allowed for read queries."
+                }))
+                return
+
+            # Get Snowflake configuration from request or environment
+            config = self._get_snowflake_config(body.get('config'))
+
+            if not config:
+                self.set_status(400)
+                self.finish(json.dumps({
+                    "error": "No Snowflake configuration provided and no Snowflake configurations found in environment"
+                }))
+                return
+
+            # Setup Snowflake environment
+            try:
+                connector = self._setup_snowflake_environment()
+            except ImportError as e:
+                self.set_status(500)
+                self.finish(json.dumps({
+                    "error": str(e)
+                }))
+                return
+
+            # Execute query
+            try:
+                conn_params = self._get_connection_params(config)
+
+                # Allow specifying a specific database for the query
+                database = body.get('database')
+                if database:
+                    conn_params['database'] = database
+
+                # Ensure we have a warehouse for querying
+                if not conn_params.get('warehouse'):
+                    raise ValueError("A warehouse is required to execute queries.")
+
+                connection = connector.connect(**conn_params)
+                cursor = connection.cursor()
+
+                try:
+                    cursor.execute(query)
+
+                    # Get column names from cursor description
+                    columns = [desc[0] for desc in cursor.description] if cursor.description else []
+
+                    # Fetch all results
+                    rows = cursor.fetchall()
+
+                    # Convert result to list of dictionaries
+                    result_rows = [
+                        {columns[i]: row[i] for i in range(len(columns))}
+                        for row in rows
+                    ]
+
+                    self.finish(json.dumps({
+                        "result": result_rows
+                    }))
+
+                finally:
+                    cursor.close()
+                    connection.close()
+
+            except Exception as e:
+                self.set_status(500)
+                self.finish(json.dumps({
+                    "error": f"Snowflake query failed: {str(e)}"
+                }))
+
+        except Exception as e:
+            self.set_status(500)
+            self.finish(json.dumps({
+                "error": "Internal server error",
+                "message": str(e)
+            }))
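
For context on the warehouse auto-selection that the new snowflake_schema_service.py implements (smallest running warehouse first, then smallest suspended, then create a tiny one), here is a minimal standalone sketch of that ranking. It mirrors _size_rank, _choose_smallest_running, and _choose_smallest_suspended from the diff above; the warehouse names, states, and sizes in the demo are illustrative only, not taken from any real account.

    from typing import Dict, List, Optional

    # Same size ladder the module uses to rank warehouses.
    SIZE_ORDER = ["XSMALL", "SMALL", "MEDIUM", "LARGE", "XLARGE", "XXLARGE",
                  "XXXLARGE", "X4LARGE", "X5LARGE", "X6LARGE"]

    def size_rank(sz: Optional[str]) -> int:
        # Unknown sizes sort after every known size, as in _size_rank.
        s = (sz or "").upper()
        return SIZE_ORDER.index(s) if s in SIZE_ORDER else len(SIZE_ORDER) + 1

    def pick_warehouse(warehouses: List[Dict]) -> Optional[str]:
        # 1) Prefer the smallest RUNNING warehouse (state STARTED).
        running = [w for w in warehouses if (w.get("state") or "").upper() == "STARTED"]
        if running:
            return min(running, key=lambda w: size_rank(w.get("size")))["name"]
        # 2) Otherwise the smallest SUSPENDED/RESIZING one (the handler resumes it).
        suspended = [w for w in warehouses
                     if (w.get("state") or "").upper() in ("SUSPENDED", "RESIZING")]
        if suspended:
            return min(suspended, key=lambda w: size_rank(w.get("size")))["name"]
        # 3) None exist: the handler falls through to CREATE WAREHOUSE "SPAI_TINY_WH".
        return None

    # Illustrative rows shaped like SHOW WAREHOUSES output. The MEDIUM running
    # warehouse wins over the smaller XSMALL one, because running warehouses are
    # always preferred over suspended ones regardless of size.
    demo = [
        {"name": "ETL_WH", "state": "SUSPENDED", "size": "XSMALL"},
        {"name": "BI_WH", "state": "STARTED", "size": "MEDIUM"},
        {"name": "LOAD_WH", "state": "STARTED", "size": "LARGE"},
    ]
    assert pick_warehouse(demo) == "BI_WH"

On success, the schema handler responds with the catalog built by _build_catalog, i.e. JSON of the form {"warehouse": <selected name>, "databases": [{"database": ..., "schemas": [{"schema": ..., "tables": [...], "error": ...}]}]}.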