signalpilot-ai-internal 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of signalpilot-ai-internal might be problematic.

Files changed (46)
  1. signalpilot_ai_internal/_version.py +1 -1
  2. signalpilot_ai_internal/cache_handlers.py +383 -0
  3. signalpilot_ai_internal/cache_service.py +552 -0
  4. signalpilot_ai_internal/handlers.py +35 -915
  5. signalpilot_ai_internal/snowflake_schema_service.py +639 -0
  6. signalpilot_ai_internal/unified_database_schema_service.py +742 -0
  7. {signalpilot_ai_internal-0.3.1.data → signalpilot_ai_internal-0.3.3.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/package.json +3 -2
  8. {signalpilot_ai_internal-0.3.1.data → signalpilot_ai_internal-0.3.3.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/schemas/signalpilot-ai-internal/package.json.orig +2 -1
  9. signalpilot_ai_internal-0.3.3.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/384.fa432bdb7fb6b1c95ad6.js +1 -0
  10. signalpilot_ai_internal-0.3.3.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/447.0fea0d444fc7ba458d5a.js +1 -0
  11. signalpilot_ai_internal-0.3.3.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/839.c61f5bc4d0da4a0781d6.js +1 -0
  12. signalpilot_ai_internal-0.3.3.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/remoteEntry.2e2c6ae0baa591126b0a.js +1 -0
  13. {signalpilot_ai_internal-0.3.1.data → signalpilot_ai_internal-0.3.3.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/third-party-licenses.json +6 -0
  14. {signalpilot_ai_internal-0.3.1.dist-info → signalpilot_ai_internal-0.3.3.dist-info}/METADATA +1 -1
  15. signalpilot_ai_internal-0.3.3.dist-info/RECORD +45 -0
  16. signalpilot_ai_internal-0.3.1.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/447.45c187b4dc615d9cc073.js +0 -1
  17. signalpilot_ai_internal-0.3.1.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/839.e2bd05ad6dbdb957683f.js +0 -1
  18. signalpilot_ai_internal-0.3.1.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/remoteEntry.07bc9bbdead29df455e7.js +0 -1
  19. signalpilot_ai_internal-0.3.1.dist-info/RECORD +0 -40
  20. {signalpilot_ai_internal-0.3.1.data → signalpilot_ai_internal-0.3.3.data}/data/etc/jupyter/jupyter_server_config.d/signalpilot_ai.json +0 -0
  21. {signalpilot_ai_internal-0.3.1.data → signalpilot_ai_internal-0.3.3.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/install.json +0 -0
  22. {signalpilot_ai_internal-0.3.1.data → signalpilot_ai_internal-0.3.3.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/schemas/signalpilot-ai-internal/plugin.json +0 -0
  23. {signalpilot_ai_internal-0.3.1.data → signalpilot_ai_internal-0.3.3.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/104.04e170724f369fcbaf19.js +0 -0
  24. {signalpilot_ai_internal-0.3.1.data → signalpilot_ai_internal-0.3.3.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/104.04e170724f369fcbaf19.js.LICENSE.txt +0 -0
  25. {signalpilot_ai_internal-0.3.1.data → signalpilot_ai_internal-0.3.3.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/122.e2dadf63dc64d7b5f1ee.js +0 -0
  26. {signalpilot_ai_internal-0.3.1.data → signalpilot_ai_internal-0.3.3.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/220.328403b5545f268b95c6.js +0 -0
  27. {signalpilot_ai_internal-0.3.1.data → signalpilot_ai_internal-0.3.3.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/262.726e1da31a50868cb297.js +0 -0
  28. {signalpilot_ai_internal-0.3.1.data → signalpilot_ai_internal-0.3.3.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/280.35d8c8b68815702a5238.js +0 -0
  29. {signalpilot_ai_internal-0.3.1.data → signalpilot_ai_internal-0.3.3.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/280.35d8c8b68815702a5238.js.LICENSE.txt +0 -0
  30. {signalpilot_ai_internal-0.3.1.data → signalpilot_ai_internal-0.3.3.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/353.72484b768a04f89bd3dd.js +0 -0
  31. {signalpilot_ai_internal-0.3.1.data → signalpilot_ai_internal-0.3.3.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/364.dbec4c2dc12e7b050dcc.js +0 -0
  32. {signalpilot_ai_internal-0.3.1.data → signalpilot_ai_internal-0.3.3.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/439.37e271d7a80336daabe2.js +0 -0
  33. {signalpilot_ai_internal-0.3.1.data → signalpilot_ai_internal-0.3.3.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/476.9b4f05a99f5003f82094.js +0 -0
  34. {signalpilot_ai_internal-0.3.1.data → signalpilot_ai_internal-0.3.3.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/481.73c7a9290b7d35a8b9c1.js +0 -0
  35. {signalpilot_ai_internal-0.3.1.data → signalpilot_ai_internal-0.3.3.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/512.b58fc0093d080b8ee61c.js +0 -0
  36. {signalpilot_ai_internal-0.3.1.data → signalpilot_ai_internal-0.3.3.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/553.b4042a795c91d9ff71ef.js +0 -0
  37. {signalpilot_ai_internal-0.3.1.data → signalpilot_ai_internal-0.3.3.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/553.b4042a795c91d9ff71ef.js.LICENSE.txt +0 -0
  38. {signalpilot_ai_internal-0.3.1.data → signalpilot_ai_internal-0.3.3.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/606.90aaaae46b73dc3c08fb.js +0 -0
  39. {signalpilot_ai_internal-0.3.1.data → signalpilot_ai_internal-0.3.3.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/635.9720593ee20b768da3ca.js +0 -0
  40. {signalpilot_ai_internal-0.3.1.data → signalpilot_ai_internal-0.3.3.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/713.8e6edc9a965bdd578ca7.js +0 -0
  41. {signalpilot_ai_internal-0.3.1.data → signalpilot_ai_internal-0.3.3.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/742.91e7b516c8699eea3373.js +0 -0
  42. {signalpilot_ai_internal-0.3.1.data → signalpilot_ai_internal-0.3.3.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/785.3aa564fc148b37d1d719.js +0 -0
  43. {signalpilot_ai_internal-0.3.1.data → signalpilot_ai_internal-0.3.3.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/888.34054db17bcf6e87ec95.js +0 -0
  44. {signalpilot_ai_internal-0.3.1.data → signalpilot_ai_internal-0.3.3.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/style.js +0 -0
  45. {signalpilot_ai_internal-0.3.1.dist-info → signalpilot_ai_internal-0.3.3.dist-info}/WHEEL +0 -0
  46. {signalpilot_ai_internal-0.3.1.dist-info → signalpilot_ai_internal-0.3.3.dist-info}/licenses/LICENSE +0 -0
--- /dev/null
+++ b/signalpilot_ai_internal/snowflake_schema_service.py
@@ -0,0 +1,639 @@
+"""
+Snowflake schema service handlers for SignalPilot AI.
+Provides REST API handlers for Snowflake database schema retrieval and query execution.
+Supports multiple databases within a single Snowflake connection.
+
+Behavior:
+- Picks the smallest RUNNING warehouse.
+- If none is running, resumes the smallest SUSPENDED warehouse.
+- If none exist, attempts to CREATE a tiny warehouse (requires privilege).
+- Builds a catalog with parallel schema processing for performance.
+- For each table, includes detailed column information: name, type, ordinal position,
+  nullable, description, default value, and type-specific attributes.
+"""
+
+import json
+import os
+import re
+import subprocess
+import sys
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from typing import Any, Dict, List, Optional
+
+from jupyter_server.base.handlers import APIHandler
+import tornado
+
+SIZE_ORDER = ["XSMALL", "SMALL", "MEDIUM", "LARGE", "XLARGE", "XXLARGE", "XXXLARGE", "X4LARGE", "X5LARGE", "X6LARGE"]
+
+
+class SnowflakeSchemaHandler(APIHandler):
+    """Handler for Snowflake schema operations"""
+
+    def _setup_snowflake_environment(self):
+        """Install required Snowflake packages if not available"""
+        def install_package(package_name):
+            try:
+                subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])
+                return True
+            except subprocess.CalledProcessError:
+                return False
+
+        missing_packages = []
+
+        try:
+            import snowflake.connector
+        except ImportError:
+            if install_package("snowflake-connector-python"):
+                try:
+                    import snowflake.connector
+                except ImportError as e:
+                    missing_packages.append(f"snowflake-connector-python: {str(e)}")
+            else:
+                missing_packages.append("snowflake-connector-python: installation failed")
+
+        if missing_packages:
+            raise ImportError("Required modules could not be installed: " + ", ".join(missing_packages))
+
+        import snowflake.connector
+        return snowflake.connector
+
+    def _get_snowflake_config(self, provided_config: Optional[Dict] = None) -> Optional[Dict]:
+        """Get Snowflake configuration from the request or environment variables"""
+        if provided_config:
+            return provided_config
+
+        # Look for a Snowflake database configuration in the environment
+        for key, value in os.environ.items():
+            if key.endswith('_CONNECTION_JSON'):
+                try:
+                    config = json.loads(value)
+                    if config.get('type') == 'snowflake':
+                        return config
+                except Exception as e:
+                    print(f"[SnowflakeSchemaHandler] Error parsing database config {key}: {e}")
+                    continue
+
+        return None
+
+    def _get_connection_params(self, config: Dict) -> Dict[str, Any]:
+        """Build Snowflake connection parameters from configuration"""
+        # Extract the account from connectionUrl
+        connection_url = config.get('connectionUrl', '')
+        if not connection_url:
+            raise ValueError("connectionUrl is required for Snowflake")
+
+        # Extract the account identifier from the connectionUrl
+        url_match = re.match(r'https?://([^/]+)', connection_url)
+        if not url_match:
+            raise ValueError(f"Invalid Snowflake connectionUrl format: {connection_url}")
+
+        account = url_match.group(1)
+        # Strip .snowflakecomputing.com if present
+        account_identifier = account.replace('.snowflakecomputing.com', '')
+
+        conn_params = {
+            'account': account_identifier,
+            'user': config['username'],
+            'password': config['password'],
+        }
+
+        warehouse = config.get('warehouse')
+        database = config.get('database')
+        role = config.get('role')
+
+        if warehouse:
+            conn_params['warehouse'] = warehouse
+        if database:
+            conn_params['database'] = database
+        if role:
+            conn_params['role'] = role
+
+        return conn_params
+
+    def _fetch_result_scan(self, cur, sql: str, name_col: str = "name") -> List[str]:
+        """Execute SQL and fetch results using RESULT_SCAN"""
+        cur.execute(sql)
+        cur.execute(f'SELECT "{name_col}" FROM TABLE(RESULT_SCAN(LAST_QUERY_ID()))')
+        rows = cur.fetchall()
+        # Handle both DictCursor (returns dicts) and regular cursor (returns tuples)
+        if rows and isinstance(rows[0], dict):
+            return [r[name_col] for r in rows]
+        return [r[0] for r in rows]
+
+    def _get_warehouses(self, cur) -> List[Dict]:
+        """Get all warehouses with their state and size"""
+        cur.execute("SHOW WAREHOUSES")
+        cur.execute("""
+            SELECT "name","state","size","auto_suspend","auto_resume"
+            FROM TABLE(RESULT_SCAN(LAST_QUERY_ID()))
+        """)
+        rows = cur.fetchall()
+        # Handle DictCursor (returns dicts) and regular cursor (returns tuples)
+        if rows and isinstance(rows[0], dict):
+            # DictCursor: normalize keys to lowercase
+            return [{k.lower(): v for k, v in row.items()} for row in rows]
+        else:
+            # Regular cursor: manually create dicts
+            cols = [d[0].lower() for d in cur.description]
+            return [dict(zip(cols, row)) for row in rows]
+
+    def _size_rank(self, sz: str) -> int:
+        """Get the numeric rank of a warehouse size"""
+        s = (sz or "").upper()
+        return SIZE_ORDER.index(s) if s in SIZE_ORDER else len(SIZE_ORDER) + 1
+
+    def _choose_smallest_running(self, warehouses: List[Dict]) -> Optional[str]:
+        """Choose the smallest running warehouse"""
+        running = [w for w in warehouses if (w.get("state") or "").upper() == "STARTED"]
+        if not running:
+            return None
+        running.sort(key=lambda w: self._size_rank(w.get("size")))
+        return running[0]["name"]
+
+    def _choose_smallest_suspended(self, warehouses: List[Dict]) -> Optional[str]:
+        """Choose the smallest suspended warehouse"""
+        suspended = [w for w in warehouses if (w.get("state") or "").upper() in ("SUSPENDED", "RESIZING")]
+        if not suspended:
+            return None
+        suspended.sort(key=lambda w: self._size_rank(w.get("size")))
+        return suspended[0]["name"]
+
+    def _resume_warehouse(self, cur, name: str) -> None:
+        """Resume a suspended warehouse"""
+        cur.execute(f'ALTER WAREHOUSE "{name}" RESUME')
+
+    def _create_tiny_warehouse(self, cur, name: str = "SPAI_TINY_WH") -> str:
+        """Create a tiny warehouse (requires the proper privilege)"""
+        cur.execute(f'''
+            CREATE WAREHOUSE IF NOT EXISTS "{name}"
+            WITH WAREHOUSE_SIZE = XSMALL
+            AUTO_SUSPEND = 60
+            AUTO_RESUME = TRUE
+            INITIALLY_SUSPENDED = TRUE
+        ''')
+        # Start it
+        cur.execute(f'ALTER WAREHOUSE "{name}" RESUME')
+        return name
+
+    def _ensure_warehouse(self, cur, preferred: Optional[str]) -> str:
+        """Ensure a warehouse is available and running"""
+        # Respect an explicitly provided warehouse first, if any
+        if preferred:
+            try:
+                cur.execute(f"SHOW WAREHOUSES LIKE '{preferred}'")
+                cur.execute('SELECT "name","state","size" FROM TABLE(RESULT_SCAN(LAST_QUERY_ID()))')
+                row = cur.fetchone()
+                if row:
+                    # Handle DictCursor vs regular cursor
+                    state = ((row["state"] if isinstance(row, dict) else row[1]) or "").upper()
+                    if state != "STARTED":
+                        self._resume_warehouse(cur, preferred)
+                    return preferred
+            except Exception as e:
+                # Fall back to discovery below
+                print(f"Note: preferred warehouse '{preferred}' not available or cannot be resumed ({e}). Falling back.")
+
+        warehouses = self._get_warehouses(cur)
+        name = self._choose_smallest_running(warehouses)
+        if name:
+            return name
+
+        name = self._choose_smallest_suspended(warehouses)
+        if name:
+            self._resume_warehouse(cur, name)
+            return name
+
+        # None exist → create a tiny one
+        return self._create_tiny_warehouse(cur)
+
+    def _list_databases(self, cur) -> List[str]:
+        """List all databases"""
+        return self._fetch_result_scan(cur, "SHOW DATABASES", "name")
+
+    def _list_schemas_for_db(self, cur, db: str) -> List[str]:
+        """List all schemas for a database (excluding INFORMATION_SCHEMA)"""
+        cur.execute(f'USE DATABASE "{db}"')
+        schemas = self._fetch_result_scan(cur, "SHOW SCHEMAS", "name")
+        return [s for s in schemas if s.upper() != "INFORMATION_SCHEMA"]
+
+    def _list_tables_with_columns_for_schema(self, connector, conn, db: str, schema: str, limit: int = 5000) -> List[Dict]:
+        """Get tables and their columns for a schema using an optimized bulk query."""
+        cur = conn.cursor(connector.DictCursor)
+        try:
+            cur.execute(f'USE DATABASE "{db}"')
+            cur.execute(f'USE SCHEMA "{schema}"')
+
+            # Get all tables and columns in one query for better performance
+            cur.execute("""
+                SELECT
+                    t.TABLE_SCHEMA,
+                    t.TABLE_NAME,
+                    t.TABLE_TYPE,
+                    c.COLUMN_NAME,
+                    c.DATA_TYPE,
+                    c.ORDINAL_POSITION,
+                    c.IS_NULLABLE,
+                    c.COLUMN_DEFAULT,
+                    c.CHARACTER_MAXIMUM_LENGTH,
+                    c.NUMERIC_PRECISION,
+                    c.NUMERIC_SCALE,
+                    c.COMMENT
+                FROM INFORMATION_SCHEMA.TABLES t
+                LEFT JOIN INFORMATION_SCHEMA.COLUMNS c
+                    ON t.TABLE_SCHEMA = c.TABLE_SCHEMA
+                    AND t.TABLE_NAME = c.TABLE_NAME
+                WHERE t.TABLE_SCHEMA = %s
+                ORDER BY t.TABLE_NAME, c.ORDINAL_POSITION
+                LIMIT 50000
+            """, (schema,))
+            rows = cur.fetchall()
+
+            # Group rows by table
+            tables_dict = {}
+            for r in rows:
+                if isinstance(r, dict):
+                    table_key = r["TABLE_NAME"]
+                    if table_key not in tables_dict:
+                        tables_dict[table_key] = {
+                            "schema": r["TABLE_SCHEMA"],
+                            "table": r["TABLE_NAME"],
+                            "type": r["TABLE_TYPE"],
+                            "columns": []
+                        }
+
+                    if r.get("COLUMN_NAME"):
+                        col = {
+                            "name": r["COLUMN_NAME"],
+                            "type": r["DATA_TYPE"],
+                            "ordinal": r["ORDINAL_POSITION"],
+                            "nullable": r["IS_NULLABLE"] == "YES",
+                        }
+                        if r.get("COMMENT"):
+                            col["description"] = r["COMMENT"]
+                        if r.get("COLUMN_DEFAULT"):
+                            col["default"] = r["COLUMN_DEFAULT"]
+                        if r.get("CHARACTER_MAXIMUM_LENGTH"):
+                            col["max_length"] = r["CHARACTER_MAXIMUM_LENGTH"]
+                        if r.get("NUMERIC_PRECISION"):
+                            col["precision"] = r["NUMERIC_PRECISION"]
+                        if r.get("NUMERIC_SCALE") is not None:
+                            col["scale"] = r["NUMERIC_SCALE"]
+                        tables_dict[table_key]["columns"].append(col)
+                else:
+                    table_key = r[1]
+                    if table_key not in tables_dict:
+                        tables_dict[table_key] = {
+                            "schema": r[0],
+                            "table": r[1],
+                            "type": r[2],
+                            "columns": []
+                        }
+
+                    if r[3]:  # COLUMN_NAME
+                        col = {
+                            "name": r[3],
+                            "type": r[4],
+                            "ordinal": r[5],
+                            "nullable": r[6] == "YES",
+                        }
+                        if r[11]:  # COMMENT
+                            col["description"] = r[11]
+                        if r[7]:  # COLUMN_DEFAULT
+                            col["default"] = r[7]
+                        if r[8]:  # CHARACTER_MAXIMUM_LENGTH
+                            col["max_length"] = r[8]
+                        if r[9]:  # NUMERIC_PRECISION
+                            col["precision"] = r[9]
+                        if r[10] is not None:  # NUMERIC_SCALE
+                            col["scale"] = r[10]
+                        tables_dict[table_key]["columns"].append(col)
+
+            return list(tables_dict.values())[:limit]
+        finally:
+            cur.close()
+
+    def _process_schema(self, connector, conn, db: str, schema: str) -> Dict:
+        """Process a single schema with its tables and columns."""
+        try:
+            tables = self._list_tables_with_columns_for_schema(connector, conn, db, schema)
+            return {"schema": schema, "tables": tables, "error": None}
+        except Exception as e:
+            print(f"Warning: Error processing schema {db}.{schema}: {e}", file=sys.stderr)
+            return {"schema": schema, "tables": [], "error": str(e)}
+
+    def _build_catalog(self, connector, conn, max_workers: int = 5) -> Dict:
+        """Build the complete catalog with parallel schema processing"""
+        cur = conn.cursor(connector.DictCursor)
+        try:
+            # 1) Ensure a small warehouse is available
+            preferred_wh = None
+            # Extract the current warehouse from the connection if available
+            try:
+                cur.execute("SELECT CURRENT_WAREHOUSE()")
+                row = cur.fetchone()
+                if row:
+                    preferred_wh = row[0] if isinstance(row, tuple) else row.get("CURRENT_WAREHOUSE()")
+            except Exception:
+                pass
+
+            wh = self._ensure_warehouse(cur, preferred_wh)
+            cur.execute(f'USE WAREHOUSE "{wh}"')
+
+            # 2) Databases (no compute requirement, but we already have compute)
+            dbs = self._list_databases(cur)
+            cur.close()
+
+            catalog = []
+            print(f"Processing {len(dbs)} databases...", file=sys.stderr)
+
+            for db in dbs:
+                print(f"  Processing database: {db}", file=sys.stderr)
+                cur = conn.cursor(connector.DictCursor)
+                try:
+                    schemas = self._list_schemas_for_db(cur, db)
+                    print(f"    Found {len(schemas)} schemas", file=sys.stderr)
+                finally:
+                    cur.close()
+
+                # Process schemas in parallel for this database
+                schema_objs = []
+                with ThreadPoolExecutor(max_workers=max_workers) as executor:
+                    future_to_schema = {
+                        executor.submit(self._process_schema, connector, conn, db, s): s
+                        for s in schemas
+                    }
+
+                    for future in as_completed(future_to_schema):
+                        schema_name = future_to_schema[future]
+                        try:
+                            result = future.result()
+                            schema_objs.append(result)
+                            print(f"    Completed schema: {schema_name} ({len(result['tables'])} tables)", file=sys.stderr)
+                        except Exception as e:
+                            print(f"    Error with schema {schema_name}: {e}", file=sys.stderr)
+                            schema_objs.append({"schema": schema_name, "tables": [], "error": str(e)})
+
+                catalog.append({"database": db, "schemas": schema_objs})
+
+            return {"warehouse": wh, "databases": catalog}
+        finally:
+            if not cur.is_closed():
+                cur.close()
+
+    def _format_catalog_as_json(self, catalog: Dict) -> Dict:
+        """Format the catalog for the JSON response"""
+        return catalog
+
+    @tornado.web.authenticated
+    def post(self):
+        """Get Snowflake database schema information"""
+        try:
+            # Parse the request body
+            try:
+                body = json.loads(self.request.body.decode('utf-8'))
+            except json.JSONDecodeError:
+                self.set_status(400)
+                self.finish(json.dumps({
+                    "error": "Invalid JSON in request body"
+                }))
+                return
+
+            # Get the Snowflake configuration from the request or environment
+            config = self._get_snowflake_config(body.get('config'))
+
+            if not config:
+                self.set_status(400)
+                self.finish(json.dumps({
+                    "error": "No Snowflake configuration provided and no Snowflake configurations found in environment"
+                }))
+                return
+
+            # Set up the Snowflake environment
+            try:
+                connector = self._setup_snowflake_environment()
+            except ImportError as e:
+                self.set_status(500)
+                self.finish(json.dumps({
+                    "error": str(e)
+                }))
+                return
+
+            # Get the database schema using optimized catalog building
+            try:
+                conn_params = self._get_connection_params(config)
+                max_workers = int(body.get('max_workers', 5))
+
+                print(f"[SnowflakeSchemaHandler] Connecting with account={conn_params['account']}, user={conn_params['user']}, warehouse={conn_params.get('warehouse')}, role={conn_params.get('role')}")
+
+                connection = connector.connect(**conn_params, client_session_keep_alive=False)
+
+                try:
+                    catalog = self._build_catalog(connector, connection, max_workers=max_workers)
+                    result = self._format_catalog_as_json(catalog)
+                    self.finish(json.dumps(result, indent=2))
+                finally:
+                    connection.close()
+
+            except Exception as e:
+                self.set_status(500)
+                self.finish(json.dumps({
+                    "error": f"Error connecting to Snowflake: {str(e)}"
+                }))
+
+        except Exception as e:
+            self.set_status(500)
+            self.finish(json.dumps({
+                "error": "Internal server error",
+                "message": str(e)
+            }))
+
+
+class SnowflakeQueryHandler(APIHandler):
+    """Handler for Snowflake query execution"""
+
+    def _setup_snowflake_environment(self):
+        """Install required Snowflake packages if not available"""
+        def install_package(package_name):
+            try:
+                subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])
+                return True
+            except subprocess.CalledProcessError:
+                return False
+
+        missing_packages = []
+
+        try:
+            import snowflake.connector
+        except ImportError:
+            if install_package("snowflake-connector-python"):
+                try:
+                    import snowflake.connector
+                except ImportError as e:
+                    missing_packages.append(f"snowflake-connector-python: {str(e)}")
+            else:
+                missing_packages.append("snowflake-connector-python: installation failed")
+
+        if missing_packages:
+            raise ImportError("Required modules could not be installed: " + ", ".join(missing_packages))
+
+        import snowflake.connector
+        return snowflake.connector
+
+    def _get_snowflake_config(self, provided_config: Optional[Dict] = None) -> Optional[Dict]:
+        """Get Snowflake configuration from the request or environment variables"""
+        if provided_config:
+            return provided_config
+
+        # Look for a Snowflake database configuration in the environment
+        for key, value in os.environ.items():
+            if key.endswith('_CONNECTION_JSON'):
+                try:
+                    config = json.loads(value)
+                    if config.get('type') == 'snowflake':
+                        return config
+                except Exception as e:
+                    print(f"[SnowflakeQueryHandler] Error parsing database config {key}: {e}")
+                    continue
+
+        return None
+
+    def _get_connection_params(self, config: Dict) -> Dict[str, Any]:
+        """Build Snowflake connection parameters from configuration"""
+        # Extract the account from connectionUrl
+        connection_url = config.get('connectionUrl', '')
+        if not connection_url:
+            raise ValueError("connectionUrl is required for Snowflake")
+
+        # Extract the account identifier from the connectionUrl
+        # Expected format: https://account.snowflakecomputing.com or https://account-region.snowflakecomputing.com
+        url_match = re.match(r'https?://([^/]+)', connection_url)
+        if not url_match:
+            raise ValueError(f"Invalid Snowflake connectionUrl format: {connection_url}")
+
+        account = url_match.group(1)
+        # Strip .snowflakecomputing.com if present
+        account_identifier = account.replace('.snowflakecomputing.com', '')
+
+        conn_params = {
+            'account': account_identifier,
+            'user': config['username'],
+            'password': config['password'],
+        }
+
+        warehouse = config.get('warehouse')
+        database = config.get('database')
+        role = config.get('role')
+
+        if warehouse:
+            conn_params['warehouse'] = warehouse
+        if database:
+            conn_params['database'] = database
+        if role:
+            conn_params['role'] = role
+
+        return conn_params
+
+    @tornado.web.authenticated
+    def post(self):
+        """Execute a read-only SQL query on Snowflake"""
+        try:
+            # Parse the request body
+            try:
+                body = json.loads(self.request.body.decode('utf-8'))
+            except json.JSONDecodeError:
+                self.set_status(400)
+                self.finish(json.dumps({
+                    "error": "Invalid JSON in request body"
+                }))
+                return
+
+            # Get the query from the request
+            query = body.get('query')
+            if not query:
+                self.set_status(400)
+                self.finish(json.dumps({
+                    "error": "Missing 'query' field in request body"
+                }))
+                return
+
+            # Basic validation for read-only queries
+            normalized_query = query.strip().upper()
+            if not normalized_query.startswith(('SELECT', 'WITH', 'SHOW', 'DESCRIBE')):
+                self.set_status(400)
+                self.finish(json.dumps({
+                    "error": "Only SELECT, WITH, SHOW, or DESCRIBE statements are allowed for read queries."
+                }))
+                return
+
+            # Get the Snowflake configuration from the request or environment
+            config = self._get_snowflake_config(body.get('config'))
+
+            if not config:
+                self.set_status(400)
+                self.finish(json.dumps({
+                    "error": "No Snowflake configuration provided and no Snowflake configurations found in environment"
+                }))
+                return
+
+            # Set up the Snowflake environment
+            try:
+                connector = self._setup_snowflake_environment()
+            except ImportError as e:
+                self.set_status(500)
+                self.finish(json.dumps({
+                    "error": str(e)
+                }))
+                return
+
+            # Execute the query
+            try:
+                conn_params = self._get_connection_params(config)
+
+                # Allow specifying a specific database for the query
+                database = body.get('database')
+                if database:
+                    conn_params['database'] = database
+
+                # Ensure we have a warehouse for querying
+                if not conn_params.get('warehouse'):
+                    raise ValueError("A warehouse is required to execute queries.")
+
+                connection = connector.connect(**conn_params)
+                cursor = connection.cursor()
+
+                try:
+                    cursor.execute(query)
+
+                    # Get column names from the cursor description
+                    columns = [desc[0] for desc in cursor.description] if cursor.description else []
+
+                    # Fetch all results
+                    rows = cursor.fetchall()
+
+                    # Convert the result to a list of dictionaries
+                    result_rows = [
+                        {columns[i]: row[i] for i in range(len(columns))}
+                        for row in rows
+                    ]
+
+                    self.finish(json.dumps({
+                        "result": result_rows
+                    }))
+
+                finally:
+                    cursor.close()
+                    connection.close()
+
+            except Exception as e:
+                self.set_status(500)
+                self.finish(json.dumps({
+                    "error": f"Snowflake query failed: {str(e)}"
+                }))
+
+        except Exception as e:
+            self.set_status(500)
+            self.finish(json.dumps({
+                "error": "Internal server error",
+                "message": str(e)
+            }))
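
For context, both handlers resolve their connection settings from any environment variable whose name ends in _CONNECTION_JSON and whose JSON payload has "type": "snowflake". Below is a minimal client sketch of that contract. The JSON shape is taken from _get_snowflake_config/_get_connection_params above; the endpoint paths are assumptions, since this diff does not include the route registration, and the credential values are placeholders.

    import json
    import os

    import requests  # assumes the requests package is available

    # Any variable ending in _CONNECTION_JSON with "type": "snowflake" is picked up.
    os.environ["SNOWFLAKE_CONNECTION_JSON"] = json.dumps({
        "type": "snowflake",
        "connectionUrl": "https://myaccount.snowflakecomputing.com",
        "username": "MY_USER",
        "password": "MY_PASSWORD",
        "warehouse": "MY_WH",   # optional for schema; required by the query handler
        "database": "MY_DB",    # optional
        "role": "MY_ROLE",      # optional
    })

    BASE = "http://localhost:8888"  # local Jupyter server
    HEADERS = {"Authorization": f"token {os.environ.get('JUPYTER_TOKEN', '')}"}

    # Hypothetical endpoint paths -- route registration is not part of this diff.
    schema = requests.post(
        f"{BASE}/signalpilot-ai-internal/snowflake-schema",
        headers=HEADERS,
        json={"max_workers": 5},  # the handler also accepts an inline 'config'
    ).json()
    print(schema.get("warehouse"), len(schema.get("databases", [])))

    result = requests.post(
        f"{BASE}/signalpilot-ai-internal/snowflake-query",
        headers=HEADERS,
        # Must start with SELECT, WITH, SHOW, or DESCRIBE per the handler's guard.
        json={"query": "SELECT CURRENT_VERSION()"},
    ).json()
    print(result.get("result"))

Note that the read-only guard in SnowflakeQueryHandler is a simple prefix check on the statement, not a full SQL parse, so it is a light safeguard rather than a strict sandbox.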