signalpilot-ai-internal 0.4.5__py3-none-any.whl → 0.4.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of signalpilot-ai-internal might be problematic. Click here for more details.

Files changed (42)
  1. signalpilot_ai_internal/_version.py +1 -1
  2. signalpilot_ai_internal/handlers.py +247 -1
  3. signalpilot_ai_internal/schema_search_config.yml +32 -0
  4. signalpilot_ai_internal/schema_search_service.py +109 -0
  5. {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/package.json +2 -2
  6. {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/schemas/signalpilot-ai-internal/package.json.orig +1 -1
  7. signalpilot_ai_internal-0.4.7.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/839.4db23bddecbec684b06c.js +1 -0
  8. signalpilot_ai_internal-0.4.7.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/923.e80ae4c5cedc1d73f2a1.js +1 -0
  9. signalpilot_ai_internal-0.4.5.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/remoteEntry.2304af1dc768da3716f9.js → signalpilot_ai_internal-0.4.7.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/remoteEntry.2a797d447eeb725a28cf.js +1 -1
  10. {signalpilot_ai_internal-0.4.5.dist-info → signalpilot_ai_internal-0.4.7.dist-info}/METADATA +3 -2
  11. {signalpilot_ai_internal-0.4.5.dist-info → signalpilot_ai_internal-0.4.7.dist-info}/RECORD +40 -38
  12. signalpilot_ai_internal-0.4.5.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/447.1e78c4216aeaaeadff40.js +0 -1
  13. signalpilot_ai_internal-0.4.5.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/839.be52ed152c5de2006fde.js +0 -1
  14. {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/etc/jupyter/jupyter_server_config.d/signalpilot_ai.json +0 -0
  15. {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/install.json +0 -0
  16. {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/schemas/signalpilot-ai-internal/plugin.json +0 -0
  17. {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/104.04e170724f369fcbaf19.js +0 -0
  18. {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/104.04e170724f369fcbaf19.js.LICENSE.txt +0 -0
  19. {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/122.e2dadf63dc64d7b5f1ee.js +0 -0
  20. {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/220.328403b5545f268b95c6.js +0 -0
  21. {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/262.726e1da31a50868cb297.js +0 -0
  22. {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/280.35d8c8b68815702a5238.js +0 -0
  23. {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/280.35d8c8b68815702a5238.js.LICENSE.txt +0 -0
  24. {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/353.72484b768a04f89bd3dd.js +0 -0
  25. {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/364.dbec4c2dc12e7b050dcc.js +0 -0
  26. {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/384.fa432bdb7fb6b1c95ad6.js +0 -0
  27. {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/439.37e271d7a80336daabe2.js +0 -0
  28. {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/476.9b4f05a99f5003f82094.js +0 -0
  29. {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/481.73c7a9290b7d35a8b9c1.js +0 -0
  30. {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/512.b58fc0093d080b8ee61c.js +0 -0
  31. {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/553.b4042a795c91d9ff71ef.js +0 -0
  32. {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/553.b4042a795c91d9ff71ef.js.LICENSE.txt +0 -0
  33. {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/606.90aaaae46b73dc3c08fb.js +0 -0
  34. {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/635.9720593ee20b768da3ca.js +0 -0
  35. {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/713.8e6edc9a965bdd578ca7.js +0 -0
  36. {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/742.91e7b516c8699eea3373.js +0 -0
  37. {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/785.3aa564fc148b37d1d719.js +0 -0
  38. {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/888.34054db17bcf6e87ec95.js +0 -0
  39. {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/style.js +0 -0
  40. {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/third-party-licenses.json +0 -0
  41. {signalpilot_ai_internal-0.4.5.dist-info → signalpilot_ai_internal-0.4.7.dist-info}/WHEEL +0 -0
  42. {signalpilot_ai_internal-0.4.5.dist-info → signalpilot_ai_internal-0.4.7.dist-info}/licenses/LICENSE +0 -0
@@ -1,4 +1,4 @@
1
1
  # This file is auto-generated by Hatchling. As such, do not:
2
2
  # - modify
3
3
  # - track in version control e.g. be sure to add to .gitignore
4
- __version__ = VERSION = '0.4.5'
4
+ __version__ = VERSION = '0.4.7'
@@ -1,4 +1,8 @@
1
1
  import json
2
+ import os
3
+ import re
4
+ from pathlib import Path
5
+ from datetime import datetime
2
6
 
3
7
  from jupyter_server.base.handlers import APIHandler
4
8
  from jupyter_server.utils import url_path_join
@@ -8,6 +12,7 @@ from .cache_service import get_cache_service
8
12
  from .cache_handlers import ChatHistoriesHandler, AppValuesHandler, CacheInfoHandler
9
13
  from .unified_database_schema_service import UnifiedDatabaseSchemaHandler, UnifiedDatabaseQueryHandler
10
14
  from .snowflake_schema_service import SnowflakeSchemaHandler, SnowflakeQueryHandler
15
+ from .schema_search_service import SchemaSearchHandler
11
16
 
12
17
 
13
18
  class HelloWorldHandler(APIHandler):
@@ -22,6 +27,237 @@ class HelloWorldHandler(APIHandler):
22
27
  }))
23
28
 
24
29
 
30
class ReadAllFilesHandler(APIHandler):
    """Handler for reading all notebook and data files in the workspace.

    GET returns a JSON payload with an LLM-optimized "welcome context"
    string summarizing the 10 most recently edited notebooks and the data
    files they reference, plus summary counts.
    """

    # Common data file extensions
    DATA_EXTENSIONS = {'.csv', '.json', '.xlsx', '.xls', '.parquet', '.pkl', '.pickle',
                       '.feather', '.hdf5', '.h5', '.sql', '.db', '.sqlite', '.tsv', '.txt'}

    # Directories to exclude from search
    EXCLUDE_DIRS = {'.git', '.ipynb_checkpoints', 'node_modules', '__pycache__',
                    '.venv', 'venv', 'env', '.pytest_cache', '.mypy_cache',
                    'dist', 'build', '.tox', 'logs', '.vscode'}

    @tornado.web.authenticated
    def get(self):
        try:
            # Root directory where Jupyter Lab is running.
            root_dir = Path(os.getcwd())

            notebooks = self._find_notebooks(root_dir)
            data_files = self._find_data_files(root_dir)

            # Limit analysis to the 10 most recently edited notebooks.
            recent_notebooks = self._get_recent_notebooks(notebooks, limit=10)

            # Analyze each notebook for data dependencies.
            notebook_info = []
            all_data_dependencies = set()
            for notebook_path in recent_notebooks:
                info = self._analyze_notebook(notebook_path, data_files, root_dir)
                notebook_info.append(info)
                all_data_dependencies.update(info['data_dependencies'])

            # Keep only the data files actually referenced by recent notebooks.
            referenced_data_files = []
            for data_file in data_files:
                rel_path = str(data_file.relative_to(root_dir))
                rel_path_forward = rel_path.replace('\\', '/')
                file_name = data_file.name

                # A file counts as referenced when any dependency string equals
                # (or contains) its name or relative path.
                if any(dep in [file_name, rel_path, rel_path_forward] or
                       file_name in dep or rel_path in dep or rel_path_forward in dep
                       for dep in all_data_dependencies):
                    referenced_data_files.append(data_file)

            # Generate the LLM-optimized context string with only referenced data.
            welcome_context = self._generate_welcome_context(notebook_info, referenced_data_files, root_dir)

            self.finish(json.dumps({
                "welcome_context": welcome_context,
                "notebook_count": len(notebooks),
                "data_file_count": len(data_files),
                "recent_notebook_count": len(recent_notebooks),
                "referenced_data_count": len(referenced_data_files)
            }))

        except Exception as e:
            self.set_status(500)
            self.finish(json.dumps({
                "error": str(e)
            }))

    def _find_notebooks(self, root_dir: Path) -> list:
        """Find all .ipynb files in the workspace, skipping excluded dirs."""
        notebooks = []
        for path in root_dir.rglob('*.ipynb'):
            if any(excluded in path.parts for excluded in self.EXCLUDE_DIRS):
                continue
            notebooks.append(path)
        return notebooks

    def _find_data_files(self, root_dir: Path) -> list:
        """Find all files with a known data extension, skipping excluded dirs."""
        data_files = []
        for path in root_dir.rglob('*'):
            if any(excluded in path.parts for excluded in self.EXCLUDE_DIRS):
                continue
            if path.is_file() and path.suffix.lower() in self.DATA_EXTENSIONS:
                data_files.append(path)
        return data_files

    def _get_recent_notebooks(self, notebooks: list, limit: int = 10) -> list:
        """Return up to ``limit`` notebook paths, most recently modified first."""
        notebooks_with_mtime = []
        for nb in notebooks:
            try:
                notebooks_with_mtime.append((nb, nb.stat().st_mtime))
            except OSError:
                # The file may vanish between discovery and stat(); skip it
                # rather than failing the whole request.
                continue
        # Sort by modification time, most recent first.
        notebooks_with_mtime.sort(key=lambda item: item[1], reverse=True)
        return [nb for nb, _ in notebooks_with_mtime[:limit]]

    def _analyze_notebook(self, notebook_path: Path, data_files: list, root_dir: Path) -> dict:
        """Analyze a notebook to find its data dependencies.

        Returns a dict with name, relative path, last-modified timestamp and
        the list of detected dependencies; on read failure the dict carries an
        'error' key and empty dependencies instead of raising.
        """
        try:
            with open(notebook_path, 'r', encoding='utf-8') as f:
                notebook_content = f.read()

            referenced_data_files = self._find_data_references(notebook_content, data_files, root_dir)
            relative_path = notebook_path.relative_to(root_dir)
            mtime = datetime.fromtimestamp(notebook_path.stat().st_mtime)

            return {
                'name': notebook_path.name,
                'path': str(relative_path),
                'last_modified': mtime.strftime('%Y-%m-%d %H:%M:%S'),
                'data_dependencies': referenced_data_files
            }
        except Exception as e:
            # If we can't read the notebook, return basic info.
            relative_path = notebook_path.relative_to(root_dir)
            return {
                'name': notebook_path.name,
                'path': str(relative_path),
                'last_modified': 'unknown',
                'data_dependencies': [],
                'error': str(e)
            }

    def _find_data_references(self, content: str, data_files: list, root_dir: Path) -> list:
        """Find references to known data files (and DB URLs) in notebook text."""
        # Set of data file names and relative paths used for matching.
        data_file_patterns = set()
        for data_file in data_files:
            data_file_patterns.add(data_file.name)
            try:
                rel_path = str(data_file.relative_to(root_dir))
                data_file_patterns.add(rel_path)
                # Also match with forward slashes (common in code).
                data_file_patterns.add(rel_path.replace('\\', '/'))
            except ValueError:
                pass

        # Common patterns: pd.read_csv('file.csv'), open('file.csv'), 'path/to/file.csv'
        patterns = [
            r'["\']([^"\']+\.(?:csv|json|xlsx?|parquet|pkl|pickle|feather|hdf5|h5|sql|db|sqlite|tsv|txt))["\']',
            r'read_(?:csv|json|excel|parquet|pickle|feather|hdf|sql|table)\(["\']([^"\']+)["\']',
            r'to_(?:csv|json|excel|parquet|pickle|feather|hdf|sql)\(["\']([^"\']+)["\']',
        ]

        found_references = set()
        for pattern in patterns:
            for match in re.finditer(pattern, content, re.IGNORECASE):
                file_ref = match.group(1)
                # Only keep references that match a discovered data file.
                if file_ref in data_file_patterns or any(file_ref in str(df) for df in data_files):
                    found_references.add(file_ref)

        # Also check for database connection strings.
        db_patterns = [
            r'(?:postgresql|mysql|sqlite|mongodb)://[^\s\'"]+',
            r'(?:DATABASE_URL|DB_URL|CONNECTION_STRING)\s*=\s*["\']([^"\']+)["\']'
        ]

        for pattern in db_patterns:
            for match in re.finditer(pattern, content, re.IGNORECASE):
                found_references.add(f"Database: {match.group(0)[:50]}...")

        return sorted(found_references)

    def _generate_welcome_context(self, notebook_info: list, data_files: list, root_dir: Path) -> str:
        """Generate an LLM-optimized, human-readable context string."""
        lines = []
        lines.append("# Workspace Overview\n")

        if not notebook_info:
            lines.append("No notebooks found in the workspace.\n")
        else:
            lines.append(f"## Recent Notebooks ({len(notebook_info)})\n")

            for i, info in enumerate(notebook_info, 1):
                lines.append(f"\n### {i}. {info['name']}")
                lines.append(f"   - Path: {info['path']}")
                lines.append(f"   - Last Modified: {info['last_modified']}")

                if info.get('error'):
                    lines.append(f"   - Note: Could not fully analyze ({info['error']})")

                if info['data_dependencies']:
                    lines.append("   - Data Dependencies:")
                    for dep in info['data_dependencies']:
                        lines.append(f"     • {dep}")
                else:
                    lines.append("   - Data Dependencies: None detected")

        # Summary of data files referenced by recent notebooks.
        if data_files:
            lines.append(f"\n## Data Files Referenced by Recent Notebooks ({len(data_files)} total)\n")

            # Group referenced files by extension.
            by_extension = {}
            for df in data_files:
                ext = df.suffix.lower()
                if ext not in by_extension:
                    by_extension[ext] = []
                try:
                    rel_path = str(df.relative_to(root_dir))
                    by_extension[ext].append(rel_path)
                except ValueError:
                    by_extension[ext].append(str(df))

            for ext in sorted(by_extension.keys()):
                files = by_extension[ext]
                lines.append(f"\n### {ext} files ({len(files)})")
                # Show all referenced files (they should be limited already).
                for f in sorted(files):
                    lines.append(f"   - {f}")
        else:
            lines.append("\n## Data Files Referenced by Recent Notebooks\n")
            lines.append("No data file dependencies found in recent notebooks.\n")

        return '\n'.join(lines)
260
+
25
261
  def setup_handlers(web_app):
26
262
  host_pattern = ".*$"
27
263
  base_url = web_app.settings["base_url"]
@@ -29,6 +265,9 @@ def setup_handlers(web_app):
29
265
  # Original hello world endpoint
30
266
  hello_route = url_path_join(base_url, "signalpilot-ai-internal", "hello-world")
31
267
 
268
+ # Read all files endpoint
269
+ read_all_files_route = url_path_join(base_url, "signalpilot-ai-internal", "read-all-files")
270
+
32
271
  # Cache service endpoints
33
272
  chat_histories_route = url_path_join(base_url, "signalpilot-ai-internal", "cache", "chat-histories")
34
273
  chat_history_route = url_path_join(base_url, "signalpilot-ai-internal", "cache", "chat-histories", "([^/]+)")
@@ -41,6 +280,7 @@ def setup_handlers(web_app):
41
280
  # Database service endpoints
42
281
  database_schema_route = url_path_join(base_url, "signalpilot-ai-internal", "database", "schema")
43
282
  database_query_route = url_path_join(base_url, "signalpilot-ai-internal", "database", "query")
283
+ database_schema_search_route = url_path_join(base_url, "signalpilot-ai-internal", "database", "schema-search")
44
284
 
45
285
  # MySQL service endpoints
46
286
  mysql_schema_route = url_path_join(base_url, "signalpilot-ai-internal", "mysql", "schema")
@@ -54,6 +294,9 @@ def setup_handlers(web_app):
54
294
  # Original endpoint
55
295
  (hello_route, HelloWorldHandler),
56
296
 
297
+ # Read all files endpoint
298
+ (read_all_files_route, ReadAllFilesHandler),
299
+
57
300
  # Chat histories endpoints
58
301
  (chat_histories_route, ChatHistoriesHandler),
59
302
  (chat_history_route, ChatHistoriesHandler),
@@ -68,6 +311,7 @@ def setup_handlers(web_app):
68
311
  # Database service endpoints (unified for PostgreSQL and MySQL)
69
312
  (database_schema_route, UnifiedDatabaseSchemaHandler),
70
313
  (database_query_route, UnifiedDatabaseQueryHandler),
314
+ (database_schema_search_route, SchemaSearchHandler),
71
315
 
72
316
  # MySQL service endpoints (use unified handler)
73
317
  (mysql_schema_route, UnifiedDatabaseSchemaHandler),
@@ -90,6 +334,7 @@ def setup_handlers(web_app):
90
334
 
91
335
  print("SignalPilot AI backend handlers registered:")
92
336
  print(f" - Hello World: {hello_route}")
337
+ print(f" - Read All Files: {read_all_files_route}")
93
338
  print(f" - Chat Histories: {chat_histories_route}")
94
339
  print(f" - Chat History (by ID): {chat_history_route}")
95
340
  print(f" - App Values: {app_values_route}")
@@ -97,7 +342,8 @@ def setup_handlers(web_app):
97
342
  print(f" - Cache Info: {cache_info_route}")
98
343
  print(f" - Database Schema: {database_schema_route}")
99
344
  print(f" - Database Query: {database_query_route}")
345
+ print(f" - Database Schema Search: {database_schema_search_route}")
100
346
  print(f" - MySQL Schema: {mysql_schema_route}")
101
347
  print(f" - MySQL Query: {mysql_query_route}")
102
348
  print(f" - Snowflake Schema: {snowflake_schema_route}")
103
- print(f" - Snowflake Query: {snowflake_query_route}")
349
+ print(f" - Snowflake Query: {snowflake_query_route}")
@@ -0,0 +1,32 @@
1
+ logging:
2
+ level: "WARNING"
3
+
4
+ embedding:
5
+ location: "memory"
6
+ model: "multi-qa-MiniLM-L6-cos-v1"
7
+ metric: "cosine"
8
+ batch_size: 32
9
+ show_progress: false
10
+ cache_dir: "/tmp/.schema_search_cache"
11
+
12
+ chunking:
13
+ strategy: "raw"
14
+ max_tokens: 256
15
+ overlap_tokens: 50
16
+ model: "gpt-4o-mini"
17
+
18
+ search:
19
+ strategy: "hybrid"
20
+ initial_top_k: 20
21
+ rerank_top_k: 5
22
+ semantic_weight: 0.67
23
+ hops: 1
24
+
25
+ reranker:
26
+ model: null
27
+
28
+ schema:
29
+ include_columns: true
30
+ include_indices: true
31
+ include_foreign_keys: true
32
+ include_constraints: true
@@ -0,0 +1,109 @@
1
+ import json
2
+ import os
3
+ import subprocess
4
+ import sys
5
+ from pathlib import Path
6
+ from typing import Optional
7
+
8
+ from jupyter_server.base.handlers import APIHandler
9
+ import tornado
10
+ from schema_search import SchemaSearch
11
+ from sqlalchemy import create_engine
12
+
13
+
14
class SchemaSearchHandler(APIHandler):
    """POST handler that runs semantic schema search against a SQL database.

    Expects a JSON body with ``queries`` (string or list of strings), an
    optional ``dbUrl``, and an optional ``limit`` (clamped to 1-10, default 5).
    Supports PostgreSQL, MySQL, and Snowflake connection URLs.
    """

    # YAML configuration shipped alongside this module.
    CONFIG_PATH = Path(__file__).with_name("schema_search_config.yml")

    def _get_database_url(self, explicit: Optional[str]) -> Optional[str]:
        """Resolve the DB URL: explicit value, *_CONNECTION_JSON env vars, then DB_URL."""
        if isinstance(explicit, str) and explicit.strip():
            return explicit.strip()

        for key, value in os.environ.items():
            if key.endswith("_CONNECTION_JSON") and isinstance(value, str) and value.strip().startswith("{"):
                # One malformed env var must not break URL resolution for
                # every request; skip it and keep looking.
                try:
                    config = json.loads(value)
                except ValueError:
                    continue
                url = config.get("connectionUrl")
                if url:
                    return url
        return os.environ.get("DB_URL")

    @tornado.web.authenticated
    async def post(self):
        body = self.get_json_body() or {}
        queries = body.get("queries")
        if isinstance(queries, str):
            queries = [queries]

        if not isinstance(queries, list):
            self.set_status(400)
            self.finish(json.dumps({"error": "queries parameter must be a list of strings"}))
            return

        queries = [q.strip() for q in queries if isinstance(q, str) and q.strip()]

        if not queries:
            self.set_status(400)
            self.finish(json.dumps({"error": "queries parameter is required"}))
            return

        # A bad `limit` is a client error: report 400 instead of letting
        # int() blow up into the generic 500 handler below.
        raw_limit = body.get("limit")
        if raw_limit is None:
            limit = 5
        else:
            try:
                limit = max(1, min(int(raw_limit), 10))
            except (TypeError, ValueError):
                self.set_status(400)
                self.finish(json.dumps({"error": "limit parameter must be an integer"}))
                return

        db_url = self._get_database_url(body.get("dbUrl"))
        if not db_url:
            self.set_status(400)
            self.finish(json.dumps({"error": "Database connection URL is not configured"}))
            return

        db_url = db_url.strip()
        db_url_lower = db_url.lower()

        # Normalize bare mysql:// URLs to the PyMySQL SQLAlchemy dialect.
        if db_url_lower.startswith("mysql://"):
            db_url = "mysql+pymysql://" + db_url[len("mysql://"):]
            db_url_lower = db_url.lower()

        if db_url_lower.startswith("snowflake://"):
            self._ensure_snowflake_dependencies()
        elif db_url_lower.startswith(("postgresql", "postgres", "mysql+pymysql")):
            pass
        else:
            self.set_status(400)
            self.finish(json.dumps({"error": "Schema search currently supports PostgreSQL, MySQL, or Snowflake connections"}))
            return

        engine = None
        try:
            engine = create_engine(db_url)
            schema_search = SchemaSearch(engine=engine, config_path=str(self.CONFIG_PATH))
            schema_search.index()

            query_results = []
            for query in queries:
                result = schema_search.search(query, limit=limit)
                query_results.append({
                    "query": query,
                    "results": result
                })

            self.finish(json.dumps({"results": query_results}))
        except Exception as error:
            self.set_status(500)
            self.finish(json.dumps({"error": f"Schema search failed: {error}"}))
        finally:
            # Always release pooled connections, even on failure.
            if engine is not None:
                try:
                    engine.dispose()
                except Exception:
                    pass

    def _install_package(self, package: str) -> None:
        """Install ``package`` into the current interpreter with pip.

        SECURITY NOTE(review): installing packages at request time pulls code
        from PyPI into the running server process; prefer declaring
        snowflake-sqlalchemy as an optional install-time dependency.
        """
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])

    def _ensure_snowflake_dependencies(self) -> None:
        """Import snowflake-sqlalchemy, installing it on demand if missing."""
        try:
            import snowflake.sqlalchemy  # type: ignore # noqa: F401
        except ImportError:
            self._install_package("snowflake-sqlalchemy")
            import snowflake.sqlalchemy  # type: ignore # noqa: F401
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "signalpilot-ai-internal",
3
- "version": "0.4.5",
3
+ "version": "0.4.7",
4
4
  "description": "SignalPilot Agent - Your Jupyter Notebook Assistant",
5
5
  "keywords": [
6
6
  "jupyter",
@@ -133,7 +133,7 @@
133
133
  "outputDir": "signalpilot_ai_internal/labextension",
134
134
  "schemaDir": "schema",
135
135
  "_build": {
136
- "load": "static/remoteEntry.2304af1dc768da3716f9.js",
136
+ "load": "static/remoteEntry.2a797d447eeb725a28cf.js",
137
137
  "extension": "./extension",
138
138
  "style": "./style"
139
139
  }
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "signalpilot-ai-internal",
3
- "version": "0.4.5",
3
+ "version": "0.4.7",
4
4
  "description": "SignalPilot Agent - Your Jupyter Notebook Assistant",
5
5
  "keywords": [
6
6
  "jupyter",