signalpilot-ai-internal 0.4.5__py3-none-any.whl → 0.4.7__py3-none-any.whl
This diff shows the content of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release.
This version of signalpilot-ai-internal might be problematic.
- signalpilot_ai_internal/_version.py +1 -1
- signalpilot_ai_internal/handlers.py +247 -1
- signalpilot_ai_internal/schema_search_config.yml +32 -0
- signalpilot_ai_internal/schema_search_service.py +109 -0
- {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/package.json +2 -2
- {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/schemas/signalpilot-ai-internal/package.json.orig +1 -1
- signalpilot_ai_internal-0.4.7.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/839.4db23bddecbec684b06c.js +1 -0
- signalpilot_ai_internal-0.4.7.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/923.e80ae4c5cedc1d73f2a1.js +1 -0
- signalpilot_ai_internal-0.4.5.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/remoteEntry.2304af1dc768da3716f9.js → signalpilot_ai_internal-0.4.7.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/remoteEntry.2a797d447eeb725a28cf.js +1 -1
- {signalpilot_ai_internal-0.4.5.dist-info → signalpilot_ai_internal-0.4.7.dist-info}/METADATA +3 -2
- {signalpilot_ai_internal-0.4.5.dist-info → signalpilot_ai_internal-0.4.7.dist-info}/RECORD +40 -38
- signalpilot_ai_internal-0.4.5.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/447.1e78c4216aeaaeadff40.js +0 -1
- signalpilot_ai_internal-0.4.5.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/839.be52ed152c5de2006fde.js +0 -1
- {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/etc/jupyter/jupyter_server_config.d/signalpilot_ai.json +0 -0
- {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/install.json +0 -0
- {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/schemas/signalpilot-ai-internal/plugin.json +0 -0
- {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/104.04e170724f369fcbaf19.js +0 -0
- {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/104.04e170724f369fcbaf19.js.LICENSE.txt +0 -0
- {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/122.e2dadf63dc64d7b5f1ee.js +0 -0
- {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/220.328403b5545f268b95c6.js +0 -0
- {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/262.726e1da31a50868cb297.js +0 -0
- {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/280.35d8c8b68815702a5238.js +0 -0
- {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/280.35d8c8b68815702a5238.js.LICENSE.txt +0 -0
- {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/353.72484b768a04f89bd3dd.js +0 -0
- {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/364.dbec4c2dc12e7b050dcc.js +0 -0
- {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/384.fa432bdb7fb6b1c95ad6.js +0 -0
- {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/439.37e271d7a80336daabe2.js +0 -0
- {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/476.9b4f05a99f5003f82094.js +0 -0
- {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/481.73c7a9290b7d35a8b9c1.js +0 -0
- {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/512.b58fc0093d080b8ee61c.js +0 -0
- {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/553.b4042a795c91d9ff71ef.js +0 -0
- {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/553.b4042a795c91d9ff71ef.js.LICENSE.txt +0 -0
- {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/606.90aaaae46b73dc3c08fb.js +0 -0
- {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/635.9720593ee20b768da3ca.js +0 -0
- {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/713.8e6edc9a965bdd578ca7.js +0 -0
- {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/742.91e7b516c8699eea3373.js +0 -0
- {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/785.3aa564fc148b37d1d719.js +0 -0
- {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/888.34054db17bcf6e87ec95.js +0 -0
- {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/style.js +0 -0
- {signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/third-party-licenses.json +0 -0
- {signalpilot_ai_internal-0.4.5.dist-info → signalpilot_ai_internal-0.4.7.dist-info}/WHEEL +0 -0
- {signalpilot_ai_internal-0.4.5.dist-info → signalpilot_ai_internal-0.4.7.dist-info}/licenses/LICENSE +0 -0
signalpilot_ai_internal/handlers.py
@@ -1,4 +1,8 @@
 import json
+import os
+import re
+from pathlib import Path
+from datetime import datetime

 from jupyter_server.base.handlers import APIHandler
 from jupyter_server.utils import url_path_join
@@ -8,6 +12,7 @@ from .cache_service import get_cache_service
 from .cache_handlers import ChatHistoriesHandler, AppValuesHandler, CacheInfoHandler
 from .unified_database_schema_service import UnifiedDatabaseSchemaHandler, UnifiedDatabaseQueryHandler
 from .snowflake_schema_service import SnowflakeSchemaHandler, SnowflakeQueryHandler
+from .schema_search_service import SchemaSearchHandler


 class HelloWorldHandler(APIHandler):
@@ -22,6 +27,237 @@ class HelloWorldHandler(APIHandler):
         }))


+class ReadAllFilesHandler(APIHandler):
+    """Handler for reading all notebook and data files in the workspace"""
+
+    # Common data file extensions
+    DATA_EXTENSIONS = {'.csv', '.json', '.xlsx', '.xls', '.parquet', '.pkl', '.pickle',
+                       '.feather', '.hdf5', '.h5', '.sql', '.db', '.sqlite', '.tsv', '.txt'}
+
+    # Directories to exclude from search
+    EXCLUDE_DIRS = {'.git', '.ipynb_checkpoints', 'node_modules', '__pycache__',
+                    '.venv', 'venv', 'env', '.pytest_cache', '.mypy_cache',
+                    'dist', 'build', '.tox', 'logs', '.vscode'}
+
+    @tornado.web.authenticated
+    def get(self):
+        try:
+            # Get the root directory where Jupyter Lab is running
+            root_dir = Path(os.getcwd())
+
+            # Find all notebook files
+            notebooks = self._find_notebooks(root_dir)
+
+            # Find all data files
+            data_files = self._find_data_files(root_dir)
+
+            # Get the 10 most recently edited notebooks
+            recent_notebooks = self._get_recent_notebooks(notebooks, limit=10)
+
+            # Analyze each notebook for data dependencies
+            notebook_info = []
+            all_data_dependencies = set()
+            for notebook_path in recent_notebooks:
+                info = self._analyze_notebook(notebook_path, data_files, root_dir)
+                notebook_info.append(info)
+                # Collect all data dependencies from recent notebooks
+                all_data_dependencies.update(info['data_dependencies'])
+
+            # Filter data files to only those referenced by recent notebooks
+            referenced_data_files = []
+            for data_file in data_files:
+                rel_path = str(data_file.relative_to(root_dir))
+                rel_path_forward = rel_path.replace('\\', '/')
+                file_name = data_file.name
+
+                # Check if this data file is referenced in any dependency
+                if any(dep in [file_name, rel_path, rel_path_forward] or
+                       file_name in dep or rel_path in dep or rel_path_forward in dep
+                       for dep in all_data_dependencies):
+                    referenced_data_files.append(data_file)
+
+            # Generate the LLM-optimized context string with only referenced data
+            welcome_context = self._generate_welcome_context(notebook_info, referenced_data_files, root_dir)
+
+            self.finish(json.dumps({
+                "welcome_context": welcome_context,
+                "notebook_count": len(notebooks),
+                "data_file_count": len(data_files),
+                "recent_notebook_count": len(recent_notebooks),
+                "referenced_data_count": len(referenced_data_files)
+            }))
+
+        except Exception as e:
+            self.set_status(500)
+            self.finish(json.dumps({
+                "error": str(e)
+            }))
+
+    def _find_notebooks(self, root_dir: Path) -> list:
+        """Find all .ipynb files in the workspace"""
+        notebooks = []
+        for path in root_dir.rglob('*.ipynb'):
+            # Skip excluded directories
+            if any(excluded in path.parts for excluded in self.EXCLUDE_DIRS):
+                continue
+            notebooks.append(path)
+        return notebooks
+
+    def _find_data_files(self, root_dir: Path) -> list:
+        """Find all data files in the workspace"""
+        data_files = []
+        for path in root_dir.rglob('*'):
+            # Skip excluded directories
+            if any(excluded in path.parts for excluded in self.EXCLUDE_DIRS):
+                continue
+            # Check if file has a data extension
+            if path.is_file() and path.suffix.lower() in self.DATA_EXTENSIONS:
+                data_files.append(path)
+        return data_files
+
+    def _get_recent_notebooks(self, notebooks: list, limit: int = 10) -> list:
+        """Get the most recently modified notebooks"""
+        # Sort by modification time (most recent first)
+        notebooks_with_mtime = [(nb, nb.stat().st_mtime) for nb in notebooks]
+        notebooks_with_mtime.sort(key=lambda x: x[1], reverse=True)
+
+        # Return only the paths, limited to the specified number
+        return [nb for nb, _ in notebooks_with_mtime[:limit]]
+
+    def _analyze_notebook(self, notebook_path: Path, data_files: list, root_dir: Path) -> dict:
+        """Analyze a notebook to find data dependencies"""
+        try:
+            with open(notebook_path, 'r', encoding='utf-8') as f:
+                notebook_content = f.read()
+
+            # Find data file references in the notebook
+            referenced_data_files = self._find_data_references(notebook_content, data_files, root_dir)
+
+            # Get relative path from root
+            relative_path = notebook_path.relative_to(root_dir)
+
+            # Get last modified time
+            mtime = datetime.fromtimestamp(notebook_path.stat().st_mtime)
+
+            return {
+                'name': notebook_path.name,
+                'path': str(relative_path),
+                'last_modified': mtime.strftime('%Y-%m-%d %H:%M:%S'),
+                'data_dependencies': referenced_data_files
+            }
+        except Exception as e:
+            # If we can't read the notebook, return basic info
+            relative_path = notebook_path.relative_to(root_dir)
+            return {
+                'name': notebook_path.name,
+                'path': str(relative_path),
+                'last_modified': 'unknown',
+                'data_dependencies': [],
+                'error': str(e)
+            }
+
+    def _find_data_references(self, content: str, data_files: list, root_dir: Path) -> list:
+        """Find references to data files in notebook content"""
+        referenced_files = []
+
+        # Create a set of data file names and paths for matching
+        data_file_patterns = set()
+        for data_file in data_files:
+            # Add the filename
+            data_file_patterns.add(data_file.name)
+            # Add relative path
+            try:
+                rel_path = str(data_file.relative_to(root_dir))
+                data_file_patterns.add(rel_path)
+                # Also add with forward slashes (common in code)
+                data_file_patterns.add(rel_path.replace('\\', '/'))
+            except ValueError:
+                pass
+
+        # Search for data file references
+        # Common patterns: pd.read_csv('file.csv'), open('file.csv'), 'path/to/file.csv'
+        patterns = [
+            r'["\']([^"\']+\.(?:csv|json|xlsx?|parquet|pkl|pickle|feather|hdf5|h5|sql|db|sqlite|tsv|txt))["\']',
+            r'read_(?:csv|json|excel|parquet|pickle|feather|hdf|sql|table)\(["\']([^"\']+)["\']',
+            r'to_(?:csv|json|excel|parquet|pickle|feather|hdf|sql)\(["\']([^"\']+)["\']',
+        ]
+
+        found_references = set()
+        for pattern in patterns:
+            matches = re.finditer(pattern, content, re.IGNORECASE)
+            for match in matches:
+                file_ref = match.group(1)
+                # Check if this reference matches any of our data files
+                if file_ref in data_file_patterns or any(file_ref in str(df) for df in data_files):
+                    found_references.add(file_ref)
+
+        # Also check for database connection strings
+        db_patterns = [
+            r'(?:postgresql|mysql|sqlite|mongodb)://[^\s\'"]+',
+            r'(?:DATABASE_URL|DB_URL|CONNECTION_STRING)\s*=\s*["\']([^"\']+)["\']'
+        ]
+
+        for pattern in db_patterns:
+            matches = re.finditer(pattern, content, re.IGNORECASE)
+            for match in matches:
+                found_references.add(f"Database: {match.group(0)[:50]}...")
+
+        return sorted(list(found_references))
+
+    def _generate_welcome_context(self, notebook_info: list, data_files: list, root_dir: Path) -> str:
+        """Generate an LLM-optimized, human-readable context string"""
+        lines = []
+        lines.append("# Workspace Overview\n")
+
+        if not notebook_info:
+            lines.append("No notebooks found in the workspace.\n")
+        else:
+            lines.append(f"## Recent Notebooks ({len(notebook_info)})\n")
+
+            for i, info in enumerate(notebook_info, 1):
+                lines.append(f"\n### {i}. {info['name']}")
+                lines.append(f" - Path: {info['path']}")
+                lines.append(f" - Last Modified: {info['last_modified']}")
+
+                if info.get('error'):
+                    lines.append(f" - Note: Could not fully analyze ({info['error']})")
+
+                if info['data_dependencies']:
+                    lines.append(f" - Data Dependencies:")
+                    for dep in info['data_dependencies']:
+                        lines.append(f"   • {dep}")
+                else:
+                    lines.append(f" - Data Dependencies: None detected")
+
+        # Add summary of data files referenced by recent notebooks
+        if data_files:
+            lines.append(f"\n## Data Files Referenced by Recent Notebooks ({len(data_files)} total)\n")
+
+            # Group by extension
+            by_extension = {}
+            for df in data_files:
+                ext = df.suffix.lower()
+                if ext not in by_extension:
+                    by_extension[ext] = []
+                try:
+                    rel_path = str(df.relative_to(root_dir))
+                    by_extension[ext].append(rel_path)
+                except ValueError:
+                    by_extension[ext].append(str(df))
+
+            for ext in sorted(by_extension.keys()):
+                files = by_extension[ext]
+                lines.append(f"\n### {ext} files ({len(files)})")
+                # Show all referenced files (they should be limited already)
+                for f in sorted(files):
+                    lines.append(f" - {f}")
+        else:
+            lines.append(f"\n## Data Files Referenced by Recent Notebooks\n")
+            lines.append("No data file dependencies found in recent notebooks.\n")
+
+        return '\n'.join(lines)
+
+
 def setup_handlers(web_app):
     host_pattern = ".*$"
     base_url = web_app.settings["base_url"]
@@ -29,6 +265,9 @@ def setup_handlers(web_app):
     # Original hello world endpoint
     hello_route = url_path_join(base_url, "signalpilot-ai-internal", "hello-world")

+    # Read all files endpoint
+    read_all_files_route = url_path_join(base_url, "signalpilot-ai-internal", "read-all-files")
+
     # Cache service endpoints
     chat_histories_route = url_path_join(base_url, "signalpilot-ai-internal", "cache", "chat-histories")
     chat_history_route = url_path_join(base_url, "signalpilot-ai-internal", "cache", "chat-histories", "([^/]+)")
@@ -41,6 +280,7 @@ def setup_handlers(web_app):
     # Database service endpoints
     database_schema_route = url_path_join(base_url, "signalpilot-ai-internal", "database", "schema")
     database_query_route = url_path_join(base_url, "signalpilot-ai-internal", "database", "query")
+    database_schema_search_route = url_path_join(base_url, "signalpilot-ai-internal", "database", "schema-search")

     # MySQL service endpoints
     mysql_schema_route = url_path_join(base_url, "signalpilot-ai-internal", "mysql", "schema")
@@ -54,6 +294,9 @@ def setup_handlers(web_app):
         # Original endpoint
         (hello_route, HelloWorldHandler),

+        # Read all files endpoint
+        (read_all_files_route, ReadAllFilesHandler),
+
         # Chat histories endpoints
         (chat_histories_route, ChatHistoriesHandler),
         (chat_history_route, ChatHistoriesHandler),
@@ -68,6 +311,7 @@ def setup_handlers(web_app):
         # Database service endpoints (unified for PostgreSQL and MySQL)
         (database_schema_route, UnifiedDatabaseSchemaHandler),
         (database_query_route, UnifiedDatabaseQueryHandler),
+        (database_schema_search_route, SchemaSearchHandler),

         # MySQL service endpoints (use unified handler)
         (mysql_schema_route, UnifiedDatabaseSchemaHandler),
@@ -90,6 +334,7 @@ def setup_handlers(web_app):

     print("SignalPilot AI backend handlers registered:")
     print(f" - Hello World: {hello_route}")
+    print(f" - Read All Files: {read_all_files_route}")
     print(f" - Chat Histories: {chat_histories_route}")
     print(f" - Chat History (by ID): {chat_history_route}")
     print(f" - App Values: {app_values_route}")
@@ -97,7 +342,8 @@ def setup_handlers(web_app):
     print(f" - Cache Info: {cache_info_route}")
     print(f" - Database Schema: {database_schema_route}")
     print(f" - Database Query: {database_query_route}")
+    print(f" - Database Schema Search: {database_schema_search_route}")
     print(f" - MySQL Schema: {mysql_schema_route}")
     print(f" - MySQL Query: {mysql_query_route}")
     print(f" - Snowflake Schema: {snowflake_schema_route}")
-    print(f" - Snowflake Query: {snowflake_query_route}")
+    print(f" - Snowflake Query: {snowflake_query_route}")
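
The new ReadAllFilesHandler is mounted at /signalpilot-ai-internal/read-all-files and returns the generated workspace summary as JSON. A minimal sketch of exercising the endpoint, assuming a local Jupyter server; the base URL and token below are illustrative placeholders:

import requests

BASE_URL = "http://localhost:8888"   # assumed local Jupyter Lab server
TOKEN = "<your-jupyter-token>"       # the handler is @tornado.web.authenticated

resp = requests.get(
    f"{BASE_URL}/signalpilot-ai-internal/read-all-files",
    headers={"Authorization": f"token {TOKEN}"},
)
resp.raise_for_status()
payload = resp.json()

# welcome_context is the LLM-oriented workspace overview assembled by the handler
print(payload["notebook_count"], "notebooks,", payload["data_file_count"], "data files")
print(payload["welcome_context"])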
signalpilot_ai_internal/schema_search_config.yml (new file)
@@ -0,0 +1,32 @@
+logging:
+  level: "WARNING"
+
+embedding:
+  location: "memory"
+  model: "multi-qa-MiniLM-L6-cos-v1"
+  metric: "cosine"
+  batch_size: 32
+  show_progress: false
+  cache_dir: "/tmp/.schema_search_cache"
+
+chunking:
+  strategy: "raw"
+  max_tokens: 256
+  overlap_tokens: 50
+  model: "gpt-4o-mini"
+
+search:
+  strategy: "hybrid"
+  initial_top_k: 20
+  rerank_top_k: 5
+  semantic_weight: 0.67
+  hops: 1
+
+reranker:
+  model: null
+
+schema:
+  include_columns: true
+  include_indices: true
+  include_foreign_keys: true
+  include_constraints: true
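
The config above controls the schema-search service's embedding, chunking, and retrieval behavior (hybrid search blending semantic and lexical scores, with a 0.67 weight on embeddings). A minimal sketch of inspecting it, assuming PyYAML is available; how the schema_search library itself loads the file is not shown in this diff:

from pathlib import Path
import yaml

# Parse the shipped YAML config into a plain dict
config = yaml.safe_load(Path("signalpilot_ai_internal/schema_search_config.yml").read_text())

print(config["search"]["strategy"])         # "hybrid"
print(config["search"]["semantic_weight"])  # 0.67
print(config["embedding"]["model"])         # "multi-qa-MiniLM-L6-cos-v1"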
signalpilot_ai_internal/schema_search_service.py (new file)
@@ -0,0 +1,109 @@
+import json
+import os
+import subprocess
+import sys
+from pathlib import Path
+from typing import Optional
+
+from jupyter_server.base.handlers import APIHandler
+import tornado
+from schema_search import SchemaSearch
+from sqlalchemy import create_engine
+
+
+class SchemaSearchHandler(APIHandler):
+    CONFIG_PATH = Path(__file__).with_name("schema_search_config.yml")
+
+    def _get_database_url(self, explicit: Optional[str]) -> Optional[str]:
+        if isinstance(explicit, str) and explicit.strip():
+            return explicit.strip()
+
+        for key, value in os.environ.items():
+            if key.endswith("_CONNECTION_JSON") and isinstance(value, str) and value.strip().startswith("{"):
+                config = json.loads(value)
+                url = config.get("connectionUrl")
+                if url:
+                    return url
+        return os.environ.get("DB_URL")
+
+    @tornado.web.authenticated
+    async def post(self):
+        body = self.get_json_body() or {}
+        queries = body.get("queries")
+        if isinstance(queries, str):
+            queries = [queries]
+
+        if not isinstance(queries, list):
+            self.set_status(400)
+            self.finish(json.dumps({"error": "queries parameter must be a list of strings"}))
+            return
+
+        queries = [q.strip() for q in queries if isinstance(q, str) and q.strip()]
+
+        if not queries:
+            self.set_status(400)
+            self.finish(json.dumps({"error": "queries parameter is required"}))
+            return
+
+        db_url = self._get_database_url(body.get("dbUrl"))
+        if not db_url:
+            self.set_status(400)
+            self.finish(json.dumps({"error": "Database connection URL is not configured"}))
+            return
+
+        db_url = db_url.strip()
+        db_url_lower = db_url.lower()
+
+        if db_url_lower.startswith("mysql://"):
+            db_url = "mysql+pymysql://" + db_url[len("mysql://"):]
+            db_url_lower = db_url.lower()
+
+        if db_url_lower.startswith("snowflake://"):
+            self._ensure_snowflake_dependencies()
+        elif db_url_lower.startswith("postgresql") or db_url_lower.startswith("postgres") or db_url_lower.startswith("mysql+pymysql"):
+            pass
+        else:
+            self.set_status(400)
+            self.finish(json.dumps({"error": "Schema search currently supports PostgreSQL, MySQL, or Snowflake connections"}))
+            return
+
+        engine = None
+        try:
+            engine = create_engine(db_url)
+            schema_search = SchemaSearch(engine=engine, config_path=str(self.CONFIG_PATH))
+            schema_search.index()
+
+            limit = body.get("limit")
+            if limit is not None:
+                limit = max(1, min(int(limit), 10))
+            else:
+                limit = 5
+
+            query_results = []
+            for query in queries:
+                result = schema_search.search(query, limit=limit)
+                query_results.append({
+                    "query": query,
+                    "results": result
+                })
+
+            self.finish(json.dumps({"results": query_results}))
+        except Exception as error:
+            self.set_status(500)
+            self.finish(json.dumps({"error": f"Schema search failed: {error}"}))
+        finally:
+            if engine is not None:
+                try:
+                    engine.dispose()
+                except Exception:
+                    pass
+
+    def _install_package(self, package: str) -> None:
+        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
+
+    def _ensure_snowflake_dependencies(self) -> None:
+        try:
+            import snowflake.sqlalchemy  # type: ignore  # noqa: F401
+        except ImportError:
+            self._install_package("snowflake-sqlalchemy")
+            import snowflake.sqlalchemy  # type: ignore  # noqa: F401
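
The handler accepts a POST with a required queries list plus optional dbUrl and limit fields (limit is clamped to 1..10, defaulting to 5); if dbUrl is omitted, it falls back to a *_CONNECTION_JSON environment variable or DB_URL. A minimal sketch of calling the endpoint, assuming a local Jupyter server; the token and PostgreSQL DSN are illustrative placeholders:

import requests

BASE_URL = "http://localhost:8888"   # assumed local Jupyter Lab server
TOKEN = "<your-jupyter-token>"

resp = requests.post(
    f"{BASE_URL}/signalpilot-ai-internal/database/schema-search",
    headers={"Authorization": f"token {TOKEN}"},
    json={
        "queries": ["tables that store customer orders"],
        "dbUrl": "postgresql://user:pass@localhost:5432/mydb",  # placeholder DSN
        "limit": 5,
    },
)
print(resp.json())  # {"results": [{"query": ..., "results": [...]}]}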
{signalpilot_ai_internal-0.4.5.data → signalpilot_ai_internal-0.4.7.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/package.json
@@ -1,6 +1,6 @@
 {
   "name": "signalpilot-ai-internal",
-  "version": "0.4.5",
+  "version": "0.4.7",
   "description": "SignalPilot Agent - Your Jupyter Notebook Assistant",
   "keywords": [
     "jupyter",
@@ -133,7 +133,7 @@
     "outputDir": "signalpilot_ai_internal/labextension",
     "schemaDir": "schema",
     "_build": {
-      "load": "static/remoteEntry.2304af1dc768da3716f9.js",
+      "load": "static/remoteEntry.2a797d447eeb725a28cf.js",
       "extension": "./extension",
       "style": "./style"
     }