signalwire-agents 0.1.13__py3-none-any.whl → 1.0.17.dev4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- signalwire_agents/__init__.py +99 -15
- signalwire_agents/agent_server.py +248 -60
- signalwire_agents/agents/bedrock.py +296 -0
- signalwire_agents/cli/__init__.py +9 -0
- signalwire_agents/cli/build_search.py +951 -41
- signalwire_agents/cli/config.py +80 -0
- signalwire_agents/cli/core/__init__.py +10 -0
- signalwire_agents/cli/core/agent_loader.py +470 -0
- signalwire_agents/cli/core/argparse_helpers.py +179 -0
- signalwire_agents/cli/core/dynamic_config.py +71 -0
- signalwire_agents/cli/core/service_loader.py +303 -0
- signalwire_agents/cli/dokku.py +2320 -0
- signalwire_agents/cli/execution/__init__.py +10 -0
- signalwire_agents/cli/execution/datamap_exec.py +446 -0
- signalwire_agents/cli/execution/webhook_exec.py +134 -0
- signalwire_agents/cli/init_project.py +2636 -0
- signalwire_agents/cli/output/__init__.py +10 -0
- signalwire_agents/cli/output/output_formatter.py +255 -0
- signalwire_agents/cli/output/swml_dump.py +186 -0
- signalwire_agents/cli/simulation/__init__.py +10 -0
- signalwire_agents/cli/simulation/data_generation.py +374 -0
- signalwire_agents/cli/simulation/data_overrides.py +200 -0
- signalwire_agents/cli/simulation/mock_env.py +282 -0
- signalwire_agents/cli/swaig_test_wrapper.py +52 -0
- signalwire_agents/cli/test_swaig.py +566 -2366
- signalwire_agents/cli/types.py +81 -0
- signalwire_agents/core/__init__.py +2 -2
- signalwire_agents/core/agent/__init__.py +12 -0
- signalwire_agents/core/agent/config/__init__.py +12 -0
- signalwire_agents/core/agent/deployment/__init__.py +9 -0
- signalwire_agents/core/agent/deployment/handlers/__init__.py +9 -0
- signalwire_agents/core/agent/prompt/__init__.py +14 -0
- signalwire_agents/core/agent/prompt/manager.py +306 -0
- signalwire_agents/core/agent/routing/__init__.py +9 -0
- signalwire_agents/core/agent/security/__init__.py +9 -0
- signalwire_agents/core/agent/swml/__init__.py +9 -0
- signalwire_agents/core/agent/tools/__init__.py +15 -0
- signalwire_agents/core/agent/tools/decorator.py +97 -0
- signalwire_agents/core/agent/tools/registry.py +210 -0
- signalwire_agents/core/agent_base.py +845 -2916
- signalwire_agents/core/auth_handler.py +233 -0
- signalwire_agents/core/config_loader.py +259 -0
- signalwire_agents/core/contexts.py +418 -0
- signalwire_agents/core/data_map.py +3 -15
- signalwire_agents/core/function_result.py +116 -44
- signalwire_agents/core/logging_config.py +162 -18
- signalwire_agents/core/mixins/__init__.py +28 -0
- signalwire_agents/core/mixins/ai_config_mixin.py +442 -0
- signalwire_agents/core/mixins/auth_mixin.py +280 -0
- signalwire_agents/core/mixins/prompt_mixin.py +358 -0
- signalwire_agents/core/mixins/serverless_mixin.py +460 -0
- signalwire_agents/core/mixins/skill_mixin.py +55 -0
- signalwire_agents/core/mixins/state_mixin.py +153 -0
- signalwire_agents/core/mixins/tool_mixin.py +230 -0
- signalwire_agents/core/mixins/web_mixin.py +1142 -0
- signalwire_agents/core/security_config.py +333 -0
- signalwire_agents/core/skill_base.py +84 -1
- signalwire_agents/core/skill_manager.py +62 -20
- signalwire_agents/core/swaig_function.py +18 -5
- signalwire_agents/core/swml_builder.py +207 -11
- signalwire_agents/core/swml_handler.py +27 -21
- signalwire_agents/core/swml_renderer.py +123 -312
- signalwire_agents/core/swml_service.py +171 -203
- signalwire_agents/mcp_gateway/__init__.py +29 -0
- signalwire_agents/mcp_gateway/gateway_service.py +564 -0
- signalwire_agents/mcp_gateway/mcp_manager.py +513 -0
- signalwire_agents/mcp_gateway/session_manager.py +218 -0
- signalwire_agents/prefabs/concierge.py +0 -3
- signalwire_agents/prefabs/faq_bot.py +0 -3
- signalwire_agents/prefabs/info_gatherer.py +0 -3
- signalwire_agents/prefabs/receptionist.py +0 -3
- signalwire_agents/prefabs/survey.py +0 -3
- signalwire_agents/schema.json +9218 -5489
- signalwire_agents/search/__init__.py +7 -1
- signalwire_agents/search/document_processor.py +490 -31
- signalwire_agents/search/index_builder.py +307 -37
- signalwire_agents/search/migration.py +418 -0
- signalwire_agents/search/models.py +30 -0
- signalwire_agents/search/pgvector_backend.py +748 -0
- signalwire_agents/search/query_processor.py +162 -31
- signalwire_agents/search/search_engine.py +916 -35
- signalwire_agents/search/search_service.py +376 -53
- signalwire_agents/skills/README.md +452 -0
- signalwire_agents/skills/__init__.py +14 -2
- signalwire_agents/skills/api_ninjas_trivia/README.md +215 -0
- signalwire_agents/skills/api_ninjas_trivia/__init__.py +12 -0
- signalwire_agents/skills/api_ninjas_trivia/skill.py +237 -0
- signalwire_agents/skills/datasphere/README.md +210 -0
- signalwire_agents/skills/datasphere/skill.py +84 -3
- signalwire_agents/skills/datasphere_serverless/README.md +258 -0
- signalwire_agents/skills/datasphere_serverless/__init__.py +9 -0
- signalwire_agents/skills/datasphere_serverless/skill.py +82 -1
- signalwire_agents/skills/datetime/README.md +132 -0
- signalwire_agents/skills/datetime/__init__.py +9 -0
- signalwire_agents/skills/datetime/skill.py +20 -7
- signalwire_agents/skills/joke/README.md +149 -0
- signalwire_agents/skills/joke/__init__.py +9 -0
- signalwire_agents/skills/joke/skill.py +21 -0
- signalwire_agents/skills/math/README.md +161 -0
- signalwire_agents/skills/math/__init__.py +9 -0
- signalwire_agents/skills/math/skill.py +18 -4
- signalwire_agents/skills/mcp_gateway/README.md +230 -0
- signalwire_agents/skills/mcp_gateway/__init__.py +10 -0
- signalwire_agents/skills/mcp_gateway/skill.py +421 -0
- signalwire_agents/skills/native_vector_search/README.md +210 -0
- signalwire_agents/skills/native_vector_search/__init__.py +9 -0
- signalwire_agents/skills/native_vector_search/skill.py +569 -101
- signalwire_agents/skills/play_background_file/README.md +218 -0
- signalwire_agents/skills/play_background_file/__init__.py +12 -0
- signalwire_agents/skills/play_background_file/skill.py +242 -0
- signalwire_agents/skills/registry.py +395 -40
- signalwire_agents/skills/spider/README.md +236 -0
- signalwire_agents/skills/spider/__init__.py +13 -0
- signalwire_agents/skills/spider/skill.py +598 -0
- signalwire_agents/skills/swml_transfer/README.md +395 -0
- signalwire_agents/skills/swml_transfer/__init__.py +10 -0
- signalwire_agents/skills/swml_transfer/skill.py +359 -0
- signalwire_agents/skills/weather_api/README.md +178 -0
- signalwire_agents/skills/weather_api/__init__.py +12 -0
- signalwire_agents/skills/weather_api/skill.py +191 -0
- signalwire_agents/skills/web_search/README.md +163 -0
- signalwire_agents/skills/web_search/__init__.py +9 -0
- signalwire_agents/skills/web_search/skill.py +586 -112
- signalwire_agents/skills/wikipedia_search/README.md +228 -0
- signalwire_agents/{core/state → skills/wikipedia_search}/__init__.py +5 -4
- signalwire_agents/skills/{wikipedia → wikipedia_search}/skill.py +33 -3
- signalwire_agents/web/__init__.py +17 -0
- signalwire_agents/web/web_service.py +559 -0
- signalwire_agents-1.0.17.dev4.data/data/share/man/man1/sw-agent-init.1 +400 -0
- signalwire_agents-1.0.17.dev4.data/data/share/man/man1/sw-search.1 +483 -0
- signalwire_agents-1.0.17.dev4.data/data/share/man/man1/swaig-test.1 +308 -0
- {signalwire_agents-0.1.13.dist-info → signalwire_agents-1.0.17.dev4.dist-info}/METADATA +347 -215
- signalwire_agents-1.0.17.dev4.dist-info/RECORD +147 -0
- signalwire_agents-1.0.17.dev4.dist-info/entry_points.txt +6 -0
- signalwire_agents/core/state/file_state_manager.py +0 -219
- signalwire_agents/core/state/state_manager.py +0 -101
- signalwire_agents/skills/wikipedia/__init__.py +0 -9
- signalwire_agents-0.1.13.data/data/schema.json +0 -5611
- signalwire_agents-0.1.13.dist-info/RECORD +0 -67
- signalwire_agents-0.1.13.dist-info/entry_points.txt +0 -3
- {signalwire_agents-0.1.13.dist-info → signalwire_agents-1.0.17.dev4.dist-info}/WHEEL +0 -0
- {signalwire_agents-0.1.13.dist-info → signalwire_agents-1.0.17.dev4.dist-info}/licenses/LICENSE +0 -0
- {signalwire_agents-0.1.13.dist-info → signalwire_agents-1.0.17.dev4.dist-info}/top_level.txt +0 -0
|
@@ -8,25 +8,41 @@ See LICENSE file in the project root for full license information.
|
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
10
|
import logging
|
|
11
|
-
from typing import Dict, Any, List, Optional
|
|
11
|
+
from typing import Dict, Any, List, Optional, Tuple
|
|
12
12
|
|
|
13
13
|
try:
|
|
14
|
-
from fastapi import FastAPI, HTTPException
|
|
14
|
+
from fastapi import FastAPI, HTTPException, Request, Response, Depends
|
|
15
|
+
from fastapi.middleware.cors import CORSMiddleware
|
|
16
|
+
from fastapi.security import HTTPBasic, HTTPBasicCredentials
|
|
15
17
|
from pydantic import BaseModel
|
|
16
18
|
except ImportError:
|
|
17
19
|
FastAPI = None
|
|
18
20
|
HTTPException = None
|
|
19
21
|
BaseModel = None
|
|
22
|
+
Request = None
|
|
23
|
+
Response = None
|
|
24
|
+
Depends = None
|
|
25
|
+
CORSMiddleware = None
|
|
26
|
+
HTTPBasic = None
|
|
27
|
+
HTTPBasicCredentials = None
|
|
20
28
|
|
|
21
29
|
try:
|
|
22
30
|
from sentence_transformers import SentenceTransformer
|
|
23
31
|
except ImportError:
|
|
24
32
|
SentenceTransformer = None
|
|
25
33
|
|
|
26
|
-
from .query_processor import preprocess_query
|
|
34
|
+
from .query_processor import preprocess_query, set_global_model
|
|
27
35
|
from .search_engine import SearchEngine
|
|
36
|
+
from signalwire_agents.core.security_config import SecurityConfig
|
|
37
|
+
from signalwire_agents.core.config_loader import ConfigLoader
|
|
38
|
+
from signalwire_agents.core.logging_config import get_logger
|
|
28
39
|
|
|
29
|
-
logger =
|
|
40
|
+
logger = get_logger("search_service")
|
|
41
|
+
|
|
42
|
+
# Simple LRU cache for query results
|
|
43
|
+
from functools import lru_cache
|
|
44
|
+
import hashlib
|
|
45
|
+
import json
|
|
30
46
|
|
|
31
47
|
# Pydantic models for API
|
|
32
48
|
if BaseModel:
|
|
@@ -34,7 +50,7 @@ if BaseModel:
|
|
|
34
50
|
query: str
|
|
35
51
|
index_name: str = "default"
|
|
36
52
|
count: int = 3
|
|
37
|
-
|
|
53
|
+
similarity_threshold: float = 0.0
|
|
38
54
|
tags: Optional[List[str]] = None
|
|
39
55
|
language: Optional[str] = None
|
|
40
56
|
|
|
@@ -49,13 +65,13 @@ if BaseModel:
|
|
|
49
65
|
else:
|
|
50
66
|
# Fallback classes when FastAPI is not available
|
|
51
67
|
class SearchRequest:
|
|
52
|
-
def __init__(self, query: str, index_name: str = "default", count: int = 3,
|
|
53
|
-
|
|
68
|
+
def __init__(self, query: str, index_name: str = "default", count: int = 3,
|
|
69
|
+
similarity_threshold: float = 0.0, tags: Optional[List[str]] = None,
|
|
54
70
|
language: Optional[str] = None):
|
|
55
71
|
self.query = query
|
|
56
72
|
self.index_name = index_name
|
|
57
73
|
self.count = count
|
|
58
|
-
self.
|
|
74
|
+
self.similarity_threshold = similarity_threshold
|
|
59
75
|
self.tags = tags
|
|
60
76
|
self.language = language
|
|
61
77
|
|
|
@@ -70,17 +86,51 @@ else:
|
|
|
70
86
|
self.results = results
|
|
71
87
|
self.query_analysis = query_analysis
|
|
72
88
|
|
|
89
|
+
def _cache_key(query: str, index_name: str, count: int, tags: Optional[List[str]] = None) -> str:
|
|
90
|
+
"""Generate cache key for query results"""
|
|
91
|
+
key_data = {
|
|
92
|
+
'query': query.lower().strip(),
|
|
93
|
+
'index': index_name,
|
|
94
|
+
'count': count,
|
|
95
|
+
'tags': sorted(tags) if tags else []
|
|
96
|
+
}
|
|
97
|
+
key_str = json.dumps(key_data, sort_keys=True)
|
|
98
|
+
return hashlib.md5(key_str.encode()).hexdigest()
|
|
99
|
+
|
|
73
100
|
class SearchService:
|
|
74
|
-
"""Local search service with HTTP API"""
|
|
101
|
+
"""Local search service with HTTP API supporting both SQLite and pgvector backends"""
|
|
75
102
|
|
|
76
|
-
def __init__(self, port: int = 8001, indexes: Dict[str, str] = None
|
|
103
|
+
def __init__(self, port: int = 8001, indexes: Dict[str, str] = None,
|
|
104
|
+
basic_auth: Optional[Tuple[str, str]] = None,
|
|
105
|
+
config_file: Optional[str] = None,
|
|
106
|
+
backend: str = 'sqlite',
|
|
107
|
+
connection_string: Optional[str] = None):
|
|
108
|
+
# Load configuration first
|
|
109
|
+
self._load_config(config_file)
|
|
110
|
+
|
|
111
|
+
# Override with constructor params if provided
|
|
77
112
|
self.port = port
|
|
78
|
-
self.
|
|
113
|
+
self.backend = backend
|
|
114
|
+
self.connection_string = connection_string
|
|
115
|
+
|
|
116
|
+
if indexes is not None:
|
|
117
|
+
self.indexes = indexes
|
|
118
|
+
|
|
79
119
|
self.search_engines = {}
|
|
80
120
|
self.model = None
|
|
121
|
+
self._query_cache = {} # Simple query result cache
|
|
122
|
+
self._cache_size = 100 # Max number of cached queries
|
|
123
|
+
|
|
124
|
+
# Load security configuration with optional config file
|
|
125
|
+
self.security = SecurityConfig(config_file=config_file, service_name="search")
|
|
126
|
+
self.security.log_config("SearchService")
|
|
127
|
+
|
|
128
|
+
# Set up authentication
|
|
129
|
+
self._basic_auth = basic_auth or self.security.get_basic_auth()
|
|
81
130
|
|
|
82
131
|
if FastAPI:
|
|
83
132
|
self.app = FastAPI(title="SignalWire Local Search Service")
|
|
133
|
+
self._setup_security()
|
|
84
134
|
self._setup_routes()
|
|
85
135
|
else:
|
|
86
136
|
self.app = None
|
|
@@ -88,76 +138,287 @@ class SearchService:
|
|
|
88
138
|
|
|
89
139
|
self._load_resources()
|
|
90
140
|
|
|
141
|
+
def _load_config(self, config_file: Optional[str]):
    """Populate service settings from a configuration file, if one exists.

    Always resets ``indexes``, ``backend`` and ``connection_string`` to their
    defaults first. When no path is given, ``ConfigLoader.find_config_file``
    is asked to locate one for "search". If no usable file is found the
    defaults are left in place.

    Args:
        config_file: Explicit path to a config file, or a falsy value to
            trigger the lookup.
    """
    # Defaults, applied whether or not a config file is found
    self.indexes = {}
    self.backend = 'sqlite'
    self.connection_string = None

    # Explicit path wins; otherwise ask the loader to find one
    resolved_path = config_file or ConfigLoader.find_config_file("search")
    if not resolved_path:
        return

    loader = ConfigLoader([resolved_path])
    if not loader.has_config():
        return

    logger.info("loading_config_from_file", file=resolved_path)

    # Only the 'service' section is consulted
    service_config = loader.get_section('service')
    if not service_config:
        return

    if 'port' in service_config:
        self.port = int(service_config['port'])

    # Plain pass-through options, copied verbatim when present
    for option in ('backend', 'connection_string'):
        if option in service_config:
            setattr(self, option, service_config[option])

    # Index mapping is accepted only when it is a dict
    if 'indexes' in service_config:
        configured_indexes = service_config['indexes']
        if isinstance(configured_indexes, dict):
            self.indexes = configured_indexes
|
|
176
|
+
|
|
177
|
+
def _setup_security(self):
    """Install CORS, security-header and host-validation middleware on the app.

    Does nothing when no FastAPI app was created. Middleware is registered in
    the same order as before: CORS (if available), security headers, then
    host validation.
    """
    app = self.app
    if not app:
        return

    # CORS is optional: the name is None when the FastAPI import failed
    if CORSMiddleware:
        cors_options = self.security.get_cors_config()
        app.add_middleware(CORSMiddleware, **cors_options)

    async def add_security_headers(request: Request, call_next):
        """Attach the configured security headers to every response."""
        response = await call_next(request)

        # Header set depends on whether the request arrived over HTTPS
        secure_request = request.url.scheme == "https"
        extra_headers = self.security.get_security_headers(secure_request)
        for name, content in extra_headers.items():
            response.headers[name] = content

        return response

    async def validate_host(request: Request, call_next):
        """Reject requests whose Host header is not allowed."""
        # Drop any ":port" suffix before checking the host name
        hostname = request.headers.get("host", "").partition(":")[0]
        rejected = bool(hostname) and not self.security.should_allow_host(hostname)
        if rejected:
            return Response(content="Invalid host", status_code=400)

        return await call_next(request)

    # Equivalent to decorating each function with @app.middleware("http")
    app.middleware("http")(add_security_headers)
    app.middleware("http")(validate_host)
|
|
210
|
+
|
|
211
|
+
def _get_current_username(self, credentials: HTTPBasicCredentials = None) -> str:
|
|
212
|
+
"""Validate basic auth credentials"""
|
|
213
|
+
if not credentials:
|
|
214
|
+
return None
|
|
215
|
+
|
|
216
|
+
correct_username, correct_password = self._basic_auth
|
|
217
|
+
|
|
218
|
+
# Compare credentials
|
|
219
|
+
import secrets
|
|
220
|
+
username_correct = secrets.compare_digest(credentials.username, correct_username)
|
|
221
|
+
password_correct = secrets.compare_digest(credentials.password, correct_password)
|
|
222
|
+
|
|
223
|
+
if not (username_correct and password_correct):
|
|
224
|
+
raise HTTPException(
|
|
225
|
+
status_code=401,
|
|
226
|
+
detail="Invalid authentication credentials",
|
|
227
|
+
headers={"WWW-Authenticate": "Basic"},
|
|
228
|
+
)
|
|
229
|
+
|
|
230
|
+
return credentials.username
|
|
231
|
+
|
|
91
232
|
def _setup_routes(self):
    """Register the HTTP routes on the FastAPI app.

    Routes:
        POST /search        -- run a search (Basic auth when available)
        GET  /health        -- service status (unauthenticated)
        POST /reload_index  -- load or replace an index/collection
                               (Basic auth when available)

    Does nothing when no FastAPI app was created.
    """
    if not self.app:
        return

    import re

    # HTTP Basic dependency; None when the FastAPI import failed
    security = HTTPBasic() if HTTPBasic else None

    def _redact_connection_string(conn_str):
        """Mask any password embedded in a database connection string."""
        if not conn_str:
            return conn_str
        # URL form: scheme://user:password@host/...
        masked = re.sub(r'(://[^:/@\s]*):[^@/\s]*@', r'\1:***@', conn_str)
        # keyword/value form: ... password=secret ...
        masked = re.sub(
            r"(?i)(password\s*=\s*)('(?:[^'\\]|\\.)*'|\S+)", r"\1***", masked
        )
        return masked

    @self.app.post("/search", response_model=SearchResponse)
    async def search(
        request: SearchRequest,
        credentials: HTTPBasicCredentials = None if not security else Depends(security)
    ):
        if security:
            self._get_current_username(credentials)
        return await self._handle_search(request)

    @self.app.get("/health")
    async def health():
        return {
            "status": "healthy",
            "backend": self.backend,
            "indexes": list(self.indexes.keys()),
            "ssl_enabled": self.security.ssl_enabled,
            "auth_required": bool(security),
            # This endpoint is unauthenticated, so the password is never
            # echoed back; the rest of the string is kept for diagnostics.
            "connection_string": (
                _redact_connection_string(self.connection_string)
                if self.backend == 'pgvector' else None
            )
        }

    @self.app.post("/reload_index")
    async def reload_index(
        index_name: str,
        index_path: str,
        credentials: HTTPBasicCredentials = None if not security else Depends(security)
    ):
        """Reload or add new index/collection"""
        if security:
            self._get_current_username(credentials)

        if self.backend == 'pgvector':
            # For pgvector, index_path is actually the collection name
            try:
                engine = SearchEngine(
                    backend='pgvector',
                    connection_string=self.connection_string,
                    collection_name=index_path
                )
            except Exception as e:
                raise HTTPException(status_code=500, detail=f"Failed to load pgvector collection: {e}")
            backend_name = "pgvector"
        else:
            # SQLite backend
            engine = SearchEngine(
                backend='sqlite',
                index_path=index_path,
                model=self.model
            )
            backend_name = "sqlite"

        # Register only after the engine loaded, so /health never lists an
        # index that has no search engine behind it.
        self.indexes[index_name] = index_path
        self.search_engines[index_name] = engine

        # Cached responses may have come from the previous contents of this
        # index; drop them so later searches see the reloaded data.
        self._query_cache.clear()

        return {"status": "reloaded", "index": index_name, "backend": backend_name}
|
|
110
297
|
|
|
111
298
|
def _load_resources(self):
    """Load embedding models and build a search engine per configured index.

    SQLite backend: one model, named by the first configured index, is loaded
    and shared by every engine; it is also installed as the query processor's
    global model. A model load failure is logged and the engines are built
    with ``model=None``.

    pgvector backend: one engine per key of ``self.indexes``; each distinct
    ``model_name`` found in an engine's config is loaded once into
    ``self.models`` and mapped in ``self.collection_models``.

    Failures for an individual index/collection are logged, not raised.
    """
    if self.backend != 'pgvector':
        # SQLite backend: a single model shared across all indexes
        if self.indexes and SentenceTransformer:
            first_index_path = next(iter(self.indexes.values()))
            shared_model_name = self._get_model_name(first_index_path)
            try:
                self.model = SentenceTransformer(shared_model_name)
                # Hand the model to the query processor to avoid reloading
                set_global_model(self.model)
            except Exception as e:
                logger.warning(f"Could not load sentence transformer model: {e}")
                self.model = None

        # One engine per configured index
        for index_name, index_path in self.indexes.items():
            try:
                engine = SearchEngine(
                    backend='sqlite',
                    index_path=index_path,
                    model=self.model
                )
                self.search_engines[index_name] = engine
            except Exception as e:
                logger.error(f"Error loading search engine for {index_name}: {e}")
        return

    # pgvector backend: collections may use different embedding models
    self.models = {}             # model_name -> SentenceTransformer instance
    self.collection_models = {}  # collection_name -> model_name

    for collection_name in self.indexes:
        try:
            engine = SearchEngine(
                backend='pgvector',
                connection_string=self.connection_string,
                collection_name=collection_name
            )
            self.search_engines[collection_name] = engine

            # Model name comes from the collection's own config
            model_name = engine.config.get('model_name')
            if not model_name:
                logger.warning(f"No model_name in config for collection {collection_name}")
            else:
                self.collection_models[collection_name] = model_name

                if model_name in self.models:
                    logger.info(f"Using cached model {model_name} for collection {collection_name}")
                else:
                    logger.info(f"Loading model {model_name} for collection {collection_name}")
                    try:
                        loaded = SentenceTransformer(model_name)
                        loaded.model_name = model_name  # Store for cache comparison
                        self.models[model_name] = loaded
                    except Exception as e:
                        logger.error(f"Failed to load model {model_name}: {e}")
                        raise

            logger.info(f"Loaded pgvector collection: {collection_name}")
        except Exception as e:
            logger.error(f"Error loading pgvector collection {collection_name}: {e}")
|
|
130
364
|
|
|
131
365
|
def _get_model_name(self, index_path: str) -> str:
|
|
132
366
|
"""Get embedding model name from index config"""
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
cursor = conn.cursor()
|
|
137
|
-
cursor.execute("SELECT value FROM config WHERE key = 'embedding_model'")
|
|
138
|
-
result = cursor.fetchone()
|
|
139
|
-
conn.close()
|
|
140
|
-
return result[0] if result else 'sentence-transformers/all-mpnet-base-v2'
|
|
141
|
-
except Exception as e:
|
|
142
|
-
logger.warning(f"Could not get model name from index: {e}")
|
|
367
|
+
if self.backend == 'pgvector':
|
|
368
|
+
# For pgvector, we might want to store model info in the database
|
|
369
|
+
# For now, return default model
|
|
143
370
|
return 'sentence-transformers/all-mpnet-base-v2'
|
|
371
|
+
else:
|
|
372
|
+
# SQLite backend
|
|
373
|
+
try:
|
|
374
|
+
import sqlite3
|
|
375
|
+
conn = sqlite3.connect(index_path)
|
|
376
|
+
cursor = conn.cursor()
|
|
377
|
+
cursor.execute("SELECT value FROM config WHERE key = 'embedding_model'")
|
|
378
|
+
result = cursor.fetchone()
|
|
379
|
+
conn.close()
|
|
380
|
+
return result[0] if result else 'sentence-transformers/all-mpnet-base-v2'
|
|
381
|
+
except Exception as e:
|
|
382
|
+
logger.warning(f"Could not get model name from index: {e}")
|
|
383
|
+
return 'sentence-transformers/all-mpnet-base-v2'
|
|
144
384
|
|
|
145
385
|
async def _handle_search(self, request: SearchRequest) -> SearchResponse:
|
|
146
|
-
"""Handle search request"""
|
|
386
|
+
"""Handle search request with caching"""
|
|
147
387
|
if request.index_name not in self.search_engines:
|
|
148
388
|
if HTTPException:
|
|
149
389
|
raise HTTPException(status_code=404, detail=f"Index '{request.index_name}' not found")
|
|
150
390
|
else:
|
|
151
391
|
raise ValueError(f"Index '{request.index_name}' not found")
|
|
152
392
|
|
|
153
|
-
|
|
393
|
+
# Check cache first
|
|
394
|
+
cache_key = _cache_key(request.query, request.index_name, request.count, request.tags)
|
|
395
|
+
if cache_key in self._query_cache:
|
|
396
|
+
logger.info(f"Cache hit for query: {request.query[:50]}...")
|
|
397
|
+
return self._query_cache[cache_key]
|
|
154
398
|
|
|
399
|
+
search_engine = self.search_engines[request.index_name]
|
|
400
|
+
|
|
401
|
+
# For pgvector, set the correct model globally before query processing
|
|
402
|
+
if self.backend == 'pgvector' and hasattr(self, 'collection_models'):
|
|
403
|
+
collection_model_name = self.collection_models.get(request.index_name)
|
|
404
|
+
if collection_model_name and collection_model_name in self.models:
|
|
405
|
+
# Set this model globally so query processor uses it
|
|
406
|
+
set_global_model(self.models[collection_model_name])
|
|
407
|
+
logger.debug(f"Set global model to {collection_model_name} for collection {request.index_name}")
|
|
408
|
+
|
|
409
|
+
# Get model name from the search engine config
|
|
410
|
+
model_name = None
|
|
411
|
+
if hasattr(search_engine, 'config') and search_engine.config:
|
|
412
|
+
# pgvector uses 'model_name', sqlite uses 'embedding_model'
|
|
413
|
+
model_name = search_engine.config.get('model_name') or search_engine.config.get('embedding_model')
|
|
414
|
+
|
|
155
415
|
# Enhance query
|
|
156
416
|
try:
|
|
157
417
|
enhanced = preprocess_query(
|
|
158
418
|
request.query,
|
|
159
419
|
language=request.language or 'auto',
|
|
160
|
-
vector=True
|
|
420
|
+
vector=True,
|
|
421
|
+
model_name=model_name # Pass the correct model!
|
|
161
422
|
)
|
|
162
423
|
except Exception as e:
|
|
163
424
|
logger.error(f"Error preprocessing query: {e}")
|
|
@@ -173,7 +434,7 @@ class SearchService:
|
|
|
173
434
|
query_vector=enhanced.get('vector', []),
|
|
174
435
|
enhanced_text=enhanced['enhanced_text'],
|
|
175
436
|
count=request.count,
|
|
176
|
-
|
|
437
|
+
similarity_threshold=request.similarity_threshold,
|
|
177
438
|
tags=request.tags
|
|
178
439
|
)
|
|
179
440
|
except Exception as e:
|
|
@@ -190,7 +451,7 @@ class SearchService:
|
|
|
190
451
|
for result in results
|
|
191
452
|
]
|
|
192
453
|
|
|
193
|
-
|
|
454
|
+
response = SearchResponse(
|
|
194
455
|
results=search_results,
|
|
195
456
|
query_analysis={
|
|
196
457
|
'original_query': request.query,
|
|
@@ -199,6 +460,15 @@ class SearchService:
|
|
|
199
460
|
'pos_analysis': enhanced.get('POS')
|
|
200
461
|
}
|
|
201
462
|
)
|
|
463
|
+
|
|
464
|
+
# Cache the result
|
|
465
|
+
if len(self._query_cache) >= self._cache_size:
|
|
466
|
+
# Simple FIFO eviction
|
|
467
|
+
first_key = next(iter(self._query_cache))
|
|
468
|
+
del self._query_cache[first_key]
|
|
469
|
+
self._query_cache[cache_key] = response
|
|
470
|
+
|
|
471
|
+
return response
|
|
202
472
|
|
|
203
473
|
def search_direct(self, query: str, index_name: str = "default", count: int = 3,
|
|
204
474
|
distance: float = 0.0, tags: Optional[List[str]] = None,
|
|
@@ -235,14 +505,67 @@ class SearchService:
|
|
|
235
505
|
'query_analysis': response.query_analysis
|
|
236
506
|
}
|
|
237
507
|
|
|
238
|
-
def start(self
|
|
239
|
-
|
|
508
|
+
def start(self, host: str = "0.0.0.0", port: Optional[int] = None,
          ssl_cert: Optional[str] = None, ssl_key: Optional[str] = None):
    """
    Start the service with optional HTTPS support.

    Args:
        host: Host to bind to (default: "0.0.0.0")
        port: Port to bind to (default: self.port)
        ssl_cert: Path to SSL certificate file; used only together with ssl_key
        ssl_key: Path to SSL key file; used only together with ssl_cert

    Raises:
        RuntimeError: If FastAPI or uvicorn is not installed.
    """
    if not self.app:
        raise RuntimeError("FastAPI not available. Cannot start HTTP service.")

    port = port or self.port

    # Get SSL configuration
    if ssl_cert and ssl_key:
        # Use provided SSL files
        ssl_kwargs = {
            'ssl_certfile': ssl_cert,
            'ssl_keyfile': ssl_key
        }
    else:
        # Use security config SSL settings
        ssl_kwargs = self.security.get_ssl_context_kwargs()

    # Build startup URL
    scheme = "https" if ssl_kwargs else "http"
    startup_url = f"{scheme}://{host}:{port}"

    # Get auth credentials
    username, password = self._basic_auth

    # Log startup information (the password is deliberately not logged)
    logger.info(
        "starting_search_service",
        url=startup_url,
        ssl_enabled=bool(ssl_kwargs),
        indexes=list(self.indexes.keys()),
        username=username
    )

    # Print user-friendly startup message
    print("\nSignalWire Search Service starting...")
    print(f"URL: {startup_url}")
    print(f"Indexes: {', '.join(self.indexes.keys()) if self.indexes else 'None'}")
    # NOTE(review): this writes the password in clear text to stdout --
    # confirm that is intended for every deployment.
    print(f"Basic Auth: {username}:{password}")
    if ssl_kwargs:
        print("SSL: Enabled")
    print("")

    # Only the import is guarded: an ImportError raised later, while the
    # server is running, must not be misreported as "uvicorn not available".
    try:
        import uvicorn
    except ImportError as err:
        raise RuntimeError("uvicorn not available. Cannot start HTTP service.") from err

    uvicorn.run(
        self.app,
        host=host,
        port=port,
        **ssl_kwargs
    )
|
|
248
571
|
|