signalwire-agents 0.1.6__py3-none-any.whl → 1.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- signalwire_agents/__init__.py +130 -4
- signalwire_agents/agent_server.py +438 -32
- signalwire_agents/agents/bedrock.py +296 -0
- signalwire_agents/cli/__init__.py +18 -0
- signalwire_agents/cli/build_search.py +1367 -0
- signalwire_agents/cli/config.py +80 -0
- signalwire_agents/cli/core/__init__.py +10 -0
- signalwire_agents/cli/core/agent_loader.py +470 -0
- signalwire_agents/cli/core/argparse_helpers.py +179 -0
- signalwire_agents/cli/core/dynamic_config.py +71 -0
- signalwire_agents/cli/core/service_loader.py +303 -0
- signalwire_agents/cli/execution/__init__.py +10 -0
- signalwire_agents/cli/execution/datamap_exec.py +446 -0
- signalwire_agents/cli/execution/webhook_exec.py +134 -0
- signalwire_agents/cli/init_project.py +1225 -0
- signalwire_agents/cli/output/__init__.py +10 -0
- signalwire_agents/cli/output/output_formatter.py +255 -0
- signalwire_agents/cli/output/swml_dump.py +186 -0
- signalwire_agents/cli/simulation/__init__.py +10 -0
- signalwire_agents/cli/simulation/data_generation.py +374 -0
- signalwire_agents/cli/simulation/data_overrides.py +200 -0
- signalwire_agents/cli/simulation/mock_env.py +282 -0
- signalwire_agents/cli/swaig_test_wrapper.py +52 -0
- signalwire_agents/cli/test_swaig.py +809 -0
- signalwire_agents/cli/types.py +81 -0
- signalwire_agents/core/__init__.py +2 -2
- signalwire_agents/core/agent/__init__.py +12 -0
- signalwire_agents/core/agent/config/__init__.py +12 -0
- signalwire_agents/core/agent/deployment/__init__.py +9 -0
- signalwire_agents/core/agent/deployment/handlers/__init__.py +9 -0
- signalwire_agents/core/agent/prompt/__init__.py +14 -0
- signalwire_agents/core/agent/prompt/manager.py +306 -0
- signalwire_agents/core/agent/routing/__init__.py +9 -0
- signalwire_agents/core/agent/security/__init__.py +9 -0
- signalwire_agents/core/agent/swml/__init__.py +9 -0
- signalwire_agents/core/agent/tools/__init__.py +15 -0
- signalwire_agents/core/agent/tools/decorator.py +97 -0
- signalwire_agents/core/agent/tools/registry.py +210 -0
- signalwire_agents/core/agent_base.py +959 -2166
- signalwire_agents/core/auth_handler.py +233 -0
- signalwire_agents/core/config_loader.py +259 -0
- signalwire_agents/core/contexts.py +707 -0
- signalwire_agents/core/data_map.py +487 -0
- signalwire_agents/core/function_result.py +1150 -1
- signalwire_agents/core/logging_config.py +376 -0
- signalwire_agents/core/mixins/__init__.py +28 -0
- signalwire_agents/core/mixins/ai_config_mixin.py +442 -0
- signalwire_agents/core/mixins/auth_mixin.py +287 -0
- signalwire_agents/core/mixins/prompt_mixin.py +358 -0
- signalwire_agents/core/mixins/serverless_mixin.py +368 -0
- signalwire_agents/core/mixins/skill_mixin.py +55 -0
- signalwire_agents/core/mixins/state_mixin.py +153 -0
- signalwire_agents/core/mixins/tool_mixin.py +230 -0
- signalwire_agents/core/mixins/web_mixin.py +1134 -0
- signalwire_agents/core/security/session_manager.py +174 -86
- signalwire_agents/core/security_config.py +333 -0
- signalwire_agents/core/skill_base.py +200 -0
- signalwire_agents/core/skill_manager.py +244 -0
- signalwire_agents/core/swaig_function.py +33 -9
- signalwire_agents/core/swml_builder.py +212 -12
- signalwire_agents/core/swml_handler.py +43 -13
- signalwire_agents/core/swml_renderer.py +123 -297
- signalwire_agents/core/swml_service.py +277 -260
- signalwire_agents/prefabs/concierge.py +6 -2
- signalwire_agents/prefabs/info_gatherer.py +149 -33
- signalwire_agents/prefabs/receptionist.py +14 -22
- signalwire_agents/prefabs/survey.py +6 -2
- signalwire_agents/schema.json +9218 -5489
- signalwire_agents/search/__init__.py +137 -0
- signalwire_agents/search/document_processor.py +1223 -0
- signalwire_agents/search/index_builder.py +804 -0
- signalwire_agents/search/migration.py +418 -0
- signalwire_agents/search/models.py +30 -0
- signalwire_agents/search/pgvector_backend.py +752 -0
- signalwire_agents/search/query_processor.py +502 -0
- signalwire_agents/search/search_engine.py +1264 -0
- signalwire_agents/search/search_service.py +574 -0
- signalwire_agents/skills/README.md +452 -0
- signalwire_agents/skills/__init__.py +23 -0
- signalwire_agents/skills/api_ninjas_trivia/README.md +215 -0
- signalwire_agents/skills/api_ninjas_trivia/__init__.py +12 -0
- signalwire_agents/skills/api_ninjas_trivia/skill.py +237 -0
- signalwire_agents/skills/datasphere/README.md +210 -0
- signalwire_agents/skills/datasphere/__init__.py +12 -0
- signalwire_agents/skills/datasphere/skill.py +310 -0
- signalwire_agents/skills/datasphere_serverless/README.md +258 -0
- signalwire_agents/skills/datasphere_serverless/__init__.py +10 -0
- signalwire_agents/skills/datasphere_serverless/skill.py +237 -0
- signalwire_agents/skills/datetime/README.md +132 -0
- signalwire_agents/skills/datetime/__init__.py +10 -0
- signalwire_agents/skills/datetime/skill.py +126 -0
- signalwire_agents/skills/joke/README.md +149 -0
- signalwire_agents/skills/joke/__init__.py +10 -0
- signalwire_agents/skills/joke/skill.py +109 -0
- signalwire_agents/skills/math/README.md +161 -0
- signalwire_agents/skills/math/__init__.py +10 -0
- signalwire_agents/skills/math/skill.py +105 -0
- signalwire_agents/skills/mcp_gateway/README.md +230 -0
- signalwire_agents/skills/mcp_gateway/__init__.py +10 -0
- signalwire_agents/skills/mcp_gateway/skill.py +421 -0
- signalwire_agents/skills/native_vector_search/README.md +210 -0
- signalwire_agents/skills/native_vector_search/__init__.py +10 -0
- signalwire_agents/skills/native_vector_search/skill.py +820 -0
- signalwire_agents/skills/play_background_file/README.md +218 -0
- signalwire_agents/skills/play_background_file/__init__.py +12 -0
- signalwire_agents/skills/play_background_file/skill.py +242 -0
- signalwire_agents/skills/registry.py +459 -0
- signalwire_agents/skills/spider/README.md +236 -0
- signalwire_agents/skills/spider/__init__.py +13 -0
- signalwire_agents/skills/spider/skill.py +598 -0
- signalwire_agents/skills/swml_transfer/README.md +395 -0
- signalwire_agents/skills/swml_transfer/__init__.py +10 -0
- signalwire_agents/skills/swml_transfer/skill.py +359 -0
- signalwire_agents/skills/weather_api/README.md +178 -0
- signalwire_agents/skills/weather_api/__init__.py +12 -0
- signalwire_agents/skills/weather_api/skill.py +191 -0
- signalwire_agents/skills/web_search/README.md +163 -0
- signalwire_agents/skills/web_search/__init__.py +10 -0
- signalwire_agents/skills/web_search/skill.py +739 -0
- signalwire_agents/skills/wikipedia_search/README.md +228 -0
- signalwire_agents/{core/state → skills/wikipedia_search}/__init__.py +5 -4
- signalwire_agents/skills/wikipedia_search/skill.py +210 -0
- signalwire_agents/utils/__init__.py +14 -0
- signalwire_agents/utils/schema_utils.py +111 -44
- signalwire_agents/web/__init__.py +17 -0
- signalwire_agents/web/web_service.py +559 -0
- signalwire_agents-1.0.7.data/data/share/man/man1/sw-agent-init.1 +307 -0
- signalwire_agents-1.0.7.data/data/share/man/man1/sw-search.1 +483 -0
- signalwire_agents-1.0.7.data/data/share/man/man1/swaig-test.1 +308 -0
- signalwire_agents-1.0.7.dist-info/METADATA +992 -0
- signalwire_agents-1.0.7.dist-info/RECORD +142 -0
- {signalwire_agents-0.1.6.dist-info → signalwire_agents-1.0.7.dist-info}/WHEEL +1 -1
- signalwire_agents-1.0.7.dist-info/entry_points.txt +4 -0
- signalwire_agents/core/state/file_state_manager.py +0 -219
- signalwire_agents/core/state/state_manager.py +0 -101
- signalwire_agents-0.1.6.data/data/schema.json +0 -5611
- signalwire_agents-0.1.6.dist-info/METADATA +0 -199
- signalwire_agents-0.1.6.dist-info/RECORD +0 -34
- {signalwire_agents-0.1.6.dist-info → signalwire_agents-1.0.7.dist-info}/licenses/LICENSE +0 -0
- {signalwire_agents-0.1.6.dist-info → signalwire_agents-1.0.7.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,574 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Copyright (c) 2025 SignalWire
|
|
3
|
+
|
|
4
|
+
This file is part of the SignalWire AI Agents SDK.
|
|
5
|
+
|
|
6
|
+
Licensed under the MIT License.
|
|
7
|
+
See LICENSE file in the project root for full license information.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
from typing import Dict, Any, List, Optional, Tuple
|
|
12
|
+
|
|
13
|
+
try:
|
|
14
|
+
from fastapi import FastAPI, HTTPException, Request, Response, Depends
|
|
15
|
+
from fastapi.middleware.cors import CORSMiddleware
|
|
16
|
+
from fastapi.security import HTTPBasic, HTTPBasicCredentials
|
|
17
|
+
from pydantic import BaseModel
|
|
18
|
+
except ImportError:
|
|
19
|
+
FastAPI = None
|
|
20
|
+
HTTPException = None
|
|
21
|
+
BaseModel = None
|
|
22
|
+
Request = None
|
|
23
|
+
Response = None
|
|
24
|
+
Depends = None
|
|
25
|
+
CORSMiddleware = None
|
|
26
|
+
HTTPBasic = None
|
|
27
|
+
HTTPBasicCredentials = None
|
|
28
|
+
|
|
29
|
+
try:
|
|
30
|
+
from sentence_transformers import SentenceTransformer
|
|
31
|
+
except ImportError:
|
|
32
|
+
SentenceTransformer = None
|
|
33
|
+
|
|
34
|
+
from .query_processor import preprocess_query, set_global_model
|
|
35
|
+
from .search_engine import SearchEngine
|
|
36
|
+
from signalwire_agents.core.security_config import SecurityConfig
|
|
37
|
+
from signalwire_agents.core.config_loader import ConfigLoader
|
|
38
|
+
from signalwire_agents.core.logging_config import get_logger
|
|
39
|
+
|
|
40
|
+
logger = get_logger("search_service")
|
|
41
|
+
|
|
42
|
+
# Simple LRU cache for query results
|
|
43
|
+
from functools import lru_cache
|
|
44
|
+
import hashlib
|
|
45
|
+
import json
|
|
46
|
+
|
|
47
|
+
# Pydantic models for API
|
|
48
|
+
if BaseModel:
    # pydantic imported successfully: declare the API schemas as BaseModel
    # subclasses. SearchResponse is used as the response_model of the
    # POST /search route.
    class SearchRequest(BaseModel):
        # Text of the query; passed to preprocess_query() by the service.
        query: str
        # Key into SearchService.search_engines selecting the index to query.
        index_name: str = "default"
        # Forwarded to SearchEngine.search() as `count`.
        count: int = 3
        # Forwarded to SearchEngine.search() as `similarity_threshold`.
        similarity_threshold: float = 0.0
        # Optional tag list forwarded to SearchEngine.search() as `tags`.
        tags: Optional[List[str]] = None
        # Language hint for preprocess_query(); the service substitutes
        # 'auto' when this is None/empty.
        language: Optional[str] = None

    class SearchResult(BaseModel):
        # One hit, built from the 'content' / 'score' / 'metadata' keys of a
        # result dict returned by SearchEngine.search().
        content: str
        score: float
        metadata: Dict[str, Any]

    class SearchResponse(BaseModel):
        # Hits in the order the search engine returned them.
        results: List[SearchResult]
        # Diagnostic dict filled by the service with the original query, the
        # enhanced query, the detected language and the POS analysis.
        query_analysis: Optional[Dict[str, Any]] = None
else:
    # Fallback classes used when the fastapi/pydantic import block failed
    # (BaseModel is None). They are plain attribute holders with the same
    # constructor keywords as the pydantic models above and do no validation.
    class SearchRequest:
        def __init__(self, query: str, index_name: str = "default", count: int = 3,
                     similarity_threshold: float = 0.0, tags: Optional[List[str]] = None,
                     language: Optional[str] = None):
            self.query = query
            self.index_name = index_name
            self.count = count
            self.similarity_threshold = similarity_threshold
            self.tags = tags
            self.language = language

    class SearchResult:
        def __init__(self, content: str, score: float, metadata: Dict[str, Any]):
            self.content = content
            self.score = score
            self.metadata = metadata

    class SearchResponse:
        def __init__(self, results: List[SearchResult], query_analysis: Optional[Dict[str, Any]] = None):
            self.results = results
            self.query_analysis = query_analysis
|
|
88
|
+
|
|
89
|
+
def _cache_key(query: str, index_name: str, count: int, tags: Optional[List[str]] = None) -> str:
    """
    Build a deterministic cache key for a search request.

    The query is lower-cased and stripped and the tags are sorted, so
    requests differing only in query case, surrounding whitespace or tag
    order share a key.

    Args:
        query: Raw query text.
        index_name: Name of the index being searched.
        count: Number of results requested.
        tags: Optional tag filter; None and an empty list are equivalent.

    Returns:
        Hex MD5 digest of the JSON-serialised key fields.
    """
    normalized_query = query.lower().strip()
    ordered_tags = sorted(tags) if tags else []
    payload = json.dumps(
        {
            'count': count,
            'index': index_name,
            'query': normalized_query,
            'tags': ordered_tags,
        },
        sort_keys=True,  # key order in the literal above is irrelevant
    )
    digest = hashlib.md5(payload.encode())
    return digest.hexdigest()
|
|
99
|
+
|
|
100
|
+
class SearchService:
|
|
101
|
+
"""Local search service with HTTP API supporting both SQLite and pgvector backends"""
|
|
102
|
+
|
|
103
|
+
def __init__(self, port: int = 8001, indexes: Dict[str, str] = None,
             basic_auth: Optional[Tuple[str, str]] = None,
             config_file: Optional[str] = None,
             backend: str = 'sqlite',
             connection_string: Optional[str] = None):
    """
    Build the service: load configuration, set up security, create the
    FastAPI app (when FastAPI is importable) and load models and engines.

    Precedence: a value read from the config file is kept when the matching
    argument was left at its signature default (8001 / 'sqlite' / None);
    an argument that differs from its default overrides the file. As a
    consequence, explicitly passing a default value cannot override the
    config file.

    Args:
        port: Port to serve on.
        indexes: Mapping of index name to index path (SQLite) or collection
            (pgvector). When not None it replaces the config-file mapping.
        basic_auth: (username, password) pair; when falsy the pair from
            SecurityConfig.get_basic_auth() is used.
        config_file: Path of a config file; when falsy _load_config tries
            ConfigLoader.find_config_file("search").
        backend: 'sqlite' or 'pgvector'.
        connection_string: Database connection string for pgvector.
    """
    # Load configuration first. This always sets self.indexes, self.backend
    # and self.connection_string, and sets self.port only when the config
    # file defines one.
    self._load_config(config_file)

    # Override with constructor params only if they were actually provided.
    # Assigning them unconditionally would discard every config-file value.
    if port != 8001 or not hasattr(self, 'port'):
        self.port = port
    if backend != 'sqlite':
        self.backend = backend
    if connection_string is not None:
        self.connection_string = connection_string
    if indexes is not None:
        self.indexes = indexes

    self.search_engines = {}
    self.model = None
    self._query_cache = {}  # Simple query result cache
    self._cache_size = 100  # Max number of cached queries

    # Load security configuration with optional config file
    self.security = SecurityConfig(config_file=config_file, service_name="search")
    self.security.log_config("SearchService")

    # Set up authentication
    self._basic_auth = basic_auth or self.security.get_basic_auth()

    if FastAPI:
        self.app = FastAPI(title="SignalWire Local Search Service")
        self._setup_security()
        self._setup_routes()
    else:
        self.app = None
        logger.warning("FastAPI not available. HTTP service will not be available.")

    self._load_resources()
|
|
140
|
+
|
|
141
|
+
def _load_config(self, config_file: Optional[str]):
    """
    Reset index/backend settings to defaults, then overlay the 'service'
    section of the config file when one can be found and loaded.

    Args:
        config_file: Explicit config path; when falsy,
            ConfigLoader.find_config_file("search") is consulted instead.
    """
    # Defaults that apply when no usable config exists
    self.indexes = {}
    self.backend = 'sqlite'
    self.connection_string = None

    # Resolve which file to read, bailing out when there is none
    config_file = config_file or ConfigLoader.find_config_file("search")
    if not config_file:
        return

    loader = ConfigLoader([config_file])
    if not loader.has_config():
        return

    logger.info("loading_config_from_file", file=config_file)

    section = loader.get_section('service')
    if not section:
        return

    # The port is the only option that needs conversion
    if 'port' in section:
        self.port = int(section['port'])

    # Options copied onto the instance verbatim
    for option in ('backend', 'connection_string'):
        if option in section:
            setattr(self, option, section[option])

    # Indexes are accepted only when given as a mapping
    if 'indexes' in section and isinstance(section['indexes'], dict):
        self.indexes = section['indexes']
|
|
176
|
+
|
|
177
|
+
def _setup_security(self):
    """
    Attach CORS, security-header and host-validation middleware to the app.

    Does nothing when no FastAPI app was created.
    """
    app = self.app
    if not app:
        return

    # CORS is configured from the security settings when the middleware
    # class could be imported
    if CORSMiddleware:
        cors_options = self.security.get_cors_config()
        app.add_middleware(CORSMiddleware, **cors_options)

    @app.middleware("http")
    async def add_security_headers(request: Request, call_next):
        # Run the request first, then decorate the outgoing response
        response = await call_next(request)
        secure = request.url.scheme == "https"
        for name, value in self.security.get_security_headers(secure).items():
            response.headers[name] = value
        return response

    @app.middleware("http")
    async def validate_host(request: Request, call_next):
        # Only the part of the Host header before the first ':' is checked
        hostname = request.headers.get("host", "").split(":")[0]
        if not hostname or self.security.should_allow_host(hostname):
            return await call_next(request)
        return Response(content="Invalid host", status_code=400)
|
|
210
|
+
|
|
211
|
+
def _get_current_username(self, credentials: HTTPBasicCredentials = None) -> str:
    """
    Validate HTTP Basic credentials against the configured pair.

    Args:
        credentials: Object exposing `username` and `password` strings.

    Returns:
        The authenticated username, or None when no credentials object was
        supplied.

    Raises:
        HTTPException: 401 when the username or password does not match.
    """
    if not credentials:
        return None

    import secrets

    expected_username, expected_password = self._basic_auth

    # compare_digest() accepts str operands only when they are pure ASCII
    # and raises TypeError otherwise, which would surface as a 500. Compare
    # UTF-8 bytes so any client-supplied value yields a clean match/mismatch.
    # Both comparisons are always evaluated (no short-circuit).
    username_correct = secrets.compare_digest(
        credentials.username.encode("utf-8"),
        str(expected_username).encode("utf-8"),
    )
    password_correct = secrets.compare_digest(
        credentials.password.encode("utf-8"),
        str(expected_password).encode("utf-8"),
    )

    if not (username_correct and password_correct):
        raise HTTPException(
            status_code=401,
            detail="Invalid authentication credentials",
            headers={"WWW-Authenticate": "Basic"},
        )

    return credentials.username
|
|
231
|
+
|
|
232
|
+
def _setup_routes(self):
    """
    Register the POST /search, GET /health and POST /reload_index routes.

    /search and /reload_index validate HTTP Basic credentials through
    _get_current_username whenever HTTPBasic is importable; /health is
    unauthenticated. Does nothing when no FastAPI app was created.
    """
    if not self.app:
        return

    import re  # local import: only used to redact the connection string

    # Create security dependency if HTTPBasic is available
    security = HTTPBasic() if HTTPBasic else None

    def redact_connection_string(value):
        """Mask a password embedded in a URL- or DSN-style connection string."""
        if not value:
            return value
        # scheme://user:password@host  ->  scheme://user:***@host
        masked = re.sub(r'(://[^:/@\s]*):[^/\s]*@', r'\1:***@', str(value))
        # "... password=secret ..."    ->  "... password=*** ..."
        return re.sub(r'(?i)(password\s*=\s*)\S+', r'\1***', masked)

    @self.app.post("/search", response_model=SearchResponse)
    async def search(
        request: SearchRequest,
        credentials: HTTPBasicCredentials = None if not security else Depends(security)
    ):
        if security:
            self._get_current_username(credentials)
        return await self._handle_search(request)

    @self.app.get("/health")
    async def health():
        return {
            "status": "healthy",
            "backend": self.backend,
            "indexes": list(self.indexes.keys()),
            "ssl_enabled": self.security.ssl_enabled,
            "auth_required": bool(security),
            # This route is unauthenticated, so never expose the database
            # password that the connection string may carry.
            "connection_string": (
                redact_connection_string(self.connection_string)
                if self.backend == 'pgvector' else None
            )
        }

    @self.app.post("/reload_index")
    async def reload_index(
        index_name: str,
        index_path: str,
        credentials: HTTPBasicCredentials = None if not security else Depends(security)
    ):
        """Reload or add new index/collection"""
        if security:
            self._get_current_username(credentials)

        # Build the engine first so a failure leaves self.indexes and
        # self.search_engines untouched.
        if self.backend == 'pgvector':
            backend_name = 'pgvector'
            try:
                # For pgvector, index_path is actually the collection name
                engine = SearchEngine(
                    backend='pgvector',
                    connection_string=self.connection_string,
                    collection_name=index_path
                )
            except Exception as e:
                raise HTTPException(
                    status_code=500,
                    detail=f"Failed to load pgvector collection: {e}"
                ) from e
            # NOTE(review): self.collection_models is not updated here, so
            # _handle_search will not switch the global model for a
            # collection added through this route - confirm whether needed.
        else:
            backend_name = 'sqlite'
            try:
                engine = SearchEngine(
                    backend='sqlite',
                    index_path=index_path,
                    model=self.model
                )
            except Exception as e:
                raise HTTPException(
                    status_code=500,
                    detail=f"Failed to load index: {e}"
                ) from e

        self.indexes[index_name] = index_path
        self.search_engines[index_name] = engine
        # Cached responses may have been computed against the previous
        # engine for this name; keys are opaque hashes, so drop them all.
        self._query_cache.clear()
        return {"status": "reloaded", "index": index_name, "backend": backend_name}
|
|
297
|
+
|
|
298
|
+
def _load_resources(self):
    """
    Create a SearchEngine for every configured index and load the
    embedding model(s) needed to encode queries.

    SQLite: one model, named by the first index's config, is shared by all
    engines and registered via set_global_model(); a model-load failure is
    logged as a warning and engines are built with model=None.

    pgvector: each key of self.indexes is used as the collection name; the
    model named in each engine's config is loaded once and tracked in
    self.models / self.collection_models. Any error for a collection
    (including a model-load failure) is logged and the loop continues.
    """
    if self.backend != 'pgvector':
        # SQLite backend: a single model shared across all indexes
        if self.indexes and SentenceTransformer:
            first_index_path = next(iter(self.indexes.values()))
            model_name = self._get_model_name(first_index_path)
            try:
                self.model = SentenceTransformer(model_name)
                # Hand the instance to the query processor to avoid reloading
                set_global_model(self.model)
            except Exception as e:
                logger.warning(f"Could not load sentence transformer model: {e}")
                self.model = None

        # One engine per configured index
        for index_name, index_path in self.indexes.items():
            try:
                engine = SearchEngine(
                    backend='sqlite',
                    index_path=index_path,
                    model=self.model
                )
                self.search_engines[index_name] = engine
            except Exception as e:
                logger.error(f"Error loading search engine for {index_name}: {e}")
        return

    # pgvector backend: collections may each use a different model
    self.models = {}  # model_name -> SentenceTransformer instance
    self.collection_models = {}  # collection_name -> model_name

    for collection_name in self.indexes:
        try:
            search_engine = SearchEngine(
                backend='pgvector',
                connection_string=self.connection_string,
                collection_name=collection_name
            )
            self.search_engines[collection_name] = search_engine

            # The collection's own config names the embedding model
            model_name = search_engine.config.get('model_name')
            if not model_name:
                logger.warning(f"No model_name in config for collection {collection_name}")
            else:
                self.collection_models[collection_name] = model_name
                if model_name in self.models:
                    logger.info(f"Using cached model {model_name} for collection {collection_name}")
                else:
                    logger.info(f"Loading model {model_name} for collection {collection_name}")
                    try:
                        model = SentenceTransformer(model_name)
                        model.model_name = model_name  # Store for cache comparison
                        self.models[model_name] = model
                    except Exception as e:
                        logger.error(f"Failed to load model {model_name}: {e}")
                        raise

            logger.info(f"Loaded pgvector collection: {collection_name}")
        except Exception as e:
            logger.error(f"Error loading pgvector collection {collection_name}: {e}")
|
|
364
|
+
|
|
365
|
+
def _get_model_name(self, index_path: str) -> str:
    """
    Get the embedding model name recorded in an index.

    Args:
        index_path: Path of the SQLite index file (ignored for pgvector).

    Returns:
        The 'embedding_model' value from the index's `config` table, or
        'sentence-transformers/all-mpnet-base-v2' when the backend is
        pgvector, the value is missing/empty, or the index cannot be read.
    """
    default_model = 'sentence-transformers/all-mpnet-base-v2'

    if self.backend == 'pgvector':
        # For pgvector, we might want to store model info in the database
        # For now, return default model
        return default_model

    # SQLite backend
    try:
        import sqlite3
        conn = sqlite3.connect(index_path)
        try:
            row = conn.execute(
                "SELECT value FROM config WHERE key = 'embedding_model'"
            ).fetchone()
        finally:
            # Close even when the query raises (e.g. missing config table)
            # so the connection is not leaked.
            conn.close()
    except Exception as e:
        logger.warning(f"Could not get model name from index: {e}")
        return default_model

    # A NULL or empty stored value is treated like a missing row
    return row[0] if row and row[0] else default_model
|
|
384
|
+
|
|
385
|
+
async def _handle_search(self, request: SearchRequest) -> SearchResponse:
    """
    Run a search request against the named index, with result caching.

    Args:
        request: Search parameters (query, index_name, count,
            similarity_threshold, tags, language).

    Returns:
        SearchResponse holding the hits and a query_analysis dict.

    Raises:
        HTTPException: 404 when the index is unknown and FastAPI is present.
        ValueError: When the index is unknown and FastAPI is not present.
    """
    if request.index_name not in self.search_engines:
        message = f"Index '{request.index_name}' not found"
        if HTTPException:
            raise HTTPException(status_code=404, detail=message)
        raise ValueError(message)

    # Check cache first. similarity_threshold and language both change the
    # result, so they are part of the key alongside what _cache_key() hashes;
    # otherwise a request with a different threshold gets a stale answer.
    base_key = _cache_key(request.query, request.index_name, request.count, request.tags)
    cache_key = f"{base_key}:{request.similarity_threshold!r}:{request.language or 'auto'}"
    if cache_key in self._query_cache:
        logger.info(f"Cache hit for query: {request.query[:50]}...")
        return self._query_cache[cache_key]

    search_engine = self.search_engines[request.index_name]

    # For pgvector, set the correct model globally before query processing.
    # NOTE(review): this mutates process-wide state per request; concurrent
    # requests for collections with different models may interfere - confirm.
    if self.backend == 'pgvector' and hasattr(self, 'collection_models'):
        collection_model_name = self.collection_models.get(request.index_name)
        if collection_model_name and collection_model_name in self.models:
            set_global_model(self.models[collection_model_name])
            logger.debug(f"Set global model to {collection_model_name} for collection {request.index_name}")

    # Get model name from the search engine config
    model_name = None
    if hasattr(search_engine, 'config') and search_engine.config:
        # pgvector uses 'model_name', sqlite uses 'embedding_model'
        model_name = search_engine.config.get('model_name') or search_engine.config.get('embedding_model')

    # Set when a step failed and a fallback was substituted; such responses
    # are returned to the caller but must not be cached.
    degraded = False

    # Enhance query
    try:
        enhanced = preprocess_query(
            request.query,
            language=request.language or 'auto',
            vector=True,
            model_name=model_name
        )
    except Exception as e:
        logger.error(f"Error preprocessing query: {e}")
        degraded = True
        enhanced = {
            'enhanced_text': request.query,
            'vector': [],
            'language': 'en'
        }

    # Perform search
    try:
        results = search_engine.search(
            query_vector=enhanced.get('vector', []),
            enhanced_text=enhanced['enhanced_text'],
            count=request.count,
            similarity_threshold=request.similarity_threshold,
            tags=request.tags
        )
    except Exception as e:
        logger.error(f"Error performing search: {e}")
        degraded = True
        results = []

    # Format response
    search_results = [
        SearchResult(
            content=result['content'],
            score=result['score'],
            metadata=result['metadata']
        )
        for result in results
    ]

    response = SearchResponse(
        results=search_results,
        query_analysis={
            'original_query': request.query,
            'enhanced_query': enhanced['enhanced_text'],
            'detected_language': enhanced.get('language'),
            'pos_analysis': enhanced.get('POS')
        }
    )

    # Cache only genuine results, so a transient failure does not keep
    # answering this query with an empty or degraded response.
    if not degraded:
        if len(self._query_cache) >= self._cache_size:
            # Simple FIFO eviction: dicts iterate in insertion order
            oldest_key = next(iter(self._query_cache))
            del self._query_cache[oldest_key]
        self._query_cache[cache_key] = response

    return response
|
|
472
|
+
|
|
473
|
+
def search_direct(self, query: str, index_name: str = "default", count: int = 3,
                  distance: float = 0.0, tags: Optional[List[str]] = None,
                  language: Optional[str] = None) -> Dict[str, Any]:
    """
    Direct search method (non-async) for programmatic use.

    Args:
        query: Query text.
        index_name: Name of the index to search.
        count: Number of results requested.
        distance: Forwarded as the request's similarity_threshold.
        tags: Optional tag filter.
        language: Optional language hint.

    Returns:
        Dict with 'results' (list of content/score/metadata dicts) and
        'query_analysis'.
    """
    # SearchRequest has no `distance` field: pydantic would silently drop
    # it and the fallback class would raise TypeError. Map it onto the
    # field the search path actually reads.
    request = SearchRequest(
        query=query,
        index_name=index_name,
        count=count,
        similarity_threshold=distance,
        tags=tags,
        language=language
    )

    # Use asyncio to run the async method
    import asyncio
    try:
        loop = asyncio.get_event_loop()
        if loop.is_closed():
            # A closed loop cannot run anything; fall through to a new one
            raise RuntimeError("event loop is closed")
    except RuntimeError:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

    # NOTE: run_until_complete() raises RuntimeError when called from code
    # already running inside this event loop.
    response = loop.run_until_complete(self._handle_search(request))

    return {
        'results': [
            {
                'content': r.content,
                'score': r.score,
                'metadata': r.metadata
            }
            for r in response.results
        ],
        'query_analysis': response.query_analysis
    }
|
|
507
|
+
|
|
508
|
+
def start(self, host: str = "0.0.0.0", port: Optional[int] = None,
          ssl_cert: Optional[str] = None, ssl_key: Optional[str] = None):
    """
    Start the service with optional HTTPS support.

    Args:
        host: Host to bind to (default: "0.0.0.0")
        port: Port to bind to (default: self.port)
        ssl_cert: Path to SSL certificate file (overrides security config)
        ssl_key: Path to SSL key file (overrides security config)

    Raises:
        RuntimeError: When FastAPI or uvicorn is not installed.
    """
    if not self.app:
        raise RuntimeError("FastAPI not available. Cannot start HTTP service.")

    port = port or self.port

    # Get SSL configuration: explicit files win, otherwise fall back to
    # whatever the security config provides
    if ssl_cert and ssl_key:
        ssl_kwargs = {
            'ssl_certfile': ssl_cert,
            'ssl_keyfile': ssl_key
        }
    else:
        ssl_kwargs = self.security.get_ssl_context_kwargs()

    # Build startup URL
    scheme = "https" if ssl_kwargs else "http"
    startup_url = f"{scheme}://{host}:{port}"

    # Get auth credentials
    username, password = self._basic_auth

    # Log startup information (the password is deliberately not logged)
    logger.info(
        "starting_search_service",
        url=startup_url,
        ssl_enabled=bool(ssl_kwargs),
        indexes=list(self.indexes.keys()),
        username=username
    )

    # Print user-friendly startup message
    # NOTE(review): this prints the password in clear text to stdout -
    # confirm this is intended.
    print("\nSignalWire Search Service starting...")
    print(f"URL: {startup_url}")
    print(f"Indexes: {', '.join(self.indexes.keys()) if self.indexes else 'None'}")
    print(f"Basic Auth: {username}:{password}")
    if ssl_kwargs:
        print("SSL: Enabled")
    print("")

    # Guard only the import: an ImportError raised while the server runs
    # must not be reported as "uvicorn not available".
    try:
        import uvicorn
    except ImportError as exc:
        raise RuntimeError("uvicorn not available. Cannot start HTTP service.") from exc

    uvicorn.run(
        self.app,
        host=host,
        port=port,
        **ssl_kwargs
    )
|
|
571
|
+
|
|
572
|
+
def stop(self):
    """Stop the service. Currently a no-op placeholder for future cleanup."""
    return None
|