signalwire-agents 0.1.13__py3-none-any.whl → 1.0.17.dev4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- signalwire_agents/__init__.py +99 -15
- signalwire_agents/agent_server.py +248 -60
- signalwire_agents/agents/bedrock.py +296 -0
- signalwire_agents/cli/__init__.py +9 -0
- signalwire_agents/cli/build_search.py +951 -41
- signalwire_agents/cli/config.py +80 -0
- signalwire_agents/cli/core/__init__.py +10 -0
- signalwire_agents/cli/core/agent_loader.py +470 -0
- signalwire_agents/cli/core/argparse_helpers.py +179 -0
- signalwire_agents/cli/core/dynamic_config.py +71 -0
- signalwire_agents/cli/core/service_loader.py +303 -0
- signalwire_agents/cli/dokku.py +2320 -0
- signalwire_agents/cli/execution/__init__.py +10 -0
- signalwire_agents/cli/execution/datamap_exec.py +446 -0
- signalwire_agents/cli/execution/webhook_exec.py +134 -0
- signalwire_agents/cli/init_project.py +2636 -0
- signalwire_agents/cli/output/__init__.py +10 -0
- signalwire_agents/cli/output/output_formatter.py +255 -0
- signalwire_agents/cli/output/swml_dump.py +186 -0
- signalwire_agents/cli/simulation/__init__.py +10 -0
- signalwire_agents/cli/simulation/data_generation.py +374 -0
- signalwire_agents/cli/simulation/data_overrides.py +200 -0
- signalwire_agents/cli/simulation/mock_env.py +282 -0
- signalwire_agents/cli/swaig_test_wrapper.py +52 -0
- signalwire_agents/cli/test_swaig.py +566 -2366
- signalwire_agents/cli/types.py +81 -0
- signalwire_agents/core/__init__.py +2 -2
- signalwire_agents/core/agent/__init__.py +12 -0
- signalwire_agents/core/agent/config/__init__.py +12 -0
- signalwire_agents/core/agent/deployment/__init__.py +9 -0
- signalwire_agents/core/agent/deployment/handlers/__init__.py +9 -0
- signalwire_agents/core/agent/prompt/__init__.py +14 -0
- signalwire_agents/core/agent/prompt/manager.py +306 -0
- signalwire_agents/core/agent/routing/__init__.py +9 -0
- signalwire_agents/core/agent/security/__init__.py +9 -0
- signalwire_agents/core/agent/swml/__init__.py +9 -0
- signalwire_agents/core/agent/tools/__init__.py +15 -0
- signalwire_agents/core/agent/tools/decorator.py +97 -0
- signalwire_agents/core/agent/tools/registry.py +210 -0
- signalwire_agents/core/agent_base.py +845 -2916
- signalwire_agents/core/auth_handler.py +233 -0
- signalwire_agents/core/config_loader.py +259 -0
- signalwire_agents/core/contexts.py +418 -0
- signalwire_agents/core/data_map.py +3 -15
- signalwire_agents/core/function_result.py +116 -44
- signalwire_agents/core/logging_config.py +162 -18
- signalwire_agents/core/mixins/__init__.py +28 -0
- signalwire_agents/core/mixins/ai_config_mixin.py +442 -0
- signalwire_agents/core/mixins/auth_mixin.py +280 -0
- signalwire_agents/core/mixins/prompt_mixin.py +358 -0
- signalwire_agents/core/mixins/serverless_mixin.py +460 -0
- signalwire_agents/core/mixins/skill_mixin.py +55 -0
- signalwire_agents/core/mixins/state_mixin.py +153 -0
- signalwire_agents/core/mixins/tool_mixin.py +230 -0
- signalwire_agents/core/mixins/web_mixin.py +1142 -0
- signalwire_agents/core/security_config.py +333 -0
- signalwire_agents/core/skill_base.py +84 -1
- signalwire_agents/core/skill_manager.py +62 -20
- signalwire_agents/core/swaig_function.py +18 -5
- signalwire_agents/core/swml_builder.py +207 -11
- signalwire_agents/core/swml_handler.py +27 -21
- signalwire_agents/core/swml_renderer.py +123 -312
- signalwire_agents/core/swml_service.py +171 -203
- signalwire_agents/mcp_gateway/__init__.py +29 -0
- signalwire_agents/mcp_gateway/gateway_service.py +564 -0
- signalwire_agents/mcp_gateway/mcp_manager.py +513 -0
- signalwire_agents/mcp_gateway/session_manager.py +218 -0
- signalwire_agents/prefabs/concierge.py +0 -3
- signalwire_agents/prefabs/faq_bot.py +0 -3
- signalwire_agents/prefabs/info_gatherer.py +0 -3
- signalwire_agents/prefabs/receptionist.py +0 -3
- signalwire_agents/prefabs/survey.py +0 -3
- signalwire_agents/schema.json +9218 -5489
- signalwire_agents/search/__init__.py +7 -1
- signalwire_agents/search/document_processor.py +490 -31
- signalwire_agents/search/index_builder.py +307 -37
- signalwire_agents/search/migration.py +418 -0
- signalwire_agents/search/models.py +30 -0
- signalwire_agents/search/pgvector_backend.py +748 -0
- signalwire_agents/search/query_processor.py +162 -31
- signalwire_agents/search/search_engine.py +916 -35
- signalwire_agents/search/search_service.py +376 -53
- signalwire_agents/skills/README.md +452 -0
- signalwire_agents/skills/__init__.py +14 -2
- signalwire_agents/skills/api_ninjas_trivia/README.md +215 -0
- signalwire_agents/skills/api_ninjas_trivia/__init__.py +12 -0
- signalwire_agents/skills/api_ninjas_trivia/skill.py +237 -0
- signalwire_agents/skills/datasphere/README.md +210 -0
- signalwire_agents/skills/datasphere/skill.py +84 -3
- signalwire_agents/skills/datasphere_serverless/README.md +258 -0
- signalwire_agents/skills/datasphere_serverless/__init__.py +9 -0
- signalwire_agents/skills/datasphere_serverless/skill.py +82 -1
- signalwire_agents/skills/datetime/README.md +132 -0
- signalwire_agents/skills/datetime/__init__.py +9 -0
- signalwire_agents/skills/datetime/skill.py +20 -7
- signalwire_agents/skills/joke/README.md +149 -0
- signalwire_agents/skills/joke/__init__.py +9 -0
- signalwire_agents/skills/joke/skill.py +21 -0
- signalwire_agents/skills/math/README.md +161 -0
- signalwire_agents/skills/math/__init__.py +9 -0
- signalwire_agents/skills/math/skill.py +18 -4
- signalwire_agents/skills/mcp_gateway/README.md +230 -0
- signalwire_agents/skills/mcp_gateway/__init__.py +10 -0
- signalwire_agents/skills/mcp_gateway/skill.py +421 -0
- signalwire_agents/skills/native_vector_search/README.md +210 -0
- signalwire_agents/skills/native_vector_search/__init__.py +9 -0
- signalwire_agents/skills/native_vector_search/skill.py +569 -101
- signalwire_agents/skills/play_background_file/README.md +218 -0
- signalwire_agents/skills/play_background_file/__init__.py +12 -0
- signalwire_agents/skills/play_background_file/skill.py +242 -0
- signalwire_agents/skills/registry.py +395 -40
- signalwire_agents/skills/spider/README.md +236 -0
- signalwire_agents/skills/spider/__init__.py +13 -0
- signalwire_agents/skills/spider/skill.py +598 -0
- signalwire_agents/skills/swml_transfer/README.md +395 -0
- signalwire_agents/skills/swml_transfer/__init__.py +10 -0
- signalwire_agents/skills/swml_transfer/skill.py +359 -0
- signalwire_agents/skills/weather_api/README.md +178 -0
- signalwire_agents/skills/weather_api/__init__.py +12 -0
- signalwire_agents/skills/weather_api/skill.py +191 -0
- signalwire_agents/skills/web_search/README.md +163 -0
- signalwire_agents/skills/web_search/__init__.py +9 -0
- signalwire_agents/skills/web_search/skill.py +586 -112
- signalwire_agents/skills/wikipedia_search/README.md +228 -0
- signalwire_agents/{core/state → skills/wikipedia_search}/__init__.py +5 -4
- signalwire_agents/skills/{wikipedia → wikipedia_search}/skill.py +33 -3
- signalwire_agents/web/__init__.py +17 -0
- signalwire_agents/web/web_service.py +559 -0
- signalwire_agents-1.0.17.dev4.data/data/share/man/man1/sw-agent-init.1 +400 -0
- signalwire_agents-1.0.17.dev4.data/data/share/man/man1/sw-search.1 +483 -0
- signalwire_agents-1.0.17.dev4.data/data/share/man/man1/swaig-test.1 +308 -0
- {signalwire_agents-0.1.13.dist-info → signalwire_agents-1.0.17.dev4.dist-info}/METADATA +347 -215
- signalwire_agents-1.0.17.dev4.dist-info/RECORD +147 -0
- signalwire_agents-1.0.17.dev4.dist-info/entry_points.txt +6 -0
- signalwire_agents/core/state/file_state_manager.py +0 -219
- signalwire_agents/core/state/state_manager.py +0 -101
- signalwire_agents/skills/wikipedia/__init__.py +0 -9
- signalwire_agents-0.1.13.data/data/schema.json +0 -5611
- signalwire_agents-0.1.13.dist-info/RECORD +0 -67
- signalwire_agents-0.1.13.dist-info/entry_points.txt +0 -3
- {signalwire_agents-0.1.13.dist-info → signalwire_agents-1.0.17.dev4.dist-info}/WHEEL +0 -0
- {signalwire_agents-0.1.13.dist-info → signalwire_agents-1.0.17.dev4.dist-info}/licenses/LICENSE +0 -0
- {signalwire_agents-0.1.13.dist-info → signalwire_agents-1.0.17.dev4.dist-info}/top_level.txt +0 -0
|
@@ -28,6 +28,206 @@ class NativeVectorSearchSkill(SkillBase):
|
|
|
28
28
|
# Enable multiple instances support
|
|
29
29
|
SUPPORTS_MULTIPLE_INSTANCES = True
|
|
30
30
|
|
|
31
|
+
@classmethod
|
|
32
|
+
def get_parameter_schema(cls) -> Dict[str, Dict[str, Any]]:
|
|
33
|
+
"""Get parameter schema for Native Vector Search skill
|
|
34
|
+
|
|
35
|
+
This skill supports three modes of operation:
|
|
36
|
+
1. Network Mode: Set 'remote_url' to connect to a remote search server
|
|
37
|
+
2. Local pgvector: Set backend='pgvector' with connection_string and collection_name
|
|
38
|
+
3. Local SQLite: Set 'index_file' to use a local .swsearch file (default)
|
|
39
|
+
"""
|
|
40
|
+
schema = super().get_parameter_schema()
|
|
41
|
+
schema.update({
|
|
42
|
+
"index_file": {
|
|
43
|
+
"type": "string",
|
|
44
|
+
"description": "Path to .swsearch index file (SQLite backend only). Use this for local file-based search",
|
|
45
|
+
"required": False
|
|
46
|
+
},
|
|
47
|
+
"build_index": {
|
|
48
|
+
"type": "boolean",
|
|
49
|
+
"description": "Whether to build index from source files",
|
|
50
|
+
"default": False,
|
|
51
|
+
"required": False
|
|
52
|
+
},
|
|
53
|
+
"source_dir": {
|
|
54
|
+
"type": "string",
|
|
55
|
+
"description": "Directory containing documents to index (required if build_index=True)",
|
|
56
|
+
"required": False
|
|
57
|
+
},
|
|
58
|
+
"remote_url": {
|
|
59
|
+
"type": "string",
|
|
60
|
+
"description": "URL of remote search server for network mode (e.g., http://localhost:8001). Use this instead of index_file or pgvector for centralized search",
|
|
61
|
+
"required": False
|
|
62
|
+
},
|
|
63
|
+
"index_name": {
|
|
64
|
+
"type": "string",
|
|
65
|
+
"description": "Name of index on remote server (network mode only, used with remote_url)",
|
|
66
|
+
"default": "default",
|
|
67
|
+
"required": False
|
|
68
|
+
},
|
|
69
|
+
"count": {
|
|
70
|
+
"type": "integer",
|
|
71
|
+
"description": "Number of search results to return",
|
|
72
|
+
"default": 5,
|
|
73
|
+
"required": False,
|
|
74
|
+
"minimum": 1,
|
|
75
|
+
"maximum": 20
|
|
76
|
+
},
|
|
77
|
+
"similarity_threshold": {
|
|
78
|
+
"type": "number",
|
|
79
|
+
"description": "Minimum similarity score for results (0.0 = no limit, 1.0 = exact match)",
|
|
80
|
+
"default": 0.0,
|
|
81
|
+
"required": False,
|
|
82
|
+
"minimum": 0.0,
|
|
83
|
+
"maximum": 1.0
|
|
84
|
+
},
|
|
85
|
+
"tags": {
|
|
86
|
+
"type": "array",
|
|
87
|
+
"description": "Tags to filter search results",
|
|
88
|
+
"default": [],
|
|
89
|
+
"required": False,
|
|
90
|
+
"items": {
|
|
91
|
+
"type": "string"
|
|
92
|
+
}
|
|
93
|
+
},
|
|
94
|
+
"global_tags": {
|
|
95
|
+
"type": "array",
|
|
96
|
+
"description": "Tags to apply to all indexed documents",
|
|
97
|
+
"default": [],
|
|
98
|
+
"required": False,
|
|
99
|
+
"items": {
|
|
100
|
+
"type": "string"
|
|
101
|
+
}
|
|
102
|
+
},
|
|
103
|
+
"file_types": {
|
|
104
|
+
"type": "array",
|
|
105
|
+
"description": "File extensions to include when building index",
|
|
106
|
+
"default": ["md", "txt", "pdf", "docx", "html"],
|
|
107
|
+
"required": False,
|
|
108
|
+
"items": {
|
|
109
|
+
"type": "string"
|
|
110
|
+
}
|
|
111
|
+
},
|
|
112
|
+
"exclude_patterns": {
|
|
113
|
+
"type": "array",
|
|
114
|
+
"description": "Patterns to exclude when building index",
|
|
115
|
+
"default": ["**/node_modules/**", "**/.git/**", "**/dist/**", "**/build/**"],
|
|
116
|
+
"required": False,
|
|
117
|
+
"items": {
|
|
118
|
+
"type": "string"
|
|
119
|
+
}
|
|
120
|
+
},
|
|
121
|
+
"no_results_message": {
|
|
122
|
+
"type": "string",
|
|
123
|
+
"description": "Message when no results are found",
|
|
124
|
+
"default": "No information found for '{query}'",
|
|
125
|
+
"required": False
|
|
126
|
+
},
|
|
127
|
+
"response_prefix": {
|
|
128
|
+
"type": "string",
|
|
129
|
+
"description": "Prefix to add to search results",
|
|
130
|
+
"default": "",
|
|
131
|
+
"required": False
|
|
132
|
+
},
|
|
133
|
+
"response_postfix": {
|
|
134
|
+
"type": "string",
|
|
135
|
+
"description": "Postfix to add to search results",
|
|
136
|
+
"default": "",
|
|
137
|
+
"required": False
|
|
138
|
+
},
|
|
139
|
+
"max_content_length": {
|
|
140
|
+
"type": "integer",
|
|
141
|
+
"description": "Maximum total response size in characters (distributed across all results)",
|
|
142
|
+
"default": 32768,
|
|
143
|
+
"required": False,
|
|
144
|
+
"minimum": 1000
|
|
145
|
+
},
|
|
146
|
+
"response_format_callback": {
|
|
147
|
+
"type": "callable",
|
|
148
|
+
"description": "Optional callback function to format/transform the response. Called with (response, agent, query, results, args). Must return a string.",
|
|
149
|
+
"required": False
|
|
150
|
+
},
|
|
151
|
+
"description": {
|
|
152
|
+
"type": "string",
|
|
153
|
+
"description": "Tool description",
|
|
154
|
+
"default": "Search the knowledge base for information",
|
|
155
|
+
"required": False
|
|
156
|
+
},
|
|
157
|
+
"hints": {
|
|
158
|
+
"type": "array",
|
|
159
|
+
"description": "Speech recognition hints",
|
|
160
|
+
"default": [],
|
|
161
|
+
"required": False,
|
|
162
|
+
"items": {
|
|
163
|
+
"type": "string"
|
|
164
|
+
}
|
|
165
|
+
},
|
|
166
|
+
"nlp_backend": {
|
|
167
|
+
"type": "string",
|
|
168
|
+
"description": "NLP backend for query processing",
|
|
169
|
+
"default": "basic",
|
|
170
|
+
"required": False,
|
|
171
|
+
"enum": ["basic", "spacy", "nltk"]
|
|
172
|
+
},
|
|
173
|
+
"query_nlp_backend": {
|
|
174
|
+
"type": "string",
|
|
175
|
+
"description": "NLP backend for query expansion",
|
|
176
|
+
"required": False,
|
|
177
|
+
"enum": ["basic", "spacy", "nltk"]
|
|
178
|
+
},
|
|
179
|
+
"index_nlp_backend": {
|
|
180
|
+
"type": "string",
|
|
181
|
+
"description": "NLP backend for indexing",
|
|
182
|
+
"required": False,
|
|
183
|
+
"enum": ["basic", "spacy", "nltk"]
|
|
184
|
+
},
|
|
185
|
+
"backend": {
|
|
186
|
+
"type": "string",
|
|
187
|
+
"description": "Storage backend for local database mode: 'sqlite' for file-based or 'pgvector' for PostgreSQL. Ignored if remote_url is set",
|
|
188
|
+
"default": "sqlite",
|
|
189
|
+
"required": False,
|
|
190
|
+
"enum": ["sqlite", "pgvector"]
|
|
191
|
+
},
|
|
192
|
+
"connection_string": {
|
|
193
|
+
"type": "string",
|
|
194
|
+
"description": "PostgreSQL connection string (pgvector backend only, e.g., 'postgresql://user:pass@localhost:5432/dbname'). Required when backend='pgvector'",
|
|
195
|
+
"required": False
|
|
196
|
+
},
|
|
197
|
+
"collection_name": {
|
|
198
|
+
"type": "string",
|
|
199
|
+
"description": "Collection/table name in PostgreSQL (pgvector backend only). Required when backend='pgvector'",
|
|
200
|
+
"required": False
|
|
201
|
+
},
|
|
202
|
+
"verbose": {
|
|
203
|
+
"type": "boolean",
|
|
204
|
+
"description": "Enable verbose logging",
|
|
205
|
+
"default": False,
|
|
206
|
+
"required": False
|
|
207
|
+
},
|
|
208
|
+
"keyword_weight": {
|
|
209
|
+
"type": "number",
|
|
210
|
+
"description": "Manual keyword weight (0.0-1.0). Overrides automatic weight detection",
|
|
211
|
+
"default": None,
|
|
212
|
+
"required": False,
|
|
213
|
+
"minimum": 0.0,
|
|
214
|
+
"maximum": 1.0
|
|
215
|
+
},
|
|
216
|
+
"model_name": {
|
|
217
|
+
"type": "string",
|
|
218
|
+
"description": "Embedding model to use. Options: 'mini' (fastest, 384 dims), 'base' (balanced, 768 dims), 'large' (same as base). Or specify full model name like 'sentence-transformers/all-MiniLM-L6-v2'",
|
|
219
|
+
"default": "mini",
|
|
220
|
+
"required": False
|
|
221
|
+
},
|
|
222
|
+
"overwrite": {
|
|
223
|
+
"type": "boolean",
|
|
224
|
+
"description": "Overwrite existing pgvector collection when building index (pgvector backend only)",
|
|
225
|
+
"default": False,
|
|
226
|
+
"required": False
|
|
227
|
+
}
|
|
228
|
+
})
|
|
229
|
+
return schema
|
|
230
|
+
|
|
31
231
|
def get_instance_key(self) -> str:
|
|
32
232
|
"""
|
|
33
233
|
Get the key used to track this skill instance
|
|
@@ -41,24 +241,16 @@ class NativeVectorSearchSkill(SkillBase):
|
|
|
41
241
|
def setup(self) -> bool:
|
|
42
242
|
"""Setup the native vector search skill"""
|
|
43
243
|
|
|
44
|
-
#
|
|
45
|
-
try:
|
|
46
|
-
from signalwire_agents.search import IndexBuilder, SearchEngine
|
|
47
|
-
from signalwire_agents.search.query_processor import preprocess_query
|
|
48
|
-
self.search_available = True
|
|
49
|
-
except ImportError as e:
|
|
50
|
-
self.search_available = False
|
|
51
|
-
self.import_error = str(e)
|
|
52
|
-
self.logger.warning(f"Search dependencies not available: {e}")
|
|
53
|
-
# Don't fail setup - we'll provide helpful error messages at runtime
|
|
54
|
-
|
|
55
|
-
# Get configuration
|
|
244
|
+
# Get configuration first
|
|
56
245
|
self.tool_name = self.params.get('tool_name', 'search_knowledge')
|
|
246
|
+
self.backend = self.params.get('backend', 'sqlite')
|
|
247
|
+
self.connection_string = self.params.get('connection_string')
|
|
248
|
+
self.collection_name = self.params.get('collection_name')
|
|
57
249
|
self.index_file = self.params.get('index_file')
|
|
58
250
|
self.build_index = self.params.get('build_index', False)
|
|
59
251
|
self.source_dir = self.params.get('source_dir')
|
|
60
252
|
self.count = self.params.get('count', 5)
|
|
61
|
-
self.
|
|
253
|
+
self.similarity_threshold = self.params.get('similarity_threshold', 0.0)
|
|
62
254
|
self.tags = self.params.get('tags', [])
|
|
63
255
|
self.no_results_message = self.params.get(
|
|
64
256
|
'no_results_message',
|
|
@@ -66,73 +258,204 @@ class NativeVectorSearchSkill(SkillBase):
|
|
|
66
258
|
)
|
|
67
259
|
self.response_prefix = self.params.get('response_prefix', '')
|
|
68
260
|
self.response_postfix = self.params.get('response_postfix', '')
|
|
261
|
+
self.max_content_length = self.params.get('max_content_length', 32768)
|
|
262
|
+
self.response_format_callback = self.params.get('response_format_callback')
|
|
263
|
+
self.keyword_weight = self.params.get('keyword_weight')
|
|
264
|
+
self.model_name = self.params.get('model_name', 'mini')
|
|
69
265
|
|
|
70
266
|
# Remote search server configuration
|
|
71
|
-
self.remote_url = self.params.get('remote_url') # e.g., "http://localhost:8001"
|
|
267
|
+
self.remote_url = self.params.get('remote_url') # e.g., "http://user:pass@localhost:8001"
|
|
72
268
|
self.index_name = self.params.get('index_name', 'default') # For remote searches
|
|
73
269
|
|
|
74
|
-
#
|
|
75
|
-
self.
|
|
270
|
+
# Parse auth from URL if present
|
|
271
|
+
self.remote_auth = None
|
|
272
|
+
self.remote_base_url = self.remote_url
|
|
273
|
+
if self.remote_url:
|
|
274
|
+
from urllib.parse import urlparse
|
|
275
|
+
parsed = urlparse(self.remote_url)
|
|
276
|
+
if parsed.username and parsed.password:
|
|
277
|
+
self.remote_auth = (parsed.username, parsed.password)
|
|
278
|
+
# Reconstruct URL without auth for display
|
|
279
|
+
self.remote_base_url = f"{parsed.scheme}://{parsed.hostname}"
|
|
280
|
+
if parsed.port:
|
|
281
|
+
self.remote_base_url += f":{parsed.port}"
|
|
282
|
+
if parsed.path:
|
|
283
|
+
self.remote_base_url += parsed.path
|
|
76
284
|
|
|
77
|
-
#
|
|
78
|
-
|
|
79
|
-
if self.nlp_backend not in ['nltk', 'spacy']:
|
|
80
|
-
self.logger.warning(f"Invalid nlp_backend '{self.nlp_backend}', using 'nltk'")
|
|
81
|
-
self.nlp_backend = 'nltk'
|
|
285
|
+
# SWAIG fields are already extracted by SkillBase.__init__()
|
|
286
|
+
# No need to re-fetch from params - use self.swaig_fields inherited from parent
|
|
82
287
|
|
|
83
|
-
#
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
self.index_file = f"{source_name}.swsearch"
|
|
89
|
-
|
|
90
|
-
# Build index if it doesn't exist
|
|
91
|
-
if not os.path.exists(self.index_file):
|
|
92
|
-
try:
|
|
93
|
-
self.logger.info(f"Building search index from {self.source_dir}...")
|
|
94
|
-
from signalwire_agents.search import IndexBuilder
|
|
95
|
-
|
|
96
|
-
builder = IndexBuilder(verbose=self.params.get('verbose', False))
|
|
97
|
-
builder.build_index(
|
|
98
|
-
source_dir=self.source_dir,
|
|
99
|
-
output_file=self.index_file,
|
|
100
|
-
file_types=self.params.get('file_types', ['md', 'txt']),
|
|
101
|
-
exclude_patterns=self.params.get('exclude_patterns'),
|
|
102
|
-
tags=self.params.get('global_tags')
|
|
103
|
-
)
|
|
104
|
-
self.logger.info(f"Search index created: {self.index_file}")
|
|
105
|
-
except Exception as e:
|
|
106
|
-
self.logger.error(f"Failed to build search index: {e}")
|
|
107
|
-
self.search_available = False
|
|
108
|
-
|
|
109
|
-
# Initialize search engine
|
|
110
|
-
self.search_engine = None
|
|
111
|
-
if self.search_available and self.index_file and os.path.exists(self.index_file):
|
|
112
|
-
try:
|
|
113
|
-
from signalwire_agents.search import SearchEngine
|
|
114
|
-
self.search_engine = SearchEngine(self.index_file)
|
|
115
|
-
except Exception as e:
|
|
116
|
-
self.logger.error(f"Failed to load search index {self.index_file}: {e}")
|
|
117
|
-
self.search_available = False
|
|
118
|
-
|
|
119
|
-
# Check if we should use remote search mode
|
|
120
|
-
self.use_remote = bool(self.remote_url)
|
|
121
|
-
if self.use_remote:
|
|
288
|
+
# **EARLY REMOTE CHECK - Option 1**
|
|
289
|
+
# If remote URL is configured, skip all heavy local imports and just validate remote connectivity
|
|
290
|
+
if self.remote_url:
|
|
291
|
+
self.use_remote = True
|
|
292
|
+
self.search_engine = None # No local search engine needed
|
|
122
293
|
self.logger.info(f"Using remote search server: {self.remote_url}")
|
|
123
|
-
|
|
294
|
+
|
|
295
|
+
# Test remote connection (lightweight check)
|
|
124
296
|
try:
|
|
125
297
|
import requests
|
|
126
|
-
|
|
298
|
+
# Use parsed base URL and auth
|
|
299
|
+
response = requests.get(
|
|
300
|
+
f"{self.remote_base_url}/health",
|
|
301
|
+
auth=self.remote_auth,
|
|
302
|
+
timeout=5
|
|
303
|
+
)
|
|
127
304
|
if response.status_code == 200:
|
|
128
|
-
self.logger.info("Remote search server is available")
|
|
305
|
+
self.logger.info(f"Remote search server is available at {self.remote_base_url}")
|
|
129
306
|
self.search_available = True
|
|
307
|
+
return True # Success - skip all local setup
|
|
308
|
+
elif response.status_code == 401:
|
|
309
|
+
self.logger.error("Authentication failed for remote search server. Check credentials.")
|
|
310
|
+
self.search_available = False
|
|
311
|
+
return False
|
|
130
312
|
else:
|
|
131
313
|
self.logger.error(f"Remote search server returned status {response.status_code}")
|
|
132
314
|
self.search_available = False
|
|
315
|
+
return False
|
|
133
316
|
except Exception as e:
|
|
134
317
|
self.logger.error(f"Failed to connect to remote search server: {e}")
|
|
135
318
|
self.search_available = False
|
|
319
|
+
return False
|
|
320
|
+
|
|
321
|
+
# **LOCAL MODE SETUP - Only when no remote URL**
|
|
322
|
+
self.use_remote = False
|
|
323
|
+
|
|
324
|
+
# NLP backend configuration (only needed for local mode)
|
|
325
|
+
self.nlp_backend = self.params.get('nlp_backend') # Backward compatibility
|
|
326
|
+
self.index_nlp_backend = self.params.get('index_nlp_backend', 'nltk') # Default to fast NLTK for indexing
|
|
327
|
+
self.query_nlp_backend = self.params.get('query_nlp_backend', 'nltk') # Default to fast NLTK for search
|
|
328
|
+
|
|
329
|
+
# Handle backward compatibility
|
|
330
|
+
if self.nlp_backend is not None:
|
|
331
|
+
self.logger.warning("Parameter 'nlp_backend' is deprecated. Use 'index_nlp_backend' and 'query_nlp_backend' instead.")
|
|
332
|
+
# If old parameter is used, apply it to both
|
|
333
|
+
self.index_nlp_backend = self.nlp_backend
|
|
334
|
+
self.query_nlp_backend = self.nlp_backend
|
|
335
|
+
|
|
336
|
+
# Validate parameters
|
|
337
|
+
if self.index_nlp_backend not in ['nltk', 'spacy']:
|
|
338
|
+
self.logger.warning(f"Invalid index_nlp_backend '{self.index_nlp_backend}', using 'nltk'")
|
|
339
|
+
self.index_nlp_backend = 'nltk'
|
|
340
|
+
|
|
341
|
+
if self.query_nlp_backend not in ['nltk', 'spacy']:
|
|
342
|
+
self.logger.warning(f"Invalid query_nlp_backend '{self.query_nlp_backend}', using 'nltk'")
|
|
343
|
+
self.query_nlp_backend = 'nltk'
|
|
344
|
+
|
|
345
|
+
# Check if local search functionality is available (heavy imports only for local mode)
|
|
346
|
+
try:
|
|
347
|
+
from signalwire_agents.search import IndexBuilder, SearchEngine
|
|
348
|
+
from signalwire_agents.search.query_processor import preprocess_query
|
|
349
|
+
self.search_available = True
|
|
350
|
+
except ImportError as e:
|
|
351
|
+
self.search_available = False
|
|
352
|
+
self.import_error = str(e)
|
|
353
|
+
self.logger.warning(f"Search dependencies not available: {e}")
|
|
354
|
+
# Don't fail setup - we'll provide helpful error messages at runtime
|
|
355
|
+
|
|
356
|
+
# Auto-build index if requested and search is available
|
|
357
|
+
if self.build_index and self.source_dir and self.search_available:
|
|
358
|
+
# Handle auto-build for different backends
|
|
359
|
+
if self.backend == 'sqlite':
|
|
360
|
+
if not self.index_file:
|
|
361
|
+
# Generate index filename from source directory
|
|
362
|
+
source_name = Path(self.source_dir).name
|
|
363
|
+
self.index_file = f"{source_name}.swsearch"
|
|
364
|
+
|
|
365
|
+
# Build index if it doesn't exist
|
|
366
|
+
if not os.path.exists(self.index_file):
|
|
367
|
+
try:
|
|
368
|
+
self.logger.info(f"Building search index from {self.source_dir}...")
|
|
369
|
+
from signalwire_agents.search import IndexBuilder
|
|
370
|
+
|
|
371
|
+
# Resolve model alias if needed
|
|
372
|
+
from signalwire_agents.search.models import resolve_model_alias
|
|
373
|
+
model_to_use = resolve_model_alias(self.model_name)
|
|
374
|
+
|
|
375
|
+
builder = IndexBuilder(
|
|
376
|
+
model_name=model_to_use,
|
|
377
|
+
verbose=self.params.get('verbose', False),
|
|
378
|
+
index_nlp_backend=self.index_nlp_backend
|
|
379
|
+
)
|
|
380
|
+
builder.build_index(
|
|
381
|
+
source_dir=self.source_dir,
|
|
382
|
+
output_file=self.index_file,
|
|
383
|
+
file_types=self.params.get('file_types', ['md', 'txt']),
|
|
384
|
+
exclude_patterns=self.params.get('exclude_patterns'),
|
|
385
|
+
tags=self.params.get('global_tags')
|
|
386
|
+
)
|
|
387
|
+
self.logger.info(f"Search index created: {self.index_file}")
|
|
388
|
+
except Exception as e:
|
|
389
|
+
self.logger.error(f"Failed to build search index: {e}")
|
|
390
|
+
self.search_available = False
|
|
391
|
+
|
|
392
|
+
elif self.backend == 'pgvector':
|
|
393
|
+
# Auto-build for pgvector
|
|
394
|
+
if self.connection_string and self.collection_name:
|
|
395
|
+
try:
|
|
396
|
+
self.logger.info(f"Building pgvector index from {self.source_dir}...")
|
|
397
|
+
from signalwire_agents.search import IndexBuilder
|
|
398
|
+
from signalwire_agents.search.models import resolve_model_alias
|
|
399
|
+
|
|
400
|
+
model_to_use = resolve_model_alias(self.model_name)
|
|
401
|
+
|
|
402
|
+
builder = IndexBuilder(
|
|
403
|
+
backend='pgvector',
|
|
404
|
+
connection_string=self.connection_string,
|
|
405
|
+
model_name=model_to_use,
|
|
406
|
+
verbose=self.params.get('verbose', False),
|
|
407
|
+
index_nlp_backend=self.index_nlp_backend
|
|
408
|
+
)
|
|
409
|
+
|
|
410
|
+
builder.build_index(
|
|
411
|
+
source_dir=self.source_dir,
|
|
412
|
+
output_file=self.collection_name, # pgvector uses this as collection name
|
|
413
|
+
file_types=self.params.get('file_types', ['md', 'txt']),
|
|
414
|
+
exclude_patterns=self.params.get('exclude_patterns'),
|
|
415
|
+
tags=self.params.get('global_tags'),
|
|
416
|
+
overwrite=self.params.get('overwrite', False)
|
|
417
|
+
)
|
|
418
|
+
self.logger.info(f"pgvector collection created: {self.collection_name}")
|
|
419
|
+
except Exception as e:
|
|
420
|
+
self.logger.error(f"Failed to build pgvector index: {e}")
|
|
421
|
+
# Don't set search_available to False - we might be connecting to existing collection
|
|
422
|
+
else:
|
|
423
|
+
self.logger.warning("pgvector auto-build requires connection_string and collection_name")
|
|
424
|
+
|
|
425
|
+
# Initialize local search engine
|
|
426
|
+
self.search_engine = None
|
|
427
|
+
if self.search_available:
|
|
428
|
+
if self.backend == 'pgvector':
|
|
429
|
+
# Initialize pgvector backend
|
|
430
|
+
if self.connection_string and self.collection_name:
|
|
431
|
+
try:
|
|
432
|
+
from signalwire_agents.search import SearchEngine
|
|
433
|
+
self.search_engine = SearchEngine(
|
|
434
|
+
backend='pgvector',
|
|
435
|
+
connection_string=self.connection_string,
|
|
436
|
+
collection_name=self.collection_name
|
|
437
|
+
)
|
|
438
|
+
self.logger.info(f"Connected to pgvector collection: {self.collection_name}")
|
|
439
|
+
except Exception as e:
|
|
440
|
+
self.logger.error(f"Failed to connect to pgvector: {e}")
|
|
441
|
+
self.search_available = False
|
|
442
|
+
else:
|
|
443
|
+
self.logger.error("pgvector backend requires connection_string and collection_name")
|
|
444
|
+
self.search_available = False
|
|
445
|
+
elif self.index_file and os.path.exists(self.index_file):
|
|
446
|
+
# Initialize SQLite backend
|
|
447
|
+
try:
|
|
448
|
+
from signalwire_agents.search import SearchEngine
|
|
449
|
+
self.search_engine = SearchEngine(backend='sqlite', index_path=self.index_file)
|
|
450
|
+
# The SearchEngine will auto-detect the model from the index
|
|
451
|
+
# Get the model name from config for query processing
|
|
452
|
+
if hasattr(self.search_engine, 'config'):
|
|
453
|
+
index_model = self.search_engine.config.get('embedding_model')
|
|
454
|
+
if index_model:
|
|
455
|
+
self.logger.info(f"Using model from index: {index_model}")
|
|
456
|
+
except Exception as e:
|
|
457
|
+
self.logger.error(f"Failed to load search index {self.index_file}: {e}")
|
|
458
|
+
self.search_available = False
|
|
136
459
|
|
|
137
460
|
return True
|
|
138
461
|
|
|
@@ -145,7 +468,7 @@ class NativeVectorSearchSkill(SkillBase):
|
|
|
145
468
|
'Search the local knowledge base for information'
|
|
146
469
|
)
|
|
147
470
|
|
|
148
|
-
self.
|
|
471
|
+
self.define_tool(
|
|
149
472
|
name=self.tool_name,
|
|
150
473
|
description=description,
|
|
151
474
|
parameters={
|
|
@@ -159,13 +482,48 @@ class NativeVectorSearchSkill(SkillBase):
|
|
|
159
482
|
"default": self.count
|
|
160
483
|
}
|
|
161
484
|
},
|
|
162
|
-
handler=self._search_handler
|
|
163
|
-
**self.swaig_fields
|
|
485
|
+
handler=self._search_handler
|
|
164
486
|
)
|
|
165
487
|
|
|
488
|
+
# Add our tool to the Knowledge Search section
|
|
489
|
+
search_mode = "remote search server" if self.use_remote else "local document indexes"
|
|
490
|
+
section_title = "Knowledge Search"
|
|
491
|
+
|
|
492
|
+
# Try to check if section exists, but handle if method doesn't exist
|
|
493
|
+
section_exists = False
|
|
494
|
+
try:
|
|
495
|
+
if hasattr(self.agent, 'prompt_has_section'):
|
|
496
|
+
section_exists = self.agent.prompt_has_section(section_title)
|
|
497
|
+
except Exception:
|
|
498
|
+
# Method might not work, assume section doesn't exist
|
|
499
|
+
pass
|
|
500
|
+
|
|
501
|
+
if section_exists:
|
|
502
|
+
# Add bullet to existing section
|
|
503
|
+
self.agent.prompt_add_to_section(
|
|
504
|
+
title=section_title,
|
|
505
|
+
bullet=f"Use {self.tool_name} to search {search_mode}: {description}"
|
|
506
|
+
)
|
|
507
|
+
else:
|
|
508
|
+
# Create the section with this tool
|
|
509
|
+
self.agent.prompt_add_section(
|
|
510
|
+
title=section_title,
|
|
511
|
+
body="You can search various knowledge sources using the following tools:",
|
|
512
|
+
bullets=[
|
|
513
|
+
f"Use {self.tool_name} to search {search_mode}: {description}",
|
|
514
|
+
"Search for relevant information using clear, specific queries",
|
|
515
|
+
"If no results are found, suggest the user try rephrasing their question or try another knowledge source"
|
|
516
|
+
]
|
|
517
|
+
)
|
|
518
|
+
|
|
166
519
|
def _search_handler(self, args, raw_data):
|
|
167
520
|
"""Handle search requests"""
|
|
168
521
|
|
|
522
|
+
# Debug logging to see what arguments are being passed
|
|
523
|
+
self.logger.info(f"Search handler called with args: {args}")
|
|
524
|
+
self.logger.info(f"Args type: {type(args)}")
|
|
525
|
+
self.logger.info(f"Raw data: {raw_data}")
|
|
526
|
+
|
|
169
527
|
if not self.search_available:
|
|
170
528
|
return SwaigFunctionResult(
|
|
171
529
|
f"Search functionality is not available. {getattr(self, 'import_error', '')}\n"
|
|
@@ -178,27 +536,52 @@ class NativeVectorSearchSkill(SkillBase):
|
|
|
178
536
|
f"{'Index file not found: ' + (self.index_file or 'not specified') if self.index_file else 'No index file configured'}"
|
|
179
537
|
)
|
|
180
538
|
|
|
539
|
+
# Get arguments - the framework handles parsing correctly
|
|
181
540
|
query = args.get('query', '').strip()
|
|
541
|
+
self.logger.error(f"DEBUG: Extracted query: '{query}' (length: {len(query)})")
|
|
542
|
+
self.logger.info(f"Query bool value: {bool(query)}")
|
|
543
|
+
|
|
182
544
|
if not query:
|
|
545
|
+
self.logger.error(f"Query validation failed - returning error message")
|
|
183
546
|
return SwaigFunctionResult("Please provide a search query.")
|
|
184
547
|
|
|
548
|
+
self.logger.info(f"Query validation passed - proceeding with search")
|
|
185
549
|
count = args.get('count', self.count)
|
|
186
550
|
|
|
187
551
|
try:
|
|
188
|
-
# Preprocess the query
|
|
189
|
-
from signalwire_agents.search.query_processor import preprocess_query
|
|
190
|
-
enhanced = preprocess_query(query, language='en', vector=True, nlp_backend=self.nlp_backend)
|
|
191
|
-
|
|
192
552
|
# Perform search (local or remote)
|
|
553
|
+
self.logger.info(f"DEBUG: use_remote={self.use_remote}, remote_base_url={self.remote_base_url}")
|
|
193
554
|
if self.use_remote:
|
|
194
|
-
|
|
555
|
+
# For remote searches, let the server handle query preprocessing
|
|
556
|
+
self.logger.info(f"DEBUG: Calling _search_remote with query='{query}', count={count}")
|
|
557
|
+
results = self._search_remote(query, None, count)
|
|
558
|
+
self.logger.info(f"DEBUG: _search_remote returned {len(results)} results")
|
|
195
559
|
else:
|
|
560
|
+
# For local searches, preprocess the query locally
|
|
561
|
+
from signalwire_agents.search.query_processor import preprocess_query
|
|
562
|
+
|
|
563
|
+
# Get model name from index config if available
|
|
564
|
+
model_for_query = None
|
|
565
|
+
if hasattr(self.search_engine, 'config'):
|
|
566
|
+
model_for_query = self.search_engine.config.get('embedding_model')
|
|
567
|
+
|
|
568
|
+
enhanced = preprocess_query(
|
|
569
|
+
query,
|
|
570
|
+
language='en',
|
|
571
|
+
vector=True,
|
|
572
|
+
query_nlp_backend=self.query_nlp_backend,
|
|
573
|
+
model_name=model_for_query, # Use model from index
|
|
574
|
+
preserve_original=True, # Keep original query terms
|
|
575
|
+
max_synonyms=2 # Reduce synonym expansion
|
|
576
|
+
)
|
|
196
577
|
results = self.search_engine.search(
|
|
197
578
|
query_vector=enhanced.get('vector', []),
|
|
198
579
|
enhanced_text=enhanced['enhanced_text'],
|
|
199
580
|
count=count,
|
|
200
|
-
|
|
201
|
-
tags=self.tags
|
|
581
|
+
similarity_threshold=self.similarity_threshold,
|
|
582
|
+
tags=self.tags,
|
|
583
|
+
keyword_weight=self.keyword_weight,
|
|
584
|
+
original_query=query # Pass original for exact match boosting
|
|
202
585
|
)
|
|
203
586
|
|
|
204
587
|
if not results:
|
|
@@ -207,38 +590,130 @@ class NativeVectorSearchSkill(SkillBase):
|
|
|
207
590
|
no_results_msg = f"{self.response_prefix} {no_results_msg}"
|
|
208
591
|
if self.response_postfix:
|
|
209
592
|
no_results_msg = f"{no_results_msg} {self.response_postfix}"
|
|
593
|
+
|
|
594
|
+
# Apply custom formatting callback for no results case
|
|
595
|
+
if self.response_format_callback and callable(self.response_format_callback):
|
|
596
|
+
try:
|
|
597
|
+
callback_context = {
|
|
598
|
+
'response': no_results_msg,
|
|
599
|
+
'agent': self.agent,
|
|
600
|
+
'query': query,
|
|
601
|
+
'results': [], # Empty results
|
|
602
|
+
'args': args,
|
|
603
|
+
'count': count,
|
|
604
|
+
'skill': self
|
|
605
|
+
}
|
|
606
|
+
formatted_response = self.response_format_callback(**callback_context)
|
|
607
|
+
if isinstance(formatted_response, str):
|
|
608
|
+
no_results_msg = formatted_response
|
|
609
|
+
except Exception as e:
|
|
610
|
+
self.logger.error(f"Error in response_format_callback (no results): {e}", exc_info=True)
|
|
611
|
+
|
|
210
612
|
return SwaigFunctionResult(no_results_msg)
|
|
211
613
|
|
|
212
|
-
# Format results
|
|
614
|
+
# Format results with dynamic per-result truncation
|
|
213
615
|
response_parts = []
|
|
214
|
-
|
|
616
|
+
|
|
215
617
|
# Add response prefix if configured
|
|
216
618
|
if self.response_prefix:
|
|
217
619
|
response_parts.append(self.response_prefix)
|
|
218
|
-
|
|
620
|
+
|
|
219
621
|
response_parts.append(f"Found {len(results)} relevant results for '{query}':\n")
|
|
220
|
-
|
|
622
|
+
|
|
623
|
+
# Calculate per-result content budget
|
|
624
|
+
# Estimate overhead per result: metadata (~200 chars) + formatting (~100 chars)
|
|
625
|
+
estimated_overhead_per_result = 300
|
|
626
|
+
# Account for prefix/postfix/header in total overhead
|
|
627
|
+
prefix_postfix_overhead = len(self.response_prefix) + len(self.response_postfix) + 100
|
|
628
|
+
total_overhead = (len(results) * estimated_overhead_per_result) + prefix_postfix_overhead
|
|
629
|
+
available_for_content = self.max_content_length - total_overhead
|
|
630
|
+
|
|
631
|
+
# Ensure minimum of 500 chars per result
|
|
632
|
+
per_result_limit = max(500, available_for_content // len(results)) if len(results) > 0 else 1000
|
|
633
|
+
|
|
221
634
|
for i, result in enumerate(results, 1):
|
|
222
635
|
filename = result['metadata']['filename']
|
|
223
636
|
section = result['metadata'].get('section', '')
|
|
224
637
|
score = result['score']
|
|
225
638
|
content = result['content']
|
|
226
|
-
|
|
639
|
+
|
|
640
|
+
# Truncate content to per-result limit
|
|
641
|
+
if len(content) > per_result_limit:
|
|
642
|
+
content = content[:per_result_limit] + "..."
|
|
643
|
+
|
|
644
|
+
# Get tags from either top level or metadata
|
|
645
|
+
tags = result.get('tags', [])
|
|
646
|
+
if not tags and 'metadata' in result['metadata'] and 'tags' in result['metadata']['metadata']:
|
|
647
|
+
# Handle double-nested metadata from older indexes
|
|
648
|
+
tags = result['metadata']['metadata']['tags']
|
|
649
|
+
elif not tags and 'tags' in result['metadata']:
|
|
650
|
+
# Check in metadata directly
|
|
651
|
+
tags = result['metadata']['tags']
|
|
652
|
+
|
|
227
653
|
result_text = f"**Result {i}** (from {filename}"
|
|
228
654
|
if section:
|
|
229
655
|
result_text += f", section: {section}"
|
|
656
|
+
if tags:
|
|
657
|
+
result_text += f", tags: {', '.join(tags)}"
|
|
230
658
|
result_text += f", relevance: {score:.2f})\n{content}\n"
|
|
231
|
-
|
|
659
|
+
|
|
232
660
|
response_parts.append(result_text)
|
|
233
|
-
|
|
661
|
+
|
|
234
662
|
# Add response postfix if configured
|
|
235
663
|
if self.response_postfix:
|
|
236
664
|
response_parts.append(self.response_postfix)
|
|
237
665
|
|
|
238
|
-
|
|
666
|
+
# Build the initial response
|
|
667
|
+
response = "\n".join(response_parts)
|
|
668
|
+
|
|
669
|
+
# Apply custom formatting callback if provided
|
|
670
|
+
if self.response_format_callback and callable(self.response_format_callback):
|
|
671
|
+
try:
|
|
672
|
+
# Prepare callback context
|
|
673
|
+
callback_context = {
|
|
674
|
+
'response': response,
|
|
675
|
+
'agent': self.agent,
|
|
676
|
+
'query': query,
|
|
677
|
+
'results': results,
|
|
678
|
+
'args': args,
|
|
679
|
+
'count': count,
|
|
680
|
+
'skill': self
|
|
681
|
+
}
|
|
682
|
+
|
|
683
|
+
# Call the callback
|
|
684
|
+
formatted_response = self.response_format_callback(**callback_context)
|
|
685
|
+
|
|
686
|
+
# Validate callback returned a string
|
|
687
|
+
if isinstance(formatted_response, str):
|
|
688
|
+
response = formatted_response
|
|
689
|
+
else:
|
|
690
|
+
self.logger.warning(f"response_format_callback returned non-string type: {type(formatted_response)}")
|
|
691
|
+
|
|
692
|
+
except Exception as e:
|
|
693
|
+
self.logger.error(f"Error in response_format_callback: {e}", exc_info=True)
|
|
694
|
+
# Continue with original response if callback fails
|
|
695
|
+
|
|
696
|
+
return SwaigFunctionResult(response)
|
|
239
697
|
|
|
240
698
|
except Exception as e:
|
|
241
|
-
|
|
699
|
+
# Log the full error details for debugging
|
|
700
|
+
self.logger.error(f"Search error for query '{query}': {str(e)}", exc_info=True)
|
|
701
|
+
|
|
702
|
+
# Return user-friendly error message
|
|
703
|
+
user_msg = "I'm sorry, I encountered an issue while searching. "
|
|
704
|
+
|
|
705
|
+
# Check for specific error types and provide helpful guidance
|
|
706
|
+
error_str = str(e).lower()
|
|
707
|
+
if 'punkt' in error_str or 'nltk' in error_str:
|
|
708
|
+
user_msg += "It looks like some language processing resources are missing. Please try again in a moment."
|
|
709
|
+
elif 'vector' in error_str or 'embedding' in error_str:
|
|
710
|
+
user_msg += "There was an issue with the search indexing. Please try rephrasing your question."
|
|
711
|
+
elif 'timeout' in error_str or 'connection' in error_str:
|
|
712
|
+
user_msg += "The search service is temporarily unavailable. Please try again later."
|
|
713
|
+
else:
|
|
714
|
+
user_msg += "Please try rephrasing your question or contact support if the issue persists."
|
|
715
|
+
|
|
716
|
+
return SwaigFunctionResult(user_msg)
|
|
242
717
|
|
|
243
718
|
def _search_remote(self, query: str, enhanced: dict, count: int) -> list:
|
|
244
719
|
"""Perform search using remote search server"""
|
|
@@ -249,19 +724,23 @@ class NativeVectorSearchSkill(SkillBase):
|
|
|
249
724
|
"query": query,
|
|
250
725
|
"index_name": self.index_name,
|
|
251
726
|
"count": count,
|
|
252
|
-
"
|
|
253
|
-
"tags": self.tags
|
|
254
|
-
"language": "en"
|
|
727
|
+
"similarity_threshold": self.similarity_threshold,
|
|
728
|
+
"tags": self.tags
|
|
255
729
|
}
|
|
256
|
-
|
|
730
|
+
|
|
731
|
+
url = f"{self.remote_base_url}/search"
|
|
732
|
+
self.logger.info(f"DEBUG: Sending POST to {url} with request: {search_request}")
|
|
733
|
+
|
|
257
734
|
response = requests.post(
|
|
258
|
-
|
|
735
|
+
url,
|
|
259
736
|
json=search_request,
|
|
737
|
+
auth=self.remote_auth,
|
|
260
738
|
timeout=30
|
|
261
739
|
)
|
|
262
|
-
|
|
740
|
+
|
|
263
741
|
if response.status_code == 200:
|
|
264
742
|
data = response.json()
|
|
743
|
+
self.logger.info(f"DEBUG: Got response with {len(data.get('results', []))} results")
|
|
265
744
|
# Convert remote response format to local format
|
|
266
745
|
results = []
|
|
267
746
|
for result in data.get('results', []):
|
|
@@ -310,19 +789,8 @@ class NativeVectorSearchSkill(SkillBase):
|
|
|
310
789
|
|
|
311
790
|
def get_prompt_sections(self) -> List[Dict[str, Any]]:
|
|
312
791
|
"""Return prompt sections to add to agent"""
|
|
313
|
-
|
|
314
|
-
return [
|
|
315
|
-
{
|
|
316
|
-
"title": "Document Search",
|
|
317
|
-
"body": f"You can search {search_mode} using the {self.tool_name} tool.",
|
|
318
|
-
"bullets": [
|
|
319
|
-
f"Use the {self.tool_name} tool when users ask questions about topics that might be in the indexed documents",
|
|
320
|
-
"Search for relevant information using clear, specific queries",
|
|
321
|
-
"Provide helpful summaries of the search results",
|
|
322
|
-
"If no results are found, suggest the user try rephrasing their question or ask about different topics"
|
|
323
|
-
]
|
|
324
|
-
}
|
|
325
|
-
]
|
|
792
|
+
# We'll handle this in register_tools after the agent is set
|
|
793
|
+
return []
|
|
326
794
|
|
|
327
795
|
def _add_prompt_section(self, agent):
|
|
328
796
|
"""Add prompt section to agent (called during skill loading)"""
|