signalwire-agents 0.1.6__py3-none-any.whl → 1.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. signalwire_agents/__init__.py +130 -4
  2. signalwire_agents/agent_server.py +438 -32
  3. signalwire_agents/agents/bedrock.py +296 -0
  4. signalwire_agents/cli/__init__.py +18 -0
  5. signalwire_agents/cli/build_search.py +1367 -0
  6. signalwire_agents/cli/config.py +80 -0
  7. signalwire_agents/cli/core/__init__.py +10 -0
  8. signalwire_agents/cli/core/agent_loader.py +470 -0
  9. signalwire_agents/cli/core/argparse_helpers.py +179 -0
  10. signalwire_agents/cli/core/dynamic_config.py +71 -0
  11. signalwire_agents/cli/core/service_loader.py +303 -0
  12. signalwire_agents/cli/execution/__init__.py +10 -0
  13. signalwire_agents/cli/execution/datamap_exec.py +446 -0
  14. signalwire_agents/cli/execution/webhook_exec.py +134 -0
  15. signalwire_agents/cli/init_project.py +1225 -0
  16. signalwire_agents/cli/output/__init__.py +10 -0
  17. signalwire_agents/cli/output/output_formatter.py +255 -0
  18. signalwire_agents/cli/output/swml_dump.py +186 -0
  19. signalwire_agents/cli/simulation/__init__.py +10 -0
  20. signalwire_agents/cli/simulation/data_generation.py +374 -0
  21. signalwire_agents/cli/simulation/data_overrides.py +200 -0
  22. signalwire_agents/cli/simulation/mock_env.py +282 -0
  23. signalwire_agents/cli/swaig_test_wrapper.py +52 -0
  24. signalwire_agents/cli/test_swaig.py +809 -0
  25. signalwire_agents/cli/types.py +81 -0
  26. signalwire_agents/core/__init__.py +2 -2
  27. signalwire_agents/core/agent/__init__.py +12 -0
  28. signalwire_agents/core/agent/config/__init__.py +12 -0
  29. signalwire_agents/core/agent/deployment/__init__.py +9 -0
  30. signalwire_agents/core/agent/deployment/handlers/__init__.py +9 -0
  31. signalwire_agents/core/agent/prompt/__init__.py +14 -0
  32. signalwire_agents/core/agent/prompt/manager.py +306 -0
  33. signalwire_agents/core/agent/routing/__init__.py +9 -0
  34. signalwire_agents/core/agent/security/__init__.py +9 -0
  35. signalwire_agents/core/agent/swml/__init__.py +9 -0
  36. signalwire_agents/core/agent/tools/__init__.py +15 -0
  37. signalwire_agents/core/agent/tools/decorator.py +97 -0
  38. signalwire_agents/core/agent/tools/registry.py +210 -0
  39. signalwire_agents/core/agent_base.py +959 -2166
  40. signalwire_agents/core/auth_handler.py +233 -0
  41. signalwire_agents/core/config_loader.py +259 -0
  42. signalwire_agents/core/contexts.py +707 -0
  43. signalwire_agents/core/data_map.py +487 -0
  44. signalwire_agents/core/function_result.py +1150 -1
  45. signalwire_agents/core/logging_config.py +376 -0
  46. signalwire_agents/core/mixins/__init__.py +28 -0
  47. signalwire_agents/core/mixins/ai_config_mixin.py +442 -0
  48. signalwire_agents/core/mixins/auth_mixin.py +287 -0
  49. signalwire_agents/core/mixins/prompt_mixin.py +358 -0
  50. signalwire_agents/core/mixins/serverless_mixin.py +368 -0
  51. signalwire_agents/core/mixins/skill_mixin.py +55 -0
  52. signalwire_agents/core/mixins/state_mixin.py +153 -0
  53. signalwire_agents/core/mixins/tool_mixin.py +230 -0
  54. signalwire_agents/core/mixins/web_mixin.py +1134 -0
  55. signalwire_agents/core/security/session_manager.py +174 -86
  56. signalwire_agents/core/security_config.py +333 -0
  57. signalwire_agents/core/skill_base.py +200 -0
  58. signalwire_agents/core/skill_manager.py +244 -0
  59. signalwire_agents/core/swaig_function.py +33 -9
  60. signalwire_agents/core/swml_builder.py +212 -12
  61. signalwire_agents/core/swml_handler.py +43 -13
  62. signalwire_agents/core/swml_renderer.py +123 -297
  63. signalwire_agents/core/swml_service.py +277 -260
  64. signalwire_agents/prefabs/concierge.py +6 -2
  65. signalwire_agents/prefabs/info_gatherer.py +149 -33
  66. signalwire_agents/prefabs/receptionist.py +14 -22
  67. signalwire_agents/prefabs/survey.py +6 -2
  68. signalwire_agents/schema.json +9218 -5489
  69. signalwire_agents/search/__init__.py +137 -0
  70. signalwire_agents/search/document_processor.py +1223 -0
  71. signalwire_agents/search/index_builder.py +804 -0
  72. signalwire_agents/search/migration.py +418 -0
  73. signalwire_agents/search/models.py +30 -0
  74. signalwire_agents/search/pgvector_backend.py +752 -0
  75. signalwire_agents/search/query_processor.py +502 -0
  76. signalwire_agents/search/search_engine.py +1264 -0
  77. signalwire_agents/search/search_service.py +574 -0
  78. signalwire_agents/skills/README.md +452 -0
  79. signalwire_agents/skills/__init__.py +23 -0
  80. signalwire_agents/skills/api_ninjas_trivia/README.md +215 -0
  81. signalwire_agents/skills/api_ninjas_trivia/__init__.py +12 -0
  82. signalwire_agents/skills/api_ninjas_trivia/skill.py +237 -0
  83. signalwire_agents/skills/datasphere/README.md +210 -0
  84. signalwire_agents/skills/datasphere/__init__.py +12 -0
  85. signalwire_agents/skills/datasphere/skill.py +310 -0
  86. signalwire_agents/skills/datasphere_serverless/README.md +258 -0
  87. signalwire_agents/skills/datasphere_serverless/__init__.py +10 -0
  88. signalwire_agents/skills/datasphere_serverless/skill.py +237 -0
  89. signalwire_agents/skills/datetime/README.md +132 -0
  90. signalwire_agents/skills/datetime/__init__.py +10 -0
  91. signalwire_agents/skills/datetime/skill.py +126 -0
  92. signalwire_agents/skills/joke/README.md +149 -0
  93. signalwire_agents/skills/joke/__init__.py +10 -0
  94. signalwire_agents/skills/joke/skill.py +109 -0
  95. signalwire_agents/skills/math/README.md +161 -0
  96. signalwire_agents/skills/math/__init__.py +10 -0
  97. signalwire_agents/skills/math/skill.py +105 -0
  98. signalwire_agents/skills/mcp_gateway/README.md +230 -0
  99. signalwire_agents/skills/mcp_gateway/__init__.py +10 -0
  100. signalwire_agents/skills/mcp_gateway/skill.py +421 -0
  101. signalwire_agents/skills/native_vector_search/README.md +210 -0
  102. signalwire_agents/skills/native_vector_search/__init__.py +10 -0
  103. signalwire_agents/skills/native_vector_search/skill.py +820 -0
  104. signalwire_agents/skills/play_background_file/README.md +218 -0
  105. signalwire_agents/skills/play_background_file/__init__.py +12 -0
  106. signalwire_agents/skills/play_background_file/skill.py +242 -0
  107. signalwire_agents/skills/registry.py +459 -0
  108. signalwire_agents/skills/spider/README.md +236 -0
  109. signalwire_agents/skills/spider/__init__.py +13 -0
  110. signalwire_agents/skills/spider/skill.py +598 -0
  111. signalwire_agents/skills/swml_transfer/README.md +395 -0
  112. signalwire_agents/skills/swml_transfer/__init__.py +10 -0
  113. signalwire_agents/skills/swml_transfer/skill.py +359 -0
  114. signalwire_agents/skills/weather_api/README.md +178 -0
  115. signalwire_agents/skills/weather_api/__init__.py +12 -0
  116. signalwire_agents/skills/weather_api/skill.py +191 -0
  117. signalwire_agents/skills/web_search/README.md +163 -0
  118. signalwire_agents/skills/web_search/__init__.py +10 -0
  119. signalwire_agents/skills/web_search/skill.py +739 -0
  120. signalwire_agents/skills/wikipedia_search/README.md +228 -0
  121. signalwire_agents/{core/state → skills/wikipedia_search}/__init__.py +5 -4
  122. signalwire_agents/skills/wikipedia_search/skill.py +210 -0
  123. signalwire_agents/utils/__init__.py +14 -0
  124. signalwire_agents/utils/schema_utils.py +111 -44
  125. signalwire_agents/web/__init__.py +17 -0
  126. signalwire_agents/web/web_service.py +559 -0
  127. signalwire_agents-1.0.7.data/data/share/man/man1/sw-agent-init.1 +307 -0
  128. signalwire_agents-1.0.7.data/data/share/man/man1/sw-search.1 +483 -0
  129. signalwire_agents-1.0.7.data/data/share/man/man1/swaig-test.1 +308 -0
  130. signalwire_agents-1.0.7.dist-info/METADATA +992 -0
  131. signalwire_agents-1.0.7.dist-info/RECORD +142 -0
  132. {signalwire_agents-0.1.6.dist-info → signalwire_agents-1.0.7.dist-info}/WHEEL +1 -1
  133. signalwire_agents-1.0.7.dist-info/entry_points.txt +4 -0
  134. signalwire_agents/core/state/file_state_manager.py +0 -219
  135. signalwire_agents/core/state/state_manager.py +0 -101
  136. signalwire_agents-0.1.6.data/data/schema.json +0 -5611
  137. signalwire_agents-0.1.6.dist-info/METADATA +0 -199
  138. signalwire_agents-0.1.6.dist-info/RECORD +0 -34
  139. {signalwire_agents-0.1.6.dist-info → signalwire_agents-1.0.7.dist-info}/licenses/LICENSE +0 -0
  140. {signalwire_agents-0.1.6.dist-info → signalwire_agents-1.0.7.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,820 @@
1
+ """
2
+ Copyright (c) 2025 SignalWire
3
+
4
+ This file is part of the SignalWire AI Agents SDK.
5
+
6
+ Licensed under the MIT License.
7
+ See LICENSE file in the project root for full license information.
8
+ """
9
+
10
+ import os
11
+ import tempfile
12
+ import shutil
13
+ from typing import Dict, Any, Optional, List
14
+ from pathlib import Path
15
+
16
+ from signalwire_agents.core.skill_base import SkillBase
17
+ from signalwire_agents.core.function_result import SwaigFunctionResult
18
+
19
+ class NativeVectorSearchSkill(SkillBase):
20
+ """Native vector search capability using local document indexes or remote search servers"""
21
+
22
+ SKILL_NAME = "native_vector_search"
23
+ SKILL_DESCRIPTION = "Search document indexes using vector similarity and keyword search (local or remote)"
24
+ SKILL_VERSION = "1.0.0"
25
+ REQUIRED_PACKAGES = [] # Optional packages checked at runtime
26
+ REQUIRED_ENV_VARS = [] # No required env vars since all config comes from params
27
+
28
+ # Enable multiple instances support
29
+ SUPPORTS_MULTIPLE_INSTANCES = True
30
+
31
@classmethod
def get_parameter_schema(cls) -> Dict[str, Dict[str, Any]]:
    """Get parameter schema for Native Vector Search skill

    This skill supports three modes of operation:
    1. Network Mode: Set 'remote_url' to connect to a remote search server
    2. Local pgvector: Set backend='pgvector' with connection_string and collection_name
    3. Local SQLite: Set 'index_file' to use a local .swsearch file (default)

    Returns:
        The schema returned by the parent class's get_parameter_schema(),
        updated in place with one entry per parameter of this skill. Each
        entry is a plain dict of metadata ("type", "description",
        "required", and optionally "default", "enum", "items",
        "minimum", "maximum").
    """
    # Start from whatever the parent class declares, then add/override.
    # NOTE(review): setup() also reads a 'tool_name' parameter that is not
    # declared below; presumably the parent schema contributes it — TODO confirm.
    schema = super().get_parameter_schema()
    schema.update({
        # ---- Index source / build options ----
        "index_file": {
            "type": "string",
            "description": "Path to .swsearch index file (SQLite backend only). Use this for local file-based search",
            "required": False
        },
        "build_index": {
            "type": "boolean",
            "description": "Whether to build index from source files",
            "default": False,
            "required": False
        },
        "source_dir": {
            "type": "string",
            "description": "Directory containing documents to index (required if build_index=True)",
            "required": False
        },
        # ---- Remote (network mode) options ----
        "remote_url": {
            "type": "string",
            "description": "URL of remote search server for network mode (e.g., http://localhost:8001). Use this instead of index_file or pgvector for centralized search",
            "required": False
        },
        "index_name": {
            "type": "string",
            "description": "Name of index on remote server (network mode only, used with remote_url)",
            "default": "default",
            "required": False
        },
        # ---- Search behaviour ----
        "count": {
            "type": "integer",
            "description": "Number of search results to return",
            "default": 5,
            "required": False,
            "minimum": 1,
            "maximum": 20
        },
        "similarity_threshold": {
            "type": "number",
            "description": "Minimum similarity score for results (0.0 = no limit, 1.0 = exact match)",
            "default": 0.0,
            "required": False,
            "minimum": 0.0,
            "maximum": 1.0
        },
        "tags": {
            "type": "array",
            "description": "Tags to filter search results",
            "default": [],
            "required": False,
            "items": {
                "type": "string"
            }
        },
        # ---- Index build inputs ----
        # NOTE(review): setup() forwards 'global_tags' and 'exclude_patterns'
        # to the index builder via self.params.get(...) with no fallback, so
        # the defaults listed here only take effect if something else applies
        # schema defaults to params — TODO confirm.
        "global_tags": {
            "type": "array",
            "description": "Tags to apply to all indexed documents",
            "default": [],
            "required": False,
            "items": {
                "type": "string"
            }
        },
        # NOTE(review): when 'file_types' is absent, setup() passes
        # ['md', 'txt'] to the index builder, not the longer default below.
        "file_types": {
            "type": "array",
            "description": "File extensions to include when building index",
            "default": ["md", "txt", "pdf", "docx", "html"],
            "required": False,
            "items": {
                "type": "string"
            }
        },
        "exclude_patterns": {
            "type": "array",
            "description": "Patterns to exclude when building index",
            "default": ["**/node_modules/**", "**/.git/**", "**/dist/**", "**/build/**"],
            "required": False,
            "items": {
                "type": "string"
            }
        },
        # ---- Response formatting ----
        "no_results_message": {
            "type": "string",
            "description": "Message when no results are found",
            "default": "No information found for '{query}'",
            "required": False
        },
        "response_prefix": {
            "type": "string",
            "description": "Prefix to add to search results",
            "default": "",
            "required": False
        },
        "response_postfix": {
            "type": "string",
            "description": "Postfix to add to search results",
            "default": "",
            "required": False
        },
        "max_content_length": {
            "type": "integer",
            "description": "Maximum total response size in characters (distributed across all results)",
            "default": 32768,
            "required": False,
            "minimum": 1000
        },
        # NOTE(review): the search handler invokes this callback with keyword
        # arguments and also passes 'count' and 'skill' in addition to the
        # five names listed in the description.
        "response_format_callback": {
            "type": "callable",
            "description": "Optional callback function to format/transform the response. Called with (response, agent, query, results, args). Must return a string.",
            "required": False
        },
        # ---- Tool presentation ----
        # NOTE(review): register_tools() falls back to
        # 'Search the local knowledge base for information' when this
        # parameter is absent, which differs from the default below.
        "description": {
            "type": "string",
            "description": "Tool description",
            "default": "Search the knowledge base for information",
            "required": False
        },
        "hints": {
            "type": "array",
            "description": "Speech recognition hints",
            "default": [],
            "required": False,
            "items": {
                "type": "string"
            }
        },
        # ---- NLP backends (local mode only) ----
        # NOTE(review): setup() treats any non-None 'nlp_backend' as
        # deprecated and copies it onto both index_nlp_backend and
        # query_nlp_backend; it then accepts only 'nltk' or 'spacy' and
        # replaces anything else (including 'basic') with 'nltk' after a
        # warning. The default and enum values below do not reflect that.
        "nlp_backend": {
            "type": "string",
            "description": "NLP backend for query processing",
            "default": "basic",
            "required": False,
            "enum": ["basic", "spacy", "nltk"]
        },
        # setup() uses 'nltk' when this parameter is absent.
        "query_nlp_backend": {
            "type": "string",
            "description": "NLP backend for query expansion",
            "required": False,
            "enum": ["basic", "spacy", "nltk"]
        },
        # setup() uses 'nltk' when this parameter is absent.
        "index_nlp_backend": {
            "type": "string",
            "description": "NLP backend for indexing",
            "required": False,
            "enum": ["basic", "spacy", "nltk"]
        },
        # ---- Local storage backend ----
        "backend": {
            "type": "string",
            "description": "Storage backend for local database mode: 'sqlite' for file-based or 'pgvector' for PostgreSQL. Ignored if remote_url is set",
            "default": "sqlite",
            "required": False,
            "enum": ["sqlite", "pgvector"]
        },
        "connection_string": {
            "type": "string",
            "description": "PostgreSQL connection string (pgvector backend only, e.g., 'postgresql://user:pass@localhost:5432/dbname'). Required when backend='pgvector'",
            "required": False
        },
        "collection_name": {
            "type": "string",
            "description": "Collection/table name in PostgreSQL (pgvector backend only). Required when backend='pgvector'",
            "required": False
        },
        # ---- Miscellaneous ----
        "verbose": {
            "type": "boolean",
            "description": "Enable verbose logging",
            "default": False,
            "required": False
        },
        "keyword_weight": {
            "type": "number",
            "description": "Manual keyword weight (0.0-1.0). Overrides automatic weight detection",
            "default": None,
            "required": False,
            "minimum": 0.0,
            "maximum": 1.0
        },
        "model_name": {
            "type": "string",
            "description": "Embedding model to use. Options: 'mini' (fastest, 384 dims), 'base' (balanced, 768 dims), 'large' (same as base). Or specify full model name like 'sentence-transformers/all-MiniLM-L6-v2'",
            "default": "mini",
            "required": False
        },
        "overwrite": {
            "type": "boolean",
            "description": "Overwrite existing pgvector collection when building index (pgvector backend only)",
            "default": False,
            "required": False
        }
    })
    return schema
230
+
231
def get_instance_key(self) -> str:
    """
    Build the key under which this skill instance is tracked.

    The key joins the skill name, the configured tool name and the
    configured index file with underscores. A missing 'tool_name'
    falls back to 'search_knowledge' and a missing 'index_file'
    falls back to 'default'.

    Returns:
        str: "<SKILL_NAME>_<tool_name>_<index_file>"
    """
    settings = self.params
    return "{}_{}_{}".format(
        self.SKILL_NAME,
        settings.get('tool_name', 'search_knowledge'),
        settings.get('index_file', 'default'),
    )
240
+
241
+ def setup(self) -> bool:
242
+ """Setup the native vector search skill"""
243
+
244
+ # Get configuration first
245
+ self.tool_name = self.params.get('tool_name', 'search_knowledge')
246
+ self.backend = self.params.get('backend', 'sqlite')
247
+ self.connection_string = self.params.get('connection_string')
248
+ self.collection_name = self.params.get('collection_name')
249
+ self.index_file = self.params.get('index_file')
250
+ self.build_index = self.params.get('build_index', False)
251
+ self.source_dir = self.params.get('source_dir')
252
+ self.count = self.params.get('count', 5)
253
+ self.similarity_threshold = self.params.get('similarity_threshold', 0.0)
254
+ self.tags = self.params.get('tags', [])
255
+ self.no_results_message = self.params.get(
256
+ 'no_results_message',
257
+ "No information found for '{query}'"
258
+ )
259
+ self.response_prefix = self.params.get('response_prefix', '')
260
+ self.response_postfix = self.params.get('response_postfix', '')
261
+ self.max_content_length = self.params.get('max_content_length', 32768)
262
+ self.response_format_callback = self.params.get('response_format_callback')
263
+ self.keyword_weight = self.params.get('keyword_weight')
264
+ self.model_name = self.params.get('model_name', 'mini')
265
+
266
+ # Remote search server configuration
267
+ self.remote_url = self.params.get('remote_url') # e.g., "http://user:pass@localhost:8001"
268
+ self.index_name = self.params.get('index_name', 'default') # For remote searches
269
+
270
+ # Parse auth from URL if present
271
+ self.remote_auth = None
272
+ self.remote_base_url = self.remote_url
273
+ if self.remote_url:
274
+ from urllib.parse import urlparse
275
+ parsed = urlparse(self.remote_url)
276
+ if parsed.username and parsed.password:
277
+ self.remote_auth = (parsed.username, parsed.password)
278
+ # Reconstruct URL without auth for display
279
+ self.remote_base_url = f"{parsed.scheme}://{parsed.hostname}"
280
+ if parsed.port:
281
+ self.remote_base_url += f":{parsed.port}"
282
+ if parsed.path:
283
+ self.remote_base_url += parsed.path
284
+
285
+ # SWAIG fields are already extracted by SkillBase.__init__()
286
+ # No need to re-fetch from params - use self.swaig_fields inherited from parent
287
+
288
+ # **EARLY REMOTE CHECK - Option 1**
289
+ # If remote URL is configured, skip all heavy local imports and just validate remote connectivity
290
+ if self.remote_url:
291
+ self.use_remote = True
292
+ self.search_engine = None # No local search engine needed
293
+ self.logger.info(f"Using remote search server: {self.remote_url}")
294
+
295
+ # Test remote connection (lightweight check)
296
+ try:
297
+ import requests
298
+ # Use parsed base URL and auth
299
+ response = requests.get(
300
+ f"{self.remote_base_url}/health",
301
+ auth=self.remote_auth,
302
+ timeout=5
303
+ )
304
+ if response.status_code == 200:
305
+ self.logger.info(f"Remote search server is available at {self.remote_base_url}")
306
+ self.search_available = True
307
+ return True # Success - skip all local setup
308
+ elif response.status_code == 401:
309
+ self.logger.error("Authentication failed for remote search server. Check credentials.")
310
+ self.search_available = False
311
+ return False
312
+ else:
313
+ self.logger.error(f"Remote search server returned status {response.status_code}")
314
+ self.search_available = False
315
+ return False
316
+ except Exception as e:
317
+ self.logger.error(f"Failed to connect to remote search server: {e}")
318
+ self.search_available = False
319
+ return False
320
+
321
+ # **LOCAL MODE SETUP - Only when no remote URL**
322
+ self.use_remote = False
323
+
324
+ # NLP backend configuration (only needed for local mode)
325
+ self.nlp_backend = self.params.get('nlp_backend') # Backward compatibility
326
+ self.index_nlp_backend = self.params.get('index_nlp_backend', 'nltk') # Default to fast NLTK for indexing
327
+ self.query_nlp_backend = self.params.get('query_nlp_backend', 'nltk') # Default to fast NLTK for search
328
+
329
+ # Handle backward compatibility
330
+ if self.nlp_backend is not None:
331
+ self.logger.warning("Parameter 'nlp_backend' is deprecated. Use 'index_nlp_backend' and 'query_nlp_backend' instead.")
332
+ # If old parameter is used, apply it to both
333
+ self.index_nlp_backend = self.nlp_backend
334
+ self.query_nlp_backend = self.nlp_backend
335
+
336
+ # Validate parameters
337
+ if self.index_nlp_backend not in ['nltk', 'spacy']:
338
+ self.logger.warning(f"Invalid index_nlp_backend '{self.index_nlp_backend}', using 'nltk'")
339
+ self.index_nlp_backend = 'nltk'
340
+
341
+ if self.query_nlp_backend not in ['nltk', 'spacy']:
342
+ self.logger.warning(f"Invalid query_nlp_backend '{self.query_nlp_backend}', using 'nltk'")
343
+ self.query_nlp_backend = 'nltk'
344
+
345
+ # Check if local search functionality is available (heavy imports only for local mode)
346
+ try:
347
+ from signalwire_agents.search import IndexBuilder, SearchEngine
348
+ from signalwire_agents.search.query_processor import preprocess_query
349
+ self.search_available = True
350
+ except ImportError as e:
351
+ self.search_available = False
352
+ self.import_error = str(e)
353
+ self.logger.warning(f"Search dependencies not available: {e}")
354
+ # Don't fail setup - we'll provide helpful error messages at runtime
355
+
356
+ # Auto-build index if requested and search is available
357
+ if self.build_index and self.source_dir and self.search_available:
358
+ # Handle auto-build for different backends
359
+ if self.backend == 'sqlite':
360
+ if not self.index_file:
361
+ # Generate index filename from source directory
362
+ source_name = Path(self.source_dir).name
363
+ self.index_file = f"{source_name}.swsearch"
364
+
365
+ # Build index if it doesn't exist
366
+ if not os.path.exists(self.index_file):
367
+ try:
368
+ self.logger.info(f"Building search index from {self.source_dir}...")
369
+ from signalwire_agents.search import IndexBuilder
370
+
371
+ # Resolve model alias if needed
372
+ from signalwire_agents.search.models import resolve_model_alias
373
+ model_to_use = resolve_model_alias(self.model_name)
374
+
375
+ builder = IndexBuilder(
376
+ model_name=model_to_use,
377
+ verbose=self.params.get('verbose', False),
378
+ index_nlp_backend=self.index_nlp_backend
379
+ )
380
+ builder.build_index(
381
+ source_dir=self.source_dir,
382
+ output_file=self.index_file,
383
+ file_types=self.params.get('file_types', ['md', 'txt']),
384
+ exclude_patterns=self.params.get('exclude_patterns'),
385
+ tags=self.params.get('global_tags')
386
+ )
387
+ self.logger.info(f"Search index created: {self.index_file}")
388
+ except Exception as e:
389
+ self.logger.error(f"Failed to build search index: {e}")
390
+ self.search_available = False
391
+
392
+ elif self.backend == 'pgvector':
393
+ # Auto-build for pgvector
394
+ if self.connection_string and self.collection_name:
395
+ try:
396
+ self.logger.info(f"Building pgvector index from {self.source_dir}...")
397
+ from signalwire_agents.search import IndexBuilder
398
+ from signalwire_agents.search.models import resolve_model_alias
399
+
400
+ model_to_use = resolve_model_alias(self.model_name)
401
+
402
+ builder = IndexBuilder(
403
+ backend='pgvector',
404
+ connection_string=self.connection_string,
405
+ model_name=model_to_use,
406
+ verbose=self.params.get('verbose', False),
407
+ index_nlp_backend=self.index_nlp_backend
408
+ )
409
+
410
+ builder.build_index(
411
+ source_dir=self.source_dir,
412
+ output_file=self.collection_name, # pgvector uses this as collection name
413
+ file_types=self.params.get('file_types', ['md', 'txt']),
414
+ exclude_patterns=self.params.get('exclude_patterns'),
415
+ tags=self.params.get('global_tags'),
416
+ overwrite=self.params.get('overwrite', False)
417
+ )
418
+ self.logger.info(f"pgvector collection created: {self.collection_name}")
419
+ except Exception as e:
420
+ self.logger.error(f"Failed to build pgvector index: {e}")
421
+ # Don't set search_available to False - we might be connecting to existing collection
422
+ else:
423
+ self.logger.warning("pgvector auto-build requires connection_string and collection_name")
424
+
425
+ # Initialize local search engine
426
+ self.search_engine = None
427
+ if self.search_available:
428
+ if self.backend == 'pgvector':
429
+ # Initialize pgvector backend
430
+ if self.connection_string and self.collection_name:
431
+ try:
432
+ from signalwire_agents.search import SearchEngine
433
+ self.search_engine = SearchEngine(
434
+ backend='pgvector',
435
+ connection_string=self.connection_string,
436
+ collection_name=self.collection_name
437
+ )
438
+ self.logger.info(f"Connected to pgvector collection: {self.collection_name}")
439
+ except Exception as e:
440
+ self.logger.error(f"Failed to connect to pgvector: {e}")
441
+ self.search_available = False
442
+ else:
443
+ self.logger.error("pgvector backend requires connection_string and collection_name")
444
+ self.search_available = False
445
+ elif self.index_file and os.path.exists(self.index_file):
446
+ # Initialize SQLite backend
447
+ try:
448
+ from signalwire_agents.search import SearchEngine
449
+ self.search_engine = SearchEngine(backend='sqlite', index_path=self.index_file)
450
+ # The SearchEngine will auto-detect the model from the index
451
+ # Get the model name from config for query processing
452
+ if hasattr(self.search_engine, 'config'):
453
+ index_model = self.search_engine.config.get('embedding_model')
454
+ if index_model:
455
+ self.logger.info(f"Using model from index: {index_model}")
456
+ except Exception as e:
457
+ self.logger.error(f"Failed to load search index {self.index_file}: {e}")
458
+ self.search_available = False
459
+
460
+ return True
461
+
462
def register_tools(self) -> None:
    """Register the search tool and advertise it in the agent prompt.

    The tool is defined under ``self.tool_name`` with a ``query`` string
    parameter and a ``count`` integer parameter, and is handled by
    ``self._search_handler``. Afterwards a usage bullet is placed in the
    agent's "Knowledge Search" prompt section: appended when the agent
    reports that the section already exists, otherwise the section is
    created with this bullet plus two general guidance bullets.
    """
    # Caller-supplied description wins; otherwise use the built-in text.
    tool_description = self.params.get(
        'description',
        'Search the local knowledge base for information'
    )

    tool_parameters = {
        "query": {
            "type": "string",
            "description": "Search query or question"
        },
        "count": {
            "type": "integer",
            "description": f"Number of results to return (default: {self.count})",
            "default": self.count
        }
    }
    self.define_tool(
        name=self.tool_name,
        description=tool_description,
        parameters=tool_parameters,
        handler=self._search_handler
    )

    # Wording of the prompt bullet depends on where searches are served from.
    if self.use_remote:
        source_label = "remote search server"
    else:
        source_label = "local document indexes"
    heading = "Knowledge Search"
    usage_bullet = f"Use {self.tool_name} to search {source_label}: {tool_description}"

    # Best-effort probe: an agent without prompt_has_section, or one whose
    # probe raises, is treated as not having the section yet.
    already_present = False
    try:
        probe = getattr(self.agent, 'prompt_has_section', None)
        if probe is not None:
            already_present = probe(heading)
    except Exception:
        pass

    if not already_present:
        # First tool in this section: create it with the shared guidance.
        self.agent.prompt_add_section(
            title=heading,
            body="You can search various knowledge sources using the following tools:",
            bullets=[
                usage_bullet,
                "Search for relevant information using clear, specific queries",
                "If no results are found, suggest the user try rephrasing their question or try another knowledge source"
            ]
        )
        return

    # Section already exists: just append this tool's bullet.
    self.agent.prompt_add_to_section(
        title=heading,
        bullet=usage_bullet
    )
518
+
519
+ def _search_handler(self, args, raw_data):
520
+ """Handle search requests"""
521
+
522
+ # Debug logging to see what arguments are being passed
523
+ self.logger.info(f"Search handler called with args: {args}")
524
+ self.logger.info(f"Args type: {type(args)}")
525
+ self.logger.info(f"Raw data: {raw_data}")
526
+
527
+ if not self.search_available:
528
+ return SwaigFunctionResult(
529
+ f"Search functionality is not available. {getattr(self, 'import_error', '')}\n"
530
+ f"Install with: pip install signalwire-agents[search]"
531
+ )
532
+
533
+ if not self.use_remote and not self.search_engine:
534
+ return SwaigFunctionResult(
535
+ f"Search index not available. "
536
+ f"{'Index file not found: ' + (self.index_file or 'not specified') if self.index_file else 'No index file configured'}"
537
+ )
538
+
539
+ # Get arguments - the framework handles parsing correctly
540
+ query = args.get('query', '').strip()
541
+ self.logger.error(f"DEBUG: Extracted query: '{query}' (length: {len(query)})")
542
+ self.logger.info(f"Query bool value: {bool(query)}")
543
+
544
+ if not query:
545
+ self.logger.error(f"Query validation failed - returning error message")
546
+ return SwaigFunctionResult("Please provide a search query.")
547
+
548
+ self.logger.info(f"Query validation passed - proceeding with search")
549
+ count = args.get('count', self.count)
550
+
551
+ try:
552
+ # Perform search (local or remote)
553
+ self.logger.info(f"DEBUG: use_remote={self.use_remote}, remote_base_url={self.remote_base_url}")
554
+ if self.use_remote:
555
+ # For remote searches, let the server handle query preprocessing
556
+ self.logger.info(f"DEBUG: Calling _search_remote with query='{query}', count={count}")
557
+ results = self._search_remote(query, None, count)
558
+ self.logger.info(f"DEBUG: _search_remote returned {len(results)} results")
559
+ else:
560
+ # For local searches, preprocess the query locally
561
+ from signalwire_agents.search.query_processor import preprocess_query
562
+
563
+ # Get model name from index config if available
564
+ model_for_query = None
565
+ if hasattr(self.search_engine, 'config'):
566
+ model_for_query = self.search_engine.config.get('embedding_model')
567
+
568
+ enhanced = preprocess_query(
569
+ query,
570
+ language='en',
571
+ vector=True,
572
+ query_nlp_backend=self.query_nlp_backend,
573
+ model_name=model_for_query, # Use model from index
574
+ preserve_original=True, # Keep original query terms
575
+ max_synonyms=2 # Reduce synonym expansion
576
+ )
577
+ results = self.search_engine.search(
578
+ query_vector=enhanced.get('vector', []),
579
+ enhanced_text=enhanced['enhanced_text'],
580
+ count=count,
581
+ similarity_threshold=self.similarity_threshold,
582
+ tags=self.tags,
583
+ keyword_weight=self.keyword_weight,
584
+ original_query=query # Pass original for exact match boosting
585
+ )
586
+
587
+ if not results:
588
+ no_results_msg = self.no_results_message.format(query=query)
589
+ if self.response_prefix:
590
+ no_results_msg = f"{self.response_prefix} {no_results_msg}"
591
+ if self.response_postfix:
592
+ no_results_msg = f"{no_results_msg} {self.response_postfix}"
593
+
594
+ # Apply custom formatting callback for no results case
595
+ if self.response_format_callback and callable(self.response_format_callback):
596
+ try:
597
+ callback_context = {
598
+ 'response': no_results_msg,
599
+ 'agent': self.agent,
600
+ 'query': query,
601
+ 'results': [], # Empty results
602
+ 'args': args,
603
+ 'count': count,
604
+ 'skill': self
605
+ }
606
+ formatted_response = self.response_format_callback(**callback_context)
607
+ if isinstance(formatted_response, str):
608
+ no_results_msg = formatted_response
609
+ except Exception as e:
610
+ self.logger.error(f"Error in response_format_callback (no results): {e}", exc_info=True)
611
+
612
+ return SwaigFunctionResult(no_results_msg)
613
+
614
+ # Format results with dynamic per-result truncation
615
+ response_parts = []
616
+
617
+ # Add response prefix if configured
618
+ if self.response_prefix:
619
+ response_parts.append(self.response_prefix)
620
+
621
+ response_parts.append(f"Found {len(results)} relevant results for '{query}':\n")
622
+
623
+ # Calculate per-result content budget
624
+ # Estimate overhead per result: metadata (~200 chars) + formatting (~100 chars)
625
+ estimated_overhead_per_result = 300
626
+ # Account for prefix/postfix/header in total overhead
627
+ prefix_postfix_overhead = len(self.response_prefix) + len(self.response_postfix) + 100
628
+ total_overhead = (len(results) * estimated_overhead_per_result) + prefix_postfix_overhead
629
+ available_for_content = self.max_content_length - total_overhead
630
+
631
+ # Ensure minimum of 500 chars per result
632
+ per_result_limit = max(500, available_for_content // len(results)) if len(results) > 0 else 1000
633
+
634
+ for i, result in enumerate(results, 1):
635
+ filename = result['metadata']['filename']
636
+ section = result['metadata'].get('section', '')
637
+ score = result['score']
638
+ content = result['content']
639
+
640
+ # Truncate content to per-result limit
641
+ if len(content) > per_result_limit:
642
+ content = content[:per_result_limit] + "..."
643
+
644
+ # Get tags from either top level or metadata
645
+ tags = result.get('tags', [])
646
+ if not tags and 'metadata' in result['metadata'] and 'tags' in result['metadata']['metadata']:
647
+ # Handle double-nested metadata from older indexes
648
+ tags = result['metadata']['metadata']['tags']
649
+ elif not tags and 'tags' in result['metadata']:
650
+ # Check in metadata directly
651
+ tags = result['metadata']['tags']
652
+
653
+ result_text = f"**Result {i}** (from {filename}"
654
+ if section:
655
+ result_text += f", section: {section}"
656
+ if tags:
657
+ result_text += f", tags: {', '.join(tags)}"
658
+ result_text += f", relevance: {score:.2f})\n{content}\n"
659
+
660
+ response_parts.append(result_text)
661
+
662
+ # Add response postfix if configured
663
+ if self.response_postfix:
664
+ response_parts.append(self.response_postfix)
665
+
666
+ # Build the initial response
667
+ response = "\n".join(response_parts)
668
+
669
+ # Apply custom formatting callback if provided
670
+ if self.response_format_callback and callable(self.response_format_callback):
671
+ try:
672
+ # Prepare callback context
673
+ callback_context = {
674
+ 'response': response,
675
+ 'agent': self.agent,
676
+ 'query': query,
677
+ 'results': results,
678
+ 'args': args,
679
+ 'count': count,
680
+ 'skill': self
681
+ }
682
+
683
+ # Call the callback
684
+ formatted_response = self.response_format_callback(**callback_context)
685
+
686
+ # Validate callback returned a string
687
+ if isinstance(formatted_response, str):
688
+ response = formatted_response
689
+ else:
690
+ self.logger.warning(f"response_format_callback returned non-string type: {type(formatted_response)}")
691
+
692
+ except Exception as e:
693
+ self.logger.error(f"Error in response_format_callback: {e}", exc_info=True)
694
+ # Continue with original response if callback fails
695
+
696
+ return SwaigFunctionResult(response)
697
+
698
+ except Exception as e:
699
+ # Log the full error details for debugging
700
+ self.logger.error(f"Search error for query '{query}': {str(e)}", exc_info=True)
701
+
702
+ # Return user-friendly error message
703
+ user_msg = "I'm sorry, I encountered an issue while searching. "
704
+
705
+ # Check for specific error types and provide helpful guidance
706
+ error_str = str(e).lower()
707
+ if 'punkt' in error_str or 'nltk' in error_str:
708
+ user_msg += "It looks like some language processing resources are missing. Please try again in a moment."
709
+ elif 'vector' in error_str or 'embedding' in error_str:
710
+ user_msg += "There was an issue with the search indexing. Please try rephrasing your question."
711
+ elif 'timeout' in error_str or 'connection' in error_str:
712
+ user_msg += "The search service is temporarily unavailable. Please try again later."
713
+ else:
714
+ user_msg += "Please try rephrasing your question or contact support if the issue persists."
715
+
716
+ return SwaigFunctionResult(user_msg)
717
+
718
def _search_remote(self, query: str, enhanced: dict, count: int) -> list:
    """Run the search against the remote search server.

    Args:
        query: Query text, sent unchanged in the request payload.
        enhanced: Not read by this method.
        count: Number of results requested from the server.

    Returns:
        A list of dicts holding the 'content', 'score' and 'metadata' of
        each entry in the response's 'results' list. An empty list is
        returned when the server answers with a non-200 status or when any
        exception is raised while sending the request or reading the reply.
    """
    try:
        import requests

        payload = {
            "query": query,
            "index_name": self.index_name,
            "count": count,
            "similarity_threshold": self.similarity_threshold,
            "tags": self.tags
        }
        endpoint = f"{self.remote_base_url}/search"
        self.logger.info(f"DEBUG: Sending POST to {endpoint} with request: {payload}")

        reply = requests.post(endpoint, json=payload, auth=self.remote_auth, timeout=30)

        # Anything other than 200 is reported and treated as "no results".
        if reply.status_code != 200:
            self.logger.error(f"Remote search failed with status {reply.status_code}: {reply.text}")
            return []

        body = reply.json()
        self.logger.info(f"DEBUG: Got response with {len(body.get('results', []))} results")

        # Keep only the three fields the local result format uses.
        return [
            {
                'content': hit['content'],
                'score': hit['score'],
                'metadata': hit['metadata']
            }
            for hit in body.get('results', [])
        ]

    except Exception as exc:
        self.logger.error(f"Remote search error: {exc}")
        return []
760
+
761
def get_hints(self) -> List[str]:
    """Return the speech recognition hints for this skill.

    The result is a new list holding the built-in hints followed by whatever
    the 'hints' entry of ``self.params`` contains (nothing when it is absent).
    """
    builtin_hints = ("search", "find", "look up", "documentation", "knowledge base")
    return [*builtin_hints, *self.params.get('hints', [])]
776
+
777
def get_global_data(self) -> Dict[str, Any]:
    """Return data to add to the agent's global context.

    When ``self.search_engine`` is truthy, the value returned by its
    ``get_stats()`` is published under the 'search_stats' key. Collecting
    the stats is best-effort: an ordinary failure is logged and results in
    an empty dict instead of an error.

    Returns:
        ``{'search_stats': <stats>}`` when stats were collected, otherwise
        an empty dict.
    """
    global_data: Dict[str, Any] = {}

    if not self.search_engine:
        return global_data

    try:
        global_data['search_stats'] = self.search_engine.get_stats()
    except Exception as e:
        # Not a bare ``except``: KeyboardInterrupt / SystemExit must still
        # propagate, and the failure should be visible in the logs.
        self.logger.warning(f"Could not collect search stats: {e}")

    return global_data
789
+
790
def get_prompt_sections(self) -> List[Dict[str, Any]]:
    """Return the prompt sections this skill contributes to the agent.

    Always returns an empty list: the skill adds its prompt section in
    register_tools, once the agent has been set.
    """
    no_sections: List[Dict[str, Any]] = []
    return no_sections
794
+
795
def _add_prompt_section(self, agent):
    """Register the "Local Document Search" prompt section on ``agent``.

    Calls ``agent.prompt_add_section`` with a title, a body naming
    ``self.tool_name`` and four usage bullets. Any exception raised while
    doing so is logged as an error and otherwise ignored, so the caller
    carries on without the prompt section.
    """
    try:
        tool = self.tool_name
        usage_bullets = [
            f"Use the {tool} tool when users ask questions about topics that might be in the indexed documents",
            "Search for relevant information using clear, specific queries",
            "Provide helpful summaries of the search results",
            "If no results are found, suggest the user try rephrasing their question or ask about different topics"
        ]
        agent.prompt_add_section(
            title="Local Document Search",
            body=f"You can search local document indexes using the {tool} tool.",
            bullets=usage_bullets
        )
    except Exception as err:
        # Best-effort: a failure here must not abort skill loading.
        self.logger.error(f"Failed to add prompt section: {err}")
811
+
812
def cleanup(self) -> None:
    """Cleanup when skill is removed or agent shuts down.

    Removes every directory listed in ``self._temp_dirs``; does nothing when
    that attribute does not exist. Removal is best-effort: a directory that
    cannot be removed (already gone, permission denied, ...) is skipped and
    the remaining directories are still processed.
    """
    # Function-scope import keeps this method self-contained.
    import shutil

    for temp_dir in getattr(self, '_temp_dirs', []):
        try:
            shutil.rmtree(temp_dir)
        except Exception as e:
            # Not a bare ``except``: KeyboardInterrupt / SystemExit must
            # still propagate during shutdown. The logger is looked up
            # defensively because cleanup may run on a partially
            # initialised skill.
            logger = getattr(self, 'logger', None)
            if logger is not None:
                logger.debug(f"Could not remove temporary directory {temp_dir}: {e}")