signalwire-agents 0.1.13__py3-none-any.whl → 1.0.17.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. signalwire_agents/__init__.py +99 -15
  2. signalwire_agents/agent_server.py +248 -60
  3. signalwire_agents/agents/bedrock.py +296 -0
  4. signalwire_agents/cli/__init__.py +9 -0
  5. signalwire_agents/cli/build_search.py +951 -41
  6. signalwire_agents/cli/config.py +80 -0
  7. signalwire_agents/cli/core/__init__.py +10 -0
  8. signalwire_agents/cli/core/agent_loader.py +470 -0
  9. signalwire_agents/cli/core/argparse_helpers.py +179 -0
  10. signalwire_agents/cli/core/dynamic_config.py +71 -0
  11. signalwire_agents/cli/core/service_loader.py +303 -0
  12. signalwire_agents/cli/dokku.py +2320 -0
  13. signalwire_agents/cli/execution/__init__.py +10 -0
  14. signalwire_agents/cli/execution/datamap_exec.py +446 -0
  15. signalwire_agents/cli/execution/webhook_exec.py +134 -0
  16. signalwire_agents/cli/init_project.py +2636 -0
  17. signalwire_agents/cli/output/__init__.py +10 -0
  18. signalwire_agents/cli/output/output_formatter.py +255 -0
  19. signalwire_agents/cli/output/swml_dump.py +186 -0
  20. signalwire_agents/cli/simulation/__init__.py +10 -0
  21. signalwire_agents/cli/simulation/data_generation.py +374 -0
  22. signalwire_agents/cli/simulation/data_overrides.py +200 -0
  23. signalwire_agents/cli/simulation/mock_env.py +282 -0
  24. signalwire_agents/cli/swaig_test_wrapper.py +52 -0
  25. signalwire_agents/cli/test_swaig.py +566 -2366
  26. signalwire_agents/cli/types.py +81 -0
  27. signalwire_agents/core/__init__.py +2 -2
  28. signalwire_agents/core/agent/__init__.py +12 -0
  29. signalwire_agents/core/agent/config/__init__.py +12 -0
  30. signalwire_agents/core/agent/deployment/__init__.py +9 -0
  31. signalwire_agents/core/agent/deployment/handlers/__init__.py +9 -0
  32. signalwire_agents/core/agent/prompt/__init__.py +14 -0
  33. signalwire_agents/core/agent/prompt/manager.py +306 -0
  34. signalwire_agents/core/agent/routing/__init__.py +9 -0
  35. signalwire_agents/core/agent/security/__init__.py +9 -0
  36. signalwire_agents/core/agent/swml/__init__.py +9 -0
  37. signalwire_agents/core/agent/tools/__init__.py +15 -0
  38. signalwire_agents/core/agent/tools/decorator.py +97 -0
  39. signalwire_agents/core/agent/tools/registry.py +210 -0
  40. signalwire_agents/core/agent_base.py +845 -2916
  41. signalwire_agents/core/auth_handler.py +233 -0
  42. signalwire_agents/core/config_loader.py +259 -0
  43. signalwire_agents/core/contexts.py +418 -0
  44. signalwire_agents/core/data_map.py +3 -15
  45. signalwire_agents/core/function_result.py +116 -44
  46. signalwire_agents/core/logging_config.py +162 -18
  47. signalwire_agents/core/mixins/__init__.py +28 -0
  48. signalwire_agents/core/mixins/ai_config_mixin.py +442 -0
  49. signalwire_agents/core/mixins/auth_mixin.py +280 -0
  50. signalwire_agents/core/mixins/prompt_mixin.py +358 -0
  51. signalwire_agents/core/mixins/serverless_mixin.py +460 -0
  52. signalwire_agents/core/mixins/skill_mixin.py +55 -0
  53. signalwire_agents/core/mixins/state_mixin.py +153 -0
  54. signalwire_agents/core/mixins/tool_mixin.py +230 -0
  55. signalwire_agents/core/mixins/web_mixin.py +1142 -0
  56. signalwire_agents/core/security_config.py +333 -0
  57. signalwire_agents/core/skill_base.py +84 -1
  58. signalwire_agents/core/skill_manager.py +62 -20
  59. signalwire_agents/core/swaig_function.py +18 -5
  60. signalwire_agents/core/swml_builder.py +207 -11
  61. signalwire_agents/core/swml_handler.py +27 -21
  62. signalwire_agents/core/swml_renderer.py +123 -312
  63. signalwire_agents/core/swml_service.py +171 -203
  64. signalwire_agents/mcp_gateway/__init__.py +29 -0
  65. signalwire_agents/mcp_gateway/gateway_service.py +564 -0
  66. signalwire_agents/mcp_gateway/mcp_manager.py +513 -0
  67. signalwire_agents/mcp_gateway/session_manager.py +218 -0
  68. signalwire_agents/prefabs/concierge.py +0 -3
  69. signalwire_agents/prefabs/faq_bot.py +0 -3
  70. signalwire_agents/prefabs/info_gatherer.py +0 -3
  71. signalwire_agents/prefabs/receptionist.py +0 -3
  72. signalwire_agents/prefabs/survey.py +0 -3
  73. signalwire_agents/schema.json +9218 -5489
  74. signalwire_agents/search/__init__.py +7 -1
  75. signalwire_agents/search/document_processor.py +490 -31
  76. signalwire_agents/search/index_builder.py +307 -37
  77. signalwire_agents/search/migration.py +418 -0
  78. signalwire_agents/search/models.py +30 -0
  79. signalwire_agents/search/pgvector_backend.py +748 -0
  80. signalwire_agents/search/query_processor.py +162 -31
  81. signalwire_agents/search/search_engine.py +916 -35
  82. signalwire_agents/search/search_service.py +376 -53
  83. signalwire_agents/skills/README.md +452 -0
  84. signalwire_agents/skills/__init__.py +14 -2
  85. signalwire_agents/skills/api_ninjas_trivia/README.md +215 -0
  86. signalwire_agents/skills/api_ninjas_trivia/__init__.py +12 -0
  87. signalwire_agents/skills/api_ninjas_trivia/skill.py +237 -0
  88. signalwire_agents/skills/datasphere/README.md +210 -0
  89. signalwire_agents/skills/datasphere/skill.py +84 -3
  90. signalwire_agents/skills/datasphere_serverless/README.md +258 -0
  91. signalwire_agents/skills/datasphere_serverless/__init__.py +9 -0
  92. signalwire_agents/skills/datasphere_serverless/skill.py +82 -1
  93. signalwire_agents/skills/datetime/README.md +132 -0
  94. signalwire_agents/skills/datetime/__init__.py +9 -0
  95. signalwire_agents/skills/datetime/skill.py +20 -7
  96. signalwire_agents/skills/joke/README.md +149 -0
  97. signalwire_agents/skills/joke/__init__.py +9 -0
  98. signalwire_agents/skills/joke/skill.py +21 -0
  99. signalwire_agents/skills/math/README.md +161 -0
  100. signalwire_agents/skills/math/__init__.py +9 -0
  101. signalwire_agents/skills/math/skill.py +18 -4
  102. signalwire_agents/skills/mcp_gateway/README.md +230 -0
  103. signalwire_agents/skills/mcp_gateway/__init__.py +10 -0
  104. signalwire_agents/skills/mcp_gateway/skill.py +421 -0
  105. signalwire_agents/skills/native_vector_search/README.md +210 -0
  106. signalwire_agents/skills/native_vector_search/__init__.py +9 -0
  107. signalwire_agents/skills/native_vector_search/skill.py +569 -101
  108. signalwire_agents/skills/play_background_file/README.md +218 -0
  109. signalwire_agents/skills/play_background_file/__init__.py +12 -0
  110. signalwire_agents/skills/play_background_file/skill.py +242 -0
  111. signalwire_agents/skills/registry.py +395 -40
  112. signalwire_agents/skills/spider/README.md +236 -0
  113. signalwire_agents/skills/spider/__init__.py +13 -0
  114. signalwire_agents/skills/spider/skill.py +598 -0
  115. signalwire_agents/skills/swml_transfer/README.md +395 -0
  116. signalwire_agents/skills/swml_transfer/__init__.py +10 -0
  117. signalwire_agents/skills/swml_transfer/skill.py +359 -0
  118. signalwire_agents/skills/weather_api/README.md +178 -0
  119. signalwire_agents/skills/weather_api/__init__.py +12 -0
  120. signalwire_agents/skills/weather_api/skill.py +191 -0
  121. signalwire_agents/skills/web_search/README.md +163 -0
  122. signalwire_agents/skills/web_search/__init__.py +9 -0
  123. signalwire_agents/skills/web_search/skill.py +586 -112
  124. signalwire_agents/skills/wikipedia_search/README.md +228 -0
  125. signalwire_agents/{core/state → skills/wikipedia_search}/__init__.py +5 -4
  126. signalwire_agents/skills/{wikipedia → wikipedia_search}/skill.py +33 -3
  127. signalwire_agents/web/__init__.py +17 -0
  128. signalwire_agents/web/web_service.py +559 -0
  129. signalwire_agents-1.0.17.dev4.data/data/share/man/man1/sw-agent-init.1 +400 -0
  130. signalwire_agents-1.0.17.dev4.data/data/share/man/man1/sw-search.1 +483 -0
  131. signalwire_agents-1.0.17.dev4.data/data/share/man/man1/swaig-test.1 +308 -0
  132. {signalwire_agents-0.1.13.dist-info → signalwire_agents-1.0.17.dev4.dist-info}/METADATA +347 -215
  133. signalwire_agents-1.0.17.dev4.dist-info/RECORD +147 -0
  134. signalwire_agents-1.0.17.dev4.dist-info/entry_points.txt +6 -0
  135. signalwire_agents/core/state/file_state_manager.py +0 -219
  136. signalwire_agents/core/state/state_manager.py +0 -101
  137. signalwire_agents/skills/wikipedia/__init__.py +0 -9
  138. signalwire_agents-0.1.13.data/data/schema.json +0 -5611
  139. signalwire_agents-0.1.13.dist-info/RECORD +0 -67
  140. signalwire_agents-0.1.13.dist-info/entry_points.txt +0 -3
  141. {signalwire_agents-0.1.13.dist-info → signalwire_agents-1.0.17.dev4.dist-info}/WHEEL +0 -0
  142. {signalwire_agents-0.1.13.dist-info → signalwire_agents-1.0.17.dev4.dist-info}/licenses/LICENSE +0 -0
  143. {signalwire_agents-0.1.13.dist-info → signalwire_agents-1.0.17.dev4.dist-info}/top_level.txt +0 -0
@@ -28,6 +28,206 @@ class NativeVectorSearchSkill(SkillBase):
28
28
  # Enable multiple instances support
29
29
  SUPPORTS_MULTIPLE_INSTANCES = True
30
30
 
31
+ @classmethod
32
+ def get_parameter_schema(cls) -> Dict[str, Dict[str, Any]]:
33
+ """Get parameter schema for Native Vector Search skill
34
+
35
+ This skill supports three modes of operation:
36
+ 1. Network Mode: Set 'remote_url' to connect to a remote search server
37
+ 2. Local pgvector: Set backend='pgvector' with connection_string and collection_name
38
+ 3. Local SQLite: Set 'index_file' to use a local .swsearch file (default)
39
+ """
40
+ schema = super().get_parameter_schema()
41
+ schema.update({
42
+ "index_file": {
43
+ "type": "string",
44
+ "description": "Path to .swsearch index file (SQLite backend only). Use this for local file-based search",
45
+ "required": False
46
+ },
47
+ "build_index": {
48
+ "type": "boolean",
49
+ "description": "Whether to build index from source files",
50
+ "default": False,
51
+ "required": False
52
+ },
53
+ "source_dir": {
54
+ "type": "string",
55
+ "description": "Directory containing documents to index (required if build_index=True)",
56
+ "required": False
57
+ },
58
+ "remote_url": {
59
+ "type": "string",
60
+ "description": "URL of remote search server for network mode (e.g., http://localhost:8001). Use this instead of index_file or pgvector for centralized search",
61
+ "required": False
62
+ },
63
+ "index_name": {
64
+ "type": "string",
65
+ "description": "Name of index on remote server (network mode only, used with remote_url)",
66
+ "default": "default",
67
+ "required": False
68
+ },
69
+ "count": {
70
+ "type": "integer",
71
+ "description": "Number of search results to return",
72
+ "default": 5,
73
+ "required": False,
74
+ "minimum": 1,
75
+ "maximum": 20
76
+ },
77
+ "similarity_threshold": {
78
+ "type": "number",
79
+ "description": "Minimum similarity score for results (0.0 = no limit, 1.0 = exact match)",
80
+ "default": 0.0,
81
+ "required": False,
82
+ "minimum": 0.0,
83
+ "maximum": 1.0
84
+ },
85
+ "tags": {
86
+ "type": "array",
87
+ "description": "Tags to filter search results",
88
+ "default": [],
89
+ "required": False,
90
+ "items": {
91
+ "type": "string"
92
+ }
93
+ },
94
+ "global_tags": {
95
+ "type": "array",
96
+ "description": "Tags to apply to all indexed documents",
97
+ "default": [],
98
+ "required": False,
99
+ "items": {
100
+ "type": "string"
101
+ }
102
+ },
103
+ "file_types": {
104
+ "type": "array",
105
+ "description": "File extensions to include when building index",
106
+ "default": ["md", "txt", "pdf", "docx", "html"],
107
+ "required": False,
108
+ "items": {
109
+ "type": "string"
110
+ }
111
+ },
112
+ "exclude_patterns": {
113
+ "type": "array",
114
+ "description": "Patterns to exclude when building index",
115
+ "default": ["**/node_modules/**", "**/.git/**", "**/dist/**", "**/build/**"],
116
+ "required": False,
117
+ "items": {
118
+ "type": "string"
119
+ }
120
+ },
121
+ "no_results_message": {
122
+ "type": "string",
123
+ "description": "Message when no results are found",
124
+ "default": "No information found for '{query}'",
125
+ "required": False
126
+ },
127
+ "response_prefix": {
128
+ "type": "string",
129
+ "description": "Prefix to add to search results",
130
+ "default": "",
131
+ "required": False
132
+ },
133
+ "response_postfix": {
134
+ "type": "string",
135
+ "description": "Postfix to add to search results",
136
+ "default": "",
137
+ "required": False
138
+ },
139
+ "max_content_length": {
140
+ "type": "integer",
141
+ "description": "Maximum total response size in characters (distributed across all results)",
142
+ "default": 32768,
143
+ "required": False,
144
+ "minimum": 1000
145
+ },
146
+ "response_format_callback": {
147
+ "type": "callable",
148
+ "description": "Optional callback function to format/transform the response. Called with (response, agent, query, results, args). Must return a string.",
149
+ "required": False
150
+ },
151
+ "description": {
152
+ "type": "string",
153
+ "description": "Tool description",
154
+ "default": "Search the knowledge base for information",
155
+ "required": False
156
+ },
157
+ "hints": {
158
+ "type": "array",
159
+ "description": "Speech recognition hints",
160
+ "default": [],
161
+ "required": False,
162
+ "items": {
163
+ "type": "string"
164
+ }
165
+ },
166
+ "nlp_backend": {
167
+ "type": "string",
168
+ "description": "NLP backend for query processing",
169
+ "default": "basic",
170
+ "required": False,
171
+ "enum": ["basic", "spacy", "nltk"]
172
+ },
173
+ "query_nlp_backend": {
174
+ "type": "string",
175
+ "description": "NLP backend for query expansion",
176
+ "required": False,
177
+ "enum": ["basic", "spacy", "nltk"]
178
+ },
179
+ "index_nlp_backend": {
180
+ "type": "string",
181
+ "description": "NLP backend for indexing",
182
+ "required": False,
183
+ "enum": ["basic", "spacy", "nltk"]
184
+ },
185
+ "backend": {
186
+ "type": "string",
187
+ "description": "Storage backend for local database mode: 'sqlite' for file-based or 'pgvector' for PostgreSQL. Ignored if remote_url is set",
188
+ "default": "sqlite",
189
+ "required": False,
190
+ "enum": ["sqlite", "pgvector"]
191
+ },
192
+ "connection_string": {
193
+ "type": "string",
194
+ "description": "PostgreSQL connection string (pgvector backend only, e.g., 'postgresql://user:pass@localhost:5432/dbname'). Required when backend='pgvector'",
195
+ "required": False
196
+ },
197
+ "collection_name": {
198
+ "type": "string",
199
+ "description": "Collection/table name in PostgreSQL (pgvector backend only). Required when backend='pgvector'",
200
+ "required": False
201
+ },
202
+ "verbose": {
203
+ "type": "boolean",
204
+ "description": "Enable verbose logging",
205
+ "default": False,
206
+ "required": False
207
+ },
208
+ "keyword_weight": {
209
+ "type": "number",
210
+ "description": "Manual keyword weight (0.0-1.0). Overrides automatic weight detection",
211
+ "default": None,
212
+ "required": False,
213
+ "minimum": 0.0,
214
+ "maximum": 1.0
215
+ },
216
+ "model_name": {
217
+ "type": "string",
218
+ "description": "Embedding model to use. Options: 'mini' (fastest, 384 dims), 'base' (balanced, 768 dims), 'large' (same as base). Or specify full model name like 'sentence-transformers/all-MiniLM-L6-v2'",
219
+ "default": "mini",
220
+ "required": False
221
+ },
222
+ "overwrite": {
223
+ "type": "boolean",
224
+ "description": "Overwrite existing pgvector collection when building index (pgvector backend only)",
225
+ "default": False,
226
+ "required": False
227
+ }
228
+ })
229
+ return schema
230
+
31
231
  def get_instance_key(self) -> str:
32
232
  """
33
233
  Get the key used to track this skill instance
@@ -41,24 +241,16 @@ class NativeVectorSearchSkill(SkillBase):
41
241
  def setup(self) -> bool:
42
242
  """Setup the native vector search skill"""
43
243
 
44
- # Check if search functionality is available
45
- try:
46
- from signalwire_agents.search import IndexBuilder, SearchEngine
47
- from signalwire_agents.search.query_processor import preprocess_query
48
- self.search_available = True
49
- except ImportError as e:
50
- self.search_available = False
51
- self.import_error = str(e)
52
- self.logger.warning(f"Search dependencies not available: {e}")
53
- # Don't fail setup - we'll provide helpful error messages at runtime
54
-
55
- # Get configuration
244
+ # Get configuration first
56
245
  self.tool_name = self.params.get('tool_name', 'search_knowledge')
246
+ self.backend = self.params.get('backend', 'sqlite')
247
+ self.connection_string = self.params.get('connection_string')
248
+ self.collection_name = self.params.get('collection_name')
57
249
  self.index_file = self.params.get('index_file')
58
250
  self.build_index = self.params.get('build_index', False)
59
251
  self.source_dir = self.params.get('source_dir')
60
252
  self.count = self.params.get('count', 5)
61
- self.distance_threshold = self.params.get('distance_threshold', 0.0)
253
+ self.similarity_threshold = self.params.get('similarity_threshold', 0.0)
62
254
  self.tags = self.params.get('tags', [])
63
255
  self.no_results_message = self.params.get(
64
256
  'no_results_message',
@@ -66,73 +258,204 @@ class NativeVectorSearchSkill(SkillBase):
66
258
  )
67
259
  self.response_prefix = self.params.get('response_prefix', '')
68
260
  self.response_postfix = self.params.get('response_postfix', '')
261
+ self.max_content_length = self.params.get('max_content_length', 32768)
262
+ self.response_format_callback = self.params.get('response_format_callback')
263
+ self.keyword_weight = self.params.get('keyword_weight')
264
+ self.model_name = self.params.get('model_name', 'mini')
69
265
 
70
266
  # Remote search server configuration
71
- self.remote_url = self.params.get('remote_url') # e.g., "http://localhost:8001"
267
+ self.remote_url = self.params.get('remote_url') # e.g., "http://user:pass@localhost:8001"
72
268
  self.index_name = self.params.get('index_name', 'default') # For remote searches
73
269
 
74
- # SWAIG fields for function fillers
75
- self.swaig_fields = self.params.get('swaig_fields', {})
270
+ # Parse auth from URL if present
271
+ self.remote_auth = None
272
+ self.remote_base_url = self.remote_url
273
+ if self.remote_url:
274
+ from urllib.parse import urlparse
275
+ parsed = urlparse(self.remote_url)
276
+ if parsed.username and parsed.password:
277
+ self.remote_auth = (parsed.username, parsed.password)
278
+ # Reconstruct URL without auth for display
279
+ self.remote_base_url = f"{parsed.scheme}://{parsed.hostname}"
280
+ if parsed.port:
281
+ self.remote_base_url += f":{parsed.port}"
282
+ if parsed.path:
283
+ self.remote_base_url += parsed.path
76
284
 
77
- # NLP backend configuration
78
- self.nlp_backend = self.params.get('nlp_backend', 'nltk') # Default to faster NLTK
79
- if self.nlp_backend not in ['nltk', 'spacy']:
80
- self.logger.warning(f"Invalid nlp_backend '{self.nlp_backend}', using 'nltk'")
81
- self.nlp_backend = 'nltk'
285
+ # SWAIG fields are already extracted by SkillBase.__init__()
286
+ # No need to re-fetch from params - use self.swaig_fields inherited from parent
82
287
 
83
- # Auto-build index if requested and search is available
84
- if self.build_index and self.source_dir and self.search_available:
85
- if not self.index_file:
86
- # Generate index filename from source directory
87
- source_name = Path(self.source_dir).name
88
- self.index_file = f"{source_name}.swsearch"
89
-
90
- # Build index if it doesn't exist
91
- if not os.path.exists(self.index_file):
92
- try:
93
- self.logger.info(f"Building search index from {self.source_dir}...")
94
- from signalwire_agents.search import IndexBuilder
95
-
96
- builder = IndexBuilder(verbose=self.params.get('verbose', False))
97
- builder.build_index(
98
- source_dir=self.source_dir,
99
- output_file=self.index_file,
100
- file_types=self.params.get('file_types', ['md', 'txt']),
101
- exclude_patterns=self.params.get('exclude_patterns'),
102
- tags=self.params.get('global_tags')
103
- )
104
- self.logger.info(f"Search index created: {self.index_file}")
105
- except Exception as e:
106
- self.logger.error(f"Failed to build search index: {e}")
107
- self.search_available = False
108
-
109
- # Initialize search engine
110
- self.search_engine = None
111
- if self.search_available and self.index_file and os.path.exists(self.index_file):
112
- try:
113
- from signalwire_agents.search import SearchEngine
114
- self.search_engine = SearchEngine(self.index_file)
115
- except Exception as e:
116
- self.logger.error(f"Failed to load search index {self.index_file}: {e}")
117
- self.search_available = False
118
-
119
- # Check if we should use remote search mode
120
- self.use_remote = bool(self.remote_url)
121
- if self.use_remote:
288
+ # **EARLY REMOTE CHECK - Option 1**
289
+ # If remote URL is configured, skip all heavy local imports and just validate remote connectivity
290
+ if self.remote_url:
291
+ self.use_remote = True
292
+ self.search_engine = None # No local search engine needed
122
293
  self.logger.info(f"Using remote search server: {self.remote_url}")
123
- # Test remote connection
294
+
295
+ # Test remote connection (lightweight check)
124
296
  try:
125
297
  import requests
126
- response = requests.get(f"{self.remote_url}/health", timeout=5)
298
+ # Use parsed base URL and auth
299
+ response = requests.get(
300
+ f"{self.remote_base_url}/health",
301
+ auth=self.remote_auth,
302
+ timeout=5
303
+ )
127
304
  if response.status_code == 200:
128
- self.logger.info("Remote search server is available")
305
+ self.logger.info(f"Remote search server is available at {self.remote_base_url}")
129
306
  self.search_available = True
307
+ return True # Success - skip all local setup
308
+ elif response.status_code == 401:
309
+ self.logger.error("Authentication failed for remote search server. Check credentials.")
310
+ self.search_available = False
311
+ return False
130
312
  else:
131
313
  self.logger.error(f"Remote search server returned status {response.status_code}")
132
314
  self.search_available = False
315
+ return False
133
316
  except Exception as e:
134
317
  self.logger.error(f"Failed to connect to remote search server: {e}")
135
318
  self.search_available = False
319
+ return False
320
+
321
+ # **LOCAL MODE SETUP - Only when no remote URL**
322
+ self.use_remote = False
323
+
324
+ # NLP backend configuration (only needed for local mode)
325
+ self.nlp_backend = self.params.get('nlp_backend') # Backward compatibility
326
+ self.index_nlp_backend = self.params.get('index_nlp_backend', 'nltk') # Default to fast NLTK for indexing
327
+ self.query_nlp_backend = self.params.get('query_nlp_backend', 'nltk') # Default to fast NLTK for search
328
+
329
+ # Handle backward compatibility
330
+ if self.nlp_backend is not None:
331
+ self.logger.warning("Parameter 'nlp_backend' is deprecated. Use 'index_nlp_backend' and 'query_nlp_backend' instead.")
332
+ # If old parameter is used, apply it to both
333
+ self.index_nlp_backend = self.nlp_backend
334
+ self.query_nlp_backend = self.nlp_backend
335
+
336
+ # Validate parameters
337
+ if self.index_nlp_backend not in ['nltk', 'spacy']:
338
+ self.logger.warning(f"Invalid index_nlp_backend '{self.index_nlp_backend}', using 'nltk'")
339
+ self.index_nlp_backend = 'nltk'
340
+
341
+ if self.query_nlp_backend not in ['nltk', 'spacy']:
342
+ self.logger.warning(f"Invalid query_nlp_backend '{self.query_nlp_backend}', using 'nltk'")
343
+ self.query_nlp_backend = 'nltk'
344
+
345
+ # Check if local search functionality is available (heavy imports only for local mode)
346
+ try:
347
+ from signalwire_agents.search import IndexBuilder, SearchEngine
348
+ from signalwire_agents.search.query_processor import preprocess_query
349
+ self.search_available = True
350
+ except ImportError as e:
351
+ self.search_available = False
352
+ self.import_error = str(e)
353
+ self.logger.warning(f"Search dependencies not available: {e}")
354
+ # Don't fail setup - we'll provide helpful error messages at runtime
355
+
356
+ # Auto-build index if requested and search is available
357
+ if self.build_index and self.source_dir and self.search_available:
358
+ # Handle auto-build for different backends
359
+ if self.backend == 'sqlite':
360
+ if not self.index_file:
361
+ # Generate index filename from source directory
362
+ source_name = Path(self.source_dir).name
363
+ self.index_file = f"{source_name}.swsearch"
364
+
365
+ # Build index if it doesn't exist
366
+ if not os.path.exists(self.index_file):
367
+ try:
368
+ self.logger.info(f"Building search index from {self.source_dir}...")
369
+ from signalwire_agents.search import IndexBuilder
370
+
371
+ # Resolve model alias if needed
372
+ from signalwire_agents.search.models import resolve_model_alias
373
+ model_to_use = resolve_model_alias(self.model_name)
374
+
375
+ builder = IndexBuilder(
376
+ model_name=model_to_use,
377
+ verbose=self.params.get('verbose', False),
378
+ index_nlp_backend=self.index_nlp_backend
379
+ )
380
+ builder.build_index(
381
+ source_dir=self.source_dir,
382
+ output_file=self.index_file,
383
+ file_types=self.params.get('file_types', ['md', 'txt']),
384
+ exclude_patterns=self.params.get('exclude_patterns'),
385
+ tags=self.params.get('global_tags')
386
+ )
387
+ self.logger.info(f"Search index created: {self.index_file}")
388
+ except Exception as e:
389
+ self.logger.error(f"Failed to build search index: {e}")
390
+ self.search_available = False
391
+
392
+ elif self.backend == 'pgvector':
393
+ # Auto-build for pgvector
394
+ if self.connection_string and self.collection_name:
395
+ try:
396
+ self.logger.info(f"Building pgvector index from {self.source_dir}...")
397
+ from signalwire_agents.search import IndexBuilder
398
+ from signalwire_agents.search.models import resolve_model_alias
399
+
400
+ model_to_use = resolve_model_alias(self.model_name)
401
+
402
+ builder = IndexBuilder(
403
+ backend='pgvector',
404
+ connection_string=self.connection_string,
405
+ model_name=model_to_use,
406
+ verbose=self.params.get('verbose', False),
407
+ index_nlp_backend=self.index_nlp_backend
408
+ )
409
+
410
+ builder.build_index(
411
+ source_dir=self.source_dir,
412
+ output_file=self.collection_name, # pgvector uses this as collection name
413
+ file_types=self.params.get('file_types', ['md', 'txt']),
414
+ exclude_patterns=self.params.get('exclude_patterns'),
415
+ tags=self.params.get('global_tags'),
416
+ overwrite=self.params.get('overwrite', False)
417
+ )
418
+ self.logger.info(f"pgvector collection created: {self.collection_name}")
419
+ except Exception as e:
420
+ self.logger.error(f"Failed to build pgvector index: {e}")
421
+ # Don't set search_available to False - we might be connecting to existing collection
422
+ else:
423
+ self.logger.warning("pgvector auto-build requires connection_string and collection_name")
424
+
425
+ # Initialize local search engine
426
+ self.search_engine = None
427
+ if self.search_available:
428
+ if self.backend == 'pgvector':
429
+ # Initialize pgvector backend
430
+ if self.connection_string and self.collection_name:
431
+ try:
432
+ from signalwire_agents.search import SearchEngine
433
+ self.search_engine = SearchEngine(
434
+ backend='pgvector',
435
+ connection_string=self.connection_string,
436
+ collection_name=self.collection_name
437
+ )
438
+ self.logger.info(f"Connected to pgvector collection: {self.collection_name}")
439
+ except Exception as e:
440
+ self.logger.error(f"Failed to connect to pgvector: {e}")
441
+ self.search_available = False
442
+ else:
443
+ self.logger.error("pgvector backend requires connection_string and collection_name")
444
+ self.search_available = False
445
+ elif self.index_file and os.path.exists(self.index_file):
446
+ # Initialize SQLite backend
447
+ try:
448
+ from signalwire_agents.search import SearchEngine
449
+ self.search_engine = SearchEngine(backend='sqlite', index_path=self.index_file)
450
+ # The SearchEngine will auto-detect the model from the index
451
+ # Get the model name from config for query processing
452
+ if hasattr(self.search_engine, 'config'):
453
+ index_model = self.search_engine.config.get('embedding_model')
454
+ if index_model:
455
+ self.logger.info(f"Using model from index: {index_model}")
456
+ except Exception as e:
457
+ self.logger.error(f"Failed to load search index {self.index_file}: {e}")
458
+ self.search_available = False
136
459
 
137
460
  return True
138
461
 
@@ -145,7 +468,7 @@ class NativeVectorSearchSkill(SkillBase):
145
468
  'Search the local knowledge base for information'
146
469
  )
147
470
 
148
- self.agent.define_tool(
471
+ self.define_tool(
149
472
  name=self.tool_name,
150
473
  description=description,
151
474
  parameters={
@@ -159,13 +482,48 @@ class NativeVectorSearchSkill(SkillBase):
159
482
  "default": self.count
160
483
  }
161
484
  },
162
- handler=self._search_handler,
163
- **self.swaig_fields
485
+ handler=self._search_handler
164
486
  )
165
487
 
488
+ # Add our tool to the Knowledge Search section
489
+ search_mode = "remote search server" if self.use_remote else "local document indexes"
490
+ section_title = "Knowledge Search"
491
+
492
+ # Try to check if section exists, but handle if method doesn't exist
493
+ section_exists = False
494
+ try:
495
+ if hasattr(self.agent, 'prompt_has_section'):
496
+ section_exists = self.agent.prompt_has_section(section_title)
497
+ except Exception:
498
+ # Method might not work, assume section doesn't exist
499
+ pass
500
+
501
+ if section_exists:
502
+ # Add bullet to existing section
503
+ self.agent.prompt_add_to_section(
504
+ title=section_title,
505
+ bullet=f"Use {self.tool_name} to search {search_mode}: {description}"
506
+ )
507
+ else:
508
+ # Create the section with this tool
509
+ self.agent.prompt_add_section(
510
+ title=section_title,
511
+ body="You can search various knowledge sources using the following tools:",
512
+ bullets=[
513
+ f"Use {self.tool_name} to search {search_mode}: {description}",
514
+ "Search for relevant information using clear, specific queries",
515
+ "If no results are found, suggest the user try rephrasing their question or try another knowledge source"
516
+ ]
517
+ )
518
+
166
519
  def _search_handler(self, args, raw_data):
167
520
  """Handle search requests"""
168
521
 
522
+ # Debug logging to see what arguments are being passed
523
+ self.logger.info(f"Search handler called with args: {args}")
524
+ self.logger.info(f"Args type: {type(args)}")
525
+ self.logger.info(f"Raw data: {raw_data}")
526
+
169
527
  if not self.search_available:
170
528
  return SwaigFunctionResult(
171
529
  f"Search functionality is not available. {getattr(self, 'import_error', '')}\n"
@@ -178,27 +536,52 @@ class NativeVectorSearchSkill(SkillBase):
178
536
  f"{'Index file not found: ' + (self.index_file or 'not specified') if self.index_file else 'No index file configured'}"
179
537
  )
180
538
 
539
+ # Get arguments - the framework handles parsing correctly
181
540
  query = args.get('query', '').strip()
541
+ self.logger.error(f"DEBUG: Extracted query: '{query}' (length: {len(query)})")
542
+ self.logger.info(f"Query bool value: {bool(query)}")
543
+
182
544
  if not query:
545
+ self.logger.error(f"Query validation failed - returning error message")
183
546
  return SwaigFunctionResult("Please provide a search query.")
184
547
 
548
+ self.logger.info(f"Query validation passed - proceeding with search")
185
549
  count = args.get('count', self.count)
186
550
 
187
551
  try:
188
- # Preprocess the query
189
- from signalwire_agents.search.query_processor import preprocess_query
190
- enhanced = preprocess_query(query, language='en', vector=True, nlp_backend=self.nlp_backend)
191
-
192
552
  # Perform search (local or remote)
553
+ self.logger.info(f"DEBUG: use_remote={self.use_remote}, remote_base_url={self.remote_base_url}")
193
554
  if self.use_remote:
194
- results = self._search_remote(query, enhanced, count)
555
+ # For remote searches, let the server handle query preprocessing
556
+ self.logger.info(f"DEBUG: Calling _search_remote with query='{query}', count={count}")
557
+ results = self._search_remote(query, None, count)
558
+ self.logger.info(f"DEBUG: _search_remote returned {len(results)} results")
195
559
  else:
560
+ # For local searches, preprocess the query locally
561
+ from signalwire_agents.search.query_processor import preprocess_query
562
+
563
+ # Get model name from index config if available
564
+ model_for_query = None
565
+ if hasattr(self.search_engine, 'config'):
566
+ model_for_query = self.search_engine.config.get('embedding_model')
567
+
568
+ enhanced = preprocess_query(
569
+ query,
570
+ language='en',
571
+ vector=True,
572
+ query_nlp_backend=self.query_nlp_backend,
573
+ model_name=model_for_query, # Use model from index
574
+ preserve_original=True, # Keep original query terms
575
+ max_synonyms=2 # Reduce synonym expansion
576
+ )
196
577
  results = self.search_engine.search(
197
578
  query_vector=enhanced.get('vector', []),
198
579
  enhanced_text=enhanced['enhanced_text'],
199
580
  count=count,
200
- distance_threshold=self.distance_threshold,
201
- tags=self.tags
581
+ similarity_threshold=self.similarity_threshold,
582
+ tags=self.tags,
583
+ keyword_weight=self.keyword_weight,
584
+ original_query=query # Pass original for exact match boosting
202
585
  )
203
586
 
204
587
  if not results:
@@ -207,38 +590,130 @@ class NativeVectorSearchSkill(SkillBase):
207
590
  no_results_msg = f"{self.response_prefix} {no_results_msg}"
208
591
  if self.response_postfix:
209
592
  no_results_msg = f"{no_results_msg} {self.response_postfix}"
593
+
594
+ # Apply custom formatting callback for no results case
595
+ if self.response_format_callback and callable(self.response_format_callback):
596
+ try:
597
+ callback_context = {
598
+ 'response': no_results_msg,
599
+ 'agent': self.agent,
600
+ 'query': query,
601
+ 'results': [], # Empty results
602
+ 'args': args,
603
+ 'count': count,
604
+ 'skill': self
605
+ }
606
+ formatted_response = self.response_format_callback(**callback_context)
607
+ if isinstance(formatted_response, str):
608
+ no_results_msg = formatted_response
609
+ except Exception as e:
610
+ self.logger.error(f"Error in response_format_callback (no results): {e}", exc_info=True)
611
+
210
612
  return SwaigFunctionResult(no_results_msg)
211
613
 
212
- # Format results
614
+ # Format results with dynamic per-result truncation
213
615
  response_parts = []
214
-
616
+
215
617
  # Add response prefix if configured
216
618
  if self.response_prefix:
217
619
  response_parts.append(self.response_prefix)
218
-
620
+
219
621
  response_parts.append(f"Found {len(results)} relevant results for '{query}':\n")
220
-
622
+
623
+ # Calculate per-result content budget
624
+ # Estimate overhead per result: metadata (~200 chars) + formatting (~100 chars)
625
+ estimated_overhead_per_result = 300
626
+ # Account for prefix/postfix/header in total overhead
627
+ prefix_postfix_overhead = len(self.response_prefix) + len(self.response_postfix) + 100
628
+ total_overhead = (len(results) * estimated_overhead_per_result) + prefix_postfix_overhead
629
+ available_for_content = self.max_content_length - total_overhead
630
+
631
+ # Ensure minimum of 500 chars per result
632
+ per_result_limit = max(500, available_for_content // len(results)) if len(results) > 0 else 1000
633
+
221
634
  for i, result in enumerate(results, 1):
222
635
  filename = result['metadata']['filename']
223
636
  section = result['metadata'].get('section', '')
224
637
  score = result['score']
225
638
  content = result['content']
226
-
639
+
640
+ # Truncate content to per-result limit
641
+ if len(content) > per_result_limit:
642
+ content = content[:per_result_limit] + "..."
643
+
644
+ # Get tags from either top level or metadata
645
+ tags = result.get('tags', [])
646
+ if not tags and 'metadata' in result['metadata'] and 'tags' in result['metadata']['metadata']:
647
+ # Handle double-nested metadata from older indexes
648
+ tags = result['metadata']['metadata']['tags']
649
+ elif not tags and 'tags' in result['metadata']:
650
+ # Check in metadata directly
651
+ tags = result['metadata']['tags']
652
+
227
653
  result_text = f"**Result {i}** (from {filename}"
228
654
  if section:
229
655
  result_text += f", section: {section}"
656
+ if tags:
657
+ result_text += f", tags: {', '.join(tags)}"
230
658
  result_text += f", relevance: {score:.2f})\n{content}\n"
231
-
659
+
232
660
  response_parts.append(result_text)
233
-
661
+
234
662
  # Add response postfix if configured
235
663
  if self.response_postfix:
236
664
  response_parts.append(self.response_postfix)
237
665
 
238
- return SwaigFunctionResult("\n".join(response_parts))
666
+ # Build the initial response
667
+ response = "\n".join(response_parts)
668
+
669
+ # Apply custom formatting callback if provided
670
+ if self.response_format_callback and callable(self.response_format_callback):
671
+ try:
672
+ # Prepare callback context
673
+ callback_context = {
674
+ 'response': response,
675
+ 'agent': self.agent,
676
+ 'query': query,
677
+ 'results': results,
678
+ 'args': args,
679
+ 'count': count,
680
+ 'skill': self
681
+ }
682
+
683
+ # Call the callback
684
+ formatted_response = self.response_format_callback(**callback_context)
685
+
686
+ # Validate callback returned a string
687
+ if isinstance(formatted_response, str):
688
+ response = formatted_response
689
+ else:
690
+ self.logger.warning(f"response_format_callback returned non-string type: {type(formatted_response)}")
691
+
692
+ except Exception as e:
693
+ self.logger.error(f"Error in response_format_callback: {e}", exc_info=True)
694
+ # Continue with original response if callback fails
695
+
696
+ return SwaigFunctionResult(response)
239
697
 
240
698
  except Exception as e:
241
- return SwaigFunctionResult(f"Search error: {str(e)}")
699
+ # Log the full error details for debugging
700
+ self.logger.error(f"Search error for query '{query}': {str(e)}", exc_info=True)
701
+
702
+ # Return user-friendly error message
703
+ user_msg = "I'm sorry, I encountered an issue while searching. "
704
+
705
+ # Check for specific error types and provide helpful guidance
706
+ error_str = str(e).lower()
707
+ if 'punkt' in error_str or 'nltk' in error_str:
708
+ user_msg += "It looks like some language processing resources are missing. Please try again in a moment."
709
+ elif 'vector' in error_str or 'embedding' in error_str:
710
+ user_msg += "There was an issue with the search indexing. Please try rephrasing your question."
711
+ elif 'timeout' in error_str or 'connection' in error_str:
712
+ user_msg += "The search service is temporarily unavailable. Please try again later."
713
+ else:
714
+ user_msg += "Please try rephrasing your question or contact support if the issue persists."
715
+
716
+ return SwaigFunctionResult(user_msg)
242
717
 
243
718
  def _search_remote(self, query: str, enhanced: dict, count: int) -> list:
244
719
  """Perform search using remote search server"""
@@ -249,19 +724,23 @@ class NativeVectorSearchSkill(SkillBase):
249
724
  "query": query,
250
725
  "index_name": self.index_name,
251
726
  "count": count,
252
- "distance": self.distance_threshold,
253
- "tags": self.tags,
254
- "language": "en"
727
+ "similarity_threshold": self.similarity_threshold,
728
+ "tags": self.tags
255
729
  }
256
-
730
+
731
+ url = f"{self.remote_base_url}/search"
732
+ self.logger.info(f"DEBUG: Sending POST to {url} with request: {search_request}")
733
+
257
734
  response = requests.post(
258
- f"{self.remote_url}/search",
735
+ url,
259
736
  json=search_request,
737
+ auth=self.remote_auth,
260
738
  timeout=30
261
739
  )
262
-
740
+
263
741
  if response.status_code == 200:
264
742
  data = response.json()
743
+ self.logger.info(f"DEBUG: Got response with {len(data.get('results', []))} results")
265
744
  # Convert remote response format to local format
266
745
  results = []
267
746
  for result in data.get('results', []):
@@ -310,19 +789,8 @@ class NativeVectorSearchSkill(SkillBase):
310
789
 
311
790
  def get_prompt_sections(self) -> List[Dict[str, Any]]:
312
791
  """Return prompt sections to add to agent"""
313
- search_mode = "remote search server" if self.use_remote else "local document indexes"
314
- return [
315
- {
316
- "title": "Document Search",
317
- "body": f"You can search {search_mode} using the {self.tool_name} tool.",
318
- "bullets": [
319
- f"Use the {self.tool_name} tool when users ask questions about topics that might be in the indexed documents",
320
- "Search for relevant information using clear, specific queries",
321
- "Provide helpful summaries of the search results",
322
- "If no results are found, suggest the user try rephrasing their question or ask about different topics"
323
- ]
324
- }
325
- ]
792
+ # We'll handle this in register_tools after the agent is set
793
+ return []
326
794
 
327
795
  def _add_prompt_section(self, agent):
328
796
  """Add prompt section to agent (called during skill loading)"""