signalwire-agents 0.1.19__tar.gz → 0.1.20__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. {signalwire_agents-0.1.19/signalwire_agents.egg-info → signalwire_agents-0.1.20}/PKG-INFO +17 -2
  2. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/README.md +11 -1
  3. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/pyproject.toml +10 -1
  4. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/__init__.py +1 -1
  5. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/search/index_builder.py +48 -7
  6. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/search/query_processor.py +52 -11
  7. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/skills/native_vector_search/skill.py +75 -38
  8. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20/signalwire_agents.egg-info}/PKG-INFO +17 -2
  9. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents.egg-info/requires.txt +6 -0
  10. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/LICENSE +0 -0
  11. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/schema.json +0 -0
  12. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/setup.cfg +0 -0
  13. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/setup.py +0 -0
  14. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/agent_server.py +0 -0
  15. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/cli/__init__.py +0 -0
  16. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/cli/build_search.py +0 -0
  17. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/cli/test_swaig.py +0 -0
  18. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/core/__init__.py +0 -0
  19. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/core/agent_base.py +0 -0
  20. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/core/contexts.py +0 -0
  21. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/core/data_map.py +0 -0
  22. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/core/function_result.py +0 -0
  23. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/core/logging_config.py +0 -0
  24. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/core/pom_builder.py +0 -0
  25. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/core/security/__init__.py +0 -0
  26. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/core/security/session_manager.py +0 -0
  27. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/core/skill_base.py +0 -0
  28. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/core/skill_manager.py +0 -0
  29. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/core/state/__init__.py +0 -0
  30. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/core/state/file_state_manager.py +0 -0
  31. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/core/state/state_manager.py +0 -0
  32. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/core/swaig_function.py +0 -0
  33. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/core/swml_builder.py +0 -0
  34. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/core/swml_handler.py +0 -0
  35. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/core/swml_renderer.py +0 -0
  36. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/core/swml_service.py +0 -0
  37. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/prefabs/__init__.py +0 -0
  38. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/prefabs/concierge.py +0 -0
  39. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/prefabs/faq_bot.py +0 -0
  40. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/prefabs/info_gatherer.py +0 -0
  41. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/prefabs/receptionist.py +0 -0
  42. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/prefabs/survey.py +0 -0
  43. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/schema.json +0 -0
  44. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/search/__init__.py +0 -0
  45. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/search/document_processor.py +0 -0
  46. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/search/search_engine.py +0 -0
  47. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/search/search_service.py +0 -0
  48. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/skills/__init__.py +0 -0
  49. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/skills/datasphere/__init__.py +0 -0
  50. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/skills/datasphere/skill.py +0 -0
  51. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/skills/datasphere_serverless/__init__.py +0 -0
  52. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/skills/datasphere_serverless/skill.py +0 -0
  53. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/skills/datetime/__init__.py +0 -0
  54. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/skills/datetime/skill.py +0 -0
  55. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/skills/joke/__init__.py +0 -0
  56. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/skills/joke/skill.py +0 -0
  57. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/skills/math/__init__.py +0 -0
  58. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/skills/math/skill.py +0 -0
  59. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/skills/native_vector_search/__init__.py +0 -0
  60. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/skills/registry.py +0 -0
  61. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/skills/web_search/__init__.py +0 -0
  62. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/skills/web_search/skill.py +0 -0
  63. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/skills/wikipedia_search/__init__.py +0 -0
  64. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/skills/wikipedia_search/skill.py +0 -0
  65. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/utils/__init__.py +0 -0
  66. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/utils/pom_utils.py +0 -0
  67. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/utils/schema_utils.py +0 -0
  68. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/utils/token_generators.py +0 -0
  69. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents/utils/validators.py +0 -0
  70. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents.egg-info/SOURCES.txt +0 -0
  71. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents.egg-info/dependency_links.txt +0 -0
  72. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents.egg-info/entry_points.txt +0 -0
  73. {signalwire_agents-0.1.19 → signalwire_agents-0.1.20}/signalwire_agents.egg-info/top_level.txt +0 -0
--- signalwire_agents-0.1.19/signalwire_agents.egg-info/PKG-INFO
+++ signalwire_agents-0.1.20/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: signalwire_agents
-Version: 0.1.19
+Version: 0.1.20
 Summary: SignalWire AI Agents SDK
 Author-email: SignalWire Team <info@signalwire.com>
 Project-URL: Homepage, https://github.com/signalwire/signalwire-ai-agents
@@ -26,6 +26,11 @@ Requires-Dist: structlog==25.3.0
 Requires-Dist: uvicorn==0.34.2
 Requires-Dist: beautifulsoup4==4.12.3
 Requires-Dist: pytz==2023.3
+Provides-Extra: search-queryonly
+Requires-Dist: numpy>=1.24.0; extra == "search-queryonly"
+Requires-Dist: scikit-learn>=1.3.0; extra == "search-queryonly"
+Requires-Dist: sentence-transformers>=2.2.0; extra == "search-queryonly"
+Requires-Dist: nltk>=3.8; extra == "search-queryonly"
 Provides-Extra: search
 Requires-Dist: sentence-transformers>=2.2.0; extra == "search"
 Requires-Dist: scikit-learn>=1.3.0; extra == "search"
@@ -488,7 +493,10 @@ The SDK includes optional local search capabilities that can be installed separa
 #### Search Installation Options
 
 ```bash
-# Basic search (vector search + keyword search)
+# Query existing .swsearch files only (smallest footprint)
+pip install signalwire-agents[search-queryonly]
+
+# Basic search (vector search + keyword search + building indexes)
 pip install signalwire-agents[search]
 
 # Full search with document processing (PDF, DOCX, etc.)
@@ -505,11 +513,18 @@ pip install signalwire-agents[search-all]
 
 | Option | Size | Features |
 |--------|------|----------|
+| `search-queryonly` | ~400MB | Query existing .swsearch files only (no building/processing) |
 | `search` | ~500MB | Vector embeddings, keyword search, basic text processing |
 | `search-full` | ~600MB | + PDF, DOCX, Excel, PowerPoint, HTML, Markdown processing |
 | `search-nlp` | ~600MB | + Advanced spaCy NLP features |
 | `search-all` | ~700MB | All search features combined |
 
+**When to use `search-queryonly`:**
+- Production containers with pre-built `.swsearch` files
+- Lambda/serverless deployments
+- Agents that only need to query knowledge bases (not build them)
+- Smaller deployment footprint requirements
+
 #### Search Features
 
 - **Local/Offline Search**: No external API dependencies
--- signalwire_agents-0.1.19/README.md
+++ signalwire_agents-0.1.20/README.md
@@ -422,7 +422,10 @@ The SDK includes optional local search capabilities that can be installed separa
 #### Search Installation Options
 
 ```bash
-# Basic search (vector search + keyword search)
+# Query existing .swsearch files only (smallest footprint)
+pip install signalwire-agents[search-queryonly]
+
+# Basic search (vector search + keyword search + building indexes)
 pip install signalwire-agents[search]
 
 # Full search with document processing (PDF, DOCX, etc.)
@@ -439,11 +442,18 @@ pip install signalwire-agents[search-all]
 
 | Option | Size | Features |
 |--------|------|----------|
+| `search-queryonly` | ~400MB | Query existing .swsearch files only (no building/processing) |
 | `search` | ~500MB | Vector embeddings, keyword search, basic text processing |
 | `search-full` | ~600MB | + PDF, DOCX, Excel, PowerPoint, HTML, Markdown processing |
 | `search-nlp` | ~600MB | + Advanced spaCy NLP features |
 | `search-all` | ~700MB | All search features combined |
 
+**When to use `search-queryonly`:**
+- Production containers with pre-built `.swsearch` files
+- Lambda/serverless deployments
+- Agents that only need to query knowledge bases (not build them)
+- Smaller deployment footprint requirements
+
 #### Search Features
 
 - **Local/Offline Search**: No external API dependencies
--- signalwire_agents-0.1.19/pyproject.toml
+++ signalwire_agents-0.1.20/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "signalwire_agents"
-version = "0.1.19"
+version = "0.1.20"
 description = "SignalWire AI Agents SDK"
 authors = [
     {name = "SignalWire Team", email = "info@signalwire.com"}
@@ -37,6 +37,15 @@ dependencies = [
 
 # Optional dependencies for search functionality
 [project.optional-dependencies]
+# Query existing .swsearch files only (no document processing/building)
+search-queryonly = [
+    "numpy>=1.24.0",
+    "scikit-learn>=1.3.0",
+    "sentence-transformers>=2.2.0",
+    "nltk>=3.8",
+]
+
+# Full search functionality (includes document processing and building)
 search = [
     "sentence-transformers>=2.2.0",
     "scikit-learn>=1.3.0",
--- signalwire_agents-0.1.19/signalwire_agents/__init__.py
+++ signalwire_agents-0.1.20/signalwire_agents/__init__.py
@@ -18,7 +18,7 @@ A package for building AI agents using SignalWire's AI and SWML capabilities.
 from .core.logging_config import configure_logging
 configure_logging()
 
-__version__ = "0.1.19"
+__version__ = "0.1.20"
 
 # Import core classes for easier access
 from .core.agent_base import AgentBase
--- signalwire_agents-0.1.19/signalwire_agents/search/index_builder.py
+++ signalwire_agents-0.1.20/signalwire_agents/search/index_builder.py
@@ -367,16 +367,57 @@ class IndexBuilder:
                       global_tags: Optional[List[str]] = None) -> List[Dict[str, Any]]:
         """Process single file into chunks"""
         try:
-            # Try to read as text first
-            try:
-                content = file_path.read_text(encoding='utf-8')
-            except UnicodeDecodeError:
+            relative_path = str(file_path.relative_to(source_dir))
+            file_extension = file_path.suffix.lower()
+
+            # Handle different file types appropriately
+            if file_extension == '.pdf':
+                # Use document processor for PDF extraction
+                content_result = self.doc_processor._extract_text_from_file(str(file_path))
+                if isinstance(content_result, str) and content_result.startswith('{"error"'):
+                    if self.verbose:
+                        print(f"Skipping PDF file (extraction failed): {file_path}")
+                    return []
+                content = content_result
+            elif file_extension in ['.docx', '.xlsx', '.pptx']:
+                # Use document processor for Office documents
+                content_result = self.doc_processor._extract_text_from_file(str(file_path))
+                if isinstance(content_result, str) and content_result.startswith('{"error"'):
+                    if self.verbose:
+                        print(f"Skipping office document (extraction failed): {file_path}")
+                    return []
+                content = content_result
+            elif file_extension == '.html':
+                # Use document processor for HTML
+                content_result = self.doc_processor._extract_text_from_file(str(file_path))
+                if isinstance(content_result, str) and content_result.startswith('{"error"'):
+                    if self.verbose:
+                        print(f"Skipping HTML file (extraction failed): {file_path}")
+                    return []
+                content = content_result
+            elif file_extension == '.rtf':
+                # Use document processor for RTF
+                content_result = self.doc_processor._extract_text_from_file(str(file_path))
+                if isinstance(content_result, str) and content_result.startswith('{"error"'):
+                    if self.verbose:
+                        print(f"Skipping RTF file (extraction failed): {file_path}")
+                    return []
+                content = content_result
+            else:
+                # Try to read as text file (markdown, txt, code, etc.)
+                try:
+                    content = file_path.read_text(encoding='utf-8')
+                except UnicodeDecodeError:
+                    if self.verbose:
+                        print(f"Skipping binary file: {file_path}")
+                    return []
+
+            # Validate content
+            if not content or (isinstance(content, str) and len(content.strip()) == 0):
                 if self.verbose:
-                    print(f"Skipping binary file: {file_path}")
+                    print(f"Skipping empty file: {file_path}")
                 return []
 
-            relative_path = str(file_path.relative_to(source_dir))
-
             # Create chunks using document processor - pass content directly, not file path
             chunks = self.doc_processor.create_chunks(
                 content=content,  # Pass the actual content, not the file path
--- signalwire_agents-0.1.19/signalwire_agents/search/query_processor.py
+++ signalwire_agents-0.1.20/signalwire_agents/search/query_processor.py
@@ -118,15 +118,28 @@ stopwords_language_map = {
 # Function to ensure NLTK resources are downloaded
 def ensure_nltk_resources():
     """Download required NLTK resources if not already present"""
-    resources = ['punkt', 'wordnet', 'averaged_perceptron_tagger', 'stopwords']
+    resources = ['punkt', 'punkt_tab', 'wordnet', 'averaged_perceptron_tagger', 'stopwords']
     for resource in resources:
         try:
-            nltk.data.find(f'corpora/{resource}')
+            # Try different paths for different resource types
+            if resource in ['punkt', 'punkt_tab']:
+                nltk.data.find(f'tokenizers/{resource}')
+            elif resource in ['wordnet']:
+                nltk.data.find(f'corpora/{resource}')
+            elif resource in ['averaged_perceptron_tagger']:
+                nltk.data.find(f'taggers/{resource}')
+            elif resource in ['stopwords']:
+                nltk.data.find(f'corpora/{resource}')
+            else:
+                nltk.data.find(f'corpora/{resource}')
         except LookupError:
             try:
+                logger.info(f"Downloading NLTK resource '{resource}'...")
                 nltk.download(resource, quiet=True)
+                logger.info(f"Successfully downloaded NLTK resource '{resource}'")
             except Exception as e:
                 logger.warning(f"Failed to download NLTK resource '{resource}': {e}")
+                # Continue without this resource - some functionality may be degraded
 
 # Initialize NLTK resources
 ensure_nltk_resources()
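
The branching above exists because `nltk.data.find()` expects a category-qualified path that differs by resource type (`tokenizers/punkt`, `corpora/wordnet`, `taggers/averaged_perceptron_tagger`). A minimal sketch of the same lookup convention expressed as a table, for illustration only (this is not the packaged implementation):

```python
# Illustration only: the resource-to-category convention the new
# ensure_nltk_resources() checks branch on, written as a lookup table.
import nltk

NLTK_RESOURCE_CATEGORY = {
    "punkt": "tokenizers",
    "punkt_tab": "tokenizers",
    "wordnet": "corpora",
    "stopwords": "corpora",
    "averaged_perceptron_tagger": "taggers",
}

def ensure_resource(resource: str) -> None:
    """Download an NLTK resource only when it is not already on disk."""
    category = NLTK_RESOURCE_CATEGORY.get(resource, "corpora")
    try:
        nltk.data.find(f"{category}/{resource}")
    except LookupError:
        nltk.download(resource, quiet=True)  # quiet download, matching the SDK's call
```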
@@ -246,7 +259,20 @@ def preprocess_query(query: str, language: str = 'en', pos_to_expand: Optional[L
         query_nlp_backend = 'nltk'
 
     # Tokenization and stop word removal
-    tokens = nltk.word_tokenize(query)
+    try:
+        tokens = nltk.word_tokenize(query)
+    except LookupError as e:
+        # If tokenization fails, try to download punkt resources
+        logger.warning(f"NLTK tokenization failed: {e}")
+        try:
+            nltk.download('punkt', quiet=True)
+            nltk.download('punkt_tab', quiet=True)
+            tokens = nltk.word_tokenize(query)
+        except Exception as fallback_error:
+            # If all else fails, use simple split as fallback
+            logger.warning(f"NLTK tokenization fallback failed: {fallback_error}. Using simple word splitting.")
+            tokens = query.split()
+
     nltk_language = stopwords_language_map.get(language, 'english')
 
     try:
@@ -279,14 +305,29 @@ def preprocess_query(query: str, language: str = 'en', pos_to_expand: Optional[L
             logger.info(f"POS Tagging Results (spaCy): {pos_tags}")
     else:
         # Use NLTK (default or fallback)
-        nltk_pos_tags = nltk.pos_tag(tokens)
-        for token, pos_tag in nltk_pos_tags:
-            lemma = lemmatizer.lemmatize(token, get_wordnet_pos(pos_tag)).lower()
-            stemmed = stemmer.stem(lemma)
-            lemmas.append((token.lower(), stemmed))
-            pos_tags[token.lower()] = pos_tag
-        if debug:
-            logger.info(f"POS Tagging Results (NLTK): {pos_tags}")
+        try:
+            nltk_pos_tags = nltk.pos_tag(tokens)
+            for token, pos_tag in nltk_pos_tags:
+                try:
+                    lemma = lemmatizer.lemmatize(token, get_wordnet_pos(pos_tag)).lower()
+                except Exception:
+                    # Fallback if lemmatization fails
+                    lemma = token.lower()
+                stemmed = stemmer.stem(lemma)
+                lemmas.append((token.lower(), stemmed))
+                pos_tags[token.lower()] = pos_tag
+            if debug:
+                logger.info(f"POS Tagging Results (NLTK): {pos_tags}")
+        except Exception as pos_error:
+            # Fallback if POS tagging fails completely
+            logger.warning(f"NLTK POS tagging failed: {pos_error}. Using basic token processing.")
+            for token in tokens:
+                lemma = token.lower()
+                stemmed = stemmer.stem(lemma)
+                lemmas.append((token.lower(), stemmed))
+                pos_tags[token.lower()] = 'NN'  # Default to noun
+            if debug:
+                logger.info(f"Using fallback token processing for: {tokens}")
 
     # Expanding query with synonyms
     expanded_query_set = set()
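
Taken together, the two `preprocess_query` changes form a layered fallback: normal NLTK processing first, then an on-demand `punkt`/`punkt_tab` download and retry, and finally plain whitespace splitting with a default `'NN'` tag. The tokenization half of that ladder, reduced to a standalone sketch (the function name is illustrative, not an SDK function):

```python
# Standalone sketch of the tokenization fallback added above.
import logging
import nltk

logger = logging.getLogger(__name__)

def tokenize_with_fallback(text: str) -> list:
    """Tokenize, downloading punkt data on demand, else fall back to str.split()."""
    try:
        return nltk.word_tokenize(text)
    except LookupError as exc:
        logger.warning("NLTK tokenization failed: %s", exc)
        try:
            nltk.download("punkt", quiet=True)
            nltk.download("punkt_tab", quiet=True)
            return nltk.word_tokenize(text)
        except Exception as fallback_error:
            logger.warning("Tokenization retry failed: %s; using simple splitting", fallback_error)
            return text.split()
```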
--- signalwire_agents-0.1.19/signalwire_agents/skills/native_vector_search/skill.py
+++ signalwire_agents-0.1.20/signalwire_agents/skills/native_vector_search/skill.py
@@ -41,18 +41,7 @@ class NativeVectorSearchSkill(SkillBase):
     def setup(self) -> bool:
         """Setup the native vector search skill"""
 
-        # Check if search functionality is available
-        try:
-            from signalwire_agents.search import IndexBuilder, SearchEngine
-            from signalwire_agents.search.query_processor import preprocess_query
-            self.search_available = True
-        except ImportError as e:
-            self.search_available = False
-            self.import_error = str(e)
-            self.logger.warning(f"Search dependencies not available: {e}")
-            # Don't fail setup - we'll provide helpful error messages at runtime
-
-        # Get configuration
+        # Get configuration first
         self.tool_name = self.params.get('tool_name', 'search_knowledge')
         self.index_file = self.params.get('index_file')
         self.build_index = self.params.get('build_index', False)
@@ -74,7 +63,34 @@ class NativeVectorSearchSkill(SkillBase):
         # SWAIG fields for function fillers
         self.swaig_fields = self.params.get('swaig_fields', {})
 
-        # NLP backend configuration
+        # **EARLY REMOTE CHECK - Option 1**
+        # If remote URL is configured, skip all heavy local imports and just validate remote connectivity
+        if self.remote_url:
+            self.use_remote = True
+            self.search_engine = None  # No local search engine needed
+            self.logger.info(f"Using remote search server: {self.remote_url}")
+
+            # Test remote connection (lightweight check)
+            try:
+                import requests
+                response = requests.get(f"{self.remote_url}/health", timeout=5)
+                if response.status_code == 200:
+                    self.logger.info("Remote search server is available")
+                    self.search_available = True
+                    return True  # Success - skip all local setup
+                else:
+                    self.logger.error(f"Remote search server returned status {response.status_code}")
+                    self.search_available = False
+                    return False
+            except Exception as e:
+                self.logger.error(f"Failed to connect to remote search server: {e}")
+                self.search_available = False
+                return False
+
+        # **LOCAL MODE SETUP - Only when no remote URL**
+        self.use_remote = False
+
+        # NLP backend configuration (only needed for local mode)
         self.nlp_backend = self.params.get('nlp_backend')  # Backward compatibility
         self.index_nlp_backend = self.params.get('index_nlp_backend', 'nltk')  # Default to fast NLTK for indexing
         self.query_nlp_backend = self.params.get('query_nlp_backend', 'nltk')  # Default to fast NLTK for search
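
With this early return, configuring `remote_url` means `setup()` never reaches the heavy local imports, so an agent that delegates to a remote search server does not need the local search stack loaded at startup. A hedged configuration sketch, assuming `AgentBase(name=...)` and `add_skill(name, params)` as the configuration surface; the parameter keys come from the `params.get()` calls in this diff, and the URL and index path are placeholders:

```python
# Hedged configuration sketch, not a definitive example. Assumes
# AgentBase(name=...) and add_skill(name, params); tool_name, remote_url,
# and index_file match the params read by this skill. Values are placeholders.
from signalwire_agents import AgentBase

agent = AgentBase(name="docs-agent")

# Remote mode: setup() only probes {remote_url}/health and returns early.
agent.add_skill("native_vector_search", {
    "tool_name": "search_docs",
    "remote_url": "http://search.internal:8001",  # placeholder
})

# Local mode (alternative): needs a search extra installed and a pre-built index.
# agent.add_skill("native_vector_search", {
#     "tool_name": "search_docs",
#     "index_file": "docs.swsearch",  # placeholder
# })
```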
@@ -95,6 +111,17 @@ class NativeVectorSearchSkill(SkillBase):
             self.logger.warning(f"Invalid query_nlp_backend '{self.query_nlp_backend}', using 'nltk'")
             self.query_nlp_backend = 'nltk'
 
+        # Check if local search functionality is available (heavy imports only for local mode)
+        try:
+            from signalwire_agents.search import IndexBuilder, SearchEngine
+            from signalwire_agents.search.query_processor import preprocess_query
+            self.search_available = True
+        except ImportError as e:
+            self.search_available = False
+            self.import_error = str(e)
+            self.logger.warning(f"Search dependencies not available: {e}")
+            # Don't fail setup - we'll provide helpful error messages at runtime
+
         # Auto-build index if requested and search is available
         if self.build_index and self.source_dir and self.search_available:
             if not self.index_file:
@@ -124,7 +151,7 @@ class NativeVectorSearchSkill(SkillBase):
                 self.logger.error(f"Failed to build search index: {e}")
                 self.search_available = False
 
-        # Initialize search engine
+        # Initialize local search engine
         self.search_engine = None
         if self.search_available and self.index_file and os.path.exists(self.index_file):
             try:
@@ -134,24 +161,6 @@ class NativeVectorSearchSkill(SkillBase):
                 self.logger.error(f"Failed to load search index {self.index_file}: {e}")
                 self.search_available = False
 
-        # Check if we should use remote search mode
-        self.use_remote = bool(self.remote_url)
-        if self.use_remote:
-            self.logger.info(f"Using remote search server: {self.remote_url}")
-            # Test remote connection
-            try:
-                import requests
-                response = requests.get(f"{self.remote_url}/health", timeout=5)
-                if response.status_code == 200:
-                    self.logger.info("Remote search server is available")
-                    self.search_available = True
-                else:
-                    self.logger.error(f"Remote search server returned status {response.status_code}")
-                    self.search_available = False
-            except Exception as e:
-                self.logger.error(f"Failed to connect to remote search server: {e}")
-                self.search_available = False
-
         return True
 
     def register_tools(self) -> None:
  def register_tools(self) -> None:
@@ -184,6 +193,11 @@ class NativeVectorSearchSkill(SkillBase):
184
193
  def _search_handler(self, args, raw_data):
185
194
  """Handle search requests"""
186
195
 
196
+ # Debug logging to see what arguments are being passed
197
+ self.logger.info(f"Search handler called with args: {args}")
198
+ self.logger.info(f"Args type: {type(args)}")
199
+ self.logger.info(f"Raw data: {raw_data}")
200
+
187
201
  if not self.search_available:
188
202
  return SwaigFunctionResult(
189
203
  f"Search functionality is not available. {getattr(self, 'import_error', '')}\n"
@@ -196,21 +210,27 @@ class NativeVectorSearchSkill(SkillBase):
                 f"{'Index file not found: ' + (self.index_file or 'not specified') if self.index_file else 'No index file configured'}"
             )
 
+        # Get arguments - the framework handles parsing correctly
         query = args.get('query', '').strip()
+        self.logger.error(f"DEBUG: Extracted query: '{query}' (length: {len(query)})")
+        self.logger.info(f"Query bool value: {bool(query)}")
+
         if not query:
+            self.logger.error(f"Query validation failed - returning error message")
             return SwaigFunctionResult("Please provide a search query.")
 
+        self.logger.info(f"Query validation passed - proceeding with search")
         count = args.get('count', self.count)
 
         try:
-            # Preprocess the query
-            from signalwire_agents.search.query_processor import preprocess_query
-            enhanced = preprocess_query(query, language='en', vector=True, query_nlp_backend=self.query_nlp_backend)
-
             # Perform search (local or remote)
             if self.use_remote:
-                results = self._search_remote(query, enhanced, count)
+                # For remote searches, let the server handle query preprocessing
+                results = self._search_remote(query, None, count)
             else:
+                # For local searches, preprocess the query locally
+                from signalwire_agents.search.query_processor import preprocess_query
+                enhanced = preprocess_query(query, language='en', vector=True, query_nlp_backend=self.query_nlp_backend)
                 results = self.search_engine.search(
                     query_vector=enhanced.get('vector', []),
                     enhanced_text=enhanced['enhanced_text'],
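
The division of labor after this change: the remote branch sends the raw query and lets the server preprocess it, while the local branch builds the enhanced form itself. A minimal sketch of the local flow under stated assumptions; the `preprocess_query` call and the `vector`/`enhanced_text` result keys mirror the handler above, while the `SearchEngine` constructor and the `count` keyword are assumptions for illustration:

```python
# Sketch of the local search path. Assumptions: SearchEngine takes the
# .swsearch path in its constructor and search() accepts a count keyword;
# the preprocess_query() call and result keys mirror the handler above.
from signalwire_agents.search import SearchEngine
from signalwire_agents.search.query_processor import preprocess_query

engine = SearchEngine("docs.swsearch")  # placeholder index file
enhanced = preprocess_query(
    "how do I deploy an agent",
    language="en",
    vector=True,
    query_nlp_backend="nltk",
)
results = engine.search(
    query_vector=enhanced.get("vector", []),
    enhanced_text=enhanced["enhanced_text"],
    count=3,  # assumed keyword, matching the skill's 'count' parameter
)
for hit in results:
    print(hit)
```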
@@ -256,7 +276,24 @@ class NativeVectorSearchSkill(SkillBase):
             return SwaigFunctionResult("\n".join(response_parts))
 
         except Exception as e:
-            return SwaigFunctionResult(f"Search error: {str(e)}")
+            # Log the full error details for debugging
+            self.logger.error(f"Search error for query '{query}': {str(e)}", exc_info=True)
+
+            # Return user-friendly error message
+            user_msg = "I'm sorry, I encountered an issue while searching. "
+
+            # Check for specific error types and provide helpful guidance
+            error_str = str(e).lower()
+            if 'punkt' in error_str or 'nltk' in error_str:
+                user_msg += "It looks like some language processing resources are missing. Please try again in a moment."
+            elif 'vector' in error_str or 'embedding' in error_str:
+                user_msg += "There was an issue with the search indexing. Please try rephrasing your question."
+            elif 'timeout' in error_str or 'connection' in error_str:
+                user_msg += "The search service is temporarily unavailable. Please try again later."
+            else:
+                user_msg += "Please try rephrasing your question or contact support if the issue persists."
+
+            return SwaigFunctionResult(user_msg)
 
     def _search_remote(self, query: str, enhanced: dict, count: int) -> list:
         """Perform search using remote search server"""
--- signalwire_agents-0.1.19/PKG-INFO
+++ signalwire_agents-0.1.20/signalwire_agents.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: signalwire_agents
-Version: 0.1.19
+Version: 0.1.20
 Summary: SignalWire AI Agents SDK
 Author-email: SignalWire Team <info@signalwire.com>
 Project-URL: Homepage, https://github.com/signalwire/signalwire-ai-agents
@@ -26,6 +26,11 @@ Requires-Dist: structlog==25.3.0
 Requires-Dist: uvicorn==0.34.2
 Requires-Dist: beautifulsoup4==4.12.3
 Requires-Dist: pytz==2023.3
+Provides-Extra: search-queryonly
+Requires-Dist: numpy>=1.24.0; extra == "search-queryonly"
+Requires-Dist: scikit-learn>=1.3.0; extra == "search-queryonly"
+Requires-Dist: sentence-transformers>=2.2.0; extra == "search-queryonly"
+Requires-Dist: nltk>=3.8; extra == "search-queryonly"
 Provides-Extra: search
 Requires-Dist: sentence-transformers>=2.2.0; extra == "search"
 Requires-Dist: scikit-learn>=1.3.0; extra == "search"
@@ -488,7 +493,10 @@ The SDK includes optional local search capabilities that can be installed separa
 #### Search Installation Options
 
 ```bash
-# Basic search (vector search + keyword search)
+# Query existing .swsearch files only (smallest footprint)
+pip install signalwire-agents[search-queryonly]
+
+# Basic search (vector search + keyword search + building indexes)
 pip install signalwire-agents[search]
 
 # Full search with document processing (PDF, DOCX, etc.)
@@ -505,11 +513,18 @@ pip install signalwire-agents[search-all]
 
 | Option | Size | Features |
 |--------|------|----------|
+| `search-queryonly` | ~400MB | Query existing .swsearch files only (no building/processing) |
 | `search` | ~500MB | Vector embeddings, keyword search, basic text processing |
 | `search-full` | ~600MB | + PDF, DOCX, Excel, PowerPoint, HTML, Markdown processing |
 | `search-nlp` | ~600MB | + Advanced spaCy NLP features |
 | `search-all` | ~700MB | All search features combined |
 
+**When to use `search-queryonly`:**
+- Production containers with pre-built `.swsearch` files
+- Lambda/serverless deployments
+- Agents that only need to query knowledge bases (not build them)
+- Smaller deployment footprint requirements
+
 #### Search Features
 
 - **Local/Offline Search**: No external API dependencies
--- signalwire_agents-0.1.19/signalwire_agents.egg-info/requires.txt
+++ signalwire_agents-0.1.20/signalwire_agents.egg-info/requires.txt
@@ -48,3 +48,9 @@ scikit-learn>=1.3.0
 nltk>=3.8
 numpy>=1.24.0
 spacy>=3.6.0
+
+[search-queryonly]
+numpy>=1.24.0
+scikit-learn>=1.3.0
+sentence-transformers>=2.2.0
+nltk>=3.8