signalwire-agents 0.1.36__py3-none-any.whl → 0.1.38__py3-none-any.whl

This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (28)
  1. signalwire_agents/__init__.py +1 -1
  2. signalwire_agents/cli/build_search.py +95 -19
  3. signalwire_agents/core/agent_base.py +38 -0
  4. signalwire_agents/core/mixins/ai_config_mixin.py +120 -0
  5. signalwire_agents/core/skill_manager.py +47 -0
  6. signalwire_agents/search/index_builder.py +105 -10
  7. signalwire_agents/search/pgvector_backend.py +523 -0
  8. signalwire_agents/search/search_engine.py +41 -4
  9. signalwire_agents/search/search_service.py +86 -35
  10. signalwire_agents/skills/api_ninjas_trivia/skill.py +37 -1
  11. signalwire_agents/skills/datasphere/skill.py +82 -0
  12. signalwire_agents/skills/datasphere_serverless/skill.py +82 -0
  13. signalwire_agents/skills/joke/skill.py +21 -0
  14. signalwire_agents/skills/mcp_gateway/skill.py +82 -0
  15. signalwire_agents/skills/native_vector_search/README.md +210 -0
  16. signalwire_agents/skills/native_vector_search/skill.py +197 -7
  17. signalwire_agents/skills/play_background_file/skill.py +36 -0
  18. signalwire_agents/skills/registry.py +36 -0
  19. signalwire_agents/skills/spider/skill.py +113 -0
  20. signalwire_agents/skills/swml_transfer/skill.py +90 -0
  21. signalwire_agents/skills/weather_api/skill.py +28 -0
  22. signalwire_agents/skills/wikipedia_search/skill.py +22 -0
  23. {signalwire_agents-0.1.36.dist-info → signalwire_agents-0.1.38.dist-info}/METADATA +66 -1
  24. {signalwire_agents-0.1.36.dist-info → signalwire_agents-0.1.38.dist-info}/RECORD +28 -26
  25. {signalwire_agents-0.1.36.dist-info → signalwire_agents-0.1.38.dist-info}/WHEEL +0 -0
  26. {signalwire_agents-0.1.36.dist-info → signalwire_agents-0.1.38.dist-info}/entry_points.txt +0 -0
  27. {signalwire_agents-0.1.36.dist-info → signalwire_agents-0.1.38.dist-info}/licenses/LICENSE +0 -0
  28. {signalwire_agents-0.1.36.dist-info → signalwire_agents-0.1.38.dist-info}/top_level.txt +0 -0
signalwire_agents/search/pgvector_backend.py
@@ -0,0 +1,523 @@
+ """
+ Copyright (c) 2025 SignalWire
+
+ This file is part of the SignalWire AI Agents SDK.
+
+ Licensed under the MIT License.
+ See LICENSE file in the project root for full license information.
+ """
+
+ import json
+ import logging
+ from typing import List, Dict, Any, Optional
+ from datetime import datetime
+
+ try:
+     import psycopg2
+     from psycopg2.extras import execute_values
+     from pgvector.psycopg2 import register_vector
+     PGVECTOR_AVAILABLE = True
+ except ImportError:
+     PGVECTOR_AVAILABLE = False
+     psycopg2 = None
+     register_vector = None
+
+ try:
+     import numpy as np
+ except ImportError:
+     np = None
+
+ logger = logging.getLogger(__name__)
+
+
+ class PgVectorBackend:
+     """PostgreSQL pgvector backend for search indexing and retrieval"""
+
+     def __init__(self, connection_string: str):
+         """
+         Initialize pgvector backend
+
+         Args:
+             connection_string: PostgreSQL connection string
+         """
+         if not PGVECTOR_AVAILABLE:
+             raise ImportError(
+                 "pgvector dependencies not available. Install with: "
+                 "pip install psycopg2-binary pgvector"
+             )
+
+         self.connection_string = connection_string
+         self.conn = None
+         self._connect()
+
+     def _connect(self):
+         """Establish database connection"""
+         try:
+             self.conn = psycopg2.connect(self.connection_string)
+             register_vector(self.conn)
+             logger.info("Connected to PostgreSQL database")
+         except Exception as e:
+             error_msg = str(e)
+             if "vector type not found" in error_msg:
+                 logger.error(
+                     "pgvector extension not installed in database. "
+                     "Run: CREATE EXTENSION IF NOT EXISTS vector;"
+                 )
+             else:
+                 logger.error(f"Failed to connect to database: {e}")
+             raise
+
+     def _ensure_connection(self):
+         """Ensure database connection is active"""
+         if self.conn is None or self.conn.closed:
+             self._connect()
+
+     def create_schema(self, collection_name: str, embedding_dim: int = 768):
+         """
+         Create database schema for a collection
+
+         Args:
+             collection_name: Name of the collection
+             embedding_dim: Dimension of embeddings
+         """
+         self._ensure_connection()
+
+         with self.conn.cursor() as cursor:
+             # Create extensions
+             cursor.execute("CREATE EXTENSION IF NOT EXISTS vector")
+             cursor.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm")
+
+             # Create table
+             table_name = f"chunks_{collection_name}"
+             cursor.execute(f"""
+                 CREATE TABLE IF NOT EXISTS {table_name} (
+                     id SERIAL PRIMARY KEY,
+                     content TEXT NOT NULL,
+                     processed_content TEXT,
+                     embedding vector({embedding_dim}),
+                     filename TEXT,
+                     section TEXT,
+                     tags JSONB DEFAULT '[]'::jsonb,
+                     metadata JSONB DEFAULT '{{}}'::jsonb,
+                     created_at TIMESTAMP DEFAULT NOW()
+                 )
+             """)
+
+             # Create indexes
+             cursor.execute(f"""
+                 CREATE INDEX IF NOT EXISTS idx_{table_name}_embedding
+                 ON {table_name} USING ivfflat (embedding vector_cosine_ops)
+                 WITH (lists = 100)
+             """)
+
+             cursor.execute(f"""
+                 CREATE INDEX IF NOT EXISTS idx_{table_name}_content
+                 ON {table_name} USING gin (content gin_trgm_ops)
+             """)
+
+             cursor.execute(f"""
+                 CREATE INDEX IF NOT EXISTS idx_{table_name}_tags
+                 ON {table_name} USING gin (tags)
+             """)
+
+             # Create config table
+             cursor.execute("""
+                 CREATE TABLE IF NOT EXISTS collection_config (
+                     collection_name TEXT PRIMARY KEY,
+                     model_name TEXT,
+                     embedding_dimensions INTEGER,
+                     chunking_strategy TEXT,
+                     languages JSONB,
+                     created_at TIMESTAMP DEFAULT NOW(),
+                     metadata JSONB DEFAULT '{}'::jsonb
+                 )
+             """)
+
+         self.conn.commit()
+         logger.info(f"Created schema for collection '{collection_name}'")
+
+     def store_chunks(self, chunks: List[Dict[str, Any]], collection_name: str,
+                      config: Dict[str, Any]):
+         """
+         Store document chunks in the database
+
+         Args:
+             chunks: List of processed chunks with embeddings
+             collection_name: Name of the collection
+             config: Configuration metadata
+         """
+         self._ensure_connection()
+
+         table_name = f"chunks_{collection_name}"
+
+         # Prepare data for batch insert
+         data = []
+         for chunk in chunks:
+             embedding = chunk.get('embedding')
+             if embedding is not None:
+                 # Convert to list if it's a numpy array
+                 if hasattr(embedding, 'tolist'):
+                     embedding = embedding.tolist()
+
+             metadata = chunk.get('metadata', {})
+
+             # Extract fields - they might be at top level or in metadata
+             filename = chunk.get('filename') or metadata.get('filename', '')
+             section = chunk.get('section') or metadata.get('section', '')
+             tags = chunk.get('tags', []) or metadata.get('tags', [])
+
+             # Build metadata from all fields except the ones we store separately
+             chunk_metadata = {}
+             for key, value in chunk.items():
+                 if key not in ['content', 'processed_content', 'embedding', 'filename', 'section', 'tags']:
+                     chunk_metadata[key] = value
+             # Also include any extra metadata
+             for key, value in metadata.items():
+                 if key not in ['filename', 'section', 'tags']:
+                     chunk_metadata[key] = value
+
+             data.append((
+                 chunk['content'],
+                 chunk.get('processed_content', chunk['content']),
+                 embedding,
+                 filename,
+                 section,
+                 json.dumps(tags),
+                 json.dumps(chunk_metadata)
+             ))
+
+         # Batch insert chunks
+         with self.conn.cursor() as cursor:
+             execute_values(
+                 cursor,
+                 f"""
+                 INSERT INTO {table_name}
+                 (content, processed_content, embedding, filename, section, tags, metadata)
+                 VALUES %s
+                 """,
+                 data,
+                 template="(%s, %s, %s, %s, %s, %s::jsonb, %s::jsonb)"
+             )
+
+             # Update or insert config
+             cursor.execute("""
+                 INSERT INTO collection_config
+                 (collection_name, model_name, embedding_dimensions, chunking_strategy,
+                  languages, metadata)
+                 VALUES (%s, %s, %s, %s, %s, %s)
+                 ON CONFLICT (collection_name)
+                 DO UPDATE SET
+                     model_name = EXCLUDED.model_name,
+                     embedding_dimensions = EXCLUDED.embedding_dimensions,
+                     chunking_strategy = EXCLUDED.chunking_strategy,
+                     languages = EXCLUDED.languages,
+                     metadata = EXCLUDED.metadata
+             """, (
+                 collection_name,
+                 config.get('model_name'),
+                 config.get('embedding_dimensions'),
+                 config.get('chunking_strategy'),
+                 json.dumps(config.get('languages', [])),
+                 json.dumps(config.get('metadata', {}))
+             ))
+
+         self.conn.commit()
+         logger.info(f"Stored {len(chunks)} chunks in collection '{collection_name}'")
+
+     def get_stats(self, collection_name: str) -> Dict[str, Any]:
+         """Get statistics for a collection"""
+         self._ensure_connection()
+
+         table_name = f"chunks_{collection_name}"
+
+         with self.conn.cursor() as cursor:
+             # Get chunk count
+             cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
+             total_chunks = cursor.fetchone()[0]
+
+             # Get unique files
+             cursor.execute(f"SELECT COUNT(DISTINCT filename) FROM {table_name}")
+             total_files = cursor.fetchone()[0]
+
+             # Get config
+             cursor.execute(
+                 "SELECT * FROM collection_config WHERE collection_name = %s",
+                 (collection_name,)
+             )
+             config_row = cursor.fetchone()
+
+             if config_row:
+                 config = {
+                     'model_name': config_row[1],
+                     'embedding_dimensions': config_row[2],
+                     'chunking_strategy': config_row[3],
+                     'languages': config_row[4],
+                     'created_at': config_row[5].isoformat() if config_row[5] else None,
+                     'metadata': config_row[6]
+                 }
+             else:
+                 config = {}
+
+         return {
+             'total_chunks': total_chunks,
+             'total_files': total_files,
+             'config': config
+         }
+
+     def list_collections(self) -> List[str]:
+         """List all collections in the database"""
+         self._ensure_connection()
+
+         with self.conn.cursor() as cursor:
+             cursor.execute("SELECT collection_name FROM collection_config ORDER BY collection_name")
+             return [row[0] for row in cursor.fetchall()]
+
+     def delete_collection(self, collection_name: str):
+         """Delete a collection and its data"""
+         self._ensure_connection()
+
+         table_name = f"chunks_{collection_name}"
+
+         with self.conn.cursor() as cursor:
+             cursor.execute(f"DROP TABLE IF EXISTS {table_name}")
+             cursor.execute(
+                 "DELETE FROM collection_config WHERE collection_name = %s",
+                 (collection_name,)
+             )
+         self.conn.commit()
+         logger.info(f"Deleted collection '{collection_name}'")
+
+     def close(self):
+         """Close database connection"""
+         if self.conn and not self.conn.closed:
+             self.conn.close()
+             logger.info("Closed database connection")
+
+
+ class PgVectorSearchBackend:
+     """PostgreSQL pgvector backend for search operations"""
+
+     def __init__(self, connection_string: str, collection_name: str):
+         """
+         Initialize search backend
+
+         Args:
+             connection_string: PostgreSQL connection string
+             collection_name: Name of the collection to search
+         """
+         if not PGVECTOR_AVAILABLE:
+             raise ImportError(
+                 "pgvector dependencies not available. Install with: "
+                 "pip install psycopg2-binary pgvector"
+             )
+
+         self.connection_string = connection_string
+         self.collection_name = collection_name
+         self.table_name = f"chunks_{collection_name}"
+         self.conn = None
+         self._connect()
+         self.config = self._load_config()
+
+     def _connect(self):
+         """Establish database connection"""
+         try:
+             self.conn = psycopg2.connect(self.connection_string)
+             register_vector(self.conn)
+         except Exception as e:
+             logger.error(f"Failed to connect to database: {e}")
+             raise
+
+     def _ensure_connection(self):
+         """Ensure database connection is active"""
+         if self.conn is None or self.conn.closed:
+             self._connect()
+
+     def _load_config(self) -> Dict[str, Any]:
+         """Load collection configuration"""
+         self._ensure_connection()
+
+         with self.conn.cursor() as cursor:
+             cursor.execute(
+                 "SELECT * FROM collection_config WHERE collection_name = %s",
+                 (self.collection_name,)
+             )
+             row = cursor.fetchone()
+
+             if row:
+                 return {
+                     'model_name': row[1],
+                     'embedding_dimensions': row[2],
+                     'chunking_strategy': row[3],
+                     'languages': row[4],
+                     'metadata': row[6]
+                 }
+         return {}
+
+     def search(self, query_vector: List[float], enhanced_text: str,
+                count: int = 5, distance_threshold: float = 0.0,
+                tags: Optional[List[str]] = None) -> List[Dict[str, Any]]:
+         """
+         Perform hybrid search (vector + keyword)
+
+         Args:
+             query_vector: Embedding vector for the query
+             enhanced_text: Processed query text for keyword search
+             count: Number of results to return
+             distance_threshold: Minimum similarity score
+             tags: Filter by tags
+
+         Returns:
+             List of search results with scores and metadata
+         """
+         self._ensure_connection()
+
+         # Vector search
+         vector_results = self._vector_search(query_vector, count * 2, tags)
+
+         # Keyword search
+         keyword_results = self._keyword_search(enhanced_text, count * 2, tags)
+
+         # Merge and rank results
+         merged_results = self._merge_results(vector_results, keyword_results)
+
+         # Filter by distance threshold
+         filtered_results = [
+             r for r in merged_results
+             if r['score'] >= distance_threshold
+         ]
+
+         return filtered_results[:count]
+
+     def _vector_search(self, query_vector: List[float], count: int,
+                        tags: Optional[List[str]] = None) -> List[Dict[str, Any]]:
+         """Perform vector similarity search"""
+         with self.conn.cursor() as cursor:
+             # Build query
+             query = f"""
+                 SELECT id, content, filename, section, tags, metadata,
+                        1 - (embedding <=> %s::vector) as similarity
+                 FROM {self.table_name}
+                 WHERE embedding IS NOT NULL
+             """
+
+             params = [query_vector]
+
+             # Add tag filter if specified
+             if tags:
+                 query += " AND tags ?| %s"
+                 params.append(tags)
+
+             query += " ORDER BY embedding <=> %s::vector LIMIT %s"
+             params.extend([query_vector, count])
+
+             cursor.execute(query, params)
+
+             results = []
+             for row in cursor.fetchall():
+                 chunk_id, content, filename, section, tags_json, metadata_json, similarity = row
+
+                 results.append({
+                     'id': chunk_id,
+                     'content': content,
+                     'score': float(similarity),
+                     'metadata': {
+                         'filename': filename,
+                         'section': section,
+                         'tags': tags_json if isinstance(tags_json, list) else [],
+                         **metadata_json
+                     },
+                     'search_type': 'vector'
+                 })
+
+             return results
+
+     def _keyword_search(self, enhanced_text: str, count: int,
+                         tags: Optional[List[str]] = None) -> List[Dict[str, Any]]:
+         """Perform full-text search"""
+         with self.conn.cursor() as cursor:
+             # Use PostgreSQL text search
+             query = f"""
+                 SELECT id, content, filename, section, tags, metadata,
+                        ts_rank(to_tsvector('english', content),
+                                plainto_tsquery('english', %s)) as rank
+                 FROM {self.table_name}
+                 WHERE to_tsvector('english', content) @@ plainto_tsquery('english', %s)
+             """
+
+             params = [enhanced_text, enhanced_text]
+
+             # Add tag filter if specified
+             if tags:
+                 query += " AND tags ?| %s"
+                 params.append(tags)
+
+             query += " ORDER BY rank DESC LIMIT %s"
+             params.append(count)
+
+             cursor.execute(query, params)
+
+             results = []
+             for row in cursor.fetchall():
+                 chunk_id, content, filename, section, tags_json, metadata_json, rank = row
+
+                 # Normalize rank to 0-1 score
+                 score = min(1.0, rank / 10.0)
+
+                 results.append({
+                     'id': chunk_id,
+                     'content': content,
+                     'score': float(score),
+                     'metadata': {
+                         'filename': filename,
+                         'section': section,
+                         'tags': tags_json if isinstance(tags_json, list) else [],
+                         **metadata_json
+                     },
+                     'search_type': 'keyword'
+                 })
+
+             return results
+
+     def _merge_results(self, vector_results: List[Dict[str, Any]],
+                        keyword_results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+         """Merge and rank results from vector and keyword search"""
+         # Create a map to track unique results
+         results_map = {}
+
+         # Add vector results (weighted higher)
+         for result in vector_results:
+             chunk_id = result['id']
+             if chunk_id not in results_map:
+                 results_map[chunk_id] = result
+                 results_map[chunk_id]['score'] *= 0.7  # Weight vector results
+             else:
+                 # Combine scores if result appears in both
+                 results_map[chunk_id]['score'] += result['score'] * 0.7
+
+         # Add keyword results
+         for result in keyword_results:
+             chunk_id = result['id']
+             if chunk_id not in results_map:
+                 results_map[chunk_id] = result
+                 results_map[chunk_id]['score'] *= 0.3  # Weight keyword results
+             else:
+                 # Combine scores if result appears in both
+                 results_map[chunk_id]['score'] += result['score'] * 0.3
+
+         # Sort by combined score
+         merged = list(results_map.values())
+         merged.sort(key=lambda x: x['score'], reverse=True)
+
+         return merged
+
+     def get_stats(self) -> Dict[str, Any]:
+         """Get statistics for the collection"""
+         backend = PgVectorBackend(self.connection_string)
+         stats = backend.get_stats(self.collection_name)
+         backend.close()
+         return stats
+
+     def close(self):
+         """Close database connection"""
+         if self.conn and not self.conn.closed:
+             self.conn.close()
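
For context, a minimal usage sketch of the two classes added above — assuming a reachable PostgreSQL instance with the vector extension installed; the connection string, collection name, model name, and chunk data below are hypothetical placeholders, and embeddings are assumed to be precomputed by the caller:

from signalwire_agents.search.pgvector_backend import (
    PgVectorBackend, PgVectorSearchBackend
)

conn_str = "postgresql://user:pass@localhost:5432/search"  # placeholder

# Indexing side: create the per-collection schema, then batch-insert chunks
writer = PgVectorBackend(conn_str)
writer.create_schema("docs", embedding_dim=768)
writer.store_chunks(
    chunks=[{
        "content": "SignalWire agents can run skills.",
        "embedding": [0.0] * 768,  # placeholder vector from your embedding model
        "filename": "agents.md",
        "section": "intro",
        "tags": ["docs"],
    }],
    collection_name="docs",
    config={
        "model_name": "example-model",  # hypothetical model identifier
        "embedding_dimensions": 768,
        "chunking_strategy": "sentence",
    },
)
writer.close()

# Query side: hybrid search; per _merge_results above, vector hits are
# weighted 0.7 and keyword hits 0.3 before the merged list is re-ranked
reader = PgVectorSearchBackend(conn_str, "docs")
hits = reader.search(query_vector=[0.0] * 768, enhanced_text="skills",
                     count=3, tags=["docs"])
reader.close()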
signalwire_agents/search/search_engine.py
@@ -26,11 +26,38 @@ logger = logging.getLogger(__name__)
  class SearchEngine:
      """Hybrid search engine for vector and keyword search"""
 
-     def __init__(self, index_path: str, model=None):
-         self.index_path = index_path
+     def __init__(self, backend: str = 'sqlite', index_path: Optional[str] = None,
+                  connection_string: Optional[str] = None, collection_name: Optional[str] = None,
+                  model=None):
+         """
+         Initialize search engine
+
+         Args:
+             backend: Storage backend ('sqlite' or 'pgvector')
+             index_path: Path to .swsearch file (for sqlite backend)
+             connection_string: PostgreSQL connection string (for pgvector backend)
+             collection_name: Collection name (for pgvector backend)
+             model: Optional sentence transformer model
+         """
+         self.backend = backend
          self.model = model
-         self.config = self._load_config()
-         self.embedding_dim = int(self.config.get('embedding_dimensions', 768))
+
+         if backend == 'sqlite':
+             if not index_path:
+                 raise ValueError("index_path is required for sqlite backend")
+             self.index_path = index_path
+             self.config = self._load_config()
+             self.embedding_dim = int(self.config.get('embedding_dimensions', 768))
+             self._backend = None  # SQLite uses direct connection
+         elif backend == 'pgvector':
+             if not connection_string or not collection_name:
+                 raise ValueError("connection_string and collection_name are required for pgvector backend")
+             from .pgvector_backend import PgVectorSearchBackend
+             self._backend = PgVectorSearchBackend(connection_string, collection_name)
+             self.config = self._backend.config
+             self.embedding_dim = int(self.config.get('embedding_dimensions', 768))
+         else:
+             raise ValueError(f"Invalid backend '{backend}'. Must be 'sqlite' or 'pgvector'")
 
      def _load_config(self) -> Dict[str, str]:
          """Load index configuration"""
@@ -62,6 +89,11 @@ class SearchEngine:
              List of search results with scores and metadata
          """
 
+         # Use pgvector backend if available
+         if self.backend == 'pgvector':
+             return self._backend.search(query_vector, enhanced_text, count, distance_threshold, tags)
+
+         # Original SQLite implementation
          if not np or not cosine_similarity:
              logger.warning("NumPy or scikit-learn not available. Using keyword search only.")
              return self._keyword_search_only(enhanced_text, count, tags)
@@ -333,6 +365,11 @@ class SearchEngine:
 
      def get_stats(self) -> Dict[str, Any]:
          """Get statistics about the search index"""
+         # Use pgvector backend if available
+         if self.backend == 'pgvector':
+             return self._backend.get_stats()
+
+         # Original SQLite implementation
          conn = sqlite3.connect(self.index_path)
          cursor = conn.cursor()
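
Net effect of the SearchEngine changes: the sqlite path is unchanged, while backend='pgvector' routes search() and get_stats() through the new PgVectorSearchBackend. A construction sketch of both paths, using only the new __init__ signature shown above — the index path and connection values are placeholders:

from signalwire_agents.search.search_engine import SearchEngine

# Existing behavior: local .swsearch index (path is a placeholder)
local = SearchEngine(backend='sqlite', index_path='docs.swsearch')

# New behavior: delegate to PostgreSQL/pgvector (values are placeholders)
remote = SearchEngine(
    backend='pgvector',
    connection_string='postgresql://user:pass@localhost:5432/search',
    collection_name='docs',
)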