signalwire-agents 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- signalwire_agents/__init__.py +43 -4
- signalwire_agents/agent_server.py +268 -15
- signalwire_agents/cli/__init__.py +9 -0
- signalwire_agents/cli/build_search.py +457 -0
- signalwire_agents/cli/test_swaig.py +2609 -0
- signalwire_agents/core/agent_base.py +691 -82
- signalwire_agents/core/contexts.py +289 -0
- signalwire_agents/core/data_map.py +499 -0
- signalwire_agents/core/function_result.py +57 -10
- signalwire_agents/core/logging_config.py +232 -0
- signalwire_agents/core/skill_base.py +27 -37
- signalwire_agents/core/skill_manager.py +89 -23
- signalwire_agents/core/swaig_function.py +13 -1
- signalwire_agents/core/swml_handler.py +37 -13
- signalwire_agents/core/swml_service.py +37 -28
- signalwire_agents/search/__init__.py +131 -0
- signalwire_agents/search/document_processor.py +764 -0
- signalwire_agents/search/index_builder.py +534 -0
- signalwire_agents/search/query_processor.py +371 -0
- signalwire_agents/search/search_engine.py +383 -0
- signalwire_agents/search/search_service.py +251 -0
- signalwire_agents/skills/datasphere/__init__.py +12 -0
- signalwire_agents/skills/datasphere/skill.py +229 -0
- signalwire_agents/skills/datasphere_serverless/__init__.py +1 -0
- signalwire_agents/skills/datasphere_serverless/skill.py +156 -0
- signalwire_agents/skills/datetime/skill.py +9 -5
- signalwire_agents/skills/joke/__init__.py +1 -0
- signalwire_agents/skills/joke/skill.py +88 -0
- signalwire_agents/skills/math/skill.py +9 -6
- signalwire_agents/skills/native_vector_search/__init__.py +1 -0
- signalwire_agents/skills/native_vector_search/skill.py +352 -0
- signalwire_agents/skills/registry.py +10 -4
- signalwire_agents/skills/web_search/skill.py +57 -21
- signalwire_agents/skills/wikipedia/__init__.py +9 -0
- signalwire_agents/skills/wikipedia/skill.py +180 -0
- signalwire_agents/utils/__init__.py +14 -0
- signalwire_agents/utils/schema_utils.py +111 -44
- signalwire_agents-0.1.12.dist-info/METADATA +863 -0
- signalwire_agents-0.1.12.dist-info/RECORD +67 -0
- {signalwire_agents-0.1.10.dist-info → signalwire_agents-0.1.12.dist-info}/WHEEL +1 -1
- signalwire_agents-0.1.12.dist-info/entry_points.txt +3 -0
- signalwire_agents-0.1.10.dist-info/METADATA +0 -319
- signalwire_agents-0.1.10.dist-info/RECORD +0 -44
- {signalwire_agents-0.1.10.data → signalwire_agents-0.1.12.data}/data/schema.json +0 -0
- {signalwire_agents-0.1.10.dist-info → signalwire_agents-0.1.12.dist-info}/licenses/LICENSE +0 -0
- {signalwire_agents-0.1.10.dist-info → signalwire_agents-0.1.12.dist-info}/top_level.txt +0 -0
signalwire_agents/cli/build_search.py
@@ -0,0 +1,457 @@
+"""
+Copyright (c) 2025 SignalWire
+
+This file is part of the SignalWire AI Agents SDK.
+
+Licensed under the MIT License.
+See LICENSE file in the project root for full license information.
+"""
+
+import argparse
+import sys
+from pathlib import Path
+from ..search.index_builder import IndexBuilder
+
+def main():
+    """Main entry point for the build-search command"""
+    parser = argparse.ArgumentParser(
+        description='Build local search index from documents',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  # Basic usage with directory (defaults to sentence chunking with 50 sentences per chunk)
+  sw-search ./docs
+
+  # Multiple directories
+  sw-search ./docs ./examples --file-types md,txt,py
+
+  # Individual files
+  sw-search README.md ./docs/guide.md ./src/main.py
+
+  # Mixed sources (directories and files)
+  sw-search ./docs README.md ./examples specific_file.txt --file-types md,txt,py
+
+  # Sentence-based chunking with custom parameters
+  sw-search ./docs \\
+    --chunking-strategy sentence \\
+    --max-sentences-per-chunk 30 \\
+    --split-newlines 2
+
+  # Sliding window chunking
+  sw-search ./docs \\
+    --chunking-strategy sliding \\
+    --chunk-size 100 \\
+    --overlap-size 20
+
+  # Paragraph-based chunking
+  sw-search ./docs \\
+    --chunking-strategy paragraph \\
+    --file-types md,txt,rst
+
+  # Page-based chunking (good for PDFs)
+  sw-search ./docs \\
+    --chunking-strategy page \\
+    --file-types pdf
+
+  # Full configuration example
+  sw-search ./docs ./examples README.md \\
+    --output ./knowledge.swsearch \\
+    --chunking-strategy sentence \\
+    --max-sentences-per-chunk 50 \\
+    --file-types md,txt,rst,py \\
+    --exclude "**/test/**,**/__pycache__/**" \\
+    --languages en,es,fr \\
+    --model sentence-transformers/all-mpnet-base-v2 \\
+    --tags documentation,api \\
+    --verbose
+
+  # Validate an existing index
+  sw-search validate ./docs.swsearch
+
+  # Search within an index
+  sw-search search ./docs.swsearch "how to create an agent"
+  sw-search search ./docs.swsearch "API reference" --count 3 --verbose
+  sw-search search ./docs.swsearch "configuration" --tags documentation --json
+    """
+    )
+
+    parser.add_argument(
+        'sources',
+        nargs='+',
+        help='Source files and/or directories to index'
+    )
+
+    parser.add_argument(
+        '--output',
+        help='Output .swsearch file (default: sources.swsearch)'
+    )
+
+    parser.add_argument(
+        '--chunking-strategy',
+        choices=['sentence', 'sliding', 'paragraph', 'page'],
+        default='sentence',
+        help='Chunking strategy to use (default: sentence)'
+    )
+
+    parser.add_argument(
+        '--max-sentences-per-chunk',
+        type=int,
+        default=50,
+        help='Maximum sentences per chunk for sentence strategy (default: 50)'
+    )
+
+    parser.add_argument(
+        '--chunk-size',
+        type=int,
+        default=50,
+        help='Chunk size in words for sliding window strategy (default: 50)'
+    )
+
+    parser.add_argument(
+        '--overlap-size',
+        type=int,
+        default=10,
+        help='Overlap size in words for sliding window strategy (default: 10)'
+    )
+
+    parser.add_argument(
+        '--split-newlines',
+        type=int,
+        help='Split on multiple newlines for sentence strategy (optional)'
+    )
+
+    parser.add_argument(
+        '--file-types',
+        default='md,txt,rst',
+        help='Comma-separated file extensions to include for directories (default: md,txt,rst)'
+    )
+
+    parser.add_argument(
+        '--exclude',
+        help='Comma-separated glob patterns to exclude (e.g., "**/test/**,**/__pycache__/**")'
+    )
+
+    parser.add_argument(
+        '--languages',
+        default='en',
+        help='Comma-separated language codes (default: en)'
+    )
+
+    parser.add_argument(
+        '--model',
+        default='sentence-transformers/all-mpnet-base-v2',
+        help='Sentence transformer model name (default: sentence-transformers/all-mpnet-base-v2)'
+    )
+
+    parser.add_argument(
+        '--tags',
+        help='Comma-separated tags to add to all chunks'
+    )
+
+    parser.add_argument(
+        '--verbose',
+        action='store_true',
+        help='Enable verbose output'
+    )
+
+    parser.add_argument(
+        '--validate',
+        action='store_true',
+        help='Validate the created index after building'
+    )
+
+    args = parser.parse_args()
+
+    # Validate sources
+    valid_sources = []
+    for source in args.sources:
+        source_path = Path(source)
+        if not source_path.exists():
+            print(f"Warning: Source does not exist, skipping: {source}")
+            continue
+        valid_sources.append(source_path)
+
+    if not valid_sources:
+        print("Error: No valid sources found")
+        sys.exit(1)
+
+    # Default output filename
+    if not args.output:
+        if len(valid_sources) == 1:
+            # Single source - use its name
+            source_name = valid_sources[0].stem if valid_sources[0].is_file() else valid_sources[0].name
+            args.output = f"{source_name}.swsearch"
+        else:
+            # Multiple sources - use generic name
+            args.output = "sources.swsearch"
+
+    # Ensure output has .swsearch extension
+    if not args.output.endswith('.swsearch'):
+        args.output += '.swsearch'
+
+    # Parse lists
+    file_types = [ft.strip() for ft in args.file_types.split(',')]
+    exclude_patterns = [p.strip() for p in args.exclude.split(',')] if args.exclude else None
+    languages = [lang.strip() for lang in args.languages.split(',')]
+    tags = [tag.strip() for tag in args.tags.split(',')] if args.tags else None
+
+    if args.verbose:
+        print(f"Building search index:")
+        print(f" Sources: {[str(s) for s in valid_sources]}")
+        print(f" Output: {args.output}")
+        print(f" File types (for directories): {file_types}")
+        print(f" Exclude patterns: {exclude_patterns}")
+        print(f" Languages: {languages}")
+        print(f" Model: {args.model}")
+        print(f" Chunking strategy: {args.chunking_strategy}")
+
+        if args.chunking_strategy == 'sentence':
+            print(f" Max sentences per chunk: {args.max_sentences_per_chunk}")
+            if args.split_newlines:
+                print(f" Split on newlines: {args.split_newlines}")
+        elif args.chunking_strategy == 'sliding':
+            print(f" Chunk size (words): {args.chunk_size}")
+            print(f" Overlap size (words): {args.overlap_size}")
+        elif args.chunking_strategy == 'paragraph':
+            print(f" Chunking by paragraphs (double newlines)")
+        elif args.chunking_strategy == 'page':
+            print(f" Chunking by pages")
+
+        print(f" Tags: {tags}")
+        print()
+
+    try:
+        # Create index builder
+        builder = IndexBuilder(
+            model_name=args.model,
+            chunking_strategy=args.chunking_strategy,
+            max_sentences_per_chunk=args.max_sentences_per_chunk,
+            chunk_size=args.chunk_size,
+            chunk_overlap=args.overlap_size,
+            split_newlines=args.split_newlines,
+            verbose=args.verbose
+        )
+
+        # Build index with multiple sources
+        builder.build_index_from_sources(
+            sources=valid_sources,
+            output_file=args.output,
+            file_types=file_types,
+            exclude_patterns=exclude_patterns,
+            languages=languages,
+            tags=tags
+        )
+
+        # Validate if requested
+        if args.validate:
+            if args.verbose:
+                print("\nValidating index...")
+
+            validation = builder.validate_index(args.output)
+            if validation['valid']:
+                print(f"✓ Index validation successful:")
+                print(f" Chunks: {validation['chunk_count']}")
+                print(f" Files: {validation['file_count']}")
+                if args.verbose:
+                    print(f" Config: {validation['config']}")
+            else:
+                print(f"✗ Index validation failed: {validation['error']}")
+                sys.exit(1)
+
+        print(f"\n✓ Search index created successfully: {args.output}")
+
+    except KeyboardInterrupt:
+        print("\n\nBuild interrupted by user")
+        sys.exit(1)
+    except Exception as e:
+        print(f"\nError building index: {e}")
+        if args.verbose:
+            import traceback
+            traceback.print_exc()
+        sys.exit(1)
+
+def validate_command():
+    """Validate an existing search index"""
+    parser = argparse.ArgumentParser(description='Validate a search index file')
+    parser.add_argument('index_file', help='Path to .swsearch file to validate')
+    parser.add_argument('--verbose', action='store_true', help='Show detailed information')
+
+    args = parser.parse_args()
+
+    if not Path(args.index_file).exists():
+        print(f"Error: Index file does not exist: {args.index_file}")
+        sys.exit(1)
+
+    try:
+        from ..search.index_builder import IndexBuilder
+        builder = IndexBuilder()
+
+        validation = builder.validate_index(args.index_file)
+
+        if validation['valid']:
+            print(f"✓ Index is valid: {args.index_file}")
+            print(f" Chunks: {validation['chunk_count']}")
+            print(f" Files: {validation['file_count']}")
+
+            if args.verbose and 'config' in validation:
+                print("\nConfiguration:")
+                for key, value in validation['config'].items():
+                    print(f" {key}: {value}")
+        else:
+            print(f"✗ Index validation failed: {validation['error']}")
+            sys.exit(1)
+
+    except Exception as e:
+        print(f"Error validating index: {e}")
+        if args.verbose:
+            import traceback
+            traceback.print_exc()
+        sys.exit(1)
+
+def search_command():
+    """Search within an existing search index"""
+    parser = argparse.ArgumentParser(description='Search within a .swsearch index file')
+    parser.add_argument('index_file', help='Path to .swsearch file to search')
+    parser.add_argument('query', help='Search query')
+    parser.add_argument('--count', type=int, default=5, help='Number of results to return (default: 5)')
+    parser.add_argument('--distance-threshold', type=float, default=0.0, help='Minimum similarity score (default: 0.0)')
+    parser.add_argument('--tags', help='Comma-separated tags to filter by')
+    parser.add_argument('--nlp-backend', choices=['nltk', 'spacy'], default='nltk',
+                        help='NLP backend to use: nltk (fast, default) or spacy (better quality, requires model download)')
+    parser.add_argument('--verbose', action='store_true', help='Show detailed information')
+    parser.add_argument('--json', action='store_true', help='Output results as JSON')
+    parser.add_argument('--no-content', action='store_true', help='Hide content in results (show only metadata)')
+
+    args = parser.parse_args()
+
+    if not Path(args.index_file).exists():
+        print(f"Error: Index file does not exist: {args.index_file}")
+        sys.exit(1)
+
+    try:
+        # Import search dependencies
+        try:
+            from ..search.search_engine import SearchEngine
+            from ..search.query_processor import preprocess_query
+        except ImportError as e:
+            print(f"Error: Search functionality not available. Install with: pip install signalwire-agents[search]")
+            print(f"Details: {e}")
+            sys.exit(1)
+
+        # Load search engine
+        if args.verbose:
+            print(f"Loading search index: {args.index_file}")
+
+        engine = SearchEngine(args.index_file)
+
+        # Get index stats
+        stats = engine.get_stats()
+        if args.verbose:
+            print(f"Index contains {stats['total_chunks']} chunks from {stats['total_files']} files")
+            print(f"Searching for: '{args.query}'")
+            print(f"NLP Backend: {args.nlp_backend}")
+            print()
+
+        # Preprocess query
+        enhanced = preprocess_query(args.query, vector=True, nlp_backend=args.nlp_backend)
+
+        # Parse tags if provided
+        tags = [tag.strip() for tag in args.tags.split(',')] if args.tags else None
+
+        # Perform search
+        results = engine.search(
+            query_vector=enhanced.get('vector'),
+            enhanced_text=enhanced.get('enhanced_text', args.query),
+            count=args.count,
+            distance_threshold=args.distance_threshold,
+            tags=tags
+        )
+
+        if args.json:
+            # Output as JSON
+            import json
+            output = {
+                'query': args.query,
+                'enhanced_query': enhanced.get('enhanced_text', args.query),
+                'count': len(results),
+                'results': []
+            }
+
+            for i, result in enumerate(results):
+                result_data = {
+                    'rank': i + 1,
+                    'score': result['score'],
+                    'metadata': result['metadata']
+                }
+                if not args.no_content:
+                    result_data['content'] = result['content']
+                output['results'].append(result_data)
+
+            print(json.dumps(output, indent=2))
+        else:
+            # Human-readable output
+            if not results:
+                print(f"No results found for '{args.query}'")
+                if tags:
+                    print(f"(searched with tags: {tags})")
+                sys.exit(0)
+
+            print(f"Found {len(results)} result(s) for '{args.query}':")
+            if enhanced.get('enhanced_text') != args.query:
+                print(f"Enhanced query: '{enhanced.get('enhanced_text')}'")
+            if tags:
+                print(f"Filtered by tags: {tags}")
+            print("=" * 80)
+
+            for i, result in enumerate(results):
+                print(f"\n[{i+1}] Score: {result['score']:.4f}")
+
+                # Show metadata
+                metadata = result['metadata']
+                print(f"File: {metadata.get('filename', 'Unknown')}")
+                if metadata.get('section'):
+                    print(f"Section: {metadata['section']}")
+                if metadata.get('line_start'):
+                    print(f"Lines: {metadata['line_start']}-{metadata.get('line_end', metadata['line_start'])}")
+                if metadata.get('tags'):
+                    print(f"Tags: {', '.join(metadata['tags'])}")
+
+                # Show content unless suppressed
+                if not args.no_content:
+                    content = result['content']
+                    if len(content) > 500 and not args.verbose:
+                        content = content[:500] + "..."
+                    print(f"\nContent:\n{content}")
+
+                if i < len(results) - 1:
+                    print("-" * 80)
+
+    except Exception as e:
+        print(f"Error searching index: {e}")
+        if args.verbose:
+            import traceback
+            traceback.print_exc()
+        sys.exit(1)
+
+def console_entry_point():
+    """Console script entry point for pip installation"""
+    import sys
+
+    # Check for subcommands
+    if len(sys.argv) > 1:
+        if sys.argv[1] == 'validate':
+            # Remove 'validate' from argv and call validate_command
+            sys.argv.pop(1)
+            validate_command()
+            return
+        elif sys.argv[1] == 'search':
+            # Remove 'search' from argv and call search_command
+            sys.argv.pop(1)
+            search_command()
+            return
+
+    # Regular build command
+    main()
+
+if __name__ == '__main__':
+    main()