signalwire-agents 0.1.47__py3-none-any.whl → 0.1.49__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,7 +18,7 @@ A package for building AI agents using SignalWire's AI and SWML capabilities.
 from .core.logging_config import configure_logging
 configure_logging()
 
-__version__ = "0.1.47"
+__version__ = "0.1.49"
 
 # Import core classes for easier access
 from .core.agent_base import AgentBase
@@ -10,6 +10,9 @@ See LICENSE file in the project root for full license information.
 import argparse
 import sys
 from pathlib import Path
+from datetime import datetime
+
+from signalwire_agents.search.models import MODEL_ALIASES, DEFAULT_MODEL, resolve_model_alias
 
 def main():
     """Main entry point for the build-search command"""
@@ -66,11 +69,35 @@ Examples:
   sw-search ./docs \\
     --chunking-strategy qa
 
+  # Model selection examples (performance vs quality tradeoff)
+  sw-search ./docs --model mini     # Fastest (~5x faster), 384 dims, good for most use cases
+  sw-search ./docs --model base     # Balanced speed/quality, 768 dims (previous default)
+  sw-search ./docs --model large    # Best quality (same as base currently)
+  # Or use full model names:
+  sw-search ./docs --model sentence-transformers/all-MiniLM-L6-v2
+  sw-search ./docs --model sentence-transformers/all-mpnet-base-v2
 
   # JSON-based chunking (pre-chunked content)
   sw-search ./api_chunks.json \
     --chunking-strategy json \
     --file-types json
+
+  # Export chunks to JSON for review (single file)
+  sw-search ./docs \\
+    --output-format json \\
+    --output all_chunks.json
+
+  # Export chunks to JSON (one file per source)
+  sw-search ./docs \\
+    --output-format json \\
+    --output-dir ./chunks/
+
+  # Build index from exported JSON chunks
+  sw-search ./chunks/ \\
+    --chunking-strategy json \\
+    --file-types json \\
+    --output final.swsearch
+
   # Full configuration example
   sw-search ./docs ./examples README.md \\
     --output ./knowledge.swsearch \\
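
Note: the alias examples above cite 384 vs 768 dimensions. Since an index is typically only queryable with an embedding model of the same dimensionality it was built with, it can be worth confirming what a model produces. A quick check using the sentence-transformers package these models come from:

    # Print embedding dimensions for the aliased models
    # (requires sentence-transformers; names taken from the help text above)
    from sentence_transformers import SentenceTransformer

    for name in ["sentence-transformers/all-MiniLM-L6-v2",   # alias "mini"
                 "sentence-transformers/all-mpnet-base-v2"]: # alias "base"
        model = SentenceTransformer(name)
        print(name, "->", model.get_sentence_embedding_dimension(), "dims")
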
@@ -95,6 +122,12 @@ Examples:
   sw-search remote http://localhost:8001 "how to create an agent" --index-name docs
   sw-search remote localhost:8001 "API reference" --index-name docs --count 3 --verbose
 
+  # Migrate between backends
+  sw-search migrate ./docs.swsearch --to-pgvector \\
+    --connection-string "postgresql://user:pass@localhost/db" \\
+    --collection-name docs_collection
+  sw-search migrate --info ./docs.swsearch
+
   # PostgreSQL pgvector backend
   sw-search ./docs \\
     --backend pgvector \\
@@ -126,6 +159,18 @@ Examples:
         help='Output .swsearch file (default: sources.swsearch) or collection name for pgvector'
     )
 
+    parser.add_argument(
+        '--output-dir',
+        help='Output directory for results (creates one file per source file when used with --output-format json, or auto-names index files)'
+    )
+
+    parser.add_argument(
+        '--output-format',
+        choices=['index', 'json'],
+        default='index',
+        help='Output format: index (create search index) or json (export chunks as JSON) (default: index)'
+    )
+
     parser.add_argument(
         '--backend',
         choices=['sqlite', 'pgvector'],
@@ -197,8 +242,8 @@ Examples:
 
     parser.add_argument(
         '--model',
-        default='sentence-transformers/all-mpnet-base-v2',
-        help='Sentence transformer model name (default: sentence-transformers/all-mpnet-base-v2)'
+        default=DEFAULT_MODEL,
+        help=f'Sentence transformer model name or alias (mini/base/large). Default: mini ({DEFAULT_MODEL})'
     )
 
     parser.add_argument(
@@ -241,6 +286,9 @@ Examples:
 
     args = parser.parse_args()
 
+    # Resolve model aliases
+    args.model = resolve_model_alias(args.model)
+
     # Validate sources
     valid_sources = []
     for source in args.sources:
@@ -259,8 +307,35 @@ Examples:
         print("Error: --connection-string is required for pgvector backend")
         sys.exit(1)
 
-    # Default output filename
-    if not args.output:
+    # Validate output options
+    if args.output and args.output_dir:
+        print("Error: Cannot specify both --output and --output-dir")
+        sys.exit(1)
+
+    # Handle JSON output format differently
+    if args.output_format == 'json':
+        # JSON export doesn't use backend
+        if args.backend != 'sqlite':
+            print("Warning: --backend is ignored when using --output-format json")
+
+        # Determine output location
+        if args.output_dir:
+            # Multiple files mode
+            output_path = Path(args.output_dir)
+            if not output_path.exists():
+                output_path.mkdir(parents=True, exist_ok=True)
+        elif args.output:
+            # Single file mode
+            output_path = Path(args.output)
+            if not output_path.suffix:
+                output_path = output_path.with_suffix('.json')
+        else:
+            # Default to single file
+            output_path = Path('chunks.json')
+        args.output = str(output_path)
+
+    # Default output filename (for index format)
+    if args.output_format == 'index' and not args.output and not args.output_dir:
         if args.backend == 'sqlite':
             if len(valid_sources) == 1:
                 # Single source - use its name
@@ -277,8 +352,25 @@ Examples:
         else:
             args.output = "documents"
 
-    # Ensure output has .swsearch extension for sqlite
-    if args.backend == 'sqlite' and not args.output.endswith('.swsearch'):
+    # Handle --output-dir for index format
+    if args.output_format == 'index' and args.output_dir:
+        # Auto-generate output filename in the directory
+        if len(valid_sources) == 1:
+            source_name = valid_sources[0].stem if valid_sources[0].is_file() else valid_sources[0].name
+        else:
+            source_name = "combined"
+
+        output_dir = Path(args.output_dir)
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        if args.backend == 'sqlite':
+            args.output = str(output_dir / f"{source_name}.swsearch")
+        else:
+            # For pgvector, still use the name as collection
+            args.output = source_name
+
+    # Ensure output has .swsearch extension for sqlite (but not for JSON format)
+    if args.output_format == 'index' and args.backend == 'sqlite' and args.output and not args.output.endswith('.swsearch'):
         args.output += '.swsearch'
 
     # Parse lists
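
Note: with the hunk above, --output-dir now also works for index builds: the file name is derived from a single source's stem (or a directory's name), falling back to "combined" for multiple sources. Illustrative invocations (not from the package's own help text):

    sw-search ./docs --output-dir ./indexes/          # writes ./indexes/docs.swsearch
    sw-search ./a.md ./b.md --output-dir ./indexes/   # writes ./indexes/combined.swsearch
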
@@ -325,6 +417,103 @@ Examples:
     print()
 
     try:
+        # Handle JSON export mode
+        if args.output_format == 'json':
+            # Import what we need for chunking
+            from signalwire_agents.search.index_builder import IndexBuilder
+            import json
+
+            builder = IndexBuilder(
+                chunking_strategy=args.chunking_strategy,
+                max_sentences_per_chunk=args.max_sentences_per_chunk,
+                chunk_size=args.chunk_size,
+                chunk_overlap=args.overlap_size,
+                split_newlines=args.split_newlines,
+                index_nlp_backend=args.index_nlp_backend,
+                verbose=args.verbose,
+                semantic_threshold=args.semantic_threshold,
+                topic_threshold=args.topic_threshold
+            )
+
+            # Process files and export chunks
+            all_chunks = []
+            chunk_files_created = []
+
+            # Discover files from sources
+            files = builder._discover_files_from_sources(valid_sources, file_types, exclude_patterns)
+
+            if args.verbose:
+                print(f"Processing {len(files)} files...")
+
+            for file_path in files:
+                try:
+                    # Determine base directory for relative paths
+                    base_dir = builder._get_base_directory_for_file(file_path, valid_sources)
+
+                    # Process file into chunks
+                    chunks = builder._process_file(file_path, base_dir, tags)
+
+                    if args.output_dir:
+                        # Create individual JSON file
+                        relative_path = file_path.relative_to(base_dir) if base_dir else file_path.name
+                        json_filename = relative_path.with_suffix('.json')
+                        json_path = Path(args.output_dir) / json_filename
+
+                        # Create subdirectories if needed
+                        json_path.parent.mkdir(parents=True, exist_ok=True)
+
+                        # Save chunks to JSON
+                        chunk_data = {
+                            "chunks": chunks,
+                            "metadata": {
+                                "source_file": str(relative_path),
+                                "total_chunks": len(chunks),
+                                "chunking_strategy": args.chunking_strategy,
+                                "processing_date": datetime.now().isoformat()
+                            }
+                        }
+
+                        with open(json_path, 'w', encoding='utf-8') as f:
+                            json.dump(chunk_data, f, indent=2, ensure_ascii=False)
+
+                        chunk_files_created.append(json_path)
+                        if args.verbose:
+                            print(f"  Created: {json_path} ({len(chunks)} chunks)")
+                    else:
+                        # Accumulate all chunks for single file output
+                        all_chunks.extend(chunks)
+
+                except Exception as e:
+                    print(f"Error processing {file_path}: {e}")
+                    if args.verbose:
+                        import traceback
+                        traceback.print_exc()
+
+            # Handle single file output
+            if not args.output_dir:
+                output_data = {
+                    "chunks": all_chunks,
+                    "metadata": {
+                        "total_chunks": len(all_chunks),
+                        "total_files": len(files),
+                        "chunking_strategy": args.chunking_strategy,
+                        "processing_date": datetime.now().isoformat()
+                    }
+                }
+
+                with open(args.output, 'w', encoding='utf-8') as f:
+                    json.dump(output_data, f, indent=2, ensure_ascii=False)
+
+                print(f"✓ Exported {len(all_chunks)} chunks to {args.output}")
+            else:
+                print(f"✓ Created {len(chunk_files_created)} JSON files in {args.output_dir}")
+                total_chunks = sum(len(json.load(open(f))['chunks']) for f in chunk_files_created)
+                print(f"  Total chunks: {total_chunks}")
+
+            # Exit early for JSON format
+            return
+
+        # Regular index building mode
         # Create index builder - import only when actually needed
         from signalwire_agents.search.index_builder import IndexBuilder
         builder = IndexBuilder(
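
Note: from the export branch above, a single-file export writes a top-level object with "chunks" and "metadata" keys. The per-chunk fields come from IndexBuilder._process_file, which is not shown in this diff, so only the envelope is certain. A small inspection script against those known keys:

    # Inspect an exported chunks.json (envelope fields taken from the code above;
    # the shape of each individual chunk is not visible in this diff)
    import json

    with open("chunks.json", encoding="utf-8") as f:
        data = json.load(f)

    meta = data["metadata"]
    print(f"{meta['total_chunks']} chunks from {meta['total_files']} files "
          f"({meta['chunking_strategy']} strategy, {meta['processing_date']})")
    print(json.dumps(data["chunks"][0], indent=2)[:300])  # peek at the first chunk
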
@@ -370,7 +559,13 @@ Examples:
         sys.exit(1)
 
     if args.backend == 'sqlite':
-        print(f"\n✓ Search index created successfully: {args.output}")
+        # Check if the index was actually created
+        import os
+        if os.path.exists(args.output):
+            print(f"\n✓ Search index created successfully: {args.output}")
+        else:
+            print(f"\n✗ Search index creation failed - no files were processed")
+            sys.exit(1)
     else:
         print(f"\n✓ Search collection created successfully: {args.output}")
         print(f"  Connection: {args.connection_string}")
@@ -427,21 +622,41 @@ def search_command():
     """Search within an existing search index"""
     parser = argparse.ArgumentParser(description='Search within a .swsearch index file or pgvector collection')
     parser.add_argument('index_source', help='Path to .swsearch file or collection name for pgvector')
-    parser.add_argument('query', help='Search query')
+    parser.add_argument('query', nargs='?', help='Search query (optional if using --shell)')
     parser.add_argument('--backend', choices=['sqlite', 'pgvector'], default='sqlite',
                         help='Storage backend (default: sqlite)')
     parser.add_argument('--connection-string', help='PostgreSQL connection string for pgvector backend')
+    parser.add_argument('--shell', action='store_true',
+                        help='Interactive shell mode - load once and search multiple times')
     parser.add_argument('--count', type=int, default=5, help='Number of results to return (default: 5)')
     parser.add_argument('--distance-threshold', type=float, default=0.0, help='Minimum similarity score (default: 0.0)')
     parser.add_argument('--tags', help='Comma-separated tags to filter by')
     parser.add_argument('--query-nlp-backend', choices=['nltk', 'spacy'], default='nltk',
                         help='NLP backend for query processing: nltk (fast, default) or spacy (better quality, slower)')
+    parser.add_argument('--keyword-weight', type=float, default=None,
+                        help='Manual keyword weight (0.0-1.0). Overrides automatic weight detection.')
     parser.add_argument('--verbose', action='store_true', help='Show detailed information')
     parser.add_argument('--json', action='store_true', help='Output results as JSON')
     parser.add_argument('--no-content', action='store_true', help='Hide content in results (show only metadata)')
+    parser.add_argument('--model', help='Override embedding model for query (mini/base/large or full model name)')
 
     args = parser.parse_args()
 
+    # Validate arguments
+    if not args.shell and not args.query:
+        print("Error: Query is required unless using --shell mode")
+        sys.exit(1)
+
+    # Resolve model aliases
+    if args.model and args.model in MODEL_ALIASES:
+        args.model = MODEL_ALIASES[args.model]
+
+    # Validate keyword weight if provided
+    if args.keyword_weight is not None:
+        if args.keyword_weight < 0.0 or args.keyword_weight > 1.0:
+            print("Error: --keyword-weight must be between 0.0 and 1.0")
+            sys.exit(1)
+
     # Validate backend configuration
     if args.backend == 'pgvector' and not args.connection_string:
         print("Error: --connection-string is required for pgvector backend")
@@ -469,21 +684,167 @@ def search_command():
         print(f"Connecting to pgvector collection: {args.index_source}")
 
     if args.backend == 'sqlite':
-        engine = SearchEngine(backend='sqlite', index_path=args.index_source)
+        # Pass the model from the index or override if specified
+        model = args.model if args.model else None
+        engine = SearchEngine(backend='sqlite', index_path=args.index_source, model=model)
     else:
+        # Pass the model override if specified
+        model = args.model if args.model else None
         engine = SearchEngine(backend='pgvector', connection_string=args.connection_string,
-                              collection_name=args.index_source)
+                              collection_name=args.index_source, model=model)
 
     # Get index stats
     stats = engine.get_stats()
+
+    # Get the model from index config if not overridden
+    model_to_use = args.model
+    if not model_to_use and 'config' in stats:
+        # SQLite uses 'embedding_model', pgvector uses 'model_name'
+        model_to_use = stats['config'].get('embedding_model') or stats['config'].get('model_name')
+
+    # Shell mode implementation
+    if args.shell:
+        import time
+        print(f"Search Shell - Index: {args.index_source}")
+        print(f"Backend: {args.backend}")
+        print(f"Index contains {stats['total_chunks']} chunks from {stats['total_files']} files")
+        if model_to_use:
+            print(f"Model: {model_to_use}")
+        print("Type 'exit' or 'quit' to leave, 'help' for options")
+        print("-" * 60)
+
+        while True:
+            try:
+                query = input("\nsearch> ").strip()
+
+                if not query:
+                    continue
+
+                if query.lower() in ['exit', 'quit', 'q']:
+                    print("Goodbye!")
+                    break
+
+                if query.lower() == 'help':
+                    print("\nShell commands:")
+                    print("  help           - Show this help")
+                    print("  exit/quit/q    - Exit shell")
+                    print("  count=N        - Set result count (current: {})".format(args.count))
+                    print("  tags=tag1,tag2 - Set tag filter (current: {})".format(args.tags or 'none'))
+                    print("  verbose        - Toggle verbose output")
+                    print("\nOr type any search query...")
+                    continue
+
+                # Handle shell commands
+                if query.startswith('count='):
+                    try:
+                        args.count = int(query.split('=')[1])
+                        print(f"Result count set to: {args.count}")
+                    except:
+                        print("Invalid count value")
+                    continue
+
+                if query.startswith('tags='):
+                    tag_str = query.split('=', 1)[1]
+                    args.tags = tag_str if tag_str else None
+                    tags = [tag.strip() for tag in args.tags.split(',')] if args.tags else None
+                    print(f"Tags filter set to: {tags or 'none'}")
+                    continue
+
+                if query == 'verbose':
+                    args.verbose = not args.verbose
+                    print(f"Verbose output: {'on' if args.verbose else 'off'}")
+                    continue
+
+                # Perform search with timing
+                start_time = time.time()
+
+                # Preprocess query
+                enhanced = preprocess_query(
+                    query,
+                    vector=True,
+                    query_nlp_backend=args.query_nlp_backend,
+                    model_name=model_to_use,
+                    preserve_original=True,
+                    max_synonyms=2
+                )
+
+                # Parse tags
+                tags = [tag.strip() for tag in args.tags.split(',')] if args.tags else None
+
+                # Perform search
+                results = engine.search(
+                    query_vector=enhanced.get('vector'),
+                    enhanced_text=enhanced.get('enhanced_text', query),
+                    count=args.count,
+                    distance_threshold=args.distance_threshold,
+                    tags=tags,
+                    keyword_weight=args.keyword_weight,
+                    original_query=query
+                )
+
+                search_time = time.time() - start_time
+
+                # Display results
+                if not results:
+                    print(f"\nNo results found for '{query}' ({search_time:.3f}s)")
+                else:
+                    print(f"\nFound {len(results)} result(s) for '{query}' ({search_time:.3f}s):")
+                    if enhanced.get('enhanced_text') != query and args.verbose:
+                        print(f"Enhanced query: '{enhanced.get('enhanced_text')}'")
+                    print("=" * 60)
+
+                    for i, result in enumerate(results):
+                        print(f"\n[{i+1}] Score: {result['score']:.4f}")
+
+                        # Show metadata
+                        metadata = result['metadata']
+                        print(f"File: {metadata.get('filename', 'Unknown')}")
+                        if metadata.get('section'):
+                            print(f"Section: {metadata['section']}")
+
+                        # Show content unless suppressed
+                        if not args.no_content:
+                            content = result['content']
+                            if len(content) > 300 and not args.verbose:
+                                content = content[:300] + "..."
+                            print(f"\n{content}")
+
+                        if i < len(results) - 1:
+                            print("-" * 40)
+
+            except KeyboardInterrupt:
+                print("\nUse 'exit' to quit")
+            except EOFError:
+                print("\nGoodbye!")
+                break
+            except Exception as e:
+                print(f"\nError: {e}")
+                if args.verbose:
+                    import traceback
+                    traceback.print_exc()
+
+        return  # Exit after shell mode
+
+    # Normal single query mode
     if args.verbose:
         print(f"Index contains {stats['total_chunks']} chunks from {stats['total_files']} files")
         print(f"Searching for: '{args.query}'")
         print(f"Query NLP Backend: {args.query_nlp_backend}")
+        if args.model:
+            print(f"Override model: {args.model}")
+        elif model_to_use:
+            print(f"Using index model: {model_to_use}")
         print()
 
     # Preprocess query
-    enhanced = preprocess_query(args.query, vector=True, query_nlp_backend=args.query_nlp_backend)
+    enhanced = preprocess_query(
+        args.query,
+        vector=True,  # Both backends need vector for similarity search
+        query_nlp_backend=args.query_nlp_backend,
+        model_name=model_to_use,
+        preserve_original=True,  # Keep original query terms
+        max_synonyms=2  # Reduce synonym expansion
+    )
 
     # Parse tags if provided
     tags = [tag.strip() for tag in args.tags.split(',')] if args.tags else None
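
Note: a sample --shell session exercising the commands handled above (illustrative, not captured from a real run; the subcommand name, counts, and timings are assumptions):

    $ sw-search search docs.swsearch --shell
    Search Shell - Index: docs.swsearch
    Backend: sqlite
    Index contains 1234 chunks from 56 files
    Type 'exit' or 'quit' to leave, 'help' for options
    ------------------------------------------------------------

    search> count=3
    Result count set to: 3

    search> how do I create an agent
    Found 3 result(s) for 'how do I create an agent' (0.142s):
    ...

    search> exit
    Goodbye!
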
@@ -494,7 +855,9 @@ def search_command():
         enhanced_text=enhanced.get('enhanced_text', args.query),
         count=args.count,
         distance_threshold=args.distance_threshold,
-        tags=tags
+        tags=tags,
+        keyword_weight=args.keyword_weight,
+        original_query=args.query  # Pass original for exact match boosting
     )
 
     if args.json:
@@ -563,6 +926,142 @@ def search_command():
             traceback.print_exc()
         sys.exit(1)
 
+def migrate_command():
+    """Migrate search indexes between backends"""
+    parser = argparse.ArgumentParser(
+        description='Migrate search indexes between SQLite and pgvector backends',
+        epilog="""
+Examples:
+  # Migrate SQLite to pgvector
+  sw-search migrate ./docs.swsearch \\
+    --to-pgvector \\
+    --connection-string "postgresql://user:pass@localhost/db" \\
+    --collection-name docs_collection
+
+  # Migrate with overwrite
+  sw-search migrate ./docs.swsearch \\
+    --to-pgvector \\
+    --connection-string "postgresql://user:pass@localhost/db" \\
+    --collection-name docs_collection \\
+    --overwrite
+
+  # Get index information
+  sw-search migrate --info ./docs.swsearch
+        """,
+        formatter_class=argparse.RawDescriptionHelpFormatter
+    )
+
+    # Source argument (optional if using --info)
+    parser.add_argument('source', nargs='?', help='Source index file or collection')
+
+    # Migration direction
+    migration_group = parser.add_mutually_exclusive_group()
+    migration_group.add_argument('--to-pgvector', action='store_true',
+                                 help='Migrate SQLite index to pgvector')
+    migration_group.add_argument('--to-sqlite', action='store_true',
+                                 help='Migrate pgvector collection to SQLite (not yet implemented)')
+    migration_group.add_argument('--info', action='store_true',
+                                 help='Show information about an index')
+
+    # pgvector options
+    parser.add_argument('--connection-string',
+                        help='PostgreSQL connection string for pgvector')
+    parser.add_argument('--collection-name',
+                        help='Collection name for pgvector')
+    parser.add_argument('--overwrite', action='store_true',
+                        help='Overwrite existing collection')
+
+    # SQLite options
+    parser.add_argument('--output',
+                        help='Output .swsearch file path (for --to-sqlite)')
+
+    # Common options
+    parser.add_argument('--batch-size', type=int, default=100,
+                        help='Number of chunks to process at once (default: 100)')
+    parser.add_argument('--verbose', action='store_true',
+                        help='Show detailed progress')
+
+    args = parser.parse_args()
+
+    # Handle --info flag
+    if args.info:
+        if not args.source:
+            print("Error: Source index required with --info")
+            sys.exit(1)
+
+        try:
+            from signalwire_agents.search.migration import SearchIndexMigrator
+            migrator = SearchIndexMigrator(verbose=args.verbose)
+            info = migrator.get_index_info(args.source)
+
+            print(f"Index Information: {args.source}")
+            print(f"  Type: {info['type']}")
+            if info['type'] == 'sqlite':
+                print(f"  Total chunks: {info['total_chunks']}")
+                print(f"  Total files: {info['total_files']}")
+                print(f"  Model: {info['config'].get('embedding_model', 'Unknown')}")
+                print(f"  Dimensions: {info['config'].get('embedding_dimensions', 'Unknown')}")
+                print(f"  Created: {info['config'].get('created_at', 'Unknown')}")
+                if args.verbose:
+                    print("\n  Full configuration:")
+                    for key, value in info['config'].items():
+                        print(f"    {key}: {value}")
+            else:
+                print("  Unable to determine index type")
+        except Exception as e:
+            print(f"Error getting index info: {e}")
+            sys.exit(1)
+        return
+
+    # Validate arguments for migration
+    if not args.source:
+        print("Error: Source index required for migration")
+        sys.exit(1)
+
+    if not args.to_pgvector and not args.to_sqlite:
+        print("Error: Must specify migration direction (--to-pgvector or --to-sqlite)")
+        sys.exit(1)
+
+    try:
+        from signalwire_agents.search.migration import SearchIndexMigrator
+        migrator = SearchIndexMigrator(verbose=args.verbose)
+
+        if args.to_pgvector:
+            # Validate pgvector arguments
+            if not args.connection_string:
+                print("Error: --connection-string required for pgvector migration")
+                sys.exit(1)
+            if not args.collection_name:
+                print("Error: --collection-name required for pgvector migration")
+                sys.exit(1)
+
+            # Perform migration
+            print(f"Migrating {args.source} to pgvector collection '{args.collection_name}'...")
+            stats = migrator.migrate_sqlite_to_pgvector(
+                sqlite_path=args.source,
+                connection_string=args.connection_string,
+                collection_name=args.collection_name,
+                overwrite=args.overwrite,
+                batch_size=args.batch_size
+            )
+
+            print(f"\n✓ Migration completed successfully!")
+            print(f"  Chunks migrated: {stats['chunks_migrated']}")
+            print(f"  Errors: {stats['errors']}")
+
+        elif args.to_sqlite:
+            print("Error: pgvector to SQLite migration not yet implemented")
+            print("This feature is planned for future development")
+            sys.exit(1)
+
+    except Exception as e:
+        print(f"\nError during migration: {e}")
+        if args.verbose:
+            import traceback
+            traceback.print_exc()
+        sys.exit(1)
+
+
 def remote_command():
     """Search via remote API endpoint"""
     parser = argparse.ArgumentParser(description='Search via remote API endpoint')
@@ -838,6 +1337,11 @@ Examples:
         sys.argv.pop(1)
         remote_command()
         return
+    elif sys.argv[1] == 'migrate':
+        # Remove 'migrate' from argv and call migrate_command
+        sys.argv.pop(1)
+        migrate_command()
+        return
 
     # Regular build command
     main()