signalwire-agents 0.1.46__py3-none-any.whl → 0.1.48__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,7 +18,7 @@ A package for building AI agents using SignalWire's AI and SWML capabilities.
  from .core.logging_config import configure_logging
  configure_logging()

- __version__ = "0.1.46"
+ __version__ = "0.1.48"

  # Import core classes for easier access
  from .core.agent_base import AgentBase
@@ -10,6 +10,9 @@ See LICENSE file in the project root for full license information.
  import argparse
  import sys
  from pathlib import Path
+ from datetime import datetime
+
+ from signalwire_agents.search.models import MODEL_ALIASES, DEFAULT_MODEL, resolve_model_alias

  def main():
      """Main entry point for the build-search command"""
@@ -66,6 +69,35 @@ Examples:
  sw-search ./docs \\
      --chunking-strategy qa

+ # Model selection examples (performance vs quality tradeoff)
+ sw-search ./docs --model mini   # Fastest (~5x faster), 384 dims, good for most use cases
+ sw-search ./docs --model base   # Balanced speed/quality, 768 dims (previous default)
+ sw-search ./docs --model large  # Best quality (same as base currently)
+ # Or use full model names:
+ sw-search ./docs --model sentence-transformers/all-MiniLM-L6-v2
+ sw-search ./docs --model sentence-transformers/all-mpnet-base-v2
+
+ # JSON-based chunking (pre-chunked content)
+ sw-search ./api_chunks.json \
+     --chunking-strategy json \
+     --file-types json
+
+ # Export chunks to JSON for review (single file)
+ sw-search ./docs \\
+     --output-format json \\
+     --output all_chunks.json
+
+ # Export chunks to JSON (one file per source)
+ sw-search ./docs \\
+     --output-format json \\
+     --output-dir ./chunks/
+
+ # Build index from exported JSON chunks
+ sw-search ./chunks/ \\
+     --chunking-strategy json \\
+     --file-types json \\
+     --output final.swsearch
+
  # Full configuration example
  sw-search ./docs ./examples README.md \\
      --output ./knowledge.swsearch \\
@@ -90,6 +122,12 @@ Examples:
  sw-search remote http://localhost:8001 "how to create an agent" --index-name docs
  sw-search remote localhost:8001 "API reference" --index-name docs --count 3 --verbose

+ # Migrate between backends
+ sw-search migrate ./docs.swsearch --to-pgvector \\
+     --connection-string "postgresql://user:pass@localhost/db" \\
+     --collection-name docs_collection
+ sw-search migrate --info ./docs.swsearch
+
  # PostgreSQL pgvector backend
  sw-search ./docs \\
      --backend pgvector \\
@@ -121,6 +159,18 @@ Examples:
  help='Output .swsearch file (default: sources.swsearch) or collection name for pgvector'
  )

+ parser.add_argument(
+ '--output-dir',
+ help='Output directory for results (creates one file per source file when used with --output-format json, or auto-names index files)'
+ )
+
+ parser.add_argument(
+ '--output-format',
+ choices=['index', 'json'],
+ default='index',
+ help='Output format: index (create search index) or json (export chunks as JSON) (default: index)'
+ )
+
  parser.add_argument(
  '--backend',
  choices=['sqlite', 'pgvector'],
@@ -141,7 +191,7 @@ Examples:

  parser.add_argument(
  '--chunking-strategy',
- choices=['sentence', 'sliding', 'paragraph', 'page', 'semantic', 'topic', 'qa'],
+ choices=['sentence', 'sliding', 'paragraph', 'page', 'semantic', 'topic', 'qa', 'json'],
  default='sentence',
  help='Chunking strategy to use (default: sentence)'
  )
@@ -192,8 +242,8 @@ Examples:

  parser.add_argument(
  '--model',
- default='sentence-transformers/all-mpnet-base-v2',
- help='Sentence transformer model name (default: sentence-transformers/all-mpnet-base-v2)'
+ default=DEFAULT_MODEL,
+ help=f'Sentence transformer model name or alias (mini/base/large). Default: mini ({DEFAULT_MODEL})'
  )

  parser.add_argument(
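The new default and the mini/base/large aliases come from signalwire_agents.search.models, whose body is not part of this diff. A minimal sketch of what the CLI appears to rely on, inferred only from the aliases, dimensions, and defaults referenced in the help text and epilog above; the shipped module may differ:

    # Hypothetical sketch of signalwire_agents/search/models.py (assumed contents)
    MODEL_ALIASES = {
        "mini": "sentence-transformers/all-MiniLM-L6-v2",    # 384 dims, fastest, new default
        "base": "sentence-transformers/all-mpnet-base-v2",   # 768 dims, previous default
        "large": "sentence-transformers/all-mpnet-base-v2",  # currently the same as base
    }

    DEFAULT_MODEL = MODEL_ALIASES["mini"]

    def resolve_model_alias(name: str) -> str:
        """Return the full model name for a known alias, or the name unchanged."""
        return MODEL_ALIASES.get(name, name)

Under this assumption, unknown names pass through unchanged, which is why full sentence-transformers names keep working with --model.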
@@ -236,6 +286,9 @@ Examples:

  args = parser.parse_args()

+ # Resolve model aliases
+ args.model = resolve_model_alias(args.model)
+
  # Validate sources
  valid_sources = []
  for source in args.sources:
@@ -254,8 +307,35 @@ Examples:
  print("Error: --connection-string is required for pgvector backend")
  sys.exit(1)

- # Default output filename
- if not args.output:
+ # Validate output options
+ if args.output and args.output_dir:
+ print("Error: Cannot specify both --output and --output-dir")
+ sys.exit(1)
+
+ # Handle JSON output format differently
+ if args.output_format == 'json':
+ # JSON export doesn't use backend
+ if args.backend != 'sqlite':
+ print("Warning: --backend is ignored when using --output-format json")
+
+ # Determine output location
+ if args.output_dir:
+ # Multiple files mode
+ output_path = Path(args.output_dir)
+ if not output_path.exists():
+ output_path.mkdir(parents=True, exist_ok=True)
+ elif args.output:
+ # Single file mode
+ output_path = Path(args.output)
+ if not output_path.suffix:
+ output_path = output_path.with_suffix('.json')
+ else:
+ # Default to single file
+ output_path = Path('chunks.json')
+ args.output = str(output_path)
+
+ # Default output filename (for index format)
+ if args.output_format == 'index' and not args.output and not args.output_dir:
  if args.backend == 'sqlite':
  if len(valid_sources) == 1:
  # Single source - use its name
@@ -272,8 +352,25 @@ Examples:
  else:
  args.output = "documents"

- # Ensure output has .swsearch extension for sqlite
- if args.backend == 'sqlite' and not args.output.endswith('.swsearch'):
+ # Handle --output-dir for index format
+ if args.output_format == 'index' and args.output_dir:
+ # Auto-generate output filename in the directory
+ if len(valid_sources) == 1:
+ source_name = valid_sources[0].stem if valid_sources[0].is_file() else valid_sources[0].name
+ else:
+ source_name = "combined"
+
+ output_dir = Path(args.output_dir)
+ output_dir.mkdir(parents=True, exist_ok=True)
+
+ if args.backend == 'sqlite':
+ args.output = str(output_dir / f"{source_name}.swsearch")
+ else:
+ # For pgvector, still use the name as collection
+ args.output = source_name
+
+ # Ensure output has .swsearch extension for sqlite (but not for JSON format)
+ if args.output_format == 'index' and args.backend == 'sqlite' and args.output and not args.output.endswith('.swsearch'):
  args.output += '.swsearch'

  # Parse lists
@@ -320,6 +417,103 @@ Examples:
  print()

  try:
+ # Handle JSON export mode
+ if args.output_format == 'json':
+ # Import what we need for chunking
+ from signalwire_agents.search.index_builder import IndexBuilder
+ import json
+
+ builder = IndexBuilder(
+ chunking_strategy=args.chunking_strategy,
+ max_sentences_per_chunk=args.max_sentences_per_chunk,
+ chunk_size=args.chunk_size,
+ chunk_overlap=args.overlap_size,
+ split_newlines=args.split_newlines,
+ index_nlp_backend=args.index_nlp_backend,
+ verbose=args.verbose,
+ semantic_threshold=args.semantic_threshold,
+ topic_threshold=args.topic_threshold
+ )
+
+ # Process files and export chunks
+ all_chunks = []
+ chunk_files_created = []
+
+ # Discover files from sources
+ files = builder._discover_files_from_sources(valid_sources, file_types, exclude_patterns)
+
+ if args.verbose:
+ print(f"Processing {len(files)} files...")
+
+ for file_path in files:
+ try:
+ # Determine base directory for relative paths
+ base_dir = builder._get_base_directory_for_file(file_path, valid_sources)
+
+ # Process file into chunks
+ chunks = builder._process_file(file_path, base_dir, tags)
+
+ if args.output_dir:
+ # Create individual JSON file
+ relative_path = file_path.relative_to(base_dir) if base_dir else file_path.name
+ json_filename = relative_path.with_suffix('.json')
+ json_path = Path(args.output_dir) / json_filename
+
+ # Create subdirectories if needed
+ json_path.parent.mkdir(parents=True, exist_ok=True)
+
+ # Save chunks to JSON
+ chunk_data = {
+ "chunks": chunks,
+ "metadata": {
+ "source_file": str(relative_path),
+ "total_chunks": len(chunks),
+ "chunking_strategy": args.chunking_strategy,
+ "processing_date": datetime.now().isoformat()
+ }
+ }
+
+ with open(json_path, 'w', encoding='utf-8') as f:
+ json.dump(chunk_data, f, indent=2, ensure_ascii=False)
+
+ chunk_files_created.append(json_path)
+ if args.verbose:
+ print(f" Created: {json_path} ({len(chunks)} chunks)")
+ else:
+ # Accumulate all chunks for single file output
+ all_chunks.extend(chunks)
+
+ except Exception as e:
+ print(f"Error processing {file_path}: {e}")
+ if args.verbose:
+ import traceback
+ traceback.print_exc()
+
+ # Handle single file output
+ if not args.output_dir:
+ output_data = {
+ "chunks": all_chunks,
+ "metadata": {
+ "total_chunks": len(all_chunks),
+ "total_files": len(files),
+ "chunking_strategy": args.chunking_strategy,
+ "processing_date": datetime.now().isoformat()
+ }
+ }
+
+ with open(args.output, 'w', encoding='utf-8') as f:
+ json.dump(output_data, f, indent=2, ensure_ascii=False)
+
+ print(f"✓ Exported {len(all_chunks)} chunks to {args.output}")
+ else:
+ print(f"✓ Created {len(chunk_files_created)} JSON files in {args.output_dir}")
+ total_chunks = sum(len(json.load(open(f))['chunks']) for f in chunk_files_created)
+ print(f" Total chunks: {total_chunks}")
+
+ # Exit early for JSON format
+ return
+
+ # Regular index building mode
  # Create index builder - import only when actually needed
  from signalwire_agents.search.index_builder import IndexBuilder
  builder = IndexBuilder(
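The --output-format json path above writes plain JSON whose shape is fixed by this hunk: a top-level "chunks" list plus a "metadata" block carrying total_chunks, chunking_strategy, processing_date, and either source_file (per-source export) or total_files (single-file export). A short sketch of reading one back for review; the file name is an assumption matching the epilog example:

    # Sketch: inspect a single-file export produced by
    #   sw-search ./docs --output-format json --output all_chunks.json
    import json

    with open("all_chunks.json", encoding="utf-8") as f:
        data = json.load(f)

    meta = data["metadata"]
    print(f"{meta['total_chunks']} chunks from {meta['total_files']} files")
    print(f"strategy={meta['chunking_strategy']} exported={meta['processing_date']}")

    # data["chunks"] holds the chunk records emitted by the builder; per-source
    # exports (--output-dir) carry "source_file" in metadata instead of "total_files".
    for chunk in data["chunks"][:3]:
        print(str(chunk)[:80])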
@@ -365,7 +559,13 @@ Examples:
  sys.exit(1)

  if args.backend == 'sqlite':
- print(f"\n✓ Search index created successfully: {args.output}")
+ # Check if the index was actually created
+ import os
+ if os.path.exists(args.output):
+ print(f"\n✓ Search index created successfully: {args.output}")
+ else:
+ print(f"\n✗ Search index creation failed - no files were processed")
+ sys.exit(1)
  else:
  print(f"\n✓ Search collection created successfully: {args.output}")
  print(f" Connection: {args.connection_string}")
@@ -422,21 +622,41 @@ def search_command():
  """Search within an existing search index"""
  parser = argparse.ArgumentParser(description='Search within a .swsearch index file or pgvector collection')
  parser.add_argument('index_source', help='Path to .swsearch file or collection name for pgvector')
- parser.add_argument('query', help='Search query')
+ parser.add_argument('query', nargs='?', help='Search query (optional if using --shell)')
  parser.add_argument('--backend', choices=['sqlite', 'pgvector'], default='sqlite',
  help='Storage backend (default: sqlite)')
  parser.add_argument('--connection-string', help='PostgreSQL connection string for pgvector backend')
+ parser.add_argument('--shell', action='store_true',
+ help='Interactive shell mode - load once and search multiple times')
  parser.add_argument('--count', type=int, default=5, help='Number of results to return (default: 5)')
  parser.add_argument('--distance-threshold', type=float, default=0.0, help='Minimum similarity score (default: 0.0)')
  parser.add_argument('--tags', help='Comma-separated tags to filter by')
  parser.add_argument('--query-nlp-backend', choices=['nltk', 'spacy'], default='nltk',
  help='NLP backend for query processing: nltk (fast, default) or spacy (better quality, slower)')
+ parser.add_argument('--keyword-weight', type=float, default=None,
+ help='Manual keyword weight (0.0-1.0). Overrides automatic weight detection.')
  parser.add_argument('--verbose', action='store_true', help='Show detailed information')
  parser.add_argument('--json', action='store_true', help='Output results as JSON')
  parser.add_argument('--no-content', action='store_true', help='Hide content in results (show only metadata)')
+ parser.add_argument('--model', help='Override embedding model for query (mini/base/large or full model name)')

  args = parser.parse_args()

+ # Validate arguments
+ if not args.shell and not args.query:
+ print("Error: Query is required unless using --shell mode")
+ sys.exit(1)
+
+ # Resolve model aliases
+ if args.model and args.model in MODEL_ALIASES:
+ args.model = MODEL_ALIASES[args.model]
+
+ # Validate keyword weight if provided
+ if args.keyword_weight is not None:
+ if args.keyword_weight < 0.0 or args.keyword_weight > 1.0:
+ print("Error: --keyword-weight must be between 0.0 and 1.0")
+ sys.exit(1)
+
  # Validate backend configuration
  if args.backend == 'pgvector' and not args.connection_string:
  print("Error: --connection-string is required for pgvector backend")
@@ -464,21 +684,167 @@ def search_command():
  print(f"Connecting to pgvector collection: {args.index_source}")

  if args.backend == 'sqlite':
- engine = SearchEngine(backend='sqlite', index_path=args.index_source)
+ # Pass the model from the index or override if specified
+ model = args.model if args.model else None
+ engine = SearchEngine(backend='sqlite', index_path=args.index_source, model=model)
  else:
+ # Pass the model override if specified
+ model = args.model if args.model else None
  engine = SearchEngine(backend='pgvector', connection_string=args.connection_string,
- collection_name=args.index_source)
+ collection_name=args.index_source, model=model)

  # Get index stats
  stats = engine.get_stats()
+
+ # Get the model from index config if not overridden
+ model_to_use = args.model
+ if not model_to_use and 'config' in stats:
+ # SQLite uses 'embedding_model', pgvector uses 'model_name'
+ model_to_use = stats['config'].get('embedding_model') or stats['config'].get('model_name')
+
+ # Shell mode implementation
+ if args.shell:
+ import time
+ print(f"Search Shell - Index: {args.index_source}")
+ print(f"Backend: {args.backend}")
+ print(f"Index contains {stats['total_chunks']} chunks from {stats['total_files']} files")
+ if model_to_use:
+ print(f"Model: {model_to_use}")
+ print("Type 'exit' or 'quit' to leave, 'help' for options")
+ print("-" * 60)
+
+ while True:
+ try:
+ query = input("\nsearch> ").strip()
+
+ if not query:
+ continue
+
+ if query.lower() in ['exit', 'quit', 'q']:
+ print("Goodbye!")
+ break
+
+ if query.lower() == 'help':
+ print("\nShell commands:")
+ print(" help - Show this help")
+ print(" exit/quit/q - Exit shell")
+ print(" count=N - Set result count (current: {})".format(args.count))
+ print(" tags=tag1,tag2 - Set tag filter (current: {})".format(args.tags or 'none'))
+ print(" verbose - Toggle verbose output")
+ print("\nOr type any search query...")
+ continue
+
+ # Handle shell commands
+ if query.startswith('count='):
+ try:
+ args.count = int(query.split('=')[1])
+ print(f"Result count set to: {args.count}")
+ except:
+ print("Invalid count value")
+ continue
+
+ if query.startswith('tags='):
+ tag_str = query.split('=', 1)[1]
+ args.tags = tag_str if tag_str else None
+ tags = [tag.strip() for tag in args.tags.split(',')] if args.tags else None
+ print(f"Tags filter set to: {tags or 'none'}")
+ continue
+
+ if query == 'verbose':
+ args.verbose = not args.verbose
+ print(f"Verbose output: {'on' if args.verbose else 'off'}")
+ continue
+
+ # Perform search with timing
+ start_time = time.time()
+
+ # Preprocess query
+ enhanced = preprocess_query(
+ query,
+ vector=True,
+ query_nlp_backend=args.query_nlp_backend,
+ model_name=model_to_use,
+ preserve_original=True,
+ max_synonyms=2
+ )
+
+ # Parse tags
+ tags = [tag.strip() for tag in args.tags.split(',')] if args.tags else None
+
+ # Perform search
+ results = engine.search(
+ query_vector=enhanced.get('vector'),
+ enhanced_text=enhanced.get('enhanced_text', query),
+ count=args.count,
+ distance_threshold=args.distance_threshold,
+ tags=tags,
+ keyword_weight=args.keyword_weight,
+ original_query=query
+ )
+
+ search_time = time.time() - start_time
+
+ # Display results
+ if not results:
+ print(f"\nNo results found for '{query}' ({search_time:.3f}s)")
+ else:
+ print(f"\nFound {len(results)} result(s) for '{query}' ({search_time:.3f}s):")
+ if enhanced.get('enhanced_text') != query and args.verbose:
+ print(f"Enhanced query: '{enhanced.get('enhanced_text')}'")
+ print("=" * 60)
+
+ for i, result in enumerate(results):
+ print(f"\n[{i+1}] Score: {result['score']:.4f}")
+
+ # Show metadata
+ metadata = result['metadata']
+ print(f"File: {metadata.get('filename', 'Unknown')}")
+ if metadata.get('section'):
+ print(f"Section: {metadata['section']}")
+
+ # Show content unless suppressed
+ if not args.no_content:
+ content = result['content']
+ if len(content) > 300 and not args.verbose:
+ content = content[:300] + "..."
+ print(f"\n{content}")
+
+ if i < len(results) - 1:
+ print("-" * 40)
+
+ except KeyboardInterrupt:
+ print("\nUse 'exit' to quit")
+ except EOFError:
+ print("\nGoodbye!")
+ break
+ except Exception as e:
+ print(f"\nError: {e}")
+ if args.verbose:
+ import traceback
+ traceback.print_exc()
+
+ return # Exit after shell mode
+
+ # Normal single query mode
  if args.verbose:
  print(f"Index contains {stats['total_chunks']} chunks from {stats['total_files']} files")
  print(f"Searching for: '{args.query}'")
  print(f"Query NLP Backend: {args.query_nlp_backend}")
+ if args.model:
+ print(f"Override model: {args.model}")
+ elif model_to_use:
+ print(f"Using index model: {model_to_use}")
  print()

  # Preprocess query
- enhanced = preprocess_query(args.query, vector=True, query_nlp_backend=args.query_nlp_backend)
+ enhanced = preprocess_query(
+ args.query,
+ vector=True, # Both backends need vector for similarity search
+ query_nlp_backend=args.query_nlp_backend,
+ model_name=model_to_use,
+ preserve_original=True, # Keep original query terms
+ max_synonyms=2 # Reduce synonym expansion
+ )

  # Parse tags if provided
  tags = [tag.strip() for tag in args.tags.split(',')] if args.tags else None
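Both the new shell loop and the single-query path funnel through the same preprocess_query and engine.search calls. A sketch of driving that query path programmatically, limited to the call signatures visible in this hunk; the two import locations are assumptions, since the corresponding imports are not part of this diff:

    # Sketch of the query path used by the CLI (import paths assumed)
    from signalwire_agents.search.search_engine import SearchEngine        # assumed path
    from signalwire_agents.search.query_processor import preprocess_query  # assumed path

    engine = SearchEngine(backend='sqlite', index_path='docs.swsearch', model=None)
    stats = engine.get_stats()

    # Fall back to the model recorded in the index, as the CLI does
    cfg = stats.get('config', {})
    model_to_use = cfg.get('embedding_model') or cfg.get('model_name')

    query = "how to create an agent"
    enhanced = preprocess_query(
        query,
        vector=True,
        query_nlp_backend='nltk',
        model_name=model_to_use,
        preserve_original=True,
        max_synonyms=2,
    )

    results = engine.search(
        query_vector=enhanced.get('vector'),
        enhanced_text=enhanced.get('enhanced_text', query),
        count=5,
        distance_threshold=0.0,
        tags=None,
        keyword_weight=None,   # or 0.0-1.0 to pin the keyword/vector balance
        original_query=query,
    )
    for result in results:
        print(result['score'], result['metadata'].get('filename'), result['content'][:80])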
@@ -489,7 +855,9 @@ def search_command():
  enhanced_text=enhanced.get('enhanced_text', args.query),
  count=args.count,
  distance_threshold=args.distance_threshold,
- tags=tags
+ tags=tags,
+ keyword_weight=args.keyword_weight,
+ original_query=args.query # Pass original for exact match boosting
  )

  if args.json:
@@ -558,6 +926,142 @@ def search_command():
  traceback.print_exc()
  sys.exit(1)

+ def migrate_command():
+ """Migrate search indexes between backends"""
+ parser = argparse.ArgumentParser(
+ description='Migrate search indexes between SQLite and pgvector backends',
+ epilog="""
+ Examples:
+ # Migrate SQLite to pgvector
+ sw-search migrate ./docs.swsearch \\
+ --to-pgvector \\
+ --connection-string "postgresql://user:pass@localhost/db" \\
+ --collection-name docs_collection
+
+ # Migrate with overwrite
+ sw-search migrate ./docs.swsearch \\
+ --to-pgvector \\
+ --connection-string "postgresql://user:pass@localhost/db" \\
+ --collection-name docs_collection \\
+ --overwrite
+
+ # Get index information
+ sw-search migrate --info ./docs.swsearch
+ """,
+ formatter_class=argparse.RawDescriptionHelpFormatter
+ )
+
+ # Source argument (optional if using --info)
+ parser.add_argument('source', nargs='?', help='Source index file or collection')
+
+ # Migration direction
+ migration_group = parser.add_mutually_exclusive_group()
+ migration_group.add_argument('--to-pgvector', action='store_true',
+ help='Migrate SQLite index to pgvector')
+ migration_group.add_argument('--to-sqlite', action='store_true',
+ help='Migrate pgvector collection to SQLite (not yet implemented)')
+ migration_group.add_argument('--info', action='store_true',
+ help='Show information about an index')
+
+ # pgvector options
+ parser.add_argument('--connection-string',
+ help='PostgreSQL connection string for pgvector')
+ parser.add_argument('--collection-name',
+ help='Collection name for pgvector')
+ parser.add_argument('--overwrite', action='store_true',
+ help='Overwrite existing collection')
+
+ # SQLite options
+ parser.add_argument('--output',
+ help='Output .swsearch file path (for --to-sqlite)')
+
+ # Common options
+ parser.add_argument('--batch-size', type=int, default=100,
+ help='Number of chunks to process at once (default: 100)')
+ parser.add_argument('--verbose', action='store_true',
+ help='Show detailed progress')
+
+ args = parser.parse_args()
+
+ # Handle --info flag
+ if args.info:
+ if not args.source:
+ print("Error: Source index required with --info")
+ sys.exit(1)
+
+ try:
+ from signalwire_agents.search.migration import SearchIndexMigrator
+ migrator = SearchIndexMigrator(verbose=args.verbose)
+ info = migrator.get_index_info(args.source)
+
+ print(f"Index Information: {args.source}")
+ print(f" Type: {info['type']}")
+ if info['type'] == 'sqlite':
+ print(f" Total chunks: {info['total_chunks']}")
+ print(f" Total files: {info['total_files']}")
+ print(f" Model: {info['config'].get('embedding_model', 'Unknown')}")
+ print(f" Dimensions: {info['config'].get('embedding_dimensions', 'Unknown')}")
+ print(f" Created: {info['config'].get('created_at', 'Unknown')}")
+ if args.verbose:
+ print("\n Full configuration:")
+ for key, value in info['config'].items():
+ print(f" {key}: {value}")
+ else:
+ print(" Unable to determine index type")
+ except Exception as e:
+ print(f"Error getting index info: {e}")
+ sys.exit(1)
+ return
+
+ # Validate arguments for migration
+ if not args.source:
+ print("Error: Source index required for migration")
+ sys.exit(1)
+
+ if not args.to_pgvector and not args.to_sqlite:
+ print("Error: Must specify migration direction (--to-pgvector or --to-sqlite)")
+ sys.exit(1)
+
+ try:
+ from signalwire_agents.search.migration import SearchIndexMigrator
+ migrator = SearchIndexMigrator(verbose=args.verbose)
+
+ if args.to_pgvector:
+ # Validate pgvector arguments
+ if not args.connection_string:
+ print("Error: --connection-string required for pgvector migration")
+ sys.exit(1)
+ if not args.collection_name:
+ print("Error: --collection-name required for pgvector migration")
+ sys.exit(1)
+
+ # Perform migration
+ print(f"Migrating {args.source} to pgvector collection '{args.collection_name}'...")
+ stats = migrator.migrate_sqlite_to_pgvector(
+ sqlite_path=args.source,
+ connection_string=args.connection_string,
+ collection_name=args.collection_name,
+ overwrite=args.overwrite,
+ batch_size=args.batch_size
+ )
+
+ print(f"\n✓ Migration completed successfully!")
+ print(f" Chunks migrated: {stats['chunks_migrated']}")
+ print(f" Errors: {stats['errors']}")
+
+ elif args.to_sqlite:
+ print("Error: pgvector to SQLite migration not yet implemented")
+ print("This feature is planned for future development")
+ sys.exit(1)
+
+ except Exception as e:
+ print(f"\nError during migration: {e}")
+ if args.verbose:
+ import traceback
+ traceback.print_exc()
+ sys.exit(1)
+
+
  def remote_command():
  """Search via remote API endpoint"""
  parser = argparse.ArgumentParser(description='Search via remote API endpoint')
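The migrate subcommand above is a thin wrapper over SearchIndexMigrator. A sketch of calling it directly, limited to the methods and result keys this hunk exercises; the connection string and collection name are placeholders:

    # Sketch: use the migrator without the CLI wrapper
    from signalwire_agents.search.migration import SearchIndexMigrator

    migrator = SearchIndexMigrator(verbose=True)

    # Same data the `sw-search migrate --info` path prints
    info = migrator.get_index_info("./docs.swsearch")
    print(info['type'], info.get('total_chunks'), info.get('total_files'))

    # SQLite -> pgvector (placeholder connection details)
    stats = migrator.migrate_sqlite_to_pgvector(
        sqlite_path="./docs.swsearch",
        connection_string="postgresql://user:pass@localhost/db",
        collection_name="docs_collection",
        overwrite=False,
        batch_size=100,
    )
    print(f"{stats['chunks_migrated']} chunks migrated, {stats['errors']} errors")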
@@ -833,6 +1337,11 @@ Examples:
  sys.argv.pop(1)
  remote_command()
  return
+ elif sys.argv[1] == 'migrate':
+ # Remove 'migrate' from argv and call migrate_command
+ sys.argv.pop(1)
+ migrate_command()
+ return

  # Regular build command
  main()