signalwire-agents 0.1.47__tar.gz → 0.1.48__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {signalwire_agents-0.1.47/signalwire_agents.egg-info → signalwire_agents-0.1.48}/PKG-INFO +1 -1
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/pyproject.toml +1 -1
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/__init__.py +1 -1
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/cli/build_search.py +516 -12
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/search/__init__.py +7 -1
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/search/document_processor.py +11 -8
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/search/index_builder.py +112 -13
- signalwire_agents-0.1.48/signalwire_agents/search/migration.py +418 -0
- signalwire_agents-0.1.48/signalwire_agents/search/models.py +30 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/search/pgvector_backend.py +236 -13
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/search/query_processor.py +87 -9
- signalwire_agents-0.1.48/signalwire_agents/search/search_engine.py +1224 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/search/search_service.py +56 -6
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/native_vector_search/skill.py +208 -33
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48/signalwire_agents.egg-info}/PKG-INFO +1 -1
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents.egg-info/SOURCES.txt +2 -0
- signalwire_agents-0.1.47/signalwire_agents/search/search_engine.py +0 -420
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/LICENSE +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/README.md +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/setup.cfg +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/setup.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/agent_server.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/agents/bedrock.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/cli/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/cli/config.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/cli/core/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/cli/core/agent_loader.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/cli/core/argparse_helpers.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/cli/core/dynamic_config.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/cli/core/service_loader.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/cli/execution/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/cli/execution/datamap_exec.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/cli/execution/webhook_exec.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/cli/output/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/cli/output/output_formatter.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/cli/output/swml_dump.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/cli/simulation/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/cli/simulation/data_generation.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/cli/simulation/data_overrides.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/cli/simulation/mock_env.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/cli/swaig_test_wrapper.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/cli/test_swaig.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/cli/types.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/agent/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/agent/config/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/agent/deployment/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/agent/deployment/handlers/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/agent/prompt/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/agent/prompt/manager.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/agent/routing/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/agent/security/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/agent/swml/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/agent/tools/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/agent/tools/decorator.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/agent/tools/registry.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/agent_base.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/auth_handler.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/config_loader.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/contexts.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/data_map.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/function_result.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/logging_config.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/mixins/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/mixins/ai_config_mixin.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/mixins/auth_mixin.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/mixins/prompt_mixin.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/mixins/serverless_mixin.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/mixins/skill_mixin.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/mixins/state_mixin.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/mixins/tool_mixin.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/mixins/web_mixin.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/pom_builder.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/security/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/security/session_manager.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/security_config.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/skill_base.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/skill_manager.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/swaig_function.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/swml_builder.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/swml_handler.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/swml_renderer.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/core/swml_service.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/prefabs/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/prefabs/concierge.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/prefabs/faq_bot.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/prefabs/info_gatherer.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/prefabs/receptionist.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/prefabs/survey.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/schema.json +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/README.md +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/api_ninjas_trivia/README.md +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/api_ninjas_trivia/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/api_ninjas_trivia/skill.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/datasphere/README.md +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/datasphere/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/datasphere/skill.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/datasphere_serverless/README.md +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/datasphere_serverless/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/datasphere_serverless/skill.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/datetime/README.md +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/datetime/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/datetime/skill.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/joke/README.md +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/joke/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/joke/skill.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/math/README.md +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/math/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/math/skill.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/mcp_gateway/README.md +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/mcp_gateway/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/mcp_gateway/skill.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/native_vector_search/README.md +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/native_vector_search/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/play_background_file/README.md +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/play_background_file/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/play_background_file/skill.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/registry.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/spider/README.md +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/spider/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/spider/skill.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/swml_transfer/README.md +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/swml_transfer/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/swml_transfer/skill.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/weather_api/README.md +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/weather_api/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/weather_api/skill.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/web_search/README.md +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/web_search/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/web_search/skill.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/wikipedia_search/README.md +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/wikipedia_search/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/skills/wikipedia_search/skill.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/utils/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/utils/pom_utils.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/utils/schema_utils.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/utils/token_generators.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/utils/validators.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/web/__init__.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents/web/web_service.py +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents.egg-info/dependency_links.txt +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents.egg-info/entry_points.txt +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents.egg-info/requires.txt +0 -0
- {signalwire_agents-0.1.47 → signalwire_agents-0.1.48}/signalwire_agents.egg-info/top_level.txt +0 -0
@@ -18,7 +18,7 @@ A package for building AI agents using SignalWire's AI and SWML capabilities.
|
|
18
18
|
from .core.logging_config import configure_logging
|
19
19
|
configure_logging()
|
20
20
|
|
21
|
-
__version__ = "0.1.47"
|
21
|
+
__version__ = "0.1.48"
|
22
22
|
|
23
23
|
# Import core classes for easier access
|
24
24
|
from .core.agent_base import AgentBase
|
@@ -10,6 +10,9 @@ See LICENSE file in the project root for full license information.
|
|
10
10
|
import argparse
|
11
11
|
import sys
|
12
12
|
from pathlib import Path
|
13
|
+
from datetime import datetime
|
14
|
+
|
15
|
+
from signalwire_agents.search.models import MODEL_ALIASES, DEFAULT_MODEL, resolve_model_alias
|
13
16
|
|
14
17
|
def main():
|
15
18
|
"""Main entry point for the build-search command"""
|
@@ -66,11 +69,35 @@ Examples:
|
|
66
69
|
sw-search ./docs \\
|
67
70
|
--chunking-strategy qa
|
68
71
|
|
72
|
+
# Model selection examples (performance vs quality tradeoff)
|
73
|
+
sw-search ./docs --model mini # Fastest (~5x faster), 384 dims, good for most use cases
|
74
|
+
sw-search ./docs --model base # Balanced speed/quality, 768 dims (previous default)
|
75
|
+
sw-search ./docs --model large # Best quality (same as base currently)
|
76
|
+
# Or use full model names:
|
77
|
+
sw-search ./docs --model sentence-transformers/all-MiniLM-L6-v2
|
78
|
+
sw-search ./docs --model sentence-transformers/all-mpnet-base-v2
|
69
79
|
|
70
80
|
# JSON-based chunking (pre-chunked content)
|
71
81
|
sw-search ./api_chunks.json \
|
72
82
|
--chunking-strategy json \
|
73
83
|
--file-types json
|
84
|
+
|
85
|
+
# Export chunks to JSON for review (single file)
|
86
|
+
sw-search ./docs \\
|
87
|
+
--output-format json \\
|
88
|
+
--output all_chunks.json
|
89
|
+
|
90
|
+
# Export chunks to JSON (one file per source)
|
91
|
+
sw-search ./docs \\
|
92
|
+
--output-format json \\
|
93
|
+
--output-dir ./chunks/
|
94
|
+
|
95
|
+
# Build index from exported JSON chunks
|
96
|
+
sw-search ./chunks/ \\
|
97
|
+
--chunking-strategy json \\
|
98
|
+
--file-types json \\
|
99
|
+
--output final.swsearch
|
100
|
+
|
74
101
|
# Full configuration example
|
75
102
|
sw-search ./docs ./examples README.md \\
|
76
103
|
--output ./knowledge.swsearch \\
|
@@ -95,6 +122,12 @@ Examples:
|
|
95
122
|
sw-search remote http://localhost:8001 "how to create an agent" --index-name docs
|
96
123
|
sw-search remote localhost:8001 "API reference" --index-name docs --count 3 --verbose
|
97
124
|
|
125
|
+
# Migrate between backends
|
126
|
+
sw-search migrate ./docs.swsearch --to-pgvector \\
|
127
|
+
--connection-string "postgresql://user:pass@localhost/db" \\
|
128
|
+
--collection-name docs_collection
|
129
|
+
sw-search migrate --info ./docs.swsearch
|
130
|
+
|
98
131
|
# PostgreSQL pgvector backend
|
99
132
|
sw-search ./docs \\
|
100
133
|
--backend pgvector \\
|
@@ -126,6 +159,18 @@ Examples:
|
|
126
159
|
help='Output .swsearch file (default: sources.swsearch) or collection name for pgvector'
|
127
160
|
)
|
128
161
|
|
162
|
+
parser.add_argument(
|
163
|
+
'--output-dir',
|
164
|
+
help='Output directory for results (creates one file per source file when used with --output-format json, or auto-names index files)'
|
165
|
+
)
|
166
|
+
|
167
|
+
parser.add_argument(
|
168
|
+
'--output-format',
|
169
|
+
choices=['index', 'json'],
|
170
|
+
default='index',
|
171
|
+
help='Output format: index (create search index) or json (export chunks as JSON) (default: index)'
|
172
|
+
)
|
173
|
+
|
129
174
|
parser.add_argument(
|
130
175
|
'--backend',
|
131
176
|
choices=['sqlite', 'pgvector'],
|
@@ -197,8 +242,8 @@ Examples:
|
|
197
242
|
|
198
243
|
parser.add_argument(
|
199
244
|
'--model',
|
200
|
-
default=
|
201
|
-
help='Sentence transformer model name (
|
245
|
+
default=DEFAULT_MODEL,
|
246
|
+
help=f'Sentence transformer model name or alias (mini/base/large). Default: mini ({DEFAULT_MODEL})'
|
202
247
|
)
|
203
248
|
|
204
249
|
parser.add_argument(
|
@@ -241,6 +286,9 @@ Examples:
|
|
241
286
|
|
242
287
|
args = parser.parse_args()
|
243
288
|
|
289
|
+
# Resolve model aliases
|
290
|
+
args.model = resolve_model_alias(args.model)
|
291
|
+
|
244
292
|
# Validate sources
|
245
293
|
valid_sources = []
|
246
294
|
for source in args.sources:
|
@@ -259,8 +307,35 @@ Examples:
|
|
259
307
|
print("Error: --connection-string is required for pgvector backend")
|
260
308
|
sys.exit(1)
|
261
309
|
|
262
|
-
#
|
263
|
-
if
|
310
|
+
# Validate output options
|
311
|
+
if args.output and args.output_dir:
|
312
|
+
print("Error: Cannot specify both --output and --output-dir")
|
313
|
+
sys.exit(1)
|
314
|
+
|
315
|
+
# Handle JSON output format differently
|
316
|
+
if args.output_format == 'json':
|
317
|
+
# JSON export doesn't use backend
|
318
|
+
if args.backend != 'sqlite':
|
319
|
+
print("Warning: --backend is ignored when using --output-format json")
|
320
|
+
|
321
|
+
# Determine output location
|
322
|
+
if args.output_dir:
|
323
|
+
# Multiple files mode
|
324
|
+
output_path = Path(args.output_dir)
|
325
|
+
if not output_path.exists():
|
326
|
+
output_path.mkdir(parents=True, exist_ok=True)
|
327
|
+
elif args.output:
|
328
|
+
# Single file mode
|
329
|
+
output_path = Path(args.output)
|
330
|
+
if not output_path.suffix:
|
331
|
+
output_path = output_path.with_suffix('.json')
|
332
|
+
else:
|
333
|
+
# Default to single file
|
334
|
+
output_path = Path('chunks.json')
|
335
|
+
args.output = str(output_path)
|
336
|
+
|
337
|
+
# Default output filename (for index format)
|
338
|
+
if args.output_format == 'index' and not args.output and not args.output_dir:
|
264
339
|
if args.backend == 'sqlite':
|
265
340
|
if len(valid_sources) == 1:
|
266
341
|
# Single source - use its name
|
@@ -277,8 +352,25 @@ Examples:
|
|
277
352
|
else:
|
278
353
|
args.output = "documents"
|
279
354
|
|
280
|
-
#
|
281
|
-
if args.
|
355
|
+
# Handle --output-dir for index format
|
356
|
+
if args.output_format == 'index' and args.output_dir:
|
357
|
+
# Auto-generate output filename in the directory
|
358
|
+
if len(valid_sources) == 1:
|
359
|
+
source_name = valid_sources[0].stem if valid_sources[0].is_file() else valid_sources[0].name
|
360
|
+
else:
|
361
|
+
source_name = "combined"
|
362
|
+
|
363
|
+
output_dir = Path(args.output_dir)
|
364
|
+
output_dir.mkdir(parents=True, exist_ok=True)
|
365
|
+
|
366
|
+
if args.backend == 'sqlite':
|
367
|
+
args.output = str(output_dir / f"{source_name}.swsearch")
|
368
|
+
else:
|
369
|
+
# For pgvector, still use the name as collection
|
370
|
+
args.output = source_name
|
371
|
+
|
372
|
+
# Ensure output has .swsearch extension for sqlite (but not for JSON format)
|
373
|
+
if args.output_format == 'index' and args.backend == 'sqlite' and args.output and not args.output.endswith('.swsearch'):
|
282
374
|
args.output += '.swsearch'
|
283
375
|
|
284
376
|
# Parse lists
|
@@ -325,6 +417,103 @@ Examples:
|
|
325
417
|
print()
|
326
418
|
|
327
419
|
try:
|
420
|
+
# Handle JSON export mode
|
421
|
+
if args.output_format == 'json':
|
422
|
+
# Import what we need for chunking
|
423
|
+
from signalwire_agents.search.index_builder import IndexBuilder
|
424
|
+
import json
|
425
|
+
|
426
|
+
builder = IndexBuilder(
|
427
|
+
chunking_strategy=args.chunking_strategy,
|
428
|
+
max_sentences_per_chunk=args.max_sentences_per_chunk,
|
429
|
+
chunk_size=args.chunk_size,
|
430
|
+
chunk_overlap=args.overlap_size,
|
431
|
+
split_newlines=args.split_newlines,
|
432
|
+
index_nlp_backend=args.index_nlp_backend,
|
433
|
+
verbose=args.verbose,
|
434
|
+
semantic_threshold=args.semantic_threshold,
|
435
|
+
topic_threshold=args.topic_threshold
|
436
|
+
)
|
437
|
+
|
438
|
+
# Process files and export chunks
|
439
|
+
all_chunks = []
|
440
|
+
chunk_files_created = []
|
441
|
+
|
442
|
+
# Discover files from sources
|
443
|
+
files = builder._discover_files_from_sources(valid_sources, file_types, exclude_patterns)
|
444
|
+
|
445
|
+
if args.verbose:
|
446
|
+
print(f"Processing {len(files)} files...")
|
447
|
+
|
448
|
+
for file_path in files:
|
449
|
+
try:
|
450
|
+
# Determine base directory for relative paths
|
451
|
+
base_dir = builder._get_base_directory_for_file(file_path, valid_sources)
|
452
|
+
|
453
|
+
# Process file into chunks
|
454
|
+
chunks = builder._process_file(file_path, base_dir, tags)
|
455
|
+
|
456
|
+
if args.output_dir:
|
457
|
+
# Create individual JSON file
|
458
|
+
relative_path = file_path.relative_to(base_dir) if base_dir else file_path.name
|
459
|
+
json_filename = relative_path.with_suffix('.json')
|
460
|
+
json_path = Path(args.output_dir) / json_filename
|
461
|
+
|
462
|
+
# Create subdirectories if needed
|
463
|
+
json_path.parent.mkdir(parents=True, exist_ok=True)
|
464
|
+
|
465
|
+
# Save chunks to JSON
|
466
|
+
chunk_data = {
|
467
|
+
"chunks": chunks,
|
468
|
+
"metadata": {
|
469
|
+
"source_file": str(relative_path),
|
470
|
+
"total_chunks": len(chunks),
|
471
|
+
"chunking_strategy": args.chunking_strategy,
|
472
|
+
"processing_date": datetime.now().isoformat()
|
473
|
+
}
|
474
|
+
}
|
475
|
+
|
476
|
+
with open(json_path, 'w', encoding='utf-8') as f:
|
477
|
+
json.dump(chunk_data, f, indent=2, ensure_ascii=False)
|
478
|
+
|
479
|
+
chunk_files_created.append(json_path)
|
480
|
+
if args.verbose:
|
481
|
+
print(f" Created: {json_path} ({len(chunks)} chunks)")
|
482
|
+
else:
|
483
|
+
# Accumulate all chunks for single file output
|
484
|
+
all_chunks.extend(chunks)
|
485
|
+
|
486
|
+
except Exception as e:
|
487
|
+
print(f"Error processing {file_path}: {e}")
|
488
|
+
if args.verbose:
|
489
|
+
import traceback
|
490
|
+
traceback.print_exc()
|
491
|
+
|
492
|
+
# Handle single file output
|
493
|
+
if not args.output_dir:
|
494
|
+
output_data = {
|
495
|
+
"chunks": all_chunks,
|
496
|
+
"metadata": {
|
497
|
+
"total_chunks": len(all_chunks),
|
498
|
+
"total_files": len(files),
|
499
|
+
"chunking_strategy": args.chunking_strategy,
|
500
|
+
"processing_date": datetime.now().isoformat()
|
501
|
+
}
|
502
|
+
}
|
503
|
+
|
504
|
+
with open(args.output, 'w', encoding='utf-8') as f:
|
505
|
+
json.dump(output_data, f, indent=2, ensure_ascii=False)
|
506
|
+
|
507
|
+
print(f"✓ Exported {len(all_chunks)} chunks to {args.output}")
|
508
|
+
else:
|
509
|
+
print(f"✓ Created {len(chunk_files_created)} JSON files in {args.output_dir}")
|
510
|
+
total_chunks = sum(len(json.load(open(f))['chunks']) for f in chunk_files_created)
|
511
|
+
print(f" Total chunks: {total_chunks}")
|
512
|
+
|
513
|
+
# Exit early for JSON format
|
514
|
+
return
|
515
|
+
|
516
|
+
# Regular index building mode
|
328
517
|
# Create index builder - import only when actually needed
|
329
518
|
from signalwire_agents.search.index_builder import IndexBuilder
|
330
519
|
builder = IndexBuilder(
|
@@ -370,7 +559,13 @@ Examples:
|
|
370
559
|
sys.exit(1)
|
371
560
|
|
372
561
|
if args.backend == 'sqlite':
|
373
|
-
|
562
|
+
# Check if the index was actually created
|
563
|
+
import os
|
564
|
+
if os.path.exists(args.output):
|
565
|
+
print(f"\n✓ Search index created successfully: {args.output}")
|
566
|
+
else:
|
567
|
+
print(f"\n✗ Search index creation failed - no files were processed")
|
568
|
+
sys.exit(1)
|
374
569
|
else:
|
375
570
|
print(f"\n✓ Search collection created successfully: {args.output}")
|
376
571
|
print(f" Connection: {args.connection_string}")
|
@@ -427,21 +622,41 @@ def search_command():
|
|
427
622
|
"""Search within an existing search index"""
|
428
623
|
parser = argparse.ArgumentParser(description='Search within a .swsearch index file or pgvector collection')
|
429
624
|
parser.add_argument('index_source', help='Path to .swsearch file or collection name for pgvector')
|
430
|
-
parser.add_argument('query', help='Search query')
|
625
|
+
parser.add_argument('query', nargs='?', help='Search query (optional if using --shell)')
|
431
626
|
parser.add_argument('--backend', choices=['sqlite', 'pgvector'], default='sqlite',
|
432
627
|
help='Storage backend (default: sqlite)')
|
433
628
|
parser.add_argument('--connection-string', help='PostgreSQL connection string for pgvector backend')
|
629
|
+
parser.add_argument('--shell', action='store_true',
|
630
|
+
help='Interactive shell mode - load once and search multiple times')
|
434
631
|
parser.add_argument('--count', type=int, default=5, help='Number of results to return (default: 5)')
|
435
632
|
parser.add_argument('--distance-threshold', type=float, default=0.0, help='Minimum similarity score (default: 0.0)')
|
436
633
|
parser.add_argument('--tags', help='Comma-separated tags to filter by')
|
437
634
|
parser.add_argument('--query-nlp-backend', choices=['nltk', 'spacy'], default='nltk',
|
438
635
|
help='NLP backend for query processing: nltk (fast, default) or spacy (better quality, slower)')
|
636
|
+
parser.add_argument('--keyword-weight', type=float, default=None,
|
637
|
+
help='Manual keyword weight (0.0-1.0). Overrides automatic weight detection.')
|
439
638
|
parser.add_argument('--verbose', action='store_true', help='Show detailed information')
|
440
639
|
parser.add_argument('--json', action='store_true', help='Output results as JSON')
|
441
640
|
parser.add_argument('--no-content', action='store_true', help='Hide content in results (show only metadata)')
|
641
|
+
parser.add_argument('--model', help='Override embedding model for query (mini/base/large or full model name)')
|
442
642
|
|
443
643
|
args = parser.parse_args()
|
444
644
|
|
645
|
+
# Validate arguments
|
646
|
+
if not args.shell and not args.query:
|
647
|
+
print("Error: Query is required unless using --shell mode")
|
648
|
+
sys.exit(1)
|
649
|
+
|
650
|
+
# Resolve model aliases
|
651
|
+
if args.model and args.model in MODEL_ALIASES:
|
652
|
+
args.model = MODEL_ALIASES[args.model]
|
653
|
+
|
654
|
+
# Validate keyword weight if provided
|
655
|
+
if args.keyword_weight is not None:
|
656
|
+
if args.keyword_weight < 0.0 or args.keyword_weight > 1.0:
|
657
|
+
print("Error: --keyword-weight must be between 0.0 and 1.0")
|
658
|
+
sys.exit(1)
|
659
|
+
|
445
660
|
# Validate backend configuration
|
446
661
|
if args.backend == 'pgvector' and not args.connection_string:
|
447
662
|
print("Error: --connection-string is required for pgvector backend")
|
@@ -469,21 +684,167 @@ def search_command():
|
|
469
684
|
print(f"Connecting to pgvector collection: {args.index_source}")
|
470
685
|
|
471
686
|
if args.backend == 'sqlite':
|
472
|
-
|
687
|
+
# Pass the model from the index or override if specified
|
688
|
+
model = args.model if args.model else None
|
689
|
+
engine = SearchEngine(backend='sqlite', index_path=args.index_source, model=model)
|
473
690
|
else:
|
691
|
+
# Pass the model override if specified
|
692
|
+
model = args.model if args.model else None
|
474
693
|
engine = SearchEngine(backend='pgvector', connection_string=args.connection_string,
|
475
|
-
collection_name=args.index_source)
|
694
|
+
collection_name=args.index_source, model=model)
|
476
695
|
|
477
696
|
# Get index stats
|
478
697
|
stats = engine.get_stats()
|
698
|
+
|
699
|
+
# Get the model from index config if not overridden
|
700
|
+
model_to_use = args.model
|
701
|
+
if not model_to_use and 'config' in stats:
|
702
|
+
# SQLite uses 'embedding_model', pgvector uses 'model_name'
|
703
|
+
model_to_use = stats['config'].get('embedding_model') or stats['config'].get('model_name')
|
704
|
+
|
705
|
+
# Shell mode implementation
|
706
|
+
if args.shell:
|
707
|
+
import time
|
708
|
+
print(f"Search Shell - Index: {args.index_source}")
|
709
|
+
print(f"Backend: {args.backend}")
|
710
|
+
print(f"Index contains {stats['total_chunks']} chunks from {stats['total_files']} files")
|
711
|
+
if model_to_use:
|
712
|
+
print(f"Model: {model_to_use}")
|
713
|
+
print("Type 'exit' or 'quit' to leave, 'help' for options")
|
714
|
+
print("-" * 60)
|
715
|
+
|
716
|
+
while True:
|
717
|
+
try:
|
718
|
+
query = input("\nsearch> ").strip()
|
719
|
+
|
720
|
+
if not query:
|
721
|
+
continue
|
722
|
+
|
723
|
+
if query.lower() in ['exit', 'quit', 'q']:
|
724
|
+
print("Goodbye!")
|
725
|
+
break
|
726
|
+
|
727
|
+
if query.lower() == 'help':
|
728
|
+
print("\nShell commands:")
|
729
|
+
print(" help - Show this help")
|
730
|
+
print(" exit/quit/q - Exit shell")
|
731
|
+
print(" count=N - Set result count (current: {})".format(args.count))
|
732
|
+
print(" tags=tag1,tag2 - Set tag filter (current: {})".format(args.tags or 'none'))
|
733
|
+
print(" verbose - Toggle verbose output")
|
734
|
+
print("\nOr type any search query...")
|
735
|
+
continue
|
736
|
+
|
737
|
+
# Handle shell commands
|
738
|
+
if query.startswith('count='):
|
739
|
+
try:
|
740
|
+
args.count = int(query.split('=')[1])
|
741
|
+
print(f"Result count set to: {args.count}")
|
742
|
+
except:
|
743
|
+
print("Invalid count value")
|
744
|
+
continue
|
745
|
+
|
746
|
+
if query.startswith('tags='):
|
747
|
+
tag_str = query.split('=', 1)[1]
|
748
|
+
args.tags = tag_str if tag_str else None
|
749
|
+
tags = [tag.strip() for tag in args.tags.split(',')] if args.tags else None
|
750
|
+
print(f"Tags filter set to: {tags or 'none'}")
|
751
|
+
continue
|
752
|
+
|
753
|
+
if query == 'verbose':
|
754
|
+
args.verbose = not args.verbose
|
755
|
+
print(f"Verbose output: {'on' if args.verbose else 'off'}")
|
756
|
+
continue
|
757
|
+
|
758
|
+
# Perform search with timing
|
759
|
+
start_time = time.time()
|
760
|
+
|
761
|
+
# Preprocess query
|
762
|
+
enhanced = preprocess_query(
|
763
|
+
query,
|
764
|
+
vector=True,
|
765
|
+
query_nlp_backend=args.query_nlp_backend,
|
766
|
+
model_name=model_to_use,
|
767
|
+
preserve_original=True,
|
768
|
+
max_synonyms=2
|
769
|
+
)
|
770
|
+
|
771
|
+
# Parse tags
|
772
|
+
tags = [tag.strip() for tag in args.tags.split(',')] if args.tags else None
|
773
|
+
|
774
|
+
# Perform search
|
775
|
+
results = engine.search(
|
776
|
+
query_vector=enhanced.get('vector'),
|
777
|
+
enhanced_text=enhanced.get('enhanced_text', query),
|
778
|
+
count=args.count,
|
779
|
+
distance_threshold=args.distance_threshold,
|
780
|
+
tags=tags,
|
781
|
+
keyword_weight=args.keyword_weight,
|
782
|
+
original_query=query
|
783
|
+
)
|
784
|
+
|
785
|
+
search_time = time.time() - start_time
|
786
|
+
|
787
|
+
# Display results
|
788
|
+
if not results:
|
789
|
+
print(f"\nNo results found for '{query}' ({search_time:.3f}s)")
|
790
|
+
else:
|
791
|
+
print(f"\nFound {len(results)} result(s) for '{query}' ({search_time:.3f}s):")
|
792
|
+
if enhanced.get('enhanced_text') != query and args.verbose:
|
793
|
+
print(f"Enhanced query: '{enhanced.get('enhanced_text')}'")
|
794
|
+
print("=" * 60)
|
795
|
+
|
796
|
+
for i, result in enumerate(results):
|
797
|
+
print(f"\n[{i+1}] Score: {result['score']:.4f}")
|
798
|
+
|
799
|
+
# Show metadata
|
800
|
+
metadata = result['metadata']
|
801
|
+
print(f"File: {metadata.get('filename', 'Unknown')}")
|
802
|
+
if metadata.get('section'):
|
803
|
+
print(f"Section: {metadata['section']}")
|
804
|
+
|
805
|
+
# Show content unless suppressed
|
806
|
+
if not args.no_content:
|
807
|
+
content = result['content']
|
808
|
+
if len(content) > 300 and not args.verbose:
|
809
|
+
content = content[:300] + "..."
|
810
|
+
print(f"\n{content}")
|
811
|
+
|
812
|
+
if i < len(results) - 1:
|
813
|
+
print("-" * 40)
|
814
|
+
|
815
|
+
except KeyboardInterrupt:
|
816
|
+
print("\nUse 'exit' to quit")
|
817
|
+
except EOFError:
|
818
|
+
print("\nGoodbye!")
|
819
|
+
break
|
820
|
+
except Exception as e:
|
821
|
+
print(f"\nError: {e}")
|
822
|
+
if args.verbose:
|
823
|
+
import traceback
|
824
|
+
traceback.print_exc()
|
825
|
+
|
826
|
+
return # Exit after shell mode
|
827
|
+
|
828
|
+
# Normal single query mode
|
479
829
|
if args.verbose:
|
480
830
|
print(f"Index contains {stats['total_chunks']} chunks from {stats['total_files']} files")
|
481
831
|
print(f"Searching for: '{args.query}'")
|
482
832
|
print(f"Query NLP Backend: {args.query_nlp_backend}")
|
833
|
+
if args.model:
|
834
|
+
print(f"Override model: {args.model}")
|
835
|
+
elif model_to_use:
|
836
|
+
print(f"Using index model: {model_to_use}")
|
483
837
|
print()
|
484
838
|
|
485
839
|
# Preprocess query
|
486
|
-
enhanced = preprocess_query(
|
840
|
+
enhanced = preprocess_query(
|
841
|
+
args.query,
|
842
|
+
vector=True, # Both backends need vector for similarity search
|
843
|
+
query_nlp_backend=args.query_nlp_backend,
|
844
|
+
model_name=model_to_use,
|
845
|
+
preserve_original=True, # Keep original query terms
|
846
|
+
max_synonyms=2 # Reduce synonym expansion
|
847
|
+
)
|
487
848
|
|
488
849
|
# Parse tags if provided
|
489
850
|
tags = [tag.strip() for tag in args.tags.split(',')] if args.tags else None
|
@@ -494,7 +855,9 @@ def search_command():
|
|
494
855
|
enhanced_text=enhanced.get('enhanced_text', args.query),
|
495
856
|
count=args.count,
|
496
857
|
distance_threshold=args.distance_threshold,
|
497
|
-
tags=tags
|
858
|
+
tags=tags,
|
859
|
+
keyword_weight=args.keyword_weight,
|
860
|
+
original_query=args.query # Pass original for exact match boosting
|
498
861
|
)
|
499
862
|
|
500
863
|
if args.json:
|
@@ -563,6 +926,142 @@ def search_command():
|
|
563
926
|
traceback.print_exc()
|
564
927
|
sys.exit(1)
|
565
928
|
|
929
|
+
def migrate_command():
    """Migrate search indexes between backends.

    Entry point for ``sw-search migrate``. Arguments are parsed from
    ``sys.argv`` and one of two actions is performed:

    * ``--info``: print a summary of the source index and return.
    * ``--to-pgvector``: copy a SQLite index into a pgvector collection.

    ``--to-sqlite`` is accepted by the parser but only reports that the
    direction is not implemented yet.

    Every usage error is reported before the migration module is imported,
    so a missing optional dependency can never mask an argument error.
    All failures print a message and terminate with exit status 1.
    """
    parser = _build_migrate_parser()
    args = parser.parse_args()

    # Handle --info flag
    if args.info:
        if not args.source:
            print("Error: Source index required with --info")
            sys.exit(1)
        _print_index_info(args.source, args.verbose)
        return

    # Validate arguments for migration
    if not args.source:
        print("Error: Source index required for migration")
        sys.exit(1)

    if not args.to_pgvector and not args.to_sqlite:
        print("Error: Must specify migration direction (--to-pgvector or --to-sqlite)")
        sys.exit(1)

    if args.to_sqlite:
        # Reported up front: nothing from the migration module is needed
        # to tell the user this direction does not exist yet.
        print("Error: pgvector to SQLite migration not yet implemented")
        print("This feature is planned for future development")
        sys.exit(1)

    # Only --to-pgvector remains past this point; validate its arguments
    if not args.connection_string:
        print("Error: --connection-string required for pgvector migration")
        sys.exit(1)
    if not args.collection_name:
        print("Error: --collection-name required for pgvector migration")
        sys.exit(1)
    if args.batch_size < 1:
        # A zero or negative batch can never make progress
        print("Error: --batch-size must be a positive integer")
        sys.exit(1)

    try:
        # Imported lazily so the rest of the CLI does not require the
        # migration module to be importable.
        from signalwire_agents.search.migration import SearchIndexMigrator
        migrator = SearchIndexMigrator(verbose=args.verbose)

        # Perform migration
        print(f"Migrating {args.source} to pgvector collection '{args.collection_name}'...")
        stats = migrator.migrate_sqlite_to_pgvector(
            sqlite_path=args.source,
            connection_string=args.connection_string,
            collection_name=args.collection_name,
            overwrite=args.overwrite,
            batch_size=args.batch_size
        )

        print("\n✓ Migration completed successfully!")
        print(f" Chunks migrated: {stats['chunks_migrated']}")
        print(f" Errors: {stats['errors']}")

    except Exception as e:
        print(f"\nError during migration: {e}")
        if args.verbose:
            import traceback
            traceback.print_exc()
        sys.exit(1)


def _build_migrate_parser():
    """Create the argument parser used by ``migrate_command``."""
    parser = argparse.ArgumentParser(
        description='Migrate search indexes between SQLite and pgvector backends',
        epilog="""
Examples:
# Migrate SQLite to pgvector
sw-search migrate ./docs.swsearch \\
--to-pgvector \\
--connection-string "postgresql://user:pass@localhost/db" \\
--collection-name docs_collection

# Migrate with overwrite
sw-search migrate ./docs.swsearch \\
--to-pgvector \\
--connection-string "postgresql://user:pass@localhost/db" \\
--collection-name docs_collection \\
--overwrite

# Get index information
sw-search migrate --info ./docs.swsearch
""",
        formatter_class=argparse.RawDescriptionHelpFormatter
    )

    # Source argument (optional at parse time; checked per action later)
    parser.add_argument('source', nargs='?', help='Source index file or collection')

    # Migration direction / action: at most one of these may be given
    migration_group = parser.add_mutually_exclusive_group()
    migration_group.add_argument('--to-pgvector', action='store_true',
                                 help='Migrate SQLite index to pgvector')
    migration_group.add_argument('--to-sqlite', action='store_true',
                                 help='Migrate pgvector collection to SQLite (not yet implemented)')
    migration_group.add_argument('--info', action='store_true',
                                 help='Show information about an index')

    # pgvector options
    parser.add_argument('--connection-string',
                        help='PostgreSQL connection string for pgvector')
    parser.add_argument('--collection-name',
                        help='Collection name for pgvector')
    parser.add_argument('--overwrite', action='store_true',
                        help='Overwrite existing collection')

    # SQLite options
    parser.add_argument('--output',
                        help='Output .swsearch file path (for --to-sqlite)')

    # Common options
    parser.add_argument('--batch-size', type=int, default=100,
                        help='Number of chunks to process at once (default: 100)')
    parser.add_argument('--verbose', action='store_true',
                        help='Show detailed progress')

    return parser


def _print_index_info(source, verbose):
    """Print a summary of the index at ``source``.

    Detailed fields are printed only when the migrator reports the type
    ``'sqlite'``; with ``verbose`` the full stored configuration is dumped
    as well. Any exception raised while loading or inspecting the index is
    reported and the process exits with status 1.
    """
    try:
        from signalwire_agents.search.migration import SearchIndexMigrator
        migrator = SearchIndexMigrator(verbose=verbose)
        info = migrator.get_index_info(source)

        print(f"Index Information: {source}")
        print(f" Type: {info['type']}")
        if info['type'] == 'sqlite':
            config = info['config']
            print(f" Total chunks: {info['total_chunks']}")
            print(f" Total files: {info['total_files']}")
            print(f" Model: {config.get('embedding_model', 'Unknown')}")
            print(f" Dimensions: {config.get('embedding_dimensions', 'Unknown')}")
            print(f" Created: {config.get('created_at', 'Unknown')}")
            if verbose:
                print("\n Full configuration:")
                for key, value in config.items():
                    print(f" {key}: {value}")
        else:
            print(" Unable to determine index type")
    except Exception as e:
        print(f"Error getting index info: {e}")
        sys.exit(1)
|
1063
|
+
|
1064
|
+
|
566
1065
|
def remote_command():
|
567
1066
|
"""Search via remote API endpoint"""
|
568
1067
|
parser = argparse.ArgumentParser(description='Search via remote API endpoint')
|
@@ -838,6 +1337,11 @@ Examples:
|
|
838
1337
|
sys.argv.pop(1)
|
839
1338
|
remote_command()
|
840
1339
|
return
|
1340
|
+
elif sys.argv[1] == 'migrate':
|
1341
|
+
# Remove 'migrate' from argv and call migrate_command
|
1342
|
+
sys.argv.pop(1)
|
1343
|
+
migrate_command()
|
1344
|
+
return
|
841
1345
|
|
842
1346
|
# Regular build command
|
843
1347
|
main()
|