signalwire-agents 0.1.11__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- signalwire_agents/__init__.py +5 -1
- signalwire_agents/agent_server.py +222 -13
- signalwire_agents/cli/build_search.py +457 -0
- signalwire_agents/cli/test_swaig.py +177 -113
- signalwire_agents/core/agent_base.py +1 -1
- signalwire_agents/core/logging_config.py +232 -0
- signalwire_agents/search/__init__.py +131 -0
- signalwire_agents/search/document_processor.py +764 -0
- signalwire_agents/search/index_builder.py +534 -0
- signalwire_agents/search/query_processor.py +371 -0
- signalwire_agents/search/search_engine.py +383 -0
- signalwire_agents/search/search_service.py +251 -0
- signalwire_agents/skills/native_vector_search/__init__.py +1 -0
- signalwire_agents/skills/native_vector_search/skill.py +352 -0
- signalwire_agents/skills/registry.py +2 -15
- signalwire_agents/utils/__init__.py +13 -1
- {signalwire_agents-0.1.11.dist-info → signalwire_agents-0.1.12.dist-info}/METADATA +110 -3
- {signalwire_agents-0.1.11.dist-info → signalwire_agents-0.1.12.dist-info}/RECORD +23 -14
- {signalwire_agents-0.1.11.dist-info → signalwire_agents-0.1.12.dist-info}/entry_points.txt +1 -0
- signalwire_agents/utils/serverless.py +0 -38
- {signalwire_agents-0.1.11.data → signalwire_agents-0.1.12.data}/data/schema.json +0 -0
- {signalwire_agents-0.1.11.dist-info → signalwire_agents-0.1.12.dist-info}/WHEEL +0 -0
- {signalwire_agents-0.1.11.dist-info → signalwire_agents-0.1.12.dist-info}/licenses/LICENSE +0 -0
- {signalwire_agents-0.1.11.dist-info → signalwire_agents-0.1.12.dist-info}/top_level.txt +0 -0
signalwire_agents/search/search_engine.py
@@ -0,0 +1,383 @@
+"""
+Copyright (c) 2025 SignalWire
+
+This file is part of the SignalWire AI Agents SDK.
+
+Licensed under the MIT License.
+See LICENSE file in the project root for full license information.
+"""
+
+import sqlite3
+import json
+import logging
+from typing import List, Dict, Any, Optional, Union
+
+try:
+    import numpy as np
+    from sklearn.metrics.pairwise import cosine_similarity
+    NDArray = np.ndarray
+except ImportError:
+    np = None
+    cosine_similarity = None
+    NDArray = Any  # Fallback type for when numpy is not available
+
+logger = logging.getLogger(__name__)
+
+class SearchEngine:
+    """Hybrid search engine for vector and keyword search"""
+
+    def __init__(self, index_path: str, model=None):
+        self.index_path = index_path
+        self.model = model
+        self.config = self._load_config()
+        self.embedding_dim = int(self.config.get('embedding_dimensions', 768))
+
+    def _load_config(self) -> Dict[str, str]:
+        """Load index configuration"""
+        try:
+            conn = sqlite3.connect(self.index_path)
+            cursor = conn.cursor()
+            cursor.execute("SELECT key, value FROM config")
+            config = dict(cursor.fetchall())
+            conn.close()
+            return config
+        except Exception as e:
+            logger.error(f"Error loading config from {self.index_path}: {e}")
+            return {}
+
+    def search(self, query_vector: List[float], enhanced_text: str,
+               count: int = 3, distance_threshold: float = 0.0,
+               tags: Optional[List[str]] = None) -> List[Dict[str, Any]]:
+        """
+        Perform hybrid search (vector + keyword)
+
+        Args:
+            query_vector: Embedding vector for the query
+            enhanced_text: Processed query text for keyword search
+            count: Number of results to return
+            distance_threshold: Minimum similarity score
+            tags: Filter by tags
+
+        Returns:
+            List of search results with scores and metadata
+        """
+
+        if not np or not cosine_similarity:
+            logger.warning("NumPy or scikit-learn not available. Using keyword search only.")
+            return self._keyword_search_only(enhanced_text, count, tags)
+
+        # Convert query vector to numpy array
+        try:
+            query_array = np.array(query_vector).reshape(1, -1)
+        except Exception as e:
+            logger.error(f"Error converting query vector: {e}")
+            return self._keyword_search_only(enhanced_text, count, tags)
+
+        # Vector search
+        vector_results = self._vector_search(query_array, count * 2)
+
+        # Keyword search
+        keyword_results = self._keyword_search(enhanced_text, count * 2)
+
+        # Merge and rank results
+        merged_results = self._merge_results(vector_results, keyword_results)
+
+        # Filter by tags if specified
+        if tags:
+            merged_results = self._filter_by_tags(merged_results, tags)
+
+        # Filter by distance threshold
+        filtered_results = [
+            r for r in merged_results
+            if r['score'] >= distance_threshold
+        ]
+
+        return filtered_results[:count]
+
+    def _keyword_search_only(self, enhanced_text: str, count: int,
+                             tags: Optional[List[str]] = None) -> List[Dict[str, Any]]:
+        """Fallback to keyword search only when vector search is unavailable"""
+        keyword_results = self._keyword_search(enhanced_text, count)
+
+        if tags:
+            keyword_results = self._filter_by_tags(keyword_results, tags)
+
+        return keyword_results[:count]
+
+    def _vector_search(self, query_vector: Union[NDArray, Any], count: int) -> List[Dict[str, Any]]:
+        """Perform vector similarity search"""
+        if not np or not cosine_similarity:
+            return []
+
+        try:
+            conn = sqlite3.connect(self.index_path)
+            cursor = conn.cursor()
+
+            # Get all embeddings (for small datasets, this is fine)
+            # For large datasets, we'd use FAISS or similar
+            cursor.execute('''
+                SELECT id, content, embedding, filename, section, tags, metadata
+                FROM chunks
+                WHERE embedding IS NOT NULL AND embedding != ''
+            ''')
+
+            results = []
+            for row in cursor.fetchall():
+                chunk_id, content, embedding_blob, filename, section, tags_json, metadata_json = row
+
+                if not embedding_blob:
+                    continue
+
+                try:
+                    # Convert embedding back to numpy array
+                    embedding = np.frombuffer(embedding_blob, dtype=np.float32).reshape(1, -1)
+
+                    # Calculate similarity
+                    similarity = cosine_similarity(query_vector, embedding)[0][0]
+
+                    results.append({
+                        'id': chunk_id,
+                        'content': content,
+                        'score': float(similarity),
+                        'metadata': {
+                            'filename': filename,
+                            'section': section,
+                            'tags': json.loads(tags_json) if tags_json else [],
+                            'metadata': json.loads(metadata_json) if metadata_json else {}
+                        },
+                        'search_type': 'vector'
+                    })
+                except Exception as e:
+                    logger.warning(f"Error processing embedding for chunk {chunk_id}: {e}")
+                    continue
+
+            conn.close()
+
+            # Sort by similarity score
+            results.sort(key=lambda x: x['score'], reverse=True)
+            return results[:count]
+
+        except Exception as e:
+            logger.error(f"Error in vector search: {e}")
+            return []
+
+    def _keyword_search(self, enhanced_text: str, count: int) -> List[Dict[str, Any]]:
+        """Perform full-text search"""
+        try:
+            conn = sqlite3.connect(self.index_path)
+            cursor = conn.cursor()
+
+            # Escape FTS5 special characters
+            escaped_text = self._escape_fts_query(enhanced_text)
+
+            # FTS5 search
+            cursor.execute('''
+                SELECT c.id, c.content, c.filename, c.section, c.tags, c.metadata,
+                       chunks_fts.rank
+                FROM chunks_fts
+                JOIN chunks c ON chunks_fts.rowid = c.id
+                WHERE chunks_fts MATCH ?
+                ORDER BY chunks_fts.rank
+                LIMIT ?
+            ''', (escaped_text, count))
+
+            results = []
+            for row in cursor.fetchall():
+                chunk_id, content, filename, section, tags_json, metadata_json, rank = row
+
+                # Convert FTS rank to similarity score (higher rank = lower score)
+                # FTS5 rank is negative, so we convert it to a positive similarity score
+                score = 1.0 / (1.0 + abs(rank))
+
+                results.append({
+                    'id': chunk_id,
+                    'content': content,
+                    'score': float(score),
+                    'metadata': {
+                        'filename': filename,
+                        'section': section,
+                        'tags': json.loads(tags_json) if tags_json else [],
+                        'metadata': json.loads(metadata_json) if metadata_json else {}
+                    },
+                    'search_type': 'keyword'
+                })
+
+            conn.close()
+            return results
+
+        except Exception as e:
+            logger.error(f"Error in keyword search: {e}")
+            # Fallback to simple LIKE search
+            return self._fallback_search(enhanced_text, count)
+
+    def _escape_fts_query(self, query: str) -> str:
+        """Escape special characters for FTS5 queries"""
+        # FTS5 special characters that need escaping
+        special_chars = ['"', "'", '(', ')', '*', '-', '+', ':', '^']
+
+        escaped = query
+        for char in special_chars:
+            escaped = escaped.replace(char, f'\\{char}')
+
+        return escaped
+
+    def _fallback_search(self, enhanced_text: str, count: int) -> List[Dict[str, Any]]:
+        """Fallback search using LIKE when FTS fails"""
+        try:
+            conn = sqlite3.connect(self.index_path)
+            cursor = conn.cursor()
+
+            # Simple LIKE search
+            search_terms = enhanced_text.lower().split()
+            like_conditions = []
+            params = []
+
+            for term in search_terms[:5]:  # Limit to 5 terms to avoid too complex queries
+                like_conditions.append("LOWER(processed_content) LIKE ?")
+                params.append(f"%{term}%")
+
+            if not like_conditions:
+                return []
+
+            query = f'''
+                SELECT id, content, filename, section, tags, metadata
+                FROM chunks
+                WHERE {" OR ".join(like_conditions)}
+                LIMIT ?
+            '''
+            params.append(count)
+
+            cursor.execute(query, params)
+
+            results = []
+            for row in cursor.fetchall():
+                chunk_id, content, filename, section, tags_json, metadata_json = row
+
+                # Simple scoring based on term matches
+                content_lower = content.lower()
+                score = sum(1 for term in search_terms if term.lower() in content_lower) / len(search_terms)
+
+                results.append({
+                    'id': chunk_id,
+                    'content': content,
+                    'score': float(score),
+                    'metadata': {
+                        'filename': filename,
+                        'section': section,
+                        'tags': json.loads(tags_json) if tags_json else [],
+                        'metadata': json.loads(metadata_json) if metadata_json else {}
+                    },
+                    'search_type': 'fallback'
+                })
+
+            conn.close()
+
+            # Sort by score
+            results.sort(key=lambda x: x['score'], reverse=True)
+            return results
+
+        except Exception as e:
+            logger.error(f"Error in fallback search: {e}")
+            return []
+
+    def _merge_results(self, vector_results: List[Dict], keyword_results: List[Dict]) -> List[Dict[str, Any]]:
+        """Merge and rank vector and keyword search results"""
+        # Create a combined list with weighted scores
+        combined = {}
+
+        # Add vector results with weight
+        for result in vector_results:
+            chunk_id = result['id']
+            combined[chunk_id] = result.copy()
+            combined[chunk_id]['vector_score'] = result['score']
+            combined[chunk_id]['keyword_score'] = 0.0
+
+        # Add keyword results with weight
+        for result in keyword_results:
+            chunk_id = result['id']
+            if chunk_id in combined:
+                combined[chunk_id]['keyword_score'] = result['score']
+            else:
+                combined[chunk_id] = result.copy()
+                combined[chunk_id]['vector_score'] = 0.0
+                combined[chunk_id]['keyword_score'] = result['score']
+
+        # Calculate combined score (weighted average)
+        vector_weight = 0.7
+        keyword_weight = 0.3
+
+        for chunk_id, result in combined.items():
+            vector_score = result.get('vector_score', 0.0)
+            keyword_score = result.get('keyword_score', 0.0)
+            result['score'] = (vector_score * vector_weight + keyword_score * keyword_weight)
+
+            # Add debug info
+            result['metadata']['search_scores'] = {
+                'vector': vector_score,
+                'keyword': keyword_score,
+                'combined': result['score']
+            }
+
+        # Sort by combined score
+        sorted_results = sorted(combined.values(), key=lambda x: x['score'], reverse=True)
+        return sorted_results
+
+    def _filter_by_tags(self, results: List[Dict], required_tags: List[str]) -> List[Dict[str, Any]]:
+        """Filter results by required tags"""
+        filtered = []
+        for result in results:
+            result_tags = result['metadata'].get('tags', [])
+            if any(tag in result_tags for tag in required_tags):
+                filtered.append(result)
+        return filtered
+
+    def get_stats(self) -> Dict[str, Any]:
+        """Get statistics about the search index"""
+        conn = sqlite3.connect(self.index_path)
+        cursor = conn.cursor()
+
+        try:
+            # Get total chunks
+            cursor.execute("SELECT COUNT(*) FROM chunks")
+            total_chunks = cursor.fetchone()[0]
+
+            # Get total files
+            cursor.execute("SELECT COUNT(DISTINCT filename) FROM chunks")
+            total_files = cursor.fetchone()[0]
+
+            # Get average chunk size
+            cursor.execute("SELECT AVG(LENGTH(content)) FROM chunks")
+            avg_chunk_size = cursor.fetchone()[0] or 0
+
+            # Get file types
+            cursor.execute("""
+                SELECT
+                    CASE
+                        WHEN filename LIKE '%.md' THEN 'markdown'
+                        WHEN filename LIKE '%.py' THEN 'python'
+                        WHEN filename LIKE '%.txt' THEN 'text'
+                        WHEN filename LIKE '%.pdf' THEN 'pdf'
+                        WHEN filename LIKE '%.docx' THEN 'docx'
+                        ELSE 'other'
+                    END as file_type,
+                    COUNT(DISTINCT filename) as count
+                FROM chunks
+                GROUP BY file_type
+            """)
+            file_types = dict(cursor.fetchall())
+
+            # Get languages
+            cursor.execute("SELECT language, COUNT(*) FROM chunks GROUP BY language")
+            languages = dict(cursor.fetchall())
+
+            return {
+                'total_chunks': total_chunks,
+                'total_files': total_files,
+                'avg_chunk_size': int(avg_chunk_size),
+                'file_types': file_types,
+                'languages': languages,
+                'config': self.config
+            }
+
+        finally:
+            conn.close()
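For orientation, here is a minimal usage sketch (not part of the package): it drives SearchEngine the same way the SearchService module below does, using preprocess_query from the new query_processor module to produce the two inputs search() expects. The index file name docs.swsearch is a placeholder assumption.

    from signalwire_agents.search.query_processor import preprocess_query
    from signalwire_agents.search.search_engine import SearchEngine

    engine = SearchEngine("docs.swsearch")
    print(engine.get_stats())  # chunk/file counts, file types, index config

    # preprocess_query returns the enhanced text plus (optionally) a query
    # embedding: exactly the two inputs search() expects.
    enhanced = preprocess_query("how do I register a SWAIG function?",
                                language="auto", vector=True)
    results = engine.search(
        query_vector=enhanced.get("vector", []),
        enhanced_text=enhanced["enhanced_text"],
        count=3,
        distance_threshold=0.0,
        tags=None,  # e.g. ["docs"] to keep only tagged chunks
    )
    for r in results:
        print(f"{r['score']:.3f}", r["metadata"]["filename"], r["search_type"])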
signalwire_agents/search/search_service.py
@@ -0,0 +1,251 @@
+"""
+Copyright (c) 2025 SignalWire
+
+This file is part of the SignalWire AI Agents SDK.
+
+Licensed under the MIT License.
+See LICENSE file in the project root for full license information.
+"""
+
+import logging
+from typing import Dict, Any, List, Optional
+
+try:
+    from fastapi import FastAPI, HTTPException
+    from pydantic import BaseModel
+except ImportError:
+    FastAPI = None
+    HTTPException = None
+    BaseModel = None
+
+try:
+    from sentence_transformers import SentenceTransformer
+except ImportError:
+    SentenceTransformer = None
+
+from .query_processor import preprocess_query
+from .search_engine import SearchEngine
+
+logger = logging.getLogger(__name__)
+
+# Pydantic models for API
+if BaseModel:
+    class SearchRequest(BaseModel):
+        query: str
+        index_name: str = "default"
+        count: int = 3
+        distance: float = 0.0
+        tags: Optional[List[str]] = None
+        language: Optional[str] = None
+
+    class SearchResult(BaseModel):
+        content: str
+        score: float
+        metadata: Dict[str, Any]
+
+    class SearchResponse(BaseModel):
+        results: List[SearchResult]
+        query_analysis: Optional[Dict[str, Any]] = None
+else:
+    # Fallback classes when FastAPI is not available
+    class SearchRequest:
+        def __init__(self, query: str, index_name: str = "default", count: int = 3,
+                     distance: float = 0.0, tags: Optional[List[str]] = None,
+                     language: Optional[str] = None):
+            self.query = query
+            self.index_name = index_name
+            self.count = count
+            self.distance = distance
+            self.tags = tags
+            self.language = language
+
+    class SearchResult:
+        def __init__(self, content: str, score: float, metadata: Dict[str, Any]):
+            self.content = content
+            self.score = score
+            self.metadata = metadata
+
+    class SearchResponse:
+        def __init__(self, results: List[SearchResult], query_analysis: Optional[Dict[str, Any]] = None):
+            self.results = results
+            self.query_analysis = query_analysis
+
+class SearchService:
+    """Local search service with HTTP API"""
+
+    def __init__(self, port: int = 8001, indexes: Dict[str, str] = None):
+        self.port = port
+        self.indexes = indexes or {}
+        self.search_engines = {}
+        self.model = None
+
+        if FastAPI:
+            self.app = FastAPI(title="SignalWire Local Search Service")
+            self._setup_routes()
+        else:
+            self.app = None
+            logger.warning("FastAPI not available. HTTP service will not be available.")
+
+        self._load_resources()
+
+    def _setup_routes(self):
+        """Setup FastAPI routes"""
+        if not self.app:
+            return
+
+        @self.app.post("/search", response_model=SearchResponse)
+        async def search(request: SearchRequest):
+            return await self._handle_search(request)
+
+        @self.app.get("/health")
+        async def health():
+            return {"status": "healthy", "indexes": list(self.indexes.keys())}
+
+        @self.app.post("/reload_index")
+        async def reload_index(index_name: str, index_path: str):
+            """Reload or add new index"""
+            self.indexes[index_name] = index_path
+            self.search_engines[index_name] = SearchEngine(index_path, self.model)
+            return {"status": "reloaded", "index": index_name}
+
+    def _load_resources(self):
+        """Load embedding model and search indexes"""
+        # Load model (shared across all indexes)
+        if self.indexes and SentenceTransformer:
+            # Get model name from first index
+            sample_index = next(iter(self.indexes.values()))
+            model_name = self._get_model_name(sample_index)
+            try:
+                self.model = SentenceTransformer(model_name)
+            except Exception as e:
+                logger.warning(f"Could not load sentence transformer model: {e}")
+                self.model = None
+
+        # Load search engines for each index
+        for index_name, index_path in self.indexes.items():
+            try:
+                self.search_engines[index_name] = SearchEngine(index_path, self.model)
+            except Exception as e:
+                logger.error(f"Error loading search engine for {index_name}: {e}")
+
+    def _get_model_name(self, index_path: str) -> str:
+        """Get embedding model name from index config"""
+        try:
+            import sqlite3
+            conn = sqlite3.connect(index_path)
+            cursor = conn.cursor()
+            cursor.execute("SELECT value FROM config WHERE key = 'embedding_model'")
+            result = cursor.fetchone()
+            conn.close()
+            return result[0] if result else 'sentence-transformers/all-mpnet-base-v2'
+        except Exception as e:
+            logger.warning(f"Could not get model name from index: {e}")
+            return 'sentence-transformers/all-mpnet-base-v2'
+
+    async def _handle_search(self, request: SearchRequest) -> SearchResponse:
+        """Handle search request"""
+        if request.index_name not in self.search_engines:
+            if HTTPException:
+                raise HTTPException(status_code=404, detail=f"Index '{request.index_name}' not found")
+            else:
+                raise ValueError(f"Index '{request.index_name}' not found")
+
+        search_engine = self.search_engines[request.index_name]
+
+        # Enhance query
+        try:
+            enhanced = preprocess_query(
+                request.query,
+                language=request.language or 'auto',
+                vector=True
+            )
+        except Exception as e:
+            logger.error(f"Error preprocessing query: {e}")
+            enhanced = {
+                'enhanced_text': request.query,
+                'vector': [],
+                'language': 'en'
+            }
+
+        # Perform search
+        try:
+            results = search_engine.search(
+                query_vector=enhanced.get('vector', []),
+                enhanced_text=enhanced['enhanced_text'],
+                count=request.count,
+                distance_threshold=request.distance,
+                tags=request.tags
+            )
+        except Exception as e:
+            logger.error(f"Error performing search: {e}")
+            results = []
+
+        # Format response
+        search_results = [
+            SearchResult(
+                content=result['content'],
+                score=result['score'],
+                metadata=result['metadata']
+            )
+            for result in results
+        ]
+
+        return SearchResponse(
+            results=search_results,
+            query_analysis={
+                'original_query': request.query,
+                'enhanced_query': enhanced['enhanced_text'],
+                'detected_language': enhanced.get('language'),
+                'pos_analysis': enhanced.get('POS')
+            }
+        )
+
+    def search_direct(self, query: str, index_name: str = "default", count: int = 3,
+                      distance: float = 0.0, tags: Optional[List[str]] = None,
+                      language: Optional[str] = None) -> Dict[str, Any]:
+        """Direct search method (non-async) for programmatic use"""
+        request = SearchRequest(
+            query=query,
+            index_name=index_name,
+            count=count,
+            distance=distance,
+            tags=tags,
+            language=language
+        )
+
+        # Use asyncio to run the async method
+        import asyncio
+        try:
+            loop = asyncio.get_event_loop()
+        except RuntimeError:
+            loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(loop)
+
+        response = loop.run_until_complete(self._handle_search(request))
+
+        return {
+            'results': [
+                {
+                    'content': r.content,
+                    'score': r.score,
+                    'metadata': r.metadata
+                }
+                for r in response.results
+            ],
+            'query_analysis': response.query_analysis
+        }
+
+    def start(self):
+        """Start the service"""
+        if not self.app:
+            raise RuntimeError("FastAPI not available. Cannot start HTTP service.")
+
+        try:
+            import uvicorn
+            uvicorn.run(self.app, host="0.0.0.0", port=self.port)
+        except ImportError:
+            raise RuntimeError("uvicorn not available. Cannot start HTTP service.")
+
+    def stop(self):
+        """Stop the service (placeholder for cleanup)"""
+        pass
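A similar sketch for the service itself (again not from the package; docs.swsearch is a placeholder index file). search_direct() is the synchronous in-process path; start() serves POST /search, GET /health, and POST /reload_index over HTTP and needs the optional fastapi/uvicorn dependencies.

    from signalwire_agents.search.search_service import SearchService

    service = SearchService(port=8001, indexes={"default": "docs.swsearch"})

    # Programmatic path: no HTTP round trip, useful in tests or serverless code.
    out = service.search_direct("how do I register a SWAIG function?", count=3)
    for hit in out["results"]:
        print(hit["score"], hit["metadata"]["filename"])

    # HTTP path: blocks the current thread serving the routes listed above.
    service.start()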
signalwire_agents/skills/native_vector_search/__init__.py
@@ -0,0 +1 @@
+# Native Vector Search Skill
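The package marker above is the only part of native_vector_search shown in this section; the implementation lives in skill.py (+352 lines, not included here). As a rough, hypothetical sketch of how skills in this SDK attach to an agent, where the parameter names are illustrative assumptions rather than the skill's documented configuration:

    from signalwire_agents import AgentBase

    class DocsAgent(AgentBase):
        def __init__(self):
            super().__init__(name="docs-agent")
            # Parameter names are assumed for illustration only.
            self.add_skill("native_vector_search", {
                "tool_name": "search_docs",
                "index_file": "docs.swsearch",
            })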
|