signalwire-agents 0.1.13__py3-none-any.whl → 0.1.14__py3-none-any.whl
This diff compares publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
- signalwire_agents/__init__.py +28 -11
- signalwire_agents/cli/build_search.py +174 -14
- signalwire_agents/cli/test_swaig.py +159 -114
- signalwire_agents/core/agent_base.py +7 -36
- signalwire_agents/core/logging_config.py +143 -14
- signalwire_agents/core/skill_manager.py +2 -2
- signalwire_agents/core/swml_service.py +5 -45
- signalwire_agents/search/document_processor.py +275 -14
- signalwire_agents/search/index_builder.py +45 -10
- signalwire_agents/search/query_processor.py +27 -12
- signalwire_agents/skills/__init__.py +1 -1
- signalwire_agents/skills/native_vector_search/skill.py +24 -6
- signalwire_agents/skills/registry.py +58 -42
- {signalwire_agents-0.1.13.dist-info → signalwire_agents-0.1.14.dist-info}/METADATA +1 -1
- {signalwire_agents-0.1.13.dist-info → signalwire_agents-0.1.14.dist-info}/RECORD +20 -20
- {signalwire_agents-0.1.13.dist-info → signalwire_agents-0.1.14.dist-info}/entry_points.txt +1 -1
- {signalwire_agents-0.1.13.data → signalwire_agents-0.1.14.data}/data/schema.json +0 -0
- {signalwire_agents-0.1.13.dist-info → signalwire_agents-0.1.14.dist-info}/WHEEL +0 -0
- {signalwire_agents-0.1.13.dist-info → signalwire_agents-0.1.14.dist-info}/licenses/LICENSE +0 -0
- {signalwire_agents-0.1.13.dist-info → signalwire_agents-0.1.14.dist-info}/top_level.txt +0 -0
signalwire_agents/core/swml_service.py

@@ -24,51 +24,11 @@ import types
 from typing import Dict, List, Any, Optional, Union, Callable, Tuple, Type
 from urllib.parse import urlparse

-# Import
-
-
-
-
-if not hasattr(structlog, "_configured") or not structlog._configured:
-    structlog.configure(
-        processors=[
-            structlog.stdlib.filter_by_level,
-            structlog.stdlib.add_logger_name,
-            structlog.stdlib.add_log_level,
-            structlog.stdlib.PositionalArgumentsFormatter(),
-            structlog.processors.TimeStamper(fmt="%Y-%m-%d %H:%M:%S"),
-            structlog.processors.StackInfoRenderer(),
-            structlog.processors.format_exc_info,
-            structlog.processors.UnicodeDecoder(),
-            structlog.dev.ConsoleRenderer()
-        ],
-        context_class=dict,
-        logger_factory=structlog.stdlib.LoggerFactory(),
-        wrapper_class=structlog.stdlib.BoundLogger,
-        cache_logger_on_first_use=True,
-    )
-
-    # Set up root logger with structlog
-    logging.basicConfig(
-        format="%(message)s",
-        stream=sys.stdout,
-        level=logging.INFO,
-    )
-
-    # Mark as configured to avoid duplicate configuration
-    structlog._configured = True
-
-    # Create the module logger
-    logger = structlog.get_logger("swml_service")
-
-except ImportError:
-    # Fallback to standard logging if structlog is not available
-    logging.basicConfig(
-        level=logging.INFO,
-        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
-        stream=sys.stdout
-    )
-    logger = logging.getLogger("swml_service")
+# Import centralized logging system
+from signalwire_agents.core.logging_config import get_logger
+
+# Create the module logger using centralized system
+logger = get_logger("swml_service")

 try:
     import fastapi
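This hunk drops the per-module structlog setup in favor of the shared logger from signalwire_agents/core/logging_config.py, which avoids the duplicated configure/fallback block each module previously carried. A minimal sketch of what a module does after this change, based only on the two calls visible in the added lines (any extra configuration options offered by logging_config are not shown in this diff and would be assumptions):

    from signalwire_agents.core.logging_config import get_logger

    # One call per module; configuration is handled centrally in logging_config.
    logger = get_logger("my_module")
    logger.info("module initialized")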
signalwire_agents/search/document_processor.py

@@ -74,29 +74,42 @@ logger = logging.getLogger(__name__)
 class DocumentProcessor:
     """Enhanced document processor with smart chunking capabilities"""

-    def __init__(
-
-
-
-
+    def __init__(
+        self,
+        chunking_strategy: str = 'sentence',
+        max_sentences_per_chunk: int = 5,
+        chunk_size: int = 50,
+        chunk_overlap: int = 10,
+        split_newlines: Optional[int] = None,
+        index_nlp_backend: str = 'nltk',
+        verbose: bool = False,
+        semantic_threshold: float = 0.5,
+        topic_threshold: float = 0.3
+    ):
         """
-        Initialize document processor
+        Initialize document processor

         Args:
-            chunking_strategy: 'sentence', 'sliding', 'paragraph',
-            max_sentences_per_chunk: For sentence strategy (default:
+            chunking_strategy: Strategy for chunking documents ('sentence', 'sliding', 'paragraph', 'page', 'semantic', 'topic', 'qa')
+            max_sentences_per_chunk: For sentence strategy (default: 5)
             chunk_size: For sliding strategy - words per chunk (default: 50)
-
+            chunk_overlap: For sliding strategy - overlap in words (default: 10)
             split_newlines: For sentence strategy - split on multiple newlines (optional)
+            index_nlp_backend: NLP backend for indexing (default: 'nltk')
+            verbose: Whether to enable verbose logging (default: False)
+            semantic_threshold: Similarity threshold for semantic chunking (default: 0.5)
+            topic_threshold: Similarity threshold for topic chunking (default: 0.3)
         """
         self.chunking_strategy = chunking_strategy
         self.max_sentences_per_chunk = max_sentences_per_chunk
         self.chunk_size = chunk_size
-        self.
+        self.chunk_overlap = chunk_overlap
         self.split_newlines = split_newlines
+        self.semantic_threshold = semantic_threshold
+        self.topic_threshold = topic_threshold

         # Legacy support for old character-based chunking
-        self.chunk_overlap =
+        self.chunk_overlap = chunk_overlap

     def create_chunks(self, content: str, filename: str,
                       file_type: str) -> List[Dict[str, Any]]:

@@ -121,6 +134,12 @@ class DocumentProcessor:
             return self._chunk_by_paragraphs(content, filename, file_type)
         elif self.chunking_strategy == 'page':
             return self._chunk_by_pages(content, filename, file_type)
+        elif self.chunking_strategy == 'semantic':
+            return self._chunk_by_semantic(content, filename, file_type)
+        elif self.chunking_strategy == 'topic':
+            return self._chunk_by_topics(content, filename, file_type)
+        elif self.chunking_strategy == 'qa':
+            return self._chunk_by_qa_optimization(content, filename, file_type)
         else:
             # Fallback to sentence-based chunking
             return self._chunk_by_sentences(content, filename, file_type)
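DocumentProcessor.__init__ now accepts the overlap, NLP backend, verbosity, and threshold options directly, and create_chunks dispatches on the three new strategies. A short sketch of constructing the processor with the new signature (the sample text and file names are illustrative only):

    from signalwire_agents.search.document_processor import DocumentProcessor

    processor = DocumentProcessor(
        chunking_strategy='semantic',   # or 'sentence', 'sliding', 'paragraph', 'page', 'topic', 'qa'
        chunk_overlap=10,
        semantic_threshold=0.5,
        index_nlp_backend='nltk',
        verbose=True,
    )

    text = "SWML describes call flows. Agents expose SWAIG functions. Pricing is documented separately."
    chunks = processor.create_chunks(content=text, filename="notes.txt", file_type="txt")
    print(len(chunks))

If sentence-transformers and scikit-learn are not installed, the semantic strategy falls back to sentence chunking (see the except ImportError branch added later in this diff).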
signalwire_agents/search/document_processor.py

@@ -674,7 +693,7 @@ class DocumentProcessor:
         chunk_index = 0

         # Create overlapping chunks
-        for i in range(0, len(words), self.chunk_size - self.
+        for i in range(0, len(words), self.chunk_size - self.chunk_overlap):
             chunk_words = words[i:i + self.chunk_size]
             if chunk_words:
                 chunk_content = ' '.join(chunk_words)

@@ -686,7 +705,7 @@ class DocumentProcessor:
                         'chunk_method': 'sliding_window',
                         'chunk_index': chunk_index,
                         'chunk_size_words': self.chunk_size,
-                        'overlap_size_words': self.
+                        'overlap_size_words': self.chunk_overlap,
                         'start_word': i,
                         'end_word': i + len(chunk_words)
                     }
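Both one-line fixes restore the truncated references to self.chunk_overlap, so the sliding window advances by chunk_size - chunk_overlap words and records the overlap in the chunk metadata. A standalone illustration of the resulting stride (not library code; the word list is made up):

    words = [f"w{i}" for i in range(120)]
    chunk_size, chunk_overlap = 50, 10

    # step = 50 - 10 = 40, so consecutive chunks share 10 words
    for start in range(0, len(words), chunk_size - chunk_overlap):
        window = words[start:start + chunk_size]
        print(start, start + len(window))   # -> 0 50, 40 90, 80 120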
signalwire_agents/search/document_processor.py

@@ -761,4 +780,246 @@ class DocumentProcessor:
             }
         ))

-        return chunks
+        return chunks
+
+    def _chunk_by_semantic(self, content: str, filename: str, file_type: str) -> List[Dict[str, Any]]:
+        """Chunk based on semantic similarity between sentences"""
+        if isinstance(content, list):
+            content = '\n'.join(content)
+
+        # Get sentences
+        if sent_tokenize:
+            sentences = sent_tokenize(content)
+        else:
+            sentences = content.split('. ')
+            sentences = [s.strip() + '.' for s in sentences if s.strip()]
+
+        if len(sentences) <= 1:
+            return [self._create_chunk(content, filename, "Section 1",
+                                       metadata={'chunk_method': 'semantic', 'chunk_index': 0})]
+
+        # Generate embeddings for sentences (using the same model as the index)
+        try:
+            from sentence_transformers import SentenceTransformer
+            from sklearn.metrics.pairwise import cosine_similarity
+            import numpy as np
+
+            model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
+            embeddings = model.encode(sentences, show_progress_bar=False)
+
+            # Calculate similarity between adjacent sentences
+            similarities = []
+            for i in range(len(embeddings) - 1):
+                sim = cosine_similarity([embeddings[i]], [embeddings[i + 1]])[0][0]
+                similarities.append(sim)
+
+            # Find split points where similarity drops below threshold
+            split_points = [0]
+            for i, sim in enumerate(similarities):
+                if sim < self.semantic_threshold:
+                    split_points.append(i + 1)
+            split_points.append(len(sentences))
+
+            # Create chunks
+            chunks = []
+            for i in range(len(split_points) - 1):
+                start_idx = split_points[i]
+                end_idx = split_points[i + 1]
+                chunk_sentences = sentences[start_idx:end_idx]
+
+                # Ensure minimum chunk size
+                if len(chunk_sentences) < 2 and i > 0:
+                    # Merge with previous chunk
+                    chunks[-1]['content'] += ' ' + ' '.join(chunk_sentences)
+                    continue
+
+                chunk_content = ' '.join(chunk_sentences)
+                chunks.append(self._create_chunk(
+                    content=chunk_content,
+                    filename=filename,
+                    section=f"Semantic Section {i+1}",
+                    metadata={
+                        'chunk_method': 'semantic',
+                        'chunk_index': i,
+                        'semantic_threshold': self.semantic_threshold,
+                        'sentence_count': len(chunk_sentences)
+                    }
+                ))
+
+            return chunks if chunks else [self._create_chunk(content, filename, "Section 1",
+                                                             metadata={'chunk_method': 'semantic', 'chunk_index': 0})]
+
+        except ImportError:
+            # Fallback to sentence-based chunking
+            return self._chunk_by_sentences(content, filename, file_type)
+
+    def _chunk_by_topics(self, content: str, filename: str, file_type: str) -> List[Dict[str, Any]]:
+        """Chunk based on topic changes using keyword analysis"""
+        if isinstance(content, list):
+            content = '\n'.join(content)
+
+        if sent_tokenize:
+            sentences = sent_tokenize(content)
+        else:
+            sentences = content.split('. ')
+            sentences = [s.strip() + '.' for s in sentences if s.strip()]
+
+        if len(sentences) <= 3:
+            return [self._create_chunk(content, filename, "Topic 1",
+                                       metadata={'chunk_method': 'topic', 'chunk_index': 0})]
+
+        try:
+            # Simple topic detection using keyword overlap
+            from collections import Counter
+            import re
+
+            # Extract keywords from each sentence
+            sentence_keywords = []
+            for sentence in sentences:
+                # Simple keyword extraction (could be enhanced with NLP)
+                words = re.findall(r'\b[a-zA-Z]{3,}\b', sentence.lower())
+                # Filter common words (basic stopwords)
+                stopwords = {'the', 'and', 'for', 'are', 'but', 'not', 'you', 'all', 'can', 'had', 'her', 'was', 'one', 'our', 'out', 'day', 'get', 'has', 'him', 'his', 'how', 'its', 'may', 'new', 'now', 'old', 'see', 'two', 'who', 'boy', 'did', 'man', 'way', 'she', 'use', 'her', 'many', 'oil', 'sit', 'set', 'run', 'eat', 'far', 'sea', 'eye', 'ask', 'own', 'say', 'too', 'any', 'try', 'us', 'an', 'as', 'at', 'be', 'he', 'if', 'in', 'is', 'it', 'my', 'of', 'on', 'or', 'to', 'up', 'we', 'go', 'no', 'so', 'am', 'by', 'do', 'me'}
+                keywords = [w for w in words if w not in stopwords and len(w) > 3]
+                sentence_keywords.append(set(keywords))
+
+            # Find topic boundaries based on keyword overlap
+            chunks = []
+            current_chunk = [sentences[0]]
+            current_keywords = sentence_keywords[0]
+
+            for i in range(1, len(sentences)):
+                # Calculate keyword overlap with current chunk
+                overlap = len(current_keywords.intersection(sentence_keywords[i]))
+                total_keywords = len(current_keywords.union(sentence_keywords[i]))
+
+                if total_keywords > 0:
+                    similarity = overlap / total_keywords
+                else:
+                    similarity = 0
+
+                # If similarity is low, start new chunk
+                if similarity < self.topic_threshold and len(current_chunk) >= 2:
+                    chunk_content = ' '.join(current_chunk)
+                    chunks.append(self._create_chunk(
+                        content=chunk_content,
+                        filename=filename,
+                        section=f"Topic {len(chunks)+1}",
+                        metadata={
+                            'chunk_method': 'topic',
+                            'chunk_index': len(chunks),
+                            'topic_keywords': list(current_keywords)[:10],  # Top keywords
+                            'sentence_count': len(current_chunk),
+                            'topic_threshold': self.topic_threshold
+                        }
+                    ))
+                    current_chunk = [sentences[i]]
+                    current_keywords = sentence_keywords[i]
+                else:
+                    current_chunk.append(sentences[i])
+                    current_keywords = current_keywords.union(sentence_keywords[i])
+
+            # Add final chunk
+            if current_chunk:
+                chunk_content = ' '.join(current_chunk)
+                chunks.append(self._create_chunk(
+                    content=chunk_content,
+                    filename=filename,
+                    section=f"Topic {len(chunks)+1}",
+                    metadata={
+                        'chunk_method': 'topic',
+                        'chunk_index': len(chunks),
+                        'topic_keywords': list(current_keywords)[:10],
+                        'sentence_count': len(current_chunk),
+                        'topic_threshold': self.topic_threshold
+                    }
+                ))
+
+            return chunks if chunks else [self._create_chunk(content, filename, "Topic 1",
+                                                             metadata={'chunk_method': 'topic', 'chunk_index': 0})]
+
+        except Exception:
+            # Fallback to sentence-based chunking
+            return self._chunk_by_sentences(content, filename, file_type)
+
+    def _chunk_by_qa_optimization(self, content: str, filename: str, file_type: str) -> List[Dict[str, Any]]:
+        """Create chunks optimized for question-answering"""
+        if isinstance(content, list):
+            content = '\n'.join(content)
+
+        if sent_tokenize:
+            sentences = sent_tokenize(content)
+        else:
+            sentences = content.split('. ')
+            sentences = [s.strip() + '.' for s in sentences if s.strip()]
+
+        # Patterns that indicate Q&A structure
+        question_patterns = [
+            r'\?',  # Questions
+            r'^(what|how|why|when|where|who|which|can|does|is|are|will|would|should)',
+            r'(step|steps|process|procedure|method|way to)',
+            r'(example|examples|instance|case)',
+            r'(definition|meaning|refers to|means)',
+        ]
+
+        chunks = []
+        current_chunk = []
+        current_context = []
+
+        for i, sentence in enumerate(sentences):
+            sentence_lower = sentence.lower().strip()
+
+            # Check if this sentence contains Q&A indicators
+            is_qa_relevant = any(re.search(pattern, sentence_lower) for pattern in question_patterns)
+
+            if is_qa_relevant or len(current_chunk) == 0:
+                current_chunk.append(sentence)
+                # Add surrounding context (previous and next sentences)
+                if i > 0 and sentences[i-1] not in current_chunk:
+                    current_context.append(sentences[i-1])
+                if i < len(sentences) - 1:
+                    current_context.append(sentences[i+1])
+            else:
+                current_chunk.append(sentence)
+
+            # Create chunk when we have enough content or reach a natural break
+            if (len(current_chunk) >= 3 and
+                (i == len(sentences) - 1 or  # Last sentence
+                 sentence.endswith('.') and len(current_chunk) >= 5)):  # Natural break
+
+                # Combine chunk with context
+                full_content = current_context + current_chunk
+                chunk_content = ' '.join(full_content)
+
+                chunks.append(self._create_chunk(
+                    content=chunk_content,
+                    filename=filename,
+                    section=f"QA Section {len(chunks)+1}",
+                    metadata={
+                        'chunk_method': 'qa_optimized',
+                        'chunk_index': len(chunks),
+                        'has_question': any('?' in s for s in current_chunk),
+                        'has_process': any(re.search(r'(step|process|method)', s.lower()) for s in current_chunk),
+                        'sentence_count': len(full_content)
+                    }
+                ))
+
+                current_chunk = []
+                current_context = []
+
+        # Handle remaining content
+        if current_chunk:
+            chunk_content = ' '.join(current_context + current_chunk)
+            chunks.append(self._create_chunk(
+                content=chunk_content,
+                filename=filename,
+                section=f"QA Section {len(chunks)+1}",
+                metadata={
+                    'chunk_method': 'qa_optimized',
+                    'chunk_index': len(chunks),
+                    'sentence_count': len(current_context + current_chunk)
+                }
+            ))
+
+        return chunks if chunks else [self._create_chunk(content, filename, "QA Section 1",
+                                                         metadata={'chunk_method': 'qa_optimized', 'chunk_index': 0})]
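The three new private methods back the 'semantic', 'topic', and 'qa' strategies wired into create_chunks earlier in this diff. Semantic chunking needs sentence-transformers and scikit-learn at runtime and falls back to sentence chunking when they are missing; the other two rely only on the standard library. A quick sketch of exercising them through the public entry point (sample text is illustrative; only the chunk count is inspected because the exact chunk dict layout comes from _create_chunk, which is outside this diff):

    from signalwire_agents.search.document_processor import DocumentProcessor

    text = ("What is SWML? SWML describes call flows. "
            "To build an index, install the package, point the builder at a directory, and run it. "
            "For example, an agent can search its own documentation at runtime.")

    for strategy in ('semantic', 'topic', 'qa'):
        chunks = DocumentProcessor(chunking_strategy=strategy).create_chunks(
            content=text, filename="faq.txt", file_type="txt")
        print(strategy, len(chunks))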
signalwire_agents/search/index_builder.py

@@ -35,27 +35,61 @@ logger = logging.getLogger(__name__)
 class IndexBuilder:
     """Build searchable indexes from document directories"""

-    def __init__(
-
-
-
-
-
-
+    def __init__(
+        self,
+        model_name: str = 'sentence-transformers/all-mpnet-base-v2',
+        chunking_strategy: str = 'sentence',
+        max_sentences_per_chunk: int = 5,
+        chunk_size: int = 50,
+        chunk_overlap: int = 10,
+        split_newlines: Optional[int] = None,
+        index_nlp_backend: str = 'nltk',
+        verbose: bool = False,
+        semantic_threshold: float = 0.5,
+        topic_threshold: float = 0.3
+    ):
+        """
+        Initialize the index builder
+
+        Args:
+            model_name: Name of the sentence transformer model to use
+            chunking_strategy: Strategy for chunking documents ('sentence', 'sliding', 'paragraph', 'page', 'semantic', 'topic', 'qa')
+            max_sentences_per_chunk: For sentence strategy (default: 5)
+            chunk_size: For sliding strategy - words per chunk (default: 50)
+            chunk_overlap: For sliding strategy - overlap in words (default: 10)
+            split_newlines: For sentence strategy - split on multiple newlines (optional)
+            index_nlp_backend: NLP backend for indexing (default: 'nltk')
+            verbose: Whether to enable verbose logging (default: False)
+            semantic_threshold: Similarity threshold for semantic chunking (default: 0.5)
+            topic_threshold: Similarity threshold for topic chunking (default: 0.3)
+        """
         self.model_name = model_name
         self.chunking_strategy = chunking_strategy
         self.max_sentences_per_chunk = max_sentences_per_chunk
         self.chunk_size = chunk_size
         self.chunk_overlap = chunk_overlap
         self.split_newlines = split_newlines
+        self.index_nlp_backend = index_nlp_backend
         self.verbose = verbose
+        self.semantic_threshold = semantic_threshold
+        self.topic_threshold = topic_threshold
         self.model = None
+
+        # Validate NLP backend
+        if self.index_nlp_backend not in ['nltk', 'spacy']:
+            logger.warning(f"Invalid index_nlp_backend '{self.index_nlp_backend}', using 'nltk'")
+            self.index_nlp_backend = 'nltk'
+
         self.doc_processor = DocumentProcessor(
             chunking_strategy=chunking_strategy,
             max_sentences_per_chunk=max_sentences_per_chunk,
             chunk_size=chunk_size,
-
-            split_newlines=split_newlines
+            chunk_overlap=chunk_overlap,
+            split_newlines=split_newlines,
+            index_nlp_backend=self.index_nlp_backend,
+            verbose=self.verbose,
+            semantic_threshold=self.semantic_threshold,
+            topic_threshold=self.topic_threshold
         )

     def _load_model(self):

@@ -130,7 +164,8 @@ class IndexBuilder:
             # Preprocess content for better search
             processed = preprocess_document_content(
                 chunk['content'],
-                language=chunk.get('language', 'en')
+                language=chunk.get('language', 'en'),
+                index_nlp_backend=self.index_nlp_backend
             )

             chunk['processed_content'] = processed['enhanced_text']
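IndexBuilder now mirrors the DocumentProcessor options and forwards them, together with the validated index_nlp_backend, into chunking and content preprocessing. A sketch of driving it directly (the directory and output path are placeholders; the build_index keyword names follow the call visible in the native_vector_search skill later in this diff, and any further arguments it accepts are not shown here):

    from signalwire_agents.search import IndexBuilder

    builder = IndexBuilder(
        chunking_strategy='topic',
        topic_threshold=0.3,
        index_nlp_backend='nltk',   # anything other than 'nltk'/'spacy' is reset to 'nltk' with a warning
        verbose=True,
    )
    builder.build_index(source_dir="./docs", output_file="./docs_index")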
signalwire_agents/search/query_processor.py

@@ -186,7 +186,8 @@ def remove_duplicate_words(input_string: str) -> str:

 def preprocess_query(query: str, language: str = 'en', pos_to_expand: Optional[List[str]] = None,
                      max_synonyms: int = 5, debug: bool = False, vector: bool = False,
-                     vectorize_query_param: bool = False, nlp_backend: str =
+                     vectorize_query_param: bool = False, nlp_backend: str = None,
+                     query_nlp_backend: str = 'nltk') -> Dict[str, Any]:
     """
     Advanced query preprocessing with language detection, POS tagging, synonym expansion, and vectorization

@@ -198,12 +199,19 @@ def preprocess_query(query: str, language: str = 'en', pos_to_expand: Optional[L
         debug: Enable debug output
         vector: Include vector embedding in output
         vectorize_query_param: If True, just vectorize without other processing
-        nlp_backend:
+        nlp_backend: DEPRECATED - use query_nlp_backend instead
+        query_nlp_backend: NLP backend for query processing ('nltk' for fast, 'spacy' for better quality)

     Returns:
         Dict containing processed query, language, POS tags, and optionally vector
     """

+    # Handle backward compatibility
+    if nlp_backend is not None:
+        query_nlp_backend = nlp_backend
+        if debug:
+            logger.info(f"Using deprecated 'nlp_backend' parameter, please use 'query_nlp_backend' instead")
+
     if vectorize_query_param:
         # Vectorize the query directly
         vectorized_query = vectorize_query(query)

@@ -226,15 +234,16 @@ def preprocess_query(query: str, language: str = 'en', pos_to_expand: Optional[L

     # Load spaCy model based on the language and backend choice
     nlp = None
-    if
+    if query_nlp_backend == 'spacy':
         nlp = load_spacy_model(language)
         if nlp is None and debug:
             logger.info("spaCy backend requested but not available, falling back to NLTK")
-    elif
+    elif query_nlp_backend == 'nltk':
         if debug:
-            logger.info("Using NLTK backend for
+            logger.info("Using NLTK backend for query processing")
     else:
-        logger.warning(f"Unknown NLP backend '{
+        logger.warning(f"Unknown query NLP backend '{query_nlp_backend}', using NLTK")
+        query_nlp_backend = 'nltk'

     # Tokenization and stop word removal
     tokens = nltk.word_tokenize(query)

@@ -258,7 +267,7 @@ def preprocess_query(query: str, language: str = 'en', pos_to_expand: Optional[L
     lemmas = []
     pos_tags = {}

-    if nlp and
+    if nlp and query_nlp_backend == 'spacy':
         # Use spaCy for better POS tagging
         doc = nlp(" ".join(tokens))
         for token in doc:

@@ -303,14 +312,14 @@ def preprocess_query(query: str, language: str = 'en', pos_to_expand: Optional[L

     if debug:
         logger.info(f"Expanded Query: {final_query_str}")
-        logger.info(f"NLP Backend Used: {
+        logger.info(f"NLP Backend Used: {query_nlp_backend if nlp or query_nlp_backend == 'nltk' else 'nltk (fallback)'}")

     formatted_output = {
         'input': final_query_str,
         'enhanced_text': final_query_str,  # Alias for compatibility
         'language': language,
         'POS': pos_tags,
-        'nlp_backend_used':
+        'nlp_backend_used': query_nlp_backend if nlp or query_nlp_backend == 'nltk' else 'nltk'
     }

     # Vectorize query if requested

@@ -323,19 +332,25 @@ def preprocess_query(query: str, language: str = 'en', pos_to_expand: Optional[L

     return formatted_output

-def preprocess_document_content(content: str, language: str = 'en', nlp_backend: str =
+def preprocess_document_content(content: str, language: str = 'en', nlp_backend: str = None,
+                                index_nlp_backend: str = 'nltk') -> Dict[str, Any]:
     """
     Preprocess document content for better searchability

     Args:
         content: Document content to process
         language: Language code for processing
-        nlp_backend:
+        nlp_backend: DEPRECATED - use index_nlp_backend instead
+        index_nlp_backend: NLP backend for document processing ('nltk' for fast, 'spacy' for better quality)

     Returns:
         Dict containing enhanced text and extracted keywords
     """

+    # Handle backward compatibility
+    if nlp_backend is not None:
+        index_nlp_backend = nlp_backend
+
     # Use existing preprocessing but adapted for documents
     processed = preprocess_query(
         content,

@@ -344,7 +359,7 @@ def preprocess_document_content(content: str, language: str = 'en', nlp_backend:
         max_synonyms=2,  # Fewer synonyms for documents
         debug=False,
         vector=False,
-
+        query_nlp_backend=index_nlp_backend
     )

     # Extract key terms for keyword search
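Both entry points keep the old nlp_backend keyword working by copying it onto the new parameter, so existing callers do not break. A short sketch of the new-style calls, using only the parameters and result keys visible in this diff:

    from signalwire_agents.search.query_processor import (
        preprocess_query,
        preprocess_document_content,
    )

    # Query-side processing: query_nlp_backend replaces the deprecated nlp_backend.
    result = preprocess_query("how do I restart the agent",
                              language='en', query_nlp_backend='nltk', debug=True)
    print(result['enhanced_text'], result['nlp_backend_used'])

    # Document-side processing: index_nlp_backend plays the same role for indexing.
    doc = preprocess_document_content("Agents expose SWAIG functions.",
                                      index_nlp_backend='nltk')
    print(doc['enhanced_text'])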
signalwire_agents/skills/native_vector_search/skill.py

@@ -75,10 +75,25 @@ class NativeVectorSearchSkill(SkillBase):
         self.swaig_fields = self.params.get('swaig_fields', {})

         # NLP backend configuration
-        self.nlp_backend = self.params.get('nlp_backend'
-
-
-
+        self.nlp_backend = self.params.get('nlp_backend')  # Backward compatibility
+        self.index_nlp_backend = self.params.get('index_nlp_backend', 'nltk')  # Default to fast NLTK for indexing
+        self.query_nlp_backend = self.params.get('query_nlp_backend', 'nltk')  # Default to fast NLTK for search
+
+        # Handle backward compatibility
+        if self.nlp_backend is not None:
+            self.logger.warning("Parameter 'nlp_backend' is deprecated. Use 'index_nlp_backend' and 'query_nlp_backend' instead.")
+            # If old parameter is used, apply it to both
+            self.index_nlp_backend = self.nlp_backend
+            self.query_nlp_backend = self.nlp_backend
+
+        # Validate parameters
+        if self.index_nlp_backend not in ['nltk', 'spacy']:
+            self.logger.warning(f"Invalid index_nlp_backend '{self.index_nlp_backend}', using 'nltk'")
+            self.index_nlp_backend = 'nltk'
+
+        if self.query_nlp_backend not in ['nltk', 'spacy']:
+            self.logger.warning(f"Invalid query_nlp_backend '{self.query_nlp_backend}', using 'nltk'")
+            self.query_nlp_backend = 'nltk'

         # Auto-build index if requested and search is available
         if self.build_index and self.source_dir and self.search_available:

@@ -93,7 +108,10 @@ class NativeVectorSearchSkill(SkillBase):
                 self.logger.info(f"Building search index from {self.source_dir}...")
                 from signalwire_agents.search import IndexBuilder

-                builder = IndexBuilder(
+                builder = IndexBuilder(
+                    verbose=self.params.get('verbose', False),
+                    index_nlp_backend=self.index_nlp_backend
+                )
                 builder.build_index(
                     source_dir=self.source_dir,
                     output_file=self.index_file,

@@ -187,7 +205,7 @@ class NativeVectorSearchSkill(SkillBase):
             try:
                 # Preprocess the query
                 from signalwire_agents.search.query_processor import preprocess_query
-                enhanced = preprocess_query(query, language='en', vector=True,
+                enhanced = preprocess_query(query, language='en', vector=True, query_nlp_backend=self.query_nlp_backend)

                 # Perform search (local or remote)
                 if self.use_remote:
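The skill now reads separate index_nlp_backend and query_nlp_backend params, validates them, and threads them into IndexBuilder and preprocess_query. A hypothetical params dict showing only the keys read in the hunks above; how the dict reaches the skill (for example through the agent's skill-loading API) is outside this diff:

    params = {
        "index_nlp_backend": "nltk",    # used when auto-building the search index
        "query_nlp_backend": "spacy",   # used when preprocessing incoming queries
        "verbose": True,
        # "nlp_backend": "nltk",        # deprecated: if present, overrides both values above
    }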