signalwire-agents 0.1.51__tar.gz → 0.1.53__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {signalwire_agents-0.1.51/signalwire_agents.egg-info → signalwire_agents-0.1.53}/PKG-INFO +11 -11
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/pyproject.toml +11 -11
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/__init__.py +1 -1
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/cli/build_search.py +22 -5
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/schema.json +6 -2
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/search/document_processor.py +112 -18
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/search/search_engine.py +144 -104
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53/signalwire_agents.egg-info}/PKG-INFO +11 -11
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents.egg-info/requires.txt +10 -10
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/LICENSE +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/README.md +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/setup.cfg +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/setup.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/agent_server.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/agents/bedrock.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/cli/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/cli/config.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/cli/core/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/cli/core/agent_loader.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/cli/core/argparse_helpers.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/cli/core/dynamic_config.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/cli/core/service_loader.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/cli/execution/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/cli/execution/datamap_exec.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/cli/execution/webhook_exec.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/cli/output/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/cli/output/output_formatter.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/cli/output/swml_dump.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/cli/simulation/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/cli/simulation/data_generation.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/cli/simulation/data_overrides.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/cli/simulation/mock_env.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/cli/swaig_test_wrapper.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/cli/test_swaig.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/cli/types.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/agent/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/agent/config/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/agent/deployment/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/agent/deployment/handlers/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/agent/prompt/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/agent/prompt/manager.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/agent/routing/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/agent/security/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/agent/swml/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/agent/tools/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/agent/tools/decorator.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/agent/tools/registry.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/agent_base.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/auth_handler.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/config_loader.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/contexts.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/data_map.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/function_result.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/logging_config.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/mixins/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/mixins/ai_config_mixin.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/mixins/auth_mixin.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/mixins/prompt_mixin.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/mixins/serverless_mixin.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/mixins/skill_mixin.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/mixins/state_mixin.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/mixins/tool_mixin.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/mixins/web_mixin.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/pom_builder.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/security/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/security/session_manager.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/security_config.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/skill_base.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/skill_manager.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/swaig_function.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/swml_builder.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/swml_handler.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/swml_renderer.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/core/swml_service.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/prefabs/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/prefabs/concierge.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/prefabs/faq_bot.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/prefabs/info_gatherer.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/prefabs/receptionist.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/prefabs/survey.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/search/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/search/index_builder.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/search/migration.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/search/models.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/search/pgvector_backend.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/search/query_processor.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/search/search_service.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/README.md +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/api_ninjas_trivia/README.md +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/api_ninjas_trivia/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/api_ninjas_trivia/skill.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/datasphere/README.md +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/datasphere/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/datasphere/skill.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/datasphere_serverless/README.md +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/datasphere_serverless/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/datasphere_serverless/skill.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/datetime/README.md +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/datetime/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/datetime/skill.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/joke/README.md +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/joke/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/joke/skill.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/math/README.md +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/math/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/math/skill.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/mcp_gateway/README.md +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/mcp_gateway/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/mcp_gateway/skill.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/native_vector_search/README.md +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/native_vector_search/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/native_vector_search/skill.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/play_background_file/README.md +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/play_background_file/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/play_background_file/skill.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/registry.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/spider/README.md +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/spider/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/spider/skill.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/swml_transfer/README.md +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/swml_transfer/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/swml_transfer/skill.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/weather_api/README.md +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/weather_api/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/weather_api/skill.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/web_search/README.md +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/web_search/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/web_search/skill.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/wikipedia_search/README.md +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/wikipedia_search/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/skills/wikipedia_search/skill.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/utils/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/utils/pom_utils.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/utils/schema_utils.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/utils/token_generators.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/utils/validators.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/web/__init__.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/web/web_service.py +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents.egg-info/SOURCES.txt +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents.egg-info/dependency_links.txt +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents.egg-info/entry_points.txt +0 -0
- {signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents.egg-info/top_level.txt +0 -0
{signalwire_agents-0.1.51/signalwire_agents.egg-info → signalwire_agents-0.1.53}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: signalwire_agents
-Version: 0.1.51
+Version: 0.1.53
 Summary: SignalWire AI Agents SDK
 Author-email: SignalWire Team <info@signalwire.com>
 License: MIT
@@ -18,16 +18,16 @@ Classifier: Programming Language :: Python :: 3.11
 Requires-Python: >=3.7
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: fastapi
-Requires-Dist: pydantic
-Requires-Dist: PyYAML
-Requires-Dist: Requests
-Requires-Dist: setuptools
-Requires-Dist: signalwire_pom
-Requires-Dist: structlog
-Requires-Dist: uvicorn
-Requires-Dist: beautifulsoup4
-Requires-Dist: pytz
+Requires-Dist: fastapi>=0.115.12
+Requires-Dist: pydantic>=2.11.4
+Requires-Dist: PyYAML>=6.0.2
+Requires-Dist: Requests>=2.32.3
+Requires-Dist: setuptools>=66.1.1
+Requires-Dist: signalwire_pom>=2.7.1
+Requires-Dist: structlog>=25.3.0
+Requires-Dist: uvicorn>=0.34.2
+Requires-Dist: beautifulsoup4>=4.12.3
+Requires-Dist: pytz>=2023.3
 Requires-Dist: lxml>=4.9.0
 Provides-Extra: search-queryonly
 Requires-Dist: numpy>=1.24.0; extra == "search-queryonly"

{signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/pyproject.toml
RENAMED
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "signalwire_agents"
-version = "0.1.51"
+version = "0.1.53"
 description = "SignalWire AI Agents SDK"
 authors = [
     {name = "SignalWire Team", email = "info@signalwire.com"}
@@ -25,16 +25,16 @@ classifiers = [
     "Programming Language :: Python :: 3.11",
 ]
 dependencies = [
-    "fastapi",
-    "pydantic",
-    "PyYAML",
-    "Requests",
-    "setuptools",
-    "signalwire_pom",
-    "structlog",
-    "uvicorn",
-    "beautifulsoup4",
-    "pytz",
+    "fastapi>=0.115.12",
+    "pydantic>=2.11.4",
+    "PyYAML>=6.0.2",
+    "Requests>=2.32.3",
+    "setuptools>=66.1.1",
+    "signalwire_pom>=2.7.1",
+    "structlog>=25.3.0",
+    "uvicorn>=0.34.2",
+    "beautifulsoup4>=4.12.3",
+    "pytz>=2023.3",
     "lxml>=4.9.0",
 ]
 
{signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/__init__.py
RENAMED
@@ -18,7 +18,7 @@ A package for building AI agents using SignalWire's AI and SWML capabilities.
 from .core.logging_config import configure_logging
 configure_logging()
 
-__version__ = "0.1.51"
+__version__ = "0.1.53"
 
 # Import core classes for easier access
 from .core.agent_base import AgentBase

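The only change in __init__.py is the version constant, so an installed upgrade can be confirmed at runtime with the attribute shown above:

# Quick runtime check after upgrading the package.
import signalwire_agents

print(signalwire_agents.__version__)  # expected: 0.1.53
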
{signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/cli/build_search.py
RENAMED
@@ -69,6 +69,16 @@ Examples:
     sw-search ./docs \\
         --chunking-strategy qa
 
+    # Markdown-aware chunking (preserves headers, detects code blocks, adds tags)
+    sw-search ./docs \\
+        --chunking-strategy markdown \\
+        --file-types md
+    # This strategy:
+    # - Chunks at header boundaries (h1, h2, h3...)
+    # - Detects code blocks and extracts language (python, bash, etc)
+    # - Adds "code" tags to chunks with code for better search
+    # - Preserves section hierarchy in metadata
+
     # Model selection examples (performance vs quality tradeoff)
     sw-search ./docs --model mini # Fastest (~5x faster), 384 dims, good for most use cases
     sw-search ./docs --model base # Balanced speed/quality, 768 dims (previous default)
@@ -128,16 +138,23 @@ Examples:
         --collection-name docs_collection
     sw-search migrate --info ./docs.swsearch
 
-    # PostgreSQL pgvector backend
+    # PostgreSQL pgvector backend (direct build to PostgreSQL)
     sw-search ./docs \\
         --backend pgvector \\
-        --connection-string "postgresql://user:pass@localhost/knowledge" \\
+        --connection-string "postgresql://user:pass@localhost:5432/knowledge" \\
         --output docs_collection
 
+    # pgvector with markdown strategy (best for documentation with code examples)
+    sw-search ./docs \\
+        --backend pgvector \\
+        --connection-string "postgresql://user:pass@localhost:5432/knowledge" \\
+        --output docs_collection \\
+        --chunking-strategy markdown
+
     # Overwrite existing pgvector collection
     sw-search ./docs \\
         --backend pgvector \\
-        --connection-string "postgresql://user:pass@localhost/knowledge" \\
+        --connection-string "postgresql://user:pass@localhost:5432/knowledge" \\
        --output docs_collection \\
         --overwrite
 
@@ -191,9 +208,9 @@ Examples:
 
     parser.add_argument(
         '--chunking-strategy',
-        choices=['sentence', 'sliding', 'paragraph', 'page', 'semantic', 'topic', 'qa', 'json'],
+        choices=['sentence', 'sliding', 'paragraph', 'page', 'semantic', 'topic', 'qa', 'json', 'markdown'],
         default='sentence',
-        help='Chunking strategy to use (default: sentence)'
+        help='Chunking strategy to use (default: sentence). Use "markdown" for documentation with code blocks.'
     )
 
     parser.add_argument(

{signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/schema.json
RENAMED
@@ -1937,9 +1937,13 @@
         {
           "type": "string",
           "const": "qwen3-235b-A22b-instruct"
+        },
+        {
+          "type": "string",
+          "const": "llama-3.1-8b-instruct-turbo@together.ai"
         }
       ],
-      "description": "The model to use for the AI. Allowed values are `gpt-4o-mini`, `gpt-4.1-mini`, `gpt-4.1-nano`, `nova-micro`, `nova-lite`, and `qwen3-235b-A22b-instruct`."
+      "description": "The model to use for the AI. Allowed values are `gpt-4o-mini`, `gpt-4.1-mini`, `gpt-4.1-nano`, `nova-micro`, `nova-lite`, and `qwen3-235b-A22b-instruct` and `qwen3-4b-instruct-2507@brian`."
     },
     "ai_volume": {
       "anyOf": [
@@ -7663,4 +7667,4 @@
     }
   },
   "unevaluatedProperties": false
-}
+}
{signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/search/document_processor.py
RENAMED
@@ -88,9 +88,18 @@ class DocumentProcessor:
     ):
         """
         Initialize document processor
-
+
         Args:
-            chunking_strategy: Strategy for chunking documents
+            chunking_strategy: Strategy for chunking documents:
+                - 'sentence': Sentence-based chunking with overlap
+                - 'sliding': Sliding window with word-based chunks
+                - 'paragraph': Natural paragraph boundaries
+                - 'page': Page-based chunking (for PDFs)
+                - 'semantic': Semantic similarity-based chunking
+                - 'topic': Topic modeling-based chunking
+                - 'qa': Question-answer optimized chunking
+                - 'json': JSON structure-aware chunking
+                - 'markdown': Markdown structure-aware chunking with code block detection
             max_sentences_per_chunk: For sentence strategy (default: 5)
             chunk_size: For sliding strategy - words per chunk (default: 50)
             chunk_overlap: For sliding strategy - overlap in words (default: 10)
@@ -142,6 +151,9 @@ class DocumentProcessor:
             return self._chunk_by_qa_optimization(content, filename, file_type)
         elif self.chunking_strategy == 'json':
             return self._chunk_from_json(content, filename, file_type)
+        elif self.chunking_strategy == 'markdown':
+            # Use markdown-aware chunking for better structure preservation
+            return self._chunk_markdown_enhanced(content, filename)
         else:
             # Fallback to sentence-based chunking
             return self._chunk_by_sentences(content, filename, file_type)

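To see what the new branch produces, the processor can be exercised directly. This is a minimal sketch, not an official usage pattern: DocumentProcessor, the chunking_strategy argument and _chunk_markdown_enhanced are taken from this diff, while the sample text and the chunk_size value are invented and the private helper is called only for illustration.

# Hypothetical sketch of the 'markdown' strategy (sample text and chunk_size are made up).
from signalwire_agents.search.document_processor import DocumentProcessor

doc = "# Install\n\nRun the installer.\n\n```bash\npip install signalwire-agents\n```\n"

processor = DocumentProcessor(chunking_strategy='markdown', chunk_size=200)
chunks = processor._chunk_markdown_enhanced(doc, 'install.md')
print(len(chunks), 'chunks')
print(chunks[0])  # inspect the section path, code metadata and tags
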
@@ -339,75 +351,114 @@ class DocumentProcessor:
         return chunks
 
     def _chunk_markdown_enhanced(self, content: str, filename: str) -> List[Dict[str, Any]]:
-        """Enhanced markdown chunking with
+        """Enhanced markdown chunking with code block detection and rich metadata
+
+        Features:
+        - Tracks header hierarchy for section paths
+        - Detects code blocks and extracts language
+        - Adds 'code' tags to chunks containing code
+        - Preserves markdown structure for better search
+        """
         chunks = []
         lines = content.split('\n')
-
+
         current_section = None
         current_hierarchy = []  # Track header hierarchy
         current_chunk = []
         current_size = 0
         line_start = 1
-
+        in_code_block = False
+        code_languages = []  # Track languages in current chunk
+        has_code = False
+
         for line_num, line in enumerate(lines, 1):
+            # Check for code block fences
+            code_fence_match = re.match(r'^```(\w+)?', line)
+            if code_fence_match:
+                in_code_block = not in_code_block
+                if in_code_block:
+                    # Starting code block
+                    has_code = True
+                    lang = code_fence_match.group(1)
+                    if lang and lang not in code_languages:
+                        code_languages.append(lang)
+
             # Check for headers with hierarchy tracking
-            header_match = re.match(r'^(#{1,6})\s+(.+)', line)
+            header_match = re.match(r'^(#{1,6})\s+(.+)', line) if not in_code_block else None
             if header_match:
                 header_level = len(header_match.group(1))
                 header_text = header_match.group(2).strip()
-
+
                 # Save current chunk if it exists
                 if current_chunk:
+                    chunk_metadata = self._build_markdown_metadata(
+                        current_hierarchy, code_languages, has_code
+                    )
                     chunks.append(self._create_chunk(
                         content='\n'.join(current_chunk),
                         filename=filename,
                         section=self._build_section_path(current_hierarchy),
                         start_line=line_start,
-                        end_line=line_num - 1
+                        end_line=line_num - 1,
+                        metadata=chunk_metadata
                     ))
-
+
                 # Update hierarchy
                 current_hierarchy = current_hierarchy[:header_level-1] + [header_text]
                 current_section = header_text
                 current_chunk = [line]
                 current_size = len(line)
                 line_start = line_num
-
+                code_languages = []
+                has_code = False
+
             else:
                 current_chunk.append(line)
                 current_size += len(line) + 1
-
+
                 # Check if chunk is getting too large - use smart splitting
-                if current_size >= self.chunk_size:
+                # But don't split inside code blocks
+                if current_size >= self.chunk_size and not in_code_block:
                     # Try to split at paragraph boundary first
                     split_point = self._find_best_split_point(current_chunk)
-
+
                     chunk_to_save = current_chunk[:split_point]
+                    chunk_metadata = self._build_markdown_metadata(
+                        current_hierarchy, code_languages, has_code
+                    )
                     chunks.append(self._create_chunk(
                         content='\n'.join(chunk_to_save),
                         filename=filename,
                         section=self._build_section_path(current_hierarchy),
                         start_line=line_start,
-                        end_line=line_start + split_point - 1
+                        end_line=line_start + split_point - 1,
+                        metadata=chunk_metadata
                     ))
-
+
                     # Start new chunk with overlap
                     overlap_lines = self._get_overlap_lines(chunk_to_save)
                     remaining_lines = current_chunk[split_point:]
                     current_chunk = overlap_lines + remaining_lines
                     current_size = sum(len(line) + 1 for line in current_chunk)
                     line_start = line_start + split_point - len(overlap_lines)
-
+                    # Reset code tracking for new chunk
+                    code_languages = []
+                    has_code = False
+
         # Add final chunk
         if current_chunk:
+            chunk_metadata = self._build_markdown_metadata(
+                current_hierarchy, code_languages, has_code
+            )
             chunks.append(self._create_chunk(
                 content='\n'.join(current_chunk),
                 filename=filename,
                 section=self._build_section_path(current_hierarchy),
                 start_line=line_start,
-                end_line=len(lines)
+                end_line=len(lines),
+                metadata=chunk_metadata
            ))
-
+
         return chunks
 
     def _chunk_python_enhanced(self, content: str, filename: str) -> List[Dict[str, Any]]:

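The subtle step above is the hierarchy update on each header line. A standalone trace of current_hierarchy[:header_level-1] + [header_text] combined with the ' > ' join used by _build_section_path, in plain Python with no SDK imports:

# Trace of the hierarchy update rule used by _chunk_markdown_enhanced.
hierarchy = []
for level, text in [(1, 'Guide'), (2, 'Install'), (3, 'Linux'), (2, 'Usage')]:
    hierarchy = hierarchy[:level - 1] + [text]
    print(' > '.join(hierarchy))

# Output:
# Guide
# Guide > Install
# Guide > Install > Linux
# Guide > Usage
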
@@ -575,6 +626,49 @@ class DocumentProcessor:
     def _build_section_path(self, hierarchy: List[str]) -> str:
         """Build hierarchical section path from header hierarchy"""
         return ' > '.join(hierarchy) if hierarchy else None
+
+    def _build_markdown_metadata(self, hierarchy: List[str], code_languages: List[str], has_code: bool) -> Dict[str, Any]:
+        """Build rich metadata for markdown chunks
+
+        Args:
+            hierarchy: Current header hierarchy (e.g., ['Installation', 'Requirements', 'Python'])
+            code_languages: List of code block languages found in chunk (e.g., ['python', 'bash'])
+            has_code: Whether chunk contains any code blocks
+
+        Returns:
+            Dictionary with markdown-specific metadata including tags
+        """
+        metadata = {
+            'chunk_type': 'markdown',
+        }
+
+        # Add header level metadata
+        if hierarchy:
+            for i, header in enumerate(hierarchy, 1):
+                metadata[f'h{i}'] = header
+
+        # Add code-related metadata
+        if has_code:
+            metadata['has_code'] = True
+            if code_languages:
+                metadata['code_languages'] = code_languages
+
+        # Build tags for enhanced searching
+        tags = []
+        if has_code:
+            tags.append('code')
+            # Add language-specific tags
+            for lang in code_languages:
+                tags.append(f'code:{lang}')
+
+        # Add tags for header levels (searchable by section depth)
+        if len(hierarchy) > 0:
+            tags.append(f'depth:{len(hierarchy)}')
+
+        if tags:
+            metadata['tags'] = tags
+
+        return metadata
 
     def _build_python_section(self, class_name: Optional[str], function_name: Optional[str]) -> str:
         """Build section name for Python code"""

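As a worked example, re-deriving _build_markdown_metadata by hand for a chunk under 'Installation > Python' that contains one python code block; this mirrors the logic above rather than calling the SDK:

# Expected metadata for hierarchy=['Installation', 'Python'],
# code_languages=['python'], has_code=True, following the code above.
metadata = {'chunk_type': 'markdown'}
for i, header in enumerate(['Installation', 'Python'], 1):
    metadata[f'h{i}'] = header
metadata['has_code'] = True
metadata['code_languages'] = ['python']
metadata['tags'] = ['code', 'code:python', 'depth:2']

print(metadata)
# {'chunk_type': 'markdown', 'h1': 'Installation', 'h2': 'Python',
#  'has_code': True, 'code_languages': ['python'],
#  'tags': ['code', 'code:python', 'depth:2']}
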
{signalwire_agents-0.1.51 → signalwire_agents-0.1.53}/signalwire_agents/search/search_engine.py
RENAMED
@@ -114,51 +114,48 @@ class SearchEngine:
             logger.error(f"Error converting query vector: {e}")
             return self._keyword_search_only(enhanced_text, count, tags, original_query)
 
+        # HYBRID APPROACH: Search vector AND metadata in parallel
+        # Stage 1: Run both search types simultaneously
+        search_multiplier = 3
+
+        # Vector search (semantic similarity - primary ranking signal)
+        vector_results = self._vector_search(query_array, count * search_multiplier)
+
+        # Metadata/keyword searches (confirmation signals and backfill)
+        filename_results = self._filename_search(original_query or enhanced_text, count * search_multiplier)
+        metadata_results = self._metadata_search(original_query or enhanced_text, count * search_multiplier)
+        keyword_results = self._keyword_search(enhanced_text, count * search_multiplier, original_query)
+
+        logger.debug(f"Parallel search: vector={len(vector_results)}, filename={len(filename_results)}, "
+                     f"metadata={len(metadata_results)}, keyword={len(keyword_results)}")
+
+        # Stage 2: Merge all results into candidate pool
         candidates = {}
+
+        # Add vector results first (primary signal)
+        for result in vector_results:
+            chunk_id = result['id']
+            candidates[chunk_id] = result
+            candidates[chunk_id]['vector_score'] = result['score']
+            candidates[chunk_id]['vector_distance'] = 1 - result['score']
+            candidates[chunk_id]['sources'] = {'vector': True}
+            candidates[chunk_id]['source_scores'] = {'vector': result['score']}
+
+        # Add metadata/keyword results (secondary signals that boost or backfill)
+        for result_set, source_type, source_weight in [(filename_results, 'filename', 2.0),
+                                                        (metadata_results, 'metadata', 1.5),
+                                                        (keyword_results, 'keyword', 1.0)]:
             for result in result_set:
                 chunk_id = result['id']
                 if chunk_id not in candidates:
+                    # New candidate from metadata/keyword (no vector match)
                     candidates[chunk_id] = result
-                    candidates[chunk_id]['sources'] = {}
-                    candidates[chunk_id]['source_scores'] = {}
-
-        # Stage 2: Check if we have enough candidates
-        if len(candidates) < count * 2:
-            # Not enough candidates from fast searches - add full vector search
-            logger.debug(f"Only {len(candidates)} candidates from fast search, adding full vector search")
-            vector_results = self._vector_search(query_array, count * 3)
-
-            for result in vector_results:
-                chunk_id = result['id']
-                if chunk_id not in candidates:
-                    candidates[chunk_id] = result
-                    candidates[chunk_id]['sources'] = {'vector': True}
-                    candidates[chunk_id]['source_scores'] = {}
-
-                # Add vector score
-                candidates[chunk_id]['vector_score'] = result['score']
-                candidates[chunk_id]['vector_distance'] = 1 - result['score']
-        else:
-            # We have enough candidates - just re-rank them with vectors
-            logger.debug(f"Re-ranking {len(candidates)} candidates with vector similarity")
-            self._add_vector_scores_to_candidates(candidates, query_array, distance_threshold)
+                    candidates[chunk_id]['sources'] = {source_type: True}
+                    candidates[chunk_id]['source_scores'] = {source_type: result['score'] * source_weight}
+                else:
+                    # Exists in vector results - add metadata/keyword as confirmation signal
+                    candidates[chunk_id]['sources'][source_type] = True
+                    candidates[chunk_id]['source_scores'][source_type] = result['score'] * source_weight
 
         # Stage 3: Score and rank all candidates
         final_results = []

@@ -190,12 +187,12 @@ class SearchEngine:
 
         # Apply diversity penalties to prevent single-file dominance
         final_results = self._apply_diversity_penalties(final_results, count)
-
+
         # Ensure 'score' field exists for CLI compatibility
         for r in final_results:
             if 'score' not in r:
                 r['score'] = r.get('final_score', 0.0)
-
+
         return final_results[:count]
 
     def _keyword_search_only(self, enhanced_text: str, count: int,

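Concretely, a chunk returned by both the vector search and the filename search ends up in the candidate pool shaped roughly like this; the id and scores are invented for the example, and the real entry also keeps the chunk's own content and metadata fields:

# Example candidate after Stage 2 merging: vector score 0.82,
# filename match score 0.4 weighted by 2.0.
candidate = {
    'id': 'chunk-42',
    'vector_score': 0.82,
    'vector_distance': 0.18,
    'sources': {'vector': True, 'filename': True},
    'source_scores': {'vector': 0.82, 'filename': 0.8},
}
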
@@ -1038,70 +1035,55 @@ class SearchEngine:
             logger.error(f"Error in vector re-ranking: {e}")
 
     def _calculate_combined_score(self, candidate: Dict, distance_threshold: float) -> float:
-        """Calculate final score
+        """Calculate final score with hybrid vector + metadata weighting
+
+        Hybrid approach:
+        - Vector score is the primary ranking signal (semantic similarity)
+        - Metadata/keyword matches provide confirmation boost
+        - Multiple signal types indicate high relevance (confirmation bonus)
+        - Special boost for 'code' tag matches when query contains code-related terms
+        """
         sources = candidate.get('sources', {})
-
-        match_coverage = candidate.get('match_coverage', 0)
-        fields_matched = candidate.get('fields_matched', 0)
-
-        # Calculate base score with exponential boost for multiple sources
-        if num_sources > 1:
-            # Multiple signal matches are exponentially better
-            multi_signal_boost = 1.0 + (0.3 * (num_sources - 1))
-            base_score = sum(source_scores.values()) * multi_signal_boost
-        else:
-            base_score = sum(source_scores.values())
-
-        # Apply comprehensive match bonus
-        if match_coverage > 0.5:  # More than 50% of query terms matched
-            coverage_bonus = 1.0 + (match_coverage - 0.5) * 0.5
-            base_score *= coverage_bonus
-
-        # Apply field diversity bonus (matching in multiple metadata fields)
-        if fields_matched > 2:
-            field_bonus = 1.0 + (fields_matched - 2) * 0.1
-            base_score *= field_bonus
-
-        # Apply vector similarity multiplier if available
+        source_scores = candidate.get('source_scores', {})
+
+        # Vector score is PRIMARY
         if 'vector_score' in candidate:
             vector_score = candidate['vector_score']
+            base_score = vector_score
+
+            # Metadata/keyword matches provide confirmation boost
+            if len(sources) > 1:
+                # Has both vector AND metadata/keyword matches - strong confirmation signal
+                keyword_signals = sum(source_scores.get(k, 0) for k in ['keyword', 'filename', 'metadata'])
+                if keyword_signals > 0:
+                    # Normalize and apply boost (up to 30% for strong confirmation)
+                    keyword_boost = min(0.3, keyword_signals * 0.15)
+                    base_score = vector_score * (1.0 + keyword_boost)
+
+                # Additional boost if multiple signal types confirm (2+ sources)
+                num_metadata_sources = sum(1 for s in ['keyword', 'filename', 'metadata'] if s in sources)
+                if num_metadata_sources >= 2:
+                    # Multiple confirmation signals - very high confidence
+                    base_score *= 1.1
+
+            # Check for code-related tags to boost code examples
+            tags = candidate.get('metadata', {}).get('tags', [])
+            if 'code' in tags:
+                # This chunk contains code - boost if query is code-related
+                # (metadata search would have found it if query mentioned code/example/python/etc)
+                if 'metadata' in sources or 'keyword' in sources:
+                    # Query matched code-related metadata - apply code boost
+                    base_score *= 1.2
+        else:
+            # No vector score - this is a keyword-only result (backfill)
+            # Use keyword scores but penalize for lack of semantic match
+            base_score = sum(source_scores.values()) * 0.6  # 40% penalty for no vector
+
+            # Still boost code chunks if metadata matched
+            tags = candidate.get('metadata', {}).get('tags', [])
+            if 'code' in tags and 'metadata' in sources:
+                base_score *= 1.15
 
         return base_score
 
     def _apply_diversity_penalties(self, results: List[Dict], target_count: int) -> List[Dict]:

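Tracing the new scoring path for the example candidate sketched earlier (vector score 0.82, weighted filename signal 0.8, no 'code' tag) shows how the confirmation boost works:

# Arithmetic trace of _calculate_combined_score for the example candidate.
vector_score = 0.82
source_scores = {'vector': 0.82, 'filename': 0.8}

keyword_signals = sum(source_scores.get(k, 0) for k in ['keyword', 'filename', 'metadata'])  # 0.8
keyword_boost = min(0.3, keyword_signals * 0.15)   # 0.12
base_score = vector_score * (1.0 + keyword_boost)  # ~0.918

# A keyword-only candidate carrying the same 0.8 signal would instead score
# sum(source_scores.values()) * 0.6 = 0.48, so hybrid matches rank well above backfill.
print(round(base_score, 3))  # 0.918
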
@@ -1166,7 +1148,65 @@ class SearchEngine:
             penalized_results[:target_count] = selected
 
         return penalized_results
-
+
+    def _apply_match_type_diversity(self, results: List[Dict], target_count: int) -> List[Dict]:
+        """Ensure diversity of match types in final results
+
+        Ensures we have a mix of:
+        - Vector-only matches (semantic similarity, good for code examples)
+        - Keyword-only matches (exact term matches)
+        - Hybrid matches (both vector + keyword/metadata)
+        """
+        if not results or len(results) <= target_count:
+            return results
+
+        # Categorize results by match type
+        vector_only = []
+        keyword_only = []
+        hybrid = []
+
+        for result in results:
+            sources = result.get('sources', {})
+            has_vector = 'vector' in sources
+            has_keyword = any(k in sources for k in ['keyword', 'filename', 'metadata'])
+
+            if has_vector and not has_keyword:
+                vector_only.append(result)
+            elif has_keyword and not has_vector:
+                keyword_only.append(result)
+            else:
+                hybrid.append(result)
+
+        # Build diverse result set
+        # Target distribution: 40% hybrid, 40% vector-only, 20% keyword-only
+        # This ensures we include semantic matches (code examples) even if keywords don't match
+        diversified = []
+
+        # Take top hybrid matches first (best overall)
+        hybrid_target = max(1, int(target_count * 0.4))
+        diversified.extend(hybrid[:hybrid_target])
+
+        # Ensure we have vector-only matches (critical for code examples)
+        vector_target = max(1, int(target_count * 0.4))
+        diversified.extend(vector_only[:vector_target])
+
+        # Add keyword-only matches
+        keyword_target = max(1, int(target_count * 0.2))
+        diversified.extend(keyword_only[:keyword_target])
+
+        # Fill remaining slots with best remaining results regardless of type
+        remaining_slots = target_count - len(diversified)
+        if remaining_slots > 0:
+            # Get all unused results
+            used_ids = set(r['id'] for r in diversified)
+            unused = [r for r in results if r['id'] not in used_ids]
+            diversified.extend(unused[:remaining_slots])
+
+        # Sort by final score to maintain quality ordering
+        diversified.sort(key=lambda x: x['final_score'], reverse=True)
+
+        return diversified
+
     def get_stats(self) -> Dict[str, Any]:
         """Get statistics about the search index"""
         # Use pgvector backend if available