signalwire-agents 0.1.47__py3-none-any.whl → 0.1.49__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -380,6 +380,7 @@ class AIConfigMixin:
         The server will validate and apply parameters based on the target model's capabilities.
 
         Common parameters include:
+            model: The AI model to use (gpt-4o-mini, gpt-4.1-mini, gpt-4.1-nano, nova-micro, nova-lite)
             temperature: Randomness setting. Lower values make output more deterministic.
             top_p: Alternative to temperature. Controls nucleus sampling.
             barge_confidence: ASR confidence to interrupt. Higher values make it harder to interrupt.
@@ -394,6 +395,7 @@ class AIConfigMixin:
 
         Example:
             agent.set_prompt_llm_params(
+                model="nova-micro",      # Using Amazon's nova-micro model
                 temperature=0.7,
                 top_p=0.9,
                 barge_confidence=0.6
@@ -413,6 +415,7 @@ class AIConfigMixin:
         The server will validate and apply parameters based on the target model's capabilities.
 
         Common parameters include:
+            model: The AI model to use (gpt-4o-mini, gpt-4.1-mini, gpt-4.1-nano, nova-micro, nova-lite)
             temperature: Randomness setting. Lower values make output more deterministic.
             top_p: Alternative to temperature. Controls nucleus sampling.
             presence_penalty: Topic diversity. Positive values encourage new topics.
@@ -427,6 +430,7 @@ class AIConfigMixin:
 
         Example:
             agent.set_post_prompt_llm_params(
+                model="gpt-4o-mini",
                 temperature=0.5,  # More deterministic for post-prompt
                 top_p=0.9
             )
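
Both docstring hunks advertise the same new `model` key, so an agent can now pin different models for the main prompt and the post-prompt. A minimal sketch of that usage, assuming the package's documented `AgentBase` entry point (the constructor arguments here are illustrative, not from this diff):

    from signalwire_agents import AgentBase

    agent = AgentBase(name="demo-agent")

    # Main conversation: one of the five allowed model names
    agent.set_prompt_llm_params(
        model="nova-micro",
        temperature=0.7,
        top_p=0.9,
        barge_confidence=0.6,
    )

    # Post-prompt (summary) pass: a different, more deterministic setup
    agent.set_post_prompt_llm_params(
        model="gpt-4o-mini",
        temperature=0.5,
    )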
@@ -1925,9 +1925,17 @@
           {
             "type": "string",
             "const": "gpt-4.1-nano"
+          },
+          {
+            "type": "string",
+            "const": "nova-micro"
+          },
+          {
+            "type": "string",
+            "const": "nova-lite"
           }
         ],
-        "description": "The model to use for the AI. Allowed values are `gpt-4o-mini`, `gpt-4.1-mini`, and `gpt-4.1-nano`."
+        "description": "The model to use for the AI. Allowed values are `gpt-4o-mini`, `gpt-4.1-mini`, `gpt-4.1-nano`, `nova-micro`, and `nova-lite`."
       },
       "ai_volume": {
         "anyOf": [
@@ -4005,6 +4013,18 @@
         "maximum": 2,
         "description": "Aversion to repeating lines. Float value between -2.0 and 2.0. Positive values decrease the model's likelihood to repeat the same line verbatim."
       },
+      "model": {
+        "anyOf": [
+          {
+            "type": "string",
+            "enum": ["gpt-4o-mini", "gpt-4.1-mini", "gpt-4.1-nano", "nova-micro", "nova-lite"]
+          },
+          {
+            "$ref": "#/$defs/SWMLVar"
+          }
+        ],
+        "description": "The model to use for the post-prompt. Allowed values are `gpt-4o-mini`, `gpt-4.1-mini`, `gpt-4.1-nano`, `nova-micro`, and `nova-lite`."
+      },
       "text": {
         "type": "string",
         "description": "The instructions to send to the agent."
@@ -4084,6 +4104,18 @@
         "maximum": 2,
         "description": "Aversion to repeating lines. Float value between -2.0 and 2.0. Positive values decrease the model's likelihood to repeat the same line verbatim."
       },
+      "model": {
+        "anyOf": [
+          {
+            "type": "string",
+            "enum": ["gpt-4o-mini", "gpt-4.1-mini", "gpt-4.1-nano", "nova-micro", "nova-lite"]
+          },
+          {
+            "$ref": "#/$defs/SWMLVar"
+          }
+        ],
+        "description": "The model to use for the post-prompt. Allowed values are `gpt-4o-mini`, `gpt-4.1-mini`, `gpt-4.1-nano`, `nova-micro`, and `nova-lite`."
+      },
       "pom": {
         "type": "array",
         "items": {
@@ -4166,6 +4198,18 @@
         "maximum": 2,
         "description": "Aversion to repeating lines. Float value between -2.0 and 2.0. Positive values decrease the model's likelihood to repeat the same line verbatim."
       },
+      "model": {
+        "anyOf": [
+          {
+            "type": "string",
+            "enum": ["gpt-4o-mini", "gpt-4.1-mini", "gpt-4.1-nano", "nova-micro", "nova-lite"]
+          },
+          {
+            "$ref": "#/$defs/SWMLVar"
+          }
+        ],
+        "description": "The model to use for the prompt. Allowed values are `gpt-4o-mini`, `gpt-4.1-mini`, `gpt-4.1-nano`, `nova-micro`, and `nova-lite`."
+      },
       "text": {
         "type": "string",
         "description": "The instructions to send to the agent."
@@ -4249,6 +4293,18 @@
         "maximum": 2,
         "description": "Aversion to repeating lines. Float value between -2.0 and 2.0. Positive values decrease the model's likelihood to repeat the same line verbatim."
       },
+      "model": {
+        "anyOf": [
+          {
+            "type": "string",
+            "enum": ["gpt-4o-mini", "gpt-4.1-mini", "gpt-4.1-nano", "nova-micro", "nova-lite"]
+          },
+          {
+            "$ref": "#/$defs/SWMLVar"
+          }
+        ],
+        "description": "The model to use for the prompt. Allowed values are `gpt-4o-mini`, `gpt-4.1-mini`, `gpt-4.1-nano`, `nova-micro`, and `nova-lite`."
+      },
       "pom": {
         "type": "array",
         "items": {
@@ -68,6 +68,8 @@ if _SEARCH_AVAILABLE:
         from .index_builder import IndexBuilder
         from .search_engine import SearchEngine
         from .search_service import SearchService
+        from .models import MODEL_ALIASES, DEFAULT_MODEL, resolve_model_alias
+        from .migration import SearchIndexMigrator
 
         __all__ = [
             'preprocess_query',
@@ -75,7 +77,11 @@ if _SEARCH_AVAILABLE:
             'DocumentProcessor',
             'IndexBuilder',
             'SearchEngine',
-            'SearchService'
+            'SearchService',
+            'MODEL_ALIASES',
+            'DEFAULT_MODEL',
+            'resolve_model_alias',
+            'SearchIndexMigrator'
         ]
     except ImportError as e:
         # Some search components failed to import
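
The diff shows only the new names exported from the search package, not the bodies of `.models` or `.migration`. As a rough sketch of what the alias helper presumably does — the alias table and default below are invented placeholders, not the package's real values:

    # Hypothetical shape of signalwire_agents/search/models.py
    MODEL_ALIASES = {
        "mini": "sentence-transformers/all-MiniLM-L6-v2",  # placeholder alias
    }
    DEFAULT_MODEL = "sentence-transformers/all-MiniLM-L6-v2"  # placeholder default

    def resolve_model_alias(name: str) -> str:
        """Expand a short alias to a full embedding model name,
        passing unrecognized names through unchanged."""
        return MODEL_ALIASES.get(name, name)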
@@ -1075,7 +1075,7 @@ class DocumentProcessor:
             json_metadata = json_chunk.get('metadata', {})
             chunk_type = json_chunk.get('type', 'content')
 
-            # Build chunk metadata
+            # Build chunk metadata (excluding tags which go at top level)
             metadata = {
                 'chunk_method': 'json',
                 'chunk_index': idx,
@@ -1083,7 +1083,11 @@
                 'original_chunk_id': json_chunk.get('chunk_id', f'chunk_{idx}')
             }
 
-            # Merge JSON metadata
+            # Extract tags before merging metadata
+            tags = json_metadata.get('tags', [])
+
+            # Merge JSON metadata (this includes all fields including tags)
+            # We'll keep tags in metadata for backward compatibility but also set at top level
             metadata.update(json_metadata)
 
             # Determine section name
@@ -1100,12 +1104,11 @@
                 metadata=metadata
             )
 
-            # Add any additional fields from JSON
-            if 'tags' in json_chunk:
-                chunk['tags'] = json_chunk['tags']
-
-            # For TOC entries, we might want to add special tags
-            if chunk_type == 'toc' and 'tags' not in chunk:
+            # Set tags at the top level for proper tag filtering
+            if tags:
+                chunk['tags'] = tags
+            elif chunk_type == 'toc':
+                # For TOC entries, add special tags if none provided
                 chunk['tags'] = ['toc', 'navigation']
 
             chunks.append(chunk)
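
The net effect of the two DocumentProcessor hunks: tags found in a JSON chunk's metadata are still merged into `chunk['metadata']` for backward compatibility, but are now also promoted to a top-level `chunk['tags']` where tag filtering can see them. A small illustration with invented values:

    json_chunk = {
        "type": "content",
        "chunk_id": "intro_1",
        "metadata": {"tags": ["getting-started", "install"], "author": "docs"},
    }
    # After processing:
    #   chunk["metadata"]["tags"] == ["getting-started", "install"]  # kept for compatibility
    #   chunk["tags"]             == ["getting-started", "install"]  # new: drives tag filtering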
@@ -85,9 +85,6 @@ class IndexBuilder:
         if self.backend not in ['sqlite', 'pgvector']:
            raise ValueError(f"Invalid backend '{self.backend}'. Must be 'sqlite' or 'pgvector'")
 
-        if self.backend == 'pgvector' and not self.connection_string:
-            raise ValueError("connection_string is required for pgvector backend")
-
         # Validate NLP backend
         if self.index_nlp_backend not in ['nltk', 'spacy']:
             logger.warning(f"Invalid index_nlp_backend '{self.index_nlp_backend}', using 'nltk'")
@@ -105,6 +102,50 @@ class IndexBuilder:
             topic_threshold=self.topic_threshold
         )
 
+    def _extract_metadata_from_json_content(self, content: str) -> tuple[Dict[str, Any], str]:
+        """
+        Extract metadata from JSON content if present
+
+        Returns:
+            (metadata_dict, metadata_text)
+        """
+        metadata_dict = {}
+
+        # Try to extract metadata from JSON structure in content
+        if '"metadata":' in content:
+            try:
+                # Look for metadata object in content
+                import re
+                # Find all metadata objects
+                pattern = r'"metadata"\s*:\s*(\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\})'
+                matches = re.finditer(pattern, content)
+
+                for match in matches:
+                    try:
+                        json_metadata = json.loads(match.group(1))
+                        # Merge all found metadata
+                        if isinstance(json_metadata, dict):
+                            metadata_dict.update(json_metadata)
+                    except:
+                        pass
+            except Exception as e:
+                logger.debug(f"Error extracting JSON metadata: {e}")
+
+        # Create searchable text from all metadata keys and values
+        metadata_text_parts = []
+        for key, value in metadata_dict.items():
+            # Add key
+            metadata_text_parts.append(str(key))
+            # Add value(s)
+            if isinstance(value, list):
+                metadata_text_parts.extend(str(v) for v in value)
+            else:
+                metadata_text_parts.append(str(value))
+
+        metadata_text = ' '.join(metadata_text_parts).lower()
+
+        return metadata_dict, metadata_text
+
     def _load_model(self):
         """Load embedding model (lazy loading)"""
         if self.model is None:
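
Worth noting about the new helper: the regex tolerates one level of nested braces inside the metadata object, every match found in the content is merged into a single dict, and the returned text is a lowercased bag of keys and values destined for full-text indexing. For example (chunk content invented):

    content = '{"text": "...", "metadata": {"tags": ["sip", "trunking"], "tier": "pro"}}'
    meta, meta_text = builder._extract_metadata_from_json_content(content)
    # meta      -> {"tags": ["sip", "trunking"], "tier": "pro"}
    # meta_text -> "tags sip trunking tier pro"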
@@ -147,6 +188,7 @@
 
         # Process documents
         chunks = []
+        print(f"Processing {len(files)} files...")
         for file_path in files:
             try:
                 # For individual files, use the file's parent as the base directory
@@ -154,8 +196,8 @@
                 base_dir = self._get_base_directory_for_file(file_path, sources)
                 file_chunks = self._process_file(file_path, base_dir, tags)
                 chunks.extend(file_chunks)
-                if self.verbose:
-                    print(f"Processed {file_path}: {len(file_chunks)} chunks")
+                if self.verbose or file_path.suffix == '.json':
+                    print(f"  {file_path}: {len(file_chunks)} chunks")
             except Exception as e:
                 logger.error(f"Error processing {file_path}: {e}")
                 if self.verbose:
@@ -171,7 +213,9 @@
         # Generate embeddings
         self._load_model()
         if self.verbose:
-            print("Generating embeddings...")
+            print(f"Generating embeddings for {len(chunks)} chunks...")
+        else:
+            print(f"Generating embeddings for {len(chunks)} chunks...")
 
         for i, chunk in enumerate(chunks):
             try:
@@ -183,15 +227,33 @@
                 )
 
                 chunk['processed_content'] = processed['enhanced_text']
-                chunk['keywords'] = processed.get('keywords', [])
+
+                # Include tags in keywords for better search matching
+                keywords = processed.get('keywords', [])
+                chunk_tags = chunk.get('tags', [])
+                if chunk_tags:
+                    # Add tags to keywords list for FTS matching
+                    keywords.extend(chunk_tags)
+                    # Remove duplicates while preserving order
+                    keywords = list(dict.fromkeys(keywords))
+
+                chunk['keywords'] = keywords
+
+                # For embedding, include tags in the text for better semantic matching
+                embedding_text = processed['enhanced_text']
+                if chunk_tags:
+                    # Append tags to the text for embedding generation
+                    embedding_text += " " + " ".join(chunk_tags)
 
                 # Generate embedding (suppress progress bar)
-                embedding = self.model.encode(processed['enhanced_text'], show_progress_bar=False)
+                embedding = self.model.encode(embedding_text, show_progress_bar=False)
                 chunk['embedding'] = embedding.tobytes()
 
-                if self.verbose and (i + 1) % 50 == 0:
+                # Show progress more frequently
+                show_every = 50 if len(chunks) > 500 else max(10, len(chunks) // 10)
+                if (i + 1) % show_every == 0 or (i + 1) == len(chunks):
                     progress_pct = ((i + 1) / len(chunks)) * 100
-                    print(f"Generated embeddings: {i + 1}/{len(chunks)} chunks ({progress_pct:.1f}%)")
+                    print(f"  Progress: {i + 1}/{len(chunks)} chunks ({progress_pct:.1f}%)")
 
             except Exception as e:
                 logger.error(f"Error processing chunk {i}: {e}")
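
The dedup step above leans on dicts preserving insertion order (guaranteed since Python 3.7), so keywords keep their original ranking while duplicate tags collapse:

    keywords = ["voice", "sip", "voice", "trunk"]
    assert list(dict.fromkeys(keywords)) == ["voice", "sip", "trunk"]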
@@ -485,6 +547,7 @@
                     end_line INTEGER,
                     tags TEXT,
                     metadata TEXT,
+                    metadata_text TEXT,  -- Searchable text representation of all metadata
                     chunk_hash TEXT UNIQUE,
                     created_at TEXT DEFAULT CURRENT_TIMESTAMP
                 )
@@ -494,6 +557,7 @@
                 CREATE VIRTUAL TABLE chunks_fts USING fts5(
                     processed_content,
                     keywords,
+                    metadata_text,
                     content='chunks',
                     content_rowid='id'
                 )
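
With `metadata_text` added to the FTS5 table, tag and metadata terms become matchable alongside content and keywords. A sketch of a column-filtered query against the resulting index — the `.swsearch` filename and the join shape are assumptions based on the `content='chunks'` external-content setup above:

    import sqlite3

    conn = sqlite3.connect("docs.swsearch")  # assumed index filename
    rows = conn.execute(
        "SELECT c.filename, c.section "
        "FROM chunks_fts f JOIN chunks c ON c.id = f.rowid "
        "WHERE chunks_fts MATCH 'metadata_text:trunking'"
    ).fetchall()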
@@ -555,13 +619,47 @@
             # Prepare data
             keywords_json = json.dumps(chunk.get('keywords', []))
             tags_json = json.dumps(chunk.get('tags', []))
-            metadata_json = json.dumps(chunk.get('metadata', {}))
+
+            # Extract metadata from JSON content and merge with chunk metadata
+            json_metadata, json_metadata_text = self._extract_metadata_from_json_content(chunk['content'])
+            chunk_metadata = chunk.get('metadata', {})
+
+            # Merge metadata: chunk metadata takes precedence
+            merged_metadata = {**json_metadata, **chunk_metadata}
+            metadata_json = json.dumps(merged_metadata)
+
+            # Create comprehensive metadata_text including tags
+            metadata_text_parts = []
+
+            # Add metadata text from JSON content
+            if json_metadata_text:
+                metadata_text_parts.append(json_metadata_text)
+
+            # Add tags
+            tags = chunk.get('tags', [])
+            if tags:
+                metadata_text_parts.extend(str(tag).lower() for tag in tags)
+
+            # Add section if present
+            if chunk.get('section'):
+                metadata_text_parts.append(chunk['section'].lower())
+
+            # Add any additional metadata values
+            for key, value in chunk_metadata.items():
+                if key not in json_metadata:  # Avoid duplicates
+                    metadata_text_parts.append(str(key).lower())
+                    if isinstance(value, list):
+                        metadata_text_parts.extend(str(v).lower() for v in value)
+                    else:
+                        metadata_text_parts.append(str(value).lower())
+
+            metadata_text = ' '.join(metadata_text_parts)
 
             cursor.execute('''
                 INSERT OR IGNORE INTO chunks (
                     content, processed_content, keywords, language, embedding,
-                    filename, section, start_line, end_line, tags, metadata, chunk_hash
-                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                    filename, section, start_line, end_line, tags, metadata, metadata_text, chunk_hash
+                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
             ''', (
                 chunk['content'],
                 chunk.get('processed_content', chunk['content']),
@@ -574,6 +672,7 @@
                 chunk.get('end_line'),
                 tags_json,
                 metadata_json,
+                metadata_text,
                 chunk_hash
             ))
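
Taken end to end, the `metadata_text` stored per row is just a lowercase, space-joined bag of metadata words. For a chunk like the following (values invented), the stored string works out to "sip trunking getting started author docs":

    chunk = {
        "content": "...",  # no embedded "metadata": object, so nothing is extracted here
        "tags": ["SIP", "Trunking"],
        "section": "Getting Started",
        "metadata": {"author": "docs"},
    }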