PyPI - ChatterBot - Versions diffs - 1.2.9__tar.gz → 1.2.10__tar.gz - Mend

ChatterBot 1.2.9 (tar.gz) → 1.2.10 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (93)

{chatterbot-1.2.9 → chatterbot-1.2.10}/ChatterBot.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ChatterBot
-Version: 1.2.9
+Version: 1.2.10
 Summary: ChatterBot is a machine learning, conversational dialog engine
 Author: Gunther Cox
 License-Expression: BSD-3-Clause
@@ -153,16 +153,10 @@ section of the documentation.
 See release notes for changes https://github.com/gunthercox/ChatterBot/releases
-# Development pattern for contributors
+# Contributing
-1. [Create a fork](https://help.github.com/articles/fork-a-repo/) of
-   the [main ChatterBot repository](https://github.com/gunthercox/ChatterBot) on GitHub.
-2. Make your changes in a branch named something different from `master`, e.g. create
-   a new branch `my-pull-request`.
-3. [Create a pull request](https://help.github.com/articles/creating-a-pull-request/).
-4. Please follow the [Python style guide for PEP-8](https://www.python.org/dev/peps/pep-0008/).
-5. Use the projects [built-in automated testing](https://docs.chatterbot.us/testing/).
-   to help make sure that your contribution is free from errors.
+Contributions are welcomed, to help ensure a smooth process please start with the contributing guidelines in our documentation:
+https://docs.chatterbot.us/contributing/
 # Sponsors

{chatterbot-1.2.9 → chatterbot-1.2.10}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ChatterBot
-Version: 1.2.9
+Version: 1.2.10
 Summary: ChatterBot is a machine learning, conversational dialog engine
 Author: Gunther Cox
 License-Expression: BSD-3-Clause
@@ -153,16 +153,10 @@ section of the documentation.
 See release notes for changes https://github.com/gunthercox/ChatterBot/releases
-# Development pattern for contributors
+# Contributing
-1. [Create a fork](https://help.github.com/articles/fork-a-repo/) of
-   the [main ChatterBot repository](https://github.com/gunthercox/ChatterBot) on GitHub.
-2. Make your changes in a branch named something different from `master`, e.g. create
-   a new branch `my-pull-request`.
-3. [Create a pull request](https://help.github.com/articles/creating-a-pull-request/).
-4. Please follow the [Python style guide for PEP-8](https://www.python.org/dev/peps/pep-0008/).
-5. Use the projects [built-in automated testing](https://docs.chatterbot.us/testing/).
-   to help make sure that your contribution is free from errors.
+Contributions are welcomed, to help ensure a smooth process please start with the contributing guidelines in our documentation:
+https://docs.chatterbot.us/contributing/
 # Sponsors

{chatterbot-1.2.9 → chatterbot-1.2.10}/README.md RENAMED Viewed

@@ -92,16 +92,10 @@ section of the documentation.
 See release notes for changes https://github.com/gunthercox/ChatterBot/releases
-# Development pattern for contributors
-1. [Create a fork](https://help.github.com/articles/fork-a-repo/) of
-   the [main ChatterBot repository](https://github.com/gunthercox/ChatterBot) on GitHub.
-2. Make your changes in a branch named something different from `master`, e.g. create
-   a new branch `my-pull-request`.
-3. [Create a pull request](https://help.github.com/articles/creating-a-pull-request/).
-4. Please follow the [Python style guide for PEP-8](https://www.python.org/dev/peps/pep-0008/).
-5. Use the projects [built-in automated testing](https://docs.chatterbot.us/testing/).
-   to help make sure that your contribution is free from errors.
+# Contributing
+Contributions are welcomed, to help ensure a smooth process please start with the contributing guidelines in our documentation:
+https://docs.chatterbot.us/contributing/
 # Sponsors

{chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/__init__.py RENAMED Viewed

@@ -4,7 +4,7 @@ ChatterBot is a machine learning, conversational dialog engine.
 from .chatterbot import ChatBot
-__version__ = '1.2.9'
+__version__ = '1.2.10'
 __all__ = (
     'ChatBot',

{chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/chatterbot.py RENAMED Viewed

@@ -2,7 +2,7 @@ import logging
 from typing import Union
 from chatterbot.storage import StorageAdapter
 from chatterbot.logic import LogicAdapter
-from chatterbot.search import TextSearch, IndexedTextSearch
+from chatterbot.search import TextSearch, IndexedTextSearch, SemanticVectorSearch
 from chatterbot.tagging import PosLemmaTagger
 from chatterbot.conversation import Statement
 from chatterbot import languages
@@ -74,41 +74,60 @@ class ChatBot(object):
         tagger_language = kwargs.get('tagger_language', languages.ENG)
-        try:
-            Tagger = kwargs.get('tagger', PosLemmaTagger)
-            # Allow instances to be provided for performance optimization
-            # (Example: a pre-loaded model in a tagger when unit testing)
-            if not isinstance(Tagger, type):
-                self.tagger = Tagger
-            else:
-                self.tagger = Tagger(language=tagger_language)
-        except IOError as io_error:
-            # Return a more helpful error message if possible
-            if "Can't find model" in str(io_error):
-                model_name = utils.get_model_for_language(tagger_language)
-                if hasattr(tagger_language, 'ENGLISH_NAME'):
-                    language_name = tagger_language.ENGLISH_NAME
+        # Check if storage adapter has a preferred tagger
+        PreferredTagger = self.storage.get_preferred_tagger()
+        if PreferredTagger is not None:
+            # Storage adapter specifies its own tagger
+            self.tagger = PreferredTagger(language=tagger_language)
+        else:
+            # Use default or user-specified tagger
+            try:
+                Tagger = kwargs.get('tagger', PosLemmaTagger)
+                # Allow instances to be provided for performance optimization
+                # (Example: a pre-loaded model in a tagger when unit testing)
+                if not isinstance(Tagger, type):
+                    self.tagger = Tagger
                 else:
-                    language_name = tagger_language
-                raise self.ChatBotException(
-                    'Setup error:\n'
-                    f'The Spacy model for "{language_name}" language is missing.\n'
-                    'Please install the model using the command:\n\n'
-                    f'python -m spacy download {model_name}\n\n'
-                    'See https://spacy.io/usage/models for more information about available models.'
-                ) from io_error
-            else:
-                raise io_error
+                    self.tagger = Tagger(language=tagger_language)
+            except IOError as io_error:
+                # Return a more helpful error message if possible
+                if "Can't find model" in str(io_error):
+                    model_name = utils.get_model_for_language(tagger_language)
+                    if hasattr(tagger_language, 'ENGLISH_NAME'):
+                        language_name = tagger_language.ENGLISH_NAME
+                    else:
+                        language_name = tagger_language
+                    raise self.ChatBotException(
+                        'Setup error:\n'
+                        f'The Spacy model for "{language_name}" language is missing.\n'
+                        'Please install the model using the command:\n\n'
+                        f'python -m spacy download {model_name}\n\n'
+                        'See https://spacy.io/usage/models for more information about available models.'
+                    ) from io_error
+                else:
+                    raise io_error
+        # Initialize search algorithms
         primary_search_algorithm = IndexedTextSearch(self, **kwargs)
         text_search_algorithm = TextSearch(self, **kwargs)
+        semantic_vector_search_algorithm = SemanticVectorSearch(self, **kwargs)
         self.search_algorithms = {
             primary_search_algorithm.name: primary_search_algorithm,
-            text_search_algorithm.name: text_search_algorithm
+            text_search_algorithm.name: text_search_algorithm,
+            semantic_vector_search_algorithm.name: semantic_vector_search_algorithm
         }
+        # Check if storage adapter has a preferred search algorithm
+        preferred_search_algorithm = self.storage.get_preferred_search_algorithm()
+        if preferred_search_algorithm and preferred_search_algorithm in self.search_algorithms:
+            # Set as default for logic adapters that don't specify their own search algorithm
+            # This ensures BestMatch and other adapters use the optimal search method
+            self.logger.info(f'Storage adapter prefers search algorithm: {preferred_search_algorithm}')
+            kwargs.setdefault('search_algorithm_name', preferred_search_algorithm)
         for adapter in logic_adapters:
             utils.validate_adapter_class(adapter, LogicAdapter)
             logic_adapter = utils.initialize_class(adapter, self, **kwargs)
@@ -191,15 +210,22 @@ class ChatBot(object):
                 input_statement.in_response_to = previous_statement.text
         # Make sure the input statement has its search text saved
-        if not input_statement.search_text:
-            _search_text = self.tagger.get_text_index_string(input_statement.text)
-            input_statement.search_text = _search_text
-        if not input_statement.search_in_response_to and input_statement.in_response_to:
-            input_statement.search_in_response_to = self.tagger.get_text_index_string(
-                input_statement.in_response_to
-            )
+        if not self.tagger.needs_text_indexing():
+            # Tagger doesn't transform text, use it directly
+            if not input_statement.search_text:
+                input_statement.search_text = input_statement.text
+            if not input_statement.search_in_response_to and input_statement.in_response_to:
+                input_statement.search_in_response_to = input_statement.in_response_to
+        else:
+            # Use tagger for text indexing or transformations
+            if not input_statement.search_text:
+                _search_text = self.tagger.get_text_index_string(input_statement.text)
+                input_statement.search_text = _search_text
+            if not input_statement.search_in_response_to and input_statement.in_response_to:
+                input_statement.search_in_response_to = self.tagger.get_text_index_string(
+                    input_statement.in_response_to
+                )
         response = self.generate_response(
             input_statement,

{chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/search.py RENAMED Viewed

@@ -157,3 +157,73 @@ class TextSearch:
                 if confidence >= 1.0:
                     self.chatbot.logger.info('Exact match found, stopping search')
                     break
+class SemanticVectorSearch:
+    """
+    Semantic vector search for storage adapters that use vector embeddings.
+    Does not require a tagger or comparison function - relies on the storage
+    adapter's native vector similarity search capabilities.
+    :param search_page_size:
+        The maximum number of records to load into memory at a time when searching.
+        Defaults to 1000
+    """
+    name = 'semantic_vector_search'
+    def __init__(self, chatbot, **kwargs):
+        self.chatbot = chatbot
+        self.search_page_size = kwargs.get(
+            'search_page_size', 1000
+        )
+    def search(self, input_statement, **additional_parameters):
+        """
+        Search for semantically similar statements using vector similarity.
+        Confidence scores are calculated by the storage adapter based on
+        vector distances and returned in the results.
+        :param input_statement: A statement.
+        :type input_statement: chatterbot.conversation.Statement
+        :param **additional_parameters: Additional parameters to be passed
+            to the ``filter`` method of the storage adapter when searching.
+        :rtype: Generator yielding one closest matching statement at a time.
+        """
+        self.chatbot.logger.info('Beginning semantic vector search')
+        search_parameters = {
+            'search_in_response_to_contains': input_statement.text,
+            'persona_not_startswith': 'bot:',
+            'page_size': self.search_page_size
+        }
+        if additional_parameters:
+            search_parameters.update(additional_parameters)
+        statement_list = self.chatbot.storage.filter(**search_parameters)
+        best_confidence_so_far = 0
+        self.chatbot.logger.info('Processing search results')
+        # Yield statements with confidence scores from vector similarity
+        for statement in statement_list:
+            # Confidence should already be set by the storage adapter
+            confidence = getattr(statement, 'confidence', 0.0)
+            if confidence > best_confidence_so_far:
+                best_confidence_so_far = confidence
+                self.chatbot.logger.info('Similar statement found: {} {}'.format(
+                    statement.in_response_to, confidence
+                ))
+                yield statement
+                if confidence >= 1.0:
+                    self.chatbot.logger.info('Exact match found, stopping search')
+                    break

{chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/storage/redis.py RENAMED Viewed

@@ -30,13 +30,19 @@ class RedisVectorStorageAdapter(StorageAdapter):
         in the future and its behavior has not yet been finalized.
     The RedisVectorStorageAdapter allows ChatterBot to store conversation
-    data in a redis instance.
+    data in a redis instance using vector embeddings for semantic similarity search.
     All parameters are optional, by default a redis instance on localhost is assumed.
     :keyword database_uri: eg: redis://localhost:6379/0',
         The database_uri can be specified to choose a redis instance.
     :type database_uri: str
+    NOTES:
+    * Unlike other database based storage adapters, the RedisVectorStorageAdapter
+      does not leverage `search_text` and `search_in_response_to` fields for indexing.
+      Instead, it uses vector embeddings to find similar statements based on
+      semantic similarity. This allows for more flexible and context-aware matching.
     """
     class RedisMetaDataType:
@@ -100,6 +106,21 @@ class RedisVectorStorageAdapter(StorageAdapter):
         self.vector_store = RedisVectorStore(embeddings, config=config)
+    def get_preferred_tagger(self):
+        """
+        Redis uses vector embeddings and doesn't need POS-lemma indexing.
+        Returns NoOpTagger to avoid unnecessary spaCy processing.
+        """
+        from chatterbot.tagging import NoOpTagger
+        return NoOpTagger
+    def get_preferred_search_algorithm(self):
+        """
+        Redis uses semantic vector search instead of text-based matching.
+        Returns the name of the SemanticVectorSearch algorithm.
+        """
+        return 'semantic_vector_search'
     def get_statement_model(self):
         """
         Return the statement model.
@@ -127,6 +148,16 @@ class RedisVectorStorageAdapter(StorageAdapter):
         values.update(document.metadata)
+        # Convert Unix timestamp back to datetime for StatementObject
+        # Redis may return this as int, float, or string representation
+        if 'created_at' in values:
+            created_at_value = values['created_at']
+            if isinstance(created_at_value, str):
+                # Convert string to float first
+                created_at_value = float(created_at_value)
+            if isinstance(created_at_value, (int, float)):
+                values['created_at'] = datetime.fromtimestamp(created_at_value)
         tags = values['tags']
         values['tags'] = list(set(tags.split('|') if tags else []))
@@ -177,6 +208,7 @@ class RedisVectorStorageAdapter(StorageAdapter):
             - exclude_text
             - exclude_text_words
             - persona_not_startswith
+            - search_text_contains
             - search_in_response_to_contains
             - order_by
         """
@@ -245,27 +277,26 @@ class RedisVectorStorageAdapter(StorageAdapter):
             else:
                 filter_condition = query
-        # Handle search_text parameter (used by BestMatch logic adapter)
-        # BestMatch uses search_text to find statements with matching indexed text.
-        # Since Redis doesn't store search_text as a field, we approximate this by:
-        # 1. Using the search_text value as a semantic query against in_response_to
-        # 2. This finds statements that are responses to similar inputs
-        # The effect is similar to BestMatch's Phase 2: finding alternate responses
-        if 'search_text' in kwargs:
-            _search_text = kwargs.get('search_text', '')
-            # Get embedding for the search text
-            # Note: search_text may be indexed (e.g., "NOUN:cat VERB:run") so this
-            # approximates finding responses to semantically similar queries
-            embedding = self.vector_store.embeddings.embed_query(_search_text)
+        if 'search_text_contains' in kwargs:
+            # Find statements whose text (responses) are similar.
+            #
+            # Use semantic similarity on the search query itself. This finds responses
+            # that would be semantically appropriate, even if they don't share exact words.
+            #
+            # Our vectors are of 'in_response_to' (what was said TO the bot),
+            # not 'text' (what the bot said). So we use the query as if it were an input,
+            # and find statements that would respond to similar inputs. The result is
+            # statements whose context (in_response_to) is similar, which tends to yield
+            # similar responses.
+            _search_query = kwargs['search_text_contains']
+            # Use vector similarity to find statements responding to similar contexts
+            embedding = self.vector_store.embeddings.embed_query(_search_query)
-            # Build return fields from metadata schema
             return_fields = [
                 'text', 'in_response_to', 'conversation', 'persona', 'tags', 'created_at'
             ]
-            # Use direct index query via RedisVL
-            # Search on the vectorized content (in_response_to) to find similar response patterns
             query = VectorQuery(
                 vector=embedding,
                 vector_field_name='embedding',
@@ -274,20 +305,35 @@ class RedisVectorStorageAdapter(StorageAdapter):
                 filter_expression=filter_condition
             )
-            # Execute query
             results = self.vector_store.index.query(query)
-            # Convert results to Document objects
             Document = self.get_statement_model()
             documents = []
-            for result in results:
-                # Extract metadata and content
+            # Calculate confidence from vector distances
+            # Results are ordered by similarity (best match first)
+            for idx, result in enumerate(results):
                 in_response_to = result.get('in_response_to', '')
-                # Convert created_at from integer (YYMMDD) to datetime
-                created_at_int = int(result.get('created_at', 0))
-                if created_at_int:
-                    created_at = datetime.strptime(str(created_at_int), '%y%m%d')
+                # Redis vector_score is cosine distance (lower is better)
+                # Convert to confidence: confidence = 1 - distance
+                # If vector_score not available, use result order
+                vector_score = result.get('vector_score')
+                if vector_score is not None:
+                    # Cosine distance ranges from 0 (identical) to 2 (opposite)
+                    # Normalize to confidence: 1.0 (identical) to 0.0 (opposite)
+                    confidence = max(0.0, 1.0 - (float(vector_score) / 2.0))
+                else:
+                    # Fallback: use result order (first result = highest confidence)
+                    # Start at 0.95 for first result, decay by 0.05 per position
+                    confidence = max(0.0, 0.95 - (idx * 0.05))
+                # Parse timestamp
+                created_at_value = result.get('created_at', 0)
+                if isinstance(created_at_value, str):
+                    created_at = datetime.fromtimestamp(float(created_at_value))
+                elif created_at_value:
+                    created_at = datetime.fromtimestamp(float(created_at_value))
                 else:
                     created_at = datetime.now()
@@ -297,6 +343,7 @@ class RedisVectorStorageAdapter(StorageAdapter):
                     'persona': result.get('persona', ''),
                     'tags': result.get('tags', ''),
                     'created_at': created_at,
+                    'confidence': confidence,
                 }
                 doc = Document(
                     page_content=in_response_to,
@@ -307,6 +354,23 @@ class RedisVectorStorageAdapter(StorageAdapter):
             return [self.model_to_object(document) for document in documents]
+        # Redis uses vector similarity: we search for statements whose actual
+        # text field is semantically similar to the text that produced this search_text.
+        # This is stored in the closest_match.text field, but BestMatch only passes
+        # search_text. Since we can't reverse POS tags to original text (for now),
+        # we treat this parameter as a signal to do text-based similarity search.
+        #
+        # Note: The caller should ideally pass the actual text, but for compatibility
+        # we'll work with what we receive. In practice, search_text_contains is the
+        # better parameter for this use case.
+        if 'search_text' in kwargs:
+            # For now, we'll treat search_text as a filter-only parameter
+            # and fall through to the regular query_search below.
+            # This prevents the broken behavior of embedding POS tags.
+            # The proper fix requires BestMatch to pass additional context
+            # or use search_text_contains instead.
+            pass
         ordering = kwargs.get('order_by', None)
         if ordering:
@@ -341,14 +405,31 @@ class RedisVectorStorageAdapter(StorageAdapter):
             # Convert results to Document objects
             Document = self.get_statement_model()
             documents = []
-            for result in results:
+            # Calculate confidence from vector distances
+            # Results are ordered by similarity (best match first)
+            for idx, result in enumerate(results):
                 # Extract metadata and content
                 in_response_to = result.get('in_response_to', '')
-                # Convert created_at from integer (YYMMDD) to datetime
-                created_at_int = int(result.get('created_at', 0))
-                if created_at_int:
-                    created_at = datetime.strptime(str(created_at_int), '%y%m%d')
+                # Redis vector_score is cosine distance (lower is better)
+                # Convert to confidence: confidence = 1 - distance
+                # If vector_score not available, use result order
+                vector_score = result.get('vector_score')
+                if vector_score is not None:
+                    # Cosine distance ranges from 0 (identical) to 2 (opposite)
+                    # Normalize to confidence: 1.0 (identical) to 0.0 (opposite)
+                    confidence = max(0.0, 1.0 - (float(vector_score) / 2.0))
+                else:
+                    # Fallback: use result order (first result = highest confidence)
+                    # Start at 0.95 for first result, decay by 0.05 per position
+                    confidence = max(0.0, 0.95 - (idx * 0.05))
+                # Convert Unix timestamp back to datetime
+                # Redis returns numeric fields as strings
+                created_at_timestamp = result.get('created_at', '0')
+                if created_at_timestamp and created_at_timestamp != '0':
+                    created_at = datetime.fromtimestamp(float(created_at_timestamp))
                 else:
                     created_at = datetime.now()
@@ -358,6 +439,7 @@ class RedisVectorStorageAdapter(StorageAdapter):
                     'persona': result.get('persona', ''),
                     'tags': result.get('tags', ''),
                     'created_at': created_at,
+                    'confidence': confidence,
                 }
                 doc = Document(
                     page_content=in_response_to,
@@ -395,9 +477,9 @@ class RedisVectorStorageAdapter(StorageAdapter):
         metadata = {
             'text': text,
             'category': kwargs.get('category', ''),
-            # NOTE: `created_at` must have a valid numeric value or results will
-            # not be returned for similarity_search for some reason
-            'created_at': kwargs.get('created_at') or int(_default_date.strftime('%y%m%d')),
+            # Store created_at as Unix timestamp with microseconds (float)
+            # This provides full datetime precision while maintaining Redis NUMERIC field compatibility
+            'created_at': kwargs.get('created_at') or _default_date.timestamp(),
             'tags': '|'.join(unique_tags) if unique_tags else '',
             'conversation': kwargs.get('conversation', ''),
             'persona': kwargs.get('persona', ''),
@@ -427,7 +509,7 @@ class RedisVectorStorageAdapter(StorageAdapter):
                 metadata={
                     'text': statement.text,
                     'conversation': statement.conversation or '',
-                    'created_at': int(statement.created_at.strftime('%y%m%d')),
+                    'created_at': statement.created_at.timestamp(),
                     'persona': statement.persona or '',
                     # Prevent duplicate tag entries in the database
                     'tags': '|'.join(
@@ -452,7 +534,7 @@ class RedisVectorStorageAdapter(StorageAdapter):
         metadata = {
             'text': statement.text,
             'conversation': statement.conversation or '',
-            'created_at': int(statement.created_at.strftime('%y%m%d')),
+            'created_at': statement.created_at.timestamp(),
             'persona': statement.persona or '',
             'tags': '|'.join(unique_tags) if unique_tags else '',
         }
@@ -508,11 +590,9 @@ class RedisVectorStorageAdapter(StorageAdapter):
                 # Parse the metadata
                 metadata = json.loads(data[b'_metadata_json'].decode())
-                # Convert created_at from integer (YYMMDD) back to datetime
-                if 'created_at' in metadata and isinstance(metadata['created_at'], int):
-                    created_at_str = str(metadata['created_at'])
-                    # Parse YYMMDD format
-                    metadata['created_at'] = datetime.strptime(created_at_str, '%y%m%d')
+                # Convert created_at from Unix timestamp back to datetime
+                if 'created_at' in metadata and isinstance(metadata['created_at'], (int, float)):
+                    metadata['created_at'] = datetime.fromtimestamp(metadata['created_at'])
                 # Get the in_response_to from the hash
                 in_response_to = data.get(b'in_response_to', b'').decode()

{chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/storage/storage_adapter.py RENAMED Viewed

@@ -173,6 +173,87 @@ class StorageAdapter(object):
         """
         pass
+    def get_preferred_tagger(self):
+        """
+        Returns the tagger class preferred by this storage adapter.
+        Returns None by default, meaning the default tagger will be used.
+        Storage adapters should override this method to specify their
+        preferred tagger based on their search capabilities.
+        Available Taggers:
+        - NoOpTagger: Returns text unchanged (for vector-based storage).
+          No spaCy model loading (~500MB memory saved).
+          Faster startup (<1 second vs 2-5 seconds).
+          Use when storage handles semantic search natively.
+        - PosLemmaTagger: Creates POS-lemma bigrams (default, for SQL).
+          Enables pattern matching (e.g., "NOUN:cat VERB:run").
+          Requires spaCy language model.
+          Best for exact phrase matching.
+        - LowercaseTagger: Simple lowercase transformation.
+          Minimal processing overhead.
+          Case-insensitive matching.
+        Example - Vector Storage::
+            def get_preferred_tagger(self):
+                from chatterbot.tagging import NoOpTagger
+                return NoOpTagger
+        Example - Traditional Storage::
+            def get_preferred_tagger(self):
+                return None  # Use default PosLemmaTagger
+        :return: Tagger class or None
+        """
+        return None
+    def get_preferred_search_algorithm(self):
+        """
+        Returns the search algorithm name preferred by this storage adapter.
+        Returns None by default, meaning the default search algorithm will be used.
+        Storage adapters should override this method to specify their
+        preferred search algorithm based on their capabilities.
+        Available Search Algorithms:
+        - 'indexed_text_search' (default):
+          Uses POS-lemma indexed fields (search_text, search_in_response_to).
+          Python-based Levenshtein distance comparison.
+          Requires PosLemmaTagger.
+          Best for: Exact pattern matching.
+        - 'semantic_vector_search':
+          Uses raw text with vector similarity.
+          Delegates to storage.filter(search_in_response_to_contains=text).
+          No tagger required (works with NoOpTagger).
+          Confidence from storage adapter (cosine similarity).
+          Best for: Context-aware AI responses, semantic understanding.
+        - 'text_search' (fallback):
+          Compares raw text without indexes.
+          Slower but works with any storage.
+          Uses comparison functions on all statements.
+        Example - Vector Storage::
+            def get_preferred_search_algorithm(self):
+                return 'semantic_vector_search'
+        Example - SQL Storage::
+            def get_preferred_search_algorithm(self):
+                return None  # Use default 'indexed_text_search'
+        :return: Search algorithm name string or None
+        """
+        return None
     class EmptyDatabaseException(Exception):
         def __init__(self, message=None):

{chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/tagging.py RENAMED Viewed

@@ -4,6 +4,56 @@ from chatterbot.utils import get_model_for_language
 import spacy
+class NoOpTagger(object):
+    """
+    A no-operation tagger that returns text unchanged.
+    Used by storage adapters that don't rely on indexed search_text fields.
+    """
+    def __init__(self, language=None):
+        self.language = language or languages.ENG
+    def needs_text_indexing(self):
+        """
+        Indicates whether this tagger performs text indexing/transformation.
+        Returns False since NoOpTagger passes text through unchanged.
+        :return: False
+        """
+        return False
+    def get_text_index_string(self, text: Union[str, List[str]]):
+        """
+        Return the text unchanged (no indexing applied).
+        """
+        return text
+    def as_nlp_pipeline(
+        self,
+        texts: Union[List[str], Tuple[str, dict]],
+        batch_size: int = 1000,
+        n_process: int = 1
+    ):
+        """
+        Returns texts unchanged without NLP processing.
+        Maintains API compatibility with other taggers.
+        :param texts: Text strings or tuples of (text, context_dict)
+        :param batch_size: Ignored (for API compatibility)
+        :param n_process: Ignored (for API compatibility)
+        """
+        process_as_tuples = texts and isinstance(texts[0], tuple)
+        if process_as_tuples:
+            # Return generator of (text, context) tuples
+            for text, context in texts:
+                yield (text, context)
+        else:
+            # Return generator of text strings
+            for text in texts:
+                yield text
 class LowercaseTagger(object):
     """
     Returns the text in lowercase.
@@ -21,6 +71,15 @@ class LowercaseTagger(object):
             'chatterbot_lowercase_indexer', name='chatterbot_lowercase_indexer', last=True
         )
+    def needs_text_indexing(self):
+        """
+        Indicates whether this tagger performs text indexing/transformation.
+        Returns True since LowercaseTagger transforms text to lowercase.
+        :return: True
+        """
+        return True
     def get_text_index_string(self, text: Union[str, List[str]]):
         if isinstance(text, list):
             documents = self.nlp.pipe(text, batch_size=1000, n_process=1)
@@ -73,6 +132,15 @@ class PosLemmaTagger(object):
             'chatterbot_bigram_indexer', name='chatterbot_bigram_indexer', last=True
         )
+    def needs_text_indexing(self):
+        """
+        Indicates whether this tagger performs text indexing/transformation.
+        Returns True since PosLemmaTagger creates POS-lemma bigram indexes.
+        :return: True
+        """
+        return True
     def get_text_index_string(self, text: Union[str, List[str]]) -> str:
         """
         Return a string of text containing part-of-speech, lemma pairs.