PyPI - ChatterBot - Versions diffs - 1.2.1__py3-none-any.whl → 1.2.3__py3-none-any.whl - Mend

ChatterBot 1.2.1__py3-none-any.whl → 1.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

chatterbot/__init__.py +1 -1
chatterbot/chatterbot.py +41 -8
chatterbot/comparisons.py +32 -15
chatterbot/logic/best_match.py +42 -35
chatterbot/logic/specific_response.py +52 -9
chatterbot/logic/unit_conversion.py +4 -3
chatterbot/response_selection.py +1 -1
chatterbot/search.py +65 -17
chatterbot/storage/__init__.py +2 -0
chatterbot/storage/django_storage.py +13 -23
chatterbot/storage/mongodb.py +7 -26
chatterbot/storage/redis.py +390 -0
chatterbot/storage/sql_storage.py +77 -68
chatterbot/storage/storage_adapter.py +9 -7
chatterbot/trainers.py +3 -3
chatterbot/vectorstores.py +74 -0
{ChatterBot-1.2.1.dist-info → chatterbot-1.2.3.dist-info}/METADATA +9 -3
{ChatterBot-1.2.1.dist-info → chatterbot-1.2.3.dist-info}/RECORD +21 -19
{ChatterBot-1.2.1.dist-info → chatterbot-1.2.3.dist-info}/WHEEL +1 -1
{ChatterBot-1.2.1.dist-info → chatterbot-1.2.3.dist-info}/LICENSE +0 -0
{ChatterBot-1.2.1.dist-info → chatterbot-1.2.3.dist-info}/top_level.txt +0 -0

chatterbot/__init__.py CHANGED Viewed

@@ -4,7 +4,7 @@ ChatterBot is a machine learning, conversational dialog engine.
 from .chatterbot import ChatBot
-__version__ = '1.2.1'
+__version__ = '1.2.3'
 __all__ = (
     'ChatBot',

chatterbot/chatterbot.py CHANGED Viewed

@@ -2,7 +2,10 @@ import logging
 from chatterbot.storage import StorageAdapter
 from chatterbot.logic import LogicAdapter
 from chatterbot.search import TextSearch, IndexedTextSearch
+from chatterbot.tagging import PosLemmaTagger
+from chatterbot import languages
 from chatterbot import utils
+import spacy
 class ChatBot(object):
@@ -27,6 +30,12 @@ class ChatBot(object):
         self.storage = utils.initialize_class(storage_adapter, **kwargs)
+        Tagger = kwargs.get('tagger', PosLemmaTagger)
+        self.tagger = Tagger(language=kwargs.get(
+            'tagger_language', languages.ENG
+        ))
         primary_search_algorithm = IndexedTextSearch(self, **kwargs)
         text_search_algorithm = TextSearch(self, **kwargs)
@@ -51,6 +60,9 @@ class ChatBot(object):
         for preprocessor in preprocessors:
             self.preprocessors.append(utils.import_module(preprocessor))
+        # NOTE: 'xx' is the language code for a multi-language model
+        self.nlp = spacy.blank(self.tagger.language.ISO_639_1)
         self.logger = kwargs.get('logger', logging.getLogger(__name__))
         # Allow the bot to save input it receives so that it can learn
@@ -105,16 +117,27 @@ class ChatBot(object):
         for preprocessor in self.preprocessors:
             input_statement = preprocessor(input_statement)
+        # Mark the statement as being a response to the previous
+        if input_statement.in_response_to is None:
+            previous_statement = self.get_latest_response(input_statement.conversation)
+            if previous_statement:
+                input_statement.in_response_to = previous_statement.text
         # Make sure the input statement has its search text saved
         if not input_statement.search_text:
-            _search_text = self.storage.tagger.get_text_index_string(input_statement.text)
+            _search_text = self.tagger.get_text_index_string(input_statement.text)
             input_statement.search_text = _search_text
         if not input_statement.search_in_response_to and input_statement.in_response_to:
-            input_statement.search_in_response_to = self.storage.tagger.get_text_index_string(input_statement.in_response_to)
+            input_statement.search_in_response_to = self.tagger.get_text_index_string(
+                input_statement.in_response_to
+            )
-        response = self.generate_response(input_statement, additional_response_selection_parameters)
+        response = self.generate_response(
+            input_statement,
+            additional_response_selection_parameters
+        )
         # Update any response data that needs to be changed
         if persist_values_to_response:
@@ -128,10 +151,13 @@ class ChatBot(object):
                     setattr(response, response_key, response_value)
         if not self.read_only:
-            self.learn_response(input_statement)
+            # Save the input statement
+            self.storage.create(**input_statement.serialize())
             # Save the response generated for the input
-            self.storage.create(**response.serialize())
+            self.learn_response(response, previous_statement=input_statement)
         return response
@@ -194,6 +220,8 @@ class ChatBot(object):
                 if result_option.count > most_common.count:
                     most_common = result_option
+            self.logger.info('Selecting "{}" as the most common response'.format(most_common.statement.text))
             if most_common.count > 1:
                 result = most_common.statement
@@ -204,6 +232,8 @@ class ChatBot(object):
             persona='bot:' + self.name
         )
+        response.add_tags(*result.get_tags())
         response.confidence = result.confidence
         return response
@@ -228,11 +258,14 @@ class ChatBot(object):
             statement.in_response_to = previous_statement
         self.logger.info('Adding "{}" as a response to "{}"'.format(
-            statement.text,
-            previous_statement_text
+            previous_statement_text,
+            statement.text
         ))
-        # Save the input statement
+        if not statement.persona:
+            statement.persona = 'bot:' + self.name
+        # Save the response statement
         return self.storage.create(**statement.serialize())
     def get_latest_response(self, conversation):

chatterbot/comparisons.py CHANGED Viewed

@@ -19,15 +19,22 @@ class Comparator:
     def __call__(self, statement_a, statement_b):
         return self.compare(statement_a, statement_b)
-    def compare(self, statement_a, statement_b):
+    def compare_text(self, text_a, text_b):
         """
-        Implemented in subclasses: compare statement_a to statement_b.
+        Implemented in subclasses: compare text_a to text_b.
         :return: The percent of similarity between the statements based on the implemented algorithm.
         :rtype: float
         """
         return 0
+    def compare(self, statement_a, statement_b):
+        """
+        :return: The percent of similarity between the statements based on the implemented algorithm.
+        :rtype: float
+        """
+        return self.compare_text(statement_a.text, statement_b.text)
 class LevenshteinDistance(Comparator):
     """
@@ -39,21 +46,21 @@ class LevenshteinDistance(Comparator):
     based on the Levenshtein distance algorithm.
     """
-    def compare(self, statement_a, statement_b):
+    def compare_text(self, text_a, text_b):
         """
-        Compare the two input statements.
+        Compare the two pieces of text.
         :return: The percent of similarity between the text of the statements.
         :rtype: float
         """
-        # Return 0 if either statement has a falsy text value
-        if not statement_a.text or not statement_b.text:
+        # Return 0 if either statement has a None text value
+        if text_a is None or text_b is None:
             return 0
         # Get the lowercase version of both strings
-        statement_a_text = str(statement_a.text.lower())
-        statement_b_text = str(statement_b.text.lower())
+        statement_a_text = str(text_a.lower())
+        statement_b_text = str(text_b.lower())
         similarity = SequenceMatcher(
             None,
@@ -103,15 +110,20 @@ class SpacySimilarity(Comparator):
         # Disable the Named Entity Recognition (NER) component because it is not necessary
         self.nlp = spacy.load(model, exclude=['ner'])
-    def compare(self, statement_a, statement_b):
+    def compare_text(self, text_a, text_b):
         """
-        Compare the two input statements.
+        Compare the similarity of two strings.
         :return: The percent of similarity between the closest synset distance.
         :rtype: float
         """
-        document_a = self.nlp(statement_a.text)
-        document_b = self.nlp(statement_b.text)
+        # Return 0 if either statement has a None text value
+        if text_a is None or text_b is None:
+            return 0
+        document_a = self.nlp(text_a)
+        document_b = self.nlp(text_b)
         return document_a.similarity(document_b)
@@ -155,14 +167,19 @@ class JaccardSimilarity(Comparator):
         # Disable the Named Entity Recognition (NER) component because it is not necessary
         self.nlp = spacy.load(model, exclude=['ner'])
-    def compare(self, statement_a, statement_b):
+    def compare_text(self, text_a, text_b):
         """
         Return the calculated similarity of two
         statements based on the Jaccard index.
         """
+        # Return 0 if either statement has a None text value
+        if text_a is None or text_b is None:
+            return 0
         # Make both strings lowercase
-        document_a = self.nlp(statement_a.text.lower())
-        document_b = self.nlp(statement_b.text.lower())
+        document_a = self.nlp(text_a.lower())
+        document_b = self.nlp(text_b.lower())
         statement_a_lemmas = frozenset([
             token.lemma_ for token in document_a if not token.is_stop

chatterbot/logic/best_match.py CHANGED Viewed

@@ -23,10 +23,13 @@ class BestMatch(LogicAdapter):
         self.excluded_words = kwargs.get('excluded_words')
     def process(self, input_statement, additional_response_selection_parameters=None):
+        # Get all statements that have a response text similar to the input statement
         search_results = self.search_algorithm.search(input_statement)
         # Use the input statement as the closest match if no other results are found
-        closest_match = next(search_results, input_statement)
+        input_statement.confidence = 0  # Use 0 confidence when no other results are found
+        closest_match = input_statement
         # Search for the closest match to the input statement
         for result in search_results:
@@ -36,8 +39,8 @@ class BestMatch(LogicAdapter):
             if result.confidence >= self.maximum_similarity_threshold:
                 break
-        self.chatbot.logger.info('Using "{}" as a close match to "{}" with a confidence of {}'.format(
-            closest_match.text, input_statement.text, closest_match.confidence
+        self.chatbot.logger.info('Selecting "{}" as a response to "{}" with a confidence of {}'.format(
+            closest_match.in_response_to, input_statement.text, closest_match.confidence
         ))
         recent_repeated_responses = filters.get_recent_repeated_responses(
@@ -51,39 +54,34 @@ class BestMatch(LogicAdapter):
             ))
         response_selection_parameters = {
-            'search_in_response_to': closest_match.search_text,
+            'search_text': closest_match.search_text,
+            'persona_not_startswith': 'bot:',
             'exclude_text': recent_repeated_responses,
             'exclude_text_words': self.excluded_words
         }
         alternate_response_selection_parameters = {
-            'search_in_response_to': self.chatbot.storage.tagger.get_text_index_string(
+            'search_in_response_to': self.chatbot.tagger.get_text_index_string(
                 input_statement.text
             ),
+            'persona_not_startswith': 'bot:',
             'exclude_text': recent_repeated_responses,
             'exclude_text_words': self.excluded_words
         }
         if additional_response_selection_parameters:
-            response_selection_parameters.update(additional_response_selection_parameters)
-            alternate_response_selection_parameters.update(additional_response_selection_parameters)
-        # Get all statements that are in response to the closest match
-        response_list = list(self.chatbot.storage.filter(**response_selection_parameters))
+            response_selection_parameters.update(
+                additional_response_selection_parameters
+            )
+            alternate_response_selection_parameters.update(
+                additional_response_selection_parameters
+            )
-        alternate_response_list = []
-        if not response_list:
-            self.chatbot.logger.info('No responses found. Generating alternate response list.')
-            alternate_response_list = list(self.chatbot.storage.filter(**alternate_response_selection_parameters))
+        # Get all statements with text similar to the closest match
+        response_list = list(self.chatbot.storage.filter(**response_selection_parameters))
         if response_list:
-            self.chatbot.logger.info(
-                'Selecting response from {} optimal responses.'.format(
-                    len(response_list)
-                )
-            )
             response = self.select_response(
                 input_statement,
                 response_list,
@@ -91,26 +89,35 @@ class BestMatch(LogicAdapter):
             )
             response.confidence = closest_match.confidence
-            self.chatbot.logger.info('Response selected. Using "{}"'.format(response.text))
-        elif alternate_response_list:
+            self.chatbot.logger.info('Selecting "{}" from {} optimal responses.'.format(
+                response.text,
+                len(response_list)
+            ))
+        else:
             '''
             The case where there was no responses returned for the selected match
             but a value exists for the statement the match is in response to.
             '''
-            self.chatbot.logger.info(
-                'Selecting response from {} optimal alternate responses.'.format(
-                    len(alternate_response_list)
+            self.chatbot.logger.info('No responses found. Generating alternate response list.')
+            alternate_response_list = list(self.chatbot.storage.filter(
+                **alternate_response_selection_parameters
+            ))
+            if alternate_response_list:
+                response = self.select_response(
+                    input_statement,
+                    alternate_response_list,
+                    self.chatbot.storage
                 )
-            )
-            response = self.select_response(
-                input_statement,
-                alternate_response_list,
-                self.chatbot.storage
-            )
-            response.confidence = closest_match.confidence
-            self.chatbot.logger.info('Alternate response selected. Using "{}"'.format(response.text))
-        else:
-            response = self.get_default_response(input_statement)
+                response.confidence = closest_match.confidence
+                self.chatbot.logger.info('Selected alternative response "{}" from {} options'.format(
+                    response.text,
+                    len(alternate_response_list)
+                ))
+            else:
+                response = self.get_default_response(input_statement)
+                self.chatbot.logger.info('Using "%s" as a default response.', response.text)
         return response

chatterbot/logic/specific_response.py CHANGED Viewed

@@ -1,4 +1,7 @@
 from chatterbot.logic import LogicAdapter
+from chatterbot.conversation import Statement
+from chatterbot import constants, languages
+import spacy
 class SpecificResponseAdapter(LogicAdapter):
@@ -8,30 +11,70 @@ class SpecificResponseAdapter(LogicAdapter):
     :kwargs:
         * *input_text* (``str``) --
           The input text that triggers this logic adapter.
-        * *output_text* (``str``) --
+        * *output_text* (``str`` or ``function``) --
           The output text returned by this logic adapter.
+          If a function is provided, it should return a string.
     """
     def __init__(self, chatbot, **kwargs):
         super().__init__(chatbot, **kwargs)
-        from chatterbot.conversation import Statement
         self.input_text = kwargs.get('input_text')
-        output_text = kwargs.get('output_text')
-        self.response_statement = Statement(text=output_text)
+        self.matcher = None
+        if MatcherClass := kwargs.get('matcher'):
+            language = kwargs.get('language', languages.ENG)
+            self.nlp = self._initialize_nlp(language)
+            self.matcher = MatcherClass(self.nlp.vocab)
+            self.matcher.add('SpecificResponse', [self.input_text])
+        self._output_text = kwargs.get('output_text')
+    def _initialize_nlp(self, language):
+        try:
+            model = constants.DEFAULT_LANGUAGE_TO_SPACY_MODEL_MAP[language]
+        except KeyError as e:
+            raise KeyError(
+                f'Spacy model is not available for language {language}'
+            ) from e
+        return spacy.load(model)
     def can_process(self, statement):
-        if statement.text == self.input_text:
+        if self.matcher:
+            doc = self.nlp(statement.text)
+            matches = self.matcher(doc)
+            if matches:
+                return True
+        elif statement.text == self.input_text:
             return True
         return False
     def process(self, statement, additional_response_selection_parameters=None):
-        if statement.text == self.input_text:
-            self.response_statement.confidence = 1
+        if callable(self._output_text):
+            response_statement = Statement(text=self._output_text())
+        else:
+            response_statement = Statement(text=self._output_text)
+        if self.matcher:
+            doc = self.nlp(statement.text)
+            matches = self.matcher(doc)
+            if matches:
+                response_statement.confidence = 1
+            else:
+                response_statement.confidence = 0
+        elif statement.text == self.input_text:
+            response_statement.confidence = 1
         else:
-            self.response_statement.confidence = 0
+            response_statement.confidence = 0
-        return self.response_statement
+        return response_statement

chatterbot/logic/unit_conversion.py CHANGED Viewed

@@ -158,7 +158,8 @@ class UnitConversion(LogicAdapter):
                     response = func(p)
                     if response.confidence == 1.0:
                         break
-        except Exception:
+        except Exception as e:
+            self.chatbot.logger.warning('Error during UnitConversion: {}'.format(str(e)))
             response.confidence = 0.0
-        finally:
-            return response
+        return response

chatterbot/response_selection.py CHANGED Viewed

@@ -37,7 +37,7 @@ def get_most_frequent_response(input_statement, response_list, storage=None):
             matching_response = statement
             occurrence_count = count
-    # Choose the most commonly occuring matching response
+    # Choose the most commonly occurring matching response
     return matching_response

chatterbot/search.py CHANGED Viewed

@@ -21,7 +21,7 @@ class IndexedTextSearch:
         )
         self.compare_statements = statement_comparison_function(
-            language=self.chatbot.storage.tagger.language
+            language=self.chatbot.tagger.language
         )
         self.search_page_size = kwargs.get(
@@ -43,19 +43,8 @@ class IndexedTextSearch:
         """
         self.chatbot.logger.info('Beginning search for close text match')
-        input_search_text = input_statement.search_text
-        if not input_statement.search_text:
-            self.chatbot.logger.warning(
-                'No value for search_text was available on the provided input'
-            )
-            input_search_text = self.chatbot.storage.tagger.get_text_index_string(
-                input_statement.text
-            )
         search_parameters = {
-            'search_text_contains': input_search_text,
+            'search_in_response_to_contains': input_statement.search_text,
             'persona_not_startswith': 'bot:',
             'page_size': self.search_page_size
         }
@@ -71,14 +60,16 @@ class IndexedTextSearch:
         # Find the closest matching known statement
         for statement in statement_list:
-            confidence = self.compare_statements(input_statement, statement)
+            confidence = self.compare_statements.compare_text(
+                input_statement.text, statement.in_response_to
+            )
             if confidence > best_confidence_so_far:
                 best_confidence_so_far = confidence
                 statement.confidence = confidence
                 self.chatbot.logger.info('Similar text found: {} {}'.format(
-                    statement.text, confidence
+                    statement.in_response_to, confidence
                 ))
                 yield statement
@@ -107,7 +98,7 @@ class TextSearch:
         )
         self.compare_statements = statement_comparison_function(
-            language=self.chatbot.storage.tagger.language
+            language=self.chatbot.tagger.language
         )
         self.search_page_size = kwargs.get(
@@ -145,7 +136,9 @@ class TextSearch:
         # Find the closest matching known statement
         for statement in statement_list:
-            confidence = self.compare_statements(input_statement, statement)
+            confidence = self.compare_statements.compare_text(
+                input_statement.text, statement.in_response_to
+            )
             if confidence > best_confidence_so_far:
                 best_confidence_so_far = confidence
@@ -156,3 +149,58 @@ class TextSearch:
                 ))
                 yield statement
+class VectorSearch:
+    """
+    .. note:: BETA feature: this search method is new and experimental.
+    Search for similar text based on a :term:`vector database`.
+    """
+    name = 'vector_search'
+    def __init__(self, chatbot, **kwargs):
+        from chatterbot.storage import RedisVectorStorageAdapter
+        # Good documentation:
+        # https://python.langchain.com/docs/integrations/vectorstores/redis/
+        #
+        # https://hub.docker.com/r/redis/redis-stack
+        # Mondodb:
+        # > Vector Search is only supported on Atlas Clusters
+        # https://www.mongodb.com/community/forums/t/can-a-local-mongodb-instance-be-used-when-working-with-langchain-mongodbatlasvectorsearch/265356
+        # FAISS:
+        # https://python.langchain.com/docs/integrations/vectorstores/faiss/
+        print("Starting Redis Vector Store")
+        # TODO: look into:
+        # https://python.langchain.com/api_reference/redis/chat_message_history/langchain_redis.chat_message_history.RedisChatMessageHistory.html
+        # The VectorSearch class is only compatible with the RedisVectorStorageAdapter
+        if not isinstance(chatbot.storage, RedisVectorStorageAdapter):
+            raise Exception(
+                'The VectorSearch search method requires the RedisVectorStorageAdapter storage adapter.'
+            )
+    def search(self, input_statement, **additional_parameters):
+        print("Querying Vector Store")
+        # Similarity search with score and filter
+        # NOTE: It looks like `return_all` is needed to return the full document
+        # specifically what we need here is the ID
+        scored_results = self.storage.vector_store.similarity_search_with_score(
+            input_statement.text, k=2, return_all=True
+        )
+        # sort_by="score", filter={"category": "likes"})
+        print("Similarity Search with Score Results:\n")
+        for doc, score in scored_results:
+            print(f"Content: {doc.page_content[:150]}...")
+            print(f"ID: {doc.id}")
+            print(f"Metadata: {doc.metadata}")
+            print(f"Score: {score}")
+            print()

chatterbot/storage/__init__.py CHANGED Viewed

@@ -2,6 +2,7 @@ from chatterbot.storage.storage_adapter import StorageAdapter
 from chatterbot.storage.django_storage import DjangoStorageAdapter
 from chatterbot.storage.mongodb import MongoDatabaseAdapter
 from chatterbot.storage.sql_storage import SQLStorageAdapter
+from chatterbot.storage.redis import RedisVectorStorageAdapter
 __all__ = (
@@ -9,4 +10,5 @@ __all__ = (
     'DjangoStorageAdapter',
     'MongoDatabaseAdapter',
     'SQLStorageAdapter',
+    'RedisVectorStorageAdapter',
 )

chatterbot/storage/django_storage.py CHANGED Viewed

@@ -44,6 +44,7 @@ class DjangoStorageAdapter(StorageAdapter):
         exclude_text_words = kwargs.pop('exclude_text_words', [])
         persona_not_startswith = kwargs.pop('persona_not_startswith', None)
         search_text_contains = kwargs.pop('search_text_contains', None)
+        search_in_response_to_contains = kwargs.pop('search_in_response_to_contains', None)
         # Convert a single sting into a list if only one tag is provided
         if type(tags) == str:
@@ -83,6 +84,16 @@ class DjangoStorageAdapter(StorageAdapter):
                 or_query
             )
+        if search_in_response_to_contains:
+            or_query = Q()
+            for word in search_in_response_to_contains.split(' '):
+                or_query |= Q(search_in_response_to__contains=word)
+            statements = statements.filter(
+                or_query
+            )
         if order_by:
             statements = statements.order_by(*order_by)
@@ -99,13 +110,6 @@ class DjangoStorageAdapter(StorageAdapter):
         tags = kwargs.pop('tags', [])
-        if 'search_text' not in kwargs:
-            kwargs['search_text'] = self.tagger.get_text_index_string(kwargs['text'])
-        if 'search_in_response_to' not in kwargs:
-            if kwargs.get('in_response_to'):
-                kwargs['search_in_response_to'] = self.tagger.get_text_index_string(kwargs['in_response_to'])
         statement = Statement(**kwargs)
         statement.save()
@@ -129,20 +133,6 @@ class DjangoStorageAdapter(StorageAdapter):
         tag_cache = {}
-        # Check if any statements already have a search text
-        have_search_text = any(statement.search_text for statement in statements)
-        # Generate search text values in bulk
-        if not have_search_text:
-            search_text_documents = self.tagger.as_nlp_pipeline([statement.text for statement in statements])
-            response_search_text_documents = self.tagger.as_nlp_pipeline([statement.in_response_to or '' for statement in statements])
-            for statement, search_text_document, response_search_text_document in zip(
-                statements, search_text_documents, response_search_text_documents
-            ):
-                statement.search_text = search_text_document._.search_index
-                statement.search_in_response_to = response_search_text_document._.search_index
         for statement in statements:
             statement_data = statement.serialize()
@@ -176,10 +166,10 @@ class DjangoStorageAdapter(StorageAdapter):
         else:
             statement = Statement.objects.create(
                 text=statement.text,
-                search_text=self.tagger.get_text_index_string(statement.text),
+                search_text=statement.search_text,
                 conversation=statement.conversation,
                 in_response_to=statement.in_response_to,
-                search_in_response_to=self.tagger.get_text_index_string(statement.in_response_to),
+                search_in_response_to=statement.search_in_response_to,
                 created_at=statement.created_at
             )

ChatterBot 1.2.1__py3-none-any.whl → 1.2.3__py3-none-any.whl

ChatterBot 1.2.1__py3-none-any.whl → 1.2.3__py3-none-any.whl