ChatterBot 1.2.9__tar.gz → 1.2.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. {chatterbot-1.2.9 → chatterbot-1.2.10}/ChatterBot.egg-info/PKG-INFO +4 -10
  2. {chatterbot-1.2.9 → chatterbot-1.2.10}/PKG-INFO +4 -10
  3. {chatterbot-1.2.9 → chatterbot-1.2.10}/README.md +4 -10
  4. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/__init__.py +1 -1
  5. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/chatterbot.py +62 -36
  6. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/search.py +70 -0
  7. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/storage/redis.py +120 -40
  8. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/storage/storage_adapter.py +81 -0
  9. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/tagging.py +68 -0
  10. {chatterbot-1.2.9 → chatterbot-1.2.10}/ChatterBot.egg-info/SOURCES.txt +0 -0
  11. {chatterbot-1.2.9 → chatterbot-1.2.10}/ChatterBot.egg-info/dependency_links.txt +0 -0
  12. {chatterbot-1.2.9 → chatterbot-1.2.10}/ChatterBot.egg-info/requires.txt +0 -0
  13. {chatterbot-1.2.9 → chatterbot-1.2.10}/ChatterBot.egg-info/top_level.txt +0 -0
  14. {chatterbot-1.2.9 → chatterbot-1.2.10}/LICENSE +0 -0
  15. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/__main__.py +0 -0
  16. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/adapters.py +0 -0
  17. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/comparisons.py +0 -0
  18. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/components.py +0 -0
  19. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/constants.py +0 -0
  20. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/conversation.py +0 -0
  21. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/corpus.py +0 -0
  22. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/exceptions.py +0 -0
  23. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/ext/__init__.py +0 -0
  24. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/ext/django_chatterbot/__init__.py +0 -0
  25. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/ext/django_chatterbot/abstract_models.py +0 -0
  26. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/ext/django_chatterbot/admin.py +0 -0
  27. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/ext/django_chatterbot/apps.py +0 -0
  28. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/ext/django_chatterbot/migrations/0001_initial.py +0 -0
  29. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/ext/django_chatterbot/migrations/0002_statement_extra_data.py +0 -0
  30. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/ext/django_chatterbot/migrations/0003_change_occurrence_default.py +0 -0
  31. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/ext/django_chatterbot/migrations/0004_rename_in_response_to.py +0 -0
  32. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/ext/django_chatterbot/migrations/0005_statement_created_at.py +0 -0
  33. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/ext/django_chatterbot/migrations/0006_create_conversation.py +0 -0
  34. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/ext/django_chatterbot/migrations/0007_response_created_at.py +0 -0
  35. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/ext/django_chatterbot/migrations/0008_update_conversations.py +0 -0
  36. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/ext/django_chatterbot/migrations/0009_tags.py +0 -0
  37. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/ext/django_chatterbot/migrations/0010_statement_text.py +0 -0
  38. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/ext/django_chatterbot/migrations/0011_blank_extra_data.py +0 -0
  39. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/ext/django_chatterbot/migrations/0012_statement_created_at.py +0 -0
  40. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/ext/django_chatterbot/migrations/0013_change_conversations.py +0 -0
  41. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/ext/django_chatterbot/migrations/0014_remove_statement_extra_data.py +0 -0
  42. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/ext/django_chatterbot/migrations/0015_statement_persona.py +0 -0
  43. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/ext/django_chatterbot/migrations/0016_statement_stemmed_text.py +0 -0
  44. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/ext/django_chatterbot/migrations/0017_tags_unique.py +0 -0
  45. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/ext/django_chatterbot/migrations/0018_text_max_length.py +0 -0
  46. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/ext/django_chatterbot/migrations/0019_alter_statement_id_alter_tag_id_and_more.py +0 -0
  47. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/ext/django_chatterbot/migrations/0020_alter_statement_conversation_and_more.py +0 -0
  48. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/ext/django_chatterbot/migrations/__init__.py +0 -0
  49. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/ext/django_chatterbot/model_admin.py +0 -0
  50. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/ext/django_chatterbot/models.py +0 -0
  51. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/ext/django_chatterbot/settings.py +0 -0
  52. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/ext/sqlalchemy_app/__init__.py +0 -0
  53. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/ext/sqlalchemy_app/models.py +0 -0
  54. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/filters.py +0 -0
  55. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/languages.py +0 -0
  56. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/llm.py +0 -0
  57. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/logic/__init__.py +0 -0
  58. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/logic/best_match.py +0 -0
  59. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/logic/logic_adapter.py +0 -0
  60. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/logic/mathematical_evaluation.py +0 -0
  61. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/logic/specific_response.py +0 -0
  62. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/logic/time_adapter.py +0 -0
  63. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/logic/unit_conversion.py +0 -0
  64. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/parsing.py +0 -0
  65. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/preprocessors.py +0 -0
  66. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/response_selection.py +0 -0
  67. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/storage/__init__.py +0 -0
  68. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/storage/django_storage.py +0 -0
  69. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/storage/mongodb.py +0 -0
  70. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/storage/sql_storage.py +0 -0
  71. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/trainers.py +0 -0
  72. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/utils.py +0 -0
  73. {chatterbot-1.2.9 → chatterbot-1.2.10}/chatterbot/vectorstores.py +0 -0
  74. {chatterbot-1.2.9 → chatterbot-1.2.10}/pyproject.toml +0 -0
  75. {chatterbot-1.2.9 → chatterbot-1.2.10}/setup.cfg +0 -0
  76. {chatterbot-1.2.9 → chatterbot-1.2.10}/tests/test_adapter_validation.py +0 -0
  77. {chatterbot-1.2.9 → chatterbot-1.2.10}/tests/test_benchmarks.py +0 -0
  78. {chatterbot-1.2.9 → chatterbot-1.2.10}/tests/test_chatbot.py +0 -0
  79. {chatterbot-1.2.9 → chatterbot-1.2.10}/tests/test_cli.py +0 -0
  80. {chatterbot-1.2.9 → chatterbot-1.2.10}/tests/test_comparisons.py +0 -0
  81. {chatterbot-1.2.9 → chatterbot-1.2.10}/tests/test_conversations.py +0 -0
  82. {chatterbot-1.2.9 → chatterbot-1.2.10}/tests/test_corpus.py +0 -0
  83. {chatterbot-1.2.9 → chatterbot-1.2.10}/tests/test_examples.py +0 -0
  84. {chatterbot-1.2.9 → chatterbot-1.2.10}/tests/test_filters.py +0 -0
  85. {chatterbot-1.2.9 → chatterbot-1.2.10}/tests/test_initialization.py +0 -0
  86. {chatterbot-1.2.9 → chatterbot-1.2.10}/tests/test_languages.py +0 -0
  87. {chatterbot-1.2.9 → chatterbot-1.2.10}/tests/test_parsing.py +0 -0
  88. {chatterbot-1.2.9 → chatterbot-1.2.10}/tests/test_preprocessors.py +0 -0
  89. {chatterbot-1.2.9 → chatterbot-1.2.10}/tests/test_response_selection.py +0 -0
  90. {chatterbot-1.2.9 → chatterbot-1.2.10}/tests/test_search.py +0 -0
  91. {chatterbot-1.2.9 → chatterbot-1.2.10}/tests/test_tagging.py +0 -0
  92. {chatterbot-1.2.9 → chatterbot-1.2.10}/tests/test_turing.py +0 -0
  93. {chatterbot-1.2.9 → chatterbot-1.2.10}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ChatterBot
3
- Version: 1.2.9
3
+ Version: 1.2.10
4
4
  Summary: ChatterBot is a machine learning, conversational dialog engine
5
5
  Author: Gunther Cox
6
6
  License-Expression: BSD-3-Clause
@@ -153,16 +153,10 @@ section of the documentation.
153
153
 
154
154
  See release notes for changes https://github.com/gunthercox/ChatterBot/releases
155
155
 
156
- # Development pattern for contributors
156
+ # Contributing
157
157
 
158
- 1. [Create a fork](https://help.github.com/articles/fork-a-repo/) of
159
- the [main ChatterBot repository](https://github.com/gunthercox/ChatterBot) on GitHub.
160
- 2. Make your changes in a branch named something different from `master`, e.g. create
161
- a new branch `my-pull-request`.
162
- 3. [Create a pull request](https://help.github.com/articles/creating-a-pull-request/).
163
- 4. Please follow the [Python style guide for PEP-8](https://www.python.org/dev/peps/pep-0008/).
164
- 5. Use the projects [built-in automated testing](https://docs.chatterbot.us/testing/).
165
- to help make sure that your contribution is free from errors.
158
+ Contributions are welcome. To help ensure a smooth process, please start with the contributing guidelines in our documentation:
159
+ https://docs.chatterbot.us/contributing/
166
160
 
167
161
  # Sponsors
168
162
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ChatterBot
3
- Version: 1.2.9
3
+ Version: 1.2.10
4
4
  Summary: ChatterBot is a machine learning, conversational dialog engine
5
5
  Author: Gunther Cox
6
6
  License-Expression: BSD-3-Clause
@@ -153,16 +153,10 @@ section of the documentation.
153
153
 
154
154
  See release notes for changes https://github.com/gunthercox/ChatterBot/releases
155
155
 
156
- # Development pattern for contributors
156
+ # Contributing
157
157
 
158
- 1. [Create a fork](https://help.github.com/articles/fork-a-repo/) of
159
- the [main ChatterBot repository](https://github.com/gunthercox/ChatterBot) on GitHub.
160
- 2. Make your changes in a branch named something different from `master`, e.g. create
161
- a new branch `my-pull-request`.
162
- 3. [Create a pull request](https://help.github.com/articles/creating-a-pull-request/).
163
- 4. Please follow the [Python style guide for PEP-8](https://www.python.org/dev/peps/pep-0008/).
164
- 5. Use the projects [built-in automated testing](https://docs.chatterbot.us/testing/).
165
- to help make sure that your contribution is free from errors.
158
+ Contributions are welcome. To help ensure a smooth process, please start with the contributing guidelines in our documentation:
159
+ https://docs.chatterbot.us/contributing/
166
160
 
167
161
  # Sponsors
168
162
 
@@ -92,16 +92,10 @@ section of the documentation.
92
92
 
93
93
  See release notes for changes https://github.com/gunthercox/ChatterBot/releases
94
94
 
95
- # Development pattern for contributors
96
-
97
- 1. [Create a fork](https://help.github.com/articles/fork-a-repo/) of
98
- the [main ChatterBot repository](https://github.com/gunthercox/ChatterBot) on GitHub.
99
- 2. Make your changes in a branch named something different from `master`, e.g. create
100
- a new branch `my-pull-request`.
101
- 3. [Create a pull request](https://help.github.com/articles/creating-a-pull-request/).
102
- 4. Please follow the [Python style guide for PEP-8](https://www.python.org/dev/peps/pep-0008/).
103
- 5. Use the projects [built-in automated testing](https://docs.chatterbot.us/testing/).
104
- to help make sure that your contribution is free from errors.
95
+ # Contributing
96
+
97
+ Contributions are welcome. To help ensure a smooth process, please start with the contributing guidelines in our documentation:
98
+ https://docs.chatterbot.us/contributing/
105
99
 
106
100
  # Sponsors
107
101
 
@@ -4,7 +4,7 @@ ChatterBot is a machine learning, conversational dialog engine.
4
4
  from .chatterbot import ChatBot
5
5
 
6
6
 
7
- __version__ = '1.2.9'
7
+ __version__ = '1.2.10'
8
8
 
9
9
  __all__ = (
10
10
  'ChatBot',
@@ -2,7 +2,7 @@ import logging
2
2
  from typing import Union
3
3
  from chatterbot.storage import StorageAdapter
4
4
  from chatterbot.logic import LogicAdapter
5
- from chatterbot.search import TextSearch, IndexedTextSearch
5
+ from chatterbot.search import TextSearch, IndexedTextSearch, SemanticVectorSearch
6
6
  from chatterbot.tagging import PosLemmaTagger
7
7
  from chatterbot.conversation import Statement
8
8
  from chatterbot import languages
@@ -74,41 +74,60 @@ class ChatBot(object):
74
74
 
75
75
  tagger_language = kwargs.get('tagger_language', languages.ENG)
76
76
 
77
- try:
78
- Tagger = kwargs.get('tagger', PosLemmaTagger)
79
-
80
- # Allow instances to be provided for performance optimization
81
- # (Example: a pre-loaded model in a tagger when unit testing)
82
- if not isinstance(Tagger, type):
83
- self.tagger = Tagger
84
- else:
85
- self.tagger = Tagger(language=tagger_language)
86
- except IOError as io_error:
87
- # Return a more helpful error message if possible
88
- if "Can't find model" in str(io_error):
89
- model_name = utils.get_model_for_language(tagger_language)
90
- if hasattr(tagger_language, 'ENGLISH_NAME'):
91
- language_name = tagger_language.ENGLISH_NAME
77
+ # Check if storage adapter has a preferred tagger
78
+ PreferredTagger = self.storage.get_preferred_tagger()
79
+
80
+ if PreferredTagger is not None:
81
+ # Storage adapter specifies its own tagger
82
+ self.tagger = PreferredTagger(language=tagger_language)
83
+ else:
84
+ # Use default or user-specified tagger
85
+ try:
86
+ Tagger = kwargs.get('tagger', PosLemmaTagger)
87
+
88
+ # Allow instances to be provided for performance optimization
89
+ # (Example: a pre-loaded model in a tagger when unit testing)
90
+ if not isinstance(Tagger, type):
91
+ self.tagger = Tagger
92
92
  else:
93
- language_name = tagger_language
94
- raise self.ChatBotException(
95
- 'Setup error:\n'
96
- f'The Spacy model for "{language_name}" language is missing.\n'
97
- 'Please install the model using the command:\n\n'
98
- f'python -m spacy download {model_name}\n\n'
99
- 'See https://spacy.io/usage/models for more information about available models.'
100
- ) from io_error
101
- else:
102
- raise io_error
93
+ self.tagger = Tagger(language=tagger_language)
94
+ except IOError as io_error:
95
+ # Return a more helpful error message if possible
96
+ if "Can't find model" in str(io_error):
97
+ model_name = utils.get_model_for_language(tagger_language)
98
+ if hasattr(tagger_language, 'ENGLISH_NAME'):
99
+ language_name = tagger_language.ENGLISH_NAME
100
+ else:
101
+ language_name = tagger_language
102
+ raise self.ChatBotException(
103
+ 'Setup error:\n'
104
+ f'The Spacy model for "{language_name}" language is missing.\n'
105
+ 'Please install the model using the command:\n\n'
106
+ f'python -m spacy download {model_name}\n\n'
107
+ 'See https://spacy.io/usage/models for more information about available models.'
108
+ ) from io_error
109
+ else:
110
+ raise io_error
103
111
 
112
+ # Initialize search algorithms
104
113
  primary_search_algorithm = IndexedTextSearch(self, **kwargs)
105
114
  text_search_algorithm = TextSearch(self, **kwargs)
115
+ semantic_vector_search_algorithm = SemanticVectorSearch(self, **kwargs)
106
116
 
107
117
  self.search_algorithms = {
108
118
  primary_search_algorithm.name: primary_search_algorithm,
109
- text_search_algorithm.name: text_search_algorithm
119
+ text_search_algorithm.name: text_search_algorithm,
120
+ semantic_vector_search_algorithm.name: semantic_vector_search_algorithm
110
121
  }
111
122
 
123
+ # Check if storage adapter has a preferred search algorithm
124
+ preferred_search_algorithm = self.storage.get_preferred_search_algorithm()
125
+ if preferred_search_algorithm and preferred_search_algorithm in self.search_algorithms:
126
+ # Set as default for logic adapters that don't specify their own search algorithm
127
+ # This ensures BestMatch and other adapters use the optimal search method
128
+ self.logger.info(f'Storage adapter prefers search algorithm: {preferred_search_algorithm}')
129
+ kwargs.setdefault('search_algorithm_name', preferred_search_algorithm)
130
+
112
131
  for adapter in logic_adapters:
113
132
  utils.validate_adapter_class(adapter, LogicAdapter)
114
133
  logic_adapter = utils.initialize_class(adapter, self, **kwargs)
@@ -191,15 +210,22 @@ class ChatBot(object):
191
210
  input_statement.in_response_to = previous_statement.text
192
211
 
193
212
  # Make sure the input statement has its search text saved
194
-
195
- if not input_statement.search_text:
196
- _search_text = self.tagger.get_text_index_string(input_statement.text)
197
- input_statement.search_text = _search_text
198
-
199
- if not input_statement.search_in_response_to and input_statement.in_response_to:
200
- input_statement.search_in_response_to = self.tagger.get_text_index_string(
201
- input_statement.in_response_to
202
- )
213
+ if not self.tagger.needs_text_indexing():
214
+ # Tagger doesn't transform text, use it directly
215
+ if not input_statement.search_text:
216
+ input_statement.search_text = input_statement.text
217
+ if not input_statement.search_in_response_to and input_statement.in_response_to:
218
+ input_statement.search_in_response_to = input_statement.in_response_to
219
+ else:
220
+ # Use tagger for text indexing or transformations
221
+ if not input_statement.search_text:
222
+ _search_text = self.tagger.get_text_index_string(input_statement.text)
223
+ input_statement.search_text = _search_text
224
+
225
+ if not input_statement.search_in_response_to and input_statement.in_response_to:
226
+ input_statement.search_in_response_to = self.tagger.get_text_index_string(
227
+ input_statement.in_response_to
228
+ )
203
229
 
204
230
  response = self.generate_response(
205
231
  input_statement,
@@ -157,3 +157,73 @@ class TextSearch:
157
157
  if confidence >= 1.0:
158
158
  self.chatbot.logger.info('Exact match found, stopping search')
159
159
  break
160
+
161
+
162
+ class SemanticVectorSearch:
163
+ """
164
+ Semantic vector search for storage adapters that use vector embeddings.
165
+ Does not require a tagger or comparison function - relies on the storage
166
+ adapter's native vector similarity search capabilities.
167
+
168
+ :param search_page_size:
169
+ The maximum number of records to load into memory at a time when searching.
170
+ Defaults to 1000
171
+ """
172
+
173
+ name = 'semantic_vector_search'
174
+
175
+ def __init__(self, chatbot, **kwargs):
176
+ self.chatbot = chatbot
177
+
178
+ self.search_page_size = kwargs.get(
179
+ 'search_page_size', 1000
180
+ )
181
+
182
+ def search(self, input_statement, **additional_parameters):
183
+ """
184
+ Search for semantically similar statements using vector similarity.
185
+ Confidence scores are calculated by the storage adapter based on
186
+ vector distances and returned in the results.
187
+
188
+ :param input_statement: A statement.
189
+ :type input_statement: chatterbot.conversation.Statement
190
+
191
+ :param **additional_parameters: Additional parameters to be passed
192
+ to the ``filter`` method of the storage adapter when searching.
193
+
194
+ :rtype: Generator yielding one closest matching statement at a time.
195
+ """
196
+ self.chatbot.logger.info('Beginning semantic vector search')
197
+
198
+ search_parameters = {
199
+ 'search_in_response_to_contains': input_statement.text,
200
+ 'persona_not_startswith': 'bot:',
201
+ 'page_size': self.search_page_size
202
+ }
203
+
204
+ if additional_parameters:
205
+ search_parameters.update(additional_parameters)
206
+
207
+ statement_list = self.chatbot.storage.filter(**search_parameters)
208
+
209
+ best_confidence_so_far = 0
210
+
211
+ self.chatbot.logger.info('Processing search results')
212
+
213
+ # Yield statements with confidence scores from vector similarity
214
+ for statement in statement_list:
215
+ # Confidence should already be set by the storage adapter
216
+ confidence = getattr(statement, 'confidence', 0.0)
217
+
218
+ if confidence > best_confidence_so_far:
219
+ best_confidence_so_far = confidence
220
+
221
+ self.chatbot.logger.info('Similar statement found: {} {}'.format(
222
+ statement.in_response_to, confidence
223
+ ))
224
+
225
+ yield statement
226
+
227
+ if confidence >= 1.0:
228
+ self.chatbot.logger.info('Exact match found, stopping search')
229
+ break
@@ -30,13 +30,19 @@ class RedisVectorStorageAdapter(StorageAdapter):
30
30
  in the future and its behavior has not yet been finalized.
31
31
 
32
32
  The RedisVectorStorageAdapter allows ChatterBot to store conversation
33
- data in a redis instance.
33
+ data in a redis instance using vector embeddings for semantic similarity search.
34
34
 
35
35
  All parameters are optional, by default a redis instance on localhost is assumed.
36
36
 
37
37
  :keyword database_uri: e.g. 'redis://localhost:6379/0'.
38
38
  The database_uri can be specified to choose a redis instance.
39
39
  :type database_uri: str
40
+
41
+ NOTES:
42
+ * Unlike other database based storage adapters, the RedisVectorStorageAdapter
43
+ does not leverage `search_text` and `search_in_response_to` fields for indexing.
44
+ Instead, it uses vector embeddings to find similar statements based on
45
+ semantic similarity. This allows for more flexible and context-aware matching.
40
46
  """
41
47
 
42
48
  class RedisMetaDataType:
@@ -100,6 +106,21 @@ class RedisVectorStorageAdapter(StorageAdapter):
100
106
 
101
107
  self.vector_store = RedisVectorStore(embeddings, config=config)
102
108
 
109
+ def get_preferred_tagger(self):
110
+ """
111
+ Redis uses vector embeddings and doesn't need POS-lemma indexing.
112
+ Returns NoOpTagger to avoid unnecessary spaCy processing.
113
+ """
114
+ from chatterbot.tagging import NoOpTagger
115
+ return NoOpTagger
116
+
117
+ def get_preferred_search_algorithm(self):
118
+ """
119
+ Redis uses semantic vector search instead of text-based matching.
120
+ Returns the name of the SemanticVectorSearch algorithm.
121
+ """
122
+ return 'semantic_vector_search'
123
+
103
124
  def get_statement_model(self):
104
125
  """
105
126
  Return the statement model.
@@ -127,6 +148,16 @@ class RedisVectorStorageAdapter(StorageAdapter):
127
148
 
128
149
  values.update(document.metadata)
129
150
 
151
+ # Convert Unix timestamp back to datetime for StatementObject
152
+ # Redis may return this as int, float, or string representation
153
+ if 'created_at' in values:
154
+ created_at_value = values['created_at']
155
+ if isinstance(created_at_value, str):
156
+ # Convert string to float first
157
+ created_at_value = float(created_at_value)
158
+ if isinstance(created_at_value, (int, float)):
159
+ values['created_at'] = datetime.fromtimestamp(created_at_value)
160
+
130
161
  tags = values['tags']
131
162
  values['tags'] = list(set(tags.split('|') if tags else []))
132
163
 
@@ -177,6 +208,7 @@ class RedisVectorStorageAdapter(StorageAdapter):
177
208
  - exclude_text
178
209
  - exclude_text_words
179
210
  - persona_not_startswith
211
+ - search_text_contains
180
212
  - search_in_response_to_contains
181
213
  - order_by
182
214
  """
@@ -245,27 +277,26 @@ class RedisVectorStorageAdapter(StorageAdapter):
245
277
  else:
246
278
  filter_condition = query
247
279
 
248
- # Handle search_text parameter (used by BestMatch logic adapter)
249
- # BestMatch uses search_text to find statements with matching indexed text.
250
- # Since Redis doesn't store search_text as a field, we approximate this by:
251
- # 1. Using the search_text value as a semantic query against in_response_to
252
- # 2. This finds statements that are responses to similar inputs
253
- # The effect is similar to BestMatch's Phase 2: finding alternate responses
254
- if 'search_text' in kwargs:
255
- _search_text = kwargs.get('search_text', '')
256
-
257
- # Get embedding for the search text
258
- # Note: search_text may be indexed (e.g., "NOUN:cat VERB:run") so this
259
- # approximates finding responses to semantically similar queries
260
- embedding = self.vector_store.embeddings.embed_query(_search_text)
280
+ if 'search_text_contains' in kwargs:
281
+ # Find statements whose text (responses) are similar.
282
+ #
283
+ # Use semantic similarity on the search query itself. This finds responses
284
+ # that would be semantically appropriate, even if they don't share exact words.
285
+ #
286
+ # Our vectors are of 'in_response_to' (what was said TO the bot),
287
+ # not 'text' (what the bot said). So we use the query as if it were an input,
288
+ # and find statements that would respond to similar inputs. The result is
289
+ # statements whose context (in_response_to) is similar, which tends to yield
290
+ # similar responses.
291
+ _search_query = kwargs['search_text_contains']
292
+
293
+ # Use vector similarity to find statements responding to similar contexts
294
+ embedding = self.vector_store.embeddings.embed_query(_search_query)
261
295
 
262
- # Build return fields from metadata schema
263
296
  return_fields = [
264
297
  'text', 'in_response_to', 'conversation', 'persona', 'tags', 'created_at'
265
298
  ]
266
299
 
267
- # Use direct index query via RedisVL
268
- # Search on the vectorized content (in_response_to) to find similar response patterns
269
300
  query = VectorQuery(
270
301
  vector=embedding,
271
302
  vector_field_name='embedding',
@@ -274,20 +305,35 @@ class RedisVectorStorageAdapter(StorageAdapter):
274
305
  filter_expression=filter_condition
275
306
  )
276
307
 
277
- # Execute query
278
308
  results = self.vector_store.index.query(query)
279
309
 
280
- # Convert results to Document objects
281
310
  Document = self.get_statement_model()
282
311
  documents = []
283
- for result in results:
284
- # Extract metadata and content
312
+
313
+ # Calculate confidence from vector distances
314
+ # Results are ordered by similarity (best match first)
315
+ for idx, result in enumerate(results):
285
316
  in_response_to = result.get('in_response_to', '')
286
317
 
287
- # Convert created_at from integer (YYMMDD) to datetime
288
- created_at_int = int(result.get('created_at', 0))
289
- if created_at_int:
290
- created_at = datetime.strptime(str(created_at_int), '%y%m%d')
318
+ # Redis vector_score is cosine distance (lower is better)
319
+ # Convert to confidence: confidence = 1 - (distance / 2), clamped at 0
320
+ # If vector_score not available, use result order
321
+ vector_score = result.get('vector_score')
322
+ if vector_score is not None:
323
+ # Cosine distance ranges from 0 (identical) to 2 (opposite)
324
+ # Normalize to confidence: 1.0 (identical) to 0.0 (opposite)
325
+ confidence = max(0.0, 1.0 - (float(vector_score) / 2.0))
326
+ else:
327
+ # Fallback: use result order (first result = highest confidence)
328
+ # Start at 0.95 for first result, decay by 0.05 per position
329
+ confidence = max(0.0, 0.95 - (idx * 0.05))
330
+
331
+ # Parse timestamp
332
+ created_at_value = result.get('created_at', 0)
333
+ if isinstance(created_at_value, str):
334
+ created_at = datetime.fromtimestamp(float(created_at_value))
335
+ elif created_at_value:
336
+ created_at = datetime.fromtimestamp(float(created_at_value))
291
337
  else:
292
338
  created_at = datetime.now()
293
339
 
@@ -297,6 +343,7 @@ class RedisVectorStorageAdapter(StorageAdapter):
297
343
  'persona': result.get('persona', ''),
298
344
  'tags': result.get('tags', ''),
299
345
  'created_at': created_at,
346
+ 'confidence': confidence,
300
347
  }
301
348
  doc = Document(
302
349
  page_content=in_response_to,
@@ -307,6 +354,23 @@ class RedisVectorStorageAdapter(StorageAdapter):
307
354
 
308
355
  return [self.model_to_object(document) for document in documents]
309
356
 
357
+ # Redis uses vector similarity: we search for statements whose actual
358
+ # text field is semantically similar to the text that produced this search_text.
359
+ # This is stored in the closest_match.text field, but BestMatch only passes
360
+ # search_text. Since we can't reverse POS tags to original text (for now),
361
+ # we treat this parameter as a signal to do text-based similarity search.
362
+ #
363
+ # Note: The caller should ideally pass the actual text, but for compatibility
364
+ # we'll work with what we receive. In practice, search_text_contains is the
365
+ # better parameter for this use case.
366
+ if 'search_text' in kwargs:
367
+ # For now, we'll treat search_text as a filter-only parameter
368
+ # and fall through to the regular query_search below.
369
+ # This prevents the broken behavior of embedding POS tags.
370
+ # The proper fix requires BestMatch to pass additional context
371
+ # or use search_text_contains instead.
372
+ pass
373
+
310
374
  ordering = kwargs.get('order_by', None)
311
375
 
312
376
  if ordering:
@@ -341,14 +405,31 @@ class RedisVectorStorageAdapter(StorageAdapter):
341
405
  # Convert results to Document objects
342
406
  Document = self.get_statement_model()
343
407
  documents = []
344
- for result in results:
408
+
409
+ # Calculate confidence from vector distances
410
+ # Results are ordered by similarity (best match first)
411
+ for idx, result in enumerate(results):
345
412
  # Extract metadata and content
346
413
  in_response_to = result.get('in_response_to', '')
347
414
 
348
- # Convert created_at from integer (YYMMDD) to datetime
349
- created_at_int = int(result.get('created_at', 0))
350
- if created_at_int:
351
- created_at = datetime.strptime(str(created_at_int), '%y%m%d')
415
+ # Redis vector_score is cosine distance (lower is better)
416
+ # Convert to confidence: confidence = 1 - (distance / 2), clamped at 0
417
+ # If vector_score not available, use result order
418
+ vector_score = result.get('vector_score')
419
+ if vector_score is not None:
420
+ # Cosine distance ranges from 0 (identical) to 2 (opposite)
421
+ # Normalize to confidence: 1.0 (identical) to 0.0 (opposite)
422
+ confidence = max(0.0, 1.0 - (float(vector_score) / 2.0))
423
+ else:
424
+ # Fallback: use result order (first result = highest confidence)
425
+ # Start at 0.95 for first result, decay by 0.05 per position
426
+ confidence = max(0.0, 0.95 - (idx * 0.05))
427
+
428
+ # Convert Unix timestamp back to datetime
429
+ # Redis returns numeric fields as strings
430
+ created_at_timestamp = result.get('created_at', '0')
431
+ if created_at_timestamp and created_at_timestamp != '0':
432
+ created_at = datetime.fromtimestamp(float(created_at_timestamp))
352
433
  else:
353
434
  created_at = datetime.now()
354
435
 
@@ -358,6 +439,7 @@ class RedisVectorStorageAdapter(StorageAdapter):
358
439
  'persona': result.get('persona', ''),
359
440
  'tags': result.get('tags', ''),
360
441
  'created_at': created_at,
442
+ 'confidence': confidence,
361
443
  }
362
444
  doc = Document(
363
445
  page_content=in_response_to,
@@ -395,9 +477,9 @@ class RedisVectorStorageAdapter(StorageAdapter):
395
477
  metadata = {
396
478
  'text': text,
397
479
  'category': kwargs.get('category', ''),
398
- # NOTE: `created_at` must have a valid numeric value or results will
399
- # not be returned for similarity_search for some reason
400
- 'created_at': kwargs.get('created_at') or int(_default_date.strftime('%y%m%d')),
480
+ # Store created_at as Unix timestamp with microseconds (float)
481
+ # This provides full datetime precision while maintaining Redis NUMERIC field compatibility
482
+ 'created_at': kwargs.get('created_at') or _default_date.timestamp(),
401
483
  'tags': '|'.join(unique_tags) if unique_tags else '',
402
484
  'conversation': kwargs.get('conversation', ''),
403
485
  'persona': kwargs.get('persona', ''),
@@ -427,7 +509,7 @@ class RedisVectorStorageAdapter(StorageAdapter):
427
509
  metadata={
428
510
  'text': statement.text,
429
511
  'conversation': statement.conversation or '',
430
- 'created_at': int(statement.created_at.strftime('%y%m%d')),
512
+ 'created_at': statement.created_at.timestamp(),
431
513
  'persona': statement.persona or '',
432
514
  # Prevent duplicate tag entries in the database
433
515
  'tags': '|'.join(
@@ -452,7 +534,7 @@ class RedisVectorStorageAdapter(StorageAdapter):
452
534
  metadata = {
453
535
  'text': statement.text,
454
536
  'conversation': statement.conversation or '',
455
- 'created_at': int(statement.created_at.strftime('%y%m%d')),
537
+ 'created_at': statement.created_at.timestamp(),
456
538
  'persona': statement.persona or '',
457
539
  'tags': '|'.join(unique_tags) if unique_tags else '',
458
540
  }
@@ -508,11 +590,9 @@ class RedisVectorStorageAdapter(StorageAdapter):
508
590
  # Parse the metadata
509
591
  metadata = json.loads(data[b'_metadata_json'].decode())
510
592
 
511
- # Convert created_at from integer (YYMMDD) back to datetime
512
- if 'created_at' in metadata and isinstance(metadata['created_at'], int):
513
- created_at_str = str(metadata['created_at'])
514
- # Parse YYMMDD format
515
- metadata['created_at'] = datetime.strptime(created_at_str, '%y%m%d')
593
+ # Convert created_at from Unix timestamp back to datetime
594
+ if 'created_at' in metadata and isinstance(metadata['created_at'], (int, float)):
595
+ metadata['created_at'] = datetime.fromtimestamp(metadata['created_at'])
516
596
 
517
597
  # Get the in_response_to from the hash
518
598
  in_response_to = data.get(b'in_response_to', b'').decode()
@@ -173,6 +173,87 @@ class StorageAdapter(object):
173
173
  """
174
174
  pass
175
175
 
176
def get_preferred_tagger(self):
    """
    Report which tagger class this storage adapter would like to use.

    The base implementation expresses no preference and returns ``None``,
    which means the default tagger will be used. Subclasses override
    this method to name the tagger that best matches the way they search.

    Taggers that can be returned:

    - NoOpTagger: leaves text untouched (for vector-based storage).
        Skips loading a spaCy model (~500MB memory saved).
        Starts faster (<1 second vs 2-5 seconds).
        Appropriate when the storage performs semantic search natively.

    - PosLemmaTagger: builds POS-lemma bigrams (default, for SQL).
        Allows pattern matching (e.g., "NOUN:cat VERB:run").
        Needs a spaCy language model.
        Best suited to exact phrase matching.

    - LowercaseTagger: plain lowercase transformation.
        Very little processing overhead.
        Gives case-insensitive matching.

    Example - Vector Storage::

        def get_preferred_tagger(self):
            from chatterbot.tagging import NoOpTagger
            return NoOpTagger

    Example - Traditional Storage::

        def get_preferred_tagger(self):
            return None  # Use default PosLemmaTagger

    :return: Tagger class or None
    """
    # No preference at the base-class level.
    return None
214
+
215
def get_preferred_search_algorithm(self):
    """
    Report the name of the search algorithm this storage adapter prefers.

    The base implementation expresses no preference and returns ``None``,
    which means the default search algorithm will be used. Subclasses
    override this method to name the algorithm that fits their
    capabilities.

    Search algorithm names that can be returned:

    - 'indexed_text_search' (default):
        Works on POS-lemma indexed fields (search_text,
        search_in_response_to).
        Compares candidates in Python using Levenshtein distance.
        Requires PosLemmaTagger.
        Best for: Exact pattern matching.

    - 'semantic_vector_search':
        Works on raw text using vector similarity.
        Delegates to storage.filter(search_in_response_to_contains=text).
        Needs no tagger (works with NoOpTagger).
        Confidence is supplied by the storage adapter (cosine similarity).
        Best for: Context-aware AI responses, semantic understanding.

    - 'text_search' (fallback):
        Compares raw text without using indexes.
        Slower, but works with any storage.
        Applies comparison functions to all statements.

    Example - Vector Storage::

        def get_preferred_search_algorithm(self):
            return 'semantic_vector_search'

    Example - SQL Storage::

        def get_preferred_search_algorithm(self):
            return None  # Use default 'indexed_text_search'

    :return: Search algorithm name string or None
    """
    # No preference at the base-class level.
    return None
256
+
176
257
  class EmptyDatabaseException(Exception):
177
258
 
178
259
  def __init__(self, message=None):
@@ -4,6 +4,56 @@ from chatterbot.utils import get_model_for_language
4
4
  import spacy
5
5
 
6
6
 
7
class NoOpTagger(object):
    """
    A no-operation tagger that returns text unchanged.

    Used by storage adapters that don't rely on indexed search_text
    fields. It exposes the same methods as the other taggers so it can
    be swapped in without the caller changing how it invokes the tagger.
    """

    def __init__(self, language=None):
        """
        :param language: Language object to associate with this tagger.
            Falls back to ``languages.ENG`` when omitted or falsy.
        """
        self.language = language or languages.ENG

    def needs_text_indexing(self):
        """
        Indicates whether this tagger performs text indexing/transformation.
        Returns False since NoOpTagger passes text through unchanged.

        :return: False
        """
        return False

    def get_text_index_string(self, text: Union[str, List[str]]):
        """
        Return the text unchanged (no indexing applied).

        :param text: A single string or a list of strings.
        :return: The very same object that was passed in.
        """
        return text

    def as_nlp_pipeline(
        self,
        texts: Union[List[str], List[Tuple[str, dict]]],
        batch_size: int = 1000,
        n_process: int = 1
    ):
        """
        Yield the given items unchanged, without any NLP processing.
        Maintains API compatibility with other taggers.

        Items are passed straight through, so plain strings come out as
        strings and ``(text, context_dict)`` tuples come out as the same
        tuples. Because nothing is inspected up front, any iterable is
        accepted -- including generators, which cannot be indexed.

        :param texts: Text strings or tuples of (text, context_dict)
        :param batch_size: Ignored (for API compatibility)
        :param n_process: Ignored (for API compatibility)
        :return: Generator over the items of ``texts`` in their original order
        """
        # Both input shapes are returned verbatim, so there is no need to
        # peek at the first element to decide how to iterate.
        yield from texts
55
+
56
+
7
57
  class LowercaseTagger(object):
8
58
  """
9
59
  Returns the text in lowercase.
@@ -21,6 +71,15 @@ class LowercaseTagger(object):
21
71
  'chatterbot_lowercase_indexer', name='chatterbot_lowercase_indexer', last=True
22
72
  )
23
73
 
74
def needs_text_indexing(self):
    """
    Tell callers whether this tagger indexes/transforms the text it is given.

    LowercaseTagger converts text to lowercase, so the answer is
    always True.

    :return: True
    """
    return True
82
+
24
83
  def get_text_index_string(self, text: Union[str, List[str]]):
25
84
  if isinstance(text, list):
26
85
  documents = self.nlp.pipe(text, batch_size=1000, n_process=1)
@@ -73,6 +132,15 @@ class PosLemmaTagger(object):
73
132
  'chatterbot_bigram_indexer', name='chatterbot_bigram_indexer', last=True
74
133
  )
75
134
 
135
def needs_text_indexing(self):
    """
    Tell callers whether this tagger indexes/transforms the text it is given.

    PosLemmaTagger builds POS-lemma bigram indexes, so the answer is
    always True.

    :return: True
    """
    return True
143
+
76
144
  def get_text_index_string(self, text: Union[str, List[str]]) -> str:
77
145
  """
78
146
  Return a string of text containing part-of-speech, lemma pairs.
File without changes
File without changes
File without changes