ChatterBot 1.2.9__tar.gz → 1.2.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {chatterbot-1.2.9 → chatterbot-1.2.11}/ChatterBot.egg-info/PKG-INFO +4 -10
- {chatterbot-1.2.9 → chatterbot-1.2.11}/ChatterBot.egg-info/SOURCES.txt +2 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/PKG-INFO +4 -10
- {chatterbot-1.2.9 → chatterbot-1.2.11}/README.md +4 -10
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/__init__.py +1 -1
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/chatterbot.py +62 -36
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/search.py +70 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/storage/redis.py +120 -40
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/storage/sql_storage.py +153 -112
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/storage/storage_adapter.py +81 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/tagging.py +68 -0
- chatterbot-1.2.11/tests/test_connection_pool.py +268 -0
- chatterbot-1.2.11/tests/test_poc_vulnerability.py +152 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/ChatterBot.egg-info/dependency_links.txt +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/ChatterBot.egg-info/requires.txt +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/ChatterBot.egg-info/top_level.txt +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/LICENSE +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/__main__.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/adapters.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/comparisons.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/components.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/constants.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/conversation.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/corpus.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/exceptions.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/ext/__init__.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/ext/django_chatterbot/__init__.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/ext/django_chatterbot/abstract_models.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/ext/django_chatterbot/admin.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/ext/django_chatterbot/apps.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/ext/django_chatterbot/migrations/0001_initial.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/ext/django_chatterbot/migrations/0002_statement_extra_data.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/ext/django_chatterbot/migrations/0003_change_occurrence_default.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/ext/django_chatterbot/migrations/0004_rename_in_response_to.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/ext/django_chatterbot/migrations/0005_statement_created_at.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/ext/django_chatterbot/migrations/0006_create_conversation.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/ext/django_chatterbot/migrations/0007_response_created_at.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/ext/django_chatterbot/migrations/0008_update_conversations.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/ext/django_chatterbot/migrations/0009_tags.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/ext/django_chatterbot/migrations/0010_statement_text.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/ext/django_chatterbot/migrations/0011_blank_extra_data.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/ext/django_chatterbot/migrations/0012_statement_created_at.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/ext/django_chatterbot/migrations/0013_change_conversations.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/ext/django_chatterbot/migrations/0014_remove_statement_extra_data.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/ext/django_chatterbot/migrations/0015_statement_persona.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/ext/django_chatterbot/migrations/0016_statement_stemmed_text.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/ext/django_chatterbot/migrations/0017_tags_unique.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/ext/django_chatterbot/migrations/0018_text_max_length.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/ext/django_chatterbot/migrations/0019_alter_statement_id_alter_tag_id_and_more.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/ext/django_chatterbot/migrations/0020_alter_statement_conversation_and_more.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/ext/django_chatterbot/migrations/__init__.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/ext/django_chatterbot/model_admin.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/ext/django_chatterbot/models.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/ext/django_chatterbot/settings.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/ext/sqlalchemy_app/__init__.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/ext/sqlalchemy_app/models.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/filters.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/languages.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/llm.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/logic/__init__.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/logic/best_match.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/logic/logic_adapter.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/logic/mathematical_evaluation.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/logic/specific_response.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/logic/time_adapter.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/logic/unit_conversion.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/parsing.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/preprocessors.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/response_selection.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/storage/__init__.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/storage/django_storage.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/storage/mongodb.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/trainers.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/utils.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/chatterbot/vectorstores.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/pyproject.toml +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/setup.cfg +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/tests/test_adapter_validation.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/tests/test_benchmarks.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/tests/test_chatbot.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/tests/test_cli.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/tests/test_comparisons.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/tests/test_conversations.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/tests/test_corpus.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/tests/test_examples.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/tests/test_filters.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/tests/test_initialization.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/tests/test_languages.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/tests/test_parsing.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/tests/test_preprocessors.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/tests/test_response_selection.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/tests/test_search.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/tests/test_tagging.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/tests/test_turing.py +0 -0
- {chatterbot-1.2.9 → chatterbot-1.2.11}/tests/test_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ChatterBot
|
|
3
|
-
Version: 1.2.9
|
|
3
|
+
Version: 1.2.11
|
|
4
4
|
Summary: ChatterBot is a machine learning, conversational dialog engine
|
|
5
5
|
Author: Gunther Cox
|
|
6
6
|
License-Expression: BSD-3-Clause
|
|
@@ -153,16 +153,10 @@ section of the documentation.
|
|
|
153
153
|
|
|
154
154
|
See release notes for changes https://github.com/gunthercox/ChatterBot/releases
|
|
155
155
|
|
|
156
|
-
#
|
|
156
|
+
# Contributing
|
|
157
157
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
2. Make your changes in a branch named something different from `master`, e.g. create
|
|
161
|
-
a new branch `my-pull-request`.
|
|
162
|
-
3. [Create a pull request](https://help.github.com/articles/creating-a-pull-request/).
|
|
163
|
-
4. Please follow the [Python style guide for PEP-8](https://www.python.org/dev/peps/pep-0008/).
|
|
164
|
-
5. Use the projects [built-in automated testing](https://docs.chatterbot.us/testing/).
|
|
165
|
-
to help make sure that your contribution is free from errors.
|
|
158
|
+
Contributions are welcomed, to help ensure a smooth process please start with the contributing guidelines in our documentation:
|
|
159
|
+
https://docs.chatterbot.us/contributing/
|
|
166
160
|
|
|
167
161
|
# Sponsors
|
|
168
162
|
|
|
@@ -77,6 +77,7 @@ tests/test_benchmarks.py
|
|
|
77
77
|
tests/test_chatbot.py
|
|
78
78
|
tests/test_cli.py
|
|
79
79
|
tests/test_comparisons.py
|
|
80
|
+
tests/test_connection_pool.py
|
|
80
81
|
tests/test_conversations.py
|
|
81
82
|
tests/test_corpus.py
|
|
82
83
|
tests/test_examples.py
|
|
@@ -84,6 +85,7 @@ tests/test_filters.py
|
|
|
84
85
|
tests/test_initialization.py
|
|
85
86
|
tests/test_languages.py
|
|
86
87
|
tests/test_parsing.py
|
|
88
|
+
tests/test_poc_vulnerability.py
|
|
87
89
|
tests/test_preprocessors.py
|
|
88
90
|
tests/test_response_selection.py
|
|
89
91
|
tests/test_search.py
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ChatterBot
|
|
3
|
-
Version: 1.2.9
|
|
3
|
+
Version: 1.2.11
|
|
4
4
|
Summary: ChatterBot is a machine learning, conversational dialog engine
|
|
5
5
|
Author: Gunther Cox
|
|
6
6
|
License-Expression: BSD-3-Clause
|
|
@@ -153,16 +153,10 @@ section of the documentation.
|
|
|
153
153
|
|
|
154
154
|
See release notes for changes https://github.com/gunthercox/ChatterBot/releases
|
|
155
155
|
|
|
156
|
-
#
|
|
156
|
+
# Contributing
|
|
157
157
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
2. Make your changes in a branch named something different from `master`, e.g. create
|
|
161
|
-
a new branch `my-pull-request`.
|
|
162
|
-
3. [Create a pull request](https://help.github.com/articles/creating-a-pull-request/).
|
|
163
|
-
4. Please follow the [Python style guide for PEP-8](https://www.python.org/dev/peps/pep-0008/).
|
|
164
|
-
5. Use the projects [built-in automated testing](https://docs.chatterbot.us/testing/).
|
|
165
|
-
to help make sure that your contribution is free from errors.
|
|
158
|
+
Contributions are welcomed, to help ensure a smooth process please start with the contributing guidelines in our documentation:
|
|
159
|
+
https://docs.chatterbot.us/contributing/
|
|
166
160
|
|
|
167
161
|
# Sponsors
|
|
168
162
|
|
|
@@ -92,16 +92,10 @@ section of the documentation.
|
|
|
92
92
|
|
|
93
93
|
See release notes for changes https://github.com/gunthercox/ChatterBot/releases
|
|
94
94
|
|
|
95
|
-
#
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
2. Make your changes in a branch named something different from `master`, e.g. create
|
|
100
|
-
a new branch `my-pull-request`.
|
|
101
|
-
3. [Create a pull request](https://help.github.com/articles/creating-a-pull-request/).
|
|
102
|
-
4. Please follow the [Python style guide for PEP-8](https://www.python.org/dev/peps/pep-0008/).
|
|
103
|
-
5. Use the projects [built-in automated testing](https://docs.chatterbot.us/testing/).
|
|
104
|
-
to help make sure that your contribution is free from errors.
|
|
95
|
+
# Contributing
|
|
96
|
+
|
|
97
|
+
Contributions are welcomed, to help ensure a smooth process please start with the contributing guidelines in our documentation:
|
|
98
|
+
https://docs.chatterbot.us/contributing/
|
|
105
99
|
|
|
106
100
|
# Sponsors
|
|
107
101
|
|
|
@@ -2,7 +2,7 @@ import logging
|
|
|
2
2
|
from typing import Union
|
|
3
3
|
from chatterbot.storage import StorageAdapter
|
|
4
4
|
from chatterbot.logic import LogicAdapter
|
|
5
|
-
from chatterbot.search import TextSearch, IndexedTextSearch
|
|
5
|
+
from chatterbot.search import TextSearch, IndexedTextSearch, SemanticVectorSearch
|
|
6
6
|
from chatterbot.tagging import PosLemmaTagger
|
|
7
7
|
from chatterbot.conversation import Statement
|
|
8
8
|
from chatterbot import languages
|
|
@@ -74,41 +74,60 @@ class ChatBot(object):
|
|
|
74
74
|
|
|
75
75
|
tagger_language = kwargs.get('tagger_language', languages.ENG)
|
|
76
76
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
#
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
if
|
|
91
|
-
|
|
77
|
+
# Check if storage adapter has a preferred tagger
|
|
78
|
+
PreferredTagger = self.storage.get_preferred_tagger()
|
|
79
|
+
|
|
80
|
+
if PreferredTagger is not None:
|
|
81
|
+
# Storage adapter specifies its own tagger
|
|
82
|
+
self.tagger = PreferredTagger(language=tagger_language)
|
|
83
|
+
else:
|
|
84
|
+
# Use default or user-specified tagger
|
|
85
|
+
try:
|
|
86
|
+
Tagger = kwargs.get('tagger', PosLemmaTagger)
|
|
87
|
+
|
|
88
|
+
# Allow instances to be provided for performance optimization
|
|
89
|
+
# (Example: a pre-loaded model in a tagger when unit testing)
|
|
90
|
+
if not isinstance(Tagger, type):
|
|
91
|
+
self.tagger = Tagger
|
|
92
92
|
else:
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
93
|
+
self.tagger = Tagger(language=tagger_language)
|
|
94
|
+
except IOError as io_error:
|
|
95
|
+
# Return a more helpful error message if possible
|
|
96
|
+
if "Can't find model" in str(io_error):
|
|
97
|
+
model_name = utils.get_model_for_language(tagger_language)
|
|
98
|
+
if hasattr(tagger_language, 'ENGLISH_NAME'):
|
|
99
|
+
language_name = tagger_language.ENGLISH_NAME
|
|
100
|
+
else:
|
|
101
|
+
language_name = tagger_language
|
|
102
|
+
raise self.ChatBotException(
|
|
103
|
+
'Setup error:\n'
|
|
104
|
+
f'The Spacy model for "{language_name}" language is missing.\n'
|
|
105
|
+
'Please install the model using the command:\n\n'
|
|
106
|
+
f'python -m spacy download {model_name}\n\n'
|
|
107
|
+
'See https://spacy.io/usage/models for more information about available models.'
|
|
108
|
+
) from io_error
|
|
109
|
+
else:
|
|
110
|
+
raise io_error
|
|
103
111
|
|
|
112
|
+
# Initialize search algorithms
|
|
104
113
|
primary_search_algorithm = IndexedTextSearch(self, **kwargs)
|
|
105
114
|
text_search_algorithm = TextSearch(self, **kwargs)
|
|
115
|
+
semantic_vector_search_algorithm = SemanticVectorSearch(self, **kwargs)
|
|
106
116
|
|
|
107
117
|
self.search_algorithms = {
|
|
108
118
|
primary_search_algorithm.name: primary_search_algorithm,
|
|
109
|
-
text_search_algorithm.name: text_search_algorithm
|
|
119
|
+
text_search_algorithm.name: text_search_algorithm,
|
|
120
|
+
semantic_vector_search_algorithm.name: semantic_vector_search_algorithm
|
|
110
121
|
}
|
|
111
122
|
|
|
123
|
+
# Check if storage adapter has a preferred search algorithm
|
|
124
|
+
preferred_search_algorithm = self.storage.get_preferred_search_algorithm()
|
|
125
|
+
if preferred_search_algorithm and preferred_search_algorithm in self.search_algorithms:
|
|
126
|
+
# Set as default for logic adapters that don't specify their own search algorithm
|
|
127
|
+
# This ensures BestMatch and other adapters use the optimal search method
|
|
128
|
+
self.logger.info(f'Storage adapter prefers search algorithm: {preferred_search_algorithm}')
|
|
129
|
+
kwargs.setdefault('search_algorithm_name', preferred_search_algorithm)
|
|
130
|
+
|
|
112
131
|
for adapter in logic_adapters:
|
|
113
132
|
utils.validate_adapter_class(adapter, LogicAdapter)
|
|
114
133
|
logic_adapter = utils.initialize_class(adapter, self, **kwargs)
|
|
@@ -191,15 +210,22 @@ class ChatBot(object):
|
|
|
191
210
|
input_statement.in_response_to = previous_statement.text
|
|
192
211
|
|
|
193
212
|
# Make sure the input statement has its search text saved
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
213
|
+
if not self.tagger.needs_text_indexing():
|
|
214
|
+
# Tagger doesn't transform text, use it directly
|
|
215
|
+
if not input_statement.search_text:
|
|
216
|
+
input_statement.search_text = input_statement.text
|
|
217
|
+
if not input_statement.search_in_response_to and input_statement.in_response_to:
|
|
218
|
+
input_statement.search_in_response_to = input_statement.in_response_to
|
|
219
|
+
else:
|
|
220
|
+
# Use tagger for text indexing or transformations
|
|
221
|
+
if not input_statement.search_text:
|
|
222
|
+
_search_text = self.tagger.get_text_index_string(input_statement.text)
|
|
223
|
+
input_statement.search_text = _search_text
|
|
224
|
+
|
|
225
|
+
if not input_statement.search_in_response_to and input_statement.in_response_to:
|
|
226
|
+
input_statement.search_in_response_to = self.tagger.get_text_index_string(
|
|
227
|
+
input_statement.in_response_to
|
|
228
|
+
)
|
|
203
229
|
|
|
204
230
|
response = self.generate_response(
|
|
205
231
|
input_statement,
|
|
@@ -157,3 +157,73 @@ class TextSearch:
|
|
|
157
157
|
if confidence >= 1.0:
|
|
158
158
|
self.chatbot.logger.info('Exact match found, stopping search')
|
|
159
159
|
break
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
class SemanticVectorSearch:
|
|
163
|
+
"""
|
|
164
|
+
Semantic vector search for storage adapters that use vector embeddings.
|
|
165
|
+
Does not require a tagger or comparison function - relies on the storage
|
|
166
|
+
adapter's native vector similarity search capabilities.
|
|
167
|
+
|
|
168
|
+
:param search_page_size:
|
|
169
|
+
The maximum number of records to load into memory at a time when searching.
|
|
170
|
+
Defaults to 1000
|
|
171
|
+
"""
|
|
172
|
+
|
|
173
|
+
name = 'semantic_vector_search'
|
|
174
|
+
|
|
175
|
+
def __init__(self, chatbot, **kwargs):
|
|
176
|
+
self.chatbot = chatbot
|
|
177
|
+
|
|
178
|
+
self.search_page_size = kwargs.get(
|
|
179
|
+
'search_page_size', 1000
|
|
180
|
+
)
|
|
181
|
+
|
|
182
|
+
def search(self, input_statement, **additional_parameters):
|
|
183
|
+
"""
|
|
184
|
+
Search for semantically similar statements using vector similarity.
|
|
185
|
+
Confidence scores are calculated by the storage adapter based on
|
|
186
|
+
vector distances and returned in the results.
|
|
187
|
+
|
|
188
|
+
:param input_statement: A statement.
|
|
189
|
+
:type input_statement: chatterbot.conversation.Statement
|
|
190
|
+
|
|
191
|
+
:param **additional_parameters: Additional parameters to be passed
|
|
192
|
+
to the ``filter`` method of the storage adapter when searching.
|
|
193
|
+
|
|
194
|
+
:rtype: Generator yielding one closest matching statement at a time.
|
|
195
|
+
"""
|
|
196
|
+
self.chatbot.logger.info('Beginning semantic vector search')
|
|
197
|
+
|
|
198
|
+
search_parameters = {
|
|
199
|
+
'search_in_response_to_contains': input_statement.text,
|
|
200
|
+
'persona_not_startswith': 'bot:',
|
|
201
|
+
'page_size': self.search_page_size
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
if additional_parameters:
|
|
205
|
+
search_parameters.update(additional_parameters)
|
|
206
|
+
|
|
207
|
+
statement_list = self.chatbot.storage.filter(**search_parameters)
|
|
208
|
+
|
|
209
|
+
best_confidence_so_far = 0
|
|
210
|
+
|
|
211
|
+
self.chatbot.logger.info('Processing search results')
|
|
212
|
+
|
|
213
|
+
# Yield statements with confidence scores from vector similarity
|
|
214
|
+
for statement in statement_list:
|
|
215
|
+
# Confidence should already be set by the storage adapter
|
|
216
|
+
confidence = getattr(statement, 'confidence', 0.0)
|
|
217
|
+
|
|
218
|
+
if confidence > best_confidence_so_far:
|
|
219
|
+
best_confidence_so_far = confidence
|
|
220
|
+
|
|
221
|
+
self.chatbot.logger.info('Similar statement found: {} {}'.format(
|
|
222
|
+
statement.in_response_to, confidence
|
|
223
|
+
))
|
|
224
|
+
|
|
225
|
+
yield statement
|
|
226
|
+
|
|
227
|
+
if confidence >= 1.0:
|
|
228
|
+
self.chatbot.logger.info('Exact match found, stopping search')
|
|
229
|
+
break
|
|
@@ -30,13 +30,19 @@ class RedisVectorStorageAdapter(StorageAdapter):
|
|
|
30
30
|
in the future and its behavior has not yet been finalized.
|
|
31
31
|
|
|
32
32
|
The RedisVectorStorageAdapter allows ChatterBot to store conversation
|
|
33
|
-
data in a redis instance.
|
|
33
|
+
data in a redis instance using vector embeddings for semantic similarity search.
|
|
34
34
|
|
|
35
35
|
All parameters are optional, by default a redis instance on localhost is assumed.
|
|
36
36
|
|
|
37
37
|
:keyword database_uri: eg: redis://localhost:6379/0',
|
|
38
38
|
The database_uri can be specified to choose a redis instance.
|
|
39
39
|
:type database_uri: str
|
|
40
|
+
|
|
41
|
+
NOTES:
|
|
42
|
+
* Unlike other database based storage adapters, the RedisVectorStorageAdapter
|
|
43
|
+
does not leverage `search_text` and `search_in_response_to` fields for indexing.
|
|
44
|
+
Instead, it uses vector embeddings to find similar statements based on
|
|
45
|
+
semantic similarity. This allows for more flexible and context-aware matching.
|
|
40
46
|
"""
|
|
41
47
|
|
|
42
48
|
class RedisMetaDataType:
|
|
@@ -100,6 +106,21 @@ class RedisVectorStorageAdapter(StorageAdapter):
|
|
|
100
106
|
|
|
101
107
|
self.vector_store = RedisVectorStore(embeddings, config=config)
|
|
102
108
|
|
|
109
|
+
def get_preferred_tagger(self):
|
|
110
|
+
"""
|
|
111
|
+
Redis uses vector embeddings and doesn't need POS-lemma indexing.
|
|
112
|
+
Returns NoOpTagger to avoid unnecessary spaCy processing.
|
|
113
|
+
"""
|
|
114
|
+
from chatterbot.tagging import NoOpTagger
|
|
115
|
+
return NoOpTagger
|
|
116
|
+
|
|
117
|
+
def get_preferred_search_algorithm(self):
|
|
118
|
+
"""
|
|
119
|
+
Redis uses semantic vector search instead of text-based matching.
|
|
120
|
+
Returns the name of the SemanticVectorSearch algorithm.
|
|
121
|
+
"""
|
|
122
|
+
return 'semantic_vector_search'
|
|
123
|
+
|
|
103
124
|
def get_statement_model(self):
|
|
104
125
|
"""
|
|
105
126
|
Return the statement model.
|
|
@@ -127,6 +148,16 @@ class RedisVectorStorageAdapter(StorageAdapter):
|
|
|
127
148
|
|
|
128
149
|
values.update(document.metadata)
|
|
129
150
|
|
|
151
|
+
# Convert Unix timestamp back to datetime for StatementObject
|
|
152
|
+
# Redis may return this as int, float, or string representation
|
|
153
|
+
if 'created_at' in values:
|
|
154
|
+
created_at_value = values['created_at']
|
|
155
|
+
if isinstance(created_at_value, str):
|
|
156
|
+
# Convert string to float first
|
|
157
|
+
created_at_value = float(created_at_value)
|
|
158
|
+
if isinstance(created_at_value, (int, float)):
|
|
159
|
+
values['created_at'] = datetime.fromtimestamp(created_at_value)
|
|
160
|
+
|
|
130
161
|
tags = values['tags']
|
|
131
162
|
values['tags'] = list(set(tags.split('|') if tags else []))
|
|
132
163
|
|
|
@@ -177,6 +208,7 @@ class RedisVectorStorageAdapter(StorageAdapter):
|
|
|
177
208
|
- exclude_text
|
|
178
209
|
- exclude_text_words
|
|
179
210
|
- persona_not_startswith
|
|
211
|
+
- search_text_contains
|
|
180
212
|
- search_in_response_to_contains
|
|
181
213
|
- order_by
|
|
182
214
|
"""
|
|
@@ -245,27 +277,26 @@ class RedisVectorStorageAdapter(StorageAdapter):
|
|
|
245
277
|
else:
|
|
246
278
|
filter_condition = query
|
|
247
279
|
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
#
|
|
258
|
-
#
|
|
259
|
-
|
|
260
|
-
|
|
280
|
+
if 'search_text_contains' in kwargs:
|
|
281
|
+
# Find statements whose text (responses) are similar.
|
|
282
|
+
#
|
|
283
|
+
# Use semantic similarity on the search query itself. This finds responses
|
|
284
|
+
# that would be semantically appropriate, even if they don't share exact words.
|
|
285
|
+
#
|
|
286
|
+
# Our vectors are of 'in_response_to' (what was said TO the bot),
|
|
287
|
+
# not 'text' (what the bot said). So we use the query as if it were an input,
|
|
288
|
+
# and find statements that would respond to similar inputs. The result is
|
|
289
|
+
# statements whose context (in_response_to) is similar, which tends to yield
|
|
290
|
+
# similar responses.
|
|
291
|
+
_search_query = kwargs['search_text_contains']
|
|
292
|
+
|
|
293
|
+
# Use vector similarity to find statements responding to similar contexts
|
|
294
|
+
embedding = self.vector_store.embeddings.embed_query(_search_query)
|
|
261
295
|
|
|
262
|
-
# Build return fields from metadata schema
|
|
263
296
|
return_fields = [
|
|
264
297
|
'text', 'in_response_to', 'conversation', 'persona', 'tags', 'created_at'
|
|
265
298
|
]
|
|
266
299
|
|
|
267
|
-
# Use direct index query via RedisVL
|
|
268
|
-
# Search on the vectorized content (in_response_to) to find similar response patterns
|
|
269
300
|
query = VectorQuery(
|
|
270
301
|
vector=embedding,
|
|
271
302
|
vector_field_name='embedding',
|
|
@@ -274,20 +305,35 @@ class RedisVectorStorageAdapter(StorageAdapter):
|
|
|
274
305
|
filter_expression=filter_condition
|
|
275
306
|
)
|
|
276
307
|
|
|
277
|
-
# Execute query
|
|
278
308
|
results = self.vector_store.index.query(query)
|
|
279
309
|
|
|
280
|
-
# Convert results to Document objects
|
|
281
310
|
Document = self.get_statement_model()
|
|
282
311
|
documents = []
|
|
283
|
-
|
|
284
|
-
|
|
312
|
+
|
|
313
|
+
# Calculate confidence from vector distances
|
|
314
|
+
# Results are ordered by similarity (best match first)
|
|
315
|
+
for idx, result in enumerate(results):
|
|
285
316
|
in_response_to = result.get('in_response_to', '')
|
|
286
317
|
|
|
287
|
-
#
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
318
|
+
# Redis vector_score is cosine distance (lower is better)
|
|
319
|
+
# Convert to confidence: confidence = 1 - distance
|
|
320
|
+
# If vector_score not available, use result order
|
|
321
|
+
vector_score = result.get('vector_score')
|
|
322
|
+
if vector_score is not None:
|
|
323
|
+
# Cosine distance ranges from 0 (identical) to 2 (opposite)
|
|
324
|
+
# Normalize to confidence: 1.0 (identical) to 0.0 (opposite)
|
|
325
|
+
confidence = max(0.0, 1.0 - (float(vector_score) / 2.0))
|
|
326
|
+
else:
|
|
327
|
+
# Fallback: use result order (first result = highest confidence)
|
|
328
|
+
# Start at 0.95 for first result, decay by 0.05 per position
|
|
329
|
+
confidence = max(0.0, 0.95 - (idx * 0.05))
|
|
330
|
+
|
|
331
|
+
# Parse timestamp
|
|
332
|
+
created_at_value = result.get('created_at', 0)
|
|
333
|
+
if isinstance(created_at_value, str):
|
|
334
|
+
created_at = datetime.fromtimestamp(float(created_at_value))
|
|
335
|
+
elif created_at_value:
|
|
336
|
+
created_at = datetime.fromtimestamp(float(created_at_value))
|
|
291
337
|
else:
|
|
292
338
|
created_at = datetime.now()
|
|
293
339
|
|
|
@@ -297,6 +343,7 @@ class RedisVectorStorageAdapter(StorageAdapter):
|
|
|
297
343
|
'persona': result.get('persona', ''),
|
|
298
344
|
'tags': result.get('tags', ''),
|
|
299
345
|
'created_at': created_at,
|
|
346
|
+
'confidence': confidence,
|
|
300
347
|
}
|
|
301
348
|
doc = Document(
|
|
302
349
|
page_content=in_response_to,
|
|
@@ -307,6 +354,23 @@ class RedisVectorStorageAdapter(StorageAdapter):
|
|
|
307
354
|
|
|
308
355
|
return [self.model_to_object(document) for document in documents]
|
|
309
356
|
|
|
357
|
+
# Redis uses vector similarity: we search for statements whose actual
|
|
358
|
+
# text field is semantically similar to the text that produced this search_text.
|
|
359
|
+
# This is stored in the closest_match.text field, but BestMatch only passes
|
|
360
|
+
# search_text. Since we can't reverse POS tags to original text (for now),
|
|
361
|
+
# we treat this parameter as a signal to do text-based similarity search.
|
|
362
|
+
#
|
|
363
|
+
# Note: The caller should ideally pass the actual text, but for compatibility
|
|
364
|
+
# we'll work with what we receive. In practice, search_text_contains is the
|
|
365
|
+
# better parameter for this use case.
|
|
366
|
+
if 'search_text' in kwargs:
|
|
367
|
+
# For now, we'll treat search_text as a filter-only parameter
|
|
368
|
+
# and fall through to the regular query_search below.
|
|
369
|
+
# This prevents the broken behavior of embedding POS tags.
|
|
370
|
+
# The proper fix requires BestMatch to pass additional context
|
|
371
|
+
# or use search_text_contains instead.
|
|
372
|
+
pass
|
|
373
|
+
|
|
310
374
|
ordering = kwargs.get('order_by', None)
|
|
311
375
|
|
|
312
376
|
if ordering:
|
|
@@ -341,14 +405,31 @@ class RedisVectorStorageAdapter(StorageAdapter):
|
|
|
341
405
|
# Convert results to Document objects
|
|
342
406
|
Document = self.get_statement_model()
|
|
343
407
|
documents = []
|
|
344
|
-
|
|
408
|
+
|
|
409
|
+
# Calculate confidence from vector distances
|
|
410
|
+
# Results are ordered by similarity (best match first)
|
|
411
|
+
for idx, result in enumerate(results):
|
|
345
412
|
# Extract metadata and content
|
|
346
413
|
in_response_to = result.get('in_response_to', '')
|
|
347
414
|
|
|
348
|
-
#
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
415
|
+
# Redis vector_score is cosine distance (lower is better)
|
|
416
|
+
# Convert to confidence: confidence = 1 - distance
|
|
417
|
+
# If vector_score not available, use result order
|
|
418
|
+
vector_score = result.get('vector_score')
|
|
419
|
+
if vector_score is not None:
|
|
420
|
+
# Cosine distance ranges from 0 (identical) to 2 (opposite)
|
|
421
|
+
# Normalize to confidence: 1.0 (identical) to 0.0 (opposite)
|
|
422
|
+
confidence = max(0.0, 1.0 - (float(vector_score) / 2.0))
|
|
423
|
+
else:
|
|
424
|
+
# Fallback: use result order (first result = highest confidence)
|
|
425
|
+
# Start at 0.95 for first result, decay by 0.05 per position
|
|
426
|
+
confidence = max(0.0, 0.95 - (idx * 0.05))
|
|
427
|
+
|
|
428
|
+
# Convert Unix timestamp back to datetime
|
|
429
|
+
# Redis returns numeric fields as strings
|
|
430
|
+
created_at_timestamp = result.get('created_at', '0')
|
|
431
|
+
if created_at_timestamp and created_at_timestamp != '0':
|
|
432
|
+
created_at = datetime.fromtimestamp(float(created_at_timestamp))
|
|
352
433
|
else:
|
|
353
434
|
created_at = datetime.now()
|
|
354
435
|
|
|
@@ -358,6 +439,7 @@ class RedisVectorStorageAdapter(StorageAdapter):
|
|
|
358
439
|
'persona': result.get('persona', ''),
|
|
359
440
|
'tags': result.get('tags', ''),
|
|
360
441
|
'created_at': created_at,
|
|
442
|
+
'confidence': confidence,
|
|
361
443
|
}
|
|
362
444
|
doc = Document(
|
|
363
445
|
page_content=in_response_to,
|
|
@@ -395,9 +477,9 @@ class RedisVectorStorageAdapter(StorageAdapter):
|
|
|
395
477
|
metadata = {
|
|
396
478
|
'text': text,
|
|
397
479
|
'category': kwargs.get('category', ''),
|
|
398
|
-
#
|
|
399
|
-
#
|
|
400
|
-
'created_at': kwargs.get('created_at') or
|
|
480
|
+
# Store created_at as Unix timestamp with microseconds (float)
|
|
481
|
+
# This provides full datetime precision while maintaining Redis NUMERIC field compatibility
|
|
482
|
+
'created_at': kwargs.get('created_at') or _default_date.timestamp(),
|
|
401
483
|
'tags': '|'.join(unique_tags) if unique_tags else '',
|
|
402
484
|
'conversation': kwargs.get('conversation', ''),
|
|
403
485
|
'persona': kwargs.get('persona', ''),
|
|
@@ -427,7 +509,7 @@ class RedisVectorStorageAdapter(StorageAdapter):
|
|
|
427
509
|
metadata={
|
|
428
510
|
'text': statement.text,
|
|
429
511
|
'conversation': statement.conversation or '',
|
|
430
|
-
'created_at':
|
|
512
|
+
'created_at': statement.created_at.timestamp(),
|
|
431
513
|
'persona': statement.persona or '',
|
|
432
514
|
# Prevent duplicate tag entries in the database
|
|
433
515
|
'tags': '|'.join(
|
|
@@ -452,7 +534,7 @@ class RedisVectorStorageAdapter(StorageAdapter):
|
|
|
452
534
|
metadata = {
|
|
453
535
|
'text': statement.text,
|
|
454
536
|
'conversation': statement.conversation or '',
|
|
455
|
-
'created_at':
|
|
537
|
+
'created_at': statement.created_at.timestamp(),
|
|
456
538
|
'persona': statement.persona or '',
|
|
457
539
|
'tags': '|'.join(unique_tags) if unique_tags else '',
|
|
458
540
|
}
|
|
@@ -508,11 +590,9 @@ class RedisVectorStorageAdapter(StorageAdapter):
|
|
|
508
590
|
# Parse the metadata
|
|
509
591
|
metadata = json.loads(data[b'_metadata_json'].decode())
|
|
510
592
|
|
|
511
|
-
# Convert created_at from
|
|
512
|
-
if 'created_at' in metadata and isinstance(metadata['created_at'], int):
|
|
513
|
-
|
|
514
|
-
# Parse YYMMDD format
|
|
515
|
-
metadata['created_at'] = datetime.strptime(created_at_str, '%y%m%d')
|
|
593
|
+
# Convert created_at from Unix timestamp back to datetime
|
|
594
|
+
if 'created_at' in metadata and isinstance(metadata['created_at'], (int, float)):
|
|
595
|
+
metadata['created_at'] = datetime.fromtimestamp(metadata['created_at'])
|
|
516
596
|
|
|
517
597
|
# Get the in_response_to from the hash
|
|
518
598
|
in_response_to = data.get(b'in_response_to', b'').decode()
|