ChatterBot 1.2.1__py3-none-any.whl → 1.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chatterbot/__init__.py +1 -1
- chatterbot/chatterbot.py +41 -8
- chatterbot/comparisons.py +32 -15
- chatterbot/logic/best_match.py +42 -35
- chatterbot/logic/specific_response.py +52 -9
- chatterbot/logic/unit_conversion.py +4 -3
- chatterbot/response_selection.py +1 -1
- chatterbot/search.py +65 -17
- chatterbot/storage/__init__.py +2 -0
- chatterbot/storage/django_storage.py +13 -23
- chatterbot/storage/mongodb.py +7 -26
- chatterbot/storage/redis.py +390 -0
- chatterbot/storage/sql_storage.py +77 -68
- chatterbot/storage/storage_adapter.py +9 -7
- chatterbot/trainers.py +3 -3
- chatterbot/vectorstores.py +74 -0
- {ChatterBot-1.2.1.dist-info → chatterbot-1.2.3.dist-info}/METADATA +9 -3
- {ChatterBot-1.2.1.dist-info → chatterbot-1.2.3.dist-info}/RECORD +21 -19
- {ChatterBot-1.2.1.dist-info → chatterbot-1.2.3.dist-info}/WHEEL +1 -1
- {ChatterBot-1.2.1.dist-info → chatterbot-1.2.3.dist-info}/LICENSE +0 -0
- {ChatterBot-1.2.1.dist-info → chatterbot-1.2.3.dist-info}/top_level.txt +0 -0
chatterbot/__init__.py
CHANGED
chatterbot/chatterbot.py
CHANGED
@@ -2,7 +2,10 @@ import logging
|
|
2
2
|
from chatterbot.storage import StorageAdapter
|
3
3
|
from chatterbot.logic import LogicAdapter
|
4
4
|
from chatterbot.search import TextSearch, IndexedTextSearch
|
5
|
+
from chatterbot.tagging import PosLemmaTagger
|
6
|
+
from chatterbot import languages
|
5
7
|
from chatterbot import utils
|
8
|
+
import spacy
|
6
9
|
|
7
10
|
|
8
11
|
class ChatBot(object):
|
@@ -27,6 +30,12 @@ class ChatBot(object):
|
|
27
30
|
|
28
31
|
self.storage = utils.initialize_class(storage_adapter, **kwargs)
|
29
32
|
|
33
|
+
Tagger = kwargs.get('tagger', PosLemmaTagger)
|
34
|
+
|
35
|
+
self.tagger = Tagger(language=kwargs.get(
|
36
|
+
'tagger_language', languages.ENG
|
37
|
+
))
|
38
|
+
|
30
39
|
primary_search_algorithm = IndexedTextSearch(self, **kwargs)
|
31
40
|
text_search_algorithm = TextSearch(self, **kwargs)
|
32
41
|
|
@@ -51,6 +60,9 @@ class ChatBot(object):
|
|
51
60
|
for preprocessor in preprocessors:
|
52
61
|
self.preprocessors.append(utils.import_module(preprocessor))
|
53
62
|
|
63
|
+
# NOTE: 'xx' is the language code for a multi-language model
|
64
|
+
self.nlp = spacy.blank(self.tagger.language.ISO_639_1)
|
65
|
+
|
54
66
|
self.logger = kwargs.get('logger', logging.getLogger(__name__))
|
55
67
|
|
56
68
|
# Allow the bot to save input it receives so that it can learn
|
@@ -105,16 +117,27 @@ class ChatBot(object):
|
|
105
117
|
for preprocessor in self.preprocessors:
|
106
118
|
input_statement = preprocessor(input_statement)
|
107
119
|
|
120
|
+
# Mark the statement as being a response to the previous
|
121
|
+
if input_statement.in_response_to is None:
|
122
|
+
previous_statement = self.get_latest_response(input_statement.conversation)
|
123
|
+
if previous_statement:
|
124
|
+
input_statement.in_response_to = previous_statement.text
|
125
|
+
|
108
126
|
# Make sure the input statement has its search text saved
|
109
127
|
|
110
128
|
if not input_statement.search_text:
|
111
|
-
_search_text = self.
|
129
|
+
_search_text = self.tagger.get_text_index_string(input_statement.text)
|
112
130
|
input_statement.search_text = _search_text
|
113
131
|
|
114
132
|
if not input_statement.search_in_response_to and input_statement.in_response_to:
|
115
|
-
input_statement.search_in_response_to = self.
|
133
|
+
input_statement.search_in_response_to = self.tagger.get_text_index_string(
|
134
|
+
input_statement.in_response_to
|
135
|
+
)
|
116
136
|
|
117
|
-
response = self.generate_response(
|
137
|
+
response = self.generate_response(
|
138
|
+
input_statement,
|
139
|
+
additional_response_selection_parameters
|
140
|
+
)
|
118
141
|
|
119
142
|
# Update any response data that needs to be changed
|
120
143
|
if persist_values_to_response:
|
@@ -128,10 +151,13 @@ class ChatBot(object):
|
|
128
151
|
setattr(response, response_key, response_value)
|
129
152
|
|
130
153
|
if not self.read_only:
|
131
|
-
|
154
|
+
|
155
|
+
# Save the input statement
|
156
|
+
self.storage.create(**input_statement.serialize())
|
132
157
|
|
133
158
|
# Save the response generated for the input
|
134
|
-
self.
|
159
|
+
self.learn_response(response, previous_statement=input_statement)
|
160
|
+
|
135
161
|
|
136
162
|
return response
|
137
163
|
|
@@ -194,6 +220,8 @@ class ChatBot(object):
|
|
194
220
|
if result_option.count > most_common.count:
|
195
221
|
most_common = result_option
|
196
222
|
|
223
|
+
self.logger.info('Selecting "{}" as the most common response'.format(most_common.statement.text))
|
224
|
+
|
197
225
|
if most_common.count > 1:
|
198
226
|
result = most_common.statement
|
199
227
|
|
@@ -204,6 +232,8 @@ class ChatBot(object):
|
|
204
232
|
persona='bot:' + self.name
|
205
233
|
)
|
206
234
|
|
235
|
+
response.add_tags(*result.get_tags())
|
236
|
+
|
207
237
|
response.confidence = result.confidence
|
208
238
|
|
209
239
|
return response
|
@@ -228,11 +258,14 @@ class ChatBot(object):
|
|
228
258
|
statement.in_response_to = previous_statement
|
229
259
|
|
230
260
|
self.logger.info('Adding "{}" as a response to "{}"'.format(
|
231
|
-
|
232
|
-
|
261
|
+
previous_statement_text,
|
262
|
+
statement.text
|
233
263
|
))
|
234
264
|
|
235
|
-
|
265
|
+
if not statement.persona:
|
266
|
+
statement.persona = 'bot:' + self.name
|
267
|
+
|
268
|
+
# Save the response statement
|
236
269
|
return self.storage.create(**statement.serialize())
|
237
270
|
|
238
271
|
def get_latest_response(self, conversation):
|
chatterbot/comparisons.py
CHANGED
@@ -19,15 +19,22 @@ class Comparator:
|
|
19
19
|
def __call__(self, statement_a, statement_b):
|
20
20
|
return self.compare(statement_a, statement_b)
|
21
21
|
|
22
|
-
def
|
22
|
+
def compare_text(self, text_a, text_b):
|
23
23
|
"""
|
24
|
-
Implemented in subclasses: compare
|
24
|
+
Implemented in subclasses: compare text_a to text_b.
|
25
25
|
|
26
26
|
:return: The percent of similarity between the statements based on the implemented algorithm.
|
27
27
|
:rtype: float
|
28
28
|
"""
|
29
29
|
return 0
|
30
30
|
|
31
|
+
def compare(self, statement_a, statement_b):
|
32
|
+
"""
|
33
|
+
:return: The percent of similarity between the statements based on the implemented algorithm.
|
34
|
+
:rtype: float
|
35
|
+
"""
|
36
|
+
return self.compare_text(statement_a.text, statement_b.text)
|
37
|
+
|
31
38
|
|
32
39
|
class LevenshteinDistance(Comparator):
|
33
40
|
"""
|
@@ -39,21 +46,21 @@ class LevenshteinDistance(Comparator):
|
|
39
46
|
based on the Levenshtein distance algorithm.
|
40
47
|
"""
|
41
48
|
|
42
|
-
def
|
49
|
+
def compare_text(self, text_a, text_b):
|
43
50
|
"""
|
44
|
-
Compare the two
|
51
|
+
Compare the two pieces of text.
|
45
52
|
|
46
53
|
:return: The percent of similarity between the text of the statements.
|
47
54
|
:rtype: float
|
48
55
|
"""
|
49
56
|
|
50
|
-
# Return 0 if either statement has a
|
51
|
-
if
|
57
|
+
# Return 0 if either statement has a None text value
|
58
|
+
if text_a is None or text_b is None:
|
52
59
|
return 0
|
53
60
|
|
54
61
|
# Get the lowercase version of both strings
|
55
|
-
statement_a_text = str(
|
56
|
-
statement_b_text = str(
|
62
|
+
statement_a_text = str(text_a.lower())
|
63
|
+
statement_b_text = str(text_b.lower())
|
57
64
|
|
58
65
|
similarity = SequenceMatcher(
|
59
66
|
None,
|
@@ -103,15 +110,20 @@ class SpacySimilarity(Comparator):
|
|
103
110
|
# Disable the Named Entity Recognition (NER) component because it is not necessary
|
104
111
|
self.nlp = spacy.load(model, exclude=['ner'])
|
105
112
|
|
106
|
-
def
|
113
|
+
def compare_text(self, text_a, text_b):
|
107
114
|
"""
|
108
|
-
Compare the two
|
115
|
+
Compare the similarity of two strings.
|
109
116
|
|
110
117
|
:return: The percent of similarity between the closest synset distance.
|
111
118
|
:rtype: float
|
112
119
|
"""
|
113
|
-
|
114
|
-
|
120
|
+
|
121
|
+
# Return 0 if either statement has a None text value
|
122
|
+
if text_a is None or text_b is None:
|
123
|
+
return 0
|
124
|
+
|
125
|
+
document_a = self.nlp(text_a)
|
126
|
+
document_b = self.nlp(text_b)
|
115
127
|
|
116
128
|
return document_a.similarity(document_b)
|
117
129
|
|
@@ -155,14 +167,19 @@ class JaccardSimilarity(Comparator):
|
|
155
167
|
# Disable the Named Entity Recognition (NER) component because it is not necessary
|
156
168
|
self.nlp = spacy.load(model, exclude=['ner'])
|
157
169
|
|
158
|
-
def
|
170
|
+
def compare_text(self, text_a, text_b):
|
159
171
|
"""
|
160
172
|
Return the calculated similarity of two
|
161
173
|
statements based on the Jaccard index.
|
162
174
|
"""
|
175
|
+
|
176
|
+
# Return 0 if either statement has a None text value
|
177
|
+
if text_a is None or text_b is None:
|
178
|
+
return 0
|
179
|
+
|
163
180
|
# Make both strings lowercase
|
164
|
-
document_a = self.nlp(
|
165
|
-
document_b = self.nlp(
|
181
|
+
document_a = self.nlp(text_a.lower())
|
182
|
+
document_b = self.nlp(text_b.lower())
|
166
183
|
|
167
184
|
statement_a_lemmas = frozenset([
|
168
185
|
token.lemma_ for token in document_a if not token.is_stop
|
chatterbot/logic/best_match.py
CHANGED
@@ -23,10 +23,13 @@ class BestMatch(LogicAdapter):
|
|
23
23
|
self.excluded_words = kwargs.get('excluded_words')
|
24
24
|
|
25
25
|
def process(self, input_statement, additional_response_selection_parameters=None):
|
26
|
+
|
27
|
+
# Get all statements that have a response text similar to the input statement
|
26
28
|
search_results = self.search_algorithm.search(input_statement)
|
27
29
|
|
28
30
|
# Use the input statement as the closest match if no other results are found
|
29
|
-
|
31
|
+
input_statement.confidence = 0 # Use 0 confidence when no other results are found
|
32
|
+
closest_match = input_statement
|
30
33
|
|
31
34
|
# Search for the closest match to the input statement
|
32
35
|
for result in search_results:
|
@@ -36,8 +39,8 @@ class BestMatch(LogicAdapter):
|
|
36
39
|
if result.confidence >= self.maximum_similarity_threshold:
|
37
40
|
break
|
38
41
|
|
39
|
-
self.chatbot.logger.info('
|
40
|
-
closest_match.
|
42
|
+
self.chatbot.logger.info('Selecting "{}" as a response to "{}" with a confidence of {}'.format(
|
43
|
+
closest_match.in_response_to, input_statement.text, closest_match.confidence
|
41
44
|
))
|
42
45
|
|
43
46
|
recent_repeated_responses = filters.get_recent_repeated_responses(
|
@@ -51,39 +54,34 @@ class BestMatch(LogicAdapter):
|
|
51
54
|
))
|
52
55
|
|
53
56
|
response_selection_parameters = {
|
54
|
-
'
|
57
|
+
'search_text': closest_match.search_text,
|
58
|
+
'persona_not_startswith': 'bot:',
|
55
59
|
'exclude_text': recent_repeated_responses,
|
56
60
|
'exclude_text_words': self.excluded_words
|
57
61
|
}
|
58
62
|
|
59
63
|
alternate_response_selection_parameters = {
|
60
|
-
'search_in_response_to': self.chatbot.
|
64
|
+
'search_in_response_to': self.chatbot.tagger.get_text_index_string(
|
61
65
|
input_statement.text
|
62
66
|
),
|
67
|
+
'persona_not_startswith': 'bot:',
|
63
68
|
'exclude_text': recent_repeated_responses,
|
64
69
|
'exclude_text_words': self.excluded_words
|
65
70
|
}
|
66
71
|
|
67
72
|
if additional_response_selection_parameters:
|
68
|
-
response_selection_parameters.update(
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
+
response_selection_parameters.update(
|
74
|
+
additional_response_selection_parameters
|
75
|
+
)
|
76
|
+
alternate_response_selection_parameters.update(
|
77
|
+
additional_response_selection_parameters
|
78
|
+
)
|
73
79
|
|
74
|
-
alternate_response_list = []
|
75
80
|
|
76
|
-
|
77
|
-
|
78
|
-
alternate_response_list = list(self.chatbot.storage.filter(**alternate_response_selection_parameters))
|
81
|
+
# Get all statements with text similar to the closest match
|
82
|
+
response_list = list(self.chatbot.storage.filter(**response_selection_parameters))
|
79
83
|
|
80
84
|
if response_list:
|
81
|
-
self.chatbot.logger.info(
|
82
|
-
'Selecting response from {} optimal responses.'.format(
|
83
|
-
len(response_list)
|
84
|
-
)
|
85
|
-
)
|
86
|
-
|
87
85
|
response = self.select_response(
|
88
86
|
input_statement,
|
89
87
|
response_list,
|
@@ -91,26 +89,35 @@ class BestMatch(LogicAdapter):
|
|
91
89
|
)
|
92
90
|
|
93
91
|
response.confidence = closest_match.confidence
|
94
|
-
self.chatbot.logger.info('
|
95
|
-
|
92
|
+
self.chatbot.logger.info('Selecting "{}" from {} optimal responses.'.format(
|
93
|
+
response.text,
|
94
|
+
len(response_list)
|
95
|
+
))
|
96
|
+
else:
|
96
97
|
'''
|
97
98
|
The case where there were no responses returned for the selected match
|
98
99
|
but a value exists for the statement the match is in response to.
|
99
100
|
'''
|
100
|
-
self.chatbot.logger.info(
|
101
|
-
|
102
|
-
|
101
|
+
self.chatbot.logger.info('No responses found. Generating alternate response list.')
|
102
|
+
|
103
|
+
alternate_response_list = list(self.chatbot.storage.filter(
|
104
|
+
**alternate_response_selection_parameters
|
105
|
+
))
|
106
|
+
|
107
|
+
if alternate_response_list:
|
108
|
+
response = self.select_response(
|
109
|
+
input_statement,
|
110
|
+
alternate_response_list,
|
111
|
+
self.chatbot.storage
|
103
112
|
)
|
104
|
-
)
|
105
|
-
response = self.select_response(
|
106
|
-
input_statement,
|
107
|
-
alternate_response_list,
|
108
|
-
self.chatbot.storage
|
109
|
-
)
|
110
113
|
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
114
|
+
response.confidence = closest_match.confidence
|
115
|
+
self.chatbot.logger.info('Selected alternative response "{}" from {} options'.format(
|
116
|
+
response.text,
|
117
|
+
len(alternate_response_list)
|
118
|
+
))
|
119
|
+
else:
|
120
|
+
response = self.get_default_response(input_statement)
|
121
|
+
self.chatbot.logger.info('Using "%s" as a default response.', response.text)
|
115
122
|
|
116
123
|
return response
|
@@ -1,4 +1,7 @@
|
|
1
1
|
from chatterbot.logic import LogicAdapter
|
2
|
+
from chatterbot.conversation import Statement
|
3
|
+
from chatterbot import constants, languages
|
4
|
+
import spacy
|
2
5
|
|
3
6
|
|
4
7
|
class SpecificResponseAdapter(LogicAdapter):
|
@@ -8,30 +11,70 @@ class SpecificResponseAdapter(LogicAdapter):
|
|
8
11
|
:kwargs:
|
9
12
|
* *input_text* (``str``) --
|
10
13
|
The input text that triggers this logic adapter.
|
11
|
-
* *output_text* (``str``) --
|
14
|
+
* *output_text* (``str`` or ``function``) --
|
12
15
|
The output text returned by this logic adapter.
|
16
|
+
If a function is provided, it should return a string.
|
13
17
|
"""
|
14
18
|
|
15
19
|
def __init__(self, chatbot, **kwargs):
|
16
20
|
super().__init__(chatbot, **kwargs)
|
17
|
-
from chatterbot.conversation import Statement
|
18
21
|
|
19
22
|
self.input_text = kwargs.get('input_text')
|
20
23
|
|
21
|
-
|
22
|
-
|
24
|
+
self.matcher = None
|
25
|
+
|
26
|
+
if MatcherClass := kwargs.get('matcher'):
|
27
|
+
language = kwargs.get('language', languages.ENG)
|
28
|
+
|
29
|
+
self.nlp = self._initialize_nlp(language)
|
30
|
+
|
31
|
+
self.matcher = MatcherClass(self.nlp.vocab)
|
32
|
+
|
33
|
+
self.matcher.add('SpecificResponse', [self.input_text])
|
34
|
+
|
35
|
+
self._output_text = kwargs.get('output_text')
|
36
|
+
|
37
|
+
def _initialize_nlp(self, language):
|
38
|
+
try:
|
39
|
+
model = constants.DEFAULT_LANGUAGE_TO_SPACY_MODEL_MAP[language]
|
40
|
+
except KeyError as e:
|
41
|
+
raise KeyError(
|
42
|
+
f'Spacy model is not available for language {language}'
|
43
|
+
) from e
|
44
|
+
|
45
|
+
return spacy.load(model)
|
23
46
|
|
24
47
|
def can_process(self, statement):
|
25
|
-
if
|
48
|
+
if self.matcher:
|
49
|
+
doc = self.nlp(statement.text)
|
50
|
+
matches = self.matcher(doc)
|
51
|
+
|
52
|
+
if matches:
|
53
|
+
return True
|
54
|
+
elif statement.text == self.input_text:
|
26
55
|
return True
|
27
56
|
|
28
57
|
return False
|
29
58
|
|
30
59
|
def process(self, statement, additional_response_selection_parameters=None):
|
31
60
|
|
32
|
-
if
|
33
|
-
|
61
|
+
if callable(self._output_text):
|
62
|
+
response_statement = Statement(text=self._output_text())
|
63
|
+
else:
|
64
|
+
response_statement = Statement(text=self._output_text)
|
65
|
+
|
66
|
+
if self.matcher:
|
67
|
+
doc = self.nlp(statement.text)
|
68
|
+
matches = self.matcher(doc)
|
69
|
+
|
70
|
+
if matches:
|
71
|
+
response_statement.confidence = 1
|
72
|
+
else:
|
73
|
+
response_statement.confidence = 0
|
74
|
+
|
75
|
+
elif statement.text == self.input_text:
|
76
|
+
response_statement.confidence = 1
|
34
77
|
else:
|
35
|
-
|
78
|
+
response_statement.confidence = 0
|
36
79
|
|
37
|
-
return
|
80
|
+
return response_statement
|
@@ -158,7 +158,8 @@ class UnitConversion(LogicAdapter):
|
|
158
158
|
response = func(p)
|
159
159
|
if response.confidence == 1.0:
|
160
160
|
break
|
161
|
-
except Exception:
|
161
|
+
except Exception as e:
|
162
|
+
self.chatbot.logger.warning('Error during UnitConversion: {}'.format(str(e)))
|
162
163
|
response.confidence = 0.0
|
163
|
-
|
164
|
-
|
164
|
+
|
165
|
+
return response
|
chatterbot/response_selection.py
CHANGED
@@ -37,7 +37,7 @@ def get_most_frequent_response(input_statement, response_list, storage=None):
|
|
37
37
|
matching_response = statement
|
38
38
|
occurrence_count = count
|
39
39
|
|
40
|
-
# Choose the most commonly
|
40
|
+
# Choose the most commonly occurring matching response
|
41
41
|
return matching_response
|
42
42
|
|
43
43
|
|
chatterbot/search.py
CHANGED
@@ -21,7 +21,7 @@ class IndexedTextSearch:
|
|
21
21
|
)
|
22
22
|
|
23
23
|
self.compare_statements = statement_comparison_function(
|
24
|
-
language=self.chatbot.
|
24
|
+
language=self.chatbot.tagger.language
|
25
25
|
)
|
26
26
|
|
27
27
|
self.search_page_size = kwargs.get(
|
@@ -43,19 +43,8 @@ class IndexedTextSearch:
|
|
43
43
|
"""
|
44
44
|
self.chatbot.logger.info('Beginning search for close text match')
|
45
45
|
|
46
|
-
input_search_text = input_statement.search_text
|
47
|
-
|
48
|
-
if not input_statement.search_text:
|
49
|
-
self.chatbot.logger.warning(
|
50
|
-
'No value for search_text was available on the provided input'
|
51
|
-
)
|
52
|
-
|
53
|
-
input_search_text = self.chatbot.storage.tagger.get_text_index_string(
|
54
|
-
input_statement.text
|
55
|
-
)
|
56
|
-
|
57
46
|
search_parameters = {
|
58
|
-
'
|
47
|
+
'search_in_response_to_contains': input_statement.search_text,
|
59
48
|
'persona_not_startswith': 'bot:',
|
60
49
|
'page_size': self.search_page_size
|
61
50
|
}
|
@@ -71,14 +60,16 @@ class IndexedTextSearch:
|
|
71
60
|
|
72
61
|
# Find the closest matching known statement
|
73
62
|
for statement in statement_list:
|
74
|
-
confidence = self.compare_statements(
|
63
|
+
confidence = self.compare_statements.compare_text(
|
64
|
+
input_statement.text, statement.in_response_to
|
65
|
+
)
|
75
66
|
|
76
67
|
if confidence > best_confidence_so_far:
|
77
68
|
best_confidence_so_far = confidence
|
78
69
|
statement.confidence = confidence
|
79
70
|
|
80
71
|
self.chatbot.logger.info('Similar text found: {} {}'.format(
|
81
|
-
statement.
|
72
|
+
statement.in_response_to, confidence
|
82
73
|
))
|
83
74
|
|
84
75
|
yield statement
|
@@ -107,7 +98,7 @@ class TextSearch:
|
|
107
98
|
)
|
108
99
|
|
109
100
|
self.compare_statements = statement_comparison_function(
|
110
|
-
language=self.chatbot.
|
101
|
+
language=self.chatbot.tagger.language
|
111
102
|
)
|
112
103
|
|
113
104
|
self.search_page_size = kwargs.get(
|
@@ -145,7 +136,9 @@ class TextSearch:
|
|
145
136
|
|
146
137
|
# Find the closest matching known statement
|
147
138
|
for statement in statement_list:
|
148
|
-
confidence = self.compare_statements(
|
139
|
+
confidence = self.compare_statements.compare_text(
|
140
|
+
input_statement.text, statement.in_response_to
|
141
|
+
)
|
149
142
|
|
150
143
|
if confidence > best_confidence_so_far:
|
151
144
|
best_confidence_so_far = confidence
|
@@ -156,3 +149,58 @@ class TextSearch:
|
|
156
149
|
))
|
157
150
|
|
158
151
|
yield statement
|
152
|
+
|
153
|
+
|
154
|
+
class VectorSearch:
|
155
|
+
"""
|
156
|
+
.. note:: BETA feature: this search method is new and experimental.
|
157
|
+
|
158
|
+
Search for similar text based on a :term:`vector database`.
|
159
|
+
"""
|
160
|
+
|
161
|
+
name = 'vector_search'
|
162
|
+
|
163
|
+
def __init__(self, chatbot, **kwargs):
|
164
|
+
from chatterbot.storage import RedisVectorStorageAdapter
|
165
|
+
|
166
|
+
# Good documentation:
|
167
|
+
# https://python.langchain.com/docs/integrations/vectorstores/redis/
|
168
|
+
#
|
169
|
+
# https://hub.docker.com/r/redis/redis-stack
|
170
|
+
|
171
|
+
# MongoDB:
|
172
|
+
# > Vector Search is only supported on Atlas Clusters
|
173
|
+
# https://www.mongodb.com/community/forums/t/can-a-local-mongodb-instance-be-used-when-working-with-langchain-mongodbatlasvectorsearch/265356
|
174
|
+
|
175
|
+
# FAISS:
|
176
|
+
# https://python.langchain.com/docs/integrations/vectorstores/faiss/
|
177
|
+
|
178
|
+
print("Starting Redis Vector Store")
|
179
|
+
|
180
|
+
# TODO: look into:
|
181
|
+
# https://python.langchain.com/api_reference/redis/chat_message_history/langchain_redis.chat_message_history.RedisChatMessageHistory.html
|
182
|
+
|
183
|
+
# The VectorSearch class is only compatible with the RedisVectorStorageAdapter
|
184
|
+
if not isinstance(chatbot.storage, RedisVectorStorageAdapter):
|
185
|
+
raise Exception(
|
186
|
+
'The VectorSearch search method requires the RedisVectorStorageAdapter storage adapter.'
|
187
|
+
)
|
188
|
+
|
189
|
+
def search(self, input_statement, **additional_parameters):
|
190
|
+
print("Querying Vector Store")
|
191
|
+
|
192
|
+
# Similarity search with score and filter
|
193
|
+
# NOTE: It looks like `return_all` is needed to return the full document
|
194
|
+
# specifically what we need here is the ID
|
195
|
+
scored_results = self.storage.vector_store.similarity_search_with_score(
|
196
|
+
input_statement.text, k=2, return_all=True
|
197
|
+
)
|
198
|
+
# sort_by="score", filter={"category": "likes"})
|
199
|
+
|
200
|
+
print("Similarity Search with Score Results:\n")
|
201
|
+
for doc, score in scored_results:
|
202
|
+
print(f"Content: {doc.page_content[:150]}...")
|
203
|
+
print(f"ID: {doc.id}")
|
204
|
+
print(f"Metadata: {doc.metadata}")
|
205
|
+
print(f"Score: {score}")
|
206
|
+
print()
|
chatterbot/storage/__init__.py
CHANGED
@@ -2,6 +2,7 @@ from chatterbot.storage.storage_adapter import StorageAdapter
|
|
2
2
|
from chatterbot.storage.django_storage import DjangoStorageAdapter
|
3
3
|
from chatterbot.storage.mongodb import MongoDatabaseAdapter
|
4
4
|
from chatterbot.storage.sql_storage import SQLStorageAdapter
|
5
|
+
from chatterbot.storage.redis import RedisVectorStorageAdapter
|
5
6
|
|
6
7
|
|
7
8
|
__all__ = (
|
@@ -9,4 +10,5 @@ __all__ = (
|
|
9
10
|
'DjangoStorageAdapter',
|
10
11
|
'MongoDatabaseAdapter',
|
11
12
|
'SQLStorageAdapter',
|
13
|
+
'RedisVectorStorageAdapter',
|
12
14
|
)
|
@@ -44,6 +44,7 @@ class DjangoStorageAdapter(StorageAdapter):
|
|
44
44
|
exclude_text_words = kwargs.pop('exclude_text_words', [])
|
45
45
|
persona_not_startswith = kwargs.pop('persona_not_startswith', None)
|
46
46
|
search_text_contains = kwargs.pop('search_text_contains', None)
|
47
|
+
search_in_response_to_contains = kwargs.pop('search_in_response_to_contains', None)
|
47
48
|
|
48
49
|
# Convert a single string into a list if only one tag is provided
|
49
50
|
if type(tags) == str:
|
@@ -83,6 +84,16 @@ class DjangoStorageAdapter(StorageAdapter):
|
|
83
84
|
or_query
|
84
85
|
)
|
85
86
|
|
87
|
+
if search_in_response_to_contains:
|
88
|
+
or_query = Q()
|
89
|
+
|
90
|
+
for word in search_in_response_to_contains.split(' '):
|
91
|
+
or_query |= Q(search_in_response_to__contains=word)
|
92
|
+
|
93
|
+
statements = statements.filter(
|
94
|
+
or_query
|
95
|
+
)
|
96
|
+
|
86
97
|
if order_by:
|
87
98
|
statements = statements.order_by(*order_by)
|
88
99
|
|
@@ -99,13 +110,6 @@ class DjangoStorageAdapter(StorageAdapter):
|
|
99
110
|
|
100
111
|
tags = kwargs.pop('tags', [])
|
101
112
|
|
102
|
-
if 'search_text' not in kwargs:
|
103
|
-
kwargs['search_text'] = self.tagger.get_text_index_string(kwargs['text'])
|
104
|
-
|
105
|
-
if 'search_in_response_to' not in kwargs:
|
106
|
-
if kwargs.get('in_response_to'):
|
107
|
-
kwargs['search_in_response_to'] = self.tagger.get_text_index_string(kwargs['in_response_to'])
|
108
|
-
|
109
113
|
statement = Statement(**kwargs)
|
110
114
|
|
111
115
|
statement.save()
|
@@ -129,20 +133,6 @@ class DjangoStorageAdapter(StorageAdapter):
|
|
129
133
|
|
130
134
|
tag_cache = {}
|
131
135
|
|
132
|
-
# Check if any statements already have a search text
|
133
|
-
have_search_text = any(statement.search_text for statement in statements)
|
134
|
-
|
135
|
-
# Generate search text values in bulk
|
136
|
-
if not have_search_text:
|
137
|
-
search_text_documents = self.tagger.as_nlp_pipeline([statement.text for statement in statements])
|
138
|
-
response_search_text_documents = self.tagger.as_nlp_pipeline([statement.in_response_to or '' for statement in statements])
|
139
|
-
|
140
|
-
for statement, search_text_document, response_search_text_document in zip(
|
141
|
-
statements, search_text_documents, response_search_text_documents
|
142
|
-
):
|
143
|
-
statement.search_text = search_text_document._.search_index
|
144
|
-
statement.search_in_response_to = response_search_text_document._.search_index
|
145
|
-
|
146
136
|
for statement in statements:
|
147
137
|
|
148
138
|
statement_data = statement.serialize()
|
@@ -176,10 +166,10 @@ class DjangoStorageAdapter(StorageAdapter):
|
|
176
166
|
else:
|
177
167
|
statement = Statement.objects.create(
|
178
168
|
text=statement.text,
|
179
|
-
search_text=
|
169
|
+
search_text=statement.search_text,
|
180
170
|
conversation=statement.conversation,
|
181
171
|
in_response_to=statement.in_response_to,
|
182
|
-
search_in_response_to=
|
172
|
+
search_in_response_to=statement.search_in_response_to,
|
183
173
|
created_at=statement.created_at
|
184
174
|
)
|
185
175
|
|