ChatterBot 1.2.1__py3-none-any.whl → 1.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chatterbot/__init__.py +1 -1
- chatterbot/chatterbot.py +41 -8
- chatterbot/comparisons.py +32 -15
- chatterbot/logic/best_match.py +42 -35
- chatterbot/response_selection.py +1 -1
- chatterbot/search.py +10 -17
- chatterbot/storage/django_storage.py +13 -23
- chatterbot/storage/mongodb.py +7 -26
- chatterbot/storage/sql_storage.py +77 -68
- chatterbot/storage/storage_adapter.py +9 -7
- chatterbot/trainers.py +3 -3
- {ChatterBot-1.2.1.dist-info → chatterbot-1.2.2.dist-info}/METADATA +1 -1
- {ChatterBot-1.2.1.dist-info → chatterbot-1.2.2.dist-info}/RECORD +16 -16
- {ChatterBot-1.2.1.dist-info → chatterbot-1.2.2.dist-info}/WHEEL +1 -1
- {ChatterBot-1.2.1.dist-info → chatterbot-1.2.2.dist-info}/LICENSE +0 -0
- {ChatterBot-1.2.1.dist-info → chatterbot-1.2.2.dist-info}/top_level.txt +0 -0
chatterbot/__init__.py
CHANGED
chatterbot/chatterbot.py
CHANGED
@@ -2,7 +2,10 @@ import logging
|
|
2
2
|
from chatterbot.storage import StorageAdapter
|
3
3
|
from chatterbot.logic import LogicAdapter
|
4
4
|
from chatterbot.search import TextSearch, IndexedTextSearch
|
5
|
+
from chatterbot.tagging import PosLemmaTagger
|
6
|
+
from chatterbot import languages
|
5
7
|
from chatterbot import utils
|
8
|
+
import spacy
|
6
9
|
|
7
10
|
|
8
11
|
class ChatBot(object):
|
@@ -27,6 +30,12 @@ class ChatBot(object):
|
|
27
30
|
|
28
31
|
self.storage = utils.initialize_class(storage_adapter, **kwargs)
|
29
32
|
|
33
|
+
Tagger = kwargs.get('tagger', PosLemmaTagger)
|
34
|
+
|
35
|
+
self.tagger = Tagger(language=kwargs.get(
|
36
|
+
'tagger_language', languages.ENG
|
37
|
+
))
|
38
|
+
|
30
39
|
primary_search_algorithm = IndexedTextSearch(self, **kwargs)
|
31
40
|
text_search_algorithm = TextSearch(self, **kwargs)
|
32
41
|
|
@@ -51,6 +60,9 @@ class ChatBot(object):
|
|
51
60
|
for preprocessor in preprocessors:
|
52
61
|
self.preprocessors.append(utils.import_module(preprocessor))
|
53
62
|
|
63
|
+
# NOTE: 'xx' is the language code for a multi-language model
|
64
|
+
self.nlp = spacy.blank(self.tagger.language.ISO_639_1)
|
65
|
+
|
54
66
|
self.logger = kwargs.get('logger', logging.getLogger(__name__))
|
55
67
|
|
56
68
|
# Allow the bot to save input it receives so that it can learn
|
@@ -105,16 +117,27 @@ class ChatBot(object):
|
|
105
117
|
for preprocessor in self.preprocessors:
|
106
118
|
input_statement = preprocessor(input_statement)
|
107
119
|
|
120
|
+
# Mark the statement as being a response to the previous
|
121
|
+
if input_statement.in_response_to is None:
|
122
|
+
previous_statement = self.get_latest_response(input_statement.conversation)
|
123
|
+
if previous_statement:
|
124
|
+
input_statement.in_response_to = previous_statement.text
|
125
|
+
|
108
126
|
# Make sure the input statement has its search text saved
|
109
127
|
|
110
128
|
if not input_statement.search_text:
|
111
|
-
_search_text = self.
|
129
|
+
_search_text = self.tagger.get_text_index_string(input_statement.text)
|
112
130
|
input_statement.search_text = _search_text
|
113
131
|
|
114
132
|
if not input_statement.search_in_response_to and input_statement.in_response_to:
|
115
|
-
input_statement.search_in_response_to = self.
|
133
|
+
input_statement.search_in_response_to = self.tagger.get_text_index_string(
|
134
|
+
input_statement.in_response_to
|
135
|
+
)
|
116
136
|
|
117
|
-
response = self.generate_response(
|
137
|
+
response = self.generate_response(
|
138
|
+
input_statement,
|
139
|
+
additional_response_selection_parameters
|
140
|
+
)
|
118
141
|
|
119
142
|
# Update any response data that needs to be changed
|
120
143
|
if persist_values_to_response:
|
@@ -128,10 +151,13 @@ class ChatBot(object):
|
|
128
151
|
setattr(response, response_key, response_value)
|
129
152
|
|
130
153
|
if not self.read_only:
|
131
|
-
|
154
|
+
|
155
|
+
# Save the input statement
|
156
|
+
self.storage.create(**input_statement.serialize())
|
132
157
|
|
133
158
|
# Save the response generated for the input
|
134
|
-
self.
|
159
|
+
self.learn_response(response, previous_statement=input_statement)
|
160
|
+
|
135
161
|
|
136
162
|
return response
|
137
163
|
|
@@ -194,6 +220,8 @@ class ChatBot(object):
|
|
194
220
|
if result_option.count > most_common.count:
|
195
221
|
most_common = result_option
|
196
222
|
|
223
|
+
self.logger.info('Selecting "{}" as the most common response'.format(most_common.statement.text))
|
224
|
+
|
197
225
|
if most_common.count > 1:
|
198
226
|
result = most_common.statement
|
199
227
|
|
@@ -204,6 +232,8 @@ class ChatBot(object):
|
|
204
232
|
persona='bot:' + self.name
|
205
233
|
)
|
206
234
|
|
235
|
+
response.add_tags(*result.get_tags())
|
236
|
+
|
207
237
|
response.confidence = result.confidence
|
208
238
|
|
209
239
|
return response
|
@@ -228,11 +258,14 @@ class ChatBot(object):
|
|
228
258
|
statement.in_response_to = previous_statement
|
229
259
|
|
230
260
|
self.logger.info('Adding "{}" as a response to "{}"'.format(
|
231
|
-
|
232
|
-
|
261
|
+
previous_statement_text,
|
262
|
+
statement.text
|
233
263
|
))
|
234
264
|
|
235
|
-
|
265
|
+
if not statement.persona:
|
266
|
+
statement.persona = 'bot:' + self.name
|
267
|
+
|
268
|
+
# Save the response statement
|
236
269
|
return self.storage.create(**statement.serialize())
|
237
270
|
|
238
271
|
def get_latest_response(self, conversation):
|
chatterbot/comparisons.py
CHANGED
@@ -19,15 +19,22 @@ class Comparator:
|
|
19
19
|
def __call__(self, statement_a, statement_b):
|
20
20
|
return self.compare(statement_a, statement_b)
|
21
21
|
|
22
|
-
def
|
22
|
+
def compare_text(self, text_a, text_b):
|
23
23
|
"""
|
24
|
-
Implemented in subclasses: compare
|
24
|
+
Implemented in subclasses: compare text_a to text_b.
|
25
25
|
|
26
26
|
:return: The percent of similarity between the statements based on the implemented algorithm.
|
27
27
|
:rtype: float
|
28
28
|
"""
|
29
29
|
return 0
|
30
30
|
|
31
|
+
def compare(self, statement_a, statement_b):
|
32
|
+
"""
|
33
|
+
:return: The percent of similarity between the statements based on the implemented algorithm.
|
34
|
+
:rtype: float
|
35
|
+
"""
|
36
|
+
return self.compare_text(statement_a.text, statement_b.text)
|
37
|
+
|
31
38
|
|
32
39
|
class LevenshteinDistance(Comparator):
|
33
40
|
"""
|
@@ -39,21 +46,21 @@ class LevenshteinDistance(Comparator):
|
|
39
46
|
based on the Levenshtein distance algorithm.
|
40
47
|
"""
|
41
48
|
|
42
|
-
def
|
49
|
+
def compare_text(self, text_a, text_b):
|
43
50
|
"""
|
44
|
-
Compare the two
|
51
|
+
Compare the two pieces of text.
|
45
52
|
|
46
53
|
:return: The percent of similarity between the text of the statements.
|
47
54
|
:rtype: float
|
48
55
|
"""
|
49
56
|
|
50
|
-
# Return 0 if either statement has a
|
51
|
-
if
|
57
|
+
# Return 0 if either statement has a None text value
|
58
|
+
if text_a is None or text_b is None:
|
52
59
|
return 0
|
53
60
|
|
54
61
|
# Get the lowercase version of both strings
|
55
|
-
statement_a_text = str(
|
56
|
-
statement_b_text = str(
|
62
|
+
statement_a_text = str(text_a.lower())
|
63
|
+
statement_b_text = str(text_b.lower())
|
57
64
|
|
58
65
|
similarity = SequenceMatcher(
|
59
66
|
None,
|
@@ -103,15 +110,20 @@ class SpacySimilarity(Comparator):
|
|
103
110
|
# Disable the Named Entity Recognition (NER) component because it is not necessary
|
104
111
|
self.nlp = spacy.load(model, exclude=['ner'])
|
105
112
|
|
106
|
-
def
|
113
|
+
def compare_text(self, text_a, text_b):
|
107
114
|
"""
|
108
|
-
Compare the two
|
115
|
+
Compare the similarity of two strings.
|
109
116
|
|
110
117
|
:return: The percent of similarity between the closest synset distance.
|
111
118
|
:rtype: float
|
112
119
|
"""
|
113
|
-
|
114
|
-
|
120
|
+
|
121
|
+
# Return 0 if either statement has a None text value
|
122
|
+
if text_a is None or text_b is None:
|
123
|
+
return 0
|
124
|
+
|
125
|
+
document_a = self.nlp(text_a)
|
126
|
+
document_b = self.nlp(text_b)
|
115
127
|
|
116
128
|
return document_a.similarity(document_b)
|
117
129
|
|
@@ -155,14 +167,19 @@ class JaccardSimilarity(Comparator):
|
|
155
167
|
# Disable the Named Entity Recognition (NER) component because it is not necessary
|
156
168
|
self.nlp = spacy.load(model, exclude=['ner'])
|
157
169
|
|
158
|
-
def
|
170
|
+
def compare_text(self, text_a, text_b):
|
159
171
|
"""
|
160
172
|
Return the calculated similarity of two
|
161
173
|
statements based on the Jaccard index.
|
162
174
|
"""
|
175
|
+
|
176
|
+
# Return 0 if either statement has a None text value
|
177
|
+
if text_a is None or text_b is None:
|
178
|
+
return 0
|
179
|
+
|
163
180
|
# Make both strings lowercase
|
164
|
-
document_a = self.nlp(
|
165
|
-
document_b = self.nlp(
|
181
|
+
document_a = self.nlp(text_a.lower())
|
182
|
+
document_b = self.nlp(text_b.lower())
|
166
183
|
|
167
184
|
statement_a_lemmas = frozenset([
|
168
185
|
token.lemma_ for token in document_a if not token.is_stop
|
chatterbot/logic/best_match.py
CHANGED
@@ -23,10 +23,13 @@ class BestMatch(LogicAdapter):
|
|
23
23
|
self.excluded_words = kwargs.get('excluded_words')
|
24
24
|
|
25
25
|
def process(self, input_statement, additional_response_selection_parameters=None):
|
26
|
+
|
27
|
+
# Get all statements that have a response text similar to the input statement
|
26
28
|
search_results = self.search_algorithm.search(input_statement)
|
27
29
|
|
28
30
|
# Use the input statement as the closest match if no other results are found
|
29
|
-
|
31
|
+
input_statement.confidence = 0 # Use 0 confidence when no other results are found
|
32
|
+
closest_match = input_statement
|
30
33
|
|
31
34
|
# Search for the closest match to the input statement
|
32
35
|
for result in search_results:
|
@@ -36,8 +39,8 @@ class BestMatch(LogicAdapter):
|
|
36
39
|
if result.confidence >= self.maximum_similarity_threshold:
|
37
40
|
break
|
38
41
|
|
39
|
-
self.chatbot.logger.info('
|
40
|
-
closest_match.
|
42
|
+
self.chatbot.logger.info('Selecting "{}" as a response to "{}" with a confidence of {}'.format(
|
43
|
+
closest_match.in_response_to, input_statement.text, closest_match.confidence
|
41
44
|
))
|
42
45
|
|
43
46
|
recent_repeated_responses = filters.get_recent_repeated_responses(
|
@@ -51,39 +54,34 @@ class BestMatch(LogicAdapter):
|
|
51
54
|
))
|
52
55
|
|
53
56
|
response_selection_parameters = {
|
54
|
-
'
|
57
|
+
'search_text': closest_match.search_text,
|
58
|
+
'persona_not_startswith': 'bot:',
|
55
59
|
'exclude_text': recent_repeated_responses,
|
56
60
|
'exclude_text_words': self.excluded_words
|
57
61
|
}
|
58
62
|
|
59
63
|
alternate_response_selection_parameters = {
|
60
|
-
'search_in_response_to': self.chatbot.
|
64
|
+
'search_in_response_to': self.chatbot.tagger.get_text_index_string(
|
61
65
|
input_statement.text
|
62
66
|
),
|
67
|
+
'persona_not_startswith': 'bot:',
|
63
68
|
'exclude_text': recent_repeated_responses,
|
64
69
|
'exclude_text_words': self.excluded_words
|
65
70
|
}
|
66
71
|
|
67
72
|
if additional_response_selection_parameters:
|
68
|
-
response_selection_parameters.update(
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
+
response_selection_parameters.update(
|
74
|
+
additional_response_selection_parameters
|
75
|
+
)
|
76
|
+
alternate_response_selection_parameters.update(
|
77
|
+
additional_response_selection_parameters
|
78
|
+
)
|
73
79
|
|
74
|
-
alternate_response_list = []
|
75
80
|
|
76
|
-
|
77
|
-
|
78
|
-
alternate_response_list = list(self.chatbot.storage.filter(**alternate_response_selection_parameters))
|
81
|
+
# Get all statements with text similar to the closest match
|
82
|
+
response_list = list(self.chatbot.storage.filter(**response_selection_parameters))
|
79
83
|
|
80
84
|
if response_list:
|
81
|
-
self.chatbot.logger.info(
|
82
|
-
'Selecting response from {} optimal responses.'.format(
|
83
|
-
len(response_list)
|
84
|
-
)
|
85
|
-
)
|
86
|
-
|
87
85
|
response = self.select_response(
|
88
86
|
input_statement,
|
89
87
|
response_list,
|
@@ -91,26 +89,35 @@ class BestMatch(LogicAdapter):
|
|
91
89
|
)
|
92
90
|
|
93
91
|
response.confidence = closest_match.confidence
|
94
|
-
self.chatbot.logger.info('
|
95
|
-
|
92
|
+
self.chatbot.logger.info('Selecting "{}" from {} optimal responses.'.format(
|
93
|
+
response.text,
|
94
|
+
len(response_list)
|
95
|
+
))
|
96
|
+
else:
|
96
97
|
'''
|
97
98
|
The case where there was no responses returned for the selected match
|
98
99
|
but a value exists for the statement the match is in response to.
|
99
100
|
'''
|
100
|
-
self.chatbot.logger.info(
|
101
|
-
|
102
|
-
|
101
|
+
self.chatbot.logger.info('No responses found. Generating alternate response list.')
|
102
|
+
|
103
|
+
alternate_response_list = list(self.chatbot.storage.filter(
|
104
|
+
**alternate_response_selection_parameters
|
105
|
+
))
|
106
|
+
|
107
|
+
if alternate_response_list:
|
108
|
+
response = self.select_response(
|
109
|
+
input_statement,
|
110
|
+
alternate_response_list,
|
111
|
+
self.chatbot.storage
|
103
112
|
)
|
104
|
-
)
|
105
|
-
response = self.select_response(
|
106
|
-
input_statement,
|
107
|
-
alternate_response_list,
|
108
|
-
self.chatbot.storage
|
109
|
-
)
|
110
113
|
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
114
|
+
response.confidence = closest_match.confidence
|
115
|
+
self.chatbot.logger.info('Selected alternative response "{}" from {} options'.format(
|
116
|
+
response.text,
|
117
|
+
len(alternate_response_list)
|
118
|
+
))
|
119
|
+
else:
|
120
|
+
response = self.get_default_response(input_statement)
|
121
|
+
self.chatbot.logger.info('Using "%s" as a default response.', response.text)
|
115
122
|
|
116
123
|
return response
|
chatterbot/response_selection.py
CHANGED
@@ -37,7 +37,7 @@ def get_most_frequent_response(input_statement, response_list, storage=None):
|
|
37
37
|
matching_response = statement
|
38
38
|
occurrence_count = count
|
39
39
|
|
40
|
-
# Choose the most commonly
|
40
|
+
# Choose the most commonly occurring matching response
|
41
41
|
return matching_response
|
42
42
|
|
43
43
|
|
chatterbot/search.py
CHANGED
@@ -21,7 +21,7 @@ class IndexedTextSearch:
|
|
21
21
|
)
|
22
22
|
|
23
23
|
self.compare_statements = statement_comparison_function(
|
24
|
-
language=self.chatbot.
|
24
|
+
language=self.chatbot.tagger.language
|
25
25
|
)
|
26
26
|
|
27
27
|
self.search_page_size = kwargs.get(
|
@@ -43,19 +43,8 @@ class IndexedTextSearch:
|
|
43
43
|
"""
|
44
44
|
self.chatbot.logger.info('Beginning search for close text match')
|
45
45
|
|
46
|
-
input_search_text = input_statement.search_text
|
47
|
-
|
48
|
-
if not input_statement.search_text:
|
49
|
-
self.chatbot.logger.warning(
|
50
|
-
'No value for search_text was available on the provided input'
|
51
|
-
)
|
52
|
-
|
53
|
-
input_search_text = self.chatbot.storage.tagger.get_text_index_string(
|
54
|
-
input_statement.text
|
55
|
-
)
|
56
|
-
|
57
46
|
search_parameters = {
|
58
|
-
'
|
47
|
+
'search_in_response_to_contains': input_statement.search_text,
|
59
48
|
'persona_not_startswith': 'bot:',
|
60
49
|
'page_size': self.search_page_size
|
61
50
|
}
|
@@ -71,14 +60,16 @@ class IndexedTextSearch:
|
|
71
60
|
|
72
61
|
# Find the closest matching known statement
|
73
62
|
for statement in statement_list:
|
74
|
-
confidence = self.compare_statements(
|
63
|
+
confidence = self.compare_statements.compare_text(
|
64
|
+
input_statement.text, statement.in_response_to
|
65
|
+
)
|
75
66
|
|
76
67
|
if confidence > best_confidence_so_far:
|
77
68
|
best_confidence_so_far = confidence
|
78
69
|
statement.confidence = confidence
|
79
70
|
|
80
71
|
self.chatbot.logger.info('Similar text found: {} {}'.format(
|
81
|
-
statement.
|
72
|
+
statement.in_response_to, confidence
|
82
73
|
))
|
83
74
|
|
84
75
|
yield statement
|
@@ -107,7 +98,7 @@ class TextSearch:
|
|
107
98
|
)
|
108
99
|
|
109
100
|
self.compare_statements = statement_comparison_function(
|
110
|
-
language=self.chatbot.
|
101
|
+
language=self.chatbot.tagger.language
|
111
102
|
)
|
112
103
|
|
113
104
|
self.search_page_size = kwargs.get(
|
@@ -145,7 +136,9 @@ class TextSearch:
|
|
145
136
|
|
146
137
|
# Find the closest matching known statement
|
147
138
|
for statement in statement_list:
|
148
|
-
confidence = self.compare_statements(
|
139
|
+
confidence = self.compare_statements.compare_text(
|
140
|
+
input_statement.text, statement.in_response_to
|
141
|
+
)
|
149
142
|
|
150
143
|
if confidence > best_confidence_so_far:
|
151
144
|
best_confidence_so_far = confidence
|
@@ -44,6 +44,7 @@ class DjangoStorageAdapter(StorageAdapter):
|
|
44
44
|
exclude_text_words = kwargs.pop('exclude_text_words', [])
|
45
45
|
persona_not_startswith = kwargs.pop('persona_not_startswith', None)
|
46
46
|
search_text_contains = kwargs.pop('search_text_contains', None)
|
47
|
+
search_in_response_to_contains = kwargs.pop('search_in_response_to_contains', None)
|
47
48
|
|
48
49
|
# Convert a single sting into a list if only one tag is provided
|
49
50
|
if type(tags) == str:
|
@@ -83,6 +84,16 @@ class DjangoStorageAdapter(StorageAdapter):
|
|
83
84
|
or_query
|
84
85
|
)
|
85
86
|
|
87
|
+
if search_in_response_to_contains:
|
88
|
+
or_query = Q()
|
89
|
+
|
90
|
+
for word in search_in_response_to_contains.split(' '):
|
91
|
+
or_query |= Q(search_in_response_to__contains=word)
|
92
|
+
|
93
|
+
statements = statements.filter(
|
94
|
+
or_query
|
95
|
+
)
|
96
|
+
|
86
97
|
if order_by:
|
87
98
|
statements = statements.order_by(*order_by)
|
88
99
|
|
@@ -99,13 +110,6 @@ class DjangoStorageAdapter(StorageAdapter):
|
|
99
110
|
|
100
111
|
tags = kwargs.pop('tags', [])
|
101
112
|
|
102
|
-
if 'search_text' not in kwargs:
|
103
|
-
kwargs['search_text'] = self.tagger.get_text_index_string(kwargs['text'])
|
104
|
-
|
105
|
-
if 'search_in_response_to' not in kwargs:
|
106
|
-
if kwargs.get('in_response_to'):
|
107
|
-
kwargs['search_in_response_to'] = self.tagger.get_text_index_string(kwargs['in_response_to'])
|
108
|
-
|
109
113
|
statement = Statement(**kwargs)
|
110
114
|
|
111
115
|
statement.save()
|
@@ -129,20 +133,6 @@ class DjangoStorageAdapter(StorageAdapter):
|
|
129
133
|
|
130
134
|
tag_cache = {}
|
131
135
|
|
132
|
-
# Check if any statements already have a search text
|
133
|
-
have_search_text = any(statement.search_text for statement in statements)
|
134
|
-
|
135
|
-
# Generate search text values in bulk
|
136
|
-
if not have_search_text:
|
137
|
-
search_text_documents = self.tagger.as_nlp_pipeline([statement.text for statement in statements])
|
138
|
-
response_search_text_documents = self.tagger.as_nlp_pipeline([statement.in_response_to or '' for statement in statements])
|
139
|
-
|
140
|
-
for statement, search_text_document, response_search_text_document in zip(
|
141
|
-
statements, search_text_documents, response_search_text_documents
|
142
|
-
):
|
143
|
-
statement.search_text = search_text_document._.search_index
|
144
|
-
statement.search_in_response_to = response_search_text_document._.search_index
|
145
|
-
|
146
136
|
for statement in statements:
|
147
137
|
|
148
138
|
statement_data = statement.serialize()
|
@@ -176,10 +166,10 @@ class DjangoStorageAdapter(StorageAdapter):
|
|
176
166
|
else:
|
177
167
|
statement = Statement.objects.create(
|
178
168
|
text=statement.text,
|
179
|
-
search_text=
|
169
|
+
search_text=statement.search_text,
|
180
170
|
conversation=statement.conversation,
|
181
171
|
in_response_to=statement.in_response_to,
|
182
|
-
search_in_response_to=
|
172
|
+
search_in_response_to=statement.search_in_response_to,
|
183
173
|
created_at=statement.created_at
|
184
174
|
)
|
185
175
|
|
chatterbot/storage/mongodb.py
CHANGED
@@ -82,6 +82,7 @@ class MongoDatabaseAdapter(StorageAdapter):
|
|
82
82
|
exclude_text_words = kwargs.pop('exclude_text_words', [])
|
83
83
|
persona_not_startswith = kwargs.pop('persona_not_startswith', None)
|
84
84
|
search_text_contains = kwargs.pop('search_text_contains', None)
|
85
|
+
search_in_response_to_contains = kwargs.pop('search_in_response_to_contains', None)
|
85
86
|
|
86
87
|
if tags:
|
87
88
|
kwargs['tags'] = {
|
@@ -127,6 +128,12 @@ class MongoDatabaseAdapter(StorageAdapter):
|
|
127
128
|
])
|
128
129
|
kwargs['search_text'] = re.compile(or_regex)
|
129
130
|
|
131
|
+
if search_in_response_to_contains:
|
132
|
+
or_regex = '|'.join([
|
133
|
+
'{}'.format(re.escape(word)) for word in search_in_response_to_contains.split(' ')
|
134
|
+
])
|
135
|
+
kwargs['search_in_response_to'] = re.compile(or_regex)
|
136
|
+
|
130
137
|
mongo_ordering = []
|
131
138
|
|
132
139
|
if order_by:
|
@@ -159,13 +166,6 @@ class MongoDatabaseAdapter(StorageAdapter):
|
|
159
166
|
if 'tags' in kwargs:
|
160
167
|
kwargs['tags'] = list(set(kwargs['tags']))
|
161
168
|
|
162
|
-
if 'search_text' not in kwargs:
|
163
|
-
kwargs['search_text'] = self.tagger.get_text_index_string(kwargs['text'])
|
164
|
-
|
165
|
-
if 'search_in_response_to' not in kwargs:
|
166
|
-
if kwargs.get('in_response_to'):
|
167
|
-
kwargs['search_in_response_to'] = self.tagger.get_text_index_string(kwargs['in_response_to'])
|
168
|
-
|
169
169
|
inserted = self.statements.insert_one(kwargs)
|
170
170
|
|
171
171
|
kwargs['id'] = inserted.inserted_id
|
@@ -178,20 +178,6 @@ class MongoDatabaseAdapter(StorageAdapter):
|
|
178
178
|
"""
|
179
179
|
create_statements = []
|
180
180
|
|
181
|
-
# Check if any statements already have a search text
|
182
|
-
have_search_text = any(statement.search_text for statement in statements)
|
183
|
-
|
184
|
-
# Generate search text values in bulk
|
185
|
-
if not have_search_text:
|
186
|
-
search_text_documents = self.tagger.as_nlp_pipeline([statement.text for statement in statements])
|
187
|
-
response_search_text_documents = self.tagger.as_nlp_pipeline([statement.in_response_to or '' for statement in statements])
|
188
|
-
|
189
|
-
for statement, search_text_document, response_search_text_document in zip(
|
190
|
-
statements, search_text_documents, response_search_text_documents
|
191
|
-
):
|
192
|
-
statement.search_text = search_text_document._.search_index
|
193
|
-
statement.search_in_response_to = response_search_text_document._.search_index
|
194
|
-
|
195
181
|
for statement in statements:
|
196
182
|
statement_data = statement.serialize()
|
197
183
|
tag_data = list(set(statement_data.pop('tags', [])))
|
@@ -206,11 +192,6 @@ class MongoDatabaseAdapter(StorageAdapter):
|
|
206
192
|
data.pop('id', None)
|
207
193
|
data.pop('tags', None)
|
208
194
|
|
209
|
-
data['search_text'] = self.tagger.get_text_index_string(data['text'])
|
210
|
-
|
211
|
-
if data.get('in_response_to'):
|
212
|
-
data['search_in_response_to'] = self.tagger.get_text_index_string(data['in_response_to'])
|
213
|
-
|
214
195
|
update_data = {
|
215
196
|
'$set': data
|
216
197
|
}
|
@@ -114,8 +114,8 @@ class SQLStorageAdapter(StorageAdapter):
|
|
114
114
|
record = query.first()
|
115
115
|
|
116
116
|
session.delete(record)
|
117
|
-
|
118
|
-
|
117
|
+
session.commit()
|
118
|
+
session.close()
|
119
119
|
|
120
120
|
def filter(self, **kwargs):
|
121
121
|
"""
|
@@ -139,6 +139,7 @@ class SQLStorageAdapter(StorageAdapter):
|
|
139
139
|
exclude_text_words = kwargs.pop('exclude_text_words', [])
|
140
140
|
persona_not_startswith = kwargs.pop('persona_not_startswith', None)
|
141
141
|
search_text_contains = kwargs.pop('search_text_contains', None)
|
142
|
+
search_in_response_to_contains = kwargs.pop('search_in_response_to_contains', None)
|
142
143
|
|
143
144
|
# Convert a single sting into a list if only one tag is provided
|
144
145
|
if type(tags) == str:
|
@@ -180,6 +181,14 @@ class SQLStorageAdapter(StorageAdapter):
|
|
180
181
|
or_(*or_query)
|
181
182
|
)
|
182
183
|
|
184
|
+
if search_in_response_to_contains:
|
185
|
+
or_query = [
|
186
|
+
Statement.search_in_response_to.contains(word) for word in search_in_response_to_contains.split(' ')
|
187
|
+
]
|
188
|
+
statements = statements.filter(
|
189
|
+
or_(*or_query)
|
190
|
+
)
|
191
|
+
|
183
192
|
if order_by:
|
184
193
|
|
185
194
|
if 'created_at' in order_by:
|
@@ -196,7 +205,15 @@ class SQLStorageAdapter(StorageAdapter):
|
|
196
205
|
|
197
206
|
session.close()
|
198
207
|
|
199
|
-
def create(
|
208
|
+
def create(
|
209
|
+
self,
|
210
|
+
text,
|
211
|
+
in_response_to=None,
|
212
|
+
tags=None,
|
213
|
+
search_text=None,
|
214
|
+
search_in_response_to=None,
|
215
|
+
**kwargs
|
216
|
+
):
|
200
217
|
"""
|
201
218
|
Creates a new statement matching the keyword arguments specified.
|
202
219
|
Returns the created statement.
|
@@ -206,19 +223,25 @@ class SQLStorageAdapter(StorageAdapter):
|
|
206
223
|
|
207
224
|
session = self.Session()
|
208
225
|
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
226
|
+
if search_text is None:
|
227
|
+
if self.raise_on_missing_search_text:
|
228
|
+
raise Exception('generate a search_text value')
|
229
|
+
|
230
|
+
if search_in_response_to is None and in_response_to is not None:
|
231
|
+
if self.raise_on_missing_search_text:
|
232
|
+
raise Exception('generate a search_in_response_to value')
|
233
|
+
|
234
|
+
statement = Statement(
|
235
|
+
text=text,
|
236
|
+
in_response_to=in_response_to,
|
237
|
+
search_text=search_text,
|
238
|
+
search_in_response_to=search_in_response_to,
|
239
|
+
**kwargs
|
240
|
+
)
|
241
|
+
|
242
|
+
tags = frozenset(tags) if tags else frozenset()
|
243
|
+
for tag_name in frozenset(tags):
|
244
|
+
# TODO: Query existing tags in bulk
|
222
245
|
tag = session.query(Tag).filter_by(name=tag_name).first()
|
223
246
|
|
224
247
|
if not tag:
|
@@ -235,7 +258,7 @@ class SQLStorageAdapter(StorageAdapter):
|
|
235
258
|
|
236
259
|
statement_object = self.model_to_object(statement)
|
237
260
|
|
238
|
-
|
261
|
+
session.close()
|
239
262
|
|
240
263
|
return statement_object
|
241
264
|
|
@@ -256,14 +279,8 @@ class SQLStorageAdapter(StorageAdapter):
|
|
256
279
|
|
257
280
|
# Generate search text values in bulk
|
258
281
|
if not have_search_text:
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
for statement, search_text_document, response_search_text_document in zip(
|
263
|
-
statements, search_text_documents, response_search_text_documents
|
264
|
-
):
|
265
|
-
statement.search_text = search_text_document._.search_index
|
266
|
-
statement.search_in_response_to = response_search_text_document._.search_index
|
282
|
+
if self.raise_on_missing_search_text:
|
283
|
+
raise Exception('generate bulk_search_text values')
|
267
284
|
|
268
285
|
for statement in statements:
|
269
286
|
|
@@ -305,48 +322,50 @@ class SQLStorageAdapter(StorageAdapter):
|
|
305
322
|
Statement = self.get_model('statement')
|
306
323
|
Tag = self.get_model('tag')
|
307
324
|
|
308
|
-
|
309
|
-
|
310
|
-
record = None
|
311
|
-
|
312
|
-
if hasattr(statement, 'id') and statement.id is not None:
|
313
|
-
record = session.query(Statement).get(statement.id)
|
314
|
-
else:
|
315
|
-
record = session.query(Statement).filter(
|
316
|
-
Statement.text == statement.text,
|
317
|
-
Statement.conversation == statement.conversation,
|
318
|
-
).first()
|
319
|
-
|
320
|
-
# Create a new statement entry if one does not already exist
|
321
|
-
if not record:
|
322
|
-
record = Statement(
|
323
|
-
text=statement.text,
|
324
|
-
conversation=statement.conversation,
|
325
|
-
persona=statement.persona
|
326
|
-
)
|
325
|
+
session = self.Session()
|
326
|
+
record = None
|
327
327
|
|
328
|
-
|
329
|
-
record
|
328
|
+
if hasattr(statement, 'id') and statement.id is not None:
|
329
|
+
record = session.query(Statement).get(statement.id)
|
330
|
+
else:
|
331
|
+
record = session.query(Statement).filter(
|
332
|
+
Statement.text == statement.text,
|
333
|
+
Statement.conversation == statement.conversation,
|
334
|
+
).first()
|
335
|
+
|
336
|
+
# Create a new statement entry if one does not already exist
|
337
|
+
if not record:
|
338
|
+
record = Statement(
|
339
|
+
text=statement.text,
|
340
|
+
conversation=statement.conversation,
|
341
|
+
persona=statement.persona
|
342
|
+
)
|
330
343
|
|
331
|
-
|
344
|
+
# Update the response value
|
345
|
+
record.in_response_to = statement.in_response_to
|
332
346
|
|
333
|
-
|
347
|
+
record.created_at = statement.created_at
|
334
348
|
|
335
|
-
|
336
|
-
|
349
|
+
if not statement.search_text:
|
350
|
+
if self.raise_on_missing_search_text:
|
351
|
+
raise Exception('update issued without search_text value')
|
337
352
|
|
338
|
-
|
339
|
-
|
353
|
+
if statement.in_response_to and not statement.search_in_response_to:
|
354
|
+
if self.raise_on_missing_search_text:
|
355
|
+
raise Exception('update issued without search_in_response_to value')
|
340
356
|
|
341
|
-
|
342
|
-
|
343
|
-
tag = Tag(name=tag_name)
|
357
|
+
for tag_name in statement.get_tags():
|
358
|
+
tag = session.query(Tag).filter_by(name=tag_name).first()
|
344
359
|
|
345
|
-
|
360
|
+
if not tag:
|
361
|
+
# Create the record
|
362
|
+
tag = Tag(name=tag_name)
|
346
363
|
|
347
|
-
|
364
|
+
record.tags.append(tag)
|
348
365
|
|
349
|
-
|
366
|
+
session.add(record)
|
367
|
+
session.commit()
|
368
|
+
session.close()
|
350
369
|
|
351
370
|
def get_random(self):
|
352
371
|
"""
|
@@ -388,13 +407,3 @@ class SQLStorageAdapter(StorageAdapter):
|
|
388
407
|
"""
|
389
408
|
from chatterbot.ext.sqlalchemy_app.models import Base
|
390
409
|
Base.metadata.create_all(self.engine)
|
391
|
-
|
392
|
-
def _session_finish(self, session, statement_text=None):
|
393
|
-
from sqlalchemy.exc import InvalidRequestError
|
394
|
-
try:
|
395
|
-
session.commit()
|
396
|
-
except InvalidRequestError:
|
397
|
-
# Log the statement text and the exception
|
398
|
-
self.logger.exception(statement_text)
|
399
|
-
finally:
|
400
|
-
session.close()
|
@@ -1,6 +1,4 @@
|
|
1
1
|
import logging
|
2
|
-
from chatterbot import languages
|
3
|
-
from chatterbot.tagging import PosLemmaTagger
|
4
2
|
|
5
3
|
|
6
4
|
class StorageAdapter(object):
|
@@ -17,11 +15,9 @@ class StorageAdapter(object):
|
|
17
15
|
"""
|
18
16
|
self.logger = kwargs.get('logger', logging.getLogger(__name__))
|
19
17
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
'tagger_language', languages.ENG
|
24
|
-
))
|
18
|
+
self.raise_on_missing_search_text = kwargs.get(
|
19
|
+
'raise_on_missing_search_text', True
|
20
|
+
)
|
25
21
|
|
26
22
|
def get_model(self, model_name):
|
27
23
|
"""
|
@@ -116,6 +112,12 @@ class StorageAdapter(object):
|
|
116
112
|
this parameter, then the statement will be included in the
|
117
113
|
result set.
|
118
114
|
Defaults to None
|
115
|
+
|
116
|
+
:param search_in_response_to: If the ``search_in_response_to`` field
|
117
|
+
of a statement contains a word that is in the string provided to
|
118
|
+
this parameter, then the statement will be included in the
|
119
|
+
result set.
|
120
|
+
Defaults to None
|
119
121
|
"""
|
120
122
|
raise self.AdapterMethodNotImplementedError(
|
121
123
|
'The `filter` method is not implemented by this adapter.'
|
chatterbot/trainers.py
CHANGED
@@ -93,7 +93,7 @@ class ListTrainer(Trainer):
|
|
93
93
|
statements_to_create = []
|
94
94
|
|
95
95
|
# Run the pipeline in bulk to improve performance
|
96
|
-
documents = self.chatbot.
|
96
|
+
documents = self.chatbot.tagger.as_nlp_pipeline(conversation)
|
97
97
|
|
98
98
|
# for text in enumerate(conversation):
|
99
99
|
for document in tqdm(documents, desc='List Trainer', disable=not self.show_training_progress):
|
@@ -143,7 +143,7 @@ class ChatterBotCorpusTrainer(Trainer):
|
|
143
143
|
for conversation in corpus:
|
144
144
|
|
145
145
|
# Run the pipeline in bulk to improve performance
|
146
|
-
documents = self.chatbot.
|
146
|
+
documents = self.chatbot.tagger.as_nlp_pipeline(conversation)
|
147
147
|
|
148
148
|
previous_statement_text = None
|
149
149
|
previous_statement_search_text = ''
|
@@ -344,7 +344,7 @@ class UbuntuCorpusTrainer(Trainer):
|
|
344
344
|
previous_statement_text = None
|
345
345
|
previous_statement_search_text = ''
|
346
346
|
|
347
|
-
documents = self.chatbot.
|
347
|
+
documents = self.chatbot.tagger.as_nlp_pipeline([
|
348
348
|
(
|
349
349
|
row[3],
|
350
350
|
{
|
@@ -1,8 +1,8 @@
|
|
1
|
-
chatterbot/__init__.py,sha256=
|
1
|
+
chatterbot/__init__.py,sha256=a8HpoSxm94mcHPYO24QiFVYS4WzYafqOGq7j_VuTIr8,158
|
2
2
|
chatterbot/__main__.py,sha256=nk19D56TlPT9Zdqkq4qZZrOnLKEc4YTwUVWmXYwSyHg,207
|
3
3
|
chatterbot/adapters.py,sha256=LJ_KqLpHKPdYAFpMGK63RVH4weV5X0Zh5uGyan6qdVU,878
|
4
|
-
chatterbot/chatterbot.py,sha256=
|
5
|
-
chatterbot/comparisons.py,sha256=
|
4
|
+
chatterbot/chatterbot.py,sha256=YLKLkQ-XI4Unr3rbzjpGIupOqenuevm21tAnx-yFFgQ,10400
|
5
|
+
chatterbot/comparisons.py,sha256=8-qLFWC1Z7tZ3iPUpyY6AD9l-whSo3QE1Rno_SzIp-I,6570
|
6
6
|
chatterbot/components.py,sha256=ld3Xam8olBClvE5QqcFYggE7Q7tODCFek7BO7lhfyeU,1782
|
7
7
|
chatterbot/constants.py,sha256=c_KPQKc82CHX6H3maeyTYqWatx6j-N-8HJhmejoVi60,1875
|
8
8
|
chatterbot/conversation.py,sha256=Y-WOxPN7I3igRyAEe5py1sfS6JIYPdbwjVlY3kM8Ys8,3175
|
@@ -12,10 +12,10 @@ chatterbot/filters.py,sha256=vDSDJz2FM10xT6ybs7qJiqy4X5I4gTEfwEnjBGUxZ9g,847
|
|
12
12
|
chatterbot/languages.py,sha256=XSenfc5FxHk_JWG5gGHsZvjvrPBbCaVCm_OU-BeER_M,32784
|
13
13
|
chatterbot/parsing.py,sha256=vS-w70cMkjq4YEpDOv_pXWhAI6Zj06WYDAcMDhYDj0M,23174
|
14
14
|
chatterbot/preprocessors.py,sha256=aI4v987dZc7GOKhO43i0i73EX748hehYSpzikFHpEXs,1271
|
15
|
-
chatterbot/response_selection.py,sha256=
|
16
|
-
chatterbot/search.py,sha256=
|
15
|
+
chatterbot/response_selection.py,sha256=aYeZ54jpGIcQnI-1-TDcua_f1p3PiM5_iMg4hF5ZaIU,2951
|
16
|
+
chatterbot/search.py,sha256=FTwwON2eKPWqoc5uoKh4AUmuXDCqyfMcMcXB4wijpxg,4910
|
17
17
|
chatterbot/tagging.py,sha256=GLY9wg_rvn6pSYVML-HcxkIo_3BZ3SAyj-q1oNZY8pI,2584
|
18
|
-
chatterbot/trainers.py,sha256=
|
18
|
+
chatterbot/trainers.py,sha256=U1yh0_V7FFL51MeQe1P1Q59weceDbkHh_2kDiDYpSEc,13315
|
19
19
|
chatterbot/utils.py,sha256=ckQXvsjp2FO9GcWxziY67JovN7mShnE4RlzdYarQY5k,3277
|
20
20
|
chatterbot/ext/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
21
21
|
chatterbot/ext/django_chatterbot/__init__.py,sha256=iWzmBzpAsYwkwi1faxAPFY9L1bbL97RgVXK2uqULIMc,92
|
@@ -48,19 +48,19 @@ chatterbot/ext/django_chatterbot/migrations/__init__.py,sha256=47DEQpj8HBSa-_TIm
|
|
48
48
|
chatterbot/ext/sqlalchemy_app/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
49
49
|
chatterbot/ext/sqlalchemy_app/models.py,sha256=pjU4e2BUSitw_IAkrk4iFQ9pZRU35y5MomvX7aiBFCw,2492
|
50
50
|
chatterbot/logic/__init__.py,sha256=28-5swBCPfSVMl8xB5C8frOKZ2oj28rQfenbd9E4r-4,531
|
51
|
-
chatterbot/logic/best_match.py,sha256=
|
51
|
+
chatterbot/logic/best_match.py,sha256=8TNW0uZ_Uq-XPfaZUMUZDVH6KzDT65j59xblxQBv-dQ,4820
|
52
52
|
chatterbot/logic/logic_adapter.py,sha256=5kNEirh5fiF5hhSMFXD7bIkKwXHmrSsSS4qDm-6xry0,4694
|
53
53
|
chatterbot/logic/mathematical_evaluation.py,sha256=GPDKUwNFajERof2R-MkPGi2jJRP-rKAGm_f0V9JHDHE,2282
|
54
54
|
chatterbot/logic/specific_response.py,sha256=_VeJaa3kun0J7cVzLOlTYK1tBpth0B6UWms7QwtcNpY,1082
|
55
55
|
chatterbot/logic/time_adapter.py,sha256=mxdoQGeC5IjREH4PU5iHYOIPEvnYnzgysocR8xMYWXc,2406
|
56
56
|
chatterbot/logic/unit_conversion.py,sha256=DT50HHE3njUo_ttDSU8S-fwBylarhDF3l_McRLSX6Ic,5823
|
57
57
|
chatterbot/storage/__init__.py,sha256=IymIHfeisvULQzUYsQSiUBbWIZ1m5EzyMVI082tTw5w,369
|
58
|
-
chatterbot/storage/django_storage.py,sha256=
|
59
|
-
chatterbot/storage/mongodb.py,sha256=
|
60
|
-
chatterbot/storage/sql_storage.py,sha256=
|
61
|
-
chatterbot/storage/storage_adapter.py,sha256=
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
58
|
+
chatterbot/storage/django_storage.py,sha256=S5S4GipD7FyNJy4RWu5-S8sLPuSJIObwTtqTpnJu-ok,6159
|
59
|
+
chatterbot/storage/mongodb.py,sha256=Ozvdvcjb3LGZxcvbSQGzwP9VloYQbmsa2FaKunFpMyU,7934
|
60
|
+
chatterbot/storage/sql_storage.py,sha256=VVYZvclG_74IN-MrG0edc-RQ2gUO6gRQyCWWSO0MmCk,13082
|
61
|
+
chatterbot/storage/storage_adapter.py,sha256=fvyb-qNiB0HMJ0siVMCWUIY--6d-C47N1_kKZVFZAv4,6110
|
62
|
+
chatterbot-1.2.2.dist-info/LICENSE,sha256=5b04U8mi0wp5gJMYlKi49EalnD9Q2nwY_6UEI_Avgu4,1476
|
63
|
+
chatterbot-1.2.2.dist-info/METADATA,sha256=EGYwvpQjhqJOfjlQWI83memJr0sXKo9QwM_wbp1wtrg,8311
|
64
|
+
chatterbot-1.2.2.dist-info/WHEEL,sha256=nn6H5-ilmfVryoAQl3ZQ2l8SH5imPWFpm1A5FgEuFV4,91
|
65
|
+
chatterbot-1.2.2.dist-info/top_level.txt,sha256=W2TzAbAJ-eBXTIKZZhVlkrh87msJNmBQpyhkrHqjSrE,11
|
66
|
+
chatterbot-1.2.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|