ChatterBot 1.2.1__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
chatterbot/__init__.py CHANGED
@@ -4,7 +4,7 @@ ChatterBot is a machine learning, conversational dialog engine.
4
4
  from .chatterbot import ChatBot
5
5
 
6
6
 
7
- __version__ = '1.2.1'
7
+ __version__ = '1.2.2'
8
8
 
9
9
  __all__ = (
10
10
  'ChatBot',
chatterbot/chatterbot.py CHANGED
@@ -2,7 +2,10 @@ import logging
2
2
  from chatterbot.storage import StorageAdapter
3
3
  from chatterbot.logic import LogicAdapter
4
4
  from chatterbot.search import TextSearch, IndexedTextSearch
5
+ from chatterbot.tagging import PosLemmaTagger
6
+ from chatterbot import languages
5
7
  from chatterbot import utils
8
+ import spacy
6
9
 
7
10
 
8
11
  class ChatBot(object):
@@ -27,6 +30,12 @@ class ChatBot(object):
27
30
 
28
31
  self.storage = utils.initialize_class(storage_adapter, **kwargs)
29
32
 
33
+ Tagger = kwargs.get('tagger', PosLemmaTagger)
34
+
35
+ self.tagger = Tagger(language=kwargs.get(
36
+ 'tagger_language', languages.ENG
37
+ ))
38
+
30
39
  primary_search_algorithm = IndexedTextSearch(self, **kwargs)
31
40
  text_search_algorithm = TextSearch(self, **kwargs)
32
41
 
@@ -51,6 +60,9 @@ class ChatBot(object):
51
60
  for preprocessor in preprocessors:
52
61
  self.preprocessors.append(utils.import_module(preprocessor))
53
62
 
63
+ # NOTE: 'xx' is the language code for a multi-language model
64
+ self.nlp = spacy.blank(self.tagger.language.ISO_639_1)
65
+
54
66
  self.logger = kwargs.get('logger', logging.getLogger(__name__))
55
67
 
56
68
  # Allow the bot to save input it receives so that it can learn
@@ -105,16 +117,27 @@ class ChatBot(object):
105
117
  for preprocessor in self.preprocessors:
106
118
  input_statement = preprocessor(input_statement)
107
119
 
120
+ # Mark the statement as being a response to the previous
121
+ if input_statement.in_response_to is None:
122
+ previous_statement = self.get_latest_response(input_statement.conversation)
123
+ if previous_statement:
124
+ input_statement.in_response_to = previous_statement.text
125
+
108
126
  # Make sure the input statement has its search text saved
109
127
 
110
128
  if not input_statement.search_text:
111
- _search_text = self.storage.tagger.get_text_index_string(input_statement.text)
129
+ _search_text = self.tagger.get_text_index_string(input_statement.text)
112
130
  input_statement.search_text = _search_text
113
131
 
114
132
  if not input_statement.search_in_response_to and input_statement.in_response_to:
115
- input_statement.search_in_response_to = self.storage.tagger.get_text_index_string(input_statement.in_response_to)
133
+ input_statement.search_in_response_to = self.tagger.get_text_index_string(
134
+ input_statement.in_response_to
135
+ )
116
136
 
117
- response = self.generate_response(input_statement, additional_response_selection_parameters)
137
+ response = self.generate_response(
138
+ input_statement,
139
+ additional_response_selection_parameters
140
+ )
118
141
 
119
142
  # Update any response data that needs to be changed
120
143
  if persist_values_to_response:
@@ -128,10 +151,13 @@ class ChatBot(object):
128
151
  setattr(response, response_key, response_value)
129
152
 
130
153
  if not self.read_only:
131
- self.learn_response(input_statement)
154
+
155
+ # Save the input statement
156
+ self.storage.create(**input_statement.serialize())
132
157
 
133
158
  # Save the response generated for the input
134
- self.storage.create(**response.serialize())
159
+ self.learn_response(response, previous_statement=input_statement)
160
+
135
161
 
136
162
  return response
137
163
 
@@ -194,6 +220,8 @@ class ChatBot(object):
194
220
  if result_option.count > most_common.count:
195
221
  most_common = result_option
196
222
 
223
+ self.logger.info('Selecting "{}" as the most common response'.format(most_common.statement.text))
224
+
197
225
  if most_common.count > 1:
198
226
  result = most_common.statement
199
227
 
@@ -204,6 +232,8 @@ class ChatBot(object):
204
232
  persona='bot:' + self.name
205
233
  )
206
234
 
235
+ response.add_tags(*result.get_tags())
236
+
207
237
  response.confidence = result.confidence
208
238
 
209
239
  return response
@@ -228,11 +258,14 @@ class ChatBot(object):
228
258
  statement.in_response_to = previous_statement
229
259
 
230
260
  self.logger.info('Adding "{}" as a response to "{}"'.format(
231
- statement.text,
232
- previous_statement_text
261
+ previous_statement_text,
262
+ statement.text
233
263
  ))
234
264
 
235
- # Save the input statement
265
+ if not statement.persona:
266
+ statement.persona = 'bot:' + self.name
267
+
268
+ # Save the response statement
236
269
  return self.storage.create(**statement.serialize())
237
270
 
238
271
  def get_latest_response(self, conversation):
chatterbot/comparisons.py CHANGED
@@ -19,15 +19,22 @@ class Comparator:
19
19
  def __call__(self, statement_a, statement_b):
20
20
  return self.compare(statement_a, statement_b)
21
21
 
22
- def compare(self, statement_a, statement_b):
22
+ def compare_text(self, text_a, text_b):
23
23
  """
24
- Implemented in subclasses: compare statement_a to statement_b.
24
+ Implemented in subclasses: compare text_a to text_b.
25
25
 
26
26
  :return: The percent of similarity between the statements based on the implemented algorithm.
27
27
  :rtype: float
28
28
  """
29
29
  return 0
30
30
 
31
+ def compare(self, statement_a, statement_b):
32
+ """
33
+ :return: The percent of similarity between the statements based on the implemented algorithm.
34
+ :rtype: float
35
+ """
36
+ return self.compare_text(statement_a.text, statement_b.text)
37
+
31
38
 
32
39
  class LevenshteinDistance(Comparator):
33
40
  """
@@ -39,21 +46,21 @@ class LevenshteinDistance(Comparator):
39
46
  based on the Levenshtein distance algorithm.
40
47
  """
41
48
 
42
- def compare(self, statement_a, statement_b):
49
+ def compare_text(self, text_a, text_b):
43
50
  """
44
- Compare the two input statements.
51
+ Compare the two pieces of text.
45
52
 
46
53
  :return: The percent of similarity between the text of the statements.
47
54
  :rtype: float
48
55
  """
49
56
 
50
- # Return 0 if either statement has a falsy text value
51
- if not statement_a.text or not statement_b.text:
57
+ # Return 0 if either statement has a None text value
58
+ if text_a is None or text_b is None:
52
59
  return 0
53
60
 
54
61
  # Get the lowercase version of both strings
55
- statement_a_text = str(statement_a.text.lower())
56
- statement_b_text = str(statement_b.text.lower())
62
+ statement_a_text = str(text_a.lower())
63
+ statement_b_text = str(text_b.lower())
57
64
 
58
65
  similarity = SequenceMatcher(
59
66
  None,
@@ -103,15 +110,20 @@ class SpacySimilarity(Comparator):
103
110
  # Disable the Named Entity Recognition (NER) component because it is not necessary
104
111
  self.nlp = spacy.load(model, exclude=['ner'])
105
112
 
106
- def compare(self, statement_a, statement_b):
113
+ def compare_text(self, text_a, text_b):
107
114
  """
108
- Compare the two input statements.
115
+ Compare the similarity of two strings.
109
116
 
110
117
  :return: The percent of similarity between the closest synset distance.
111
118
  :rtype: float
112
119
  """
113
- document_a = self.nlp(statement_a.text)
114
- document_b = self.nlp(statement_b.text)
120
+
121
+ # Return 0 if either statement has a None text value
122
+ if text_a is None or text_b is None:
123
+ return 0
124
+
125
+ document_a = self.nlp(text_a)
126
+ document_b = self.nlp(text_b)
115
127
 
116
128
  return document_a.similarity(document_b)
117
129
 
@@ -155,14 +167,19 @@ class JaccardSimilarity(Comparator):
155
167
  # Disable the Named Entity Recognition (NER) component because it is not necessary
156
168
  self.nlp = spacy.load(model, exclude=['ner'])
157
169
 
158
- def compare(self, statement_a, statement_b):
170
+ def compare_text(self, text_a, text_b):
159
171
  """
160
172
  Return the calculated similarity of two
161
173
  statements based on the Jaccard index.
162
174
  """
175
+
176
+ # Return 0 if either statement has a None text value
177
+ if text_a is None or text_b is None:
178
+ return 0
179
+
163
180
  # Make both strings lowercase
164
- document_a = self.nlp(statement_a.text.lower())
165
- document_b = self.nlp(statement_b.text.lower())
181
+ document_a = self.nlp(text_a.lower())
182
+ document_b = self.nlp(text_b.lower())
166
183
 
167
184
  statement_a_lemmas = frozenset([
168
185
  token.lemma_ for token in document_a if not token.is_stop
@@ -23,10 +23,13 @@ class BestMatch(LogicAdapter):
23
23
  self.excluded_words = kwargs.get('excluded_words')
24
24
 
25
25
  def process(self, input_statement, additional_response_selection_parameters=None):
26
+
27
+ # Get all statements that have a response text similar to the input statement
26
28
  search_results = self.search_algorithm.search(input_statement)
27
29
 
28
30
  # Use the input statement as the closest match if no other results are found
29
- closest_match = next(search_results, input_statement)
31
+ input_statement.confidence = 0 # Use 0 confidence when no other results are found
32
+ closest_match = input_statement
30
33
 
31
34
  # Search for the closest match to the input statement
32
35
  for result in search_results:
@@ -36,8 +39,8 @@ class BestMatch(LogicAdapter):
36
39
  if result.confidence >= self.maximum_similarity_threshold:
37
40
  break
38
41
 
39
- self.chatbot.logger.info('Using "{}" as a close match to "{}" with a confidence of {}'.format(
40
- closest_match.text, input_statement.text, closest_match.confidence
42
+ self.chatbot.logger.info('Selecting "{}" as a response to "{}" with a confidence of {}'.format(
43
+ closest_match.in_response_to, input_statement.text, closest_match.confidence
41
44
  ))
42
45
 
43
46
  recent_repeated_responses = filters.get_recent_repeated_responses(
@@ -51,39 +54,34 @@ class BestMatch(LogicAdapter):
51
54
  ))
52
55
 
53
56
  response_selection_parameters = {
54
- 'search_in_response_to': closest_match.search_text,
57
+ 'search_text': closest_match.search_text,
58
+ 'persona_not_startswith': 'bot:',
55
59
  'exclude_text': recent_repeated_responses,
56
60
  'exclude_text_words': self.excluded_words
57
61
  }
58
62
 
59
63
  alternate_response_selection_parameters = {
60
- 'search_in_response_to': self.chatbot.storage.tagger.get_text_index_string(
64
+ 'search_in_response_to': self.chatbot.tagger.get_text_index_string(
61
65
  input_statement.text
62
66
  ),
67
+ 'persona_not_startswith': 'bot:',
63
68
  'exclude_text': recent_repeated_responses,
64
69
  'exclude_text_words': self.excluded_words
65
70
  }
66
71
 
67
72
  if additional_response_selection_parameters:
68
- response_selection_parameters.update(additional_response_selection_parameters)
69
- alternate_response_selection_parameters.update(additional_response_selection_parameters)
70
-
71
- # Get all statements that are in response to the closest match
72
- response_list = list(self.chatbot.storage.filter(**response_selection_parameters))
73
+ response_selection_parameters.update(
74
+ additional_response_selection_parameters
75
+ )
76
+ alternate_response_selection_parameters.update(
77
+ additional_response_selection_parameters
78
+ )
73
79
 
74
- alternate_response_list = []
75
80
 
76
- if not response_list:
77
- self.chatbot.logger.info('No responses found. Generating alternate response list.')
78
- alternate_response_list = list(self.chatbot.storage.filter(**alternate_response_selection_parameters))
81
+ # Get all statements with text similar to the closest match
82
+ response_list = list(self.chatbot.storage.filter(**response_selection_parameters))
79
83
 
80
84
  if response_list:
81
- self.chatbot.logger.info(
82
- 'Selecting response from {} optimal responses.'.format(
83
- len(response_list)
84
- )
85
- )
86
-
87
85
  response = self.select_response(
88
86
  input_statement,
89
87
  response_list,
@@ -91,26 +89,35 @@ class BestMatch(LogicAdapter):
91
89
  )
92
90
 
93
91
  response.confidence = closest_match.confidence
94
- self.chatbot.logger.info('Response selected. Using "{}"'.format(response.text))
95
- elif alternate_response_list:
92
+ self.chatbot.logger.info('Selecting "{}" from {} optimal responses.'.format(
93
+ response.text,
94
+ len(response_list)
95
+ ))
96
+ else:
96
97
  '''
97
98
  The case where there was no responses returned for the selected match
98
99
  but a value exists for the statement the match is in response to.
99
100
  '''
100
- self.chatbot.logger.info(
101
- 'Selecting response from {} optimal alternate responses.'.format(
102
- len(alternate_response_list)
101
+ self.chatbot.logger.info('No responses found. Generating alternate response list.')
102
+
103
+ alternate_response_list = list(self.chatbot.storage.filter(
104
+ **alternate_response_selection_parameters
105
+ ))
106
+
107
+ if alternate_response_list:
108
+ response = self.select_response(
109
+ input_statement,
110
+ alternate_response_list,
111
+ self.chatbot.storage
103
112
  )
104
- )
105
- response = self.select_response(
106
- input_statement,
107
- alternate_response_list,
108
- self.chatbot.storage
109
- )
110
113
 
111
- response.confidence = closest_match.confidence
112
- self.chatbot.logger.info('Alternate response selected. Using "{}"'.format(response.text))
113
- else:
114
- response = self.get_default_response(input_statement)
114
+ response.confidence = closest_match.confidence
115
+ self.chatbot.logger.info('Selected alternative response "{}" from {} options'.format(
116
+ response.text,
117
+ len(alternate_response_list)
118
+ ))
119
+ else:
120
+ response = self.get_default_response(input_statement)
121
+ self.chatbot.logger.info('Using "%s" as a default response.', response.text)
115
122
 
116
123
  return response
@@ -37,7 +37,7 @@ def get_most_frequent_response(input_statement, response_list, storage=None):
37
37
  matching_response = statement
38
38
  occurrence_count = count
39
39
 
40
- # Choose the most commonly occuring matching response
40
+ # Choose the most commonly occurring matching response
41
41
  return matching_response
42
42
 
43
43
 
chatterbot/search.py CHANGED
@@ -21,7 +21,7 @@ class IndexedTextSearch:
21
21
  )
22
22
 
23
23
  self.compare_statements = statement_comparison_function(
24
- language=self.chatbot.storage.tagger.language
24
+ language=self.chatbot.tagger.language
25
25
  )
26
26
 
27
27
  self.search_page_size = kwargs.get(
@@ -43,19 +43,8 @@ class IndexedTextSearch:
43
43
  """
44
44
  self.chatbot.logger.info('Beginning search for close text match')
45
45
 
46
- input_search_text = input_statement.search_text
47
-
48
- if not input_statement.search_text:
49
- self.chatbot.logger.warning(
50
- 'No value for search_text was available on the provided input'
51
- )
52
-
53
- input_search_text = self.chatbot.storage.tagger.get_text_index_string(
54
- input_statement.text
55
- )
56
-
57
46
  search_parameters = {
58
- 'search_text_contains': input_search_text,
47
+ 'search_in_response_to_contains': input_statement.search_text,
59
48
  'persona_not_startswith': 'bot:',
60
49
  'page_size': self.search_page_size
61
50
  }
@@ -71,14 +60,16 @@ class IndexedTextSearch:
71
60
 
72
61
  # Find the closest matching known statement
73
62
  for statement in statement_list:
74
- confidence = self.compare_statements(input_statement, statement)
63
+ confidence = self.compare_statements.compare_text(
64
+ input_statement.text, statement.in_response_to
65
+ )
75
66
 
76
67
  if confidence > best_confidence_so_far:
77
68
  best_confidence_so_far = confidence
78
69
  statement.confidence = confidence
79
70
 
80
71
  self.chatbot.logger.info('Similar text found: {} {}'.format(
81
- statement.text, confidence
72
+ statement.in_response_to, confidence
82
73
  ))
83
74
 
84
75
  yield statement
@@ -107,7 +98,7 @@ class TextSearch:
107
98
  )
108
99
 
109
100
  self.compare_statements = statement_comparison_function(
110
- language=self.chatbot.storage.tagger.language
101
+ language=self.chatbot.tagger.language
111
102
  )
112
103
 
113
104
  self.search_page_size = kwargs.get(
@@ -145,7 +136,9 @@ class TextSearch:
145
136
 
146
137
  # Find the closest matching known statement
147
138
  for statement in statement_list:
148
- confidence = self.compare_statements(input_statement, statement)
139
+ confidence = self.compare_statements.compare_text(
140
+ input_statement.text, statement.in_response_to
141
+ )
149
142
 
150
143
  if confidence > best_confidence_so_far:
151
144
  best_confidence_so_far = confidence
@@ -44,6 +44,7 @@ class DjangoStorageAdapter(StorageAdapter):
44
44
  exclude_text_words = kwargs.pop('exclude_text_words', [])
45
45
  persona_not_startswith = kwargs.pop('persona_not_startswith', None)
46
46
  search_text_contains = kwargs.pop('search_text_contains', None)
47
+ search_in_response_to_contains = kwargs.pop('search_in_response_to_contains', None)
47
48
 
48
49
  # Convert a single sting into a list if only one tag is provided
49
50
  if type(tags) == str:
@@ -83,6 +84,16 @@ class DjangoStorageAdapter(StorageAdapter):
83
84
  or_query
84
85
  )
85
86
 
87
+ if search_in_response_to_contains:
88
+ or_query = Q()
89
+
90
+ for word in search_in_response_to_contains.split(' '):
91
+ or_query |= Q(search_in_response_to__contains=word)
92
+
93
+ statements = statements.filter(
94
+ or_query
95
+ )
96
+
86
97
  if order_by:
87
98
  statements = statements.order_by(*order_by)
88
99
 
@@ -99,13 +110,6 @@ class DjangoStorageAdapter(StorageAdapter):
99
110
 
100
111
  tags = kwargs.pop('tags', [])
101
112
 
102
- if 'search_text' not in kwargs:
103
- kwargs['search_text'] = self.tagger.get_text_index_string(kwargs['text'])
104
-
105
- if 'search_in_response_to' not in kwargs:
106
- if kwargs.get('in_response_to'):
107
- kwargs['search_in_response_to'] = self.tagger.get_text_index_string(kwargs['in_response_to'])
108
-
109
113
  statement = Statement(**kwargs)
110
114
 
111
115
  statement.save()
@@ -129,20 +133,6 @@ class DjangoStorageAdapter(StorageAdapter):
129
133
 
130
134
  tag_cache = {}
131
135
 
132
- # Check if any statements already have a search text
133
- have_search_text = any(statement.search_text for statement in statements)
134
-
135
- # Generate search text values in bulk
136
- if not have_search_text:
137
- search_text_documents = self.tagger.as_nlp_pipeline([statement.text for statement in statements])
138
- response_search_text_documents = self.tagger.as_nlp_pipeline([statement.in_response_to or '' for statement in statements])
139
-
140
- for statement, search_text_document, response_search_text_document in zip(
141
- statements, search_text_documents, response_search_text_documents
142
- ):
143
- statement.search_text = search_text_document._.search_index
144
- statement.search_in_response_to = response_search_text_document._.search_index
145
-
146
136
  for statement in statements:
147
137
 
148
138
  statement_data = statement.serialize()
@@ -176,10 +166,10 @@ class DjangoStorageAdapter(StorageAdapter):
176
166
  else:
177
167
  statement = Statement.objects.create(
178
168
  text=statement.text,
179
- search_text=self.tagger.get_text_index_string(statement.text),
169
+ search_text=statement.search_text,
180
170
  conversation=statement.conversation,
181
171
  in_response_to=statement.in_response_to,
182
- search_in_response_to=self.tagger.get_text_index_string(statement.in_response_to),
172
+ search_in_response_to=statement.search_in_response_to,
183
173
  created_at=statement.created_at
184
174
  )
185
175
 
@@ -82,6 +82,7 @@ class MongoDatabaseAdapter(StorageAdapter):
82
82
  exclude_text_words = kwargs.pop('exclude_text_words', [])
83
83
  persona_not_startswith = kwargs.pop('persona_not_startswith', None)
84
84
  search_text_contains = kwargs.pop('search_text_contains', None)
85
+ search_in_response_to_contains = kwargs.pop('search_in_response_to_contains', None)
85
86
 
86
87
  if tags:
87
88
  kwargs['tags'] = {
@@ -127,6 +128,12 @@ class MongoDatabaseAdapter(StorageAdapter):
127
128
  ])
128
129
  kwargs['search_text'] = re.compile(or_regex)
129
130
 
131
+ if search_in_response_to_contains:
132
+ or_regex = '|'.join([
133
+ '{}'.format(re.escape(word)) for word in search_in_response_to_contains.split(' ')
134
+ ])
135
+ kwargs['search_in_response_to'] = re.compile(or_regex)
136
+
130
137
  mongo_ordering = []
131
138
 
132
139
  if order_by:
@@ -159,13 +166,6 @@ class MongoDatabaseAdapter(StorageAdapter):
159
166
  if 'tags' in kwargs:
160
167
  kwargs['tags'] = list(set(kwargs['tags']))
161
168
 
162
- if 'search_text' not in kwargs:
163
- kwargs['search_text'] = self.tagger.get_text_index_string(kwargs['text'])
164
-
165
- if 'search_in_response_to' not in kwargs:
166
- if kwargs.get('in_response_to'):
167
- kwargs['search_in_response_to'] = self.tagger.get_text_index_string(kwargs['in_response_to'])
168
-
169
169
  inserted = self.statements.insert_one(kwargs)
170
170
 
171
171
  kwargs['id'] = inserted.inserted_id
@@ -178,20 +178,6 @@ class MongoDatabaseAdapter(StorageAdapter):
178
178
  """
179
179
  create_statements = []
180
180
 
181
- # Check if any statements already have a search text
182
- have_search_text = any(statement.search_text for statement in statements)
183
-
184
- # Generate search text values in bulk
185
- if not have_search_text:
186
- search_text_documents = self.tagger.as_nlp_pipeline([statement.text for statement in statements])
187
- response_search_text_documents = self.tagger.as_nlp_pipeline([statement.in_response_to or '' for statement in statements])
188
-
189
- for statement, search_text_document, response_search_text_document in zip(
190
- statements, search_text_documents, response_search_text_documents
191
- ):
192
- statement.search_text = search_text_document._.search_index
193
- statement.search_in_response_to = response_search_text_document._.search_index
194
-
195
181
  for statement in statements:
196
182
  statement_data = statement.serialize()
197
183
  tag_data = list(set(statement_data.pop('tags', [])))
@@ -206,11 +192,6 @@ class MongoDatabaseAdapter(StorageAdapter):
206
192
  data.pop('id', None)
207
193
  data.pop('tags', None)
208
194
 
209
- data['search_text'] = self.tagger.get_text_index_string(data['text'])
210
-
211
- if data.get('in_response_to'):
212
- data['search_in_response_to'] = self.tagger.get_text_index_string(data['in_response_to'])
213
-
214
195
  update_data = {
215
196
  '$set': data
216
197
  }
@@ -114,8 +114,8 @@ class SQLStorageAdapter(StorageAdapter):
114
114
  record = query.first()
115
115
 
116
116
  session.delete(record)
117
-
118
- self._session_finish(session)
117
+ session.commit()
118
+ session.close()
119
119
 
120
120
  def filter(self, **kwargs):
121
121
  """
@@ -139,6 +139,7 @@ class SQLStorageAdapter(StorageAdapter):
139
139
  exclude_text_words = kwargs.pop('exclude_text_words', [])
140
140
  persona_not_startswith = kwargs.pop('persona_not_startswith', None)
141
141
  search_text_contains = kwargs.pop('search_text_contains', None)
142
+ search_in_response_to_contains = kwargs.pop('search_in_response_to_contains', None)
142
143
 
143
144
  # Convert a single sting into a list if only one tag is provided
144
145
  if type(tags) == str:
@@ -180,6 +181,14 @@ class SQLStorageAdapter(StorageAdapter):
180
181
  or_(*or_query)
181
182
  )
182
183
 
184
+ if search_in_response_to_contains:
185
+ or_query = [
186
+ Statement.search_in_response_to.contains(word) for word in search_in_response_to_contains.split(' ')
187
+ ]
188
+ statements = statements.filter(
189
+ or_(*or_query)
190
+ )
191
+
183
192
  if order_by:
184
193
 
185
194
  if 'created_at' in order_by:
@@ -196,7 +205,15 @@ class SQLStorageAdapter(StorageAdapter):
196
205
 
197
206
  session.close()
198
207
 
199
- def create(self, **kwargs):
208
+ def create(
209
+ self,
210
+ text,
211
+ in_response_to=None,
212
+ tags=None,
213
+ search_text=None,
214
+ search_in_response_to=None,
215
+ **kwargs
216
+ ):
200
217
  """
201
218
  Creates a new statement matching the keyword arguments specified.
202
219
  Returns the created statement.
@@ -206,19 +223,25 @@ class SQLStorageAdapter(StorageAdapter):
206
223
 
207
224
  session = self.Session()
208
225
 
209
- tags = set(kwargs.pop('tags', []))
210
-
211
- if 'search_text' not in kwargs:
212
- kwargs['search_text'] = self.tagger.get_text_index_string(kwargs['text'])
213
-
214
- if 'search_in_response_to' not in kwargs:
215
- in_response_to = kwargs.get('in_response_to')
216
- if in_response_to:
217
- kwargs['search_in_response_to'] = self.tagger.get_text_index_string(in_response_to)
218
-
219
- statement = Statement(**kwargs)
220
-
221
- for tag_name in tags:
226
+ if search_text is None:
227
+ if self.raise_on_missing_search_text:
228
+ raise Exception('generate a search_text value')
229
+
230
+ if search_in_response_to is None and in_response_to is not None:
231
+ if self.raise_on_missing_search_text:
232
+ raise Exception('generate a search_in_response_to value')
233
+
234
+ statement = Statement(
235
+ text=text,
236
+ in_response_to=in_response_to,
237
+ search_text=search_text,
238
+ search_in_response_to=search_in_response_to,
239
+ **kwargs
240
+ )
241
+
242
+ tags = frozenset(tags) if tags else frozenset()
243
+ for tag_name in frozenset(tags):
244
+ # TODO: Query existing tags in bulk
222
245
  tag = session.query(Tag).filter_by(name=tag_name).first()
223
246
 
224
247
  if not tag:
@@ -235,7 +258,7 @@ class SQLStorageAdapter(StorageAdapter):
235
258
 
236
259
  statement_object = self.model_to_object(statement)
237
260
 
238
- self._session_finish(session)
261
+ session.close()
239
262
 
240
263
  return statement_object
241
264
 
@@ -256,14 +279,8 @@ class SQLStorageAdapter(StorageAdapter):
256
279
 
257
280
  # Generate search text values in bulk
258
281
  if not have_search_text:
259
- search_text_documents = self.tagger.as_nlp_pipeline([statement.text for statement in statements])
260
- response_search_text_documents = self.tagger.as_nlp_pipeline([statement.in_response_to or '' for statement in statements])
261
-
262
- for statement, search_text_document, response_search_text_document in zip(
263
- statements, search_text_documents, response_search_text_documents
264
- ):
265
- statement.search_text = search_text_document._.search_index
266
- statement.search_in_response_to = response_search_text_document._.search_index
282
+ if self.raise_on_missing_search_text:
283
+ raise Exception('generate bulk_search_text values')
267
284
 
268
285
  for statement in statements:
269
286
 
@@ -305,48 +322,50 @@ class SQLStorageAdapter(StorageAdapter):
305
322
  Statement = self.get_model('statement')
306
323
  Tag = self.get_model('tag')
307
324
 
308
- if statement is not None:
309
- session = self.Session()
310
- record = None
311
-
312
- if hasattr(statement, 'id') and statement.id is not None:
313
- record = session.query(Statement).get(statement.id)
314
- else:
315
- record = session.query(Statement).filter(
316
- Statement.text == statement.text,
317
- Statement.conversation == statement.conversation,
318
- ).first()
319
-
320
- # Create a new statement entry if one does not already exist
321
- if not record:
322
- record = Statement(
323
- text=statement.text,
324
- conversation=statement.conversation,
325
- persona=statement.persona
326
- )
325
+ session = self.Session()
326
+ record = None
327
327
 
328
- # Update the response value
329
- record.in_response_to = statement.in_response_to
328
+ if hasattr(statement, 'id') and statement.id is not None:
329
+ record = session.query(Statement).get(statement.id)
330
+ else:
331
+ record = session.query(Statement).filter(
332
+ Statement.text == statement.text,
333
+ Statement.conversation == statement.conversation,
334
+ ).first()
335
+
336
+ # Create a new statement entry if one does not already exist
337
+ if not record:
338
+ record = Statement(
339
+ text=statement.text,
340
+ conversation=statement.conversation,
341
+ persona=statement.persona
342
+ )
330
343
 
331
- record.created_at = statement.created_at
344
+ # Update the response value
345
+ record.in_response_to = statement.in_response_to
332
346
 
333
- record.search_text = self.tagger.get_text_index_string(statement.text)
347
+ record.created_at = statement.created_at
334
348
 
335
- if statement.in_response_to:
336
- record.search_in_response_to = self.tagger.get_text_index_string(statement.in_response_to)
349
+ if not statement.search_text:
350
+ if self.raise_on_missing_search_text:
351
+ raise Exception('update issued without search_text value')
337
352
 
338
- for tag_name in statement.get_tags():
339
- tag = session.query(Tag).filter_by(name=tag_name).first()
353
+ if statement.in_response_to and not statement.search_in_response_to:
354
+ if self.raise_on_missing_search_text:
355
+ raise Exception('update issued without search_in_response_to value')
340
356
 
341
- if not tag:
342
- # Create the record
343
- tag = Tag(name=tag_name)
357
+ for tag_name in statement.get_tags():
358
+ tag = session.query(Tag).filter_by(name=tag_name).first()
344
359
 
345
- record.tags.append(tag)
360
+ if not tag:
361
+ # Create the record
362
+ tag = Tag(name=tag_name)
346
363
 
347
- session.add(record)
364
+ record.tags.append(tag)
348
365
 
349
- self._session_finish(session)
366
+ session.add(record)
367
+ session.commit()
368
+ session.close()
350
369
 
351
370
  def get_random(self):
352
371
  """
@@ -388,13 +407,3 @@ class SQLStorageAdapter(StorageAdapter):
388
407
  """
389
408
  from chatterbot.ext.sqlalchemy_app.models import Base
390
409
  Base.metadata.create_all(self.engine)
391
-
392
- def _session_finish(self, session, statement_text=None):
393
- from sqlalchemy.exc import InvalidRequestError
394
- try:
395
- session.commit()
396
- except InvalidRequestError:
397
- # Log the statement text and the exception
398
- self.logger.exception(statement_text)
399
- finally:
400
- session.close()
@@ -1,6 +1,4 @@
1
1
  import logging
2
- from chatterbot import languages
3
- from chatterbot.tagging import PosLemmaTagger
4
2
 
5
3
 
6
4
  class StorageAdapter(object):
@@ -17,11 +15,9 @@ class StorageAdapter(object):
17
15
  """
18
16
  self.logger = kwargs.get('logger', logging.getLogger(__name__))
19
17
 
20
- Tagger = kwargs.get('tagger', PosLemmaTagger)
21
-
22
- self.tagger = Tagger(language=kwargs.get(
23
- 'tagger_language', languages.ENG
24
- ))
18
+ self.raise_on_missing_search_text = kwargs.get(
19
+ 'raise_on_missing_search_text', True
20
+ )
25
21
 
26
22
  def get_model(self, model_name):
27
23
  """
@@ -116,6 +112,12 @@ class StorageAdapter(object):
116
112
  this parameter, then the statement will be included in the
117
113
  result set.
118
114
  Defaults to None
115
+
116
+ :param search_in_response_to: If the ``search_in_response_to`` field
117
+ of a statement contains a word that is in the string provided to
118
+ this parameter, then the statement will be included in the
119
+ result set.
120
+ Defaults to None
119
121
  """
120
122
  raise self.AdapterMethodNotImplementedError(
121
123
  'The `filter` method is not implemented by this adapter.'
chatterbot/trainers.py CHANGED
@@ -93,7 +93,7 @@ class ListTrainer(Trainer):
93
93
  statements_to_create = []
94
94
 
95
95
  # Run the pipeline in bulk to improve performance
96
- documents = self.chatbot.storage.tagger.as_nlp_pipeline(conversation)
96
+ documents = self.chatbot.tagger.as_nlp_pipeline(conversation)
97
97
 
98
98
  # for text in enumerate(conversation):
99
99
  for document in tqdm(documents, desc='List Trainer', disable=not self.show_training_progress):
@@ -143,7 +143,7 @@ class ChatterBotCorpusTrainer(Trainer):
143
143
  for conversation in corpus:
144
144
 
145
145
  # Run the pipeline in bulk to improve performance
146
- documents = self.chatbot.storage.tagger.as_nlp_pipeline(conversation)
146
+ documents = self.chatbot.tagger.as_nlp_pipeline(conversation)
147
147
 
148
148
  previous_statement_text = None
149
149
  previous_statement_search_text = ''
@@ -344,7 +344,7 @@ class UbuntuCorpusTrainer(Trainer):
344
344
  previous_statement_text = None
345
345
  previous_statement_search_text = ''
346
346
 
347
- documents = self.chatbot.storage.tagger.as_nlp_pipeline([
347
+ documents = self.chatbot.tagger.as_nlp_pipeline([
348
348
  (
349
349
  row[3],
350
350
  {
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ChatterBot
3
- Version: 1.2.1
3
+ Version: 1.2.2
4
4
  Summary: ChatterBot is a machine learning, conversational dialog engine
5
5
  Author: Gunther Cox
6
6
  License: Copyright (c) 2016 - 2025, Gunther Cox
@@ -1,8 +1,8 @@
1
- chatterbot/__init__.py,sha256=Edfsbjoii_j0I4OOI1p1JLwfVRpwUW_8NhjfoNuvZRI,158
1
+ chatterbot/__init__.py,sha256=a8HpoSxm94mcHPYO24QiFVYS4WzYafqOGq7j_VuTIr8,158
2
2
  chatterbot/__main__.py,sha256=nk19D56TlPT9Zdqkq4qZZrOnLKEc4YTwUVWmXYwSyHg,207
3
3
  chatterbot/adapters.py,sha256=LJ_KqLpHKPdYAFpMGK63RVH4weV5X0Zh5uGyan6qdVU,878
4
- chatterbot/chatterbot.py,sha256=WhV4sSa8psrm6DebtAewIEwkz3MrnjWRftQeZMQso2w,9328
5
- chatterbot/comparisons.py,sha256=bSVTsCbFEiyVv0Rg60addtrxgjbG3QlJa3murgDWkws,6145
4
+ chatterbot/chatterbot.py,sha256=YLKLkQ-XI4Unr3rbzjpGIupOqenuevm21tAnx-yFFgQ,10400
5
+ chatterbot/comparisons.py,sha256=8-qLFWC1Z7tZ3iPUpyY6AD9l-whSo3QE1Rno_SzIp-I,6570
6
6
  chatterbot/components.py,sha256=ld3Xam8olBClvE5QqcFYggE7Q7tODCFek7BO7lhfyeU,1782
7
7
  chatterbot/constants.py,sha256=c_KPQKc82CHX6H3maeyTYqWatx6j-N-8HJhmejoVi60,1875
8
8
  chatterbot/conversation.py,sha256=Y-WOxPN7I3igRyAEe5py1sfS6JIYPdbwjVlY3kM8Ys8,3175
@@ -12,10 +12,10 @@ chatterbot/filters.py,sha256=vDSDJz2FM10xT6ybs7qJiqy4X5I4gTEfwEnjBGUxZ9g,847
12
12
  chatterbot/languages.py,sha256=XSenfc5FxHk_JWG5gGHsZvjvrPBbCaVCm_OU-BeER_M,32784
13
13
  chatterbot/parsing.py,sha256=vS-w70cMkjq4YEpDOv_pXWhAI6Zj06WYDAcMDhYDj0M,23174
14
14
  chatterbot/preprocessors.py,sha256=aI4v987dZc7GOKhO43i0i73EX748hehYSpzikFHpEXs,1271
15
- chatterbot/response_selection.py,sha256=9E7CJKlC3UCHTGvEmYvfE9cEHOltJeU77z9NfRzmeB8,2950
16
- chatterbot/search.py,sha256=Bx6j_NIdp7YDnJvunE7rmk9ma37AKYw96R0iu2i4chc,5141
15
+ chatterbot/response_selection.py,sha256=aYeZ54jpGIcQnI-1-TDcua_f1p3PiM5_iMg4hF5ZaIU,2951
16
+ chatterbot/search.py,sha256=FTwwON2eKPWqoc5uoKh4AUmuXDCqyfMcMcXB4wijpxg,4910
17
17
  chatterbot/tagging.py,sha256=GLY9wg_rvn6pSYVML-HcxkIo_3BZ3SAyj-q1oNZY8pI,2584
18
- chatterbot/trainers.py,sha256=4u6RDRPpAnecTEAOrGcDvMTjEn8Kxn8slM4UnovDvNk,13339
18
+ chatterbot/trainers.py,sha256=U1yh0_V7FFL51MeQe1P1Q59weceDbkHh_2kDiDYpSEc,13315
19
19
  chatterbot/utils.py,sha256=ckQXvsjp2FO9GcWxziY67JovN7mShnE4RlzdYarQY5k,3277
20
20
  chatterbot/ext/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
21
  chatterbot/ext/django_chatterbot/__init__.py,sha256=iWzmBzpAsYwkwi1faxAPFY9L1bbL97RgVXK2uqULIMc,92
@@ -48,19 +48,19 @@ chatterbot/ext/django_chatterbot/migrations/__init__.py,sha256=47DEQpj8HBSa-_TIm
48
48
  chatterbot/ext/sqlalchemy_app/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
49
49
  chatterbot/ext/sqlalchemy_app/models.py,sha256=pjU4e2BUSitw_IAkrk4iFQ9pZRU35y5MomvX7aiBFCw,2492
50
50
  chatterbot/logic/__init__.py,sha256=28-5swBCPfSVMl8xB5C8frOKZ2oj28rQfenbd9E4r-4,531
51
- chatterbot/logic/best_match.py,sha256=iueuuK6WTTywqskwC1CuWD2uHgA65Hz30h8tce_H1bU,4619
51
+ chatterbot/logic/best_match.py,sha256=8TNW0uZ_Uq-XPfaZUMUZDVH6KzDT65j59xblxQBv-dQ,4820
52
52
  chatterbot/logic/logic_adapter.py,sha256=5kNEirh5fiF5hhSMFXD7bIkKwXHmrSsSS4qDm-6xry0,4694
53
53
  chatterbot/logic/mathematical_evaluation.py,sha256=GPDKUwNFajERof2R-MkPGi2jJRP-rKAGm_f0V9JHDHE,2282
54
54
  chatterbot/logic/specific_response.py,sha256=_VeJaa3kun0J7cVzLOlTYK1tBpth0B6UWms7QwtcNpY,1082
55
55
  chatterbot/logic/time_adapter.py,sha256=mxdoQGeC5IjREH4PU5iHYOIPEvnYnzgysocR8xMYWXc,2406
56
56
  chatterbot/logic/unit_conversion.py,sha256=DT50HHE3njUo_ttDSU8S-fwBylarhDF3l_McRLSX6Ic,5823
57
57
  chatterbot/storage/__init__.py,sha256=IymIHfeisvULQzUYsQSiUBbWIZ1m5EzyMVI082tTw5w,369
58
- chatterbot/storage/django_storage.py,sha256=b_hJkBm0ZNgBB16HjJaNYVFEPs0AApRjOZpuiGNDaXk,6990
59
- chatterbot/storage/mongodb.py,sha256=s6rzn0m_eu4kkXeb80vVCyHyZrdrVW_Zf8PlttUHQlk,8962
60
- chatterbot/storage/sql_storage.py,sha256=X3PKKYcS4tiBWmuvxNMTLxK0shUQlchW0UPB1Mb1rqI,13144
61
- chatterbot/storage/storage_adapter.py,sha256=QwY3cGVpZLxkmww0OnPGZbdOykuOZT7WSKYHq84TgI0,5956
62
- ChatterBot-1.2.1.dist-info/LICENSE,sha256=5b04U8mi0wp5gJMYlKi49EalnD9Q2nwY_6UEI_Avgu4,1476
63
- ChatterBot-1.2.1.dist-info/METADATA,sha256=dF7H-ZhaTdjCYSDFhsChT8h38fFQfykFmXvB6s1v1n0,8311
64
- ChatterBot-1.2.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
65
- ChatterBot-1.2.1.dist-info/top_level.txt,sha256=W2TzAbAJ-eBXTIKZZhVlkrh87msJNmBQpyhkrHqjSrE,11
66
- ChatterBot-1.2.1.dist-info/RECORD,,
58
+ chatterbot/storage/django_storage.py,sha256=S5S4GipD7FyNJy4RWu5-S8sLPuSJIObwTtqTpnJu-ok,6159
59
+ chatterbot/storage/mongodb.py,sha256=Ozvdvcjb3LGZxcvbSQGzwP9VloYQbmsa2FaKunFpMyU,7934
60
+ chatterbot/storage/sql_storage.py,sha256=VVYZvclG_74IN-MrG0edc-RQ2gUO6gRQyCWWSO0MmCk,13082
61
+ chatterbot/storage/storage_adapter.py,sha256=fvyb-qNiB0HMJ0siVMCWUIY--6d-C47N1_kKZVFZAv4,6110
62
+ chatterbot-1.2.2.dist-info/LICENSE,sha256=5b04U8mi0wp5gJMYlKi49EalnD9Q2nwY_6UEI_Avgu4,1476
63
+ chatterbot-1.2.2.dist-info/METADATA,sha256=EGYwvpQjhqJOfjlQWI83memJr0sXKo9QwM_wbp1wtrg,8311
64
+ chatterbot-1.2.2.dist-info/WHEEL,sha256=nn6H5-ilmfVryoAQl3ZQ2l8SH5imPWFpm1A5FgEuFV4,91
65
+ chatterbot-1.2.2.dist-info/top_level.txt,sha256=W2TzAbAJ-eBXTIKZZhVlkrh87msJNmBQpyhkrHqjSrE,11
66
+ chatterbot-1.2.2.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: setuptools (75.8.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5