ChatterBot 1.2.5__py3-none-any.whl → 1.2.6__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
chatterbot/__init__.py CHANGED
@@ -4,7 +4,7 @@ ChatterBot is a machine learning, conversational dialog engine.
  from .chatterbot import ChatBot
 
 
- __version__ = '1.2.5'
+ __version__ = '1.2.6'
 
 
  __all__ = (
      'ChatBot',
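
The only change to chatterbot/__init__.py is the version bump, which can be confirmed from the installed package:

    import chatterbot

    print(chatterbot.__version__)  # expected to print '1.2.6' after upgrading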
chatterbot/storage/sql_storage.py CHANGED
@@ -44,7 +44,7 @@ class SQLStorageAdapter(StorageAdapter):
                  dbapi_connection.execute('PRAGMA journal_mode=WAL')
                  dbapi_connection.execute('PRAGMA synchronous=NORMAL')
 
-         if not inspect(self.engine).has_table(self.engine, 'statement'):
+         if not inspect(self.engine).has_table('statement'):
              self.create_database()
 
          # Check if the expected index exists on the text field of the statement table
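
The storage change above removes a stray argument: in current SQLAlchemy releases, Inspector.has_table() takes the table name (plus an optional schema), not the engine. A minimal sketch of the corrected check against a throwaway SQLite engine:

    from sqlalchemy import create_engine, inspect

    # Stand-in engine; SQLStorageAdapter builds its own from the configured database URI
    engine = create_engine('sqlite:///:memory:')

    # Inspector.has_table() expects only the table name; the pre-1.2.6 code also
    # passed the engine as an extra positional argument, which does not match
    # the has_table(table_name, schema=None) signature
    if not inspect(engine).has_table('statement'):
        print('statement table missing - create_database() would run here')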
chatterbot/tagging.py CHANGED
@@ -1,3 +1,4 @@
+ from typing import List, Union, Tuple
  from chatterbot import languages
  from chatterbot.utils import get_model_for_language
  import spacy
@@ -20,7 +21,7 @@ class LowercaseTagger(object):
              'chatterbot_lowercase_indexer', name='chatterbot_lowercase_indexer', last=True
          )
 
-     def get_text_index_string(self, text):
+     def get_text_index_string(self, text: Union[str, List[str]]):
          if isinstance(text, list):
              documents = self.nlp.pipe(text)
              return [document._.search_index for document in documents]
@@ -28,7 +29,7 @@ class LowercaseTagger(object):
          document = self.nlp(text)
          return document._.search_index
 
-     def as_nlp_pipeline(self, texts):
+     def as_nlp_pipeline(self, texts: Union[List[str], Tuple[str, dict]]):
 
          process_as_tuples = texts and isinstance(texts[0], tuple)
 
@@ -52,7 +53,7 @@ class PosLemmaTagger(object):
              'chatterbot_bigram_indexer', name='chatterbot_bigram_indexer', last=True
          )
 
-     def get_text_index_string(self, text):
+     def get_text_index_string(self, text: Union[str, List[str]]):
          """
          Return a string of text containing part-of-speech, lemma pairs.
          """
@@ -63,7 +64,7 @@ class PosLemmaTagger(object):
          document = self.nlp(text)
          return document._.search_index
 
-     def as_nlp_pipeline(self, texts):
+     def as_nlp_pipeline(self, texts: Union[List[str], Tuple[str, dict]]):
          """
          Accepts a single string or a list of strings, or a list of tuples
          where the first element is the text and the second element is a
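
The new annotations only describe what these methods already accept: get_text_index_string takes a single string or a list of strings (lists are run through nlp.pipe in bulk), and as_nlp_pipeline additionally handles (text, context) tuples. A rough usage sketch, assuming the tagger can be constructed with its defaults:

    from chatterbot.tagging import LowercaseTagger

    # Assumes the default constructor and language settings are available
    tagger = LowercaseTagger()

    # A single string returns a single search-index string
    single_index = tagger.get_text_index_string('Hello, how are you?')

    # A list of strings is processed in bulk and returns a list of index strings
    batch_index = tagger.get_text_index_string(['Hello there', 'Good morning'])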
chatterbot/trainers.py CHANGED
@@ -1,12 +1,13 @@
  import os
- import sys
  import csv
  import time
  import glob
  import json
  import tarfile
+ from typing import List, Union
  from tqdm import tqdm
  from dateutil import parser as date_parser
+ from chatterbot.chatterbot import ChatBot
  from chatterbot.conversation import Statement
 
 
@@ -20,7 +21,7 @@ class Trainer(object):
      the environment variable if it is set.
      """
 
-     def __init__(self, chatbot, **kwargs):
+     def __init__(self, chatbot: ChatBot, **kwargs):
          self.chatbot = chatbot
 
          environment_default = bool(int(os.environ.get('CHATTERBOT_SHOW_TRAINING_PROGRESS', True)))
@@ -30,7 +31,7 @@ class Trainer(object):
              environment_default
          )
 
-     def get_preprocessed_statement(self, input_statement):
+     def get_preprocessed_statement(self, input_statement: Statement) -> Statement:
          """
          Preprocess the input statement.
          """
@@ -58,7 +59,7 @@ class Trainer(object):
          )
          super().__init__(message or default)
 
-     def _generate_export_data(self):
+     def _generate_export_data(self) -> list:
          result = []
          for statement in self.chatbot.storage.filter():
              if statement.in_response_to:
@@ -82,7 +83,7 @@ class ListTrainer(Trainer):
      where the list represents a conversation.
      """
 
-     def train(self, conversation: list):
+     def train(self, conversation: List[str]):
          """
          Train the chat bot based on the provided list of
          statements that represents a single conversation.
@@ -95,7 +96,6 @@ class ListTrainer(Trainer):
          # Run the pipeline in bulk to improve performance
          documents = self.chatbot.tagger.as_nlp_pipeline(conversation)
 
-         # for text in enumerate(conversation):
          for document in tqdm(documents, desc='List Trainer', disable=self.disable_progress):
              statement_search_text = document._.search_index
 
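
The updated train(self, conversation: List[str]) signature matches the usual way ListTrainer is driven; a short sketch:

    from chatterbot import ChatBot
    from chatterbot.trainers import ListTrainer

    bot = ChatBot('Example Bot')
    trainer = ListTrainer(bot)

    # Each list is one conversation; every statement is stored as a
    # response to the statement before it
    trainer.train([
        'Hi there!',
        'Hello.',
        'How are you doing?',
        'I am doing well, thanks.',
    ])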
@@ -123,7 +123,7 @@ class ChatterBotCorpusTrainer(Trainer):
      ChatterBot dialog corpus.
      """
 
-     def train(self, *corpus_paths):
+     def train(self, *corpus_paths: Union[str, List[str]]):
          from chatterbot.corpus import load_corpus, list_corpus_files
 
          data_file_paths = []
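
train() still accepts any number of corpus identifiers or file paths, now annotated. Typical usage, assuming the separate chatterbot-corpus data package is installed:

    from chatterbot import ChatBot
    from chatterbot.trainers import ChatterBotCorpusTrainer

    bot = ChatBot('Example Bot')
    trainer = ChatterBotCorpusTrainer(bot)

    # Dotted corpus identifiers and filesystem paths can both be passed
    trainer.train(
        'chatterbot.corpus.english.greetings',
        'chatterbot.corpus.english.conversations',
    )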
@@ -178,7 +178,17 @@ class GenericFileTrainer(Trainer):
      or directory of those file types.
      """
 
-     def __init__(self, chatbot, **kwargs):
+     # NOTE: If the value is an integer, this be the
+     # column index instead of the key or header
+     DEFAULT_STATEMENT_TO_HEADER_MAPPING = {
+         'text': 'text',
+         'conversation': 'conversation',
+         'created_at': 'created_at',
+         'persona': 'persona',
+         'tags': 'tags'
+     }
+
+     def __init__(self, chatbot: ChatBot, **kwargs):
          """
          data_path: str The path to the data file or directory.
          field_map: dict A dictionary containing the column name to header mapping.
@@ -187,22 +197,12 @@ class GenericFileTrainer(Trainer):
 
          self.file_extension = None
 
-         # NOTE: If the key is an integer, this be the
-         # column index instead of the key or header
-         DEFAULT_STATEMENT_TO_HEADER_MAPPING = {
-             'text': 'text',
-             'conversation': 'conversation',
-             'created_at': 'created_at',
-             'persona': 'persona',
-             'tags': 'tags'
-         }
-
          self.field_map = kwargs.get(
              'field_map',
-             DEFAULT_STATEMENT_TO_HEADER_MAPPING
+             self.DEFAULT_STATEMENT_TO_HEADER_MAPPING
          )
 
-     def _get_file_list(self, data_path, limit):
+     def _get_file_list(self, data_path: str, limit: Union[int, None]):
          """
          Get a list of files to read from the data set.
          """
@@ -302,6 +302,20 @@
                  f'Current mapping: {self.field_map}'
              )
 
+         response_to_search_index_mapping = {}
+
+         if 'in_response_to' in self.field_map.keys():
+             # Generate the search_in_response_to value for the in_response_to fields
+             response_documents = self.chatbot.tagger.as_nlp_pipeline([
+                 (
+                     row[self.field_map['in_response_to']]
+                 ) for row in data if len(row) > 0 and row[self.field_map['in_response_to']] is not None
+             ])
+
+             # (Process the response values the same way as the text values)
+             for document in response_documents:
+                 response_to_search_index_mapping[document.text] = document._.search_index
+
          for document, context in documents:
              statement = Statement(
                  text=document.text,
@@ -314,14 +328,19 @@
                  statement.created_at = date_parser.parse(context['created_at'])
 
              statement.search_text = document._.search_index
-             statement.search_in_response_to = previous_statement_search_text
 
              # Use the in_response_to attribute for the previous statement if
              # one is defined, otherwise use the last statement which was created
              if 'in_response_to' in self.field_map.keys():
                  statement.in_response_to = context.get(self.field_map['in_response_to'], None)
+                 statement.search_in_response_to = response_to_search_index_mapping.get(
+                     context.get(self.field_map['in_response_to'], None), ''
+                 )
              else:
+                 # List-type data such as CSVs with no response specified can use
+                 # the previous statement as the in_response_to value
                  statement.in_response_to = previous_statement_text
+                 statement.search_in_response_to = previous_statement_search_text
 
              for preprocessor in self.chatbot.preprocessors:
                  statement = preprocessor(statement)
@@ -345,7 +364,6 @@
              )
          )
 
-
  class CsvFileTrainer(GenericFileTrainer):
      """
      .. note::
@@ -358,11 +376,11 @@ class CsvFileTrainer(GenericFileTrainer):
      parameter is set to 'tsv'.
 
      :param str file_extension: The file extension to look for when searching for files (defaults to 'csv').
-     :param str field_map: A dictionary containing the database column name to header mapping.
+     :param dict field_map: A dictionary containing the database column name to header mapping.
          Values can be either the header name (str) or the column index (int).
      """
 
-     def __init__(self, chatbot, **kwargs):
+     def __init__(self, chatbot: ChatBot, **kwargs):
          super().__init__(chatbot, **kwargs)
 
          self.file_extension = kwargs.get('file_extension', 'csv')
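
Since the header mapping is now a class attribute on GenericFileTrainer, it can be overridden per subclass or replaced per instance through the field_map keyword. A hedged sketch of the per-instance form, using integer column indexes as the docstring above allows; the column layout is assumed purely for illustration:

    from chatterbot import ChatBot
    from chatterbot.trainers import CsvFileTrainer

    bot = ChatBot('Example Bot')

    # Integer values are treated as column indexes rather than header names;
    # here the CSV is assumed to hold timestamp, persona, text in that order
    trainer = CsvFileTrainer(
        bot,
        field_map={
            'created_at': 0,
            'persona': 1,
            'text': 2,
        }
    )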
@@ -376,26 +394,26 @@ class JsonFileTrainer(GenericFileTrainer):
      Allow chatbots to be trained with data from a JSON file or
      directory of JSON files.
 
-     :param str field_map: A dictionary containing the database column name to header mapping.
+     :param dict field_map: A dictionary containing the database column name to header mapping.
      """
 
-     def __init__(self, chatbot, **kwargs):
+     DEFAULT_STATEMENT_TO_KEY_MAPPING = {
+         'text': 'text',
+         'conversation': 'conversation',
+         'created_at': 'created_at',
+         'in_response_to': 'in_response_to',
+         'persona': 'persona',
+         'tags': 'tags'
+     }
+
+     def __init__(self, chatbot: ChatBot, **kwargs):
          super().__init__(chatbot, **kwargs)
 
          self.file_extension = 'json'
 
-         DEFAULT_STATEMENT_TO_KEY_MAPPING = {
-             'text': 'text',
-             'conversation': 'conversation',
-             'created_at': 'created_at',
-             'in_response_to': 'in_response_to',
-             'persona': 'persona',
-             'tags': 'tags'
-         }
-
          self.field_map = kwargs.get(
              'field_map',
-             DEFAULT_STATEMENT_TO_KEY_MAPPING
+             self.DEFAULT_STATEMENT_TO_KEY_MAPPING
          )
 
 
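
JsonFileTrainer uses the same field_map override mechanism with the key-based defaults shown above. A hedged sketch for a data set whose records use different key names; the 'message' and 'reply_to' keys are assumptions for illustration, not part of ChatterBot:

    from chatterbot import ChatBot
    from chatterbot.trainers import JsonFileTrainer

    bot = ChatBot('Example Bot')

    # Map each statement field to the key actually used in the JSON records
    trainer = JsonFileTrainer(
        bot,
        field_map={
            'text': 'message',
            'in_response_to': 'reply_to',
            'conversation': 'conversation',
            'created_at': 'created_at',
            'persona': 'persona',
            'tags': 'tags'
        }
    )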
@@ -412,7 +430,7 @@ class UbuntuCorpusTrainer(CsvFileTrainer):
      :param str ubuntu_corpus_data_directory: The directory where the Ubuntu corpus data is already located, or where it should be downloaded and extracted.
      """
 
-     def __init__(self, chatbot, **kwargs):
+     def __init__(self, chatbot: ChatBot, **kwargs):
          super().__init__(chatbot, **kwargs)
          home_directory = os.path.expanduser('~')
 
@@ -434,7 +452,7 @@ class UbuntuCorpusTrainer(CsvFileTrainer):
              'persona': 1,
          }
 
-     def is_downloaded(self, file_path):
+     def is_downloaded(self, file_path: str):
          """
          Check if the data file is already downloaded.
          """
@@ -444,7 +462,7 @@ class UbuntuCorpusTrainer(CsvFileTrainer):
 
          return False
 
-     def is_extracted(self, file_path):
+     def is_extracted(self, file_path: str):
          """
          Check if the data file is already extracted.
          """
@@ -454,7 +472,7 @@ class UbuntuCorpusTrainer(CsvFileTrainer):
              return True
          return False
 
-     def download(self, url, show_status=True):
+     def download(self, url: str, show_status=True):
          """
          Download a file from the given url.
          Show a progress indicator for the download status.
@@ -493,7 +511,7 @@ class UbuntuCorpusTrainer(CsvFileTrainer):
          print('Download location: %s' % file_path)
          return file_path
 
-     def extract(self, file_path):
+     def extract(self, file_path: str):
          """
          Extract a tar file at the specified file path.
          """
@@ -533,7 +551,7 @@ class UbuntuCorpusTrainer(CsvFileTrainer):
 
          return True
 
-     def _get_file_list(self, data_path, limit):
+     def _get_file_list(self, data_path: str, limit: Union[int, None]):
          """
          Get a list of files to read from the data set.
          """
@@ -564,7 +582,7 @@ class UbuntuCorpusTrainer(CsvFileTrainer):
 
              yield file_path
 
-     def train(self, data_download_url, limit=None):
+     def train(self, data_download_url: str, limit: Union[int, None] = None):
          """
          :param str data_download_url: The URL to download the Ubuntu dialog corpus from.
          :param int limit: The maximum number of files to train from.
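
A usage sketch for the annotated train() signature; the download URL below is a placeholder rather than an official mirror of the Ubuntu dialog corpus:

    from chatterbot import ChatBot
    from chatterbot.trainers import UbuntuCorpusTrainer

    bot = ChatBot('Example Bot')

    # Optionally pass ubuntu_corpus_data_directory='./data' to control where
    # the archive is downloaded and extracted
    trainer = UbuntuCorpusTrainer(bot)

    trainer.train(
        'https://example.com/ubuntu_dialogs.tgz',  # placeholder URL
        limit=100  # train from at most 100 files; None (the default) uses everything
    )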
chatterbot-1.2.5.dist-info/METADATA → chatterbot-1.2.6.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ChatterBot
- Version: 1.2.5
+ Version: 1.2.6
  Summary: ChatterBot is a machine learning, conversational dialog engine
  Author: Gunther Cox
  License-Expression: BSD-3-Clause
@@ -66,10 +66,11 @@ known conversations. The language independent design of ChatterBot allows it
  to be trained to speak any language.
 
  [![Package Version](https://img.shields.io/pypi/v/chatterbot.svg)](https://pypi.python.org/pypi/chatterbot/)
- [![Python 3.9](https://img.shields.io/badge/python-3.9-blue.svg)](https://www.python.org/downloads/release/python-360/)
+ [![Python 3.12](https://img.shields.io/badge/python-3.12-blue.svg)](https://www.python.org/downloads/release/python-360/)
  [![Coverage Status](https://img.shields.io/coveralls/gunthercox/ChatterBot.svg)](https://coveralls.io/r/gunthercox/ChatterBot)
- [![Code Climate](https://codeclimate.com/github/gunthercox/ChatterBot/badges/gpa.svg)](https://codeclimate.com/github/gunthercox/ChatterBot)
+ [![Follow on Bluesky](https://img.shields.io/badge/🦋%20Bluesky-1185fe)](https://bsky.app/profile/chatterbot.us)
  [![Join the chat at https://gitter.im/chatterbot/Lobby](https://badges.gitter.im/chatterbot/Lobby.svg)](https://gitter.im/chatterbot/Lobby?utm_source=badge&utm_medium=badge&utm_content=badge)
+ <!-- [![Code Climate](https://codeclimate.com/github/gunthercox/ChatterBot/badges/gpa.svg)](https://codeclimate.com/github/gunthercox/ChatterBot) -->
 
  An example of typical input would be something like this:
 
chatterbot-1.2.5.dist-info/RECORD → chatterbot-1.2.6.dist-info/RECORD CHANGED
@@ -1,4 +1,4 @@
- chatterbot/__init__.py,sha256=QJV-6PyWrpW0rpSjVn0lKcH3dqb77TlYoU74kv8uVjc,158
+ chatterbot/__init__.py,sha256=wHoKxLsCOplHJ02JmxRCg5cRL-Z-blQblhJKdE95Pck,158
  chatterbot/__main__.py,sha256=zvH4uxtGlGrP-ht_LkhX29duzjm3hRH800SDCq4YOwg,637
  chatterbot/adapters.py,sha256=LJ_KqLpHKPdYAFpMGK63RVH4weV5X0Zh5uGyan6qdVU,878
  chatterbot/chatterbot.py,sha256=nqxdeTBWdA_LDIEWTMf2gphvpNfd0c9htNwrxa_7pzo,12543
@@ -14,8 +14,8 @@ chatterbot/parsing.py,sha256=vS-w70cMkjq4YEpDOv_pXWhAI6Zj06WYDAcMDhYDj0M,23174
  chatterbot/preprocessors.py,sha256=kqsgnejSj6Z1rr9U2TGHKOp-MMaFWBdNT41EwyhQFls,1389
  chatterbot/response_selection.py,sha256=JpUVuBYrgxhHkDMRHXyWvhluSLxQED5mAhE1-VvJSmg,2970
  chatterbot/search.py,sha256=FTwwON2eKPWqoc5uoKh4AUmuXDCqyfMcMcXB4wijpxg,4910
- chatterbot/tagging.py,sha256=czcI2g18vILujphkjvobRyEewJU8-QjS7QRzY-hCZ4o,2429
- chatterbot/trainers.py,sha256=S_y-Q67hgU7p1A_ixJsR91nW_FniJUsSzbdtAQ8KJQM,19749
+ chatterbot/tagging.py,sha256=si0PQ3CY5EbiZ0-PIslbBtExZaQZ3NYrLUGoourweKo,2585
+ chatterbot/trainers.py,sha256=CZezNX68Byg9gg2z-PUZbTc5pqzTzbhgSmqHx7P6Ivg,20973
  chatterbot/utils.py,sha256=ubPiBapvUvdFVhrDjxqq5IGekUh9qMUJs_dQ605xLAI,2924
  chatterbot/vectorstores.py,sha256=-S1NB8PrZzoFIu95n2W7N4UaXuCUpyDUXIGYFebjv08,2056
  chatterbot/ext/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -60,10 +60,10 @@ chatterbot/storage/__init__.py,sha256=ADw0WQe0YKr1UIDQLaxwf0mHDnuKW_CSzgz11K4TM-
  chatterbot/storage/django_storage.py,sha256=BpuVEO4rPOiPu7f7KW1Zyar2LqEXy6I4HgPYhyGP0kE,6305
  chatterbot/storage/mongodb.py,sha256=Ozvdvcjb3LGZxcvbSQGzwP9VloYQbmsa2FaKunFpMyU,7934
  chatterbot/storage/redis.py,sha256=FKROrzZ-7WXZ8ZoK0dKmTDdS45TxL04XOSeu0p3Jrak,12675
- chatterbot/storage/sql_storage.py,sha256=dAMLByFKQgbiTFoBUtKDeqadYRdwVO5fz1OONTcVCH4,13076
+ chatterbot/storage/sql_storage.py,sha256=wESsp0OKuXjYmAZ7dl-ztX7lt4xEpjD0WCBQXK22__4,13063
  chatterbot/storage/storage_adapter.py,sha256=fvyb-qNiB0HMJ0siVMCWUIY--6d-C47N1_kKZVFZAv4,6110
- chatterbot-1.2.5.dist-info/licenses/LICENSE,sha256=5b04U8mi0wp5gJMYlKi49EalnD9Q2nwY_6UEI_Avgu4,1476
- chatterbot-1.2.5.dist-info/METADATA,sha256=UgfV52vY3Rq4A_8rVZurYhKcU0DeNwply-UBh42aTd0,7049
- chatterbot-1.2.5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
- chatterbot-1.2.5.dist-info/top_level.txt,sha256=W2TzAbAJ-eBXTIKZZhVlkrh87msJNmBQpyhkrHqjSrE,11
- chatterbot-1.2.5.dist-info/RECORD,,
+ chatterbot-1.2.6.dist-info/licenses/LICENSE,sha256=5b04U8mi0wp5gJMYlKi49EalnD9Q2nwY_6UEI_Avgu4,1476
+ chatterbot-1.2.6.dist-info/METADATA,sha256=lOBXOb2GEaOguy3_Rwqo_TEP77GeunbhiI9BGqB1zfc,7175
+ chatterbot-1.2.6.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+ chatterbot-1.2.6.dist-info/top_level.txt,sha256=W2TzAbAJ-eBXTIKZZhVlkrh87msJNmBQpyhkrHqjSrE,11
+ chatterbot-1.2.6.dist-info/RECORD,,