ChatterBot 1.2.5__py3-none-any.whl → 1.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chatterbot/__init__.py +1 -1
- chatterbot/storage/sql_storage.py +1 -1
- chatterbot/tagging.py +5 -4
- chatterbot/trainers.py +61 -43
- {chatterbot-1.2.5.dist-info → chatterbot-1.2.6.dist-info}/METADATA +4 -3
- {chatterbot-1.2.5.dist-info → chatterbot-1.2.6.dist-info}/RECORD +9 -9
- {chatterbot-1.2.5.dist-info → chatterbot-1.2.6.dist-info}/WHEEL +0 -0
- {chatterbot-1.2.5.dist-info → chatterbot-1.2.6.dist-info}/licenses/LICENSE +0 -0
- {chatterbot-1.2.5.dist-info → chatterbot-1.2.6.dist-info}/top_level.txt +0 -0
chatterbot/__init__.py
CHANGED
@@ -44,7 +44,7 @@ class SQLStorageAdapter(StorageAdapter):
|
|
44
44
|
dbapi_connection.execute('PRAGMA journal_mode=WAL')
|
45
45
|
dbapi_connection.execute('PRAGMA synchronous=NORMAL')
|
46
46
|
|
47
|
-
if not inspect(self.engine).has_table(
|
47
|
+
if not inspect(self.engine).has_table('statement'):
|
48
48
|
self.create_database()
|
49
49
|
|
50
50
|
# Check if the expected index exists on the text field of the statement table
|
chatterbot/tagging.py
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
from typing import List, Union, Tuple
|
1
2
|
from chatterbot import languages
|
2
3
|
from chatterbot.utils import get_model_for_language
|
3
4
|
import spacy
|
@@ -20,7 +21,7 @@ class LowercaseTagger(object):
|
|
20
21
|
'chatterbot_lowercase_indexer', name='chatterbot_lowercase_indexer', last=True
|
21
22
|
)
|
22
23
|
|
23
|
-
def get_text_index_string(self, text):
|
24
|
+
def get_text_index_string(self, text: Union[str, List[str]]):
|
24
25
|
if isinstance(text, list):
|
25
26
|
documents = self.nlp.pipe(text)
|
26
27
|
return [document._.search_index for document in documents]
|
@@ -28,7 +29,7 @@ class LowercaseTagger(object):
|
|
28
29
|
document = self.nlp(text)
|
29
30
|
return document._.search_index
|
30
31
|
|
31
|
-
def as_nlp_pipeline(self, texts):
|
32
|
+
def as_nlp_pipeline(self, texts: Union[List[str], Tuple[str, dict]]):
|
32
33
|
|
33
34
|
process_as_tuples = texts and isinstance(texts[0], tuple)
|
34
35
|
|
@@ -52,7 +53,7 @@ class PosLemmaTagger(object):
|
|
52
53
|
'chatterbot_bigram_indexer', name='chatterbot_bigram_indexer', last=True
|
53
54
|
)
|
54
55
|
|
55
|
-
def get_text_index_string(self, text):
|
56
|
+
def get_text_index_string(self, text: Union[str, List[str]]):
|
56
57
|
"""
|
57
58
|
Return a string of text containing part-of-speech, lemma pairs.
|
58
59
|
"""
|
@@ -63,7 +64,7 @@ class PosLemmaTagger(object):
|
|
63
64
|
document = self.nlp(text)
|
64
65
|
return document._.search_index
|
65
66
|
|
66
|
-
def as_nlp_pipeline(self, texts):
|
67
|
+
def as_nlp_pipeline(self, texts: Union[List[str], Tuple[str, dict]]):
|
67
68
|
"""
|
68
69
|
Accepts a single string or a list of strings, or a list of tuples
|
69
70
|
where the first element is the text and the second element is a
|
chatterbot/trainers.py
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
import os
|
2
|
-
import sys
|
3
2
|
import csv
|
4
3
|
import time
|
5
4
|
import glob
|
6
5
|
import json
|
7
6
|
import tarfile
|
7
|
+
from typing import List, Union
|
8
8
|
from tqdm import tqdm
|
9
9
|
from dateutil import parser as date_parser
|
10
|
+
from chatterbot.chatterbot import ChatBot
|
10
11
|
from chatterbot.conversation import Statement
|
11
12
|
|
12
13
|
|
@@ -20,7 +21,7 @@ class Trainer(object):
|
|
20
21
|
the environment variable if it is set.
|
21
22
|
"""
|
22
23
|
|
23
|
-
def __init__(self, chatbot, **kwargs):
|
24
|
+
def __init__(self, chatbot: ChatBot, **kwargs):
|
24
25
|
self.chatbot = chatbot
|
25
26
|
|
26
27
|
environment_default = bool(int(os.environ.get('CHATTERBOT_SHOW_TRAINING_PROGRESS', True)))
|
@@ -30,7 +31,7 @@ class Trainer(object):
|
|
30
31
|
environment_default
|
31
32
|
)
|
32
33
|
|
33
|
-
def get_preprocessed_statement(self, input_statement):
|
34
|
+
def get_preprocessed_statement(self, input_statement: Statement) -> Statement:
|
34
35
|
"""
|
35
36
|
Preprocess the input statement.
|
36
37
|
"""
|
@@ -58,7 +59,7 @@ class Trainer(object):
|
|
58
59
|
)
|
59
60
|
super().__init__(message or default)
|
60
61
|
|
61
|
-
def _generate_export_data(self):
|
62
|
+
def _generate_export_data(self) -> list:
|
62
63
|
result = []
|
63
64
|
for statement in self.chatbot.storage.filter():
|
64
65
|
if statement.in_response_to:
|
@@ -82,7 +83,7 @@ class ListTrainer(Trainer):
|
|
82
83
|
where the list represents a conversation.
|
83
84
|
"""
|
84
85
|
|
85
|
-
def train(self, conversation:
|
86
|
+
def train(self, conversation: List[str]):
|
86
87
|
"""
|
87
88
|
Train the chat bot based on the provided list of
|
88
89
|
statements that represents a single conversation.
|
@@ -95,7 +96,6 @@ class ListTrainer(Trainer):
|
|
95
96
|
# Run the pipeline in bulk to improve performance
|
96
97
|
documents = self.chatbot.tagger.as_nlp_pipeline(conversation)
|
97
98
|
|
98
|
-
# for text in enumerate(conversation):
|
99
99
|
for document in tqdm(documents, desc='List Trainer', disable=self.disable_progress):
|
100
100
|
statement_search_text = document._.search_index
|
101
101
|
|
@@ -123,7 +123,7 @@ class ChatterBotCorpusTrainer(Trainer):
|
|
123
123
|
ChatterBot dialog corpus.
|
124
124
|
"""
|
125
125
|
|
126
|
-
def train(self, *corpus_paths):
|
126
|
+
def train(self, *corpus_paths: Union[str, List[str]]):
|
127
127
|
from chatterbot.corpus import load_corpus, list_corpus_files
|
128
128
|
|
129
129
|
data_file_paths = []
|
@@ -178,7 +178,17 @@ class GenericFileTrainer(Trainer):
|
|
178
178
|
or directory of those file types.
|
179
179
|
"""
|
180
180
|
|
181
|
-
|
181
|
+
# NOTE: If the value is an integer, this be the
|
182
|
+
# column index instead of the key or header
|
183
|
+
DEFAULT_STATEMENT_TO_HEADER_MAPPING = {
|
184
|
+
'text': 'text',
|
185
|
+
'conversation': 'conversation',
|
186
|
+
'created_at': 'created_at',
|
187
|
+
'persona': 'persona',
|
188
|
+
'tags': 'tags'
|
189
|
+
}
|
190
|
+
|
191
|
+
def __init__(self, chatbot: ChatBot, **kwargs):
|
182
192
|
"""
|
183
193
|
data_path: str The path to the data file or directory.
|
184
194
|
field_map: dict A dictionary containing the column name to header mapping.
|
@@ -187,22 +197,12 @@ class GenericFileTrainer(Trainer):
|
|
187
197
|
|
188
198
|
self.file_extension = None
|
189
199
|
|
190
|
-
# NOTE: If the key is an integer, this be the
|
191
|
-
# column index instead of the key or header
|
192
|
-
DEFAULT_STATEMENT_TO_HEADER_MAPPING = {
|
193
|
-
'text': 'text',
|
194
|
-
'conversation': 'conversation',
|
195
|
-
'created_at': 'created_at',
|
196
|
-
'persona': 'persona',
|
197
|
-
'tags': 'tags'
|
198
|
-
}
|
199
|
-
|
200
200
|
self.field_map = kwargs.get(
|
201
201
|
'field_map',
|
202
|
-
DEFAULT_STATEMENT_TO_HEADER_MAPPING
|
202
|
+
self.DEFAULT_STATEMENT_TO_HEADER_MAPPING
|
203
203
|
)
|
204
204
|
|
205
|
-
def _get_file_list(self, data_path, limit):
|
205
|
+
def _get_file_list(self, data_path: str, limit: Union[int, None]):
|
206
206
|
"""
|
207
207
|
Get a list of files to read from the data set.
|
208
208
|
"""
|
@@ -302,6 +302,20 @@ class GenericFileTrainer(Trainer):
|
|
302
302
|
f'Current mapping: {self.field_map}'
|
303
303
|
)
|
304
304
|
|
305
|
+
response_to_search_index_mapping = {}
|
306
|
+
|
307
|
+
if 'in_response_to' in self.field_map.keys():
|
308
|
+
# Generate the search_in_response_to value for the in_response_to fields
|
309
|
+
response_documents = self.chatbot.tagger.as_nlp_pipeline([
|
310
|
+
(
|
311
|
+
row[self.field_map['in_response_to']]
|
312
|
+
) for row in data if len(row) > 0 and row[self.field_map['in_response_to']] is not None
|
313
|
+
])
|
314
|
+
|
315
|
+
# (Process the response values the same way as the text values)
|
316
|
+
for document in response_documents:
|
317
|
+
response_to_search_index_mapping[document.text] = document._.search_index
|
318
|
+
|
305
319
|
for document, context in documents:
|
306
320
|
statement = Statement(
|
307
321
|
text=document.text,
|
@@ -314,14 +328,19 @@ class GenericFileTrainer(Trainer):
|
|
314
328
|
statement.created_at = date_parser.parse(context['created_at'])
|
315
329
|
|
316
330
|
statement.search_text = document._.search_index
|
317
|
-
statement.search_in_response_to = previous_statement_search_text
|
318
331
|
|
319
332
|
# Use the in_response_to attribute for the previous statement if
|
320
333
|
# one is defined, otherwise use the last statement which was created
|
321
334
|
if 'in_response_to' in self.field_map.keys():
|
322
335
|
statement.in_response_to = context.get(self.field_map['in_response_to'], None)
|
336
|
+
statement.search_in_response_to = response_to_search_index_mapping.get(
|
337
|
+
context.get(self.field_map['in_response_to'], None), ''
|
338
|
+
)
|
323
339
|
else:
|
340
|
+
# List-type data such as CSVs with no response specified can use
|
341
|
+
# the previous statement as the in_response_to value
|
324
342
|
statement.in_response_to = previous_statement_text
|
343
|
+
statement.search_in_response_to = previous_statement_search_text
|
325
344
|
|
326
345
|
for preprocessor in self.chatbot.preprocessors:
|
327
346
|
statement = preprocessor(statement)
|
@@ -345,7 +364,6 @@ class GenericFileTrainer(Trainer):
|
|
345
364
|
)
|
346
365
|
)
|
347
366
|
|
348
|
-
|
349
367
|
class CsvFileTrainer(GenericFileTrainer):
|
350
368
|
"""
|
351
369
|
.. note::
|
@@ -358,11 +376,11 @@ class CsvFileTrainer(GenericFileTrainer):
|
|
358
376
|
parameter is set to 'tsv'.
|
359
377
|
|
360
378
|
:param str file_extension: The file extension to look for when searching for files (defaults to 'csv').
|
361
|
-
:param
|
379
|
+
:param dict field_map: A dictionary containing the database column name to header mapping.
|
362
380
|
Values can be either the header name (str) or the column index (int).
|
363
381
|
"""
|
364
382
|
|
365
|
-
def __init__(self, chatbot, **kwargs):
|
383
|
+
def __init__(self, chatbot: ChatBot, **kwargs):
|
366
384
|
super().__init__(chatbot, **kwargs)
|
367
385
|
|
368
386
|
self.file_extension = kwargs.get('file_extension', 'csv')
|
@@ -376,26 +394,26 @@ class JsonFileTrainer(GenericFileTrainer):
|
|
376
394
|
Allow chatbots to be trained with data from a JSON file or
|
377
395
|
directory of JSON files.
|
378
396
|
|
379
|
-
:param
|
397
|
+
:param dict field_map: A dictionary containing the database column name to header mapping.
|
380
398
|
"""
|
381
399
|
|
382
|
-
|
400
|
+
DEFAULT_STATEMENT_TO_KEY_MAPPING = {
|
401
|
+
'text': 'text',
|
402
|
+
'conversation': 'conversation',
|
403
|
+
'created_at': 'created_at',
|
404
|
+
'in_response_to': 'in_response_to',
|
405
|
+
'persona': 'persona',
|
406
|
+
'tags': 'tags'
|
407
|
+
}
|
408
|
+
|
409
|
+
def __init__(self, chatbot: ChatBot, **kwargs):
|
383
410
|
super().__init__(chatbot, **kwargs)
|
384
411
|
|
385
412
|
self.file_extension = 'json'
|
386
413
|
|
387
|
-
DEFAULT_STATEMENT_TO_KEY_MAPPING = {
|
388
|
-
'text': 'text',
|
389
|
-
'conversation': 'conversation',
|
390
|
-
'created_at': 'created_at',
|
391
|
-
'in_response_to': 'in_response_to',
|
392
|
-
'persona': 'persona',
|
393
|
-
'tags': 'tags'
|
394
|
-
}
|
395
|
-
|
396
414
|
self.field_map = kwargs.get(
|
397
415
|
'field_map',
|
398
|
-
DEFAULT_STATEMENT_TO_KEY_MAPPING
|
416
|
+
self.DEFAULT_STATEMENT_TO_KEY_MAPPING
|
399
417
|
)
|
400
418
|
|
401
419
|
|
@@ -412,7 +430,7 @@ class UbuntuCorpusTrainer(CsvFileTrainer):
|
|
412
430
|
:param str ubuntu_corpus_data_directory: The directory where the Ubuntu corpus data is already located, or where it should be downloaded and extracted.
|
413
431
|
"""
|
414
432
|
|
415
|
-
def __init__(self, chatbot, **kwargs):
|
433
|
+
def __init__(self, chatbot: ChatBot, **kwargs):
|
416
434
|
super().__init__(chatbot, **kwargs)
|
417
435
|
home_directory = os.path.expanduser('~')
|
418
436
|
|
@@ -434,7 +452,7 @@ class UbuntuCorpusTrainer(CsvFileTrainer):
|
|
434
452
|
'persona': 1,
|
435
453
|
}
|
436
454
|
|
437
|
-
def is_downloaded(self, file_path):
|
455
|
+
def is_downloaded(self, file_path: str):
|
438
456
|
"""
|
439
457
|
Check if the data file is already downloaded.
|
440
458
|
"""
|
@@ -444,7 +462,7 @@ class UbuntuCorpusTrainer(CsvFileTrainer):
|
|
444
462
|
|
445
463
|
return False
|
446
464
|
|
447
|
-
def is_extracted(self, file_path):
|
465
|
+
def is_extracted(self, file_path: str):
|
448
466
|
"""
|
449
467
|
Check if the data file is already extracted.
|
450
468
|
"""
|
@@ -454,7 +472,7 @@ class UbuntuCorpusTrainer(CsvFileTrainer):
|
|
454
472
|
return True
|
455
473
|
return False
|
456
474
|
|
457
|
-
def download(self, url, show_status=True):
|
475
|
+
def download(self, url: str, show_status=True):
|
458
476
|
"""
|
459
477
|
Download a file from the given url.
|
460
478
|
Show a progress indicator for the download status.
|
@@ -493,7 +511,7 @@ class UbuntuCorpusTrainer(CsvFileTrainer):
|
|
493
511
|
print('Download location: %s' % file_path)
|
494
512
|
return file_path
|
495
513
|
|
496
|
-
def extract(self, file_path):
|
514
|
+
def extract(self, file_path: str):
|
497
515
|
"""
|
498
516
|
Extract a tar file at the specified file path.
|
499
517
|
"""
|
@@ -533,7 +551,7 @@ class UbuntuCorpusTrainer(CsvFileTrainer):
|
|
533
551
|
|
534
552
|
return True
|
535
553
|
|
536
|
-
def _get_file_list(self, data_path, limit):
|
554
|
+
def _get_file_list(self, data_path: str, limit: Union[int, None]):
|
537
555
|
"""
|
538
556
|
Get a list of files to read from the data set.
|
539
557
|
"""
|
@@ -564,7 +582,7 @@ class UbuntuCorpusTrainer(CsvFileTrainer):
|
|
564
582
|
|
565
583
|
yield file_path
|
566
584
|
|
567
|
-
def train(self, data_download_url, limit=None):
|
585
|
+
def train(self, data_download_url: str, limit: Union[int, None] = None):
|
568
586
|
"""
|
569
587
|
:param str data_download_url: The URL to download the Ubuntu dialog corpus from.
|
570
588
|
:param int limit: The maximum number of files to train from.
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: ChatterBot
|
3
|
-
Version: 1.2.5
|
3
|
+
Version: 1.2.6
|
4
4
|
Summary: ChatterBot is a machine learning, conversational dialog engine
|
5
5
|
Author: Gunther Cox
|
6
6
|
License-Expression: BSD-3-Clause
|
@@ -66,10 +66,11 @@ known conversations. The language independent design of ChatterBot allows it
|
|
66
66
|
to be trained to speak any language.
|
67
67
|
|
68
68
|
[](https://pypi.python.org/pypi/chatterbot/)
|
69
|
-
[](https://www.python.org/downloads/release/python-360/)
|
70
70
|
[](https://coveralls.io/r/gunthercox/ChatterBot)
|
71
|
-
[](https://bsky.app/profile/chatterbot.us)
|
72
72
|
[](https://gitter.im/chatterbot/Lobby?utm_source=badge&utm_medium=badge&utm_content=badge)
|
73
|
+
<!-- [](https://codeclimate.com/github/gunthercox/ChatterBot) -->
|
73
74
|
|
74
75
|
An example of typical input would be something like this:
|
75
76
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
chatterbot/__init__.py,sha256=
|
1
|
+
chatterbot/__init__.py,sha256=wHoKxLsCOplHJ02JmxRCg5cRL-Z-blQblhJKdE95Pck,158
|
2
2
|
chatterbot/__main__.py,sha256=zvH4uxtGlGrP-ht_LkhX29duzjm3hRH800SDCq4YOwg,637
|
3
3
|
chatterbot/adapters.py,sha256=LJ_KqLpHKPdYAFpMGK63RVH4weV5X0Zh5uGyan6qdVU,878
|
4
4
|
chatterbot/chatterbot.py,sha256=nqxdeTBWdA_LDIEWTMf2gphvpNfd0c9htNwrxa_7pzo,12543
|
@@ -14,8 +14,8 @@ chatterbot/parsing.py,sha256=vS-w70cMkjq4YEpDOv_pXWhAI6Zj06WYDAcMDhYDj0M,23174
|
|
14
14
|
chatterbot/preprocessors.py,sha256=kqsgnejSj6Z1rr9U2TGHKOp-MMaFWBdNT41EwyhQFls,1389
|
15
15
|
chatterbot/response_selection.py,sha256=JpUVuBYrgxhHkDMRHXyWvhluSLxQED5mAhE1-VvJSmg,2970
|
16
16
|
chatterbot/search.py,sha256=FTwwON2eKPWqoc5uoKh4AUmuXDCqyfMcMcXB4wijpxg,4910
|
17
|
-
chatterbot/tagging.py,sha256=
|
18
|
-
chatterbot/trainers.py,sha256=
|
17
|
+
chatterbot/tagging.py,sha256=si0PQ3CY5EbiZ0-PIslbBtExZaQZ3NYrLUGoourweKo,2585
|
18
|
+
chatterbot/trainers.py,sha256=CZezNX68Byg9gg2z-PUZbTc5pqzTzbhgSmqHx7P6Ivg,20973
|
19
19
|
chatterbot/utils.py,sha256=ubPiBapvUvdFVhrDjxqq5IGekUh9qMUJs_dQ605xLAI,2924
|
20
20
|
chatterbot/vectorstores.py,sha256=-S1NB8PrZzoFIu95n2W7N4UaXuCUpyDUXIGYFebjv08,2056
|
21
21
|
chatterbot/ext/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -60,10 +60,10 @@ chatterbot/storage/__init__.py,sha256=ADw0WQe0YKr1UIDQLaxwf0mHDnuKW_CSzgz11K4TM-
|
|
60
60
|
chatterbot/storage/django_storage.py,sha256=BpuVEO4rPOiPu7f7KW1Zyar2LqEXy6I4HgPYhyGP0kE,6305
|
61
61
|
chatterbot/storage/mongodb.py,sha256=Ozvdvcjb3LGZxcvbSQGzwP9VloYQbmsa2FaKunFpMyU,7934
|
62
62
|
chatterbot/storage/redis.py,sha256=FKROrzZ-7WXZ8ZoK0dKmTDdS45TxL04XOSeu0p3Jrak,12675
|
63
|
-
chatterbot/storage/sql_storage.py,sha256=
|
63
|
+
chatterbot/storage/sql_storage.py,sha256=wESsp0OKuXjYmAZ7dl-ztX7lt4xEpjD0WCBQXK22__4,13063
|
64
64
|
chatterbot/storage/storage_adapter.py,sha256=fvyb-qNiB0HMJ0siVMCWUIY--6d-C47N1_kKZVFZAv4,6110
|
65
|
-
chatterbot-1.2.5.dist-info/licenses/LICENSE,sha256=5b04U8mi0wp5gJMYlKi49EalnD9Q2nwY_6UEI_Avgu4,1476
|
66
|
-
chatterbot-1.2.
|
67
|
-
chatterbot-1.2.5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
68
|
-
chatterbot-1.2.5.dist-info/top_level.txt,sha256=W2TzAbAJ-eBXTIKZZhVlkrh87msJNmBQpyhkrHqjSrE,11
|
69
|
-
chatterbot-1.2.5.dist-info/RECORD,,
|
65
|
+
chatterbot-1.2.6.dist-info/licenses/LICENSE,sha256=5b04U8mi0wp5gJMYlKi49EalnD9Q2nwY_6UEI_Avgu4,1476
|
66
|
+
chatterbot-1.2.6.dist-info/METADATA,sha256=lOBXOb2GEaOguy3_Rwqo_TEP77GeunbhiI9BGqB1zfc,7175
|
67
|
+
chatterbot-1.2.6.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
68
|
+
chatterbot-1.2.6.dist-info/top_level.txt,sha256=W2TzAbAJ-eBXTIKZZhVlkrh87msJNmBQpyhkrHqjSrE,11
|
69
|
+
chatterbot-1.2.6.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|