UniTok 3.0.2b0__tar.gz → 3.0.3b0__tar.gz
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- {UniTok-3.0.2b0 → UniTok-3.0.3b0}/PKG-INFO +1 -1
- {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/unidep.py +12 -11
- {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/unitok.py +4 -3
- {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/vocab.py +13 -10
- {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/vocabs.py +6 -3
- {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok.egg-info/PKG-INFO +1 -1
- {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok.egg-info/SOURCES.txt +0 -2
- {UniTok-3.0.2b0 → UniTok-3.0.3b0}/setup.py +1 -1
- UniTok-3.0.2b0/UniTok/compatible/__init__.py +0 -0
- UniTok-3.0.2b0/UniTok/compatible/uni_warnings.py +0 -74
- {UniTok-3.0.2b0 → UniTok-3.0.3b0}/README.md +0 -0
- {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/__init__.py +0 -0
- {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/analysis/__init__.py +0 -0
- {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/analysis/lengths.py +0 -0
- {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/analysis/plot.py +0 -0
- {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/cols.py +0 -0
- {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/column.py +0 -0
- {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/global_setting.py +0 -0
- {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/meta.py +0 -0
- {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/tok/__init__.py +0 -0
- {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/tok/bert_tok.py +0 -0
- {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/tok/entity_tok.py +0 -0
- {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/tok/id_tok.py +0 -0
- {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/tok/number_tok.py +0 -0
- {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/tok/seq_tok.py +0 -0
- {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/tok/split_tok.py +0 -0
- {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/tok/tok.py +0 -0
- {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok.egg-info/dependency_links.txt +0 -0
- {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok.egg-info/requires.txt +0 -0
- {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok.egg-info/top_level.txt +0 -0
- {UniTok-3.0.2b0 → UniTok-3.0.3b0}/setup.cfg +0 -0
@@ -1,14 +1,12 @@
|
|
1
1
|
import json
|
2
2
|
import os
|
3
3
|
import random
|
4
|
+
import warnings
|
4
5
|
from typing import Dict, List, Callable
|
5
6
|
|
6
7
|
import numpy as np
|
7
8
|
import tqdm
|
8
9
|
|
9
|
-
from .compatible.uni_warnings import MetaDataDeprecationWarning, VocabInfoDeprecationWarning, \
|
10
|
-
ColInfoDeprecationWarning, GetMaxLengthDeprecationWarning, GetVocabDeprecationWarning, \
|
11
|
-
GetVocabSizeDeprecationWarning, IsListColDeprecationWarning, ShuffleDeprecationWarning
|
12
10
|
from .meta import Meta, Col, Voc
|
13
11
|
from .vocab import Vocab
|
14
12
|
from .vocabs import Vocabs
|
@@ -178,38 +176,41 @@ class UniDep:
|
|
178
176
|
|
179
177
|
@property
|
180
178
|
def meta_data(self):
|
181
|
-
|
179
|
+
warnings.warn('meta_data is deprecated, '
|
180
|
+
'use meta instead (will be removed in 4.x version)', DeprecationWarning)
|
182
181
|
return self.meta
|
183
182
|
|
184
183
|
@property
|
185
184
|
def vocab_info(self):
|
186
|
-
|
185
|
+
warnings.warn('vocab_info is deprecated, '
|
186
|
+
'use vocs instead (will be removed in 4.x version)', DeprecationWarning)
|
187
187
|
return self.vocs
|
188
188
|
|
189
189
|
@property
|
190
190
|
def col_info(self):
|
191
|
-
|
191
|
+
warnings.warn('col_info is deprecated, '
|
192
|
+
'use cols instead (will be removed in 4.x version)', DeprecationWarning)
|
192
193
|
return self.cols
|
193
194
|
|
194
195
|
def get_vocab_size(self, col_name, as_vocab=False):
|
195
|
-
|
196
|
+
warnings.warn('unidep.get_vocab_size is deprecated (will be removed in 4.x version)', DeprecationWarning)
|
196
197
|
vocab_id = col_name if as_vocab else self.get_vocab(col_name)
|
197
198
|
return self.vocs[vocab_id].size
|
198
199
|
|
199
200
|
def get_vocab(self, col_name):
|
200
|
-
|
201
|
+
warnings.warn('unidep.get_vocab is deprecated (will be removed in 4.x version)', DeprecationWarning)
|
201
202
|
return self.cols[col_name].voc.name
|
202
203
|
|
203
204
|
def get_max_length(self, col_name):
|
204
|
-
|
205
|
+
warnings.warn('unidep.get_max_length is deprecated (will be removed in 4.x version)', DeprecationWarning)
|
205
206
|
return self.cols[col_name].max_length
|
206
207
|
|
207
208
|
def is_list_col(self, col_name):
|
208
|
-
|
209
|
+
warnings.warn('unidep.is_list_col is deprecated (will be removed in 4.x version)', DeprecationWarning)
|
209
210
|
return self.cols[col_name].list
|
210
211
|
|
211
212
|
def shuffle(self, shuffle=True):
|
212
|
-
|
213
|
+
warnings.warn('unidep.shuffle is deprecated (will be removed in 4.x version)', DeprecationWarning)
|
213
214
|
if shuffle:
|
214
215
|
random.shuffle(self._indexes)
|
215
216
|
else:
|
@@ -1,12 +1,12 @@
|
|
1
1
|
import json
|
2
2
|
import os
|
3
|
+
import warnings
|
3
4
|
from typing import Optional
|
4
5
|
|
5
6
|
import numpy as np
|
6
7
|
import pandas as pd
|
7
8
|
|
8
9
|
from .cols import Cols
|
9
|
-
from .compatible.uni_warnings import VocabDepotDeprecationWarning, GetTokPathDeprecationWarning
|
10
10
|
from .column import Column, IndexColumn
|
11
11
|
from .tok.bert_tok import BertTok
|
12
12
|
from .tok.entity_tok import EntTok
|
@@ -29,7 +29,8 @@ class UniTok:
|
|
29
29
|
|
30
30
|
@property
|
31
31
|
def vocab_depots(self):
|
32
|
-
|
32
|
+
warnings.warn('vocab_depot is deprecated, '
|
33
|
+
'use vocabs instead (will be removed in 4.x version)', DeprecationWarning)
|
33
34
|
return self.vocabs
|
34
35
|
|
35
36
|
def add_col(self, col: Column):
|
@@ -124,7 +125,7 @@ class UniTok:
|
|
124
125
|
"""
|
125
126
|
Get the store path of the tokenizer of a column
|
126
127
|
"""
|
127
|
-
|
128
|
+
warnings.warn('unitok.get_tok_path is deprecated (will be removed in 4.x version)', DeprecationWarning)
|
128
129
|
return self.cols[col_name].tok.vocab.get_store_path(store_dir)
|
129
130
|
|
130
131
|
def store_data(self, store_dir):
|
@@ -1,12 +1,10 @@
|
|
1
1
|
import math
|
2
2
|
import os
|
3
|
+
import warnings
|
3
4
|
from typing import Union, List
|
4
5
|
|
5
6
|
import numpy as np
|
6
7
|
|
7
|
-
from UniTok.compatible.uni_warnings import VocabMapDeprecationWarning, OOVDefaultDeprecationWarning, \
|
8
|
-
MinFrequencyDeprecationWarning, TrimVocabDeprecationWarning
|
9
|
-
|
10
8
|
|
11
9
|
class VocabMap(dict):
|
12
10
|
def __call__(self, *args, **kwargs):
|
@@ -49,7 +47,8 @@ class Vocab:
|
|
49
47
|
"""
|
50
48
|
Deprecated, use o2i instead
|
51
49
|
"""
|
52
|
-
|
50
|
+
warnings.warn('vocab.index2obj and vocab.obj2index are deprecated, '
|
51
|
+
'use vocab.i2o and vocab.o2i instead (will be removed in 4.x version)', DeprecationWarning)
|
53
52
|
return self.o2i
|
54
53
|
|
55
54
|
@property
|
@@ -57,7 +56,8 @@ class Vocab:
|
|
57
56
|
"""
|
58
57
|
Deprecated, use i2o instead
|
59
58
|
"""
|
60
|
-
|
59
|
+
warnings.warn('vocab.index2obj and vocab.obj2index are deprecated, '
|
60
|
+
'use vocab.i2o and vocab.o2i instead (will be removed in 4.x version)', DeprecationWarning)
|
61
61
|
return self.i2o
|
62
62
|
|
63
63
|
def extend(self, objs):
|
@@ -130,7 +130,8 @@ class Vocab:
|
|
130
130
|
|
131
131
|
@property
|
132
132
|
def oov_default(self):
|
133
|
-
|
133
|
+
warnings.warn('vocab.oov_default is deprecated, '
|
134
|
+
'use vocab.oov_token instead (will be removed in 4.x version)', DeprecationWarning)
|
134
135
|
return self._oov_token
|
135
136
|
|
136
137
|
def allow_edit(self):
|
@@ -192,7 +193,8 @@ class Vocab:
|
|
192
193
|
:return:
|
193
194
|
"""
|
194
195
|
if min_count is None:
|
195
|
-
|
196
|
+
warnings.warn('vocab.min_frequency is deprecated, '
|
197
|
+
'use vocab.min_count instead (will be removed in 4.x version)', DeprecationWarning)
|
196
198
|
min_count = min_frequency
|
197
199
|
|
198
200
|
vocabs = []
|
@@ -262,10 +264,11 @@ class Vocab:
|
|
262
264
|
def __getattr__(self, item):
|
263
265
|
if item in ['frequency_mode', 'frequency', 'max_frequency', 'frequent_vocab']:
|
264
266
|
raise AttributeError(f'{item} is deprecated after UniTok 3.0, '
|
265
|
-
|
266
|
-
|
267
|
+
f'degrade to 2.4.3.2 or lower to use it, '
|
268
|
+
f'or check new features of Vocab class')
|
267
269
|
|
268
270
|
@property
|
269
271
|
def trim_vocab(self):
|
270
|
-
|
272
|
+
warnings.warn('vocab.trim_vocab is deprecated, '
|
273
|
+
'use vocab.trim instead (will be removed in 4.x version)', DeprecationWarning)
|
271
274
|
return self.trim
|
@@ -1,4 +1,5 @@
|
|
1
|
-
|
1
|
+
import warnings
|
2
|
+
|
2
3
|
from .vocab import Vocab
|
3
4
|
|
4
5
|
|
@@ -9,12 +10,14 @@ class Vocabs(dict):
|
|
9
10
|
|
10
11
|
@property
|
11
12
|
def col_map(self):
|
12
|
-
|
13
|
+
warnings.warn('vocab_depot.col_map is deprecated, '
|
14
|
+
'use vocabs.cols instead (will be removed in 4.x version)', DeprecationWarning)
|
13
15
|
return self.cols
|
14
16
|
|
15
17
|
@property
|
16
18
|
def depots(self):
|
17
|
-
|
19
|
+
warnings.warn('vocab_depot.depots is deprecated, '
|
20
|
+
'use vocabs instead (will be removed in 4.x version)', DeprecationWarning)
|
18
21
|
return self
|
19
22
|
|
20
23
|
def append(self, col_or_vocab):
|
@@ -17,8 +17,6 @@ UniTok.egg-info/top_level.txt
|
|
17
17
|
UniTok/analysis/__init__.py
|
18
18
|
UniTok/analysis/lengths.py
|
19
19
|
UniTok/analysis/plot.py
|
20
|
-
UniTok/compatible/__init__.py
|
21
|
-
UniTok/compatible/uni_warnings.py
|
22
20
|
UniTok/tok/__init__.py
|
23
21
|
UniTok/tok/bert_tok.py
|
24
22
|
UniTok/tok/entity_tok.py
|
File without changes
|
@@ -1,74 +0,0 @@
|
|
1
|
-
import warnings
|
2
|
-
from typing import Callable
|
3
|
-
|
4
|
-
warned_flags = set()
|
5
|
-
|
6
|
-
|
7
|
-
class UniWarning:
|
8
|
-
def __init__(self, msg, type_: Callable = warnings.warn):
|
9
|
-
self.msg = msg
|
10
|
-
self.type = type_
|
11
|
-
|
12
|
-
def __call__(self, *args, **kwargs):
|
13
|
-
if self not in warned_flags:
|
14
|
-
warned_flags.add(self)
|
15
|
-
warnings.warn(self.msg.format(*args, **kwargs), self.type)
|
16
|
-
|
17
|
-
|
18
|
-
VocabMapDeprecationWarning = UniWarning(
|
19
|
-
'vocab.index2obj and vocab.obj2index are deprecated, '
|
20
|
-
'use vocab.i2o and vocab.o2i instead (will be removed in 4.x version)', type_=DeprecationWarning)
|
21
|
-
|
22
|
-
OOVDefaultDeprecationWarning = UniWarning(
|
23
|
-
'vocab.oov_default is deprecated, '
|
24
|
-
'use vocab.oov_token instead (will be removed in 4.x version)', type_=DeprecationWarning)
|
25
|
-
|
26
|
-
TrimVocabDeprecationWarning = UniWarning(
|
27
|
-
'vocab.trim_vocab is deprecated, '
|
28
|
-
'use vocab.trim instead (will be removed in 4.x version)', type_=DeprecationWarning)
|
29
|
-
|
30
|
-
MinFrequencyDeprecationWarning = UniWarning(
|
31
|
-
'vocab.min_frequency is deprecated, '
|
32
|
-
'use vocab.min_count instead (will be removed in 4.x version)', type_=DeprecationWarning)
|
33
|
-
|
34
|
-
VocabDepotDeprecationWarning = UniWarning(
|
35
|
-
'vocab_depot is deprecated, '
|
36
|
-
'use vocabs instead (will be removed in 4.x version)', type_=DeprecationWarning)
|
37
|
-
|
38
|
-
ColMapDeprecationWarning = UniWarning(
|
39
|
-
'vocab_depot.col_map is deprecated, '
|
40
|
-
'use vocabs.cols instead (will be removed in 4.x version)', type_=DeprecationWarning)
|
41
|
-
|
42
|
-
GetTokPathDeprecationWarning = UniWarning(
|
43
|
-
'unitok.get_tok_path is deprecated (will be removed in 4.x version)', type_=DeprecationWarning)
|
44
|
-
|
45
|
-
DepotsDeprecationWarning = UniWarning(
|
46
|
-
'vocab_depot.depots is deprecated, '
|
47
|
-
'use vocabs instead (will be removed in 4.x version)', type_=DeprecationWarning)
|
48
|
-
|
49
|
-
MetaDataDeprecationWarning = UniWarning(
|
50
|
-
'meta_data is deprecated, '
|
51
|
-
'use meta instead (will be removed in 4.x version)', type_=DeprecationWarning)
|
52
|
-
|
53
|
-
VocabInfoDeprecationWarning = UniWarning(
|
54
|
-
'vocab_info is deprecated, '
|
55
|
-
'use vocs instead (will be removed in 4.x version)', type_=DeprecationWarning)
|
56
|
-
|
57
|
-
ColInfoDeprecationWarning = UniWarning(
|
58
|
-
'col_info is deprecated, '
|
59
|
-
'use cols instead (will be removed in 4.x version)', type_=DeprecationWarning)
|
60
|
-
|
61
|
-
GetMaxLengthDeprecationWarning = UniWarning(
|
62
|
-
'unidep.get_max_length is deprecated, (will be removed in 4.x version)', type_=DeprecationWarning)
|
63
|
-
|
64
|
-
GetVocabDeprecationWarning = UniWarning(
|
65
|
-
'unidep.get_vocab is deprecated, (will be removed in 4.x version)', type_=DeprecationWarning)
|
66
|
-
|
67
|
-
GetVocabSizeDeprecationWarning = UniWarning(
|
68
|
-
'unidep.get_vocab_size is deprecated, (will be removed in 4.x version)', type_=DeprecationWarning)
|
69
|
-
|
70
|
-
IsListColDeprecationWarning = UniWarning(
|
71
|
-
'unidep.is_list_col is deprecated, (will be removed in 4.x version)', type_=DeprecationWarning)
|
72
|
-
|
73
|
-
ShuffleDeprecationWarning = UniWarning(
|
74
|
-
'unidep.shuffle is deprecated, (will be removed in 4.x version)', type_=DeprecationWarning)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|