UniTok 3.0.2b0__tar.gz → 3.0.3b0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {UniTok-3.0.2b0 → UniTok-3.0.3b0}/PKG-INFO +1 -1
  2. {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/unidep.py +12 -11
  3. {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/unitok.py +4 -3
  4. {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/vocab.py +13 -10
  5. {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/vocabs.py +6 -3
  6. {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok.egg-info/PKG-INFO +1 -1
  7. {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok.egg-info/SOURCES.txt +0 -2
  8. {UniTok-3.0.2b0 → UniTok-3.0.3b0}/setup.py +1 -1
  9. UniTok-3.0.2b0/UniTok/compatible/__init__.py +0 -0
  10. UniTok-3.0.2b0/UniTok/compatible/uni_warnings.py +0 -74
  11. {UniTok-3.0.2b0 → UniTok-3.0.3b0}/README.md +0 -0
  12. {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/__init__.py +0 -0
  13. {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/analysis/__init__.py +0 -0
  14. {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/analysis/lengths.py +0 -0
  15. {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/analysis/plot.py +0 -0
  16. {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/cols.py +0 -0
  17. {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/column.py +0 -0
  18. {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/global_setting.py +0 -0
  19. {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/meta.py +0 -0
  20. {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/tok/__init__.py +0 -0
  21. {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/tok/bert_tok.py +0 -0
  22. {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/tok/entity_tok.py +0 -0
  23. {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/tok/id_tok.py +0 -0
  24. {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/tok/number_tok.py +0 -0
  25. {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/tok/seq_tok.py +0 -0
  26. {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/tok/split_tok.py +0 -0
  27. {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok/tok/tok.py +0 -0
  28. {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok.egg-info/dependency_links.txt +0 -0
  29. {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok.egg-info/requires.txt +0 -0
  30. {UniTok-3.0.2b0 → UniTok-3.0.3b0}/UniTok.egg-info/top_level.txt +0 -0
  31. {UniTok-3.0.2b0 → UniTok-3.0.3b0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: UniTok
3
- Version: 3.0.2b0
3
+ Version: 3.0.3b0
4
4
  Summary: Unified Tokenizer
5
5
  Home-page: https://github.com/Jyonn/UnifiedTokenizer
6
6
  Author: Jyonn Liu
@@ -1,14 +1,12 @@
1
1
  import json
2
2
  import os
3
3
  import random
4
+ import warnings
4
5
  from typing import Dict, List, Callable
5
6
 
6
7
  import numpy as np
7
8
  import tqdm
8
9
 
9
- from .compatible.uni_warnings import MetaDataDeprecationWarning, VocabInfoDeprecationWarning, \
10
- ColInfoDeprecationWarning, GetMaxLengthDeprecationWarning, GetVocabDeprecationWarning, \
11
- GetVocabSizeDeprecationWarning, IsListColDeprecationWarning, ShuffleDeprecationWarning
12
10
  from .meta import Meta, Col, Voc
13
11
  from .vocab import Vocab
14
12
  from .vocabs import Vocabs
@@ -178,38 +176,41 @@ class UniDep:
178
176
 
179
177
  @property
180
178
  def meta_data(self):
181
- MetaDataDeprecationWarning()
179
+ warnings.warn('meta_data is deprecated, '
180
+ 'use meta instead (will be removed in 4.x version)', DeprecationWarning)
182
181
  return self.meta
183
182
 
184
183
  @property
185
184
  def vocab_info(self):
186
- VocabInfoDeprecationWarning()
185
+ warnings.warn('vocab_info is deprecated, '
186
+ 'use vocs instead (will be removed in 4.x version)', DeprecationWarning)
187
187
  return self.vocs
188
188
 
189
189
  @property
190
190
  def col_info(self):
191
- ColInfoDeprecationWarning()
191
+ warnings.warn('col_info is deprecated, '
192
+ 'use cols instead (will be removed in 4.x version)', DeprecationWarning)
192
193
  return self.cols
193
194
 
194
195
  def get_vocab_size(self, col_name, as_vocab=False):
195
- GetVocabSizeDeprecationWarning()
196
+ warnings.warn('unidep.get_vocab_size is deprecated (will be removed in 4.x version)', DeprecationWarning)
196
197
  vocab_id = col_name if as_vocab else self.get_vocab(col_name)
197
198
  return self.vocs[vocab_id].size
198
199
 
199
200
  def get_vocab(self, col_name):
200
- GetVocabDeprecationWarning()
201
+ warnings.warn('unidep.get_vocab is deprecated (will be removed in 4.x version)', DeprecationWarning)
201
202
  return self.cols[col_name].voc.name
202
203
 
203
204
  def get_max_length(self, col_name):
204
- GetMaxLengthDeprecationWarning()
205
+ warnings.warn('unidep.get_max_length is deprecated (will be removed in 4.x version)', DeprecationWarning)
205
206
  return self.cols[col_name].max_length
206
207
 
207
208
  def is_list_col(self, col_name):
208
- IsListColDeprecationWarning()
209
+ warnings.warn('unidep.is_list_col is deprecated (will be removed in 4.x version)', DeprecationWarning)
209
210
  return self.cols[col_name].list
210
211
 
211
212
  def shuffle(self, shuffle=True):
212
- ShuffleDeprecationWarning()
213
+ warnings.warn('unidep.shuffle is deprecated (will be removed in 4.x version)', DeprecationWarning)
213
214
  if shuffle:
214
215
  random.shuffle(self._indexes)
215
216
  else:
@@ -1,12 +1,12 @@
1
1
  import json
2
2
  import os
3
+ import warnings
3
4
  from typing import Optional
4
5
 
5
6
  import numpy as np
6
7
  import pandas as pd
7
8
 
8
9
  from .cols import Cols
9
- from .compatible.uni_warnings import VocabDepotDeprecationWarning, GetTokPathDeprecationWarning
10
10
  from .column import Column, IndexColumn
11
11
  from .tok.bert_tok import BertTok
12
12
  from .tok.entity_tok import EntTok
@@ -29,7 +29,8 @@ class UniTok:
29
29
 
30
30
  @property
31
31
  def vocab_depots(self):
32
- VocabDepotDeprecationWarning()
32
+ warnings.warn('vocab_depot is deprecated, '
33
+ 'use vocabs instead (will be removed in 4.x version)', DeprecationWarning)
33
34
  return self.vocabs
34
35
 
35
36
  def add_col(self, col: Column):
@@ -124,7 +125,7 @@ class UniTok:
124
125
  """
125
126
  Get the store path of the tokenizer of a column
126
127
  """
127
- GetTokPathDeprecationWarning()
128
+ warnings.warn('unitok.get_tok_path is deprecated (will be removed in 4.x version)', DeprecationWarning)
128
129
  return self.cols[col_name].tok.vocab.get_store_path(store_dir)
129
130
 
130
131
  def store_data(self, store_dir):
@@ -1,12 +1,10 @@
1
1
  import math
2
2
  import os
3
+ import warnings
3
4
  from typing import Union, List
4
5
 
5
6
  import numpy as np
6
7
 
7
- from UniTok.compatible.uni_warnings import VocabMapDeprecationWarning, OOVDefaultDeprecationWarning, \
8
- MinFrequencyDeprecationWarning, TrimVocabDeprecationWarning
9
-
10
8
 
11
9
  class VocabMap(dict):
12
10
  def __call__(self, *args, **kwargs):
@@ -49,7 +47,8 @@ class Vocab:
49
47
  """
50
48
  Deprecated, use o2i instead
51
49
  """
52
- VocabMapDeprecationWarning()
50
+ warnings.warn('vocab.index2obj and vocab.obj2index are deprecated, '
51
+ 'use vocab.i2o and vocab.o2i instead (will be removed in 4.x version)', DeprecationWarning)
53
52
  return self.o2i
54
53
 
55
54
  @property
@@ -57,7 +56,8 @@ class Vocab:
57
56
  """
58
57
  Deprecated, use i2o instead
59
58
  """
60
- VocabMapDeprecationWarning()
59
+ warnings.warn('vocab.index2obj and vocab.obj2index are deprecated, '
60
+ 'use vocab.i2o and vocab.o2i instead (will be removed in 4.x version)', DeprecationWarning)
61
61
  return self.i2o
62
62
 
63
63
  def extend(self, objs):
@@ -130,7 +130,8 @@ class Vocab:
130
130
 
131
131
  @property
132
132
  def oov_default(self):
133
- OOVDefaultDeprecationWarning()
133
+ warnings.warn('vocab.oov_default is deprecated, '
134
+ 'use vocab.oov_token instead (will be removed in 4.x version)', DeprecationWarning)
134
135
  return self._oov_token
135
136
 
136
137
  def allow_edit(self):
@@ -192,7 +193,8 @@ class Vocab:
192
193
  :return:
193
194
  """
194
195
  if min_count is None:
195
- MinFrequencyDeprecationWarning()
196
+ warnings.warn('vocab.min_frequency is deprecated, '
197
+ 'use vocab.min_count instead (will be removed in 4.x version)', DeprecationWarning)
196
198
  min_count = min_frequency
197
199
 
198
200
  vocabs = []
@@ -262,10 +264,11 @@ class Vocab:
262
264
  def __getattr__(self, item):
263
265
  if item in ['frequency_mode', 'frequency', 'max_frequency', 'frequent_vocab']:
264
266
  raise AttributeError(f'{item} is deprecated after UniTok 3.0, '
265
- f'degrade to 2.4.3.2 or lower to use it, '
266
- f'or check new features of Vocab class')
267
+ f'degrade to 2.4.3.2 or lower to use it, '
268
+ f'or check new features of Vocab class')
267
269
 
268
270
  @property
269
271
  def trim_vocab(self):
270
- TrimVocabDeprecationWarning()
272
+ warnings.warn('vocab.trim_vocab is deprecated, '
273
+ 'use vocab.trim instead (will be removed in 4.x version)', DeprecationWarning)
271
274
  return self.trim
@@ -1,4 +1,5 @@
1
- from .compatible.uni_warnings import ColMapDeprecationWarning, DepotsDeprecationWarning
1
+ import warnings
2
+
2
3
  from .vocab import Vocab
3
4
 
4
5
 
@@ -9,12 +10,14 @@ class Vocabs(dict):
9
10
 
10
11
  @property
11
12
  def col_map(self):
12
- ColMapDeprecationWarning()
13
+ warnings.warn('vocab_depot.col_map is deprecated, '
14
+ 'use vocabs.cols instead (will be removed in 4.x version)', DeprecationWarning)
13
15
  return self.cols
14
16
 
15
17
  @property
16
18
  def depots(self):
17
- DepotsDeprecationWarning()
19
+ warnings.warn('vocab_depot.depots is deprecated, '
20
+ 'use vocabs instead (will be removed in 4.x version)', DeprecationWarning)
18
21
  return self
19
22
 
20
23
  def append(self, col_or_vocab):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: UniTok
3
- Version: 3.0.2b0
3
+ Version: 3.0.3b0
4
4
  Summary: Unified Tokenizer
5
5
  Home-page: https://github.com/Jyonn/UnifiedTokenizer
6
6
  Author: Jyonn Liu
@@ -17,8 +17,6 @@ UniTok.egg-info/top_level.txt
17
17
  UniTok/analysis/__init__.py
18
18
  UniTok/analysis/lengths.py
19
19
  UniTok/analysis/plot.py
20
- UniTok/compatible/__init__.py
21
- UniTok/compatible/uni_warnings.py
22
20
  UniTok/tok/__init__.py
23
21
  UniTok/tok/bert_tok.py
24
22
  UniTok/tok/entity_tok.py
@@ -6,7 +6,7 @@ long_description = (this_directory / "README.md").read_text()
6
6
 
7
7
  setup(
8
8
  name='UniTok',
9
- version='3.0.2.beta',
9
+ version='3.0.3.beta',
10
10
  keywords=['token', 'tokenizer', 'bert'],
11
11
  description='Unified Tokenizer',
12
12
  long_description=long_description,
File without changes
@@ -1,74 +0,0 @@
1
- import warnings
2
- from typing import Callable
3
-
4
- warned_flags = set()
5
-
6
-
7
- class UniWarning:
8
- def __init__(self, msg, type_: Callable = warnings.warn):
9
- self.msg = msg
10
- self.type = type_
11
-
12
- def __call__(self, *args, **kwargs):
13
- if self not in warned_flags:
14
- warned_flags.add(self)
15
- warnings.warn(self.msg.format(*args, **kwargs), self.type)
16
-
17
-
18
- VocabMapDeprecationWarning = UniWarning(
19
- 'vocab.index2obj and vocab.obj2index are deprecated, '
20
- 'use vocab.i2o and vocab.o2i instead (will be removed in 4.x version)', type_=DeprecationWarning)
21
-
22
- OOVDefaultDeprecationWarning = UniWarning(
23
- 'vocab.oov_default is deprecated, '
24
- 'use vocab.oov_token instead (will be removed in 4.x version)', type_=DeprecationWarning)
25
-
26
- TrimVocabDeprecationWarning = UniWarning(
27
- 'vocab.trim_vocab is deprecated, '
28
- 'use vocab.trim instead (will be removed in 4.x version)', type_=DeprecationWarning)
29
-
30
- MinFrequencyDeprecationWarning = UniWarning(
31
- 'vocab.min_frequency is deprecated, '
32
- 'use vocab.min_count instead (will be removed in 4.x version)', type_=DeprecationWarning)
33
-
34
- VocabDepotDeprecationWarning = UniWarning(
35
- 'vocab_depot is deprecated, '
36
- 'use vocabs instead (will be removed in 4.x version)', type_=DeprecationWarning)
37
-
38
- ColMapDeprecationWarning = UniWarning(
39
- 'vocab_depot.col_map is deprecated, '
40
- 'use vocabs.cols instead (will be removed in 4.x version)', type_=DeprecationWarning)
41
-
42
- GetTokPathDeprecationWarning = UniWarning(
43
- 'unitok.get_tok_path is deprecated (will be removed in 4.x version)', type_=DeprecationWarning)
44
-
45
- DepotsDeprecationWarning = UniWarning(
46
- 'vocab_depot.depots is deprecated, '
47
- 'use vocabs instead (will be removed in 4.x version)', type_=DeprecationWarning)
48
-
49
- MetaDataDeprecationWarning = UniWarning(
50
- 'meta_data is deprecated, '
51
- 'use meta instead (will be removed in 4.x version)', type_=DeprecationWarning)
52
-
53
- VocabInfoDeprecationWarning = UniWarning(
54
- 'vocab_info is deprecated, '
55
- 'use vocs instead (will be removed in 4.x version)', type_=DeprecationWarning)
56
-
57
- ColInfoDeprecationWarning = UniWarning(
58
- 'col_info is deprecated, '
59
- 'use cols instead (will be removed in 4.x version)', type_=DeprecationWarning)
60
-
61
- GetMaxLengthDeprecationWarning = UniWarning(
62
- 'unidep.get_max_length is deprecated, (will be removed in 4.x version)', type_=DeprecationWarning)
63
-
64
- GetVocabDeprecationWarning = UniWarning(
65
- 'unidep.get_vocab is deprecated, (will be removed in 4.x version)', type_=DeprecationWarning)
66
-
67
- GetVocabSizeDeprecationWarning = UniWarning(
68
- 'unidep.get_vocab_size is deprecated, (will be removed in 4.x version)', type_=DeprecationWarning)
69
-
70
- IsListColDeprecationWarning = UniWarning(
71
- 'unidep.is_list_col is deprecated, (will be removed in 4.x version)', type_=DeprecationWarning)
72
-
73
- ShuffleDeprecationWarning = UniWarning(
74
- 'unidep.shuffle is deprecated, (will be removed in 4.x version)', type_=DeprecationWarning)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes