UniTok 3.0.12__tar.gz → 3.0.13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. {UniTok-3.0.12 → UniTok-3.0.13}/PKG-INFO +1 -1
  2. {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/column.py +1 -1
  3. {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/vocab.py +6 -2
  4. {UniTok-3.0.12 → UniTok-3.0.13}/UniTok.egg-info/PKG-INFO +1 -1
  5. {UniTok-3.0.12 → UniTok-3.0.13}/setup.py +1 -1
  6. {UniTok-3.0.12 → UniTok-3.0.13}/README.md +0 -0
  7. {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/__init__.py +0 -0
  8. {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/analysis/__init__.py +0 -0
  9. {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/analysis/lengths.py +0 -0
  10. {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/analysis/plot.py +0 -0
  11. {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/cols.py +0 -0
  12. {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/global_setting.py +0 -0
  13. {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/meta.py +0 -0
  14. {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/tok/__init__.py +0 -0
  15. {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/tok/bert_tok.py +0 -0
  16. {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/tok/ent_tok.py +0 -0
  17. {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/tok/id_tok.py +0 -0
  18. {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/tok/number_tok.py +0 -0
  19. {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/tok/seq_tok.py +0 -0
  20. {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/tok/split_tok.py +0 -0
  21. {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/tok/tok.py +0 -0
  22. {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/unidep.py +0 -0
  23. {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/unitok.py +0 -0
  24. {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/vocabs.py +0 -0
  25. {UniTok-3.0.12 → UniTok-3.0.13}/UniTok.egg-info/SOURCES.txt +0 -0
  26. {UniTok-3.0.12 → UniTok-3.0.13}/UniTok.egg-info/dependency_links.txt +0 -0
  27. {UniTok-3.0.12 → UniTok-3.0.13}/UniTok.egg-info/requires.txt +0 -0
  28. {UniTok-3.0.12 → UniTok-3.0.13}/UniTok.egg-info/top_level.txt +0 -0
  29. {UniTok-3.0.12 → UniTok-3.0.13}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: UniTok
3
- Version: 3.0.12
3
+ Version: 3.0.13
4
4
  Summary: Unified Tokenizer
5
5
  Home-page: https://github.com/Jyonn/UnifiedTokenizer
6
6
  Author: Jyonn Liu
@@ -115,4 +115,4 @@ class Column:
115
115
 
116
116
  class IndexColumn(Column):
117
117
  def __init__(self, name='index'):
118
- super().__init__(name, tok=IdTok(name=name))
118
+ super().__init__(tok=IdTok(name=name))
@@ -192,8 +192,10 @@ class Vocab:
192
192
  def trim(self, min_count=None, min_frequency=1):
193
193
  """
194
194
  trim vocab by min frequency
195
- :return:
195
+ :return: trimmed tokens
196
196
  """
197
+ _trimmed = []
198
+
197
199
  if min_count is None:
198
200
  warnings.warn('vocab.min_frequency is deprecated, '
199
201
  'use vocab.min_count instead (will be removed in 4.x version)', DeprecationWarning)
@@ -203,6 +205,8 @@ class Vocab:
203
205
  for index in self._counter:
204
206
  if self._counter[index] >= min_count:
205
207
  vocabs.append(self.i2o[index])
208
+ else:
209
+ _trimmed.append(self.i2o[index])
206
210
 
207
211
  self.i2o = dict()
208
212
  self.o2i = dict()
@@ -213,7 +217,7 @@ class Vocab:
213
217
  self.extend(vocabs)
214
218
 
215
219
  self._stable_mode = True
216
- return self
220
+ return _trimmed
217
221
 
218
222
  def summarize(self, base=10):
219
223
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: UniTok
3
- Version: 3.0.12
3
+ Version: 3.0.13
4
4
  Summary: Unified Tokenizer
5
5
  Home-page: https://github.com/Jyonn/UnifiedTokenizer
6
6
  Author: Jyonn Liu
@@ -6,7 +6,7 @@ long_description = (this_directory / "README.md").read_text()
6
6
 
7
7
  setup(
8
8
  name='UniTok',
9
- version='3.0.12',
9
+ version='3.0.13',
10
10
  keywords=['token', 'tokenizer', 'bert'],
11
11
  description='Unified Tokenizer',
12
12
  long_description=long_description,
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes