UniTok 3.0.2a0__tar.gz → 3.0.4a0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {UniTok-3.0.2a0 → UniTok-3.0.4a0}/PKG-INFO +1 -1
  2. {UniTok-3.0.2a0 → UniTok-3.0.4a0}/UniTok/tok/number_tok.py +1 -1
  3. {UniTok-3.0.2a0 → UniTok-3.0.4a0}/UniTok/tok/seq_tok.py +1 -1
  4. {UniTok-3.0.2a0 → UniTok-3.0.4a0}/UniTok/vocab.py +6 -3
  5. {UniTok-3.0.2a0 → UniTok-3.0.4a0}/UniTok.egg-info/PKG-INFO +1 -1
  6. {UniTok-3.0.2a0 → UniTok-3.0.4a0}/setup.py +1 -1
  7. {UniTok-3.0.2a0 → UniTok-3.0.4a0}/README.md +0 -0
  8. {UniTok-3.0.2a0 → UniTok-3.0.4a0}/UniTok/__init__.py +0 -0
  9. {UniTok-3.0.2a0 → UniTok-3.0.4a0}/UniTok/analysis/__init__.py +0 -0
  10. {UniTok-3.0.2a0 → UniTok-3.0.4a0}/UniTok/analysis/lengths.py +0 -0
  11. {UniTok-3.0.2a0 → UniTok-3.0.4a0}/UniTok/analysis/plot.py +0 -0
  12. {UniTok-3.0.2a0 → UniTok-3.0.4a0}/UniTok/cols.py +0 -0
  13. {UniTok-3.0.2a0 → UniTok-3.0.4a0}/UniTok/column.py +0 -0
  14. {UniTok-3.0.2a0 → UniTok-3.0.4a0}/UniTok/compatible/__init__.py +0 -0
  15. {UniTok-3.0.2a0 → UniTok-3.0.4a0}/UniTok/compatible/uni_warnings.py +0 -0
  16. {UniTok-3.0.2a0 → UniTok-3.0.4a0}/UniTok/global_setting.py +0 -0
  17. {UniTok-3.0.2a0 → UniTok-3.0.4a0}/UniTok/meta.py +0 -0
  18. {UniTok-3.0.2a0 → UniTok-3.0.4a0}/UniTok/tok/__init__.py +0 -0
  19. {UniTok-3.0.2a0 → UniTok-3.0.4a0}/UniTok/tok/bert_tok.py +0 -0
  20. {UniTok-3.0.2a0 → UniTok-3.0.4a0}/UniTok/tok/entity_tok.py +0 -0
  21. {UniTok-3.0.2a0 → UniTok-3.0.4a0}/UniTok/tok/id_tok.py +0 -0
  22. {UniTok-3.0.2a0 → UniTok-3.0.4a0}/UniTok/tok/split_tok.py +0 -0
  23. {UniTok-3.0.2a0 → UniTok-3.0.4a0}/UniTok/tok/tok.py +0 -0
  24. {UniTok-3.0.2a0 → UniTok-3.0.4a0}/UniTok/unidep.py +0 -0
  25. {UniTok-3.0.2a0 → UniTok-3.0.4a0}/UniTok/unitok.py +0 -0
  26. {UniTok-3.0.2a0 → UniTok-3.0.4a0}/UniTok/vocabs.py +0 -0
  27. {UniTok-3.0.2a0 → UniTok-3.0.4a0}/UniTok.egg-info/SOURCES.txt +0 -0
  28. {UniTok-3.0.2a0 → UniTok-3.0.4a0}/UniTok.egg-info/dependency_links.txt +0 -0
  29. {UniTok-3.0.2a0 → UniTok-3.0.4a0}/UniTok.egg-info/requires.txt +0 -0
  30. {UniTok-3.0.2a0 → UniTok-3.0.4a0}/UniTok.egg-info/top_level.txt +0 -0
  31. {UniTok-3.0.2a0 → UniTok-3.0.4a0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: UniTok
3
- Version: 3.0.2a0
3
+ Version: 3.0.4a0
4
4
  Summary: Unified Tokenizer
5
5
  Home-page: https://github.com/Jyonn/UnifiedTokenizer
6
6
  Author: Jyonn Liu
@@ -1,4 +1,4 @@
1
- from UniTok.tok import BaseTok
1
+ from .tok import BaseTok
2
2
 
3
3
 
4
4
  class NumberTok(BaseTok):
@@ -1,4 +1,4 @@
1
- from UniTok.tok import BaseTok
1
+ from .tok import BaseTok
2
2
 
3
3
 
4
4
  class SeqTok(BaseTok):
@@ -29,6 +29,7 @@ class Vocab:
29
29
 
30
30
  self._editable = True # whether vocab is editable
31
31
  self._oov_token = None # out of vocabulary token
32
+ self._stable_mode = False
32
33
 
33
34
  self._count_mode = False # whether count mode is on
34
35
  self._counter = {} # counter for counting occurrence of each token
@@ -80,8 +81,10 @@ class Vocab:
80
81
  if obj in self.o2i:
81
82
  return self.o2i[obj]
82
83
 
83
- if self._count_mode:
84
- return self._oov_token or -1
84
+ if self._stable_mode:
85
+ if self._oov_token is not None:
86
+ return self._oov_token
87
+ return -1
85
88
 
86
89
  if not self._editable:
87
90
  if self._oov_token is not None:
@@ -205,7 +208,7 @@ class Vocab:
205
208
  self.reserve(self.reserved_tokens)
206
209
  self.extend(vocabs)
207
210
 
208
- # self.frequency_mode = True
211
+ self._stable_mode = True
209
212
  return self
210
213
 
211
214
  def summarize(self, base=10):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: UniTok
3
- Version: 3.0.2a0
3
+ Version: 3.0.4a0
4
4
  Summary: Unified Tokenizer
5
5
  Home-page: https://github.com/Jyonn/UnifiedTokenizer
6
6
  Author: Jyonn Liu
@@ -6,7 +6,7 @@ long_description = (this_directory / "README.md").read_text()
6
6
 
7
7
  setup(
8
8
  name='UniTok',
9
- version='3.0.2.alpha',
9
+ version='3.0.4.alpha',
10
10
  keywords=['token', 'tokenizer', 'bert'],
11
11
  description='Unified Tokenizer',
12
12
  long_description=long_description,
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes