UniTok 3.0.3a0__tar.gz → 3.0.4a0__tar.gz
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
- {UniTok-3.0.3a0 → UniTok-3.0.4a0}/PKG-INFO +1 -1
- {UniTok-3.0.3a0 → UniTok-3.0.4a0}/UniTok/vocab.py +6 -3
- {UniTok-3.0.3a0 → UniTok-3.0.4a0}/UniTok.egg-info/PKG-INFO +1 -1
- {UniTok-3.0.3a0 → UniTok-3.0.4a0}/setup.py +1 -1
- {UniTok-3.0.3a0 → UniTok-3.0.4a0}/README.md +0 -0
- {UniTok-3.0.3a0 → UniTok-3.0.4a0}/UniTok/__init__.py +0 -0
- {UniTok-3.0.3a0 → UniTok-3.0.4a0}/UniTok/analysis/__init__.py +0 -0
- {UniTok-3.0.3a0 → UniTok-3.0.4a0}/UniTok/analysis/lengths.py +0 -0
- {UniTok-3.0.3a0 → UniTok-3.0.4a0}/UniTok/analysis/plot.py +0 -0
- {UniTok-3.0.3a0 → UniTok-3.0.4a0}/UniTok/cols.py +0 -0
- {UniTok-3.0.3a0 → UniTok-3.0.4a0}/UniTok/column.py +0 -0
- {UniTok-3.0.3a0 → UniTok-3.0.4a0}/UniTok/compatible/__init__.py +0 -0
- {UniTok-3.0.3a0 → UniTok-3.0.4a0}/UniTok/compatible/uni_warnings.py +0 -0
- {UniTok-3.0.3a0 → UniTok-3.0.4a0}/UniTok/global_setting.py +0 -0
- {UniTok-3.0.3a0 → UniTok-3.0.4a0}/UniTok/meta.py +0 -0
- {UniTok-3.0.3a0 → UniTok-3.0.4a0}/UniTok/tok/__init__.py +0 -0
- {UniTok-3.0.3a0 → UniTok-3.0.4a0}/UniTok/tok/bert_tok.py +0 -0
- {UniTok-3.0.3a0 → UniTok-3.0.4a0}/UniTok/tok/entity_tok.py +0 -0
- {UniTok-3.0.3a0 → UniTok-3.0.4a0}/UniTok/tok/id_tok.py +0 -0
- {UniTok-3.0.3a0 → UniTok-3.0.4a0}/UniTok/tok/number_tok.py +0 -0
- {UniTok-3.0.3a0 → UniTok-3.0.4a0}/UniTok/tok/seq_tok.py +0 -0
- {UniTok-3.0.3a0 → UniTok-3.0.4a0}/UniTok/tok/split_tok.py +0 -0
- {UniTok-3.0.3a0 → UniTok-3.0.4a0}/UniTok/tok/tok.py +0 -0
- {UniTok-3.0.3a0 → UniTok-3.0.4a0}/UniTok/unidep.py +0 -0
- {UniTok-3.0.3a0 → UniTok-3.0.4a0}/UniTok/unitok.py +0 -0
- {UniTok-3.0.3a0 → UniTok-3.0.4a0}/UniTok/vocabs.py +0 -0
- {UniTok-3.0.3a0 → UniTok-3.0.4a0}/UniTok.egg-info/SOURCES.txt +0 -0
- {UniTok-3.0.3a0 → UniTok-3.0.4a0}/UniTok.egg-info/dependency_links.txt +0 -0
- {UniTok-3.0.3a0 → UniTok-3.0.4a0}/UniTok.egg-info/requires.txt +0 -0
- {UniTok-3.0.3a0 → UniTok-3.0.4a0}/UniTok.egg-info/top_level.txt +0 -0
- {UniTok-3.0.3a0 → UniTok-3.0.4a0}/setup.cfg +0 -0
@@ -29,6 +29,7 @@ class Vocab:
|
|
29
29
|
|
30
30
|
self._editable = True # whether vocab is editable
|
31
31
|
self._oov_token = None # out of vocabulary token
|
32
|
+
self._stable_mode = False
|
32
33
|
|
33
34
|
self._count_mode = False # whether count mode is on
|
34
35
|
self._counter = {} # counter for counting occurrence of each token
|
@@ -80,8 +81,10 @@ class Vocab:
|
|
80
81
|
if obj in self.o2i:
|
81
82
|
return self.o2i[obj]
|
82
83
|
|
83
|
-
if self.
|
84
|
-
|
84
|
+
if self._stable_mode:
|
85
|
+
if self._oov_token is not None:
|
86
|
+
return self._oov_token
|
87
|
+
return -1
|
85
88
|
|
86
89
|
if not self._editable:
|
87
90
|
if self._oov_token is not None:
|
@@ -205,7 +208,7 @@ class Vocab:
|
|
205
208
|
self.reserve(self.reserved_tokens)
|
206
209
|
self.extend(vocabs)
|
207
210
|
|
208
|
-
|
211
|
+
self._stable_mode = True
|
209
212
|
return self
|
210
213
|
|
211
214
|
def summarize(self, base=10):
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|