UniTok 3.0.12__tar.gz → 3.0.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {UniTok-3.0.12 → UniTok-3.0.13}/PKG-INFO +1 -1
- {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/column.py +1 -1
- {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/vocab.py +6 -2
- {UniTok-3.0.12 → UniTok-3.0.13}/UniTok.egg-info/PKG-INFO +1 -1
- {UniTok-3.0.12 → UniTok-3.0.13}/setup.py +1 -1
- {UniTok-3.0.12 → UniTok-3.0.13}/README.md +0 -0
- {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/__init__.py +0 -0
- {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/analysis/__init__.py +0 -0
- {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/analysis/lengths.py +0 -0
- {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/analysis/plot.py +0 -0
- {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/cols.py +0 -0
- {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/global_setting.py +0 -0
- {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/meta.py +0 -0
- {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/tok/__init__.py +0 -0
- {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/tok/bert_tok.py +0 -0
- {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/tok/ent_tok.py +0 -0
- {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/tok/id_tok.py +0 -0
- {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/tok/number_tok.py +0 -0
- {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/tok/seq_tok.py +0 -0
- {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/tok/split_tok.py +0 -0
- {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/tok/tok.py +0 -0
- {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/unidep.py +0 -0
- {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/unitok.py +0 -0
- {UniTok-3.0.12 → UniTok-3.0.13}/UniTok/vocabs.py +0 -0
- {UniTok-3.0.12 → UniTok-3.0.13}/UniTok.egg-info/SOURCES.txt +0 -0
- {UniTok-3.0.12 → UniTok-3.0.13}/UniTok.egg-info/dependency_links.txt +0 -0
- {UniTok-3.0.12 → UniTok-3.0.13}/UniTok.egg-info/requires.txt +0 -0
- {UniTok-3.0.12 → UniTok-3.0.13}/UniTok.egg-info/top_level.txt +0 -0
- {UniTok-3.0.12 → UniTok-3.0.13}/setup.cfg +0 -0
@@ -192,8 +192,10 @@ class Vocab:
|
|
192
192
|
def trim(self, min_count=None, min_frequency=1):
|
193
193
|
"""
|
194
194
|
trim vocab by min frequency
|
195
|
-
:return:
|
195
|
+
:return: trimmed tokens
|
196
196
|
"""
|
197
|
+
_trimmed = []
|
198
|
+
|
197
199
|
if min_count is None:
|
198
200
|
warnings.warn('vocab.min_frequency is deprecated, '
|
199
201
|
'use vocab.min_count instead (will be removed in 4.x version)', DeprecationWarning)
|
@@ -203,6 +205,8 @@ class Vocab:
|
|
203
205
|
for index in self._counter:
|
204
206
|
if self._counter[index] >= min_count:
|
205
207
|
vocabs.append(self.i2o[index])
|
208
|
+
else:
|
209
|
+
_trimmed.append(self.i2o[index])
|
206
210
|
|
207
211
|
self.i2o = dict()
|
208
212
|
self.o2i = dict()
|
@@ -213,7 +217,7 @@ class Vocab:
|
|
213
217
|
self.extend(vocabs)
|
214
218
|
|
215
219
|
self._stable_mode = True
|
216
|
-
return
|
220
|
+
return _trimmed
|
217
221
|
|
218
222
|
def summarize(self, base=10):
|
219
223
|
"""
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|