UniTok 4.4.0__tar.gz → 4.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {UniTok-4.4.0 → UniTok-4.4.2}/PKG-INFO +2 -1
- {UniTok-4.4.0 → UniTok-4.4.2}/UniTok.egg-info/PKG-INFO +2 -1
- {UniTok-4.4.0 → UniTok-4.4.2}/UniTok.egg-info/requires.txt +1 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/setup.py +3 -2
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/job.py +1 -1
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/meta.py +6 -1
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/unitok.py +5 -5
- {UniTok-4.4.0 → UniTok-4.4.2}/LICENSE +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/README.md +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/UniTok.egg-info/SOURCES.txt +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/UniTok.egg-info/dependency_links.txt +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/UniTok.egg-info/entry_points.txt +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/UniTok.egg-info/top_level.txt +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/__init__.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/__main__.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/analysis/__init__.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/analysis/lengths.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/analysis/plot.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/cols.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/column.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/fut.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/global_setting.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/meta.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/tok/__init__.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/tok/bert_tok.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/tok/ent_tok.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/tok/id_tok.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/tok/number_tok.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/tok/seq_tok.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/tok/split_tok.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/tok/tok.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/unidep.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/unitok.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/vocab.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/vocabs.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/setup.cfg +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/__init__.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/__main__.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/feature.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/selector.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/status.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/tokenizer/__init__.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/tokenizer/base_tokenizer.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/tokenizer/digit_tokenizer.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/tokenizer/entity_tokenizer.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/tokenizer/glove_tokenizer.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/tokenizer/split_tokenizer.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/tokenizer/transformers_tokenizer.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/tokenizer/union_tokenizer.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/tokenizer/unknown_tokenizer.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/__init__.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/class_pool.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/data.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/function.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/handler/__init__.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/handler/json_handler.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/handler/pkl_handler.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/hub/__init__.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/hub/hub.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/hub/param_hub.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/index_set/__init__.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/index_set/feature_set.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/index_set/index_set.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/index_set/job_set.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/index_set/tokenizer_set.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/index_set/vocabulary_set.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/instance.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/map.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/space.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/symbol.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/verbose.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/vocabulary/__init__.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/vocabulary/counter.py +0 -0
- {UniTok-4.4.0 → UniTok-4.4.2}/unitok/vocabulary/vocabulary.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: UniTok
|
3
|
-
Version: 4.4.0
|
3
|
+
Version: 4.4.2
|
4
4
|
Summary: Unified Tokenizer
|
5
5
|
Home-page: https://github.com/Jyonn/UnifiedTokenizer
|
6
6
|
Author: Jyonn Liu
|
@@ -18,6 +18,7 @@ Requires-Dist: transformers
|
|
18
18
|
Requires-Dist: oba
|
19
19
|
Requires-Dist: prettytable
|
20
20
|
Requires-Dist: rich
|
21
|
+
Requires-Dist: fastparquet
|
21
22
|
|
22
23
|
# UniTok V4
|
23
24
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: UniTok
|
3
|
-
Version: 4.4.0
|
3
|
+
Version: 4.4.2
|
4
4
|
Summary: Unified Tokenizer
|
5
5
|
Home-page: https://github.com/Jyonn/UnifiedTokenizer
|
6
6
|
Author: Jyonn Liu
|
@@ -18,6 +18,7 @@ Requires-Dist: transformers
|
|
18
18
|
Requires-Dist: oba
|
19
19
|
Requires-Dist: prettytable
|
20
20
|
Requires-Dist: rich
|
21
|
+
Requires-Dist: fastparquet
|
21
22
|
|
22
23
|
# UniTok V4
|
23
24
|
|
@@ -6,7 +6,7 @@ long_description = (this_directory / "README.md").read_text(encoding='utf8')
|
|
6
6
|
|
7
7
|
setup(
|
8
8
|
name='UniTok',
|
9
|
-
version='4.4.0',
|
9
|
+
version='4.4.2',
|
10
10
|
keywords=['token', 'tokenizer', 'NLP', 'transformers', 'glove', 'bert', 'llama'],
|
11
11
|
description='Unified Tokenizer',
|
12
12
|
long_description=long_description,
|
@@ -25,7 +25,8 @@ setup(
|
|
25
25
|
'transformers',
|
26
26
|
'oba',
|
27
27
|
'prettytable',
|
28
|
-
'rich'
|
28
|
+
'rich',
|
29
|
+
'fastparquet'
|
29
30
|
],
|
30
31
|
entry_points={
|
31
32
|
'console_scripts': [
|
@@ -4,7 +4,7 @@ from unitok.feature import Feature, FeatureHub
|
|
4
4
|
|
5
5
|
class Job(Feature):
|
6
6
|
def __init__(self, **kwargs):
|
7
|
-
warnings.
|
7
|
+
warnings.warn(f'`Job` class is deprecated, use `Feature`.', DeprecationWarning, stacklevel=2)
|
8
8
|
super().__init__(**kwargs)
|
9
9
|
|
10
10
|
|
@@ -27,6 +27,11 @@ class Meta:
|
|
27
27
|
self.tokenizers = TokenizerSet()
|
28
28
|
self.features = FeatureSet()
|
29
29
|
|
30
|
+
@property
|
31
|
+
def jobs(self):
|
32
|
+
warnings.warn('`jobs` is deprecated, use `features` instead.', DeprecationWarning, stacklevel=2)
|
33
|
+
return self.features
|
34
|
+
|
30
35
|
@staticmethod
|
31
36
|
def parse_vocabulary(name: str, **kwargs):
|
32
37
|
return Vocab(name)
|
@@ -66,7 +71,7 @@ class Meta:
|
|
66
71
|
|
67
72
|
@staticmethod
|
68
73
|
def parse_job(name: str, column: str, tokenizer: str, truncate: int, order: int, key: bool, max_len: int):
|
69
|
-
warnings.
|
74
|
+
warnings.warn('`parse_job` is deprecated, use `parse_feature` instead.', DeprecationWarning, stacklevel=2)
|
70
75
|
return Meta.parse_feature(name, column, tokenizer, truncate, order, key, max_len)
|
71
76
|
|
72
77
|
@staticmethod
|
@@ -41,12 +41,12 @@ class UniTok(Status):
|
|
41
41
|
|
42
42
|
@property
|
43
43
|
def key_job(self):
|
44
|
-
warnings.
|
44
|
+
warnings.warn('key_job is deprecated, use key_feat instead', DeprecationWarning, stacklevel=2)
|
45
45
|
return self.key_feature
|
46
46
|
|
47
47
|
@key_job.setter
|
48
48
|
def key_job(self, value):
|
49
|
-
warnings.
|
49
|
+
warnings.warn('key_job is deprecated, use key_feat instead', DeprecationWarning, stacklevel=2)
|
50
50
|
self.key_feature = value
|
51
51
|
|
52
52
|
@property
|
@@ -130,7 +130,7 @@ class UniTok(Status):
|
|
130
130
|
)
|
131
131
|
|
132
132
|
def add_index_job(self, name: str = 'index', tokenizer: DigitTokenizer = None):
|
133
|
-
warnings.
|
133
|
+
warnings.warn('`add_index_job` is deprecated, use `add_job` instead', DeprecationWarning, stacklevel=2)
|
134
134
|
return self.add_index_feature(name=name, tokenizer=tokenizer)
|
135
135
|
|
136
136
|
def add_job(
|
@@ -141,7 +141,7 @@ class UniTok(Status):
|
|
141
141
|
truncate: int = None,
|
142
142
|
key: bool = False,
|
143
143
|
):
|
144
|
-
warnings.
|
144
|
+
warnings.warn('`add_job` is deprecated, use `add_feature` instead', DeprecationWarning, stacklevel=2)
|
145
145
|
return self.add_feature(
|
146
146
|
tokenizer=tokenizer,
|
147
147
|
column=column,
|
@@ -543,5 +543,5 @@ class UniTok(Status):
|
|
543
543
|
self.data.pop(feature.name)
|
544
544
|
|
545
545
|
def remove_job(self, feature: Union[Feature, str]):
|
546
|
-
warnings.
|
546
|
+
warnings.warn(f'`remove_job` is deprecated, use `remove_feature` instead.', DeprecationWarning, stacklevel=2)
|
547
547
|
self.remove_feature(feature)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|