UniTok 4.4.0__tar.gz → 4.4.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. {UniTok-4.4.0 → UniTok-4.4.2}/PKG-INFO +2 -1
  2. {UniTok-4.4.0 → UniTok-4.4.2}/UniTok.egg-info/PKG-INFO +2 -1
  3. {UniTok-4.4.0 → UniTok-4.4.2}/UniTok.egg-info/requires.txt +1 -0
  4. {UniTok-4.4.0 → UniTok-4.4.2}/setup.py +3 -2
  5. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/job.py +1 -1
  6. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/meta.py +6 -1
  7. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/unitok.py +5 -5
  8. {UniTok-4.4.0 → UniTok-4.4.2}/LICENSE +0 -0
  9. {UniTok-4.4.0 → UniTok-4.4.2}/README.md +0 -0
  10. {UniTok-4.4.0 → UniTok-4.4.2}/UniTok.egg-info/SOURCES.txt +0 -0
  11. {UniTok-4.4.0 → UniTok-4.4.2}/UniTok.egg-info/dependency_links.txt +0 -0
  12. {UniTok-4.4.0 → UniTok-4.4.2}/UniTok.egg-info/entry_points.txt +0 -0
  13. {UniTok-4.4.0 → UniTok-4.4.2}/UniTok.egg-info/top_level.txt +0 -0
  14. {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/__init__.py +0 -0
  15. {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/__main__.py +0 -0
  16. {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/analysis/__init__.py +0 -0
  17. {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/analysis/lengths.py +0 -0
  18. {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/analysis/plot.py +0 -0
  19. {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/cols.py +0 -0
  20. {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/column.py +0 -0
  21. {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/fut.py +0 -0
  22. {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/global_setting.py +0 -0
  23. {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/meta.py +0 -0
  24. {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/tok/__init__.py +0 -0
  25. {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/tok/bert_tok.py +0 -0
  26. {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/tok/ent_tok.py +0 -0
  27. {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/tok/id_tok.py +0 -0
  28. {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/tok/number_tok.py +0 -0
  29. {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/tok/seq_tok.py +0 -0
  30. {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/tok/split_tok.py +0 -0
  31. {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/tok/tok.py +0 -0
  32. {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/unidep.py +0 -0
  33. {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/unitok.py +0 -0
  34. {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/vocab.py +0 -0
  35. {UniTok-4.4.0 → UniTok-4.4.2}/UniTokv3/vocabs.py +0 -0
  36. {UniTok-4.4.0 → UniTok-4.4.2}/setup.cfg +0 -0
  37. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/__init__.py +0 -0
  38. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/__main__.py +0 -0
  39. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/feature.py +0 -0
  40. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/selector.py +0 -0
  41. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/status.py +0 -0
  42. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/tokenizer/__init__.py +0 -0
  43. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/tokenizer/base_tokenizer.py +0 -0
  44. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/tokenizer/digit_tokenizer.py +0 -0
  45. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/tokenizer/entity_tokenizer.py +0 -0
  46. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/tokenizer/glove_tokenizer.py +0 -0
  47. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/tokenizer/split_tokenizer.py +0 -0
  48. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/tokenizer/transformers_tokenizer.py +0 -0
  49. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/tokenizer/union_tokenizer.py +0 -0
  50. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/tokenizer/unknown_tokenizer.py +0 -0
  51. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/__init__.py +0 -0
  52. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/class_pool.py +0 -0
  53. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/data.py +0 -0
  54. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/function.py +0 -0
  55. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/handler/__init__.py +0 -0
  56. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/handler/json_handler.py +0 -0
  57. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/handler/pkl_handler.py +0 -0
  58. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/hub/__init__.py +0 -0
  59. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/hub/hub.py +0 -0
  60. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/hub/param_hub.py +0 -0
  61. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/index_set/__init__.py +0 -0
  62. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/index_set/feature_set.py +0 -0
  63. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/index_set/index_set.py +0 -0
  64. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/index_set/job_set.py +0 -0
  65. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/index_set/tokenizer_set.py +0 -0
  66. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/index_set/vocabulary_set.py +0 -0
  67. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/instance.py +0 -0
  68. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/map.py +0 -0
  69. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/space.py +0 -0
  70. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/symbol.py +0 -0
  71. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/utils/verbose.py +0 -0
  72. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/vocabulary/__init__.py +0 -0
  73. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/vocabulary/counter.py +0 -0
  74. {UniTok-4.4.0 → UniTok-4.4.2}/unitok/vocabulary/vocabulary.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: UniTok
3
- Version: 4.4.0
3
+ Version: 4.4.2
4
4
  Summary: Unified Tokenizer
5
5
  Home-page: https://github.com/Jyonn/UnifiedTokenizer
6
6
  Author: Jyonn Liu
@@ -18,6 +18,7 @@ Requires-Dist: transformers
18
18
  Requires-Dist: oba
19
19
  Requires-Dist: prettytable
20
20
  Requires-Dist: rich
21
+ Requires-Dist: fastparquet
21
22
 
22
23
  # UniTok V4
23
24
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: UniTok
3
- Version: 4.4.0
3
+ Version: 4.4.2
4
4
  Summary: Unified Tokenizer
5
5
  Home-page: https://github.com/Jyonn/UnifiedTokenizer
6
6
  Author: Jyonn Liu
@@ -18,6 +18,7 @@ Requires-Dist: transformers
18
18
  Requires-Dist: oba
19
19
  Requires-Dist: prettytable
20
20
  Requires-Dist: rich
21
+ Requires-Dist: fastparquet
21
22
 
22
23
  # UniTok V4
23
24
 
@@ -6,3 +6,4 @@ transformers
6
6
  oba
7
7
  prettytable
8
8
  rich
9
+ fastparquet
@@ -6,7 +6,7 @@ long_description = (this_directory / "README.md").read_text(encoding='utf8')
6
6
 
7
7
  setup(
8
8
  name='UniTok',
9
- version='4.4.0',
9
+ version='4.4.2',
10
10
  keywords=['token', 'tokenizer', 'NLP', 'transformers', 'glove', 'bert', 'llama'],
11
11
  description='Unified Tokenizer',
12
12
  long_description=long_description,
@@ -25,7 +25,8 @@ setup(
25
25
  'transformers',
26
26
  'oba',
27
27
  'prettytable',
28
- 'rich'
28
+ 'rich',
29
+ 'fastparquet'
29
30
  ],
30
31
  entry_points={
31
32
  'console_scripts': [
@@ -4,7 +4,7 @@ from unitok.feature import Feature, FeatureHub
4
4
 
5
5
  class Job(Feature):
6
6
  def __init__(self, **kwargs):
7
- warnings.deprecated(f'Job is deprecated, use Feature instead.')
7
+ warnings.warn(f'`Job` class is deprecated, use `Feature`.', DeprecationWarning, stacklevel=2)
8
8
  super().__init__(**kwargs)
9
9
 
10
10
 
@@ -27,6 +27,11 @@ class Meta:
27
27
  self.tokenizers = TokenizerSet()
28
28
  self.features = FeatureSet()
29
29
 
30
+ @property
31
+ def jobs(self):
32
+ warnings.warn('`jobs` is deprecated, use `features` instead.', DeprecationWarning, stacklevel=2)
33
+ return self.features
34
+
30
35
  @staticmethod
31
36
  def parse_vocabulary(name: str, **kwargs):
32
37
  return Vocab(name)
@@ -66,7 +71,7 @@ class Meta:
66
71
 
67
72
  @staticmethod
68
73
  def parse_job(name: str, column: str, tokenizer: str, truncate: int, order: int, key: bool, max_len: int):
69
- warnings.deprecated('`parse_job` is deprecated, use `parse_feature` instead.', stacklevel=2)
74
+ warnings.warn('`parse_job` is deprecated, use `parse_feature` instead.', DeprecationWarning, stacklevel=2)
70
75
  return Meta.parse_feature(name, column, tokenizer, truncate, order, key, max_len)
71
76
 
72
77
  @staticmethod
@@ -41,12 +41,12 @@ class UniTok(Status):
41
41
 
42
42
  @property
43
43
  def key_job(self):
44
- warnings.deprecated('key_job is deprecated, use key_feat instead', stacklevel=2)
44
+ warnings.warn('key_job is deprecated, use key_feat instead', DeprecationWarning, stacklevel=2)
45
45
  return self.key_feature
46
46
 
47
47
  @key_job.setter
48
48
  def key_job(self, value):
49
- warnings.deprecated('key_job is deprecated, use key_feat instead', stacklevel=2)
49
+ warnings.warn('key_job is deprecated, use key_feat instead', DeprecationWarning, stacklevel=2)
50
50
  self.key_feature = value
51
51
 
52
52
  @property
@@ -130,7 +130,7 @@ class UniTok(Status):
130
130
  )
131
131
 
132
132
  def add_index_job(self, name: str = 'index', tokenizer: DigitTokenizer = None):
133
- warnings.deprecated('`add_index_job` is deprecated, use `add_job` instead', stacklevel=2)
133
+ warnings.warn('`add_index_job` is deprecated, use `add_job` instead', DeprecationWarning, stacklevel=2)
134
134
  return self.add_index_feature(name=name, tokenizer=tokenizer)
135
135
 
136
136
  def add_job(
@@ -141,7 +141,7 @@ class UniTok(Status):
141
141
  truncate: int = None,
142
142
  key: bool = False,
143
143
  ):
144
- warnings.deprecated('`add_job` is deprecated, use `add_feature` instead', stacklevel=2)
144
+ warnings.warn('`add_job` is deprecated, use `add_feature` instead', DeprecationWarning, stacklevel=2)
145
145
  return self.add_feature(
146
146
  tokenizer=tokenizer,
147
147
  column=column,
@@ -543,5 +543,5 @@ class UniTok(Status):
543
543
  self.data.pop(feature.name)
544
544
 
545
545
  def remove_job(self, feature: Union[Feature, str]):
546
- warnings.deprecated(f'`remove_job` is deprecated, use `remove_feature` instead.', stacklevel=2)
546
+ warnings.warn(f'`remove_job` is deprecated, use `remove_feature` instead.', DeprecationWarning, stacklevel=2)
547
547
  self.remove_feature(feature)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes