UniTok 4.4.1__tar.gz → 4.4.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. {UniTok-4.4.1 → UniTok-4.4.3}/PKG-INFO +1 -1
  2. {UniTok-4.4.1 → UniTok-4.4.3}/UniTok.egg-info/PKG-INFO +1 -1
  3. {UniTok-4.4.1 → UniTok-4.4.3}/setup.py +1 -1
  4. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/meta.py +5 -0
  5. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/tokenizer/union_tokenizer.py +3 -0
  6. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/utils/index_set/feature_set.py +2 -1
  7. {UniTok-4.4.1 → UniTok-4.4.3}/LICENSE +0 -0
  8. {UniTok-4.4.1 → UniTok-4.4.3}/README.md +0 -0
  9. {UniTok-4.4.1 → UniTok-4.4.3}/UniTok.egg-info/SOURCES.txt +0 -0
  10. {UniTok-4.4.1 → UniTok-4.4.3}/UniTok.egg-info/dependency_links.txt +0 -0
  11. {UniTok-4.4.1 → UniTok-4.4.3}/UniTok.egg-info/entry_points.txt +0 -0
  12. {UniTok-4.4.1 → UniTok-4.4.3}/UniTok.egg-info/requires.txt +0 -0
  13. {UniTok-4.4.1 → UniTok-4.4.3}/UniTok.egg-info/top_level.txt +0 -0
  14. {UniTok-4.4.1 → UniTok-4.4.3}/UniTokv3/__init__.py +0 -0
  15. {UniTok-4.4.1 → UniTok-4.4.3}/UniTokv3/__main__.py +0 -0
  16. {UniTok-4.4.1 → UniTok-4.4.3}/UniTokv3/analysis/__init__.py +0 -0
  17. {UniTok-4.4.1 → UniTok-4.4.3}/UniTokv3/analysis/lengths.py +0 -0
  18. {UniTok-4.4.1 → UniTok-4.4.3}/UniTokv3/analysis/plot.py +0 -0
  19. {UniTok-4.4.1 → UniTok-4.4.3}/UniTokv3/cols.py +0 -0
  20. {UniTok-4.4.1 → UniTok-4.4.3}/UniTokv3/column.py +0 -0
  21. {UniTok-4.4.1 → UniTok-4.4.3}/UniTokv3/fut.py +0 -0
  22. {UniTok-4.4.1 → UniTok-4.4.3}/UniTokv3/global_setting.py +0 -0
  23. {UniTok-4.4.1 → UniTok-4.4.3}/UniTokv3/meta.py +0 -0
  24. {UniTok-4.4.1 → UniTok-4.4.3}/UniTokv3/tok/__init__.py +0 -0
  25. {UniTok-4.4.1 → UniTok-4.4.3}/UniTokv3/tok/bert_tok.py +0 -0
  26. {UniTok-4.4.1 → UniTok-4.4.3}/UniTokv3/tok/ent_tok.py +0 -0
  27. {UniTok-4.4.1 → UniTok-4.4.3}/UniTokv3/tok/id_tok.py +0 -0
  28. {UniTok-4.4.1 → UniTok-4.4.3}/UniTokv3/tok/number_tok.py +0 -0
  29. {UniTok-4.4.1 → UniTok-4.4.3}/UniTokv3/tok/seq_tok.py +0 -0
  30. {UniTok-4.4.1 → UniTok-4.4.3}/UniTokv3/tok/split_tok.py +0 -0
  31. {UniTok-4.4.1 → UniTok-4.4.3}/UniTokv3/tok/tok.py +0 -0
  32. {UniTok-4.4.1 → UniTok-4.4.3}/UniTokv3/unidep.py +0 -0
  33. {UniTok-4.4.1 → UniTok-4.4.3}/UniTokv3/unitok.py +0 -0
  34. {UniTok-4.4.1 → UniTok-4.4.3}/UniTokv3/vocab.py +0 -0
  35. {UniTok-4.4.1 → UniTok-4.4.3}/UniTokv3/vocabs.py +0 -0
  36. {UniTok-4.4.1 → UniTok-4.4.3}/setup.cfg +0 -0
  37. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/__init__.py +0 -0
  38. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/__main__.py +0 -0
  39. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/feature.py +0 -0
  40. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/job.py +0 -0
  41. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/selector.py +0 -0
  42. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/status.py +0 -0
  43. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/tokenizer/__init__.py +0 -0
  44. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/tokenizer/base_tokenizer.py +0 -0
  45. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/tokenizer/digit_tokenizer.py +0 -0
  46. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/tokenizer/entity_tokenizer.py +0 -0
  47. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/tokenizer/glove_tokenizer.py +0 -0
  48. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/tokenizer/split_tokenizer.py +0 -0
  49. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/tokenizer/transformers_tokenizer.py +0 -0
  50. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/tokenizer/unknown_tokenizer.py +0 -0
  51. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/unitok.py +0 -0
  52. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/utils/__init__.py +0 -0
  53. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/utils/class_pool.py +0 -0
  54. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/utils/data.py +0 -0
  55. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/utils/function.py +0 -0
  56. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/utils/handler/__init__.py +0 -0
  57. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/utils/handler/json_handler.py +0 -0
  58. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/utils/handler/pkl_handler.py +0 -0
  59. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/utils/hub/__init__.py +0 -0
  60. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/utils/hub/hub.py +0 -0
  61. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/utils/hub/param_hub.py +0 -0
  62. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/utils/index_set/__init__.py +0 -0
  63. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/utils/index_set/index_set.py +0 -0
  64. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/utils/index_set/job_set.py +0 -0
  65. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/utils/index_set/tokenizer_set.py +0 -0
  66. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/utils/index_set/vocabulary_set.py +0 -0
  67. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/utils/instance.py +0 -0
  68. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/utils/map.py +0 -0
  69. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/utils/space.py +0 -0
  70. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/utils/symbol.py +0 -0
  71. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/utils/verbose.py +0 -0
  72. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/vocabulary/__init__.py +0 -0
  73. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/vocabulary/counter.py +0 -0
  74. {UniTok-4.4.1 → UniTok-4.4.3}/unitok/vocabulary/vocabulary.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: UniTok
3
- Version: 4.4.1
3
+ Version: 4.4.3
4
4
  Summary: Unified Tokenizer
5
5
  Home-page: https://github.com/Jyonn/UnifiedTokenizer
6
6
  Author: Jyonn Liu
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: UniTok
3
- Version: 4.4.1
3
+ Version: 4.4.3
4
4
  Summary: Unified Tokenizer
5
5
  Home-page: https://github.com/Jyonn/UnifiedTokenizer
6
6
  Author: Jyonn Liu
@@ -6,7 +6,7 @@ long_description = (this_directory / "README.md").read_text(encoding='utf8')
6
6
 
7
7
  setup(
8
8
  name='UniTok',
9
- version='4.4.1',
9
+ version='4.4.3',
10
10
  keywords=['token', 'tokenizer', 'NLP', 'transformers', 'glove', 'bert', 'llama'],
11
11
  description='Unified Tokenizer',
12
12
  long_description=long_description,
@@ -27,6 +27,11 @@ class Meta:
27
27
  self.tokenizers = TokenizerSet()
28
28
  self.features = FeatureSet()
29
29
 
30
+ @property
31
+ def jobs(self):
32
+ warnings.warn('`jobs` is deprecated, use `features` instead.', DeprecationWarning, stacklevel=2)
33
+ return self.features
34
+
30
35
  @staticmethod
31
36
  def parse_vocabulary(name: str, **kwargs):
32
37
  return Vocab(name)
@@ -12,6 +12,9 @@ class UnionTokenizer(BaseTokenizer):
12
12
  self.tokenizer = tokenizer
13
13
  self.classname = self.tokenizer._detailed_classname
14
14
 
15
+ def __call__(self, obj):
16
+ raise NotImplementedError('UnionTokenizer is used as a placeholder and should not be called.')
17
+
15
18
  @property
16
19
  def _detailed_classname(self):
17
20
  return f'{self.__class__.__name__}[{self.classname}]'
@@ -22,4 +22,5 @@ class FeatureSet(IndexSet[Feature]):
22
22
  raise ValueError(f'Merge unprocessed feature: {feature}')
23
23
  if self.has(self._get_key(feature)):
24
24
  raise ValueError(f'Conflict feature name: {feature.name}')
25
- self.add(feature.clone(order=next_order, tokenizer=UnionTokenizer(feature.tokenizer)))
25
+ # self.add(feature.clone(order=next_order, tokenizer=UnionTokenizer(feature.tokenizer)))
26
+ self.add(feature.clone(order=next_order))
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes