UniTok 4.3.6__tar.gz → 4.3.8__tar.gz

This diff shows the changes between two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (72)
  1. {UniTok-4.3.6 → UniTok-4.3.8}/PKG-INFO +1 -1
  2. {UniTok-4.3.6 → UniTok-4.3.8}/UniTok.egg-info/PKG-INFO +1 -1
  3. {UniTok-4.3.6 → UniTok-4.3.8}/setup.py +1 -1
  4. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/tokenizer/digit_tokenizer.py +1 -0
  5. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/tokenizer/transformers_tokenizer.py +9 -0
  6. {UniTok-4.3.6 → UniTok-4.3.8}/LICENSE +0 -0
  7. {UniTok-4.3.6 → UniTok-4.3.8}/README.md +0 -0
  8. {UniTok-4.3.6 → UniTok-4.3.8}/UniTok.egg-info/SOURCES.txt +0 -0
  9. {UniTok-4.3.6 → UniTok-4.3.8}/UniTok.egg-info/dependency_links.txt +0 -0
  10. {UniTok-4.3.6 → UniTok-4.3.8}/UniTok.egg-info/entry_points.txt +0 -0
  11. {UniTok-4.3.6 → UniTok-4.3.8}/UniTok.egg-info/requires.txt +0 -0
  12. {UniTok-4.3.6 → UniTok-4.3.8}/UniTok.egg-info/top_level.txt +0 -0
  13. {UniTok-4.3.6 → UniTok-4.3.8}/UniTokv3/__init__.py +0 -0
  14. {UniTok-4.3.6 → UniTok-4.3.8}/UniTokv3/__main__.py +0 -0
  15. {UniTok-4.3.6 → UniTok-4.3.8}/UniTokv3/analysis/__init__.py +0 -0
  16. {UniTok-4.3.6 → UniTok-4.3.8}/UniTokv3/analysis/lengths.py +0 -0
  17. {UniTok-4.3.6 → UniTok-4.3.8}/UniTokv3/analysis/plot.py +0 -0
  18. {UniTok-4.3.6 → UniTok-4.3.8}/UniTokv3/cols.py +0 -0
  19. {UniTok-4.3.6 → UniTok-4.3.8}/UniTokv3/column.py +0 -0
  20. {UniTok-4.3.6 → UniTok-4.3.8}/UniTokv3/fut.py +0 -0
  21. {UniTok-4.3.6 → UniTok-4.3.8}/UniTokv3/global_setting.py +0 -0
  22. {UniTok-4.3.6 → UniTok-4.3.8}/UniTokv3/meta.py +0 -0
  23. {UniTok-4.3.6 → UniTok-4.3.8}/UniTokv3/tok/__init__.py +0 -0
  24. {UniTok-4.3.6 → UniTok-4.3.8}/UniTokv3/tok/bert_tok.py +0 -0
  25. {UniTok-4.3.6 → UniTok-4.3.8}/UniTokv3/tok/ent_tok.py +0 -0
  26. {UniTok-4.3.6 → UniTok-4.3.8}/UniTokv3/tok/id_tok.py +0 -0
  27. {UniTok-4.3.6 → UniTok-4.3.8}/UniTokv3/tok/number_tok.py +0 -0
  28. {UniTok-4.3.6 → UniTok-4.3.8}/UniTokv3/tok/seq_tok.py +0 -0
  29. {UniTok-4.3.6 → UniTok-4.3.8}/UniTokv3/tok/split_tok.py +0 -0
  30. {UniTok-4.3.6 → UniTok-4.3.8}/UniTokv3/tok/tok.py +0 -0
  31. {UniTok-4.3.6 → UniTok-4.3.8}/UniTokv3/unidep.py +0 -0
  32. {UniTok-4.3.6 → UniTok-4.3.8}/UniTokv3/unitok.py +0 -0
  33. {UniTok-4.3.6 → UniTok-4.3.8}/UniTokv3/vocab.py +0 -0
  34. {UniTok-4.3.6 → UniTok-4.3.8}/UniTokv3/vocabs.py +0 -0
  35. {UniTok-4.3.6 → UniTok-4.3.8}/setup.cfg +0 -0
  36. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/__init__.py +0 -0
  37. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/__main__.py +0 -0
  38. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/job.py +0 -0
  39. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/meta.py +0 -0
  40. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/selector.py +0 -0
  41. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/status.py +0 -0
  42. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/tokenizer/__init__.py +0 -0
  43. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/tokenizer/base_tokenizer.py +0 -0
  44. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/tokenizer/entity_tokenizer.py +0 -0
  45. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/tokenizer/glove_tokenizer.py +0 -0
  46. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/tokenizer/split_tokenizer.py +0 -0
  47. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/tokenizer/union_tokenizer.py +0 -0
  48. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/tokenizer/unknown_tokenizer.py +0 -0
  49. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/unitok.py +0 -0
  50. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/utils/__init__.py +0 -0
  51. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/utils/class_pool.py +0 -0
  52. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/utils/data.py +0 -0
  53. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/utils/function.py +0 -0
  54. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/utils/handler/__init__.py +0 -0
  55. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/utils/handler/json_handler.py +0 -0
  56. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/utils/handler/pkl_handler.py +0 -0
  57. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/utils/hub/__init__.py +0 -0
  58. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/utils/hub/hub.py +0 -0
  59. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/utils/hub/param_hub.py +0 -0
  60. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/utils/index_set/__init__.py +0 -0
  61. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/utils/index_set/index_set.py +0 -0
  62. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/utils/index_set/job_set.py +0 -0
  63. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/utils/index_set/tokenizer_set.py +0 -0
  64. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/utils/index_set/vocabulary_set.py +0 -0
  65. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/utils/instance.py +0 -0
  66. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/utils/map.py +0 -0
  67. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/utils/space.py +0 -0
  68. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/utils/symbol.py +0 -0
  69. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/utils/verbose.py +0 -0
  70. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/vocabulary/__init__.py +0 -0
  71. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/vocabulary/counter.py +0 -0
  72. {UniTok-4.3.6 → UniTok-4.3.8}/unitok/vocabulary/vocabulary.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: UniTok
3
- Version: 4.3.6
3
+ Version: 4.3.8
4
4
  Summary: Unified Tokenizer
5
5
  Home-page: https://github.com/Jyonn/UnifiedTokenizer
6
6
  Author: Jyonn Liu
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: UniTok
3
- Version: 4.3.6
3
+ Version: 4.3.8
4
4
  Summary: Unified Tokenizer
5
5
  Home-page: https://github.com/Jyonn/UnifiedTokenizer
6
6
  Author: Jyonn Liu
@@ -6,7 +6,7 @@ long_description = (this_directory / "README.md").read_text(encoding='utf8')
6
6
 
7
7
  setup(
8
8
  name='UniTok',
9
- version='4.3.6',
9
+ version='4.3.8',
10
10
  keywords=['token', 'tokenizer', 'NLP', 'transformers', 'glove', 'bert', 'llama'],
11
11
  description='Unified Tokenizer',
12
12
  long_description=long_description,
@@ -31,3 +31,4 @@ class DigitsTokenizer(DigitTokenizer):
31
31
  obj = [int(o) for o in obj]
32
32
  for o in obj:
33
33
  super().__call__(o)
34
+ return obj
@@ -56,6 +56,15 @@ class TransformersTokenizer(BaseTokenizer):
56
56
  self.vocab.counter(token)
57
57
  return tokens
58
58
 
59
+ def __getstate__(self):
60
+ state = self.__dict__.copy()
61
+ state['tokenizer'] = None
62
+ return state
63
+
64
+ def __setstate__(self, state):
65
+ self.__dict__.update(state)
66
+ self.tokenizer = AutoTokenizer.from_pretrained(self.key, **self.kwargs)
67
+
59
68
 
60
69
  class BertTokenizer(TransformersTokenizer):
61
70
  param_list = []
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes