UniTok 4.3.1__tar.gz → 4.3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. {UniTok-4.3.1 → UniTok-4.3.3}/PKG-INFO +1 -1
  2. {UniTok-4.3.1 → UniTok-4.3.3}/UniTok.egg-info/PKG-INFO +1 -1
  3. {UniTok-4.3.1 → UniTok-4.3.3}/UniTok.egg-info/SOURCES.txt +6 -0
  4. {UniTok-4.3.1 → UniTok-4.3.3}/setup.py +1 -1
  5. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/unitok.py +2 -2
  6. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/utils/hub/hub.py +5 -1
  7. UniTok-4.3.3/unitok/utils/space.py +35 -0
  8. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/vocabulary/vocabulary.py +4 -0
  9. UniTok-4.3.1/unitok/utils/space.py +0 -29
  10. {UniTok-4.3.1 → UniTok-4.3.3}/LICENSE +0 -0
  11. {UniTok-4.3.1 → UniTok-4.3.3}/README.md +0 -0
  12. {UniTok-4.3.1 → UniTok-4.3.3}/UniTok.egg-info/dependency_links.txt +0 -0
  13. {UniTok-4.3.1 → UniTok-4.3.3}/UniTok.egg-info/entry_points.txt +0 -0
  14. {UniTok-4.3.1 → UniTok-4.3.3}/UniTok.egg-info/requires.txt +0 -0
  15. {UniTok-4.3.1 → UniTok-4.3.3}/UniTok.egg-info/top_level.txt +0 -0
  16. {UniTok-4.3.1 → UniTok-4.3.3}/UniTokv3/__init__.py +0 -0
  17. {UniTok-4.3.1 → UniTok-4.3.3}/UniTokv3/__main__.py +0 -0
  18. {UniTok-4.3.1 → UniTok-4.3.3}/UniTokv3/analysis/__init__.py +0 -0
  19. {UniTok-4.3.1 → UniTok-4.3.3}/UniTokv3/analysis/lengths.py +0 -0
  20. {UniTok-4.3.1 → UniTok-4.3.3}/UniTokv3/analysis/plot.py +0 -0
  21. {UniTok-4.3.1 → UniTok-4.3.3}/UniTokv3/cols.py +0 -0
  22. {UniTok-4.3.1 → UniTok-4.3.3}/UniTokv3/column.py +0 -0
  23. {UniTok-4.3.1 → UniTok-4.3.3}/UniTokv3/fut.py +0 -0
  24. {UniTok-4.3.1 → UniTok-4.3.3}/UniTokv3/global_setting.py +0 -0
  25. {UniTok-4.3.1 → UniTok-4.3.3}/UniTokv3/meta.py +0 -0
  26. {UniTok-4.3.1 → UniTok-4.3.3}/UniTokv3/tok/__init__.py +0 -0
  27. {UniTok-4.3.1 → UniTok-4.3.3}/UniTokv3/tok/bert_tok.py +0 -0
  28. {UniTok-4.3.1 → UniTok-4.3.3}/UniTokv3/tok/ent_tok.py +0 -0
  29. {UniTok-4.3.1 → UniTok-4.3.3}/UniTokv3/tok/id_tok.py +0 -0
  30. {UniTok-4.3.1 → UniTok-4.3.3}/UniTokv3/tok/number_tok.py +0 -0
  31. {UniTok-4.3.1 → UniTok-4.3.3}/UniTokv3/tok/seq_tok.py +0 -0
  32. {UniTok-4.3.1 → UniTok-4.3.3}/UniTokv3/tok/split_tok.py +0 -0
  33. {UniTok-4.3.1 → UniTok-4.3.3}/UniTokv3/tok/tok.py +0 -0
  34. {UniTok-4.3.1 → UniTok-4.3.3}/UniTokv3/unidep.py +0 -0
  35. {UniTok-4.3.1 → UniTok-4.3.3}/UniTokv3/unitok.py +0 -0
  36. {UniTok-4.3.1 → UniTok-4.3.3}/UniTokv3/vocab.py +0 -0
  37. {UniTok-4.3.1 → UniTok-4.3.3}/UniTokv3/vocabs.py +0 -0
  38. {UniTok-4.3.1 → UniTok-4.3.3}/setup.cfg +0 -0
  39. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/__init__.py +0 -0
  40. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/__main__.py +0 -0
  41. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/job.py +0 -0
  42. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/meta.py +0 -0
  43. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/selector.py +0 -0
  44. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/status.py +0 -0
  45. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/tokenizer/__init__.py +0 -0
  46. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/tokenizer/base_tokenizer.py +0 -0
  47. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/tokenizer/digit_tokenizer.py +0 -0
  48. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/tokenizer/entity_tokenizer.py +0 -0
  49. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/tokenizer/glove_tokenizer.py +0 -0
  50. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/tokenizer/split_tokenizer.py +0 -0
  51. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/tokenizer/transformers_tokenizer.py +0 -0
  52. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/tokenizer/union_tokenizer.py +0 -0
  53. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/tokenizer/unknown_tokenizer.py +0 -0
  54. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/utils/__init__.py +0 -0
  55. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/utils/class_pool.py +0 -0
  56. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/utils/data.py +0 -0
  57. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/utils/function.py +0 -0
  58. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/utils/handler/__init__.py +0 -0
  59. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/utils/handler/json_handler.py +0 -0
  60. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/utils/handler/pkl_handler.py +0 -0
  61. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/utils/hub/__init__.py +0 -0
  62. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/utils/hub/param_hub.py +0 -0
  63. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/utils/index_set/__init__.py +0 -0
  64. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/utils/index_set/index_set.py +0 -0
  65. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/utils/index_set/job_set.py +0 -0
  66. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/utils/index_set/tokenizer_set.py +0 -0
  67. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/utils/index_set/vocabulary_set.py +0 -0
  68. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/utils/instance.py +0 -0
  69. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/utils/map.py +0 -0
  70. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/utils/symbol.py +0 -0
  71. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/utils/verbose.py +0 -0
  72. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/vocabulary/__init__.py +0 -0
  73. {UniTok-4.3.1 → UniTok-4.3.3}/unitok/vocabulary/counter.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: UniTok
3
- Version: 4.3.1
3
+ Version: 4.3.3
4
4
  Summary: Unified Tokenizer
5
5
  Home-page: https://github.com/Jyonn/UnifiedTokenizer
6
6
  Author: Jyonn Liu
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: UniTok
3
- Version: 4.3.1
3
+ Version: 4.3.3
4
4
  Summary: Unified Tokenizer
5
5
  Home-page: https://github.com/Jyonn/UnifiedTokenizer
6
6
  Author: Jyonn Liu
@@ -36,6 +36,12 @@ unitok/meta.py
36
36
  unitok/selector.py
37
37
  unitok/status.py
38
38
  unitok/unitok.py
39
+ unitok.egg-info/PKG-INFO
40
+ unitok.egg-info/SOURCES.txt
41
+ unitok.egg-info/dependency_links.txt
42
+ unitok.egg-info/entry_points.txt
43
+ unitok.egg-info/requires.txt
44
+ unitok.egg-info/top_level.txt
39
45
  unitok/tokenizer/__init__.py
40
46
  unitok/tokenizer/base_tokenizer.py
41
47
  unitok/tokenizer/digit_tokenizer.py
@@ -6,7 +6,7 @@ long_description = (this_directory / "README.md").read_text(encoding='utf8')
6
6
 
7
7
  setup(
8
8
  name='UniTok',
9
- version='4.3.1',
9
+ version='4.3.3',
10
10
  keywords=['token', 'tokenizer', 'NLP', 'transformers', 'glove', 'bert', 'llama'],
11
11
  description='Unified Tokenizer',
12
12
  long_description=long_description,
@@ -98,12 +98,12 @@ class UniTok(Status):
98
98
 
99
99
  def __enter__(self):
100
100
  from unitok.utils import Space
101
- Space.set(self)
101
+ Space.push(self)
102
102
  return self
103
103
 
104
104
  def __exit__(self, exc_type, exc_val, exc_tb):
105
105
  from unitok.utils import Space
106
- Space.unset()
106
+ Space.pop(self)
107
107
 
108
108
  @Status.require_initialized
109
109
  def add_index_job(self, name: str = 'index', tokenizer: DigitTokenizer = None):
@@ -13,10 +13,14 @@ class Hub(abc.ABC, Generic[T]):
13
13
  @classmethod
14
14
  def add(cls, key, obj: T = None):
15
15
  instance = cls._instance.current()
16
- if key in instance and instance[key] is not obj:
16
+ if key in instance and cls.notequal(instance[key], obj):
17
17
  raise ValueError(f'Conflict object declaration: {obj} and {instance[key]}')
18
18
  instance[key] = obj
19
19
 
20
@classmethod
def notequal(cls, a: T, b: T) -> bool:
    """
    Tell whether two objects count as different declarations.

    The comparison is by identity: the result is False only when ``a`` and
    ``b`` are the very same object, and True otherwise.
    """
    same_object = a is b
    return not same_object
23
+
20
24
  @classmethod
21
25
  def get(cls, name: str, **kwargs) -> T:
22
26
  """
@@ -0,0 +1,35 @@
1
class Space:
    """
    Keeps track of the instances that are currently active via "with" blocks.

    Active instances are held on a class-level stack, so "with" blocks may be
    nested: the most recently pushed instance is the current one, and blocks
    must be left in the reverse order in which they were entered.
    """

    # Class-level stack; the last element is the current active instance.
    _stack = []

    @classmethod
    def push(cls, obj):
        """
        Make the given instance the current active instance
        """
        cls._stack.append(obj)

    @classmethod
    def pop(cls, obj):
        """
        Deactivate the given instance, which must be the current active one

        Raises ValueError when no instance is active, or when ``obj`` is not
        the instance on top of the stack.
        """
        if not cls._stack:
            raise ValueError('Space stack is empty')
        # Compare by identity: the stack tracks concrete instances, and a
        # user-defined __eq__ must not let a different object pop the top.
        if cls._stack[-1] is not obj:
            raise ValueError('Space stack is not in order')
        cls._stack.pop()

    @classmethod
    def get_space(cls):
        """
        Get the current active instance, or None when no instance is active
        """
        return cls._stack[-1] if cls._stack else None
@@ -164,3 +164,7 @@ class VocabularyHub(Hub[Vocabulary]):
164
164
  def add(cls, key, obj: Vocabulary = None):
165
165
  key, obj = key.name, key
166
166
  return super().add(key, obj)
167
+
168
@classmethod
def notequal(cls, a: Vocabulary, b: Vocabulary) -> bool:
    """
    Tell whether two vocabularies count as different declarations.

    Only the ``name`` and ``size`` attributes are compared; the sizes are
    looked at only when the names match.
    """
    names_differ = a.name != b.name
    if names_differ:
        return names_differ
    return a.size != b.size
@@ -1,29 +0,0 @@
1
- class Space:
2
- """
3
- UniTok allows multiple instances to be created, but the "with" statement can only be used with one instance.
4
- """
5
-
6
- _active_instance = None
7
-
8
- @classmethod
9
- def set(cls, obj):
10
- """
11
- Lock the unitok instance as the current active instance
12
- """
13
- if cls._active_instance is not None:
14
- raise ValueError(f'Space is already locked to {cls._active_instance}')
15
- cls._active_instance = obj
16
-
17
- @classmethod
18
- def unset(cls):
19
- """
20
- Unlock the current active instance
21
- """
22
- cls._active_instance = None
23
-
24
- @classmethod
25
- def get_space(cls):
26
- """
27
- Get the current active instance
28
- """
29
- return cls._active_instance
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes