UniTok 4.3.2__tar.gz → 4.3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. {UniTok-4.3.2 → UniTok-4.3.3}/PKG-INFO +1 -1
  2. {UniTok-4.3.2 → UniTok-4.3.3}/UniTok.egg-info/PKG-INFO +1 -1
  3. {UniTok-4.3.2 → UniTok-4.3.3}/UniTok.egg-info/SOURCES.txt +6 -0
  4. {UniTok-4.3.2 → UniTok-4.3.3}/setup.py +1 -1
  5. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/utils/hub/hub.py +5 -1
  6. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/vocabulary/vocabulary.py +4 -0
  7. {UniTok-4.3.2 → UniTok-4.3.3}/LICENSE +0 -0
  8. {UniTok-4.3.2 → UniTok-4.3.3}/README.md +0 -0
  9. {UniTok-4.3.2 → UniTok-4.3.3}/UniTok.egg-info/dependency_links.txt +0 -0
  10. {UniTok-4.3.2 → UniTok-4.3.3}/UniTok.egg-info/entry_points.txt +0 -0
  11. {UniTok-4.3.2 → UniTok-4.3.3}/UniTok.egg-info/requires.txt +0 -0
  12. {UniTok-4.3.2 → UniTok-4.3.3}/UniTok.egg-info/top_level.txt +0 -0
  13. {UniTok-4.3.2 → UniTok-4.3.3}/UniTokv3/__init__.py +0 -0
  14. {UniTok-4.3.2 → UniTok-4.3.3}/UniTokv3/__main__.py +0 -0
  15. {UniTok-4.3.2 → UniTok-4.3.3}/UniTokv3/analysis/__init__.py +0 -0
  16. {UniTok-4.3.2 → UniTok-4.3.3}/UniTokv3/analysis/lengths.py +0 -0
  17. {UniTok-4.3.2 → UniTok-4.3.3}/UniTokv3/analysis/plot.py +0 -0
  18. {UniTok-4.3.2 → UniTok-4.3.3}/UniTokv3/cols.py +0 -0
  19. {UniTok-4.3.2 → UniTok-4.3.3}/UniTokv3/column.py +0 -0
  20. {UniTok-4.3.2 → UniTok-4.3.3}/UniTokv3/fut.py +0 -0
  21. {UniTok-4.3.2 → UniTok-4.3.3}/UniTokv3/global_setting.py +0 -0
  22. {UniTok-4.3.2 → UniTok-4.3.3}/UniTokv3/meta.py +0 -0
  23. {UniTok-4.3.2 → UniTok-4.3.3}/UniTokv3/tok/__init__.py +0 -0
  24. {UniTok-4.3.2 → UniTok-4.3.3}/UniTokv3/tok/bert_tok.py +0 -0
  25. {UniTok-4.3.2 → UniTok-4.3.3}/UniTokv3/tok/ent_tok.py +0 -0
  26. {UniTok-4.3.2 → UniTok-4.3.3}/UniTokv3/tok/id_tok.py +0 -0
  27. {UniTok-4.3.2 → UniTok-4.3.3}/UniTokv3/tok/number_tok.py +0 -0
  28. {UniTok-4.3.2 → UniTok-4.3.3}/UniTokv3/tok/seq_tok.py +0 -0
  29. {UniTok-4.3.2 → UniTok-4.3.3}/UniTokv3/tok/split_tok.py +0 -0
  30. {UniTok-4.3.2 → UniTok-4.3.3}/UniTokv3/tok/tok.py +0 -0
  31. {UniTok-4.3.2 → UniTok-4.3.3}/UniTokv3/unidep.py +0 -0
  32. {UniTok-4.3.2 → UniTok-4.3.3}/UniTokv3/unitok.py +0 -0
  33. {UniTok-4.3.2 → UniTok-4.3.3}/UniTokv3/vocab.py +0 -0
  34. {UniTok-4.3.2 → UniTok-4.3.3}/UniTokv3/vocabs.py +0 -0
  35. {UniTok-4.3.2 → UniTok-4.3.3}/setup.cfg +0 -0
  36. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/__init__.py +0 -0
  37. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/__main__.py +0 -0
  38. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/job.py +0 -0
  39. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/meta.py +0 -0
  40. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/selector.py +0 -0
  41. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/status.py +0 -0
  42. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/tokenizer/__init__.py +0 -0
  43. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/tokenizer/base_tokenizer.py +0 -0
  44. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/tokenizer/digit_tokenizer.py +0 -0
  45. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/tokenizer/entity_tokenizer.py +0 -0
  46. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/tokenizer/glove_tokenizer.py +0 -0
  47. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/tokenizer/split_tokenizer.py +0 -0
  48. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/tokenizer/transformers_tokenizer.py +0 -0
  49. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/tokenizer/union_tokenizer.py +0 -0
  50. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/tokenizer/unknown_tokenizer.py +0 -0
  51. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/unitok.py +0 -0
  52. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/utils/__init__.py +0 -0
  53. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/utils/class_pool.py +0 -0
  54. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/utils/data.py +0 -0
  55. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/utils/function.py +0 -0
  56. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/utils/handler/__init__.py +0 -0
  57. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/utils/handler/json_handler.py +0 -0
  58. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/utils/handler/pkl_handler.py +0 -0
  59. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/utils/hub/__init__.py +0 -0
  60. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/utils/hub/param_hub.py +0 -0
  61. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/utils/index_set/__init__.py +0 -0
  62. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/utils/index_set/index_set.py +0 -0
  63. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/utils/index_set/job_set.py +0 -0
  64. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/utils/index_set/tokenizer_set.py +0 -0
  65. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/utils/index_set/vocabulary_set.py +0 -0
  66. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/utils/instance.py +0 -0
  67. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/utils/map.py +0 -0
  68. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/utils/space.py +0 -0
  69. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/utils/symbol.py +0 -0
  70. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/utils/verbose.py +0 -0
  71. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/vocabulary/__init__.py +0 -0
  72. {UniTok-4.3.2 → UniTok-4.3.3}/unitok/vocabulary/counter.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: UniTok
3
- Version: 4.3.2
3
+ Version: 4.3.3
4
4
  Summary: Unified Tokenizer
5
5
  Home-page: https://github.com/Jyonn/UnifiedTokenizer
6
6
  Author: Jyonn Liu
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: UniTok
3
- Version: 4.3.2
3
+ Version: 4.3.3
4
4
  Summary: Unified Tokenizer
5
5
  Home-page: https://github.com/Jyonn/UnifiedTokenizer
6
6
  Author: Jyonn Liu
@@ -36,6 +36,12 @@ unitok/meta.py
36
36
  unitok/selector.py
37
37
  unitok/status.py
38
38
  unitok/unitok.py
39
+ unitok.egg-info/PKG-INFO
40
+ unitok.egg-info/SOURCES.txt
41
+ unitok.egg-info/dependency_links.txt
42
+ unitok.egg-info/entry_points.txt
43
+ unitok.egg-info/requires.txt
44
+ unitok.egg-info/top_level.txt
39
45
  unitok/tokenizer/__init__.py
40
46
  unitok/tokenizer/base_tokenizer.py
41
47
  unitok/tokenizer/digit_tokenizer.py
@@ -6,7 +6,7 @@ long_description = (this_directory / "README.md").read_text(encoding='utf8')
6
6
 
7
7
  setup(
8
8
  name='UniTok',
9
- version='4.3.2',
9
+ version='4.3.3',
10
10
  keywords=['token', 'tokenizer', 'NLP', 'transformers', 'glove', 'bert', 'llama'],
11
11
  description='Unified Tokenizer',
12
12
  long_description=long_description,
@@ -13,10 +13,14 @@ class Hub(abc.ABC, Generic[T]):
13
13
  @classmethod
14
14
  def add(cls, key, obj: T = None):
15
15
  instance = cls._instance.current()
16
- if key in instance and instance[key] is not obj:
16
+ if key in instance and cls.notequal(instance[key], obj):
17
17
  raise ValueError(f'Conflict object declaration: {obj} and {instance[key]}')
18
18
  instance[key] = obj
19
19
 
20
+ @classmethod
21
+ def notequal(cls, a: T, b: T) -> bool:
22
+ return a is not b
23
+
20
24
  @classmethod
21
25
  def get(cls, name: str, **kwargs) -> T:
22
26
  """
@@ -164,3 +164,7 @@ class VocabularyHub(Hub[Vocabulary]):
164
164
  def add(cls, key, obj: Vocabulary = None):
165
165
  key, obj = key.name, key
166
166
  return super().add(key, obj)
167
+
168
+ @classmethod
169
+ def notequal(cls, a: Vocabulary, b: Vocabulary) -> bool:
170
+ return a.name != b.name or a.size != b.size
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes