UniTok 3.1.0.tar.gz → 3.1.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. {UniTok-3.1.0 → UniTok-3.1.1}/PKG-INFO +1 -1
  2. {UniTok-3.1.0 → UniTok-3.1.1}/UniTok/column.py +2 -3
  3. {UniTok-3.1.0 → UniTok-3.1.1}/UniTok/meta.py +3 -3
  4. {UniTok-3.1.0 → UniTok-3.1.1}/UniTok/unidep.py +1 -1
  5. {UniTok-3.1.0 → UniTok-3.1.1}/UniTok/unitok.py +3 -3
  6. {UniTok-3.1.0 → UniTok-3.1.1}/UniTok.egg-info/PKG-INFO +1 -1
  7. {UniTok-3.1.0 → UniTok-3.1.1}/setup.py +1 -1
  8. {UniTok-3.1.0 → UniTok-3.1.1}/README.md +0 -0
  9. {UniTok-3.1.0 → UniTok-3.1.1}/UniTok/__init__.py +0 -0
  10. {UniTok-3.1.0 → UniTok-3.1.1}/UniTok/analysis/__init__.py +0 -0
  11. {UniTok-3.1.0 → UniTok-3.1.1}/UniTok/analysis/lengths.py +0 -0
  12. {UniTok-3.1.0 → UniTok-3.1.1}/UniTok/analysis/plot.py +0 -0
  13. {UniTok-3.1.0 → UniTok-3.1.1}/UniTok/cols.py +0 -0
  14. {UniTok-3.1.0 → UniTok-3.1.1}/UniTok/global_setting.py +0 -0
  15. {UniTok-3.1.0 → UniTok-3.1.1}/UniTok/tok/__init__.py +0 -0
  16. {UniTok-3.1.0 → UniTok-3.1.1}/UniTok/tok/bert_tok.py +0 -0
  17. {UniTok-3.1.0 → UniTok-3.1.1}/UniTok/tok/ent_tok.py +0 -0
  18. {UniTok-3.1.0 → UniTok-3.1.1}/UniTok/tok/id_tok.py +0 -0
  19. {UniTok-3.1.0 → UniTok-3.1.1}/UniTok/tok/number_tok.py +0 -0
  20. {UniTok-3.1.0 → UniTok-3.1.1}/UniTok/tok/seq_tok.py +0 -0
  21. {UniTok-3.1.0 → UniTok-3.1.1}/UniTok/tok/split_tok.py +0 -0
  22. {UniTok-3.1.0 → UniTok-3.1.1}/UniTok/tok/tok.py +0 -0
  23. {UniTok-3.1.0 → UniTok-3.1.1}/UniTok/vocab.py +0 -0
  24. {UniTok-3.1.0 → UniTok-3.1.1}/UniTok/vocabs.py +0 -0
  25. {UniTok-3.1.0 → UniTok-3.1.1}/UniTok.egg-info/SOURCES.txt +0 -0
  26. {UniTok-3.1.0 → UniTok-3.1.1}/UniTok.egg-info/dependency_links.txt +0 -0
  27. {UniTok-3.1.0 → UniTok-3.1.1}/UniTok.egg-info/requires.txt +0 -0
  28. {UniTok-3.1.0 → UniTok-3.1.1}/UniTok.egg-info/top_level.txt +0 -0
  29. {UniTok-3.1.0 → UniTok-3.1.1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: UniTok
3
- Version: 3.1.0
3
+ Version: 3.1.1
4
4
  Summary: Unified Tokenizer
5
5
  Home-page: https://github.com/Jyonn/UnifiedTokenizer
6
6
  Author: Jyonn Liu
@@ -1,5 +1,4 @@
1
- from typing import Type
2
-
1
+ from typing import Type, Union
3
2
 
4
3
  from tqdm import tqdm
5
4
 
@@ -45,7 +44,7 @@ class Column:
45
44
  tok (BaseTok): The tokenizer of the column.
46
45
  operator (SeqOperator): The operator of the column.
47
46
  """
48
- def __init__(self, tok: BaseTok | Type[BaseTok], name=None, operator: SeqOperator = None, **kwargs):
47
+ def __init__(self, tok: Union[BaseTok, Type[BaseTok]], name=None, operator: SeqOperator = None, **kwargs):
49
48
  self.tok = tok
50
49
  self.name = name or tok.vocab.name
51
50
  self.operator = operator
@@ -1,13 +1,13 @@
1
1
  import json
2
2
  import os
3
3
  import warnings
4
- from typing import List
4
+ from typing import List, Union
5
5
 
6
6
 
7
7
  class Col:
8
8
  def __init__(self, name, voc=None, max_length=None, padding=None, vocab=None):
9
9
  self.name: str = name
10
- self.voc: Voc | str = voc or vocab
10
+ self.voc: Union[Voc, str] = voc or vocab
11
11
  self.max_length = max_length
12
12
  self.padding = padding
13
13
  self.list = max_length is not None
@@ -29,7 +29,7 @@ class Voc:
29
29
  def __init__(self, name, size, cols, store_dir):
30
30
  self.name: str = name
31
31
  self.size: int = size
32
- self.cols: List[Col | str] = cols
32
+ self.cols: List[Union[Col, str]] = cols
33
33
  self.store_dir = store_dir
34
34
 
35
35
  def __eq__(self, other):
@@ -46,7 +46,7 @@ class UniDep:
46
46
  self.print('resize sample_size to', self._sample_size)
47
47
  self.sample_size = self._sample_size
48
48
 
49
- self.vocabs = Vocabs()
49
+ self.vocabs = Vocabs() # type: Union[Dict[str, Vocab], Vocabs]
50
50
  for vocab_name in self.vocs:
51
51
  self.vocabs.append(Vocab(name=vocab_name).load(self.store_dir))
52
52
  self.id2index = self.vocabs[self.id_voc.name].o2i
@@ -1,7 +1,7 @@
1
1
  import json
2
2
  import os
3
3
  import warnings
4
- from typing import Optional, Type
4
+ from typing import Optional, Type, Dict, Union
5
5
 
6
6
  import numpy as np
7
7
  import pandas as pd
@@ -58,7 +58,7 @@ class UniTok:
58
58
 
59
59
  def __init__(self):
60
60
  self.cols = Cols()
61
- self.vocabs = Vocabs()
61
+ self.vocabs = Vocabs() # type: Union[Dict[str, Vocab], Vocabs]
62
62
  self.id_col = None # type: Optional[Column]
63
63
  self.data = None
64
64
 
@@ -68,7 +68,7 @@ class UniTok:
68
68
  'use vocabs instead (will be removed in 4.x version)', DeprecationWarning)
69
69
  return self.vocabs
70
70
 
71
- def add_col(self, col: Column | str, tok: BaseTok | Type[BaseTok] = None):
71
+ def add_col(self, col: Union[Column, str], tok: Union[BaseTok, Type[BaseTok]] = None):
72
72
  """
73
73
  Declare a column in the DataFrame to be tokenized.
74
74
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: UniTok
3
- Version: 3.1.0
3
+ Version: 3.1.1
4
4
  Summary: Unified Tokenizer
5
5
  Home-page: https://github.com/Jyonn/UnifiedTokenizer
6
6
  Author: Jyonn Liu
@@ -6,7 +6,7 @@ long_description = (this_directory / "README.md").read_text()
6
6
 
7
7
  setup(
8
8
  name='UniTok',
9
- version='3.1.0',
9
+ version='3.1.1',
10
10
  keywords=['token', 'tokenizer', 'bert'],
11
11
  description='Unified Tokenizer',
12
12
  long_description=long_description,
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes