UniTok 3.0.4__tar.gz → 3.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. {UniTok-3.0.4 → UniTok-3.0.5}/PKG-INFO +1 -1
  2. {UniTok-3.0.4 → UniTok-3.0.5}/UniTok/meta.py +24 -16
  3. {UniTok-3.0.4 → UniTok-3.0.5}/UniTok/unidep.py +1 -7
  4. {UniTok-3.0.4 → UniTok-3.0.5}/UniTok.egg-info/PKG-INFO +1 -1
  5. {UniTok-3.0.4 → UniTok-3.0.5}/setup.py +1 -1
  6. {UniTok-3.0.4 → UniTok-3.0.5}/README.md +0 -0
  7. {UniTok-3.0.4 → UniTok-3.0.5}/UniTok/__init__.py +0 -0
  8. {UniTok-3.0.4 → UniTok-3.0.5}/UniTok/analysis/__init__.py +0 -0
  9. {UniTok-3.0.4 → UniTok-3.0.5}/UniTok/analysis/lengths.py +0 -0
  10. {UniTok-3.0.4 → UniTok-3.0.5}/UniTok/analysis/plot.py +0 -0
  11. {UniTok-3.0.4 → UniTok-3.0.5}/UniTok/cols.py +0 -0
  12. {UniTok-3.0.4 → UniTok-3.0.5}/UniTok/column.py +0 -0
  13. {UniTok-3.0.4 → UniTok-3.0.5}/UniTok/global_setting.py +0 -0
  14. {UniTok-3.0.4 → UniTok-3.0.5}/UniTok/tok/__init__.py +0 -0
  15. {UniTok-3.0.4 → UniTok-3.0.5}/UniTok/tok/bert_tok.py +0 -0
  16. {UniTok-3.0.4 → UniTok-3.0.5}/UniTok/tok/entity_tok.py +0 -0
  17. {UniTok-3.0.4 → UniTok-3.0.5}/UniTok/tok/id_tok.py +0 -0
  18. {UniTok-3.0.4 → UniTok-3.0.5}/UniTok/tok/number_tok.py +0 -0
  19. {UniTok-3.0.4 → UniTok-3.0.5}/UniTok/tok/seq_tok.py +0 -0
  20. {UniTok-3.0.4 → UniTok-3.0.5}/UniTok/tok/split_tok.py +0 -0
  21. {UniTok-3.0.4 → UniTok-3.0.5}/UniTok/tok/tok.py +0 -0
  22. {UniTok-3.0.4 → UniTok-3.0.5}/UniTok/unitok.py +0 -0
  23. {UniTok-3.0.4 → UniTok-3.0.5}/UniTok/vocab.py +0 -0
  24. {UniTok-3.0.4 → UniTok-3.0.5}/UniTok/vocabs.py +0 -0
  25. {UniTok-3.0.4 → UniTok-3.0.5}/UniTok.egg-info/SOURCES.txt +0 -0
  26. {UniTok-3.0.4 → UniTok-3.0.5}/UniTok.egg-info/dependency_links.txt +0 -0
  27. {UniTok-3.0.4 → UniTok-3.0.5}/UniTok.egg-info/requires.txt +0 -0
  28. {UniTok-3.0.4 → UniTok-3.0.5}/UniTok.egg-info/top_level.txt +0 -0
  29. {UniTok-3.0.4 → UniTok-3.0.5}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: UniTok
3
- Version: 3.0.4
3
+ Version: 3.0.5
4
4
  Summary: Unified Tokenizer
5
5
  Home-page: https://github.com/Jyonn/UnifiedTokenizer
6
6
  Author: Jyonn Liu
@@ -44,12 +44,15 @@ class Voc:
44
44
  class Meta:
45
45
  VER = 'UniDep-2.0'
46
46
 
47
- def __init__(self, version, id_col, col_info=None, vocab_info=None, cols=None, vocs=None):
48
- self.version = version
47
+ def __init__(self, store_dir):
48
+ self.store_dir = store_dir
49
+ self.path = os.path.join(self.store_dir, 'meta.data.json')
49
50
 
50
- self.cols = cols or col_info
51
- self.vocs = vocs or vocab_info
52
- self.id_col = id_col
51
+ data = self.load()
52
+ self.version = data['version']
53
+ self.cols = data.get('cols') or data['col_info']
54
+ self.vocs = data.get('vocs') or data['vocab_info']
55
+ self.id_col = data['id_col']
53
56
 
54
57
  # build col-voc graph
55
58
  self.cols = {col: Col(**self.cols[col], name=col) for col in self.cols}
@@ -61,7 +64,7 @@ class Meta:
61
64
  for voc in self.vocs.values():
62
65
  voc.cols = [self.cols[col] for col in voc.cols]
63
66
 
64
- self.upgrade = self.version_check()
67
+ self.version_check()
65
68
 
66
69
  @staticmethod
67
70
  def parse_version(version):
@@ -77,8 +80,11 @@ class Meta:
77
80
  'vocs': {voc.name: voc.get_info() for voc in self.vocs.values()}
78
81
  }
79
82
 
80
- def save(self, store_dir):
81
- json.dump(self.get_info(), open(os.path.join(store_dir, 'meta.data.json'), 'w'), indent=2)
83
+ def load(self) -> dict:
84
+ return json.load(open(self.path))
85
+
86
+ def save(self):
87
+ json.dump(self.get_info(), open(os.path.join(self.store_dir, 'meta.data.json'), 'w'), indent=2)
82
88
 
83
89
  def version_check(self):
84
90
  current_version = self.parse_version(Meta.VER)
@@ -86,15 +92,17 @@ class Meta:
86
92
 
87
93
  if current_version != depot_version:
88
94
  warnings.warn(
89
- 'Meta version mismatch, '
90
- 'current version: {}, '
91
- 'depot version: {}. '
92
- 'It may cause unexpected error.'.format(
93
- current_version, depot_version
94
- ))
95
+ f'meta version of depot ({self.store_dir}) mismatch, '
96
+ f'current version: {current_version}, '
97
+ f'depot version: {depot_version}. '
98
+ f'It may cause unexpected error.')
95
99
 
96
100
  if current_version <= depot_version:
97
- return False
101
+ return
98
102
 
99
103
  command = input('Press Y to upgrade meta data for future use (Y/n): ')
100
- return command.lower() == 'y'
104
+ if command.lower() == 'y':
105
+ os.rename(self.path, self.path + '.bak')
106
+ print('Old meta data backed up to {}.'.format(self.path + '.bak'))
107
+ self.save()
108
+ print('Meta data upgraded.')
@@ -23,13 +23,7 @@ class UniDep:
23
23
  self.cached_samples = []
24
24
 
25
25
  self.meta_path = os.path.join(self.store_dir, 'meta.data.json')
26
- self.meta = Meta(**json.load(open(self.meta_path)))
27
-
28
- if self.meta.upgrade:
29
- # backup old meta
30
- os.rename(self.meta_path, self.meta_path + '.bak')
31
- # save new meta
32
- self.meta.save(self.store_dir)
26
+ self.meta = Meta(**json.load(open(self.meta_path)), store_dir=self.store_dir)
33
27
 
34
28
  self.data_path = os.path.join(self.store_dir, 'data.npy')
35
29
  self.data = np.load(self.data_path, allow_pickle=True)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: UniTok
3
- Version: 3.0.4
3
+ Version: 3.0.5
4
4
  Summary: Unified Tokenizer
5
5
  Home-page: https://github.com/Jyonn/UnifiedTokenizer
6
6
  Author: Jyonn Liu
@@ -6,7 +6,7 @@ long_description = (this_directory / "README.md").read_text()
6
6
 
7
7
  setup(
8
8
  name='UniTok',
9
- version='3.0.4',
9
+ version='3.0.5',
10
10
  keywords=['token', 'tokenizer', 'bert'],
11
11
  description='Unified Tokenizer',
12
12
  long_description=long_description,
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes