UniTok 3.0.7__tar.gz → 3.0.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {UniTok-3.0.7 → UniTok-3.0.9}/PKG-INFO +1 -1
- {UniTok-3.0.7 → UniTok-3.0.9}/UniTok/meta.py +14 -0
- {UniTok-3.0.7 → UniTok-3.0.9}/UniTok/unidep.py +12 -8
- {UniTok-3.0.7 → UniTok-3.0.9}/UniTok.egg-info/PKG-INFO +1 -1
- {UniTok-3.0.7 → UniTok-3.0.9}/setup.py +1 -1
- {UniTok-3.0.7 → UniTok-3.0.9}/README.md +0 -0
- {UniTok-3.0.7 → UniTok-3.0.9}/UniTok/__init__.py +0 -0
- {UniTok-3.0.7 → UniTok-3.0.9}/UniTok/analysis/__init__.py +0 -0
- {UniTok-3.0.7 → UniTok-3.0.9}/UniTok/analysis/lengths.py +0 -0
- {UniTok-3.0.7 → UniTok-3.0.9}/UniTok/analysis/plot.py +0 -0
- {UniTok-3.0.7 → UniTok-3.0.9}/UniTok/cols.py +0 -0
- {UniTok-3.0.7 → UniTok-3.0.9}/UniTok/column.py +0 -0
- {UniTok-3.0.7 → UniTok-3.0.9}/UniTok/global_setting.py +0 -0
- {UniTok-3.0.7 → UniTok-3.0.9}/UniTok/tok/__init__.py +0 -0
- {UniTok-3.0.7 → UniTok-3.0.9}/UniTok/tok/bert_tok.py +0 -0
- {UniTok-3.0.7 → UniTok-3.0.9}/UniTok/tok/entity_tok.py +0 -0
- {UniTok-3.0.7 → UniTok-3.0.9}/UniTok/tok/id_tok.py +0 -0
- {UniTok-3.0.7 → UniTok-3.0.9}/UniTok/tok/number_tok.py +0 -0
- {UniTok-3.0.7 → UniTok-3.0.9}/UniTok/tok/seq_tok.py +0 -0
- {UniTok-3.0.7 → UniTok-3.0.9}/UniTok/tok/split_tok.py +0 -0
- {UniTok-3.0.7 → UniTok-3.0.9}/UniTok/tok/tok.py +0 -0
- {UniTok-3.0.7 → UniTok-3.0.9}/UniTok/unitok.py +0 -0
- {UniTok-3.0.7 → UniTok-3.0.9}/UniTok/vocab.py +0 -0
- {UniTok-3.0.7 → UniTok-3.0.9}/UniTok/vocabs.py +0 -0
- {UniTok-3.0.7 → UniTok-3.0.9}/UniTok.egg-info/SOURCES.txt +0 -0
- {UniTok-3.0.7 → UniTok-3.0.9}/UniTok.egg-info/dependency_links.txt +0 -0
- {UniTok-3.0.7 → UniTok-3.0.9}/UniTok.egg-info/requires.txt +0 -0
- {UniTok-3.0.7 → UniTok-3.0.9}/UniTok.egg-info/top_level.txt +0 -0
- {UniTok-3.0.7 → UniTok-3.0.9}/setup.cfg +0 -0
@@ -40,6 +40,20 @@ class Voc:
|
|
40
40
|
'cols': [col.name for col in self.cols]
|
41
41
|
}
|
42
42
|
|
43
|
+
def merge(self, other):
|
44
|
+
cols = self.cols.copy()
|
45
|
+
for col in other.cols:
|
46
|
+
for _col in cols:
|
47
|
+
if col.name == _col.name:
|
48
|
+
break
|
49
|
+
else:
|
50
|
+
cols.append(col)
|
51
|
+
return Voc(
|
52
|
+
name=self.name,
|
53
|
+
size=self.size,
|
54
|
+
cols=cols,
|
55
|
+
)
|
56
|
+
|
43
57
|
|
44
58
|
class Meta:
|
45
59
|
VER = 'UniDep-2.0'
|
@@ -121,18 +121,21 @@ class UniDep:
|
|
121
121
|
|
122
122
|
@classmethod
|
123
123
|
def _merge_cols(cls, c1: Dict[str, Col], c2: Dict[str, Col]) -> Dict[str, Col]:
|
124
|
-
for
|
125
|
-
if
|
126
|
-
raise ValueError(f'col {
|
124
|
+
for name, col in c2.items():
|
125
|
+
if name in c1 and c1[name] != col:
|
126
|
+
raise ValueError(f'col {name} config conflict')
|
127
127
|
return cls._merge(c1, c2)
|
128
128
|
|
129
129
|
@classmethod
|
130
130
|
def _merge_vocs(cls, v1: Dict[str, Voc], v2: Dict[str, Voc]) -> Dict[str, Voc]:
|
131
|
-
|
132
|
-
|
133
|
-
if
|
134
|
-
|
135
|
-
|
131
|
+
merged = v1.copy()
|
132
|
+
for name, vocab in v2.items():
|
133
|
+
if name in v1:
|
134
|
+
if v1[name] != vocab:
|
135
|
+
raise ValueError(f'vocab {name} config conflict')
|
136
|
+
vocab = v1[name].merge(vocab)
|
137
|
+
merged[name] = vocab
|
138
|
+
return merged
|
136
139
|
|
137
140
|
def union(self, *depots: 'UniDep'):
|
138
141
|
"""
|
@@ -174,6 +177,7 @@ class UniDep:
|
|
174
177
|
export unioned or filtered depot
|
175
178
|
"""
|
176
179
|
|
180
|
+
os.makedirs(store_dir, exist_ok=True)
|
177
181
|
data = dict()
|
178
182
|
|
179
183
|
for sample in tqdm.tqdm(self, disable=self.silent):
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|