returnn 1.20250703.183400__py3-none-any.whl → 1.20250708.165746__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of returnn might be problematic. Click here for more details.
- returnn/PKG-INFO +1 -1
- returnn/_setup_info_generated.py +2 -2
- returnn/datasets/lm.py +18 -2
- returnn/datasets/meta.py +10 -2
- {returnn-1.20250703.183400.dist-info → returnn-1.20250708.165746.dist-info}/METADATA +1 -1
- {returnn-1.20250703.183400.dist-info → returnn-1.20250708.165746.dist-info}/RECORD +9 -9
- {returnn-1.20250703.183400.dist-info → returnn-1.20250708.165746.dist-info}/LICENSE +0 -0
- {returnn-1.20250703.183400.dist-info → returnn-1.20250708.165746.dist-info}/WHEEL +0 -0
- {returnn-1.20250703.183400.dist-info → returnn-1.20250708.165746.dist-info}/top_level.txt +0 -0
returnn/PKG-INFO
CHANGED
returnn/_setup_info_generated.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
version = '1.20250703.183400'
|
|
2
|
-
long_version = '1.
|
|
1
|
+
version = '1.20250708.165746'
|
|
2
|
+
long_version = '1.20250708.165746+git.eff9959'
|
returnn/datasets/lm.py
CHANGED
|
@@ -9,6 +9,7 @@ from __future__ import annotations
|
|
|
9
9
|
|
|
10
10
|
from typing import (
|
|
11
11
|
Iterable,
|
|
12
|
+
Literal,
|
|
12
13
|
Optional,
|
|
13
14
|
Sequence,
|
|
14
15
|
Union,
|
|
@@ -1177,6 +1178,7 @@ class PhoneSeqGenerator:
|
|
|
1177
1178
|
add_extra_begin_lemma: float = 1.0,
|
|
1178
1179
|
extra_end_lemma: Optional[Dict[str, Any]] = None,
|
|
1179
1180
|
add_extra_end_lemma: float = 1.0,
|
|
1181
|
+
phon_pick_strategy: Literal["random", "first"] = "random",
|
|
1180
1182
|
):
|
|
1181
1183
|
"""
|
|
1182
1184
|
:param lexicon_file: lexicon XML file
|
|
@@ -1196,6 +1198,8 @@ class PhoneSeqGenerator:
|
|
|
1196
1198
|
:param add_extra_begin_lemma:
|
|
1197
1199
|
:param extra_end_lemma: just like ``extra_begin_lemma``, but for the end
|
|
1198
1200
|
:param add_extra_end_lemma:
|
|
1201
|
+
:param phon_pick_strategy: "random" or "first". If "random", then lemmas are picked randomly
|
|
1202
|
+
if multiple pronunciations exist.
|
|
1199
1203
|
"""
|
|
1200
1204
|
self.lexicon = Lexicon(lexicon_file)
|
|
1201
1205
|
self.phonemes = sorted(self.lexicon.phonemes.keys(), key=lambda s: self.lexicon.phonemes[s]["index"])
|
|
@@ -1217,6 +1221,7 @@ class PhoneSeqGenerator:
|
|
|
1217
1221
|
self.add_extra_begin_lemma = add_extra_begin_lemma
|
|
1218
1222
|
self.extra_end_lemma = extra_end_lemma
|
|
1219
1223
|
self.add_extra_end_lemma = add_extra_end_lemma
|
|
1224
|
+
self.phon_pick_strategy = phon_pick_strategy
|
|
1220
1225
|
|
|
1221
1226
|
def random_seed(self, seed: int):
|
|
1222
1227
|
"""Reset RNG via given seed"""
|
|
@@ -1284,7 +1289,12 @@ class PhoneSeqGenerator:
|
|
|
1284
1289
|
""":return: space-separated phones"""
|
|
1285
1290
|
phones = []
|
|
1286
1291
|
for lemma in self._iter_orth_lemmas(orth):
|
|
1287
|
-
|
|
1292
|
+
if self.phon_pick_strategy == "first":
|
|
1293
|
+
phon = lemma["phons"][0]
|
|
1294
|
+
elif self.phon_pick_strategy == "random":
|
|
1295
|
+
phon = self.rnd.choice(lemma["phons"])
|
|
1296
|
+
else:
|
|
1297
|
+
raise ValueError(f"Unknown phon_pick_strategy {self.phon_pick_strategy}")
|
|
1288
1298
|
phones.append(phon["phon"])
|
|
1289
1299
|
return " ".join(phones)
|
|
1290
1300
|
|
|
@@ -1356,7 +1366,13 @@ class PhoneSeqGenerator:
|
|
|
1356
1366
|
"""
|
|
1357
1367
|
allos: List[AllophoneState] = []
|
|
1358
1368
|
for lemma in self._iter_orth_lemmas(orth):
|
|
1359
|
-
|
|
1369
|
+
if self.phon_pick_strategy == "first":
|
|
1370
|
+
phon = lemma["phons"][0]
|
|
1371
|
+
elif self.phon_pick_strategy == "random":
|
|
1372
|
+
phon = self.rnd.choice(lemma["phons"])
|
|
1373
|
+
else:
|
|
1374
|
+
raise ValueError(f"Unknown phon_pick_strategy {self.phon_pick_strategy}")
|
|
1375
|
+
# space-separated phones in phon["phon"]
|
|
1360
1376
|
l_allos = list(self._phones_to_allos(phon["phon"].split()))
|
|
1361
1377
|
l_allos[0].mark_initial()
|
|
1362
1378
|
l_allos[-1].mark_final()
|
returnn/datasets/meta.py
CHANGED
|
@@ -1990,9 +1990,13 @@ class VariableDataset(Dataset):
|
|
|
1990
1990
|
class MultiEpochDataset(CachedDataset2):
|
|
1991
1991
|
"""
|
|
1992
1992
|
It wraps some dataset, where one outer epoch corresponds to multiple epochs in the inner wrapped dataset.
|
|
1993
|
+
I.e. one iteration through this dataset corresponds to multiple iterations through the inner dataset.
|
|
1993
1994
|
|
|
1994
|
-
This can be useful
|
|
1995
|
+
This can be useful for forwarding, when you want to do multiple iterations through the dataset.
|
|
1996
|
+
This could be useful for clustering.
|
|
1995
1997
|
|
|
1998
|
+
This can also be useful when the inner dataset uses (or must use) partition_epoch,
|
|
1999
|
+
and we want to cover the whole full epoch:
|
|
1996
2000
|
One specific example when the data is distributed over multiple files,
|
|
1997
2001
|
and for reasonable performance, you want to have the data copied to the local disk,
|
|
1998
2002
|
but all data together is too large to fit on the local disk.
|
|
@@ -2041,7 +2045,11 @@ class MultiEpochDataset(CachedDataset2):
|
|
|
2041
2045
|
return self._dataset.get_all_tags()
|
|
2042
2046
|
|
|
2043
2047
|
def get_total_num_seqs(self, *, fast: bool = False) -> int:
|
|
2044
|
-
"""
|
|
2048
|
+
"""
|
|
2049
|
+
Total num seqs.
|
|
2050
|
+
Note that this is the total number of seqs in the inner dataset,
|
|
2051
|
+
so without the multi-epoch handling.
|
|
2052
|
+
"""
|
|
2045
2053
|
return self._dataset.get_total_num_seqs(fast=fast)
|
|
2046
2054
|
|
|
2047
2055
|
def get_data_keys(self) -> List[str]:
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
returnn/PKG-INFO,sha256=
|
|
1
|
+
returnn/PKG-INFO,sha256=gFz3WCtUJq8MCZeykNbFyHQVIFyQnwVcQkfRsDeC0X0,5215
|
|
2
2
|
returnn/__init__.py,sha256=biBtRsM0WZ406vShaeH-9WFoqJ8XwTbn6g0EeFJ7l8E,1012
|
|
3
3
|
returnn/__main__.py,sha256=lHyZcu_0yc9f7Vf_Kfdy9PmeU0T76XVXnpalHi5WKro,31740
|
|
4
4
|
returnn/__old_mod_loader__.py,sha256=nvsNY-xELdS_IPNkv66Q9Rmvg4dbGW0-EBRDcCmctos,7654
|
|
5
5
|
returnn/__setup__.py,sha256=22kQn2fh11iPM0hLb2Fy5sLmoU1JGvmDxXRYuRgQkwU,4659
|
|
6
|
-
returnn/_setup_info_generated.py,sha256=
|
|
6
|
+
returnn/_setup_info_generated.py,sha256=QIOgjEYWfaIUKGZdpx_bvAWra8S_SM7m87LEcq2Lcp0,77
|
|
7
7
|
returnn/config.py,sha256=3tmKhB6FnQZaNdtcYsiB61JnEY--iZ2qmJ4yq0b6tE0,29140
|
|
8
8
|
returnn/forward_iface.py,sha256=A_OJiaXsX4MlXQRzST86ylyxSUZbC402PQL1REcqHjM,911
|
|
9
9
|
returnn/learning_rate_control.py,sha256=ZvWryAn_tv9DhV8sh1LV3eE34Yltl3On3mYZAG4hR9s,34684
|
|
@@ -20,9 +20,9 @@ returnn/datasets/cached2.py,sha256=_6pza3IG68JexaExhj1ld3fP6pE7T-G804driJ9Z_qo,1
|
|
|
20
20
|
returnn/datasets/distrib_files.py,sha256=9-3pJaF8Ws1Cs4AlelFCODz6b5YiaTsrD7tMCB76PDY,29865
|
|
21
21
|
returnn/datasets/generating.py,sha256=9U_w6URIrv-Rb-hDbPOzYW9qYXzJbw32N6G268IKyoM,99833
|
|
22
22
|
returnn/datasets/hdf.py,sha256=v5sjBenURR9Z-g7AQ9tsL84yDSye5RtbLpym3M6HSDE,67833
|
|
23
|
-
returnn/datasets/lm.py,sha256=
|
|
23
|
+
returnn/datasets/lm.py,sha256=ycHdGHxT4QshBM9LPktLDaaQRTLO5zQyueCK5KMNR_4,100022
|
|
24
24
|
returnn/datasets/map.py,sha256=kOBJVZmwDhLsOplzDNByIfa0NRSUaMo2Lsy36lBvxrM,10907
|
|
25
|
-
returnn/datasets/meta.py,sha256=
|
|
25
|
+
returnn/datasets/meta.py,sha256=6XPPxhiNSxWw9Hu5Z6wG8dD9Zk82FqiI-k9HGQSTKgw,95658
|
|
26
26
|
returnn/datasets/multi_proc.py,sha256=aVjsLt2qjHnHOrEYCgIPCwNYE-f1fiGP6eZ8NGAr3A4,22583
|
|
27
27
|
returnn/datasets/normalization_data.py,sha256=J3njQCMvWAbIAVPepO2L_Xdau9eWYB7Zyd6STeGzTbc,14615
|
|
28
28
|
returnn/datasets/numpy_dump.py,sha256=wl8bKIKAlff2HPJPtuu5wBg3TLOf16d2wLVB4lLAwTM,5158
|
|
@@ -253,8 +253,8 @@ returnn/util/sig_proc.py,sha256=Tjz0VOAVyqu2qDCF5HZ1JjALjcFsHcNkcd96WgZeKfE,7265
|
|
|
253
253
|
returnn/util/task_system.py,sha256=y4sMVXQ25Qd2z0rx03uOlXlkE-jbCYC1Sjfn-XlraVU,26003
|
|
254
254
|
returnn/util/train_proc_manager.py,sha256=Pjht28k6uz6BNQ47uW6Gf880iyq5q4wx7P_K2tmoAM8,3266
|
|
255
255
|
returnn/util/watch_memory.py,sha256=BR5P2kvBN6UI81cE0_1WAA6Hd1SByLbBaiDxvLhPOew,4213
|
|
256
|
-
returnn-1.20250703.183400.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
|
|
257
|
-
returnn-1.
|
|
258
|
-
returnn-1.20250703.183400.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
|
|
259
|
-
returnn-1.20250703.183400.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
|
|
260
|
-
returnn-1.20250703.183400.dist-info/RECORD,,
|
|
256
|
+
returnn-1.20250708.165746.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
|
|
257
|
+
returnn-1.20250708.165746.dist-info/METADATA,sha256=gFz3WCtUJq8MCZeykNbFyHQVIFyQnwVcQkfRsDeC0X0,5215
|
|
258
|
+
returnn-1.20250708.165746.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
|
|
259
|
+
returnn-1.20250708.165746.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
|
|
260
|
+
returnn-1.20250708.165746.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|