returnn 1.20250703.183400__py3-none-any.whl → 1.20250708.165746__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of returnn might be problematic. Click here for more details.

returnn/PKG-INFO CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: returnn
3
- Version: 1.20250703.183400
3
+ Version: 1.20250708.165746
4
4
  Summary: The RWTH extensible training framework for universal recurrent neural networks
5
5
  Home-page: https://github.com/rwth-i6/returnn/
6
6
  Author: Albert Zeyer
@@ -1,2 +1,2 @@
1
- version = '1.20250703.183400'
2
- long_version = '1.20250703.183400+git.a5c35a2'
1
+ version = '1.20250708.165746'
2
+ long_version = '1.20250708.165746+git.eff9959'
returnn/datasets/lm.py CHANGED
@@ -9,6 +9,7 @@ from __future__ import annotations
9
9
 
10
10
  from typing import (
11
11
  Iterable,
12
+ Literal,
12
13
  Optional,
13
14
  Sequence,
14
15
  Union,
@@ -1177,6 +1178,7 @@ class PhoneSeqGenerator:
1177
1178
  add_extra_begin_lemma: float = 1.0,
1178
1179
  extra_end_lemma: Optional[Dict[str, Any]] = None,
1179
1180
  add_extra_end_lemma: float = 1.0,
1181
+ phon_pick_strategy: Literal["random", "first"] = "random",
1180
1182
  ):
1181
1183
  """
1182
1184
  :param lexicon_file: lexicon XML file
@@ -1196,6 +1198,8 @@ class PhoneSeqGenerator:
1196
1198
  :param add_extra_begin_lemma:
1197
1199
  :param extra_end_lemma: just like ``extra_begin_lemma``, but for the end
1198
1200
  :param add_extra_end_lemma:
1201
+ :param phon_pick_strategy: "random" or "first". If "random", then a pronunciation is picked randomly
1202
+ if multiple pronunciations exist for a lemma. If "first", then the first pronunciation is always used.
1199
1203
  """
1200
1204
  self.lexicon = Lexicon(lexicon_file)
1201
1205
  self.phonemes = sorted(self.lexicon.phonemes.keys(), key=lambda s: self.lexicon.phonemes[s]["index"])
@@ -1217,6 +1221,7 @@ class PhoneSeqGenerator:
1217
1221
  self.add_extra_begin_lemma = add_extra_begin_lemma
1218
1222
  self.extra_end_lemma = extra_end_lemma
1219
1223
  self.add_extra_end_lemma = add_extra_end_lemma
1224
+ self.phon_pick_strategy = phon_pick_strategy
1220
1225
 
1221
1226
  def random_seed(self, seed: int):
1222
1227
  """Reset RNG via given seed"""
@@ -1284,7 +1289,12 @@ class PhoneSeqGenerator:
1284
1289
  """:return: space-separated phones"""
1285
1290
  phones = []
1286
1291
  for lemma in self._iter_orth_lemmas(orth):
1287
- phon = self.rnd.choice(lemma["phons"])
1292
+ if self.phon_pick_strategy == "first":
1293
+ phon = lemma["phons"][0]
1294
+ elif self.phon_pick_strategy == "random":
1295
+ phon = self.rnd.choice(lemma["phons"])
1296
+ else:
1297
+ raise ValueError(f"Unknown phon_pick_strategy {self.phon_pick_strategy}")
1288
1298
  phones.append(phon["phon"])
1289
1299
  return " ".join(phones)
1290
1300
 
@@ -1356,7 +1366,13 @@ class PhoneSeqGenerator:
1356
1366
  """
1357
1367
  allos: List[AllophoneState] = []
1358
1368
  for lemma in self._iter_orth_lemmas(orth):
1359
- phon = self.rnd.choice(lemma["phons"]) # space-separated phones in phon["phon"]
1369
+ if self.phon_pick_strategy == "first":
1370
+ phon = lemma["phons"][0]
1371
+ elif self.phon_pick_strategy == "random":
1372
+ phon = self.rnd.choice(lemma["phons"])
1373
+ else:
1374
+ raise ValueError(f"Unknown phon_pick_strategy {self.phon_pick_strategy}")
1375
+ # space-separated phones in phon["phon"]
1360
1376
  l_allos = list(self._phones_to_allos(phon["phon"].split()))
1361
1377
  l_allos[0].mark_initial()
1362
1378
  l_allos[-1].mark_final()
returnn/datasets/meta.py CHANGED
@@ -1990,9 +1990,13 @@ class VariableDataset(Dataset):
1990
1990
  class MultiEpochDataset(CachedDataset2):
1991
1991
  """
1992
1992
  It wraps some dataset, where one outer epoch corresponds to multiple epochs in the inner wrapped dataset.
1993
+ I.e. one iteration through this dataset corresponds to multiple iterations through the inner dataset.
1993
1994
 
1994
- This can be useful when the inner dataset uses partition_epoch, and we want to cover the whole full epoch.
1995
+ This can be useful for forwarding, when you want to do multiple iterations through the dataset.
1996
+ This could be useful for clustering.
1995
1997
 
1998
+ This can also be useful when the inner dataset uses (or must use) partition_epoch,
1999
+ and we want to cover the whole full epoch:
1996
2000
  One specific example is when the data is distributed over multiple files,
1997
2001
  and for reasonable performance, you want to have the data copied to the local disk,
1998
2002
  but all data together is too large to fit on the local disk.
@@ -2041,7 +2045,11 @@ class MultiEpochDataset(CachedDataset2):
2041
2045
  return self._dataset.get_all_tags()
2042
2046
 
2043
2047
  def get_total_num_seqs(self, *, fast: bool = False) -> int:
2044
- """total num seqs"""
2048
+ """
2049
+ Total num seqs.
2050
+ Note that this is the total number of seqs in the inner dataset,
2051
+ so without the multi-epoch handling.
2052
+ """
2045
2053
  return self._dataset.get_total_num_seqs(fast=fast)
2046
2054
 
2047
2055
  def get_data_keys(self) -> List[str]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: returnn
3
- Version: 1.20250703.183400
3
+ Version: 1.20250708.165746
4
4
  Summary: The RWTH extensible training framework for universal recurrent neural networks
5
5
  Home-page: https://github.com/rwth-i6/returnn/
6
6
  Author: Albert Zeyer
@@ -1,9 +1,9 @@
1
- returnn/PKG-INFO,sha256=6b050edAnN_IixiTU7cKY9Y7GsV2u54Go4pMebtP1LA,5215
1
+ returnn/PKG-INFO,sha256=gFz3WCtUJq8MCZeykNbFyHQVIFyQnwVcQkfRsDeC0X0,5215
2
2
  returnn/__init__.py,sha256=biBtRsM0WZ406vShaeH-9WFoqJ8XwTbn6g0EeFJ7l8E,1012
3
3
  returnn/__main__.py,sha256=lHyZcu_0yc9f7Vf_Kfdy9PmeU0T76XVXnpalHi5WKro,31740
4
4
  returnn/__old_mod_loader__.py,sha256=nvsNY-xELdS_IPNkv66Q9Rmvg4dbGW0-EBRDcCmctos,7654
5
5
  returnn/__setup__.py,sha256=22kQn2fh11iPM0hLb2Fy5sLmoU1JGvmDxXRYuRgQkwU,4659
6
- returnn/_setup_info_generated.py,sha256=W3RqBPspEAo2psz8RlzIp3A0pBAmerlhlAfLeZuEUy8,77
6
+ returnn/_setup_info_generated.py,sha256=QIOgjEYWfaIUKGZdpx_bvAWra8S_SM7m87LEcq2Lcp0,77
7
7
  returnn/config.py,sha256=3tmKhB6FnQZaNdtcYsiB61JnEY--iZ2qmJ4yq0b6tE0,29140
8
8
  returnn/forward_iface.py,sha256=A_OJiaXsX4MlXQRzST86ylyxSUZbC402PQL1REcqHjM,911
9
9
  returnn/learning_rate_control.py,sha256=ZvWryAn_tv9DhV8sh1LV3eE34Yltl3On3mYZAG4hR9s,34684
@@ -20,9 +20,9 @@ returnn/datasets/cached2.py,sha256=_6pza3IG68JexaExhj1ld3fP6pE7T-G804driJ9Z_qo,1
20
20
  returnn/datasets/distrib_files.py,sha256=9-3pJaF8Ws1Cs4AlelFCODz6b5YiaTsrD7tMCB76PDY,29865
21
21
  returnn/datasets/generating.py,sha256=9U_w6URIrv-Rb-hDbPOzYW9qYXzJbw32N6G268IKyoM,99833
22
22
  returnn/datasets/hdf.py,sha256=v5sjBenURR9Z-g7AQ9tsL84yDSye5RtbLpym3M6HSDE,67833
23
- returnn/datasets/lm.py,sha256=IqUsOzbdSWUynL0YFL25HbtMR4AxaQGHvjjqRE9IwBo,99215
23
+ returnn/datasets/lm.py,sha256=ycHdGHxT4QshBM9LPktLDaaQRTLO5zQyueCK5KMNR_4,100022
24
24
  returnn/datasets/map.py,sha256=kOBJVZmwDhLsOplzDNByIfa0NRSUaMo2Lsy36lBvxrM,10907
25
- returnn/datasets/meta.py,sha256=KQtidTgSh-1gNgbpJ8OhXt6v2lkhPPH5dpjfzwsr3E4,95251
25
+ returnn/datasets/meta.py,sha256=6XPPxhiNSxWw9Hu5Z6wG8dD9Zk82FqiI-k9HGQSTKgw,95658
26
26
  returnn/datasets/multi_proc.py,sha256=aVjsLt2qjHnHOrEYCgIPCwNYE-f1fiGP6eZ8NGAr3A4,22583
27
27
  returnn/datasets/normalization_data.py,sha256=J3njQCMvWAbIAVPepO2L_Xdau9eWYB7Zyd6STeGzTbc,14615
28
28
  returnn/datasets/numpy_dump.py,sha256=wl8bKIKAlff2HPJPtuu5wBg3TLOf16d2wLVB4lLAwTM,5158
@@ -253,8 +253,8 @@ returnn/util/sig_proc.py,sha256=Tjz0VOAVyqu2qDCF5HZ1JjALjcFsHcNkcd96WgZeKfE,7265
253
253
  returnn/util/task_system.py,sha256=y4sMVXQ25Qd2z0rx03uOlXlkE-jbCYC1Sjfn-XlraVU,26003
254
254
  returnn/util/train_proc_manager.py,sha256=Pjht28k6uz6BNQ47uW6Gf880iyq5q4wx7P_K2tmoAM8,3266
255
255
  returnn/util/watch_memory.py,sha256=BR5P2kvBN6UI81cE0_1WAA6Hd1SByLbBaiDxvLhPOew,4213
256
- returnn-1.20250703.183400.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
257
- returnn-1.20250703.183400.dist-info/METADATA,sha256=6b050edAnN_IixiTU7cKY9Y7GsV2u54Go4pMebtP1LA,5215
258
- returnn-1.20250703.183400.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
259
- returnn-1.20250703.183400.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
260
- returnn-1.20250703.183400.dist-info/RECORD,,
256
+ returnn-1.20250708.165746.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
257
+ returnn-1.20250708.165746.dist-info/METADATA,sha256=gFz3WCtUJq8MCZeykNbFyHQVIFyQnwVcQkfRsDeC0X0,5215
258
+ returnn-1.20250708.165746.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
259
+ returnn-1.20250708.165746.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
260
+ returnn-1.20250708.165746.dist-info/RECORD,,