returnn 1.20250508.181644__py3-none-any.whl → 1.20250514.101430__py3-none-any.whl

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of returnn might be problematic. More details are available on the original registry page (the link target was not preserved in this text).

returnn/PKG-INFO CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: returnn
3
- Version: 1.20250508.181644
3
+ Version: 1.20250514.101430
4
4
  Summary: The RWTH extensible training framework for universal recurrent neural networks
5
5
  Home-page: https://github.com/rwth-i6/returnn/
6
6
  Author: Albert Zeyer
@@ -1,2 +1,2 @@
1
- version = '1.20250508.181644'
2
- long_version = '1.20250508.181644+git.0baf1d9'
1
+ version = '1.20250514.101430'
2
+ long_version = '1.20250514.101430+git.c557cc3'
@@ -308,11 +308,18 @@ class PostprocessingDataset(CachedDataset2):
308
308
  last_complete_frac = complete_frac
309
309
  for data_key, out_t in self._out_tensor_dict_template.data.items():
310
310
  in_t = t_dict.data[data_key]
311
- assert (
312
- in_t.ndim == out_t.batch_ndim
313
- and in_t.dtype == out_t.dtype
314
- and all(d.dimension in (d_, None) for (d, d_) in zip(in_t.dims, out_t.shape))
311
+ assert in_t.ndim == out_t.batch_ndim, (
312
+ f"Dim number mismatch for {data_key}: {in_t.ndim} != {out_t.batch_ndim}. "
313
+ "Postprocessing data tensors must not have a batch dimension."
315
314
  )
315
+ assert in_t.dtype == out_t.dtype, (
316
+ f"dtype mismatch for {data_key}: '{in_t.dtype}' != '{out_t.dtype}'"
317
+ )
318
+ for i, (in_dim, out_shape) in enumerate(zip(in_t.dims, out_t.shape)):
319
+ assert in_dim.dimension is None or in_dim.dimension == out_shape, (
320
+ f"Dim {i} mismatch on {data_key}: "
321
+ f"{in_dim.dimension} must either be `None` or equal {out_shape}"
322
+ )
316
323
  yield t_dict
317
324
 
318
325
  data_iter = self._iterate_dataset()
@@ -15,6 +15,7 @@ __all__ = [
15
15
 
16
16
  from typing import Optional, Union, Type, Callable, List, Dict
17
17
  import sys
18
+ import re
18
19
  import numpy
19
20
 
20
21
  from returnn.util.basic import NotSpecified
@@ -58,6 +59,7 @@ class Vocabulary:
58
59
  num_labels: Optional[int] = None,
59
60
  seq_postfix: Optional[List[int]] = None,
60
61
  labels: Optional[Union[List[str], Callable[[], List[str]]]] = None,
62
+ single_whitespace_split: bool = False,
61
63
  ):
62
64
  """
63
65
  :param vocab_file:
@@ -76,6 +78,11 @@ class Vocabulary:
76
78
  :param num_labels: just for verification
77
79
  :param seq_postfix: labels will be added to the seq in self.get_seq
78
80
  :param labels:
81
+ :param single_whitespace_split:
82
+ Assume that the given text is encoded using ``" ".join(labels[i] for i in seq)``,
83
+ and this will undo that.
84
+ This makes a difference when there is whitespace itself in the vocab (in ``labels``).
85
+ If not enabled (the default), this will simply use ``str.split()``.
79
86
  """
80
87
  if vocab_file and not isinstance(vocab_file, str): # sometimes it is a Path
81
88
  vocab_file = str(vocab_file)
@@ -131,6 +138,12 @@ class Vocabulary:
131
138
  self.control_symbol_ids = {name: self.to_id(label) for name, label in (control_symbols or {}).items()}
132
139
  self.user_defined_symbol_ids = {name: self.to_id(label) for name, label in (user_defined_symbols or {}).items()}
133
140
  self.seq_postfix = seq_postfix or []
141
+ # To be used with findall in get_seq.
142
+ self.decode_seq_token_re = (
143
+ re.compile("(%s|\\S+)(?: |$)" % "|".join(re.escape(v) for v in self.labels))
144
+ if single_whitespace_split
145
+ else None
146
+ )
134
147
 
135
148
  def __repr__(self):
136
149
  parts = [repr(self.vocab_file), "num_labels=%s" % self.num_labels]
@@ -317,7 +330,10 @@ class Vocabulary:
317
330
  :param sentence: assumed to be seq of vocab entries separated by whitespace
318
331
  :return: seq of label indices
319
332
  """
320
- segments = sentence.split()
333
+ if self.decode_seq_token_re is not None:
334
+ segments = self.decode_seq_token_re.findall(sentence)
335
+ else:
336
+ segments = sentence.split()
321
337
  return self.get_seq_indices(segments) + self.seq_postfix
322
338
 
323
339
  def get_seq_indices(self, seq: List[str]) -> List[int]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: returnn
3
- Version: 1.20250508.181644
3
+ Version: 1.20250514.101430
4
4
  Summary: The RWTH extensible training framework for universal recurrent neural networks
5
5
  Home-page: https://github.com/rwth-i6/returnn/
6
6
  Author: Albert Zeyer
@@ -1,9 +1,9 @@
1
- returnn/PKG-INFO,sha256=3S-lQxtFtKrtOERvd0Z3NPAS_25okWAx5ApxuNsvpWQ,5215
1
+ returnn/PKG-INFO,sha256=AEKRzwj7-1_1NcUCAPSSEPkMoIrXK-7K5NtSOZBfJvk,5215
2
2
  returnn/__init__.py,sha256=biBtRsM0WZ406vShaeH-9WFoqJ8XwTbn6g0EeFJ7l8E,1012
3
3
  returnn/__main__.py,sha256=qBFbuB1yN3adgVM5pXt2-Yq9vorjRNchNPL8kDKx44M,31752
4
4
  returnn/__old_mod_loader__.py,sha256=nvsNY-xELdS_IPNkv66Q9Rmvg4dbGW0-EBRDcCmctos,7654
5
5
  returnn/__setup__.py,sha256=22kQn2fh11iPM0hLb2Fy5sLmoU1JGvmDxXRYuRgQkwU,4659
6
- returnn/_setup_info_generated.py,sha256=btH3IxkWGxhL-KKGiJ5s1XMxjS8nXUvW9qGpqsH7Hjs,77
6
+ returnn/_setup_info_generated.py,sha256=CfrKH5EWL08ucEeXafiSxPiV-BUoBw--NGoCK_ERZnw,77
7
7
  returnn/config.py,sha256=3tmKhB6FnQZaNdtcYsiB61JnEY--iZ2qmJ4yq0b6tE0,29140
8
8
  returnn/forward_iface.py,sha256=A_OJiaXsX4MlXQRzST86ylyxSUZbC402PQL1REcqHjM,911
9
9
  returnn/learning_rate_control.py,sha256=ZvWryAn_tv9DhV8sh1LV3eE34Yltl3On3mYZAG4hR9s,34684
@@ -26,7 +26,7 @@ returnn/datasets/meta.py,sha256=KQtidTgSh-1gNgbpJ8OhXt6v2lkhPPH5dpjfzwsr3E4,9525
26
26
  returnn/datasets/multi_proc.py,sha256=aVjsLt2qjHnHOrEYCgIPCwNYE-f1fiGP6eZ8NGAr3A4,22583
27
27
  returnn/datasets/normalization_data.py,sha256=J3njQCMvWAbIAVPepO2L_Xdau9eWYB7Zyd6STeGzTbc,14615
28
28
  returnn/datasets/numpy_dump.py,sha256=wl8bKIKAlff2HPJPtuu5wBg3TLOf16d2wLVB4lLAwTM,5158
29
- returnn/datasets/postprocessing.py,sha256=dV6RYy-a-3sQJjK3twv88YOOcMkFb7qplfqaU_0Cyu4,23518
29
+ returnn/datasets/postprocessing.py,sha256=6SfT58BxbHYO2QlGzOgIV04Zqkp-kl0B85168DQaB9A,24060
30
30
  returnn/datasets/raw_wav.py,sha256=M7eTHp4CTtLQf3yPTiJY-mSJYgZNxkGV9IFN9J1dq_4,9144
31
31
  returnn/datasets/sprint.py,sha256=JAs5dOmdteSOwA7YQcTF9KaTCtGfRjiyJUZClSr85pY,55502
32
32
  returnn/datasets/stereo.py,sha256=PkowC91bZWihIYuIZgyGgPcNwgq5jBvyxxu1nER-VhM,17633
@@ -34,7 +34,7 @@ returnn/datasets/text_dict.py,sha256=BPE73nh6-vtSLy3SiDf4dpFl9RJorE7oO6l5y2FU3MI
34
34
  returnn/datasets/util/__init__.py,sha256=rEKhSD6fyhDiQF-x7dUQMwa29JZu72SDm7mYcCcLghY,52
35
35
  returnn/datasets/util/feature_extraction.py,sha256=axtXDb9wcNpOmyhmW3WJUj5xda29TKkKvOcGGvq7ExA,23923
36
36
  returnn/datasets/util/strings.py,sha256=pP8pmXhArkssYqmPOLuxEG9gsko891ZxrWiai86qbLE,412
37
- returnn/datasets/util/vocabulary.py,sha256=HKEbUmYPiW9hsrxf3eE-bNXBOOUcQ7vHkXB4BPtxmEA,27182
37
+ returnn/datasets/util/vocabulary.py,sha256=1W13FgxPVP9XSIyhkt4I7CXK5lj99zT97R-gLa_xnAU,27964
38
38
  returnn/engine/__init__.py,sha256=br7hpn8i_hIBi2uTQfnN3BF9g5DREYa_mQi0_Nvlu6o,228
39
39
  returnn/engine/base.py,sha256=0n4FtB_B2H3W_9KdoLr0P7YPER-hVkbk69pwFqsqmqw,18467
40
40
  returnn/engine/batch.py,sha256=amXW8mGspuSQjo00JdisE2eOLy5Ij1weWWzkE-lXSJM,9912
@@ -253,8 +253,8 @@ returnn/util/sig_proc.py,sha256=Tjz0VOAVyqu2qDCF5HZ1JjALjcFsHcNkcd96WgZeKfE,7265
253
253
  returnn/util/task_system.py,sha256=y4sMVXQ25Qd2z0rx03uOlXlkE-jbCYC1Sjfn-XlraVU,26003
254
254
  returnn/util/train_proc_manager.py,sha256=Pjht28k6uz6BNQ47uW6Gf880iyq5q4wx7P_K2tmoAM8,3266
255
255
  returnn/util/watch_memory.py,sha256=BR5P2kvBN6UI81cE0_1WAA6Hd1SByLbBaiDxvLhPOew,4213
256
- returnn-1.20250508.181644.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
257
- returnn-1.20250508.181644.dist-info/METADATA,sha256=3S-lQxtFtKrtOERvd0Z3NPAS_25okWAx5ApxuNsvpWQ,5215
258
- returnn-1.20250508.181644.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
259
- returnn-1.20250508.181644.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
260
- returnn-1.20250508.181644.dist-info/RECORD,,
256
+ returnn-1.20250514.101430.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
257
+ returnn-1.20250514.101430.dist-info/METADATA,sha256=AEKRzwj7-1_1NcUCAPSSEPkMoIrXK-7K5NtSOZBfJvk,5215
258
+ returnn-1.20250514.101430.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
259
+ returnn-1.20250514.101430.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
260
+ returnn-1.20250514.101430.dist-info/RECORD,,