PyPI - returnn - Versions diffs - 1.20250508.181644__py3-none-any.whl → 1.20250514.101430__py3-none-any.whl - Mend

returnn 1.20250508.181644__py3-none-any.whl → 1.20250514.101430__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of returnn might be problematic. Click here for more details.

Files changed (9)

returnn/PKG-INFO CHANGED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250508.181644
+Version: 1.20250514.101430
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer

returnn/_setup_info_generated.py CHANGED Viewed

@@ -1,2 +1,2 @@
-version = '1.20250508.181644'
-long_version = '1.20250508.181644+git.0baf1d9'
+version = '1.20250514.101430'
+long_version = '1.20250514.101430+git.c557cc3'

returnn/datasets/postprocessing.py CHANGED Viewed

@@ -308,11 +308,18 @@ class PostprocessingDataset(CachedDataset2):
                     last_complete_frac = complete_frac
                 for data_key, out_t in self._out_tensor_dict_template.data.items():
                     in_t = t_dict.data[data_key]
-                    assert (
-                        in_t.ndim == out_t.batch_ndim
-                        and in_t.dtype == out_t.dtype
-                        and all(d.dimension in (d_, None) for (d, d_) in zip(in_t.dims, out_t.shape))
+                    assert in_t.ndim == out_t.batch_ndim, (
+                        f"Dim number mismatch for {data_key}: {in_t.ndim} != {out_t.batch_ndim}. "
+                        "Postprocessing data tensors must not have a batch dimension."
                     )
+                    assert in_t.dtype == out_t.dtype, (
+                        f"dtype mismatch for {data_key}: '{in_t.dtype}' != '{out_t.dtype}'"
+                    )
+                    for i, (in_dim, out_shape) in enumerate(zip(in_t.dims, out_t.shape)):
+                        assert in_dim.dimension is None or in_dim.dimension == out_shape, (
+                            f"Dim {i} mismatch on {data_key}: "
+                            f"{in_dim.dimension} must either be `None` or equal {out_shape}"
+                        )
                 yield t_dict
         data_iter = self._iterate_dataset()

returnn/datasets/util/vocabulary.py CHANGED Viewed

@@ -15,6 +15,7 @@ __all__ = [
 from typing import Optional, Union, Type, Callable, List, Dict
 import sys
+import re
 import numpy
 from returnn.util.basic import NotSpecified
@@ -58,6 +59,7 @@ class Vocabulary:
         num_labels: Optional[int] = None,
         seq_postfix: Optional[List[int]] = None,
         labels: Optional[Union[List[str], Callable[[], List[str]]]] = None,
+        single_whitespace_split: bool = False,
     ):
         """
         :param vocab_file:
@@ -76,6 +78,11 @@ class Vocabulary:
         :param num_labels: just for verification
         :param seq_postfix: labels will be added to the seq in self.get_seq
         :param labels:
+        :param single_whitespace_split:
+            Assume that the given text is encoded using ``" ".join(labels[i] for i in seq)``,
+            and this will undo that.
+            This makes a difference when there is whitespace itself in the vocab (in ``labels``).
+            If not enabled (the default), this will simply use ``str.split()``.
         """
         if vocab_file and not isinstance(vocab_file, str):  # sometimes it is a Path
             vocab_file = str(vocab_file)
@@ -131,6 +138,12 @@ class Vocabulary:
         self.control_symbol_ids = {name: self.to_id(label) for name, label in (control_symbols or {}).items()}
         self.user_defined_symbol_ids = {name: self.to_id(label) for name, label in (user_defined_symbols or {}).items()}
         self.seq_postfix = seq_postfix or []
+        # To be used with findall in get_seq.
+        self.decode_seq_token_re = (
+            re.compile("(%s|\\S+)(?: |$)" % "|".join(re.escape(v) for v in self.labels))
+            if single_whitespace_split
+            else None
+        )
     def __repr__(self):
         parts = [repr(self.vocab_file), "num_labels=%s" % self.num_labels]
@@ -317,7 +330,10 @@ class Vocabulary:
         :param sentence: assumed to be seq of vocab entries separated by whitespace
         :return: seq of label indices
         """
-        segments = sentence.split()
+        if self.decode_seq_token_re is not None:
+            segments = self.decode_seq_token_re.findall(sentence)
+        else:
+            segments = sentence.split()
         return self.get_seq_indices(segments) + self.seq_postfix
     def get_seq_indices(self, seq: List[str]) -> List[int]:

{returnn-1.20250508.181644.dist-info → returnn-1.20250514.101430.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250508.181644
+Version: 1.20250514.101430
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer

{returnn-1.20250508.181644.dist-info → returnn-1.20250514.101430.dist-info}/RECORD RENAMED Viewed

@@ -1,9 +1,9 @@
-returnn/PKG-INFO,sha256=3S-lQxtFtKrtOERvd0Z3NPAS_25okWAx5ApxuNsvpWQ,5215
+returnn/PKG-INFO,sha256=AEKRzwj7-1_1NcUCAPSSEPkMoIrXK-7K5NtSOZBfJvk,5215
 returnn/__init__.py,sha256=biBtRsM0WZ406vShaeH-9WFoqJ8XwTbn6g0EeFJ7l8E,1012
 returnn/__main__.py,sha256=qBFbuB1yN3adgVM5pXt2-Yq9vorjRNchNPL8kDKx44M,31752
 returnn/__old_mod_loader__.py,sha256=nvsNY-xELdS_IPNkv66Q9Rmvg4dbGW0-EBRDcCmctos,7654
 returnn/__setup__.py,sha256=22kQn2fh11iPM0hLb2Fy5sLmoU1JGvmDxXRYuRgQkwU,4659
-returnn/_setup_info_generated.py,sha256=btH3IxkWGxhL-KKGiJ5s1XMxjS8nXUvW9qGpqsH7Hjs,77
+returnn/_setup_info_generated.py,sha256=CfrKH5EWL08ucEeXafiSxPiV-BUoBw--NGoCK_ERZnw,77
 returnn/config.py,sha256=3tmKhB6FnQZaNdtcYsiB61JnEY--iZ2qmJ4yq0b6tE0,29140
 returnn/forward_iface.py,sha256=A_OJiaXsX4MlXQRzST86ylyxSUZbC402PQL1REcqHjM,911
 returnn/learning_rate_control.py,sha256=ZvWryAn_tv9DhV8sh1LV3eE34Yltl3On3mYZAG4hR9s,34684
@@ -26,7 +26,7 @@ returnn/datasets/meta.py,sha256=KQtidTgSh-1gNgbpJ8OhXt6v2lkhPPH5dpjfzwsr3E4,9525
 returnn/datasets/multi_proc.py,sha256=aVjsLt2qjHnHOrEYCgIPCwNYE-f1fiGP6eZ8NGAr3A4,22583
 returnn/datasets/normalization_data.py,sha256=J3njQCMvWAbIAVPepO2L_Xdau9eWYB7Zyd6STeGzTbc,14615
 returnn/datasets/numpy_dump.py,sha256=wl8bKIKAlff2HPJPtuu5wBg3TLOf16d2wLVB4lLAwTM,5158
-returnn/datasets/postprocessing.py,sha256=dV6RYy-a-3sQJjK3twv88YOOcMkFb7qplfqaU_0Cyu4,23518
+returnn/datasets/postprocessing.py,sha256=6SfT58BxbHYO2QlGzOgIV04Zqkp-kl0B85168DQaB9A,24060
 returnn/datasets/raw_wav.py,sha256=M7eTHp4CTtLQf3yPTiJY-mSJYgZNxkGV9IFN9J1dq_4,9144
 returnn/datasets/sprint.py,sha256=JAs5dOmdteSOwA7YQcTF9KaTCtGfRjiyJUZClSr85pY,55502
 returnn/datasets/stereo.py,sha256=PkowC91bZWihIYuIZgyGgPcNwgq5jBvyxxu1nER-VhM,17633
@@ -34,7 +34,7 @@ returnn/datasets/text_dict.py,sha256=BPE73nh6-vtSLy3SiDf4dpFl9RJorE7oO6l5y2FU3MI
 returnn/datasets/util/__init__.py,sha256=rEKhSD6fyhDiQF-x7dUQMwa29JZu72SDm7mYcCcLghY,52
 returnn/datasets/util/feature_extraction.py,sha256=axtXDb9wcNpOmyhmW3WJUj5xda29TKkKvOcGGvq7ExA,23923
 returnn/datasets/util/strings.py,sha256=pP8pmXhArkssYqmPOLuxEG9gsko891ZxrWiai86qbLE,412
-returnn/datasets/util/vocabulary.py,sha256=HKEbUmYPiW9hsrxf3eE-bNXBOOUcQ7vHkXB4BPtxmEA,27182
+returnn/datasets/util/vocabulary.py,sha256=1W13FgxPVP9XSIyhkt4I7CXK5lj99zT97R-gLa_xnAU,27964
 returnn/engine/__init__.py,sha256=br7hpn8i_hIBi2uTQfnN3BF9g5DREYa_mQi0_Nvlu6o,228
 returnn/engine/base.py,sha256=0n4FtB_B2H3W_9KdoLr0P7YPER-hVkbk69pwFqsqmqw,18467
 returnn/engine/batch.py,sha256=amXW8mGspuSQjo00JdisE2eOLy5Ij1weWWzkE-lXSJM,9912
@@ -253,8 +253,8 @@ returnn/util/sig_proc.py,sha256=Tjz0VOAVyqu2qDCF5HZ1JjALjcFsHcNkcd96WgZeKfE,7265
 returnn/util/task_system.py,sha256=y4sMVXQ25Qd2z0rx03uOlXlkE-jbCYC1Sjfn-XlraVU,26003
 returnn/util/train_proc_manager.py,sha256=Pjht28k6uz6BNQ47uW6Gf880iyq5q4wx7P_K2tmoAM8,3266
 returnn/util/watch_memory.py,sha256=BR5P2kvBN6UI81cE0_1WAA6Hd1SByLbBaiDxvLhPOew,4213
-returnn-1.20250508.181644.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
-returnn-1.20250508.181644.dist-info/METADATA,sha256=3S-lQxtFtKrtOERvd0Z3NPAS_25okWAx5ApxuNsvpWQ,5215
-returnn-1.20250508.181644.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
-returnn-1.20250508.181644.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
-returnn-1.20250508.181644.dist-info/RECORD,,
+returnn-1.20250514.101430.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
+returnn-1.20250514.101430.dist-info/METADATA,sha256=AEKRzwj7-1_1NcUCAPSSEPkMoIrXK-7K5NtSOZBfJvk,5215
+returnn-1.20250514.101430.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
+returnn-1.20250514.101430.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
+returnn-1.20250514.101430.dist-info/RECORD,,

{returnn-1.20250508.181644.dist-info → returnn-1.20250514.101430.dist-info}/LICENSE RENAMED Viewed

File without changes

{returnn-1.20250508.181644.dist-info → returnn-1.20250514.101430.dist-info}/WHEEL RENAMED Viewed

File without changes

{returnn-1.20250508.181644.dist-info → returnn-1.20250514.101430.dist-info}/top_level.txt RENAMED Viewed

File without changes

returnn 1.20250508.181644__py3-none-any.whl → 1.20250514.101430__py3-none-any.whl

Potentially problematic release.

returnn 1.20250508.181644__py3-none-any.whl → 1.20250514.101430__py3-none-any.whl