PyPI - returnn - Versions diffs - 1.20241022.173306__tar.gz → 1.20241022.224754__tar.gz - Mend

returnn 1.20241022.173306 tar.gz → 1.20241022.224754 tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of returnn might be problematic. Click here for more details.

Files changed (468) hide show

{returnn-1.20241022.173306 → returnn-1.20241022.224754}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20241022.173306
+Version: 1.20241022.224754
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer

returnn-1.20241022.224754/_setup_info_generated.py ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ version = '1.20241022.224754'
2	+ long_version = '1.20241022.224754+git.b5db365'

{returnn-1.20241022.173306 → returnn-1.20241022.224754}/returnn/frontend/audio/mel.py RENAMED Viewed

@@ -56,8 +56,8 @@ def mel_filterbank(
         filter_bank_matrix_np = _mel_filter_bank_matrix_np(
             f_min=f_min, f_max=f_max, sampling_rate=sampling_rate, fft_size=fft_length, nr_of_filters=out_dim.dimension
         )
-        filter_bank_matrix_np = filter_bank_matrix_np.astype(x.dtype)
         filter_bank_matrix = rf.convert_to_tensor(filter_bank_matrix_np, dims=(in_dim, out_dim), _backend=backend)
+        filter_bank_matrix = rf.cast(filter_bank_matrix, dtype=x.dtype)
         filter_bank_matrix = rf.copy_to_device(filter_bank_matrix, x.device)
         if backend.executing_eagerly():
             if len(_mel_filter_bank_matrix_cache) > 100:
@@ -191,6 +191,9 @@ def log_mel_filterbank_from_raw(
         fft_length=n_fft,
     )
     power_spectrogram = rf.abs(spectrogram) ** 2.0
+    # stft might have upcasted this to float32 because some PyTorch versions don't support stft on bfloat16.
+    # https://github.com/pytorch/pytorch/issues/117844
+    power_spectrogram = rf.cast(power_spectrogram, dtype=raw_audio.dtype)
     mel_fbank = mel_filterbank(power_spectrogram, in_dim=in_dim_, out_dim=out_dim, sampling_rate=sampling_rate)
     log_mel_fbank = rf.safe_log(mel_fbank, eps=1e-10)
     if log_base != math.e:

{returnn-1.20241022.173306 → returnn-1.20241022.224754}/returnn/frontend/decoder/transformer.py RENAMED Viewed

@@ -101,15 +101,11 @@ class TransformerDecoder(rf.Module):
         if pos_enc is None:
             pass
         elif isinstance(pos_enc, dict):
-            pos_enc = rf.build_from_dict(
-                pos_enc, feat_dim=embed_dim or model_dim, dtype=self.input_embedding.weight.dtype
-            )
+            pos_enc = rf.build_from_dict(pos_enc, feat_dim=embed_dim or model_dim)
         elif isinstance(pos_enc, rf.Module):
             pass
         elif isinstance(pos_enc, FunctionType):
-            pos_enc = functools.partial(
-                pos_enc, feat_dim=embed_dim or model_dim, dtype=self.input_embedding.weight.dtype
-            )
+            pos_enc = functools.partial(pos_enc, feat_dim=embed_dim or model_dim)
         else:
             raise TypeError(f"unexpected pos_enc type {pos_enc!r}")
         self.pos_enc = pos_enc

{returnn-1.20241022.173306 → returnn-1.20241022.224754}/returnn/frontend/dtype.py RENAMED Viewed

@@ -3,9 +3,17 @@ DType helpers
 """
 from __future__ import annotations
+from contextlib import contextmanager
-__all__ = ["get_default_float_dtype", "get_default_int_dtype", "get_default_array_index_dtype", "is_float_dtype"]
+__all__ = [
+    "get_default_float_dtype",
+    "set_default_float_dtype",
+    "set_default_float_dtype_ctx",
+    "get_default_int_dtype",
+    "get_default_array_index_dtype",
+    "is_float_dtype",
+]
 _default_float_dtype: str = "float32"
@@ -21,6 +29,32 @@ def get_default_float_dtype() -> str:
     return _default_float_dtype
+def set_default_float_dtype(dtype: str):
+    """
+    Set the default float dtype
+    :param dtype: the new default float dtype
+    """
+    global _default_float_dtype
+    assert isinstance(dtype, str)
+    _default_float_dtype = dtype
+@contextmanager
+def set_default_float_dtype_ctx(dtype: str):
+    """
+    :param dtype: see :func:`get_default_float_dtype`
+    """
+    global _default_float_dtype
+    assert isinstance(dtype, str)
+    old_default_float_dtype = _default_float_dtype
+    try:
+        _default_float_dtype = dtype
+        yield
+    finally:
+        _default_float_dtype = old_default_float_dtype
 def get_default_int_dtype() -> str:
     """
     https://data-apis.org/array-api/latest/API_specification/data_types.html#default-data-types

{returnn-1.20241022.173306 → returnn-1.20241022.224754}/returnn/tensor/utils.py RENAMED Viewed

@@ -104,6 +104,9 @@ def tensor_fill_random_numpy_(
             x.raw_tensor = rnd.randint(0, 2, size=shape, dtype=x.dtype)
         elif x.dtype.startswith("float"):
             x.raw_tensor = rnd.normal(0.0, 1.0, size=shape).astype(x.dtype)
+        elif x.dtype == "bfloat16":
+            # Numpy does not support bfloat16, will later be casted to bfloat16
+            x.raw_tensor = rnd.normal(0.0, 1.0, size=shape).astype("float32")
         elif x.dtype.startswith("complex"):
             real = rnd.normal(0.0, 1.0, size=shape)
             imag = rnd.normal(0.0, 1.0, size=shape)

{returnn-1.20241022.173306 → returnn-1.20241022.224754}/returnn/torch/data/extern_data.py RENAMED Viewed

@@ -3,7 +3,7 @@ From raw dict to extern_data tensor dict.
 """
 from __future__ import annotations
-from typing import Any, Union, Dict, List, Sequence
+from typing import Optional, Any, Union, Dict, List, Sequence
 import numpy
 import torch
 from returnn.tensor import Tensor, TensorDict, Dim
@@ -27,13 +27,18 @@ def raw_dict_to_extern_data(
     *,
     extern_data_template: TensorDict,
     device: Union[str, torch.device],
+    float_dtype: Optional[Union[str, torch.dtype]] = None,
 ) -> TensorDict:
     """
     :param extern_data_raw: This comes out of the DataLoader, via our collate_batch.
     :param extern_data_template: Specified via `extern_data` in the config.
     :param device: E.g. the GPU.
+    :param float_dtype:
     :return: tensor dict, like extern_data_template, but with raw tensors set to Torch tensors, on the right device.
     """
+    if isinstance(float_dtype, str):
+        float_dtype = getattr(torch, float_dtype)
+        assert isinstance(float_dtype, torch.dtype)
     assert isinstance(extern_data_raw, dict) and extern_data_raw
     batch_dim = get_batch_dim_from_extern_data(extern_data_template)
     for dim in _get_dyn_dims_from_extern_data(extern_data_template):
@@ -51,6 +56,8 @@ def raw_dict_to_extern_data(
                     dim.dimension == raw_tensor.shape[i]
                 ), f"shape mismatch for {k}: {raw_tensor.shape} vs {data.batch_shape}"
         if isinstance(raw_tensor, torch.Tensor):
+            if raw_tensor.dtype.is_floating_point and float_dtype:
+                raw_tensor = raw_tensor.to(dtype=float_dtype)
             data.dtype = str(raw_tensor.dtype).split(".")[-1]  # just overwrite for now...
             data.raw_tensor = raw_tensor.to(device)
         elif isinstance(raw_tensor, numpy.ndarray):

{returnn-1.20241022.173306 → returnn-1.20241022.224754}/returnn/torch/engine.py RENAMED Viewed

@@ -4,7 +4,7 @@ Main engine for PyTorch
 from __future__ import annotations
 from typing import Optional, Any, Union, Callable, Dict, Set
-from contextlib import nullcontext
+from contextlib import nullcontext, ExitStack, contextmanager
 import gc
 import os
@@ -129,6 +129,13 @@ class Engine(EngineBase):
         self._forward_auto_split_batch_on_oom = config.bool("forward_auto_split_batch_on_oom", False)
         self._stop_on_nonfinite_train_score = config.bool("stop_on_nonfinite_train_score", True)
+        default_float_dtype = config.value("default_float_dtype", None)
+        if default_float_dtype is not None:
+            assert isinstance(default_float_dtype, str)
+            default_float_dtype = getattr(torch, default_float_dtype)
+            assert isinstance(default_float_dtype, torch.dtype)
+        self._default_float_dtype: Optional[torch.dtype] = default_float_dtype
         amp_options = self.config.opt_typed_value("torch_amp")
         grad_scaler_opts = self.config.typed_value("grad_scaler", NotSpecified)
         if amp_options is not None:
@@ -380,7 +387,10 @@ class Engine(EngineBase):
                     cur_count_grad_accum = 0
                 extern_data = extern_data_util.raw_dict_to_extern_data(
-                    extern_data_raw, extern_data_template=self.extern_data, device=self._device
+                    extern_data_raw,
+                    extern_data_template=self.extern_data,
+                    device=self._device,
+                    float_dtype=self._default_float_dtype,
                 )
                 self._run_step(extern_data, train_flag=True, train_func=True)
@@ -389,7 +399,7 @@ class Engine(EngineBase):
                 losses_dict = NumbersDict(
                     {
                         name: (
-                            float(loss.get_summed_loss().raw_tensor.detach().cpu().numpy())
+                            float(loss.get_summed_loss().raw_tensor.detach().cpu().item())
                             if self._device != "meta"
                             else float("nan")
                         )
@@ -553,7 +563,10 @@ class Engine(EngineBase):
                         torch.distributed.broadcast(_has_data, src=0)
                     extern_data = extern_data_util.raw_dict_to_extern_data(
-                        extern_data_raw, extern_data_template=self.extern_data, device=self._device
+                        extern_data_raw,
+                        extern_data_template=self.extern_data,
+                        device=self._device,
+                        float_dtype=self._default_float_dtype,
                     )
                     self._run_step(extern_data, train_func=True)
@@ -566,7 +579,7 @@ class Engine(EngineBase):
                     losses_dict = NumbersDict(
                         {
                             name: (
-                                float(loss.get_summed_loss().raw_tensor.detach().cpu().numpy())
+                                float(loss.get_summed_loss().raw_tensor.detach().cpu().item())
                                 if self._device != "meta"
                                 else float("nan")
                             )
@@ -686,6 +699,26 @@ class Engine(EngineBase):
         return data_loader
+    @contextmanager
+    def _run_ctx_mgr(self):
+        with ExitStack() as stack:
+            if self._use_autocast:
+                stack.enter_context(autocast(device_type=self._device.split(":")[0], dtype=self._autocast_dtype))
+            stack.enter_context(rf.set_default_device_ctx(self._device))
+            if self._default_float_dtype:
+                stack.enter_context(rf.set_default_float_dtype_ctx(str(self._default_float_dtype).split(".")[-1]))
+                stack.enter_context(self._set_torch_default_dtype_ctx_mgr(self._default_float_dtype))
+            yield
+    @contextmanager
+    def _set_torch_default_dtype_ctx_mgr(self, dtype: torch.dtype):
+        old_dtype = torch.get_default_dtype()
+        try:
+            torch.set_default_dtype(dtype)
+            yield
+        finally:
+            torch.set_default_dtype(old_dtype)
     def _run_step(
         self, extern_data: TensorDict, *, train_flag: bool = False, train_func: bool, _inside_wrapped: bool = False
     ):
@@ -706,11 +739,7 @@ class Engine(EngineBase):
                 expected_outputs=self._forward_step_expected_outputs, step=self.global_train_step, epoch=self.epoch
             )
-        with (
-            autocast(device_type=self._device.split(":")[0], dtype=self._autocast_dtype)
-            if self._use_autocast
-            else nullcontext()
-        ), rf.set_default_device_ctx(self._device):
+        with self._run_ctx_mgr():
             sentinel_kw = util.get_fwd_compat_kwargs()
             if train_func:
                 self._train_step_func(model=self._orig_model, extern_data=extern_data, **sentinel_kw)
@@ -893,6 +922,8 @@ class Engine(EngineBase):
                 )
             )
+        if self._default_float_dtype:
+            self._pt_model.to(dtype=self._default_float_dtype)
         self._pt_model.to(self._device)
         if model_epoch_filename and is_training:
@@ -906,11 +937,7 @@ class Engine(EngineBase):
         load_model_post_hooks = self.config.typed_value("load_model_post_hooks")
         if load_model_post_hooks:
-            with (
-                autocast(device_type=self._device.split(":")[0], dtype=self._autocast_dtype)
-                if self._use_autocast
-                else nullcontext()
-            ), rf.set_default_device_ctx(self._device):
+            with self._run_ctx_mgr():
                 sentinel_kw = util.get_fwd_compat_kwargs()
                 for hook in load_model_post_hooks:
                     hook(model=self._orig_model, **sentinel_kw)
@@ -1090,7 +1117,10 @@ class Engine(EngineBase):
             # Currently, this callback interface is intended to also be used by other backends,
             # and then the user can always assume Numpy arrays.
             if isinstance(raw, torch.Tensor):  # might already be numpy array
-                raw = raw.detach().cpu().numpy()
+                raw = raw.detach().cpu()
+                if raw.dtype == torch.bfloat16:
+                    raw = raw.float()
+                raw = raw.numpy()
             y.raw_tensor = raw
             return y
@@ -1120,7 +1150,10 @@ class Engine(EngineBase):
                     # Also resets any dyn dims, which might have been set in the prev step.
                     self._forward_step_expected_outputs.reset_content()
                 extern_data = extern_data_util.raw_dict_to_extern_data(
-                    extern_data_raw, extern_data_template=self.extern_data, device=self._device
+                    extern_data_raw,
+                    extern_data_template=self.extern_data,
+                    device=self._device,
+                    float_dtype=self._default_float_dtype,
                 )
                 try:
                     self._run_step(extern_data, train_func=False)
@@ -1224,7 +1257,10 @@ def _to_raw(n: Union[int, float, Tensor]):
     if isinstance(n, (int, float)):
         return n
     if isinstance(n, Tensor):
-        return n.raw_tensor.detach().cpu().numpy()
+        x = n.raw_tensor.detach().cpu()
+        if x.dtype == torch.bfloat16:
+            x = x.float()
+        return x.numpy()
     raise TypeError(f"Unexpected {n} of type {type(n)}")

{returnn-1.20241022.173306 → returnn-1.20241022.224754}/returnn/torch/frontend/_backend.py RENAMED Viewed

@@ -676,6 +676,9 @@ class TorchBackend(Backend[torch.Tensor]):
         if len(batch_dims) != 1:
             targets_raw = torch.reshape(targets_raw, (batch_n_elems, targets_raw.shape[-1]))  # [B', S]
             targets_lengths = torch.reshape(targets_lengths, (batch_n_elems,))  # [B']
+        if log_probs.dtype == torch.bfloat16:
+            # Currently (PyTorch 2.5), ctc_loss does not support bfloat16.
+            log_probs = log_probs.to(torch.float32)
         loss_raw = torch.nn.functional.ctc_loss(
             log_probs=log_probs,
             targets=targets_raw,
@@ -691,7 +694,7 @@ class TorchBackend(Backend[torch.Tensor]):
             name="ctc_loss",
             dims=batch_dims,
             raw_tensor=loss_raw,
-            dtype=logits.dtype,
+            dtype=TorchBackend.get_dtype_name_raw(loss_raw),
         )
         return loss
@@ -2039,6 +2042,12 @@ class TorchBackend(Backend[torch.Tensor]):
                 pad_right = fft_length - frame_length - pad_left
                 window_pt = torch.nn.functional.pad(window_pt, (pad_left, pad_right))
+        orig_dtype = x_raw.dtype
+        if orig_dtype == torch.bfloat16:
+            # PyTorch stft does not support bfloat16 currently (PyTorch 2.5):
+            # https://github.com/pytorch/pytorch/issues/117844
+            # (Check back later here whether that's still the case...)
+            x_raw = x_raw.to(torch.float32)
         y_raw = torch.stft(
             x_raw,
             n_fft=fft_length,

{returnn-1.20241022.173306 → returnn-1.20241022.224754}/returnn/torch/util/exception_helper.py RENAMED Viewed

@@ -86,7 +86,10 @@ def _help_data_or_array(
     :return: (info,(min,max))
     """
     if isinstance(value, torch.Tensor):
-        value = value.detach().cpu().numpy()
+        value = value.detach().cpu()
+        if value.dtype == torch.bfloat16:
+            value = value.float()
+        value = value.numpy()
     v_minmax = -1, -1
     if isinstance(value, np.ndarray):
         info = "shape %s, dtype %s" % (value.shape, value.dtype)

{returnn-1.20241022.173306 → returnn-1.20241022.224754}/returnn.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20241022.173306
+Version: 1.20241022.224754
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer

{returnn-1.20241022.173306 → returnn-1.20241022.224754}/tests/test_rf_base.py RENAMED Viewed

@@ -769,3 +769,31 @@ def test_edit_distance():
         a_ = Tensor("a", [batch_dim, a_spatial_dim], dtype="int64", raw_tensor=torch.tensor(a_values))
         b_ = Tensor("b", [batch_dim, b_spatial_dim], dtype="int64", raw_tensor=torch.tensor(b_values))
         _check_edit_distance(a_, a_spatial_dim, b_, b_spatial_dim)
+def test_audio_log_mel_filterbank_from_raw_bfloat16():
+    time_dim = Dim(Tensor("time", [batch_dim], dtype="int32"))
+    out_dim = Dim(80, name="freq")
+    extern_data = TensorDict(
+        {
+            "data": Tensor("data", [batch_dim, time_dim], dtype="float32"),
+        }
+    )
+    # noinspection PyShadowingNames
+    def _forward_step(*, extern_data: TensorDict, **_kwargs):
+        audio = extern_data["data"]
+        audio = rf.cast(audio, "bfloat16")
+        out, out_spatial_dim = rf.audio.log_mel_filterbank_from_raw(audio, in_spatial_dim=time_dim, out_dim=out_dim)
+        assert out.dtype == "bfloat16"
+        out = rf.cast(out, "float32")  # the test framework doesn't support bfloat16 currently due to Numpy...
+        out.mark_as_default_output(shape=(batch_dim, out_spatial_dim, out_dim))
+    run_model(
+        extern_data,
+        lambda **_kwargs: rf.Module(),
+        _forward_step,
+        dyn_dim_min_sizes={time_dim: 2000},
+        dyn_dim_max_sizes={time_dim: 3000},
+        test_tensorflow=False,
+    )

{returnn-1.20241022.173306 → returnn-1.20241022.224754}/tests/test_torch_engine.py RENAMED Viewed

@@ -606,6 +606,33 @@ def test_torch_engine_train_lion_optimizer():
         engine.train()
+def test_torch_engine_bf16():
+    config = Config(
+        dict(
+            task="train",
+            device="cpu",
+            default_float_dtype="bfloat16",
+            extern_data={"data": {"dim": 9}, "classes": {"dim": 2, "sparse": True}},
+            get_model=TrainTestModel,
+            train_step=TrainTestModel.train_step,
+            batch_size=500,
+            optimizer={"class": "adam"},
+            num_epochs=1,
+        )
+    )
+    dataset = init_dataset({"class": "Task12AXDataset", "num_seqs": 10, "name": "train"})
+    dataset.init_seq_order(epoch=1)
+    with global_config_ctx(config):
+        engine = Engine(config=config)
+        engine.init_train_from_config(train_data=dataset)
+        engine.train()
+        params = list(engine.get_pt_model().parameters())
+        assert params
+        for p in params:
+            assert p.dtype == torch.bfloat16
 if __name__ == "__main__":
     better_exchook.install()
     if len(sys.argv) <= 1: