returnn 1.20230408.155406__tar.gz → 1.20230409.122444__tar.gz
This diff shows the content of publicly released package versions, as they appear in their respective public registries. It is provided for informational purposes only.
- {returnn-1.20230408.155406/returnn.egg-info → returnn-1.20230409.122444}/PKG-INFO +1 -1
- returnn-1.20230409.122444/_setup_info_generated.py +2 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-rf.config +1 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-torch.config +1 -1
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/frontend/_backend.py +31 -1
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/frontend/array_.py +11 -1
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/frontend/const.py +3 -3
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/frontend/run_ctx.py +29 -14
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tensor/_dim_extra.py +35 -8
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tensor/_tensor_extra.py +39 -1
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/frontend_layers/_backend.py +5 -5
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/frontend_low_level/_backend.py +12 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/torch/engine.py +50 -29
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/torch/frontend/_backend.py +38 -8
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/torch/frontend/bridge.py +3 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444/returnn.egg-info}/PKG-INFO +1 -1
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_demos.py +7 -0
- returnn-1.20230408.155406/_setup_info_generated.py +0 -2
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/.editorconfig +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/.gitignore +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/.gitmodules +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/.kateconfig +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/CHANGELOG.md +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/CODEOWNERS +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/CONTRIBUTING.md +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/LICENSE +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/MANIFEST.in +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/README.rst +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/__init__.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/12AX.cluster_map +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/_setup_returnn_env.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-fwd.config +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-horovod-mpi.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-horovod-mpi.py.sh +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-horovod-mpi.sh +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-hyper-param-tuning.config +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-iter-dataset.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-list-devices.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-lua-torch-layer.config +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-pretrain.config +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-record-and-push-to-webserver.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-returnn-as-framework.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-rhn-enwik8.config +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-sprint-interface.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-tf-att-copy.config +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-tf-attention.config +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-tf-chunking-blstm.12ax.config +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-tf-contribrnn-lstm.12ax.config +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-tf-enc-dec.config +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-tf-hard-att-copy.config +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-tf-lstm-benchmark.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-tf-maxgradnorm-lstm.12ax.config +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-tf-native-lstm-lowmem.12ax.config +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-tf-native-lstm.12ax.config +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-tf-native-lstm2.12ax.config +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-tf-native-lstm2.12ax.tuned.config +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-tf-neural-transducer.12ax.config +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-tf-rec-explicit-lstm.config +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-tf-rec-explicit-rnn.config +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-tf-rec-self-att.config +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-tf-search-compiled-graph.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-tf-vanilla-lstm.12ax.config +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-timit-lstm-ctc.config +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-upd-mult-model.lstm.12ax.config +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo.sh +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/mdlstm/IAM/README.txt +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/mdlstm/IAM/chars.txt +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/mdlstm/IAM/config_demo +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/mdlstm/IAM/config_fwd +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/mdlstm/IAM/config_real +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/mdlstm/IAM/create_IAM_dataset.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/mdlstm/IAM/decode.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/mdlstm/IAM/features/raw/demo.h5 +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/mdlstm/IAM/go.sh +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/mdlstm/IAM/lines.txt +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/mdlstm/IAM/split/eval.txt +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/mdlstm/IAM/split/train.txt +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/mdlstm/IAM/split/valid.txt +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/mdlstm/README.md +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/mdlstm/artificial/create_test_h5.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/mdlstm/artificial/forwardconfig +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/mdlstm/artificial/go.sh +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/mdlstm/artificial/trainconfig +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/mdlstm/artificial_rgb/create_test_h5.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/mdlstm/artificial_rgb/forwardconfig +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/mdlstm/artificial_rgb/go.sh +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/mdlstm/artificial_rgb/trainconfig +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/pyproject.toml +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/requirements.txt +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/__init__.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/__main__.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/__old_mod_loader__.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/__setup__.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/config.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/datasets/__init__.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/datasets/audio.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/datasets/basic.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/datasets/bundle_file.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/datasets/cached.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/datasets/cached2.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/datasets/generating.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/datasets/hdf.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/datasets/lm.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/datasets/map.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/datasets/meta.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/datasets/multi_proc.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/datasets/normalization_data.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/datasets/numpy_dump.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/datasets/raw_wav.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/datasets/sprint.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/datasets/stereo.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/datasets/util/__init__.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/datasets/util/feature_extraction.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/datasets/util/vocabulary.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/engine/__init__.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/engine/base.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/engine/batch.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/WarpRna/__init__.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/WarpRna/__main__.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/WarpRna/warp-rna/.git +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/WarpRna/warp-rna/.gitignore +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/WarpRna/warp-rna/LICENSE +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/WarpRna/warp-rna/README.md +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/WarpRna/warp-rna/aligner.gif +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/WarpRna/warp-rna/check.png +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/WarpRna/warp-rna/core.cu +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/WarpRna/warp-rna/core.h +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/WarpRna/warp-rna/core_cpu.cpp +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/WarpRna/warp-rna/pytorch_binding/LICENSE +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/WarpRna/warp-rna/pytorch_binding/MANIFEST.in +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/WarpRna/warp-rna/pytorch_binding/README.md +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/WarpRna/warp-rna/pytorch_binding/binding.cpp +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.cu +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.h +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/WarpRna/warp-rna/pytorch_binding/requirements.txt +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/WarpRna/warp-rna/pytorch_binding/setup.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/__init__.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/test.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/WarpRna/warp-rna/ref_rna.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/setup.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op.cc +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op_kernel_tmpl.h +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/warp_rna/__init__.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/WarpRna/warp-rna/test.cpp +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/__init__.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/graph_editor/README.md +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/graph_editor/__init__.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/graph_editor/edit.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/graph_editor/reroute.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/graph_editor/select.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/graph_editor/subgraph.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/graph_editor/transform.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/extern/graph_editor/util.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/frontend/__init__.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/frontend/_numpy_backend.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/frontend/_utils.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/frontend/dims.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/frontend/dtype.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/frontend/init.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/frontend/linear.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/frontend/loss.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/frontend/math_.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/frontend/matmul.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/frontend/module.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/frontend/parameter.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/frontend/rand.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/frontend/reduce.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/frontend/state.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/frontend/types.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/import_/__init__.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/import_/common.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/import_/git.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/import_/import_.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/learning_rate_control.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/log.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/native_op.cpp +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/native_op.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/pretrain.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/sprint/__init__.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/sprint/cache.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/sprint/control.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/sprint/error_signals.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/sprint/extern_interface.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/sprint/interface.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tensor/README.md +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tensor/__init__.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tensor/_tensor_mixin_base.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tensor/_tensor_op_overloads.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tensor/control_flow_ctx.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tensor/dim.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tensor/marked_dim.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tensor/tensor.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tensor/tensor_dict.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/__init__.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/compat.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/data_pipeline.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/distributed.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/engine.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/frontend_layers/__init__.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/frontend_layers/_utils.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/frontend_layers/config_entry_points.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/frontend_layers/debug_eager_mode.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/frontend_layers/dims.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/frontend_layers/layer.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/frontend_layers/make_layer.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/frontend_layers/prev_tensor_ref.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/frontend_low_level/__init__.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/horovod.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/hyper_param_tuning.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/layers/__init__.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/layers/base.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/layers/basic.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/layers/rec.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/layers/segmental_model.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/layers/signal_processing.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/native_op.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/network.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/sprint.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/updater.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/util/__init__.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/util/basic.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/util/data.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/util/ken_lm.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/util/open_fst.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/torch/README.md +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/torch/__init__.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/torch/data/__init__.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/torch/data/pipeline.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/torch/data/returnn_dataset_wrapper.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/torch/data/tensor_utils.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/torch/frontend/__init__.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/torch/frontend/_rand.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/torch/functional/README.md +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/torch/functional/__init__.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/torch/updater.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/util/__init__.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/util/basic.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/util/better_exchook.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/util/bpe.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/util/debug.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/util/debug_helpers.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/util/fsa.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/util/literal_py_to_pickle.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/util/pprint.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/util/py-to-pickle.cpp +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/util/sig_proc.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/util/task_system.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn.egg-info/SOURCES.txt +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn.egg-info/dependency_links.txt +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn.egg-info/top_level.txt +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/rnn.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/setup.cfg +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/setup.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/DummySprintExec.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/PyCharm-inspection-profile.xml +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/PyCharm.idea/.gitignore +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/PyCharm.idea/.name +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/PyCharm.idea/codeStyleSettings.xml +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/PyCharm.idea/codeStyles/Project.xml +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/PyCharm.idea/codeStyles/codeStyleConfig.xml +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/PyCharm.idea/inspectionProfiles/Project_Default.xml +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/PyCharm.idea/inspectionProfiles/profiles_settings.xml +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/PyCharm.idea/misc.xml +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/PyCharm.idea/modules.xml +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/PyCharm.idea/returnn.iml +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/PyCharm.idea/scopes/scope_settings.xml +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/_set_num_threads1.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/_setup_returnn_env.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/_setup_test_env.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/bpe-unicode-demo.codes +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/bpe-unicode-demo.vocab +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/lexicon_opt.fst +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/lexicon_opt.isyms +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/lexicon_opt.jpg +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/lexicon_opt.osyms +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/lint_common.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/pycharm-inspect.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/pylint.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/returnn-as-framework.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/rf_utils.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/spelling.dic +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_Config.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_Dataset.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_Fsa.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_GeneratingDataset.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_HDFDataset.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_LearningRateControl.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_Log.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_MultiProcDataset.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_PTDataset.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_Pretrain.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_ResNet.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_SprintDataset.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_SprintInterface.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_TFEngine.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_TFNativeOp.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_TFNetworkLayer.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_TFNetworkRecLayer.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_TFNetworkSigProcLayer.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_TFUpdater.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_TFUtil.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_TF_determinism.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_TaskSystem.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_TaskSystem_SharedMem.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_TranslationDataset.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_Util.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_fork_exec.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_hdf_dump.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_rf_base.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_tensor.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_tools.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_torch_frontend.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_torch_internal_frontend.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/_setup_returnn_env.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/analyze-dataset-batches.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/bliss-collect-seq-lens.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/bliss-dump-text.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/bliss-get-segment-names.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/bliss-to-ogg-zip.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/bpe-create-lexicon.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/calculate-word-error-rate.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/cleanup-old-models.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/collect-orth-symbols.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/collect-words.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/compile_native_op.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/compile_tf_graph.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/debug-dump-search-scores.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/debug-plot-search-scores.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/dump-dataset-raw-strings.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/dump-dataset.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/dump-forward-stats.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/dump-forward.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/dump-network-json.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/dump-pickle.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/extract_state_tying_from_dataset.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/get-attention-weights.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/get-best-model-epoch.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/hdf_dump.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/hdf_dump_translation_dataset.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/import-blocks-mt-model.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/import-t2t-mt-model.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/lattice_rescorer/.gitignore +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/lattice_rescorer/Makefile +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/lattice_rescorer/README.md +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/lattice_rescorer/example/README.md +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/lattice_rescorer/example/libs_list +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.config +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.keep_over_epoch.lstm2.config +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/lattice_rescorer/example/rescore_lattice.sh +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/lattice_rescorer/example/state_vars_list +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/lattice_rescorer/example/tensor_names_list +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/lattice_rescorer/file.h +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/lattice_rescorer/htklatticerescorer.cc +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/lattice_rescorer/htklatticerescorer.h +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/lattice_rescorer/main.cc +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/lattice_rescorer/rescorer.h +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/lattice_rescorer/vocabulary.cc +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/lattice_rescorer/vocabulary.h +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/tf_avg_checkpoints.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/tf_inspect_checkpoint.py +0 -0
- {returnn-1.20230408.155406 → returnn-1.20230409.122444}/tools/tf_inspect_summary_log.py +0 -0
{returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-rf.config
RENAMED
@@ -50,6 +50,7 @@ def train_step(*, model: Model, extern_data, **_kwargs):
     data = extern_data["data"]
     logits = model(data)
     targets = extern_data["classes"]
+    # TODO: use flattening on logits/targets
     loss = rf.cross_entropy(estimated=logits, estimated_type="logits", target=targets, axis=out_dim)
     loss.mark_as_loss(name="ce")

{returnn-1.20230408.155406 → returnn-1.20230409.122444}/demos/demo-torch.config
RENAMED
@@ -58,7 +58,7 @@ def train_step(*, model: Model, extern_data, **_kwargs):
     targets = extern_data["classes"]
     targets_packed = torch.nn.utils.rnn.pack_padded_sequence(
         targets.raw_tensor, data.dims[1].dyn_size_ext.raw_tensor, batch_first=True, enforce_sorted=False)
-    loss = nn.CrossEntropyLoss()(logits_packed.data, targets_packed.data.long())
+    loss = nn.CrossEntropyLoss(reduction='none')(logits_packed.data, targets_packed.data.long())
    rf.get_run_ctx().mark_as_loss(name="cross_entropy", loss=loss)


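The switch to `reduction='none'` keeps the per-frame cross-entropy values so that the run context can sum and normalize them itself. A minimal sketch of the difference in plain PyTorch (shapes and names below are illustrative, not taken from the demo config):

```python
import torch

logits = torch.randn(7, 5)            # 7 packed frames, 5 classes
targets = torch.randint(0, 5, (7,))

# Old call: reduction defaults to "mean", returning a single averaged scalar.
mean_loss = torch.nn.CrossEntropyLoss()(logits, targets)

# New call: reduction="none" keeps one loss value per frame, so the caller
# can sum them and divide by its own normalization factor.
frame_losses = torch.nn.CrossEntropyLoss(reduction="none")(logits, targets)
assert frame_losses.shape == (7,)
assert torch.allclose(frame_losses.mean(), mean_loss)
```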
{returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/frontend/_backend.py
RENAMED
@@ -208,6 +208,29 @@ class Backend(Generic[T]):
         """
         raise NotImplementedError

+    @staticmethod
+    def cast_raw(raw_tensor: T, dtype: str) -> T:
+        """
+        :param raw_tensor:
+        :param dtype: e.g. "float32"
+        :return: raw tensor with dtype casted
+        """
+        raise NotImplementedError
+
+    @staticmethod
+    def cast(tensor: Tensor, dtype: str) -> Tensor:
+        """
+        :param tensor:
+        :param dtype: e.g. "float32"
+        :return: tensor with dtype casted
+        """
+        # Default implementation using cast_raw.
+        res = tensor.copy_template()
+        res.dtype = dtype
+        # noinspection PyProtectedMember
+        res.raw_tensor = tensor._raw_backend.cast_raw(tensor.raw_tensor, dtype)
+        return res
+
     # Restrict the possible activation function names,
     # to not get unexpected behavior,
     # or unwanted incompatibilities.
@@ -287,6 +310,13 @@ class Backend(Generic[T]):
         """
         raise NotImplementedError

+    @staticmethod
+    def have_sequence_mask_raw() -> bool:
+        """
+        :return: whether we have a sequence_mask_raw implementation
+        """
+        return False
+
     @staticmethod
     def sequence_mask_raw(lengths: T, *, batch_major: bool = True) -> T:
         """
@@ -309,7 +339,7 @@
         :return: context manager
         """
         # Default implementation for eager-based frameworks
-
+        yield  # nothing to do

     @staticmethod
     @contextlib.contextmanager
{returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/frontend/array_.py
RENAMED
@@ -12,7 +12,7 @@ from .types import RawTensorTypes

 T = TypeVar("T")

-__all__ = ["convert_to_tensor", "constant", "gather"]
+__all__ = ["convert_to_tensor", "constant", "cast", "gather"]


 def convert_to_tensor(
@@ -77,6 +77,16 @@ def convert_to_tensor(
 constant = convert_to_tensor  # alias for some older code


+def cast(tensor: Tensor, dtype: str) -> Tensor:
+    """
+    :param tensor:
+    :param dtype:
+    :return: tensor with the same data, but with a different dtype
+    """
+    # noinspection PyProtectedMember
+    return tensor._raw_backend.cast(tensor, dtype=dtype)
+
+
 # noinspection PyUnusedLocal
 def gather(
     source: Tensor,
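With `cast` now exported from the array module, dtype conversion can go through the frontend instead of touching the raw backend tensor directly. A small usage sketch, assuming the helpers are re-exported under `returnn.frontend` as usual; the dim and tensor names are made up:

```python
import returnn.frontend as rf
from returnn.tensor import Dim

feat_dim = Dim(4, name="feature")               # hypothetical static dim
counts = rf.zeros([feat_dim], dtype="int32")    # some integer-valued tensor

# Only the dtype changes; dims and other metadata are kept.
counts_f = rf.cast(counts, dtype="float32")
assert counts_f.dtype == "float32" and counts_f.dims == counts.dims
```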
{returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/frontend/const.py
RENAMED
@@ -14,7 +14,7 @@ __all__ = ["full", "constant", "fill", "zeros", "ones"]


 def full(
-    dims: Sequence[Dim], fill_value: RawTensorTypes,
+    *, dims: Sequence[Dim], fill_value: RawTensorTypes, dtype: Optional[str] = None, sparse_dim: Optional[Dim] = None
 ) -> Tensor:
     """
     full
@@ -46,11 +46,11 @@ def zeros(dims: Sequence[Dim], *, dtype: Optional[str] = None, sparse_dim: Optional[Dim] = None) -> Tensor:
     """
     zeros. float by default.
     """
-    return full(dims, 0, dtype=dtype or rf.get_default_float_dtype(), sparse_dim=sparse_dim)
+    return full(dims=dims, fill_value=0, dtype=dtype or rf.get_default_float_dtype(), sparse_dim=sparse_dim)


 def ones(dims: Sequence[Dim], *, dtype: Optional[str] = None, sparse_dim: Optional[Dim] = None) -> Tensor:
     """
     ones. float by default.
     """
-    return full(dims, 1, dtype=dtype or rf.get_default_float_dtype(), sparse_dim=sparse_dim)
+    return full(dims=dims, fill_value=1, dtype=dtype or rf.get_default_float_dtype(), sparse_dim=sparse_dim)
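`full` is now keyword-only and also accepts `dtype` and `sparse_dim`, so positional calls like `full(dims, 0)` no longer work; `zeros` and `ones` were adapted accordingly. A sketch of the new calling convention (dim names are illustrative, assuming the usual `returnn.frontend` re-exports):

```python
import returnn.frontend as rf
from returnn.tensor import Dim

time_dim = Dim(10, name="time")
feat_dim = Dim(3, name="feature")

# Keyword-only style for full:
x = rf.full(dims=[time_dim, feat_dim], fill_value=0.5, dtype="float32")

# zeros/ones still take dims positionally and forward to full with keywords:
y = rf.zeros([time_dim, feat_dim])               # float by default
z = rf.ones([time_dim, feat_dim], dtype="int32")
```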
{returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/frontend/run_ctx.py
RENAMED
@@ -119,24 +119,18 @@ class RunCtx:
         E.g. if the overall normalization is sum(loss)/sum(num_frames), this is also what the optimizer will use,
         otherwise the optimizer will just use sum(loss).
         :param custom_inv_norm_factor:
-            The standard norm factor is
-            or
+            The standard inv norm factor is sum(target_seq_len) if the target has a time-axis,
+            or sum(output_seq_len) if there is no target and the output has a time-axis,
             or 1 otherwise. (See :func:`Loss.init` for details.)
             This is used for proper normalization of accumulated loss/error per epoch
             and also proper normalization per batch for reporting,
             no matter if use_normalized_loss is True or False.
             If you want to change this norm factor, you can set this.
-            Basically, for all reporting, it uses sum(loss)
+            Basically, for all reporting, it uses sum(loss) / sum(custom_inv_norm_factor).
         """
         assert self.stage == "train_step"
         if not isinstance(loss, Tensor):
             assert isinstance(loss, _backend.global_backend.RawTensorType)
-            assert _backend.global_backend.get_ndim_raw(loss) == 0, (
-                f"mark_as_loss(<loss with shape {_backend.global_backend.get_known_shape_raw(loss)}>, {name!r}):"
-                " Only scalar raw losses are supported,"
-                " because we cannot know whether there are any dynamic dims which might require padding."
-                " Explicitly convert to a Tensor first and specify dim tags."
-            )
             loss = rf.convert_to_tensor(loss)
         assert name not in self.losses
         self.losses[name] = Loss(
@@ -220,31 +214,52 @@ class Loss:

     scale: float = 1.0
     as_error: bool = False
-    use_normalized_loss: bool = False
+    use_normalized_loss: bool = False  # for the gradient / total loss
     use_flatten_frames: bool = True
     custom_inv_norm_factor: Optional[Tensor] = None

+    _summed_loss_cached: Optional[Tensor] = None
+    _mean_loss_cached: Optional[Tensor] = None
+
     def get_summed_loss(self) -> Tensor:
         """
         :return: sum of loss (scalar)
         """
         if not self.loss.dims:
             return self.loss
-
+        if self._summed_loss_cached is not None:
+            return self._summed_loss_cached
+        if self._mean_loss_cached is not None:
+            return self._mean_loss_cached / self.get_inv_norm_factor()
+        self._summed_loss_cached = rf.reduce_sum(self.loss, axis=self.loss.dims)
+        return self._summed_loss_cached

     def get_mean_loss(self) -> Tensor:
         """
         :return: sum of loss (scalar)
         """
+        if self._mean_loss_cached is not None:
+            return self._mean_loss_cached
         if self.custom_inv_norm_factor:
-
+            loss = self.get_summed_loss()
+            loss /= rf.cast(self.custom_inv_norm_factor, dtype=loss.dtype)
+            return loss
         if not self.loss.dims:
             return self.loss
-
+        self._mean_loss_cached = rf.reduce_mean(self.loss, axis=self.loss.dims)
+        return self._mean_loss_cached
+
+    def get_inv_norm_factor(self) -> Union[int, Tensor]:
+        """
+        :return: inverse norm factor (scalar)
+        """
+        if self.custom_inv_norm_factor:
+            return self.custom_inv_norm_factor
+        return self.loss.num_elements()

     def get_scaled_reduced_loss(self) -> Tensor:
         """
-        :return: scaled reduced loss (scalar), as it is supposed to be used for calculating the
+        :return: scaled reduced loss (scalar), as it is supposed to be used for calculating the train gradient
         """
         if self.use_normalized_loss:
             loss = self.get_mean_loss()
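The new accessors tie summed loss, mean loss and the inverse norm factor together: the mean is the sum divided by the norm factor (the number of unpadded elements, or `custom_inv_norm_factor` if set). A plain-Python sketch of that relationship with made-up numbers:

```python
frame_losses = [0.7, 0.2, 0.4, 0.3]   # one CE value per unpadded frame
summed = sum(frame_losses)            # what get_summed_loss() returns: 1.6
inv_norm = len(frame_losses)          # get_inv_norm_factor(): 4 frames
mean = summed / inv_norm              # get_mean_loss(): 0.4

# With a custom_inv_norm_factor (e.g. number of target labels), the same
# summed loss is divided by that factor instead:
custom_inv_norm_factor = 8
custom_mean = summed / custom_inv_norm_factor   # 0.2
print(mean, custom_mean)
```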
{returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tensor/_dim_extra.py
RENAMED
@@ -747,7 +747,11 @@ class _DimMixin:
         :return: whether dim is static or dynamic but with scalar dyn_size_ext
         """
         if self.is_static():
+            if self.capacity is not None:
+                return self.size < self.capacity
             return False
+        if self.capacity is not None:
+            return True
         if not self.dyn_size_ext:
             return True  # unknown
         return self.dyn_size_ext.batch_ndim > 0
@@ -1516,6 +1520,21 @@ class _DimMixin:
         If `self.src_data` has a placeholder, will use the shape from there.
         Otherwise, uses `self.dimension` (if static) or `self.dyn_size` (if dynamic).

+        :return: max(size or dyn_size)
+        """
+        res = self.get_dim_value_tensor()
+        if isinstance(res, _t.Tensor):
+            assert res.dims == ()
+            return res.raw_tensor
+        assert isinstance(res, int)
+        return res
+
+    def get_dim_value_tensor(self) -> Union[int, _t.Tensor]:
+        """
+        Infers the dim this axis should have if unbroadcasted.
+        If `self.src_data` has a placeholder, will use the shape from there.
+        Otherwise, uses `self.dimension` (if static) or `self.dyn_size` (if dynamic).
+
         :return: max(size or dyn_size)
         """
         import returnn.frontend as rf
@@ -1530,25 +1549,33 @@ class _DimMixin:
                 # Masking is not always possible here, e.g.
                 # self = Dim{'self-att-keys'['time:var:extern_data:classes'[B]]}.
                 use_time_mask=False,
-            )
-            return self.dyn_size_ext
+            )
+            return self.dyn_size_ext
         if self.is_batch_dim():
+            res = None
             if self._extra and self._extra.src_data:
-
-
-
+                res = self._extra.src_data.get_batch_dim()
+            elif self.batch:
+                res = self.batch.dim
+            if isinstance(res, int):
+                return res
+            if res is not None:
+                return _t.Tensor("batch", dims=(), dtype=rf.get_default_array_index_dtype(), raw_tensor=res)
         if (
             self._extra
             and self._extra.src_data is not None
             and self._extra.src_axis is not None
             and self._extra.src_data.placeholder is not None
         ):
-
+            res = self._extra.src_data.get_dim(self._extra.src_axis)
+            if isinstance(res, int):
+                return res
+            return _t.Tensor("batch", dims=(), dtype=rf.get_default_array_index_dtype(), raw_tensor=res)
         self.complete_dyn_size()
         if self.dyn_size_ext and self.dyn_size_ext.placeholder is not None:
             if self.dyn_size_ext.batch_ndim > 0:
-                return rf.reduce_max(self.dyn_size_ext, axis=self.dyn_size_ext.dim_tags)
-            return self.dyn_size_ext
+                return rf.reduce_max(self.dyn_size_ext, axis=self.dyn_size_ext.dim_tags)
+            return self.dyn_size_ext
         raise Exception("%s: need placeholder, self.dimension or self.dyn_size for dim value" % self)

     def axis_split_info(self):
{returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tensor/_tensor_extra.py
RENAMED
@@ -2676,7 +2676,11 @@ class _TensorMixin(_TensorMixinBase):
         backend = tag.dyn_size_ext._raw_backend
         assert set(tag.dyn_size_ext.dim_tags).issubset(self.dim_tags)  # https://github.com/rwth-i6/returnn/issues/721
         with backend.name_scope_raw("get_sequence_mask_broadcast"):
-            if
+            if (
+                backend.have_sequence_mask_raw()
+                and tag.dyn_size_ext.have_batch_axis()
+                and tag.dyn_size_ext.batch_ndim == 1
+            ):  # just [B]
                 # This is the common case where the size is of shape [B].
                 # We make use of sequence_mask or sequence_mask_time_major in that case,
                 # which is optimized by caching.
@@ -2733,11 +2737,45 @@
         assert tag.dyn_size_ext
         return tag.dyn_size_ext.copy_compatible_to(self, check_dtype=False, check_sparse=False).placeholder

+    def num_elements(self: Tensor) -> Union[int, Tensor]:
+        """
+        :return: number of elements in this tensor, i.e. prod(self.shape)
+        :rtype: tf.Tensor
+        """
+        if all(dim.is_static() for dim in self.dims):
+            n = 1
+            for dim in self.dims:
+                n *= dim.dimension
+            return n
+
+        import returnn.frontend as rf
+
+        n = 1
+        dims = list(self.dims)
+        dims.sort(key=lambda dim: -dim.dyn_size_ext.batch_ndim if dim.dyn_size_ext else 0)
+        while dims:
+            dim = dims.pop(0)
+            if dim.is_static():
+                n *= dim.dimension
+                continue
+            # E.g. dyn_size_ext is shape [B], and self has shape [B,T].
+            # Due to the sorting of dims above, dims will be [T,B], and we will first process T.
+            # We want to sum over dyn_size_ext, but then we need to remove the other dims it covers.
+            for dim_ in dim.dyn_size_ext.dims:
+                assert dim_ in dims  # num elements not really well-defined then
+                assert not dim_.need_masking()  # not implemented
+                dims.remove(dim_)
+            n_ = rf.reduce_sum(dim.dyn_size_ext, axis=dim.dyn_size_ext.dims)
+            n *= n_
+        return n
+
     def copy_masked(self: Tensor, mask_value) -> Tensor:
         """
         :param float|int|tf.Tensor mask_value:
         """
         assert self.placeholder is not None
+        if not any(dim.need_masking() for dim in self.dims):
+            return self.copy()
         assert self._raw_backend.is_tensorflow  # not implemented otherwise for now
         from returnn.tf.util.basic import mask_dyn_seq_len_nd
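`num_elements` counts actual (unpadded) elements: purely static dims contribute their product, while a dynamic dim contributes the sum of its sequence lengths. A small arithmetic sketch with illustrative numbers:

```python
# Tensor of shape [B, T, F] with B=2, F=3 and per-sequence lengths T = [4, 6].
seq_lens = [4, 6]
feat = 3

padded_elements = len(seq_lens) * max(seq_lens) * feat   # 2 * 6 * 3 = 36
num_elements = sum(seq_lens) * feat                       # 10 * 3 = 30, padding excluded
print(padded_elements, num_elements)
```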
{returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/frontend_layers/_backend.py
RENAMED
@@ -122,6 +122,11 @@ class ReturnnLayersBackend(Backend[Layer]):
         """transpose_raw is a no-op in this backend"""
         return raw_tensor

+    @staticmethod
+    def cast(tensor: Tensor, dtype: str) -> Tensor:
+        """cast"""
+        return rfl.make_layer({"class": "cast", "from": tensor, "dtype": dtype}, name="cast")
+
     @staticmethod
     def activation(tensor: Tensor, func: str) -> Tensor:
         """activation"""
@@ -172,11 +177,6 @@ class ReturnnLayersBackend(Backend[Layer]):
         log_probs = rf.log_softmax(logits, axis=axis)
         return -rf.matmul(targets, log_probs, reduce=axis)

-    @staticmethod
-    def sequence_mask_raw(lengths: Layer, *, batch_major: bool = True) -> Layer:
-        """sequence mask"""
-        raise NotImplementedError  # TODO
-
     @staticmethod
     def create_parameter_raw(tensor: rf.Parameter) -> Layer:
         """create parameter"""
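In the layers-based TF frontend, the new `cast` is expressed as a regular RETURNN `"cast"` layer via `make_layer`. A hedged sketch of the network-dict fragment this corresponds to (the source layer name `"encoder"` is made up for illustration):

```python
# Net-dict fragment equivalent to the make_layer call above, assuming the
# input comes from a layer called "encoder":
network = {
    "cast": {"class": "cast", "from": "encoder", "dtype": "float32"},
}
```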
{returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/tf/frontend_low_level/_backend.py
RENAMED
@@ -194,6 +194,11 @@ class TFBackend(Backend[tf.Tensor]):
         with tf_util.same_control_flow_ctx(raw_tensor):
             return tf.tile(raw_tensor, [1] * axis + [dim] + [1] * (raw_tensor.shape.ndims - axis - 1))

+    @staticmethod
+    def cast_raw(raw_tensor: tf.Tensor, dtype: str) -> tf.Tensor:
+        """cast"""
+        return tf.cast(raw_tensor, dtype)
+
     @staticmethod
     def activation_raw(raw_tensor: tf.Tensor, func: str) -> tf.Tensor:
         """
@@ -212,6 +217,13 @@ class TFBackend(Backend[tf.Tensor]):
             raise ValueError(f"unknown activation function {func!r}")
         return f(raw_tensor)

+    @staticmethod
+    def have_sequence_mask_raw() -> bool:
+        """
+        :return: whether we have sequence_mask
+        """
+        return True
+
     @staticmethod
     def sequence_mask_raw(lengths: tf.Tensor, *, batch_major: bool = True) -> tf.Tensor:
         """
{returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/torch/engine.py
RENAMED
@@ -141,34 +141,44 @@ class Engine(EngineBase):
         self._pt_model.train()

         accumulated_losses_dict = NumbersDict()
+        accumulated_inv_norm_factors_dict = NumbersDict()
         step_idx = 0
         for data in self._train_dataloader:
             self._run_step(data)

             train_ctx = rf.get_run_ctx()
-            losses_dict = train_ctx.losses
             total_loss = train_ctx.total_loss()
+            losses_dict = NumbersDict(
+                {
+                    name: float(loss.get_summed_loss().raw_tensor.detach().cpu().numpy())
+                    for name, loss in train_ctx.losses.items()
+                }
+            )
+            inv_norm_factors_dict = NumbersDict(
+                {name: float(_to_raw(loss.get_inv_norm_factor())) for name, loss in train_ctx.losses.items()}
+            )

             self._updater.get_optimizer().zero_grad()
             total_loss.raw_tensor.backward()
             self._updater.get_optimizer().step()

-
-
-
-            }
-            accumulated_losses_dict += NumbersDict(losses_dict)
-            print("step %i, loss: %f" % (step_idx, total_loss.raw_tensor.detach().cpu().numpy()), file=log.v4)
+            accumulated_losses_dict += losses_dict
+            accumulated_inv_norm_factors_dict += inv_norm_factors_dict
+            print(f"step {step_idx}, loss: {dict(losses_dict / inv_norm_factors_dict)}", file=log.v4)

             step_idx += 1
             self._train_step += 1

         print("Trained %i steps" % step_idx)

-        accumulated_losses_dict = accumulated_losses_dict /
-        self.learning_rate_control.set_epoch_error(
+        accumulated_losses_dict = accumulated_losses_dict / accumulated_inv_norm_factors_dict
+        self.learning_rate_control.set_epoch_error(
+            self.epoch, {f"train_loss_{k}": v for k, v in accumulated_losses_dict.items()}
+        )
         self.learning_rate_control.save()

+        print(f"Total train loss: {dict(accumulated_losses_dict)}", file=log.v3)
+
         if self.epoch % self._save_model_epoch_interval == 0 or self.epoch == self._final_epoch:
             self._save_model()
             self._save_optimizer()
@@ -186,8 +196,8 @@ class Engine(EngineBase):

         data_loader = self._eval_dataloaders[dataset_name]

-        accumulated_loss = 0.0
         accumulated_losses_dict = NumbersDict()
+        accumulated_inv_norm_factors_dict = NumbersDict()
         step_idx = 0

         with torch.no_grad():
@@ -195,29 +205,31 @@ class Engine(EngineBase):

                 self._run_step(data)
                 train_ctx = rf.get_run_ctx()
-                losses_dict = train_ctx.losses
-                total_loss = train_ctx.total_loss()
-
-                total_loss = total_loss.raw_tensor.detach().cpu().numpy()
-                losses_dict = {
-                    dataset_name + "_loss_" + name: float(loss.loss.raw_tensor.detach().cpu().numpy())
-                    for name, loss in losses_dict.items()
-                }
-                print("step %i, loss: %f" % (step_idx, total_loss), file=log.v4)
-
-                accumulated_loss += total_loss
-                accumulated_losses_dict += NumbersDict(losses_dict)
-                step_idx += 1

-
-
-
+                losses_dict = NumbersDict(
+                    {
+                        name: float(loss.get_summed_loss().raw_tensor.detach().cpu().numpy())
+                        for name, loss in train_ctx.losses.items()
+                    }
+                )
+                inv_norm_factors_dict = NumbersDict(
+                    {name: float(_to_raw(loss.get_inv_norm_factor())) for name, loss in train_ctx.losses.items()}
+                )
+
+                accumulated_losses_dict += losses_dict
+                accumulated_inv_norm_factors_dict += inv_norm_factors_dict
+                print(f"step {step_idx}, loss: {dict(losses_dict / inv_norm_factors_dict)}", file=log.v4)
+                step_idx += 1

-
+        assert step_idx > 0, f"No data in dataset {dataset_name!r}."
+        accumulated_losses_dict = accumulated_losses_dict / accumulated_inv_norm_factors_dict

-
+        self.learning_rate_control.set_epoch_error(
+            self.epoch, {f"{dataset_name}_loss_{k}": v for k, v in accumulated_losses_dict.items()}
+        )
+        self.learning_rate_control.save()

-
+        print(f"Total loss for {dataset_name!r}: {dict(accumulated_losses_dict)}", file=log.v3)

     def _create_data_loader(self, dataset: Dataset) -> DataLoader2:
         """
@@ -312,6 +324,7 @@ class Engine(EngineBase):
         else:
             raise TypeError(f"get_model returned {model} of type {type(model)}, expected rf.Module or torch.nn.Module")
         assert isinstance(self._pt_model, torch.nn.Module)
+        print("Model:", self._pt_model, file=log.v4)

         if checkpoint_state is not None:
             self._pt_model.load_state_dict(checkpoint_state["model"])
@@ -404,3 +417,11 @@ class Engine(EngineBase):
         os.makedirs(directory, exist_ok=True)

         self._updater.save_optimizer(filename)
+
+
+def _to_raw(n: Union[int, float, Tensor]):
+    if isinstance(n, (int, float)):
+        return n
+    if isinstance(n, Tensor):
+        return n.raw_tensor.detach().cpu().numpy()
+    raise TypeError(f"Unexpected {n} of type {type(n)}")
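Both the training and the evaluation loop now keep two accumulators, summed losses and inverse norm factors, and only divide when reporting or passing epoch errors to the learning-rate control. A bookkeeping sketch with plain floats for a single hypothetical "ce" loss (NumbersDict does the same per loss name):

```python
acc_loss, acc_norm = 0.0, 0.0
for step_loss, step_norm in [(12.0, 40.0), (9.0, 30.0), (15.0, 50.0)]:
    acc_loss += step_loss               # summed loss of the step
    acc_norm += step_norm               # e.g. number of target frames in the step
    print(f"step loss: {step_loss / step_norm:.3f}")

# Normalized epoch value, e.g. what would be reported as "train_loss_ce":
print(f"train_loss_ce: {acc_loss / acc_norm:.3f}")
```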
{returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/torch/frontend/_backend.py
RENAMED
@@ -116,6 +116,11 @@ class TorchBackend(Backend[torch.Tensor]):
         """
         return raw_tensor.unsqueeze(axis)

+    @staticmethod
+    def cast_raw(raw_tensor: torch.Tensor, dtype: str) -> torch.Tensor:
+        """cast"""
+        return raw_tensor.to(dtype=TorchBackend.as_dtype_raw(dtype))
+
     @staticmethod
     def activation_raw(raw_tensor: torch.Tensor, func: str) -> torch.Tensor:
         """
@@ -411,6 +416,21 @@ class TorchBackend(Backend[torch.Tensor]):

         return result_tensor

+    @staticmethod
+    def range_over_dim(dim: Dim) -> Tensor[torch.Tensor]:
+        """
+        :param dim:
+        :return: tensor with shape [dim]
+        """
+        out = Tensor(
+            "range",
+            dims=[dim],
+            sparse_dim=dim,
+            dtype=dim.dyn_size_ext.dtype if dim.dyn_size_ext else rf.get_default_array_index_dtype(),
+        )
+        out.raw_tensor = torch.arange(dim.get_dim_value())
+        return out
+
     @staticmethod
     def reduce(
         source: Tensor[torch.Tensor],
@@ -422,15 +442,25 @@
         """reduce"""
         assert mode in Backend._AllowedReduceModes
         if isinstance(axis, Dim):
-
-
-
+            axis = [axis]
+        assert all(isinstance(dim, Dim) for dim in axis)
+        if use_time_mask is not False and any(dim.need_masking() for dim in axis):
+            source = source.copy()
+            dtype = source.raw_tensor.dtype
+            if mode == "max":
+                mask_value = torch.finfo(dtype).min if dtype.is_floating_point else torch.iinfo(dtype).min
+            elif mode == "min":
+                mask_value = torch.finfo(dtype).max if dtype.is_floating_point else torch.iinfo(dtype).max
+            elif mode == "sum":
+                mask_value = 0
+            else:
+                raise NotImplementedError(f"reduce_{mode} not implemented with masking on tensor {source!r}.")
+            for i, dim in enumerate(axis):
+                if dim.need_masking():
+                    mask = source.get_sequence_mask_broadcast(axis=i)
+                    source.raw_tensor = torch.where(mask, source.raw_tensor, mask_value)
         func = getattr(torch, mode)
-        raw_dims = (
-            [source.get_axis_from_description(axis)]
-            if isinstance(axis, Dim)
-            else [source.get_axis_from_description(dim) for dim in axis]
-        )
+        raw_dims = [source.get_axis_from_description(dim) for dim in axis]
         res_dims = [dim for i, dim in enumerate(source.dims) if i not in raw_dims]
         if not res_dims:
             raw_result = func(source.raw_tensor)
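The PyTorch `reduce` now masks padded positions before reducing, so `reduce_max`/`min`/`sum` over a dynamic axis ignore padding. The idea in plain PyTorch (shapes and values are illustrative):

```python
import torch

x = torch.tensor([[1.0, 5.0, 9.0],
                  [2.0, 7.0, 0.0]])     # [B=2, T=3]; second sequence has length 2
seq_lens = torch.tensor([3, 2])
mask = torch.arange(x.shape[1])[None, :] < seq_lens[:, None]   # [B, T] bool

# For a max-reduce, padded positions are replaced by the dtype minimum first,
# so they can never win the reduction.
neutral = torch.finfo(x.dtype).min
masked_max = torch.where(mask, x, torch.full_like(x, neutral)).amax(dim=1)
print(masked_max)   # tensor([9., 7.]) -- the padded 0.0 in row 2 is ignored
```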
{returnn-1.20230408.155406 → returnn-1.20230409.122444}/returnn/torch/frontend/bridge.py
RENAMED
@@ -76,6 +76,9 @@ class _RFModuleAsPTModule(torch.nn.Module):
             pt_mod = rf_module_to_pt_module(rf_mod)
             self.add_module(name, pt_mod)

+    def _get_name(self):
+        return self._rf_module.__class__.__name__ + "[RF→PT]"
+
     @property
     def rf_module(self) -> rf.Module:
         """RF module"""
{returnn-1.20230408.155406 → returnn-1.20230409.122444}/tests/test_demos.py
RENAMED
@@ -139,6 +139,13 @@ def test_demo_torch_task12ax():
     # TODO also check FER. So far this is not properly reported. https://github.com/rwth-i6/returnn/issues/1120


+@unittest.skipIf(not torch, "no PyTorch")
+def test_demo_rf_torch_task12ax():
+    cleanup_tmp_models("demos/demo-rf.config")
+    run(py, "rnn.py", "demos/demo-rf.config", print_stdout=True)
+    # TODO also check FER. So far this is not properly reported. https://github.com/rwth-i6/returnn/issues/1120
+
+
 def test_demo_iter_dataset_task12ax():
     # there should be no actual TF dependency, we just iterate the dataset
     cleanup_tmp_models("demos/demo-tf-vanilla-lstm.12ax.config")