returnn 1.20250430.145858__py3-none-any.whl → 1.20250508.181644__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of returnn might be problematic.
- returnn/PKG-INFO +1 -1
- returnn/_setup_info_generated.py +2 -2
- returnn/datasets/basic.py +24 -25
- returnn/datasets/cached.py +4 -3
- returnn/datasets/distrib_files.py +1 -2
- returnn/datasets/generating.py +20 -20
- returnn/datasets/hdf.py +9 -9
- returnn/datasets/lm.py +25 -13
- returnn/datasets/meta.py +39 -38
- returnn/datasets/normalization_data.py +1 -1
- returnn/datasets/postprocessing.py +9 -9
- returnn/datasets/sprint.py +8 -7
- returnn/datasets/util/strings.py +0 -1
- returnn/datasets/util/vocabulary.py +3 -3
- returnn/extern/graph_editor/subgraph.py +1 -2
- returnn/extern/graph_editor/transform.py +1 -2
- returnn/extern/graph_editor/util.py +1 -2
- returnn/frontend/_backend.py +4 -3
- returnn/frontend/_utils.py +1 -1
- returnn/frontend/audio/mel.py +0 -1
- returnn/frontend/const.py +3 -3
- returnn/frontend/device.py +0 -1
- returnn/frontend/dropout.py +1 -1
- returnn/frontend/encoder/e_branchformer.py +1 -1
- returnn/frontend/loop.py +3 -3
- returnn/frontend/loss.py +0 -1
- returnn/frontend/matmul.py +0 -1
- returnn/frontend/run_ctx.py +9 -9
- returnn/frontend/signal.py +0 -1
- returnn/frontend/types.py +2 -4
- returnn/native_op.py +13 -0
- returnn/sprint/cache.py +2 -4
- returnn/sprint/interface.py +3 -4
- returnn/tensor/_dim_extra.py +9 -9
- returnn/tensor/_tensor_extra.py +20 -19
- returnn/tensor/_tensor_op_overloads.py +0 -1
- returnn/tensor/tensor.py +1 -1
- returnn/tensor/tensor_dict.py +9 -9
- returnn/tf/engine.py +60 -65
- returnn/tf/frontend_layers/_backend.py +3 -3
- returnn/tf/frontend_layers/cond.py +6 -6
- returnn/tf/frontend_layers/debug_eager_mode.py +0 -1
- returnn/tf/frontend_layers/layer.py +12 -12
- returnn/tf/frontend_layers/loop.py +3 -3
- returnn/tf/frontend_layers/make_layer.py +0 -1
- returnn/tf/layers/base.py +56 -49
- returnn/tf/layers/basic.py +60 -65
- returnn/tf/layers/rec.py +74 -74
- returnn/tf/native_op.py +1 -3
- returnn/tf/network.py +60 -57
- returnn/tf/updater.py +3 -3
- returnn/tf/util/basic.py +24 -23
- returnn/torch/data/extern_data.py +4 -5
- returnn/torch/data/pipeline.py +3 -4
- returnn/torch/engine.py +16 -16
- returnn/torch/frontend/_backend.py +15 -15
- returnn/torch/frontend/bridge.py +3 -3
- returnn/torch/updater.py +8 -9
- returnn/torch/util/debug_inf_nan.py +0 -2
- returnn/torch/util/exception_helper.py +1 -1
- returnn/torch/util/scaled_gradient.py +0 -1
- returnn/util/basic.py +1 -2
- returnn/util/better_exchook.py +14 -0
- {returnn-1.20250430.145858.dist-info → returnn-1.20250508.181644.dist-info}/METADATA +1 -1
- {returnn-1.20250430.145858.dist-info → returnn-1.20250508.181644.dist-info}/RECORD +68 -68
- {returnn-1.20250430.145858.dist-info → returnn-1.20250508.181644.dist-info}/LICENSE +0 -0
- {returnn-1.20250430.145858.dist-info → returnn-1.20250508.181644.dist-info}/WHEEL +0 -0
- {returnn-1.20250430.145858.dist-info → returnn-1.20250508.181644.dist-info}/top_level.txt +0 -0
returnn/tf/network.py
CHANGED
@@ -3,7 +3,9 @@ Defines the :class:`TFNetwork` and :class:`ExternData`.
 """

 from __future__ import annotations
-
+
+from typing import Callable, List, Optional, Any, Protocol, Tuple, Dict, TYPE_CHECKING, Union
+
 import tensorflow as tf
 import sys
 import re
@@ -19,6 +21,11 @@ from returnn.tensor import Tensor, Dim, TensorDict
 from returnn.tf.util.data import Data
 from returnn.util import basic as util

+if TYPE_CHECKING:
+    from returnn.config import Config
+    from returnn.tf.layers.base import SearchChoices
+    from returnn.tf.util.data import BatchInfo
+

 class DataNotFound(Exception):
     """
@@ -39,8 +46,8 @@ class ExternData(TensorDict):
         :param None|dict[str,dict[str]] data: optional init kwargs for Data
         """
         super().__init__()
-        self._config
-        self._batch_info
+        self._config: typing.Optional[Config] = None
+        self._batch_info: typing.Optional[BatchInfo] = None
         self.default_input = default_input
         self.default_target = default_target
         self.extra_added_keys = set()  # set[str]
@@ -369,8 +376,7 @@ def _extern_data_types_from_config(config):
         print("Warning: Using extern_data and will ignore num_inputs/num_outputs in config.", file=log.v2)
     else:
         log.print_deprecation_warning(
-            "Using num_inputs/num_outputs instead of extern_data is deprecated"
-            " and might be removed in future versions"
+            "Using num_inputs/num_outputs instead of extern_data is deprecated and might be removed in future versions"
         )
         num_inputs, num_outputs = _num_inputs_outputs_from_config(config)
         data_dims = num_outputs.copy()
@@ -502,7 +508,7 @@ class _NetworkConstructionStack:
     """

     def __init__(self):
-        self.layers
+        self.layers: typing.List[str] = []
         self.in_flat_construct_count = 0

     def append(self, layer_name):
@@ -645,33 +651,31 @@ class TFNetwork:
         self.extra_deps_in_extra = False
         self.extra_only_template = False
         self.is_root_in_ctx = not parent_net  # default. might be overwritten
-        self.extra_nets
-        self.subnets
+        self.extra_nets: Dict[str, TFNetwork] = {}
+        self.subnets: Dict[str, Subnetwork] = {}
         self._selected_train_layers = None
         self._construction_stack = _NetworkConstructionStack()
         self.layers_desc: Dict[str, Dict[str, Any]] = {}
         self.layers: Dict[str, LayerBase] = {}
-        self.losses_dict
-        self.total_loss
-        self.total_constraints
-        self.total_objective
-        self._global_train_step
-        self._global_train_step_var
+        self.losses_dict: Dict[str, LossHolder] = {}
+        self.total_loss: Optional[tf.Tensor] = None
+        self.total_constraints: Optional[tf.Tensor] = None
+        self.total_objective: Optional[tf.Tensor] = None
+        self._global_train_step: Optional[tf.Tensor] = None
+        self._global_train_step_var: Optional[tf.Variable] = None
         self.epoch_step = None
-        self.saver
-        self.extra_vars_to_save
+        self.saver: Optional[tf.compat.v1.train.Saver] = None
+        self.extra_vars_to_save: List[tf.Variable] = []
         self.recurrent = False
-        self._assigner_cache
+        self._assigner_cache: Dict[tf.Variable, tf_util.VariableAssigner] = {}
         self.concat_sources_dropout_cache: Dict[
             Tuple[Tuple[LayerBase, ...], Dim, float, Optional[Tuple[Optional[int], ...]]], Data
         ] = {}
-        self._merge_all_summaries
-        self._graph_reset_callbacks
-        self._run_opts
-        self._run_finished_callbacks
-        self._map_search_beam_to_search_choices =
-            {}
-        )  # type: typing.Dict[tf_util.SearchBeam,"returnn.tf.layers.base.SearchChoices"]
+        self._merge_all_summaries: Optional[tf.Tensor] = None
+        self._graph_reset_callbacks: List[Callable] = []
+        self._run_opts: Dict[str, Any] = {}
+        self._run_finished_callbacks: List[Callable] = []
+        self._map_search_beam_to_search_choices: Dict[tf_util.SearchBeam, SearchChoices] = {}

     def __repr__(self):
         s = "TFNetwork %r" % self.name
@@ -1308,15 +1312,16 @@ class TFNetwork:
             layer.output.sanity_check()
             # The axes should not have moved now.
             output_special_axes = layer.output.get_special_axes_dict()
-            assert (
-
-
-
-
-
-
-
+            assert output_template_special_axes == output_special_axes, (
+                "%s %r: not equal: %r == %r, from data %r -> %r"
+                % (
+                    layer_class.__name__,
+                    name,
+                    output_template_special_axes,
+                    output_special_axes,
+                    output_template,
+                    layer.output,
+                )
             )
         except TypeError:
             help_on_type_error_wrong_args(cls=layer_class, kwargs=list(layer_desc.keys()))
@@ -1486,7 +1491,7 @@ class TFNetwork:
         else:
             total_loss = None
             total_constraints = None
-        losses_multi_dict
+        losses_multi_dict: Dict[str, List[Tuple[Optional[str], LossHolder]]] = {}
         # self.layers also include extra net layers and sub layers, see add_layer.
         for name, layer in sorted(self.layers.items()):
             assert isinstance(layer, LayerBase)
@@ -1869,14 +1874,15 @@ class TFNetwork:

         # All end points must be mapped now.
         for layer in end_points:
-            assert (
-
-
-
-
-
-
+            assert layer in mapped_layers, (
+                "end point %r not mapped.\n end points:\n%s\n mapped:\n%s\n blacklist:\n%s\n starting points:\n%s"
+                % (
+                    layer,
+                    pformat(end_points),
+                    pformat(mapped_layers),
+                    pformat(blacklist),
+                    pformat(starting_points),
+                )
             )
         # Assign flatten_with_seq_len_mask cache to mapped layers.
         for layer, new_layer in mapped_layers.items():
@@ -2402,9 +2408,7 @@ class TFNetwork:

         Note that this excludes auxiliary params.
         """
-        layers = {
-            layer.get_absolute_name(): layer for layer in self.get_all_layers_deep()
-        }  # type: typing.Dict[str,LayerBase]
+        layers: Dict[str, LayerBase] = {layer.get_absolute_name(): layer for layer in self.get_all_layers_deep()}
         for layer_name, layer_values_dict in values_dict.items():
             if layer_values_dict:
                 if ignore_non_existing and layer_name not in layers:
@@ -4091,9 +4095,9 @@ class LossHolder:
                 self._error_value = self._layer._cond_only_on_eval_opt(self.loss.get_error, default_value=0.0)
             else:
                 self._error_value = self.loss.get_error()
-            assert (
-
-            )
+            assert self._loss_value is not None or self._error_value is not None, (
+                "layer %r loss %r return None for loss and error" % (self._layer, self.loss)
+            )
             if self._norm_factor is None:
                 self._norm_factor = self.loss.get_normalization_factor()
             loss_value = self._loss_value
@@ -4515,12 +4519,12 @@ class CustomCheckpointLoader:
         # All variables in the checkpoint:
         self.var_ckpt_names = set(self.reader.get_variable_to_shape_map())  # type: typing.Set[str]
         # All variables of the model to be loaded:
-        self.var_net_names = {
+        self.var_net_names: Dict[str, Union[tf.Variable, Any]] = {
             self._get_param_name(v): v for v in self.saveable_params
-        }
+        }
         # Model variables missing in the checkpoint:
-        self.missing_var_names
-        self.missing_non_critical_var_names
+        self.missing_var_names: List[str] = []
+        self.missing_non_critical_var_names: List[str] = []
         for name, v in sorted(self.var_net_names.items()):
             if name in self.var_ckpt_names:
                 continue
@@ -4702,10 +4706,10 @@ class CustomCheckpointLoader:
             "rnn/lstm_cell/bias": "lstm_cell/bias",
             "rnn/lstm_cell/kernel": "lstm_cell/kernel",
             (
-                "cudnn/params_canonical/rnn/multi_rnn_cell/cell_0/
+                "cudnn/params_canonical/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/bias"
             ): "lstm_fused_cell/bias",
             (
-                "cudnn/params_canonical/rnn/multi_rnn_cell/cell_0/
+                "cudnn/params_canonical/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/kernel"
             ): "lstm_fused_cell/kernel",
         }

@@ -4877,7 +4881,7 @@ class CustomCheckpointLoader:
                 self.target = target
                 self.keys = [target + "bias", target + "kernel"]
                 self.prefix = prefix
-                self.data
+                self.data: typing.Optional[typing.Dict[str, numpy.ndarray]] = None

             # noinspection PyMethodParameters
             def _load(sself):
@@ -5140,8 +5144,7 @@ class CustomLoadParamFunc(Protocol):

     def __call__(
         self, *, name: str, shape: Tuple[int], reader: tf.compat.v1.train.NewCheckpointReader
-    ) -> Optional[numpy.ndarray]:
-        ...
+    ) -> Optional[numpy.ndarray]: ...


 def set_custom_post_init(var, func):
returnn/tf/updater.py
CHANGED
@@ -219,9 +219,9 @@ class Updater:

             learning_rate_function = self.config.typed_dict.get("dynamic_learning_rate")
             signature = inspect.signature(learning_rate_function)
-            assert any(
-
-            )
+            assert any([arg.kind == inspect.Parameter.VAR_KEYWORD for arg in signature.parameters.values()]), (
+                "please specify **kwargs in dynamic_learning_rate for future compatibility"
+            )
             if "epoch" in signature.parameters:
                 raise NotImplementedError("TF updater: dynamic_learning_rate with epoch not supported currently")
             lr = learning_rate_function(
returnn/tf/util/basic.py
CHANGED
@@ -1799,7 +1799,7 @@ def dropout(
     x = tf.convert_to_tensor(x, name="x")
     assert isinstance(x, tf.Tensor)
     if isinstance(keep_prob, (float, int)) and not 0 < keep_prob <= 1:
-        raise ValueError("keep_prob must be a scalar tensor or a float in the
+        raise ValueError("keep_prob must be a scalar tensor or a float in the range (0, 1], got %g" % keep_prob)
     # Do nothing if we know keep_prob == 1
     if isinstance(keep_prob, (float, int)) and keep_prob == 1:
         return x
@@ -2492,9 +2492,9 @@ def get_common_shape(values, ignore_axes=(), allow_broadcast_all_sources=NotSpec
     import numpy

     assert len(values) > 0
-    assert all(
-
-    )
+    assert all([isinstance(value, (tf.Tensor, tf.Variable, float, int, numpy.number)) for value in values]), (
+        "types %r" % ([type(v) for v in values])
+    )
     # Filter out scalars.
     values = [value for value in values if isinstance(value, (tf.Tensor, tf.Variable))]
     assert all([value.shape.ndims is not None for value in values]), "some unknown ndim"
@@ -2523,14 +2523,15 @@ def get_common_shape(values, ignore_axes=(), allow_broadcast_all_sources=NotSpec
                 common_shape[axis] = static_dim
             else:  # common_shape is int
                 assert isinstance(common_shape[axis], int)
-                assert (
-
-
-
-
-
-
+                assert common_shape[axis] == static_dim, (
+                    "non matching dim %r vs %r in axis %i, value %r of values %r"
+                    % (
+                        common_shape[axis],
+                        static_dim,
+                        axis,
+                        value,
+                        values,
+                    )
                 )
     # Check validate_broadcast_all_sources
     need_broadcast = {id(value): False for value in values}
@@ -2576,9 +2577,9 @@ def unbroadcast_to_common_shape(value, common_shape, ignore_axes=(), allow_only_
     for axis in ignore_axes:
         assert 0 <= axis < ndim
         tile_multiples[axis] = 1
-    assert all(
-
-    )
+    assert all([m is not None for m in tile_multiples]), (
+        "ignore_axes %r probably missing some axis for common shape %r" % (ignore_axes, common_shape)
+    )
     if all([isinstance(m, int) and m == 1 for m in tile_multiples]):
         # We have a no-op.
         return value
@@ -6611,7 +6612,6 @@ def find_unsupported_devices_in_graph(graph, dev_name, ignore=None):


 class _DeviceAttrMod:
-
     _tf_mod = None

     @classmethod
@@ -7680,13 +7680,14 @@ class FetchHelper:
         _, info = copier(sgv, dst_graph=sgv.graph, dst_scope="", reuse_dst_scope=True)
         assert isinstance(info, graph_editor.TransformerInfo)
         target_op_transformed = info.transformed(target_op)
-        assert isinstance(
-
-
-
-
-
+        assert isinstance(target_op_transformed, tf.Operation), (
+            "\ntarget_op\n%r,\nfetches\n%r,\nstop_at_ts\n%s,\nops\n%s"
+            % (
+                target_op,
+                fetches,
+                pformat(stop_at_ts),
+                pformat(ops),
+            )
         )
         fetch_helpers = []
         for x in fetch_helper_tensors:
returnn/torch/data/extern_data.py
CHANGED
@@ -56,9 +56,9 @@ def raw_dict_to_extern_data(
         assert len(raw_tensor.shape) == data.batch_ndim, f"ndim mismatch for {k}: {raw_tensor.shape} vs {data}"
         for i, dim in enumerate(data.dims):
             if dim.dimension is not None:
-                assert (
-
-                )
+                assert dim.dimension == raw_tensor.shape[i], (
+                    f"shape mismatch for {k}: {raw_tensor.shape} vs {data.batch_shape}"
+                )
         if isinstance(raw_tensor, torch.Tensor):
             if raw_tensor.dtype.is_floating_point and float_dtype:
                 raw_tensor = raw_tensor.to(dtype=float_dtype)
@@ -81,8 +81,7 @@ def raw_dict_to_extern_data(
             and (data.dims[1].dyn_size_ext is None or data.dims[1].dyn_size_ext.raw_tensor is None)
         ):
             assert k + ":seq_len" in extern_data_raw, (
-                f"extern_data {data}, dyn spatial dim, missing {k}:seq_len in raw dict, "
-                f"check dataset or collate_batch"
+                f"extern_data {data}, dyn spatial dim, missing {k}:seq_len in raw dict, check dataset or collate_batch"
             )
             size = extern_data_raw[k + ":seq_len"]
             # Sequence lengths have to be on CPU for the later call to rnn.pack_padded_sequence
returnn/torch/data/pipeline.py
CHANGED
@@ -123,7 +123,6 @@ class ChunkingIterDataPipe(torch.utils.data.IterDataPipe):
         chunking_data_keys = list(self._chunk_size.keys())

         for data_dict in self._dataset:
-
             if not chunking_data_keys:
                 chunking_data_keys = list(data_dict.keys())  # use all if not configured separately
                 chunking_data_key_black_list = ["seq_tag", "seq_idx", "num_seqs", "epoch", "complete_frac"]
@@ -150,9 +149,9 @@ class ChunkingIterDataPipe(torch.utils.data.IterDataPipe):
                 if num_chunks is None:
                     num_chunks = len(chunks)
                 else:
-                    assert num_chunks == len(
-                        chunks
-                    )
+                    assert num_chunks == len(chunks), (
+                        "Chunking resulted in different number of chunks for different data keys."
+                    )

                 data_chunks[data_key] = chunks

returnn/torch/engine.py
CHANGED
@@ -66,22 +66,22 @@ class Engine(EngineBase):
         self.model_filename = self.config.value("model", None)
         self._mp_manager = torch.multiprocessing.Manager()
         self._epoch_mp_shared = self._mp_manager.Value("i", 0)
-        self.train_dataset
+        self.train_dataset: Optional[Dataset] = None
         self.eval_datasets = {}
-        self.extern_data
-        self._train_dataloader
-        self._eval_dataloaders
+        self.extern_data: Optional[TensorDict] = None
+        self._train_dataloader: Optional[DataLoader] = None
+        self._eval_dataloaders: Dict[str, DataLoader] = {}

-        self._start_epoch
-        self._final_epoch
-        self._min_seq_length
+        self._start_epoch: Optional[int] = None
+        self._final_epoch: Optional[int] = None
+        self._min_seq_length: Union[int, float, Dict[str, int], NumbersDict] = config.typed_value(
             "min_seq_length", None
-        )
-        self._max_seq_length
+        ) or config.int("min_seq_length", None)
+        self._max_seq_length: Union[int, float, Dict[str, int], NumbersDict] = config.typed_value(
             "max_seq_length", None
-        )
-        self._orig_model
-        self._pt_model
+        ) or config.int("max_seq_length", None)
+        self._orig_model: Optional[Union[rf.Module, torch.nn.Module]] = None
+        self._pt_model: Optional[torch.nn.Module] = None
         self._epoch_start_func: Optional[Callable] = self.config.typed_value("epoch_start")
         self._epoch_end_func: Optional[Callable] = self.config.typed_value("epoch_end")
         self._train_step_func: Optional[Callable] = None
@@ -95,15 +95,15 @@ class Engine(EngineBase):
         self._updater: Optional[Updater] = None

         self._use_autocast = False
-        self._autocast_dtype
-        self._grad_scaler
+        self._autocast_dtype: Optional[str] = None
+        self._grad_scaler: Optional[amp.GradScaler] = None

         dev_ = get_device_from_config_opt(config.value("device", None))
         self._device = dev_.result
         print("Using device:", self._device, f"({dev_.reason or '?'})", file=log.v2)

-        self._torch_distributed_ctx
-        self._ddp_pt_model
+        self._torch_distributed_ctx: Optional[DistributedContext] = None
+        self._ddp_pt_model: Optional[DistributedDataParallel] = None

         if config.typed_value("torch_distributed") is not None:
             self._torch_distributed_ctx = dist_get_ctx(config=config)
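The min_seq_length/max_seq_length change above now reads the value via config.typed_value(...) or config.int(...), and the added annotation (Union[int, float, Dict[str, int], NumbersDict]) suggests which config forms are accepted. A hedged config sketch inferred only from that annotation:

# Hypothetical RETURNN config snippet; the accepted forms are inferred from the
# Union[int, float, Dict[str, int], NumbersDict] annotation in the diff above.
max_seq_length = 75                   # single number, now also picked up via config.int(...)
# max_seq_length = {"classes": 75}    # per-data-key dict form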
returnn/torch/frontend/_backend.py
CHANGED
@@ -421,9 +421,9 @@ class TorchBackend(Backend[torch.Tensor]):
         else:  # not allow_broadcast
             for source, dim in sources:
                 templ_dims = other_dims[:axis] + [dim] + other_dims[axis:]
-                assert set(templ_dims) == set(
-                    source
-                )
+                assert set(templ_dims) == set(source.dims), (
+                    f"concat {source} {dim} not allowed with allow_broadcast=False"
+                )
                 source_ = source.copy_transpose(templ_dims)
                 sources_raw.append(source_.raw_tensor)
             out = Tensor(
@@ -612,9 +612,9 @@ class TorchBackend(Backend[torch.Tensor]):
         assert axis in logits.dims, "Specified axis not present in logits."

         if axis == targets.sparse_dim:
-            assert (
-                logits
-            )
+            assert logits.dims_set - {axis} == targets.dims_set, (
+                "logits Dims and target Dims have to match (except for implicit sparse_dim)."
+            )

             logits_dim_order = list(targets.dims)
             if len(logits_dim_order) > 0:
@@ -629,9 +629,9 @@ class TorchBackend(Backend[torch.Tensor]):
                 targets.raw_tensor = targets.raw_tensor.long()

         else:
-            assert (
-
-            )
+            assert not targets.sparse_dim, (
+                "We expect that cross entropy would always be calculated along the sparse dim, if there is one."
+            )
             assert logits.dims_set == targets.dims_set, "logits Dims and target Dims have to match."
             assert axis in targets.dims, "Specified axis not present in targets."

@@ -1348,12 +1348,12 @@ class TorchBackend(Backend[torch.Tensor]):
         a_dims = a.dims
         b_dims = b.dims

-        assert all(
-
-        )
-        assert all(
-
-        )
+        assert all(dim in a_dims for dim in reduce), (
+            f"'a' does not have the specified reduce dim(s) {reduce} (a dims: {a_dims})"
+        )
+        assert all(dim in b_dims for dim in reduce), (
+            f"'b' does not have the specified reduce dim(s) {reduce} (b dims: {b_dims})"
+        )

         if len(reduce) > 1:
             reduce = list(reduce)
returnn/torch/frontend/bridge.py
CHANGED
@@ -178,9 +178,9 @@ class RFModuleAsPTModule(torch.nn.Module):
         rf_param = getattr(self._rf_module, name, None)
         if not isinstance(rf_param, rf.Parameter):
             return  # just ignore
-        assert isinstance(
-            param
-        )
+        assert isinstance(param, torch.nn.Parameter), (
+            f"{self} register_parameter {name}: did not get a Parameter but {type(param).__name__}"
+        )
         rf_param.raw_tensor = param

     def register_buffer(self, name: str, tensor: Optional[torch.Tensor], persistent: bool = True) -> None:
returnn/torch/updater.py
CHANGED
@@ -39,7 +39,7 @@ def _init_optimizer_classes_dict():


 def get_optimizer_class(
-    class_name: Union[str, Type[torch.optim.Optimizer], Callable[[], Type[torch.optim.Optimizer]]]
+    class_name: Union[str, Type[torch.optim.Optimizer], Callable[[], Type[torch.optim.Optimizer]]],
 ) -> Type[torch.optim.Optimizer]:
     """
     :param class_name: Optimizer class, either as str (e.g. "adam"), as type (torch.optim.Adam) or callable.
@@ -121,9 +121,9 @@ class Updater:
             import inspect

             signature = inspect.signature(self.learning_rate_function)
-            assert any(
-
-            )
+            assert any([arg.kind == inspect.Parameter.VAR_KEYWORD for arg in signature.parameters.values()]), (
+                "please specify **kwargs in dynamic_learning_rate for future compatibility"
+            )
             if "network" in signature.parameters:
                 raise ValueError("Torch updater: dynamic_learning_rate network is TF specific")
             else:
@@ -497,10 +497,9 @@ class Updater:
         # Split in parameter groups only if decouple_constraints is set and the optimizer accepts weight_decay.
         cls_init_kwargs = _get_class_init_kwargs(optim_class)
         if "weight_decay" not in cls_init_kwargs:
-            assert (
-                "weight_decay
-
-                "%s" % optim_name for optim_name in cls_init_kwargs
+            assert "weight_decay" not in optimizer_opts, (
+                "weight_decay not accepted by the chosen optimizer. Accepted values: %s"
+                % ", ".join("%s" % optim_name for optim_name in cls_init_kwargs)
             )
             return network_params

@@ -564,7 +563,7 @@ class Updater:


 def _wrap_user_blacklist_wd_modules(
-    mods: Sequence[Union[str, Type[rf.Module], Type[torch.nn.Module]]]
+    mods: Sequence[Union[str, Type[rf.Module], Type[torch.nn.Module]]],
 ) -> Tuple[type, ...]:
     assert isinstance(mods, (list, tuple)), f"invalid blacklist_weight_decay_modules {mods!r}"
     res = []
returnn/torch/util/debug_inf_nan.py
CHANGED
@@ -30,7 +30,6 @@ Also, there might be inf/nan values which are ok, expected, and not a problem
 So we don't stop on the first occurrence but just report all of them.
 """

-
 from __future__ import annotations

 import sys
@@ -90,7 +89,6 @@ def debug_inf_nan(
         print(f"Caught RuntimeError in backward: {exc}", file=file)

     else:  # without grad
-
         with trace_ops:
             func()

returnn/torch/util/exception_helper.py
CHANGED
@@ -79,7 +79,7 @@ def help_on_torch_exception(


 def _help_data_or_array(
-    value: Union[torch.Tensor, np.ndarray, bool, object]
+    value: Union[torch.Tensor, np.ndarray, bool, object],
 ) -> Tuple[str, Tuple[Union[int, float], Union[int, float]]]:
     """
     :param value:
returnn/torch/util/scaled_gradient.py
CHANGED
@@ -14,7 +14,6 @@ https://github.com/janfreyberg/pytorch-revgrad/blob/449fa763a76d/src/pytorch_rev
 https://github.com/tadeephuy/GradientReversal/blob/5d9857d63/gradient_reversal/functional.py
 """

-
 from __future__ import annotations
 from typing import Optional, Union
 import torch
returnn/util/basic.py
CHANGED
@@ -705,7 +705,7 @@ def expand_env_vars(s: str) -> str:
             return delim
         if mo.group("invalid") is not None:
             i = mo.start("invalid")
-            raise ValueError(f"Invalid placeholder in string: {s[i:i+2]!r}...")
+            raise ValueError(f"Invalid placeholder in string: {s[i : i + 2]!r}...")
         raise ValueError(f"Unrecognized named group in pattern {pattern}")

     return pattern_.sub(_convert, s)
@@ -1811,7 +1811,6 @@ def json_remove_comments(string, strip_space=True):
     index = 0

     for match in re.finditer(tokenizer, string):
-
         if not (in_multi or in_single):
             tmp = string[index : match.start()]
             if not in_string and strip_space:
|