PyPI - returnn - Versions diffs - 1.20251013.113026__py3-none-any.whl → 1.20260109.93428__py3-none-any.whl - Mend

returnn 1.20251013.113026__py3-none-any.whl → 1.20260109.93428__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of returnn might be problematic. Click here for more details.

Files changed (43) hide show

returnn/PKG-INFO +2 -2
returnn/_setup_info_generated.py +2 -2
returnn/config.py +1 -1
returnn/datasets/distrib_files.py +53 -1
returnn/datasets/generating.py +3 -5
returnn/datasets/lm.py +20 -0
returnn/datasets/meta.py +179 -60
returnn/datasets/postprocessing.py +597 -108
returnn/datasets/util/vocabulary.py +90 -0
returnn/frontend/array_.py +46 -0
returnn/frontend/attention.py +54 -20
returnn/frontend/conv.py +273 -54
returnn/frontend/device.py +14 -1
returnn/frontend/encoder/conformer.py +20 -0
returnn/frontend/encoder/transformer.py +2 -0
returnn/frontend/loss.py +40 -1
returnn/frontend/math_.py +54 -14
returnn/frontend/module.py +8 -1
returnn/frontend/nested.py +5 -0
returnn/native_op.cpp +80 -0
returnn/sprint/cache.py +12 -13
returnn/tensor/_dim_extra.py +39 -24
returnn/tensor/utils.py +7 -4
returnn/tf/frontend_layers/_backend.py +4 -3
returnn/tf/layers/basic.py +15 -39
returnn/tf/native_op.py +11 -58
returnn/tf/network.py +1 -1
returnn/tf/util/basic.py +19 -0
returnn/torch/engine.py +67 -2
returnn/torch/frontend/_backend.py +135 -13
returnn/torch/frontend/bridge.py +61 -0
returnn/torch/util/exception_helper.py +7 -1
returnn/util/basic.py +6 -7
returnn/util/better_exchook.py +4 -0
returnn/util/collect_outputs_dict.py +79 -0
returnn/util/debug.py +11 -2
returnn/util/file_cache.py +15 -1
returnn/util/task_system.py +1 -1
{returnn-1.20251013.113026.dist-info → returnn-1.20260109.93428.dist-info}/METADATA +2 -2
{returnn-1.20251013.113026.dist-info → returnn-1.20260109.93428.dist-info}/RECORD +43 -42
{returnn-1.20251013.113026.dist-info → returnn-1.20260109.93428.dist-info}/LICENSE +0 -0
{returnn-1.20251013.113026.dist-info → returnn-1.20260109.93428.dist-info}/WHEEL +0 -0
{returnn-1.20251013.113026.dist-info → returnn-1.20260109.93428.dist-info}/top_level.txt +0 -0

returnn/torch/engine.py CHANGED Viewed

@@ -134,6 +134,14 @@ class Engine(EngineBase):
         self._forward_auto_split_batch_on_oom = config.bool("forward_auto_split_batch_on_oom", False)
         self._stop_on_nonfinite_train_score = config.bool("stop_on_nonfinite_train_score", True)
+        if config.bool("use_tensorboard", False):
+            from torch.utils.tensorboard import SummaryWriter
+            self._tensorboard_writer = SummaryWriter()
+            self._tensorboard_opts = config.typed_value("tensorboard_opts", {})
+        else:
+            self._tensorboard_writer = None
         default_float_dtype = config.value("default_float_dtype", None)
         if default_float_dtype is not None:
             assert isinstance(default_float_dtype, str)
@@ -257,6 +265,9 @@ class Engine(EngineBase):
             self.init_train_epoch()
             self.train_epoch()
+        if self._tensorboard_writer:
+            self._tensorboard_writer.close()
         print(f"Finished training at epoch {self.epoch}, global train step {self.global_train_step}", file=log.v3)
     def init_train_epoch(self):
@@ -513,6 +524,18 @@ class Engine(EngineBase):
                     batch_size_info=_get_batch_size_info(extern_data) if self._log_batch_size else None,
                     log_memory_usage_device=self._device if self._log_memory_usage else None,
                 )
+                if (
+                    self._tensorboard_writer
+                    and self.global_train_step % self._tensorboard_opts.get("log_every_n_train_steps", 100) == 0
+                ):
+                    # write losses/errors to tensorboard
+                    for key, val in eval_info.items():
+                        self._tensorboard_writer.add_scalar(f"train/{key}", val, global_step=self.global_train_step)
+                    self._tensorboard_writer.add_scalar(
+                        "train/learning_rate",
+                        self._updater.get_effective_learning_rate(),
+                        global_step=self.global_train_step,
+                    )
                 if self._stop_on_nonfinite_train_score:
                     if any(np.isinf(v) or np.isnan(v) for v in accumulated_losses_dict.values()):
@@ -702,12 +725,20 @@ class Engine(EngineBase):
                         start_elapsed=step_end_time - eval_start_time,
                         log_memory_usage_device=self._device if self._log_memory_usage else None,
                     )
                     step_idx += 1
             assert step_idx > 0, f"No data in dataset {dataset_name!r}."
             accumulated_losses_dict = accumulated_losses_dict / accumulated_inv_norm_factors_dict
             accumulated_losses_dict = self._maybe_extend_losses_info(accumulated_losses_dict)
+            if self._tensorboard_writer:
+                # write losses/errors to tensorboard
+                for key, val in accumulated_losses_dict.items():
+                    self._tensorboard_writer.add_scalar(
+                        f"{dataset_name}/{key}", val, global_step=self.global_train_step
+                    )
             self.learning_rate_control.set_epoch_error(
                 self.epoch, {f"{dataset_name}_loss_{k}": v for k, v in accumulated_losses_dict.items()}
             )
@@ -899,7 +930,7 @@ class Engine(EngineBase):
             if not os.path.exists(filename) and os.path.exists(model_epoch_filename):
                 filename = model_epoch_filename
             print("Load model %s" % (filename,), file=log.v4)
-            checkpoint_state = torch.load(filename, map_location=self._device)
+            checkpoint_state = _torch_load(filename, device=self._device)
             if epoch is None:
                 epoch = checkpoint_state.get("epoch", self._start_epoch or 1)
             step = checkpoint_state.get("step", 1)
@@ -999,7 +1030,7 @@ class Engine(EngineBase):
                         print("(No relevant parameters matching.)", file=log.v3)
                     continue
                 print(f"Pre-load weights for key '{preload_key}' from {opts['filename']}", file=log.v3)
-                preload_model_state = torch.load(opts["filename"], map_location=self._device)
+                preload_model_state = _torch_load(opts["filename"], device=self._device)
                 if opts.get("checkpoint_key", "model") is not None:
                     # This can be used if an external checkpoint saves a checkpoint a different structure that just the
                     # model state dict. E.g., if a checkpoint is created using
@@ -1032,6 +1063,28 @@ class Engine(EngineBase):
                 preload_model_state_keys = set(preload_model_state.keys())
                 loaded_state_keys.update(preload_model_state.keys())
                 missing_keys.difference_update(preload_model_state.keys())
+                custom_missing_load_func = opts.get("custom_missing_load_func")
+                if custom_missing_load_func:
+                    custom_missing_vars_map = {}
+                    for var_name in missing_keys_preload:
+                        var_shape = self._pt_model.state_dict()[var_name].shape
+                        var_val = custom_missing_load_func(
+                            name=var_name,
+                            shape=var_shape,
+                            preload_model_state=preload_model_state,
+                            **util.get_fwd_compat_kwargs(),
+                        )
+                        if var_val is not None:
+                            assert var_val.shape == var_shape
+                            custom_missing_vars_map[var_name] = var_val
+                    preload_model_state.update(custom_missing_vars_map)
+                    missing_keys_preload, unexpected_keys_preload = self._pt_model.load_state_dict(
+                        preload_model_state, strict=False
+                    )
+                    loaded_state_keys.update(preload_model_state.keys())
+                    missing_keys.difference_update(preload_model_state.keys())
                 del preload_model_state
                 gc.collect()
@@ -1669,3 +1722,15 @@ def _get_total_grad_norm(model: torch.nn.Module, p: float) -> float:
             p=p,
         ).item()
     )
+def _torch_load(filename: Union[str, os.PathLike], *, device: str) -> Dict[str, Any]:
+    # Might resolve PtCheckpoint or Sisyphus Path objects or so.
+    filename = os.fspath(filename)
+    if filename.endswith(".safetensors"):
+        from safetensors.torch import load_file as safetensors_load
+        return safetensors_load(filename, device=device)
+    return torch.load(filename, map_location=device)

returnn/torch/frontend/_backend.py CHANGED Viewed

@@ -1166,20 +1166,29 @@ class TorchBackend(Backend[torch.Tensor]):
         if start is None:
             start = 0
         if isinstance(size, Dim):
+            assert end is None
             size = size.get_dim_value()
         elif isinstance(size, Tensor):
+            assert end is None
             assert size.dims == ()  # scalar
             size = size.raw_tensor
-        if size is not None:
-            assert end is None
-            out.raw_tensor = torch.narrow(source.raw_tensor, dim=axis_int, start=start, length=size)
-        else:
+        elif isinstance(size, int):
+            pass
+        elif size is None:
             if isinstance(end, Tensor):
                 assert end.dims == ()
                 end = end.raw_tensor
-            if end is None:
+            elif isinstance(end, int):
+                if end < 0:
+                    end += axis.get_dim_value()
+            elif end is None:
                 end = axis.get_dim_value()
-            out.raw_tensor = torch.narrow(source.raw_tensor, dim=axis_int, start=start, length=end - start)
+            else:
+                raise TypeError(f"slice: unsupported type for end: {type(end)}")
+            size = end - start
+        else:
+            raise TypeError(f"slice: unsupported type for size: {type(size)}")
+        out.raw_tensor = torch.narrow(source.raw_tensor, dim=axis_int, start=start, length=size)
         return out
     @staticmethod
@@ -1352,12 +1361,24 @@ class TorchBackend(Backend[torch.Tensor]):
         a_dims = a.dims
         b_dims = b.dims
-        assert all(dim in a_dims for dim in reduce), (
-            f"'a' does not have the specified reduce dim(s) {reduce} (a dims: {a_dims})"
-        )
-        assert all(dim in b_dims for dim in reduce), (
-            f"'b' does not have the specified reduce dim(s) {reduce} (b dims: {b_dims})"
-        )
+        if not all(dim in a_dims for dim in reduce) or not all(dim in b_dims for dim in reduce):
+            # revert to the generic einsum implementation
+            assert all(dim in a_dims + b_dims for dim in reduce), "Some reduce Dims not in a or b."
+            result_dims = [dim for dim in a_dims if dim not in reduce] + [
+                dim for dim in b_dims if dim not in reduce and dim not in a_dims
+            ]
+            map_to_letter = {}
+            for dim in a_dims + b_dims:
+                if dim not in map_to_letter:
+                    map_to_letter[dim] = chr(97 + len(map_to_letter))  # 'a', 'b', 'c', ...
+            a_subscript = "".join(map_to_letter[dim] for dim in a_dims)
+            b_subscript = "".join(map_to_letter[dim] for dim in b_dims)
+            out_subscript = "".join(map_to_letter[dim] for dim in result_dims)
+            raw_result = torch.einsum(f"{a_subscript},{b_subscript}->{out_subscript}", a.raw_tensor, b.raw_tensor)
+            result_tensor = Tensor(
+                "einsum", dims=result_dims, raw_tensor=raw_result, dtype=TorchBackend.get_dtype_name_raw(raw_result)
+            )
+            return result_tensor
         if len(reduce) > 1:
             reduce = list(reduce)
@@ -1767,6 +1788,9 @@ class TorchBackend(Backend[torch.Tensor]):
         remaining_dims = [d for d in tensor.dims if d not in mask.dims]
         tensor_templ_dims = tuple(dims) + tuple(remaining_dims)
         in_raw = tensor.copy_compatible_to_dims_raw(tensor_templ_dims)
+        if any(in_raw.shape[i] == 1 < d.get_dim_value() for i, d in enumerate(dims)):
+            # unbroadcast
+            in_raw = in_raw.expand([d.get_dim_value() for d in tensor_templ_dims])
         if mask.raw_tensor.device.type == "meta":
             # This is not supported, but also, we would anyway not know the out shape.
             # However, instead of erroring, just assume some dummy mask.
@@ -1920,7 +1944,7 @@ class TorchBackend(Backend[torch.Tensor]):
         if not out_spatial_dims:
             out_spatial_dims = rf.make_conv_out_spatial_dims(
                 in_spatial_dims=in_spatial_dims,
-                filter_size=[d.dimension for d in filter_size],
+                filter_size=filter_size,
                 strides=strides or 1,
                 dilation_rate=dilation_rate or 1,
                 padding=padding,
@@ -2033,6 +2057,104 @@ class TorchBackend(Backend[torch.Tensor]):
         out.feature_dim = out_dim
         return out, out_spatial_dims
+    # noinspection PyShadowingBuiltins
+    @staticmethod
+    def transposed_conv(
+        source: Tensor,
+        *,
+        in_dim: Dim,
+        out_dim: Dim,
+        in_spatial_dims: Sequence[Dim],
+        out_spatial_dims: Optional[Sequence[Dim]] = None,
+        filter: Tensor,
+        filter_size: Sequence[Dim],
+        padding: str,
+        remove_padding: Union[Sequence[int], int] = 0,
+        output_padding: Optional[Union[Sequence[Optional[int]], int]] = None,
+        strides: Optional[Sequence[int]] = None,
+        bias: Optional[Tensor] = None,
+    ) -> Tuple[Tensor, Sequence[Dim]]:
+        """transposed convolution"""
+        if not out_spatial_dims:
+            out_spatial_dims = rf.make_transposed_conv_out_spatial_dims(
+                in_spatial_dims=in_spatial_dims,
+                filter_size=filter_size,
+                strides=strides,
+                padding=padding,
+                output_padding=output_padding,
+            )
+            assert remove_padding == 0  # not implemented yet otherwise...
+        if strides is None:
+            strides = [fs.dimension for fs in filter_size]
+        filter_dims = (in_dim, out_dim) + tuple(filter_size)
+        filter = filter.copy_transpose(filter_dims)
+        batch_dims = [d for d in source.dims if d not in (in_dim,) + tuple(in_spatial_dims)]
+        # Torch conv expects (N,C,<spatial dims>) as shape.
+        source = source.copy_transpose(batch_dims + [in_dim] + list(in_spatial_dims))
+        if len(batch_dims) == 1:
+            src_raw = source.raw_tensor
+        else:
+            src_raw = torch.reshape(
+                source.raw_tensor,
+                # potentially merge batch dims all together
+                [-1, in_dim.get_dim_value()] + [d.get_dim_value() for d in in_spatial_dims],
+            )
+        if padding == "same":
+            raise NotImplementedError("transposed_conv with padding='same' not implemented")
+        if padding == "valid":
+            padding_val = 0
+        else:
+            raise ValueError(f"invalid padding {padding!r}, expected 'same' or 'valid'")
+        if len(filter_size) == 1:
+            out_raw = torch.nn.functional.conv_transpose1d(
+                src_raw,
+                weight=filter.raw_tensor,
+                bias=bias.raw_tensor if bias is not None else None,
+                stride=strides,
+                padding=padding_val,
+                output_padding=output_padding or 0,
+            )
+        elif len(filter_size) == 2:
+            out_raw = torch.nn.functional.conv_transpose2d(
+                src_raw,
+                weight=filter.raw_tensor,
+                bias=bias.raw_tensor if bias is not None else None,
+                stride=strides,
+                padding=padding_val,
+                output_padding=output_padding or 0,
+            )
+        elif len(filter_size) == 3:
+            out_raw = torch.nn.functional.conv_transpose3d(
+                src_raw,
+                weight=filter.raw_tensor,
+                bias=bias.raw_tensor if bias is not None else None,
+                stride=strides,
+                padding=padding_val,
+                output_padding=output_padding or 0,
+            )
+        else:
+            raise ValueError(f"invalid number of filter dims {filter_size}, expected 1, 2, or 3")
+        if remove_padding:
+            if isinstance(remove_padding, int):
+                remove_padding = [remove_padding] * len(out_spatial_dims)
+            assert len(remove_padding) == len(out_spatial_dims)
+            slices = [slice(None)] * out_raw.ndim
+            for i, pad in enumerate(remove_padding):
+                if pad > 0:
+                    slices[2 + i] = slice(0, -pad)
+            out_raw = out_raw[tuple(slices)]
+        out = Tensor(
+            "transposed_conv",
+            dims=batch_dims + [out_dim] + list(out_spatial_dims),
+            dtype=TorchBackend.get_dtype_name_raw(out_raw),
+        )
+        if len(batch_dims) == 1:
+            out.raw_tensor = out_raw
+        else:
+            out.raw_tensor = torch.reshape(out_raw, [d.get_dim_value() for d in out.dims])
+        out.feature_dim = out_dim
+        return out, out_spatial_dims
     @staticmethod
     def pool(
         source: Tensor,

returnn/torch/frontend/bridge.py CHANGED Viewed

@@ -136,6 +136,15 @@ class RFModuleAsPTModule(torch.nn.Module):
     def _get_name(self):
         return self._rf_module.__class__.__name__ + "[RF→PT]"
+    def __repr__(self) -> str:
+        """
+        Return a custom repr for Sequential/ModuleList that compresses repeated module representations if possible,
+        otherwise fallback to default behavior.
+        """
+        if _can_use_compact_repr(self):
+            return _repr_compact(self)
+        return super().__repr__()
     @property
     def rf_module(self) -> rf.Module:
         """RF module"""
@@ -193,3 +202,55 @@ class RFModuleAsPTModule(torch.nn.Module):
             # See similar logic in torch.nn.Module._apply.
             pt_param = torch.nn.Parameter(tensor, tensor.requires_grad)
             rf_param.raw_tensor = pt_param
+def _can_use_compact_repr(self: RFModuleAsPTModule) -> bool:
+    return list(self._modules.keys()) == [str(i) for i in range(len(self._modules))]
+def _repr_compact(self: RFModuleAsPTModule) -> str:
+    """
+    Return a custom repr for Sequential/ModuleList that compresses repeated module representations.
+    Code copied and adapted from torch.nn.ModuleList.__repr__.
+    """
+    list_of_reprs = [repr(item) for item in self._modules.values()]
+    if len(list_of_reprs) == 0:
+        return self._get_name() + "()"
+    start_end_indices = [[0, 0]]
+    repeated_blocks = [list_of_reprs[0]]
+    for i, r in enumerate(list_of_reprs[1:], 1):
+        if r == repeated_blocks[-1]:
+            start_end_indices[-1][1] += 1
+            continue
+        start_end_indices.append([i, i])
+        repeated_blocks.append(r)
+    lines = []
+    main_str = self._get_name() + "("
+    for (start_id, end_id), b in zip(start_end_indices, repeated_blocks):
+        local_repr = f"({start_id}): {b}"  # default repr
+        if start_id != end_id:
+            n = end_id - start_id + 1
+            local_repr = f"({start_id}-{end_id}): {n} x {b}"
+        local_repr = _add_indent(local_repr, 2)
+        lines.append(local_repr)
+    main_str += "\n  " + "\n  ".join(lines) + "\n"
+    main_str += ")"
+    return main_str
+def _add_indent(s_: str, num_spaces: int) -> str:
+    s = s_.split("\n")
+    # don't do anything for single-line stuff
+    if len(s) == 1:
+        return s_
+    first = s.pop(0)
+    s = [(num_spaces * " ") + line for line in s]
+    s = "\n".join(s)
+    s = first + "\n" + s
+    return s

returnn/torch/util/exception_helper.py CHANGED Viewed

@@ -71,7 +71,13 @@ def help_on_torch_exception(
     if not count_frames:
         exc_ext.append("(No module call frames.)")
-    if len(exc.args) == 1 and isinstance(exc.args[0], str) and not always_direct_print:
+    if (
+        # KeyError formatting would be wrong, showing `KeyError: "enc_spatial_dim\n\nStep idx: 0\..."`
+        not isinstance(exc, KeyError)
+        and len(exc.args) == 1
+        and isinstance(exc.args[0], str)
+        and not always_direct_print
+    ):
         exc.args = ("\n".join([exc.args[0], ""] + exc_ext),)
     else:
         for msg in exc_ext:

returnn/util/basic.py CHANGED Viewed

@@ -365,12 +365,9 @@ def get_checkpoint_filepattern(filepath):
     :return: CheckpointLoader compatible filepattern
     :rtype: str
     """
-    if filepath.endswith(".meta"):
-        return filepath[: -len(".meta")]
-    elif filepath.endswith(".index"):
-        return filepath[: -len(".index")]
-    elif filepath.endswith(".pt"):
-        return filepath[: -len(".pt")]
+    for ext in [".meta", ".index", ".pt"]:
+        if filepath.endswith(ext):
+            return filepath[: -len(ext)]
     return filepath
@@ -557,7 +554,9 @@ def get_tensorflow_version_tuple() -> Tuple[int, ...]:
     import tensorflow as tf  # noqa
     import re
-    return tuple([int(re.sub("(-rc[0-9]|-dev[0-9]*)", "", s)) for s in tf.__version__.split(".")])
+    # Remove unwanted suffixes from the TF version string (e.g. "2.20.0-dev0+selfbuilt")
+    filtered_version = [re.sub("(-rc[0-9]|-dev[0-9]*)(\\+selfbuilt)?", "", s) for s in tf.__version__.split(".")]
+    return tuple(int(v) for v in filtered_version)
 class ReportImportedDevModules:

returnn/util/better_exchook.py CHANGED Viewed

@@ -1093,6 +1093,7 @@ def format_tb(
     with_color=None,
     with_vars=None,
     clear_frames=True,
+    colorize=None,
 ):
     """
     Formats a traceback into a list of strings, each corresponding to one frame.
@@ -1110,11 +1111,14 @@ def format_tb(
         That will potentially fix some mem leaks regarding locals, so it can be important.
         Also see https://github.com/python/cpython/issues/113939.
         However, any further access to frame locals will not work (e.g., if you want to use a debugger afterward).
+    :param colorize: for compat with Python >=3.13, currently ignored
     :return: list of strings, each corresponding to one frame in the traceback.
         Each string contains the file name, line number, function name, source code line, maybe relevant variables,
         etc., and a final newline.
     :rtype: list[str]
     """
+    if colorize is not None and with_color is None:
+        with_color = colorize
     color = Color(enable=with_color)
     output = _OutputLinesCollector(color=color)

returnn/util/collect_outputs_dict.py ADDED Viewed

@@ -0,0 +1,79 @@
+"""
+Customized (derived) dict to pass as ``collected_outputs`` to some of the RF modules,
+or potential other use cases.
+You can predefine (by pattern) what kind of outputs you want to collect and store in this dict.
+"""
+from typing import Optional, Union, Sequence
+import fnmatch
+class CollectOutputsDict(dict):
+    """
+    Customized (derived) dict, where you can predefine (by key pattern)
+    what kind of keys you want to collect and store in this dict.
+    Other keys will be ignored.
+    """
+    def __init__(self, *args, allowed_key_patterns: Optional[Sequence[str]] = None, **kwargs):
+        """
+        Initialize the CollectOutputsDict.
+        :param allowed_key_patterns:
+            List of key patterns (with wildcards) that are allowed to be stored in the dict.
+            If None, all keys are allowed.
+        """
+        super().__init__(*args, **kwargs)
+        self.allowed_key_patterns = allowed_key_patterns
+    def __setitem__(self, key, value):
+        """
+        Set an item in the dict if the key matches allowed patterns.
+        """
+        if self.is_key_allowed(key):
+            super().__setitem__(key, value)
+    def setdefault(self, key, default=None):
+        """
+        Set default value for a key if it matches allowed patterns.
+        """
+        if self.is_key_allowed(key):
+            return super().setdefault(key, default)
+        return None
+    def update(self, mapping, **kwargs):
+        """
+        Update the dict with another mapping, only adding allowed keys.
+        """
+        assert not kwargs
+        for key, value in mapping.items():
+            if self.is_key_allowed(key):
+                super().__setitem__(key, value)
+    def is_key_allowed(self, key: str) -> bool:
+        """
+        Check if the key matches any of the allowed patterns.
+        :param key:
+        :return: True if the key is allowed, False otherwise.
+        """
+        if self.allowed_key_patterns is None:
+            return True  # If no patterns defined, allow all keys
+        for pattern in self.allowed_key_patterns:
+            if fnmatch.fnmatch(key, pattern):
+                return True
+        return False
+def is_key_allowed_in_collect_outputs_dict(collect_outputs: Union[CollectOutputsDict, dict], key: str) -> bool:
+    """
+    Check if a key is allowed in the given CollectOutputsDict.
+    :param collect_outputs:
+    :param key:
+    :return: True if the key is allowed, False otherwise.
+    """
+    if isinstance(collect_outputs, CollectOutputsDict):
+        return collect_outputs.is_key_allowed(key)
+    return True  # If it's a regular dict, all keys are allowed

returnn/util/debug.py CHANGED Viewed

@@ -704,7 +704,7 @@ def check_py_traces_rf_to_pt_equal(
     """
     import random
     import torch
-    from returnn.tensor import Tensor, Dim
+    from returnn.tensor import Dim
     import returnn.frontend as rf
     # noinspection PyProtectedMember
@@ -715,9 +715,18 @@ def check_py_traces_rf_to_pt_equal(
     def _get_entry(trace, func, i, name, j):
         return trace[func][i][name][j]
+    def _get_entry_attr(trace, func, i, name, j):
+        name, attr = name.split(".", 1)
+        obj = trace[func][i][name][j]
+        return eval(f"{name}.{attr}", {name: obj})
     def _resolve_dim(dim: Union[Dim, str]) -> Dim:
         if isinstance(dim, Dim):
             return dim
+        elif isinstance(dim, str) and "." in dim:
+            dim = _get_entry_attr(trace_rf, *check_rf[:2], dim, -1)
+            assert isinstance(dim, Dim)
+            return dim
         elif isinstance(dim, str):
             dim = _get_entry(trace_rf, *check_rf[:2], dim, -1)
             assert isinstance(dim, Dim)
@@ -763,7 +772,7 @@ def check_py_traces_rf_to_pt_equal(
             if len(indices) > 5:
                 msgs.append("  non-matching ...")
             non_matching.append("\n".join(msgs_prefix + msgs))
-            print(f"  mismatch!")
+            print("  mismatch!")
             for msg in msgs:
                 print(msg)

returnn/util/file_cache.py CHANGED Viewed

@@ -426,7 +426,21 @@ class FileCache:
                 orig_mtime_ns = os.stat(src_filename).st_mtime_ns
                 FileInfo(mtime_ns=orig_mtime_ns).save(info_file_name)
-                _copy_with_prealloc(src_filename, dst_tmp_filename)
+                try:
+                    _copy_with_prealloc(src_filename, dst_tmp_filename)
+                except Exception:
+                    # Cleanup if it was created already.
+                    # That avoids some of the ambiguity of the existence of the .copy file.
+                    # https://github.com/rwth-i6/returnn/issues/1785
+                    try:
+                        os.remove(dst_tmp_filename)
+                    except FileNotFoundError:
+                        pass
+                    try:
+                        os.remove(info_file_name)
+                    except FileNotFoundError:  # not really expected here, but safe to ignore
+                        pass
+                    raise
                 os.rename(dst_tmp_filename, dst_filename)
     @staticmethod

returnn/util/task_system.py CHANGED Viewed

@@ -671,7 +671,7 @@ class Pickler(_BasePickler):
                 return
         # For some reason, Numpy fromstring/tostring is faster than Numpy loads/dumps.
         self.save(make_numpy_ndarray_fromstring)
-        self.save((obj.tostring(), str(obj.dtype), obj.shape))
+        self.save((obj.tobytes(), str(obj.dtype), obj.shape))
         self.write(pickle.REDUCE)
     dispatch[numpy.ndarray] = save_ndarray

{returnn-1.20251013.113026.dist-info → returnn-1.20260109.93428.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20251013.113026
+Version: 1.20260109.93428
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer
@@ -36,7 +36,7 @@ Welcome to RETURNN
 `RETURNN paper 2018 <https://arxiv.org/abs/1805.05225>`_.
 RETURNN - RWTH extensible training framework for universal recurrent neural networks,
-is a Theano/TensorFlow-based implementation of modern recurrent neural network architectures.
+is a PyTorch/TensorFlow-based implementation of modern recurrent neural network architectures.
 It is optimized for fast and reliable training of recurrent neural networks in a multi-GPU environment.
 The high-level features and goals of RETURNN are:

returnn 1.20251013.113026__py3-none-any.whl → 1.20260109.93428__py3-none-any.whl

Potentially problematic release.

returnn 1.20251013.113026__py3-none-any.whl → 1.20260109.93428__py3-none-any.whl