PyPI - returnn - Versions diffs - 1.20251027.232712__py3-none-any.whl → 1.20260119.15400__py3-none-any.whl - Mend

returnn 1.20251027.232712__py3-none-any.whl → 1.20260119.15400__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54) hide show

returnn/PKG-INFO +2 -2
returnn/__old_mod_loader__.py +26 -2
returnn/_setup_info_generated.py +2 -2
returnn/datasets/lm.py +130 -42
returnn/datasets/meta.py +93 -43
returnn/datasets/postprocessing.py +597 -108
returnn/datasets/util/vocabulary.py +90 -0
returnn/frontend/__init__.py +1 -0
returnn/frontend/_backend.py +41 -0
returnn/frontend/_native/__init__.py +22 -0
returnn/frontend/_numpy_backend.py +7 -0
returnn/frontend/_utils.py +1 -1
returnn/frontend/array_.py +48 -2
returnn/frontend/assert_.py +35 -0
returnn/frontend/attention.py +54 -20
returnn/frontend/conv.py +273 -54
returnn/frontend/device.py +14 -1
returnn/frontend/encoder/conformer.py +20 -0
returnn/frontend/encoder/transformer.py +2 -0
returnn/frontend/loss.py +222 -3
returnn/frontend/math_.py +54 -14
returnn/native_op.cpp +182 -172
returnn/native_op.py +36 -31
returnn/sprint/cache.py +12 -13
returnn/tensor/_dim_extra.py +7 -7
returnn/tensor/_tensor_extra.py +10 -10
returnn/tensor/utils.py +8 -5
returnn/tf/frontend_layers/_backend.py +7 -3
returnn/tf/layers/basic.py +27 -40
returnn/tf/native_op.py +27 -63
returnn/tf/network.py +1 -1
returnn/tf/util/basic.py +22 -197
returnn/torch/engine.py +157 -6
returnn/torch/frontend/_backend.py +280 -29
returnn/torch/frontend/bridge.py +61 -0
returnn/torch/frontend/compile_helper.py +106 -0
returnn/torch/util/array_.py +30 -0
returnn/torch/util/assert_.py +122 -0
returnn/torch/util/exception_helper.py +7 -1
returnn/torch/util/native_op.py +885 -0
returnn/torch/util/native_op_code_compiler.py +308 -0
returnn/util/basic.py +6 -7
returnn/util/better_exchook.py +4 -0
returnn/util/cuda_env.py +332 -0
returnn/util/debug.py +12 -2
returnn/util/file_cache.py +15 -1
returnn/util/fsa.py +17 -13
returnn/util/native_code_compiler.py +104 -47
returnn/util/task_system.py +1 -1
{returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/METADATA +2 -2
{returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/RECORD +54 -48
{returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/WHEEL +1 -1
{returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/LICENSE +0 -0
{returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/top_level.txt +0 -0

returnn/util/debug.py CHANGED Viewed

@@ -309,6 +309,7 @@ def _get_native_signal_handler_lib_filename() -> str:
               old_signal_handler[SIGILL] = signal(SIGILL, signal_handler);
               old_signal_handler[SIGABRT] = signal(SIGABRT, signal_handler);
               old_signal_handler[SIGFPE] = signal(SIGFPE, signal_handler);
+              old_signal_handler[SIGUSR1] = signal(SIGUSR1, signal_handler);
             }
             """
         ),
@@ -704,7 +705,7 @@ def check_py_traces_rf_to_pt_equal(
     """
     import random
     import torch
-    from returnn.tensor import Tensor, Dim
+    from returnn.tensor import Dim
     import returnn.frontend as rf
     # noinspection PyProtectedMember
@@ -715,9 +716,18 @@ def check_py_traces_rf_to_pt_equal(
     def _get_entry(trace, func, i, name, j):
         return trace[func][i][name][j]
+    def _get_entry_attr(trace, func, i, name, j):
+        name, attr = name.split(".", 1)
+        obj = trace[func][i][name][j]
+        return eval(f"{name}.{attr}", {name: obj})
     def _resolve_dim(dim: Union[Dim, str]) -> Dim:
         if isinstance(dim, Dim):
             return dim
+        elif isinstance(dim, str) and "." in dim:
+            dim = _get_entry_attr(trace_rf, *check_rf[:2], dim, -1)
+            assert isinstance(dim, Dim)
+            return dim
         elif isinstance(dim, str):
             dim = _get_entry(trace_rf, *check_rf[:2], dim, -1)
             assert isinstance(dim, Dim)
@@ -763,7 +773,7 @@ def check_py_traces_rf_to_pt_equal(
             if len(indices) > 5:
                 msgs.append("  non-matching ...")
             non_matching.append("\n".join(msgs_prefix + msgs))
-            print(f"  mismatch!")
+            print("  mismatch!")
             for msg in msgs:
                 print(msg)

returnn/util/file_cache.py CHANGED Viewed

@@ -426,7 +426,21 @@ class FileCache:
                 orig_mtime_ns = os.stat(src_filename).st_mtime_ns
                 FileInfo(mtime_ns=orig_mtime_ns).save(info_file_name)
-                _copy_with_prealloc(src_filename, dst_tmp_filename)
+                try:
+                    _copy_with_prealloc(src_filename, dst_tmp_filename)
+                except Exception:
+                    # Cleanup if it was created already.
+                    # That avoids some of the ambiguity of the existence of the .copy file.
+                    # https://github.com/rwth-i6/returnn/issues/1785
+                    try:
+                        os.remove(dst_tmp_filename)
+                    except FileNotFoundError:
+                        pass
+                    try:
+                        os.remove(info_file_name)
+                    except FileNotFoundError:  # not really expected here, but safe to ignore
+                        pass
+                    raise
                 os.rename(dst_tmp_filename, dst_filename)
     @staticmethod

returnn/util/fsa.py CHANGED Viewed

@@ -10,7 +10,7 @@ from __future__ import annotations
 import numpy
 import pickle
 import itertools
-import typing
+from typing import Optional, List, Tuple
 from copy import deepcopy
 from os.path import isfile
 from returnn.log import log
@@ -397,7 +397,7 @@ class Ctc:
             e_end_3 = Edge(self.fsa.num_states, self.fsa.num_states + 1, self.fsa.lem_list[-1][-1], 1.0)
             self.fsa.edges.append(e_end_3)
             self.fsa.num_states += 3
-            # add node nuber of final state
+            # add node number of final state
             self.final_states.append(self.fsa.num_states - 1)
         elif self.fsa.lem_edges is not None:
@@ -806,7 +806,7 @@ class Ngram:
         :param int n: size of the gram (1, 2, 3)
         """
         self.n = n
-        self.lexicon = None  # type: typing.Optional[Lexicon]
+        self.lexicon: Optional[Lexicon] = None
         # lexicon consists of 3 entries: phoneme_list, phonemes and lemmas
         # phoneme_list: list of string phonemes in the lexicon
         # phonemes: dict of dict of str {phone: {index: , symbol: , variation:}}
@@ -1059,7 +1059,7 @@ class FastBwFsaShared:
     def __init__(self):
         self.num_states = 1
-        self.edges = []  # type: typing.List[Edge]
+        self.edges: List[Edge] = []
     def add_edge(self, source_state_idx, target_state_idx, emission_idx, weight=0.0):
         """
@@ -1148,17 +1148,20 @@ class FastBwFsaShared:
         )
-def get_ctc_fsa_fast_bw(targets, seq_lens, blank_idx):
+def get_ctc_fsa_fast_bw(
+    *, targets: numpy.ndarray, seq_lens: numpy.ndarray, blank_idx: int, label_loop: bool = True
+) -> FastBaumWelchBatchFsa:
     """
-    :param numpy.ndarray targets: shape (batch,time)
-    :param numpy.ndarray seq_lens: shape (batch)
-    :param int blank_idx:
-    :rtype: FastBaumWelchBatchFsa
+    :param targets: shape (batch,time)
+    :param seq_lens: shape (batch)
+    :param blank_idx:
+    :param label_loop:
+    :return: FSA
     """
     n_batch, n_time = targets.shape
     assert seq_lens.shape == (n_batch,)
-    edges = []  # type: typing.List[typing.Tuple[int,int,int,int]]  # list of (from,to,emission_idx,sequence_idx)
-    start_end_states = []  # type: typing.List[typing.Tuple[int,int]]  # list of (start,end), same len as batch
+    edges: List[Tuple[int, int, int, int]] = []  # list of (from,to,emission_idx,sequence_idx)
+    start_end_states: List[Tuple[int, int]] = []  # list of (start,end), same len as batch
     state_idx = 0
     # Note: We don't use weights on the edges, i.e. they are all set to zero.
     # I.e. we want that all strings for some given length T have the same probability.
@@ -1188,9 +1191,10 @@ def get_ctc_fsa_fast_bw(targets, seq_lens, blank_idx):
                 # Skip directly to final state (state_idx + 3).
                 edges.append((state_idx, state_idx + 3, label_idx, batch_idx))  # label
             state_idx += 1
-            edges.append((state_idx, state_idx, label_idx, batch_idx))  # label loop
+            if label_loop:
+                edges.append((state_idx, state_idx, label_idx, batch_idx))  # label loop
             edges.append((state_idx, state_idx + 1, blank_idx, batch_idx))  # blank
-            if not is_final_label and label_idx != next_label_idx:
+            if not is_final_label and (not label_loop or label_idx != next_label_idx):
                 # Skip over blank is allowed in this case.
                 edges.append((state_idx, state_idx + 2, next_label_idx, batch_idx))  # next label
                 if next_is_final_label:

returnn/util/native_code_compiler.py CHANGED Viewed

@@ -3,10 +3,11 @@ Native code compiler
 """
 from __future__ import annotations
-from typing import Optional, List
+from typing import Optional, Union, Sequence, List, Tuple, Dict
 import typing
 import os
 import sys
+import shutil
 from . import basic as util
@@ -17,47 +18,50 @@ class NativeCodeCompiler:
     """
     CacheDirName = "returnn_native"
-    CollectedCompilers = None  # type: Optional[List[NativeCodeCompiler]]
+    CollectedCompilers: Optional[List[NativeCodeCompiler]] = None
     def __init__(
         self,
-        base_name,
-        code_version,
-        code,
-        is_cpp=True,
-        c_macro_defines=None,
-        ld_flags=None,
-        include_paths=(),
-        include_deps=None,
-        static_version_name=None,
-        should_cleanup_old_all=True,
-        should_cleanup_old_mydir=False,
-        use_cxx11_abi=False,
-        log_stream=None,
-        verbose=False,
+        base_name: str,
+        *,
+        code_version: Union[int, Tuple[int, ...]] = 1,
+        code: str,
+        is_cpp: bool = True,
+        c_macro_defines: Optional[Dict[str, Union[str, int, None]]] = None,
+        ld_flags: Optional[Sequence[str]] = None,
+        include_paths: Optional[Sequence[str]] = (),
+        include_deps: Optional[Sequence[str]] = None,
+        static_version_name: Optional[str] = None,
+        should_cleanup_old_all: bool = True,
+        should_cleanup_old_mydir: bool = False,
+        use_cxx11_abi: bool = False,
+        log_stream: Optional[typing.TextIO] = None,
+        verbose: Optional[bool] = None,
     ):
         """
-        :param str base_name: base name for the module, e.g. "zero_out"
-        :param int|tuple[int] code_version: check for the cache whether to reuse
-        :param str code: the source code itself
-        :param bool is_cpp: if False, C is assumed
-        :param dict[str,str|int|None]|None c_macro_defines: e.g. {"TENSORFLOW": 1}
-        :param list[str]|None ld_flags: e.g. ["-lblas"]
-        :param list[str]|tuple[str] include_paths:
-        :param list[str]|None include_deps: if provided and an existing lib file,
+        :param base_name: base name for the module, e.g. "zero_out"
+        :param code_version: check for the cache whether to reuse
+        :param code: the source code itself
+        :param is_cpp: if False, C is assumed
+        :param c_macro_defines: e.g. {"TENSORFLOW": 1}
+        :param ld_flags: e.g. ["-lblas"]
+        :param include_paths:
+        :param include_deps: if provided and an existing lib file,
             we will check if any dependency is newer
             and we need to recompile. we could also do it automatically via -MD but that seems overkill and too slow.
-        :param str|None static_version_name: normally, we use .../base_name/hash as the dir
+        :param static_version_name: normally, we use .../base_name/hash as the dir
             but this would use .../base_name/static_version_name.
-        :param bool should_cleanup_old_all: whether we should look in the cache dir
+        :param should_cleanup_old_all: whether we should look in the cache dir
             and check all ops if we can delete some old ones which are older than some limit
             (self._cleanup_time_limit_days)
-        :param bool should_cleanup_old_mydir: whether we should delete our op dir before we compile there.
-        :param typing.TextIO|None log_stream: file stream for print statements
-        :param bool verbose: be slightly more verbose
+        :param should_cleanup_old_mydir: whether we should delete our op dir before we compile there.
+        :param log_stream: file stream for print statements
+        :param verbose: be slightly more verbose
         """
         if self.CollectedCompilers is not None:
             self.CollectedCompilers.append(self)
+        if verbose is None:
+            verbose = os.environ.get("RETURNN_NATIVE_CODE_COMPILER_VERBOSE") == "1"
         self.verbose = verbose
         self.cache_dir = "%s/%s" % (util.get_cache_dir(), self.CacheDirName)
         self._include_paths = list(include_paths)
@@ -69,6 +73,7 @@ class NativeCodeCompiler:
         self.ld_flags = ld_flags or []
         self.include_deps = include_deps
         self.static_version_name = static_version_name
+        self.use_cxx11_abi = use_cxx11_abi
         self._code_hash = self._make_code_hash()
         self._info_dict = self._make_info_dict()
         self._hash = self._make_hash()
@@ -76,7 +81,6 @@ class NativeCodeCompiler:
         if should_cleanup_old_all:
             self._cleanup_old()
         self._should_cleanup_old_mydir = should_cleanup_old_mydir
-        self.use_cxx11_abi = use_cxx11_abi
         self._log_stream = log_stream
         if self.verbose:
             print("%s: %r" % (self.__class__.__name__, self), file=log_stream)
@@ -157,7 +161,16 @@ class NativeCodeCompiler:
         assert isinstance(res, dict)
         return res
-    _relevant_info_keys = ("code_version", "code_hash", "c_macro_defines", "ld_flags", "compiler_bin", "platform")
+    _relevant_info_keys = (
+        "code_version",
+        "code_hash",
+        "c_macro_defines",
+        "ld_flags",
+        "compiler_bin",
+        "platform",
+        "use_cxx11_abi",
+        "cpp_version",
+    )
     def _make_info_dict(self):
         """
@@ -174,6 +187,8 @@ class NativeCodeCompiler:
             "ld_flags": self.ld_flags,
             "compiler_bin": self._get_compiler_bin(),
             "platform": platform.platform(),
+            "use_cxx11_abi": self.use_cxx11_abi,
+            "cpp_version": self.cpp_version,
         }
     def _make_code_hash(self):
@@ -251,8 +266,8 @@ class NativeCodeCompiler:
         :rtype: str
         """
         if self.is_cpp:
-            return "g++"
-        return "gcc"
+            return get_cpp_bin()
+        return get_cc_bin()
     def _transform_compiler_opts(self, opts):
         """
@@ -261,27 +276,35 @@ class NativeCodeCompiler:
         """
         return opts
+    cpp_version = 11
     def _extra_common_opts(self):
         """
         :rtype: list[str]
         """
         if self.is_cpp:
-            return ["-std=c++11"]
+            return [f"-std=c++{self.cpp_version}"]
         return []
-    @classmethod
-    def _transform_ld_flag(cls, opt):
-        """
-        :param str opt:
-        :rtype: str
-        """
+    def _transform_ld_flags(self, opts: Sequence[str]) -> Sequence[str]:
+        res = []
+        for opt in opts:
+            if opt.startswith("-l") or opt.startswith("-L"):
+                res.append(opt)
+            else:
+                res.append("-Wl," + opt)
+        opts = res
         if sys.platform == "darwin":
-            # It seems some versions of MacOS ld cannot handle the `-l:filename` argument correctly.
-            # E.g. TensorFlow 1.14 incorrectly uses this.
-            # https://github.com/tensorflow/tensorflow/issues/30564
-            if opt.startswith("-l:lib") and opt.endswith(".dylib"):
-                return "-l%s" % opt[len("-l:lib") : -len(".dylib")]
-        return opt
+            res = []
+            for opt in opts:
+                # It seems some versions of MacOS ld cannot handle the `-l:filename` argument correctly.
+                # E.g. TensorFlow 1.14 incorrectly uses this.
+                # https://github.com/tensorflow/tensorflow/issues/30564
+                if opt.startswith("-l:lib") and opt.endswith(".dylib"):
+                    opt = "-l%s" % opt[len("-l:lib") : -len(".dylib")]
+                res.append(opt)
+            return res
+        return opts
     def _maybe_compile_inner(self):
         # Directory should be created by the locking mechanism.
@@ -300,7 +323,7 @@ class NativeCodeCompiler:
         common_opts += ["-D%s=%s" % item for item in sorted(self.c_macro_defines.items())]
         common_opts += ["-g"]
         opts = common_opts + [self._c_filename, "-o", self._so_filename]
-        opts += list(map(self._transform_ld_flag, self.ld_flags))
+        opts += self._transform_ld_flags(self.ld_flags)
         cmd_bin = self._get_compiler_bin()
         cmd_args = [cmd_bin] + opts
         from subprocess import Popen, PIPE, STDOUT, CalledProcessError
@@ -348,3 +371,37 @@ class NativeCodeCompiler:
         """
         self._maybe_compile()
         return self._so_filename
+def get_cc_bin() -> str:
+    """
+    :return: path
+    """
+    cc_bin = os.environ.get("CC", "")
+    if cc_bin:
+        if cc_bin.startswith("/"):
+            return cc_bin
+        cc_bin = shutil.which(cc_bin)
+        if cc_bin:
+            return cc_bin
+    cc_bin = shutil.which("cc") or shutil.which("clang") or shutil.which("gcc")
+    if not cc_bin:
+        raise RuntimeError("Cannot find C compiler (cc, clang, gcc) in PATH")
+    return cc_bin
+def get_cpp_bin() -> str:
+    """
+    :return: path
+    """
+    cpp_bin = os.environ.get("CXX", "")
+    if cpp_bin:
+        if cpp_bin.startswith("/"):
+            return cpp_bin
+        cpp_bin = shutil.which(cpp_bin)
+        if cpp_bin:
+            return cpp_bin
+    cpp_bin = shutil.which("c++") or shutil.which("cpp") or shutil.which("clang++") or shutil.which("g++")
+    if not cpp_bin:
+        raise RuntimeError("Cannot find C++ compiler (c++, cpp, clang++, g++) in PATH")
+    return cpp_bin

returnn/util/task_system.py CHANGED Viewed

@@ -671,7 +671,7 @@ class Pickler(_BasePickler):
                 return
         # For some reason, Numpy fromstring/tostring is faster than Numpy loads/dumps.
         self.save(make_numpy_ndarray_fromstring)
-        self.save((obj.tostring(), str(obj.dtype), obj.shape))
+        self.save((obj.tobytes(), str(obj.dtype), obj.shape))
         self.write(pickle.REDUCE)
     dispatch[numpy.ndarray] = save_ndarray

{returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20251027.232712
+Version: 1.20260119.15400
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer
@@ -36,7 +36,7 @@ Welcome to RETURNN
 `RETURNN paper 2018 <https://arxiv.org/abs/1805.05225>`_.
 RETURNN - RWTH extensible training framework for universal recurrent neural networks,
-is a Theano/TensorFlow-based implementation of modern recurrent neural network architectures.
+is a PyTorch/TensorFlow-based implementation of modern recurrent neural network architectures.
 It is optimized for fast and reliable training of recurrent neural networks in a multi-GPU environment.
 The high-level features and goals of RETURNN are:

returnn 1.20251027.232712__py3-none-any.whl → 1.20260119.15400__py3-none-any.whl

returnn 1.20251027.232712__py3-none-any.whl → 1.20260119.15400__py3-none-any.whl