PyPI - returnn - Versions diffs - 1.20260105.192646__py3-none-any.whl → 1.20260119.15400__py3-none-any.whl - Mend

returnn 1.20260105.192646__py3-none-any.whl → 1.20260119.15400__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

returnn/PKG-INFO +1 -1
returnn/__old_mod_loader__.py +26 -2
returnn/_setup_info_generated.py +2 -2
returnn/datasets/lm.py +110 -42
returnn/frontend/__init__.py +1 -0
returnn/frontend/_backend.py +41 -0
returnn/frontend/_native/__init__.py +22 -0
returnn/frontend/_numpy_backend.py +7 -0
returnn/frontend/_utils.py +1 -1
returnn/frontend/array_.py +6 -5
returnn/frontend/assert_.py +35 -0
returnn/frontend/device.py +14 -1
returnn/frontend/encoder/conformer.py +19 -0
returnn/frontend/loss.py +183 -3
returnn/frontend/math_.py +54 -14
returnn/native_op.cpp +104 -174
returnn/native_op.py +36 -31
returnn/tensor/_dim_extra.py +7 -7
returnn/tensor/_tensor_extra.py +10 -10
returnn/tensor/utils.py +1 -1
returnn/tf/frontend_layers/_backend.py +3 -1
returnn/tf/layers/basic.py +13 -2
returnn/tf/native_op.py +16 -5
returnn/tf/util/basic.py +7 -201
returnn/torch/engine.py +120 -3
returnn/torch/frontend/_backend.py +166 -22
returnn/torch/frontend/bridge.py +61 -0
returnn/torch/frontend/compile_helper.py +106 -0
returnn/torch/util/array_.py +30 -0
returnn/torch/util/assert_.py +122 -0
returnn/torch/util/native_op.py +885 -0
returnn/torch/util/native_op_code_compiler.py +308 -0
returnn/util/basic.py +3 -1
returnn/util/cuda_env.py +332 -0
returnn/util/debug.py +1 -0
returnn/util/fsa.py +17 -13
returnn/util/native_code_compiler.py +104 -47
{returnn-1.20260105.192646.dist-info → returnn-1.20260119.15400.dist-info}/METADATA +1 -1
{returnn-1.20260105.192646.dist-info → returnn-1.20260119.15400.dist-info}/RECORD +42 -36
{returnn-1.20260105.192646.dist-info → returnn-1.20260119.15400.dist-info}/WHEEL +1 -1
{returnn-1.20260105.192646.dist-info → returnn-1.20260119.15400.dist-info}/LICENSE +0 -0
{returnn-1.20260105.192646.dist-info → returnn-1.20260119.15400.dist-info}/top_level.txt +0 -0

returnn/PKG-INFO CHANGED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20260105.192646
+Version: 1.20260119.15400
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer

returnn/__old_mod_loader__.py CHANGED Viewed

@@ -17,7 +17,7 @@ This is supported as well.
 import sys
 import os
 import types
-import typing
+from typing import Any, Dict
 import importlib
 old_to_new_mod_mapping = {
@@ -122,7 +122,7 @@ class _LazyLoader(types.ModuleType):
             fn = "%s/%s/__init__.py" % (_base_dir, full_mod_name.replace(".", "/"))
             assert os.path.exists(fn), "_LazyLoader: mod %r not found in %r" % (full_mod_name, _base_dir)
         self.__file__ = fn
-        self._lazy_mod_config = dict(full_mod_name=full_mod_name, **kwargs)  # type: typing.Dict[str]
+        self._lazy_mod_config: Dict[str, Any] = dict(full_mod_name=full_mod_name, **kwargs)
     def _load(self):
         full_mod_name = self.__name__
@@ -172,6 +172,30 @@ class _LazyLoader(types.ModuleType):
         return super(_LazyLoader, self).__getattribute__(item)
     def __getattr__(self, item):
+        if item == "torch":
+            # torch.compile Dynamo hashing can trigger this, when it uses pickle to serialize some object state,
+            # which iterates through sys.modules and does getattr on each module.
+            # In this case, it searches for torch.
+            #   File ".../torch/_inductor/codecache.py", line 607 in dumps
+            #   File ".../torch/_inductor/codecache.py", line 622 in get_hash
+            #   File ".../torch/_inductor/codecache.py", line 961 in compiled_fx_graph_hash
+            #   ...
+            # Unfortunately, Pickler.dump is native code, so we cannot easily check whether that is the parent frame.
+            # The C stacktrace looks like:
+            # ...
+            # 7   Python                              0x0000000102e7d504 call_attribute + 80
+            # 8   Python                              0x0000000102e7d400 _Py_slot_tp_getattr_hook + 576
+            # 9   Python                              0x0000000102e507a0 PyObject_GetOptionalAttr + 248
+            # 10  _pickle.cpython-313-darwin.so       0x0000000102d24fb4 get_deep_attribute + 104
+            # 11  _pickle.cpython-313-darwin.so       0x0000000102d250b8 _checkmodule + 88
+            # 12  _pickle.cpython-313-darwin.so       0x0000000102d22588 save_global + 3024
+            # 13  _pickle.cpython-313-darwin.so       0x0000000102d1eddc save + 3424
+            # ...
+            # Right now, we just check for `item == "torch"` as a heuristic,
+            # which should never exist for any of the old-style wrapped modules here.
+            # We could maybe also check sys._getframe(1).f_code or so and add some other heuristics...
+            raise AttributeError(f"module {self.__name__} has no attribute {item} (lazy loading skipped)")
         module = self._load()
         return getattr(module, item)

returnn/_setup_info_generated.py CHANGED Viewed

@@ -1,2 +1,2 @@
-version = '1.20260105.192646'
-long_version = '1.20260105.192646+git.1201db0'
+version = '1.20260119.015400'
+long_version = '1.20260119.015400+git.5c6a8c0'

returnn/datasets/lm.py CHANGED Viewed

@@ -86,6 +86,7 @@ class LmDataset(CachedDataset2):
         delayed_seq_data_start_symbol="[START]",
         dtype: Optional[str] = None,
         tag_prefix: Optional[str] = None,
+        _debug_limit_line_count: Optional[int] = None,
         **kwargs,
     ):
         """
@@ -138,6 +139,8 @@ class LmDataset(CachedDataset2):
           delayed_seq_data_start_symbol + original_sequence[:-1].
         :param str delayed_seq_data_start_symbol: used for add_delayed_seq_data.
         :param dtype: explicit dtype. if not given, automatically determined based on the number of labels.
+        :param tag_prefix: prefix for sequence tags. by default "line-".
+        :param _debug_limit_line_count:
         """
         super(LmDataset, self).__init__(**kwargs)
@@ -316,6 +319,10 @@ class LmDataset(CachedDataset2):
         self.num_skipped = 0
         self.num_unknown = 0
+        if _debug_limit_line_count is None:
+            _debug_limit_line_count = _get_debug_limit_line_count()
+        self._debug_limit_line_count = _debug_limit_line_count
     def _lazy_init(self):
         if self._orths_offsets_and_lens is not None:
             return
@@ -340,6 +347,9 @@ class LmDataset(CachedDataset2):
         lens_per_corpus_file = []
         start_time = time.time()
         last_print_time = start_time
+        debug_limit_line_count = self._debug_limit_line_count
+        debug_limit_est_total = 0
+        debug_limit_hit = False
         def _init_tmp_file():
             nonlocal tmp_file, tmp_file_orth_files_index
@@ -368,13 +378,16 @@ class LmDataset(CachedDataset2):
             if time.time() - last_print_time > 10:
                 print(
-                    f"  ... loaded {len(self._orths_offsets_and_lens)} sequences,"
+                    f"  ... loaded {len(orths)} sequences,"
                     f" {human_bytes_size(total_bytes_read)},"
                     f" after {hms(time.time() - start_time)}",
                     file=log.v4,
                 )
                 last_print_time = time.time()
+            if debug_limit_line_count is not None and len(orths) - prev_orth_len >= debug_limit_line_count:
+                raise _ReachedDebugLimitLineCount()
         # If a list of files is provided, concatenate all.
         if isinstance(corpus_file, str):
             corpus_file = [corpus_file]
@@ -383,37 +396,46 @@ class LmDataset(CachedDataset2):
         for file_name in corpus_file:
             if self._use_cache_manager:
                 file_name = cf(file_name)
-            if _is_bliss(file_name):
-                _init_tmp_file()
-                _iter_bliss(filename=file_name, callback=_tmp_file_add_line, decode=False)
-            elif file_name.endswith(".gz"):
-                _init_tmp_file()
-                _iter_txt(
-                    filename=file_name,
-                    callback=_tmp_file_add_line,
-                    skip_empty_lines=self._skip_empty_lines,
-                    decode=False,
-                )
-            else:  # Raw txt file
-                # Directly mmap the file.
-                # We just need to scan once through it to find line offsets.
-                file = open(file_name, "rb")
-                file_mmap = mmap.mmap(file.fileno(), 0, flags=mmap.MAP_PRIVATE)
-                file_index = len(self._orth_files)
-                self._orth_files.append(file)
-                self._orth_mmaps.append(file_mmap)
-                pos = 0
-                while True:
-                    next_new_line = file_mmap.find(b"\n", pos)
-                    if next_new_line == -1:
-                        break
-                    line_len = next_new_line - pos
-                    if line_len or not self._skip_empty_lines:
-                        orths.append((file_index, pos, line_len))
-                    total_bytes_read += line_len + 1
-                    pos = next_new_line + 1
-                    _maybe_report_status()
+            try:
+                if _is_bliss(file_name):
+                    _init_tmp_file()
+                    _iter_bliss(filename=file_name, callback=_tmp_file_add_line, decode=False)
+                elif file_name.endswith(".gz"):
+                    _init_tmp_file()
+                    _iter_txt(
+                        filename=file_name,
+                        callback=_tmp_file_add_line,
+                        skip_empty_lines=self._skip_empty_lines,
+                        decode=False,
+                    )
+                else:  # Raw txt file
+                    # Directly mmap the file.
+                    # We just need to scan once through it to find line offsets.
+                    file = open(file_name, "rb")
+                    file_mmap = mmap.mmap(file.fileno(), 0, flags=mmap.MAP_PRIVATE)
+                    file_index = len(self._orth_files)
+                    self._orth_files.append(file)
+                    self._orth_mmaps.append(file_mmap)
+                    pos = 0
+                    while True:
+                        next_new_line = file_mmap.find(b"\n", pos)
+                        if next_new_line == -1:
+                            break
+                        line_len = next_new_line - pos
+                        if line_len or not self._skip_empty_lines:
+                            orths.append((file_index, pos, line_len))
+                        total_bytes_read += line_len + 1
+                        pos = next_new_line + 1
+                        _maybe_report_status()
+            except _ReachedDebugLimitLineCount as exc:
+                assert exc.estimated_total_num_seqs is not None  # currently only for _iter_txt implemented
+                debug_limit_est_total += exc.estimated_total_num_seqs
+                debug_limit_hit = True
+            else:  # iteration completed without hitting debug limit
+                debug_limit_est_total += len(orths) - prev_orth_len
             lens_per_corpus_file.append(len(orths) - prev_orth_len)
             prev_orth_len = len(orths)
@@ -447,6 +469,18 @@ class LmDataset(CachedDataset2):
             file=log.v4,
         )
+        if debug_limit_hit:
+            est_frac_loaded = len(self._orths_offsets_and_lens) / debug_limit_est_total
+            new_partition_epoch = max(int(self.partition_epoch * est_frac_loaded), 1)
+            print(
+                f"LmDataset: debug limit of {debug_limit_line_count} lines (per file) hit,"
+                f" estimated total num seqs {debug_limit_est_total},"
+                f" loaded {len(self._orths_offsets_and_lens)}, {est_frac_loaded:.2%},"
+                f" adjusting partition_epoch from {self.partition_epoch} to {new_partition_epoch}",
+                file=log.v4,
+            )
+            self.partition_epoch = new_partition_epoch
         # It's only estimated because we might filter some out or so.
         self._estimated_num_seqs = len(self._orths_offsets_and_lens) // self.partition_epoch
@@ -784,19 +818,34 @@ def _iter_txt(
     :param decode:
     """
     f = open(filename, "rb")
+    f_ = f
     if filename.endswith(".gz"):
         f = gzip.GzipFile(fileobj=f)
-    for line in f:
-        if decode:
-            try:
-                line = line.decode("utf8")
-            except UnicodeDecodeError:
-                line = line.decode("latin_1")  # or iso8859_15?
-        line = line.strip()
-        if skip_empty_lines and not line:
-            continue
-        callback(line)
+    count = 0
+    try:
+        for line in f:
+            if decode:
+                try:
+                    line = line.decode("utf8")
+                except UnicodeDecodeError:
+                    line = line.decode("latin_1")  # or iso8859_15?
+            line = line.strip()
+            if skip_empty_lines and not line:
+                continue
+            count += 1
+            callback(line)
+    except _ReachedDebugLimitLineCount as exc:
+        print(f"Reached debug limit line count for {filename}, stopping early", file=log.v4)
+        pos = f_.tell()
+        f_.seek(0, os.SEEK_END)
+        size = f_.tell()
+        print(f"  stopped at byte {human_bytes_size(pos)} / {human_bytes_size(size)}", file=log.v4)
+        estimated_num_seqs = int(count * (size / pos))
+        print(f"  estimated total num seqs: {estimated_num_seqs}", file=log.v4)
+        exc.estimated_total_num_seqs = estimated_num_seqs
+        raise
 def iter_corpus(
@@ -2517,6 +2566,25 @@ def get_post_processor_function(opts):
     return chained_post_processors
+def _get_debug_limit_line_count() -> Optional[int]:
+    """
+    :return: if set, limit to this many lines for debugging
+    """
+    from returnn.config import get_global_config
+    config = get_global_config(raise_exception=False)
+    if not config:
+        return None
+    return config.int("lm_dataset_debug_limit_line_count", None)
+class _ReachedDebugLimitLineCount(Exception):
+    """internal exception to signal reached debug limit line count"""
+    estimated_total_num_seqs: Optional[int] = None
 def _main():
     from returnn.util import better_exchook

returnn/frontend/__init__.py CHANGED Viewed

@@ -19,6 +19,7 @@ from .state import *
 # Now the rest, in alphabetical order.
 from .array_ import *
+from .assert_ import *
 from .attention import *
 from .backend import *
 from .build_from_dict import *

returnn/frontend/_backend.py CHANGED Viewed

@@ -42,6 +42,11 @@ class Backend(Generic[T]):
         """
         raise NotImplementedError
+    @staticmethod
+    def assert_(condition: Tensor, message: str):
+        """assert"""
+        raise NotImplementedError
     @staticmethod
     def get_tensor_dependencies(x: Tensor) -> Sequence[Tensor]:
         """
@@ -624,12 +629,48 @@ class Backend(Generic[T]):
         targets_spatial_dim: Dim,
         blank_index: int,
         max_approx: bool = False,
+        use_native_op: Optional[bool] = None,
+        label_loop: bool = True,
     ) -> Tensor:
         """
         Calculates the CTC loss.
         """
         raise NotImplementedError
+    @staticmethod
+    def ctc_best_path(
+        *,
+        logits: Tensor,
+        logits_normalized: bool = False,
+        targets: Tensor,
+        input_spatial_dim: Dim,
+        targets_spatial_dim: Dim,
+        blank_index: int,
+        label_loop: bool = True,
+    ) -> Tensor:
+        """
+        Calculates the CTC best path.
+        """
+        raise NotImplementedError
+    @staticmethod
+    def have_edit_distance() -> bool:
+        """
+        :return: whether we have an edit_distance implementation
+        """
+        return False
+    @staticmethod
+    def edit_distance(a: Tensor, a_spatial_dim: Dim, b: Tensor, b_spatial_dim: Dim) -> Tensor:
+        """
+        :param a: [B,Ta]
+        :param a_spatial_dim: Ta
+        :param b: [B,Tb]
+        :param b_spatial_dim: Tb
+        :return: [B]
+        """
+        raise NotImplementedError
     @staticmethod
     def have_sequence_mask_raw() -> bool:
         """

returnn/frontend/_native/__init__.py CHANGED Viewed

@@ -67,6 +67,24 @@ def _code_hash_md5(filename: str) -> str:
 _is_set_up = False
+_enabled = True
+def set_enabled(enabled: bool):
+    """
+    Enable or disable the native code setup.
+    :param enabled:
+    """
+    global _enabled
+    _enabled = enabled
+def is_set_up() -> bool:
+    """
+    :return: whether the native code is set up
+    """
+    return _is_set_up
 def setup():
@@ -76,6 +94,8 @@ def setup():
     global _is_set_up
     if _is_set_up:
         return
+    if not _enabled:
+        return
     _is_set_up = True  # only try once
     from returnn.tensor import Tensor, Dim
@@ -177,6 +197,8 @@ def setup_torch():
     global _is_set_up_torch
     if _is_set_up_torch:
         return
+    if not _enabled:
+        return
     _is_set_up_torch = True  # only try once
     import torch

returnn/frontend/_numpy_backend.py CHANGED Viewed

@@ -26,6 +26,13 @@ class NumpyBackend(Backend[numpy.ndarray]):
         """executing eagerly"""
         return True
+    @staticmethod
+    def assert_(condition: Tensor, message: str):
+        """assert"""
+        assert condition.dims == (), "condition for assert must be a scalar"
+        if not condition.raw_tensor.item():
+            raise AssertionError(message)
     @staticmethod
     def get_dtype_name_raw(raw_tensor: numpy.ndarray) -> str:
         """

returnn/frontend/_utils.py CHANGED Viewed

@@ -110,7 +110,7 @@ def bin_op_out_template(
             all_dims.extend([dim_ for dim_ in a.dims if dim_ == dim])
         else:
             all_dims.extend([dim_ for dim_ in b.dims if dim_ == dim])
-    if all(set(x.dims) != set(all_dims) for x in (a, b)):
+    if all([set(x.dims) != set(all_dims) for x in (a, b)]):
         if allow_broadcast_all_sources is False:
             raise ValueError(f"compare: sources {a!r} {b!r} not allowed with allow_broadcast_all_sources=False")
         elif allow_broadcast_all_sources is None:

returnn/frontend/array_.py CHANGED Viewed

@@ -195,7 +195,7 @@ def merge_dims(
     if out_dim is None:
         from returnn.util.basic import prod
-        if any(d.need_masking() for d in dims[1:]):
+        if any([d.need_masking() for d in dims[1:]]):
             # The dynamic sizes as calculated via dim math would not correctly describe how the tensor looks like.
             # This would then potentially discard some of the data in the tensor in subsequent operations,
             # when masking is applied.
@@ -910,7 +910,7 @@ def scatter(
         else:
             raise ValueError(f"scatter: invalid mode {mode!r}")
     indices_dim = indices_dim if isinstance(indices_dim, (list, tuple)) else [indices_dim]
-    if any(dim.need_masking() for dim in indices_dim):
+    if any([dim.need_masking() for dim in indices_dim]):
         if use_mask is None:
             use_mask = rf.use_mask_default(
                 default=True, default_false_for_behavior_version_up_to=22, func_name="scatter"
@@ -1367,12 +1367,13 @@ def repeat(
     repeats = repeats.copy_masked(0, dims=[in_spatial_dim])
     idxs = rf.cumsum(repeats, spatial_dim=in_spatial_dim)  # [batch...,in_spatial_dim] -> idx in out_spatial_dim + 1
     new_size = rf.gather(idxs, indices=in_spatial_dim.get_dim_value_tensor() - 1, axis=in_spatial_dim)  # [batch...]
+    dim_dev = rf.get_default_dim_size_device()
     if out_spatial_dim is None:
-        out_spatial_dim = Dim(new_size, name="repeat")
+        out_spatial_dim = Dim(rf.copy_to_device(new_size, dim_dev), name="repeat")
     elif out_spatial_dim.dyn_size_ext is None:
-        out_spatial_dim.dyn_size_ext = new_size
+        out_spatial_dim.dyn_size_ext = rf.copy_to_device(new_size, dim_dev)
     elif out_spatial_dim.dyn_size_ext is not None and out_spatial_dim.dyn_size_ext.raw_tensor is None:
-        out_spatial_dim.dyn_size_ext.raw_tensor = new_size.raw_tensor
+        out_spatial_dim.dyn_size_ext.raw_tensor = rf.copy_to_device(new_size, dim_dev).raw_tensor
     out_spatial_dim_ext = out_spatial_dim + 1
     rel_idx_counts = rf.scatter(
         rf.expand_dims(rf.ones((), device=values.device, dtype="int32"), dims=idxs.dims),

returnn/frontend/assert_.py ADDED Viewed

@@ -0,0 +1,35 @@
+"""
+Assertion utility functions for validating conditions in Python code.
+"""
+from __future__ import annotations
+from typing import Union
+import returnn.frontend as rf
+from returnn.tensor import Tensor
+__all__ = ["assert_"]
+def assert_(condition: Union[Tensor, bool], message: str):
+    """
+    Asserts that a given condition is True.
+    If the condition is False, raises an AssertionError with the provided message.
+    This runs async on GPU.
+    :param condition:
+    :param message:
+    :return: nothing
+    """
+    if isinstance(condition, bool):
+        if not condition:
+            raise AssertionError(message)
+    elif isinstance(condition, Tensor):
+        if condition.dims:
+            condition = rf.reduce_all(condition, axis=condition.dims)  # reduce to scalar
+        # noinspection PyProtectedMember
+        condition._raw_backend.assert_(condition, message=message)
+    else:
+        raise TypeError(f"Condition must be a boolean or a Tensor, got {type(condition)}")

returnn/frontend/device.py CHANGED Viewed

@@ -8,7 +8,13 @@ from contextlib import contextmanager
 from returnn.tensor import Tensor
-__all__ = ["copy_to_device", "get_default_device", "set_default_device", "set_default_device_ctx"]
+__all__ = [
+    "copy_to_device",
+    "get_default_device",
+    "set_default_device",
+    "set_default_device_ctx",
+    "get_default_dim_size_device",
+]
 _default_device: Optional[str] = None
@@ -61,3 +67,10 @@ def set_default_device_ctx(device: Optional[str]):
         yield
     finally:
         _default_device = old_device
+def get_default_dim_size_device() -> Optional[str]:
+    """
+    :return: default device, where to put new tensors for dim sizes (Dim.dyn_size_ext)
+    """
+    return "cpu"

returnn/frontend/encoder/conformer.py CHANGED Viewed

@@ -167,6 +167,25 @@ class ConformerConvSubsample(ISeqDownsamplingEncoder):
         out, _ = rf.merge_dims(x, dims=[self._final_second_spatial_dim, in_dim])
         return out, in_spatial_dims[0]
+    def get_out_spatial_dim(self, in_spatial_dim: Dim) -> Dim:
+        """Get output spatial dimension given input spatial dimension."""
+        out_spatial_dim = in_spatial_dim
+        for i, conv_layer in enumerate(self.conv_layers):
+            (out_spatial_dim,) = rf.make_conv_out_spatial_dims(
+                [out_spatial_dim],
+                filter_size=conv_layer.filter_size[0],
+                strides=conv_layer.strides[0],
+                padding=conv_layer.padding,
+            )
+            if self.pool_sizes and i < len(self.pool_sizes):
+                (out_spatial_dim,) = rf.make_conv_out_spatial_dims(
+                    [out_spatial_dim],
+                    filter_size=self.pool_sizes[i][0],
+                    strides=self.pool_sizes[i][0],
+                    padding="same",
+                )
+        return out_spatial_dim
 class ConformerEncoderLayer(rf.Module):
     """

returnn 1.20260105.192646__py3-none-any.whl → 1.20260119.15400__py3-none-any.whl

returnn 1.20260105.192646__py3-none-any.whl → 1.20260119.15400__py3-none-any.whl