returnn 1.20250901.123052-py3-none-any.whl → 1.20260105.192646-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. returnn/PKG-INFO +2 -2
  2. returnn/_setup_info_generated.py +2 -2
  3. returnn/config.py +1 -1
  4. returnn/datasets/basic.py +29 -13
  5. returnn/datasets/distrib_files.py +61 -3
  6. returnn/datasets/generating.py +12 -21
  7. returnn/datasets/huggingface.py +434 -0
  8. returnn/datasets/lm.py +20 -0
  9. returnn/datasets/meta.py +179 -60
  10. returnn/datasets/multi_proc.py +1 -1
  11. returnn/datasets/postprocessing.py +597 -108
  12. returnn/datasets/text_dict.py +1 -1
  13. returnn/datasets/util/vocabulary.py +90 -0
  14. returnn/frontend/_backend.py +7 -0
  15. returnn/frontend/array_.py +54 -1
  16. returnn/frontend/attention.py +54 -20
  17. returnn/frontend/conv.py +273 -54
  18. returnn/frontend/decoder/transformer.py +36 -17
  19. returnn/frontend/encoder/conformer.py +1 -0
  20. returnn/frontend/encoder/transformer.py +2 -0
  21. returnn/frontend/loss.py +40 -1
  22. returnn/frontend/module.py +8 -1
  23. returnn/frontend/nested.py +9 -0
  24. returnn/native_op.cpp +80 -0
  25. returnn/sprint/cache.py +12 -13
  26. returnn/tensor/_dim_extra.py +51 -29
  27. returnn/tensor/_tensor_extra.py +6 -1
  28. returnn/tensor/utils.py +7 -4
  29. returnn/tf/frontend_layers/_backend.py +11 -2
  30. returnn/tf/frontend_low_level/_backend.py +15 -0
  31. returnn/tf/layers/basic.py +16 -38
  32. returnn/tf/native_op.py +11 -58
  33. returnn/tf/network.py +1 -1
  34. returnn/tf/util/basic.py +19 -0
  35. returnn/torch/data/returnn_dataset_wrapper.py +9 -3
  36. returnn/torch/engine.py +67 -2
  37. returnn/torch/frontend/_backend.py +119 -7
  38. returnn/torch/util/diagnose_gpu.py +65 -31
  39. returnn/torch/util/exception_helper.py +7 -1
  40. returnn/util/basic.py +6 -7
  41. returnn/util/better_exchook.py +4 -0
  42. returnn/util/collect_outputs_dict.py +79 -0
  43. returnn/util/debug.py +11 -2
  44. returnn/util/file_cache.py +42 -4
  45. returnn/util/task_system.py +1 -1
  46. {returnn-1.20250901.123052.dist-info → returnn-1.20260105.192646.dist-info}/METADATA +2 -2
  47. {returnn-1.20250901.123052.dist-info → returnn-1.20260105.192646.dist-info}/RECORD +50 -48
  48. {returnn-1.20250901.123052.dist-info → returnn-1.20260105.192646.dist-info}/LICENSE +0 -0
  49. {returnn-1.20250901.123052.dist-info → returnn-1.20260105.192646.dist-info}/WHEEL +0 -0
  50. {returnn-1.20250901.123052.dist-info → returnn-1.20260105.192646.dist-info}/top_level.txt +0 -0
returnn/native_op.cpp CHANGED
@@ -206,6 +206,14 @@ Ndarray* Ndarray_Copy(const Ndarray* self) {
 
  #include "tensorflow/core/public/version.h"
 
+ #ifndef TF_MAJOR_VERSION
+ #error "TF_MAJOR_VERSION is not defined!"
+ #endif
+
+ #ifndef TF_MINOR_VERSION
+ #error "TF_MINOR_VERSION is not defined!"
+ #endif
+
  #if (TF_MAJOR_VERSION == 1 && TF_MINOR_VERSION >= 6) || (TF_MAJOR_VERSION > 1)
  #define TF_issue_6602_workaround 0
  #define TWOD_LSTM_SUPPORT 1
@@ -402,6 +410,9 @@ static void tf_cuda_sgemm_batched(
 
 
  #else // CUDA
+
+ #ifdef HAVE_CUSTOM_BLAS
+
  /*
  // matrices are in column-major form
  int sgemm_(char *transa, char *transb,
@@ -419,6 +430,75 @@ static void tf_cuda_sgemm_batched(
  sgemm_(&transa, &transb, \
  &m_, &n_, &k_, alpha, A, &lda_, B, &ldb_, beta, C, &ldc_); \
  }
+
+ #else // HAVE_CUSTOM_BLAS
+
+ template<typename T>
+ static void tf_cpu_sgemm(
+ OpKernelContext* context,
+ char transa_, char transb_,
+ int m, int n, int k,
+ const T* alpha_ptr, const T* a_ptr, int lda,
+ const T* b_ptr, int ldb, const T* beta_ptr,
+ T* c_ptr, int ldc)
+ {
+ if (m <= 0 || n <= 0 || k <= 0) return;
+
+ auto d = context->eigen_cpu_device();
+ const T alpha = *alpha_ptr;
+ const T beta = *beta_ptr;
+
+ bool transa = (transa_ == 'T' || transa_ == 't' || transa_ == 'C' || transa_ == 'c');
+ bool transb = (transb_ == 'T' || transb_ == 't' || transb_ == 'C' || transb_ == 'c');
+
+ // 1. Map as COLUMN-MAJOR
+ // Physical rows (height) for the Map is always the leading dimension (lda, ldb, ldc)
+ typedef Eigen::TensorMap<Eigen::Tensor<const T, 2, Eigen::ColMajor>, Eigen::Unaligned> ConstMap;
+ typedef Eigen::TensorMap<Eigen::Tensor<T, 2, Eigen::ColMajor>, Eigen::Unaligned> MutableMap;
+
+ // Logical height/width of slices before any transposition
+ int a_slice_rows = transa ? k : m;
+ int a_slice_cols = transa ? m : k;
+ int b_slice_rows = transb ? n : k;
+ int b_slice_cols = transb ? k : n;
+
+ // Map and Slice
+ auto a = ConstMap(a_ptr, lda, a_slice_cols).slice(
+ Eigen::array<Eigen::Index, 2>({0, 0}),
+ Eigen::array<Eigen::Index, 2>({(Eigen::Index)a_slice_rows, (Eigen::Index)a_slice_cols}));
+
+ auto b = ConstMap(b_ptr, ldb, b_slice_cols).slice(
+ Eigen::array<Eigen::Index, 2>({0, 0}),
+ Eigen::array<Eigen::Index, 2>({(Eigen::Index)b_slice_rows, (Eigen::Index)b_slice_cols}));
+
+ auto c = MutableMap(c_ptr, ldc, n).slice(
+ Eigen::array<Eigen::Index, 2>({0, 0}),
+ Eigen::array<Eigen::Index, 2>({(Eigen::Index)m, (Eigen::Index)n}));
+
+ // 2. Define Contraction Pairs based on Transposition
+ // Column-Major Matrix Mult: (M x K) * (K x N)
+ // Standard: Contract Axis 1 of A with Axis 0 of B
+ // If A is Transposed: A is (K x M), contract Axis 0 of A
+ // If B is Transposed: B is (N x K), contract Axis 1 of B
+ Eigen::array<Eigen::IndexPair<int>, 1> pairs;
+ pairs[0] = Eigen::IndexPair<int>(transa ? 0 : 1, transb ? 1 : 0);
+
+ // 3. Execution
+ if (alpha == T(1) && beta == T(0)) {
+ c.device(d) = a.contract(b, pairs);
+ } else if (alpha == T(1) && beta == T(1)) {
+ c.device(d) += a.contract(b, pairs);
+ } else {
+ c.device(d) = a.contract(b, pairs) * alpha + c * beta;
+ }
+ }
+
+ #define Ndarray_sgemm(\
+ transpose_A, transpose_B, \
+ m, n, k, alpha, A, lda, B, ldb, beta, C, ldc) \
+ tf_cpu_sgemm<float>(context, transpose_A, transpose_B, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+
+ #endif // HAVE_CUSTOM_BLAS
  #endif // CUDA
 
  // See Context struct below.
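
Note on the native_op.cpp change above: when no explicit BLAS library is configured (HAVE_CUSTOM_BLAS not defined), Ndarray_sgemm on CPU now maps onto an Eigen tensor contraction running on TensorFlow's CPU device instead of calling an external sgemm_. A rough NumPy sketch of the GEMM semantics this implements (the real code additionally handles column-major storage and the leading dimensions lda/ldb/ldc via the Eigen maps and slices; the helper name below is illustrative only):

    import numpy

    def gemm_reference(transa: str, transb: str, alpha: float, a, b, beta: float, c):
        """C = alpha * op(A) @ op(B) + beta * C, with op() transposing if requested."""
        op_a = a.T if transa in "TtCc" else a
        op_b = b.T if transb in "TtCc" else b
        c[:] = alpha * (op_a @ op_b) + beta * c
        return c

    a = numpy.random.rand(4, 2)  # used transposed below, so op(A) is 2x4
    b = numpy.random.rand(4, 3)
    c = numpy.ones((2, 3))
    gemm_reference("T", "N", alpha=2.0, a=a, b=b, beta=0.5, c=c)

Like the Eigen code, the alpha == 1 / beta == 0 and alpha == 1 / beta == 1 cases can skip the scaling entirely; the sketch only shows the general formula.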
returnn/sprint/cache.py CHANGED
@@ -7,10 +7,9 @@ This module is about reading (maybe later also writing) the Sprint archive forma
  """
 
  from __future__ import annotations
- from typing import List, Optional, Tuple
+ from typing import Optional, List, Tuple, Dict
  import sys
  import os
- import typing
  import array
  from struct import pack, unpack
  import numpy
@@ -212,7 +211,7 @@ class FileArchive:
  def __init__(self, filename, must_exists=True, encoding="ascii"):
  self.encoding = encoding
 
- self.ft = {} # type: typing.Dict[str,FileInfo]
+ self.ft: Dict[str, FileInfo] = {}
  if os.path.exists(filename):
  self.allophones = []
  self.f = open(filename, "rb")
@@ -334,8 +333,8 @@ class FileArchive:
  # print(typ)
  assert type_ == "vector-f32"
  count = self.read_U32()
- data = [None] * count # type: typing.List[typing.Optional[numpy.ndarray]]
- time_ = [None] * count # type: typing.List[typing.Optional[numpy.ndarray]]
+ data: List[Optional[numpy.ndarray]] = [None] * count
+ time_: List[Optional[numpy.ndarray]] = [None] * count
  for i in range(count):
  size = self.read_U32()
  data[i] = self.read_v("f", size) # size x f32
@@ -450,7 +449,7 @@ class FileArchive:
  a = array.array("b")
  a.fromfile(self.f, comp)
  # unpack
- b = zlib.decompress(a.tostring(), 15 + 32)
+ b = zlib.decompress(a.tobytes(), 15 + 32)
  # substitute self.f by an anonymous memmap file object
  # restore original file handle after we're done
  backup_f = self.f
@@ -575,17 +574,17 @@ class FileArchiveBundle:
  :param str encoding: encoding used in the files
  """
  # filename -> FileArchive
- self.archives = {} # type: typing.Dict[str,FileArchive]
+ self.archives: Dict[str, FileArchive] = {}
  # archive content file -> FileArchive
- self.files = {} # type: typing.Dict[str,FileArchive]
+ self.files: Dict[str, FileArchive] = {}
  self._short_seg_names = {}
  if filename is not None:
  self.add_bundle(filename=filename, encoding=encoding)
 
- def add_bundle(self, filename, encoding="ascii"):
+ def add_bundle(self, filename: str, encoding: str = "ascii"):
  """
- :param str filename: bundle
- :param str encoding:
+ :param filename: bundle
+ :param encoding:
  """
  file_dir = os.path.dirname(filename) or "."
  for line in open(filename).read().splitlines():
@@ -837,7 +836,7 @@ class MixtureSet:
  """
  a = array.array("b")
  a.fromfile(self.f, length)
- return a.tostring().decode(encoding)
+ return a.tobytes().decode(encoding)
 
  def read_f32(self):
  """
@@ -1003,7 +1002,7 @@ class WordBoundaries:
  """
  a = array.array("b")
  a.fromfile(self.f, length)
- return a.tostring().decode(encoding)
+ return a.tobytes().decode(encoding)
 
  def __init__(self, filename):
  """
returnn/tensor/_dim_extra.py CHANGED
@@ -18,6 +18,8 @@ if TYPE_CHECKING:
  # just for type hints, otherwise use _d.Dim
  from .dim import Dim
 
+ from returnn.datasets.util.vocabulary import Vocabulary
+
  from . import dim as _d
  from . import tensor as _t
  from . import marked_dim as _m
@@ -41,54 +43,63 @@ class _DimExtra:
  self,
  *,
  dim: Dim,
- kind=DimTypes.Unspecified,
- vocab=None,
- undefined=False,
- special=False,
- auto_generated=False,
- match_priority=0,
- derived_from_tag=None,
- derived_from_op=None,
- batch=None,
- control_flow_ctx=None,
+ kind: Entity = DimTypes.Unspecified,
+ vocab: Union[None, Dict[str, Any], Vocabulary] = None,
+ undefined: bool = False,
+ special: bool = False,
+ auto_generated: bool = False,
+ match_priority: int = 0,
+ derived_from_tag: Optional[Dim] = None,
+ derived_from_op: Optional[Op] = None,
+ batch: Optional[BatchInfo] = None,
+ control_flow_ctx: Optional[ControlFlowContext] = None,
  src_data: Optional[_t.Tensor] = None,
  src_axis: Optional[int] = None,
  ):
  """
  :param dim:
- :param Entity|None kind:
- :param returnn.datasets.util.vocabulary.Vocabulary|None vocab:
- :param bool undefined: When this is specified as `None` by the user via `shape`.
- :param bool special: this can not be a dim tag of :class:`Tensor`.
+ :param kind:
+ :param vocab:
+ :param undefined: When this is specified as `None` by the user via `shape`.
+ :param special: this can not be a dim tag of :class:`Tensor`.
  But this dim tag also does not match anything except itself.
  So it can be used to represent special placeholders with special meanings like ``single_step``.
- :param bool auto_generated:
+ :param auto_generated:
  This is auto-generated by RETURNN because it was not explicitly specified by the user.
  E.g. for ConvLayer and others.
  This implies certain behavior on equality, such as comparing the description,
  to allow for several independent creations of the dim tag during template construction.
- :param Dim|None derived_from_tag:
+ :param derived_from_tag:
  Whether this new tag is reduced, down/up sampled, padded etc from this given other tag.
  In situations where dim tags are being matched (Data.get_common_data),
  the behavior is to consider them as equal,
  and assume that the chain of operations (e.g. padding + valid conv) results in the same dim.
- :param Op|None derived_from_op:
- :param int match_priority: when there is ambiguity between multiple dim tags, this value defines the order
+ :param derived_from_op:
+ :param match_priority: when there is ambiguity between multiple dim tags, this value defines the order
  in which the dimension are assigned to their matching counterparts.
  A dimension tag with a higher priority value is assigned first.
  E.g. for a square matrix used for a linear transformation,
  the reduce dim tag should have a higher priority.
- :param BatchInfo|None batch: for batch-dim, or dynamic dims per batch
- :param ControlFlowContext|None control_flow_ctx:
+ :param batch: for batch-dim, or dynamic dims per batch
+ :param control_flow_ctx:
  :param src_data:
  :param src_axis:
  """
  self.dim = dim
  assert kind is None or (isinstance(kind, Entity) and kind in DimTypes.Types)
  self.kind = kind
+ if vocab:
+ from returnn.datasets.util.vocabulary import Vocabulary
+
+ if isinstance(vocab, Vocabulary):
+ pass
+ elif isinstance(vocab, dict):
+ vocab = Vocabulary.create_vocab(**vocab)
+ else:
+ raise TypeError(f"invalid vocab {vocab!r} type {type(vocab)}")
  self.vocab = vocab
- self.same_as = None # type: Optional[_d.Dim]
- self.copy_same_as = None # type: Optional[_d.Dim]
+ self.same_as: Optional[Dim] = None
+ self.copy_same_as: Optional[Dim] = None
  self.derived_from_tag = derived_from_tag
  self.derived_from_op = derived_from_op
  if derived_from_op and not derived_from_op.output:
@@ -116,8 +127,8 @@ class _DimExtra:
  self.auto_generated = auto_generated
  # We can have different tag variants per batch info (e.g. with beam), or per control flow ctx.
  # They each have same_as = self. The same_base should have the base (global) batch info.
- self.same_for_batch_ctx = {} # type: Dict[Tuple[BatchInfo,Optional[ControlFlowContext]],_d.Dim]
- self.cache_dyn_size_ext_dev = {} # type: Dict[str,_t.Tensor] # device -> dyn_size_ext
+ self.same_for_batch_ctx: Dict[Tuple[BatchInfo, Optional[ControlFlowContext]], Dim] = {}
+ self.cache_dyn_size_ext_dev: Dict[str, _t.Tensor] = {} # device -> dyn_size_ext
  self.cache_seq_mask: Dict[Tuple[str, Optional[Tuple[Dim, ...]]], _t.Tensor] = {} # (dev,dim_order) -> seq_mask
  self.cache_dim_math = _CacheDimMath() # op (add,sub,...), operand -> Dim
 
@@ -134,6 +145,7 @@ class _DimExtra:
  def __setstate__(self, state):
  self.__dict__.update(state)
  if self.kind is not None:
+ # noinspection PyTypeChecker
  self.kind = {v.name: v for v in DimTypes.Types}[self.kind]
 
  def __sis_state__(self):
@@ -151,6 +163,9 @@ class _DimMixin:
  def _handle_extra_kwargs(self: Dim, *, dyn_size: Optional[_t.RawTensorType] = None, **kwargs):
  if kwargs:
  self._extra = _DimExtra(dim=self, **kwargs)
+ if self._extra.vocab and self.size is None:
+ self.size = self._extra.vocab.num_labels
+ self.capacity = self.capacity or self.size
  if dyn_size is not None:
  self.dyn_size = dyn_size
  if self.derived_from_op and self.is_dynamic():
@@ -2184,7 +2199,7 @@ class _DimMixin:
  other = other.dimension # makes matching easier
  if isinstance(other, int) and other == 1:
  return self
- if self.is_constant_static_dim() and isinstance(other, _d.Dim):
+ if self.is_constant_static_dim() and isinstance(other, _d.Dim) and not other.is_constant_static_dim():
  return self.dimension * other # use rmul
  cache_key = ("mul", other)
  cache = self.get_same_base()._make_extra().cache_dim_math
@@ -2571,14 +2586,19 @@ class _MathFindMatchingAdditive:
 
 
  def _math_find_matching_mult(start: Dim, other: Union[int, Dim], *, right: bool) -> Optional[Dim]:
- if (isinstance(other, int) or other.is_constant_static_dim()) and start.is_constant_static_dim():
+ # we assume, if other is Dim, then it is not constant static dim
+ if isinstance(other, int) and start.is_constant_static_dim():
  return _math_get_dim_via_bin_op([start, other] if right else [other, start], "mul")
  c_op = start.derived_from_op
  if c_op and c_op.kind == "mul" and len(c_op.inputs) == 2:
  if right:
  return c_op.inputs[0] * (c_op.inputs[1] * other)
- else:
- return (other * c_op.inputs[0]) * c_op.inputs[1]
+ # Don't do right=False -> (other * c_op.inputs[0]) * c_op.inputs[1],
+ # because this can lead to infinite recursions,
+ # and also we don't have a proper normalized form for multiplication.
+ # However, if both left-most factors are constant static dims, then we can merge it.
+ elif isinstance(other, int) and c_op.inputs[0].is_constant_static_dim():
+ return (other * c_op.inputs[0].dimension) * c_op.inputs[1]
  return None
 
 
@@ -2665,7 +2685,9 @@ def _get_merged_dim_kind(dim_tags: Sequence[Dim]) -> Entity:
 
 
  def _representative_tag(terms: Sequence[Dim]) -> Optional[Dim]:
- # Also see _OpLinearTerm.representative_tag().
+ if any(not term_.auto_generated for term_ in terms):
+ # Always prefer non-auto-generated.
+ terms = [term_ for term_ in terms if not term_.auto_generated]
  # First find any dynamic.
  for term_ in terms:
  if term_.is_dynamic_seq_length():
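
Note on the Dim/vocab handling added above: a vocab may now be given either as a Vocabulary instance or as a dict, which _DimExtra coerces via Vocabulary.create_vocab(**vocab); _handle_extra_kwargs then derives the dim size from vocab.num_labels when no size was set. A hedged, standalone sketch of that coercion in isolation (the _Vocab stand-in and the helper name are illustrative, not the RETURNN classes):

    from typing import Any, Dict, Optional, Tuple, Union

    class _Vocab:
        """Stand-in for returnn.datasets.util.vocabulary.Vocabulary."""
        def __init__(self, labels):
            self.labels = list(labels)
            self.num_labels = len(self.labels)

    def coerce_vocab(
        vocab: Union[None, Dict[str, Any], _Vocab], size: Optional[int]
    ) -> Tuple[Optional[_Vocab], Optional[int]]:
        if vocab:
            if isinstance(vocab, _Vocab):
                pass
            elif isinstance(vocab, dict):
                vocab = _Vocab(**vocab)  # real code: Vocabulary.create_vocab(**vocab)
            else:
                raise TypeError(f"invalid vocab {vocab!r} type {type(vocab)}")
        if vocab and size is None:
            size = vocab.num_labels  # new: dim size falls back to the vocab size
        return vocab, size

    print(coerce_vocab({"labels": ["<s>", "</s>", "a", "b"]}, None)[1])  # 4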
returnn/tensor/_tensor_extra.py CHANGED
@@ -588,7 +588,12 @@ class _TensorMixin(_TensorMixinBase):
 
  def __getstate__(self):
  d = {k: getattr(self, k) for k in self.__slots__}
- d["_raw_tensor"] = None # do not store the TF tensors
+ if (
+ self._raw_tensor is not None
+ and self._raw_backend is not None
+ and not self._raw_backend.should_pickle_tensor(self._raw_tensor)
+ ):
+ d["_raw_tensor"] = None
  return d
 
  def __setstate__(self, state):
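
Note on the __getstate__ change above: previously the raw tensor was dropped unconditionally when pickling; now it is kept whenever the active backend's should_pickle_tensor() allows it (eager/NumPy-style values), and only symbolic tensors are nulled out (see the TF backend overrides further below and rwth-i6/returnn issues 1541 and 1763). A generic sketch of the pattern with stand-in classes (not the RETURNN Tensor/Backend API):

    import pickle

    class SymbolicHandle:
        """Stand-in for a symbolic graph tensor whose value should not be stored."""

    class Backend:
        @staticmethod
        def should_pickle_tensor(raw) -> bool:
            return not isinstance(raw, SymbolicHandle)

    class TensorLike:
        def __init__(self, name, raw=None):
            self.name = name
            self._raw_tensor = raw
            self._raw_backend = Backend if raw is not None else None

        def __getstate__(self):
            d = dict(self.__dict__)
            # Keep raw data only if the backend allows it; otherwise drop it.
            if (
                self._raw_tensor is not None
                and self._raw_backend is not None
                and not self._raw_backend.should_pickle_tensor(self._raw_tensor)
            ):
                d["_raw_tensor"] = None
            return d

    print(pickle.loads(pickle.dumps(TensorLike("x", raw=[1, 2, 3])))._raw_tensor)  # [1, 2, 3]
    print(pickle.loads(pickle.dumps(TensorLike("y", raw=SymbolicHandle())))._raw_tensor)  # None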
returnn/tensor/utils.py CHANGED
@@ -36,11 +36,14 @@ def tensor_fill_random_numpy_(
  *,
  min_val: int = 0,
  max_val: Optional[int] = None,
- rnd: numpy.random.RandomState,
+ rnd: Optional[numpy.random.RandomState] = None,
  dyn_dim_max_sizes: Optional[Dict[Dim, int]] = None,
  dyn_dim_min_sizes: Optional[Dict[Dim, int]] = None,
  ) -> bool:
  """fill. return whether sth was filled"""
+ if rnd is None:
+ # noinspection PyUnresolvedReferences,PyProtectedMember
+ rnd = numpy.random.mtrand._rand
  if dyn_dim_max_sizes is None:
  dyn_dim_max_sizes = {}
  if dyn_dim_min_sizes is None:
@@ -59,7 +62,7 @@ def tensor_fill_random_numpy_(
  continue
  if tensor_fill_random_numpy_(
  dim.dyn_size_ext,
- min_val=dyn_dim_min_sizes.get(dim, 2),
+ min_val=dyn_dim_min_sizes.get(dim, min(2, dyn_dim_max_sizes.get(dim, 2))),
  max_val=dyn_dim_max_sizes.get(dim, None),
  rnd=rnd,
  dyn_dim_max_sizes=dyn_dim_max_sizes,
@@ -98,8 +101,8 @@ def tensor_fill_random_numpy_(
  if max_val is None:
  max_val = rnd.randint(5, 20)
  if x.sparse_dim and x.sparse_dim.dimension is not None:
- max_val = x.sparse_dim.dimension
- x.raw_tensor = rnd.randint(min_val, max_val, size=shape, dtype=x.dtype)
+ max_val = x.sparse_dim.dimension - 1
+ x.raw_tensor = rnd.randint(min_val, max_val + 1, size=shape, dtype=x.dtype)
  elif x.dtype == "bool":
  x.raw_tensor = rnd.randint(0, 2, size=shape, dtype=x.dtype)
  elif x.dtype.startswith("float"):
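
Note on the sparse fill fix above: numpy.random.RandomState.randint treats its upper bound as exclusive, and max_val is now interpreted as inclusive, so labels for a sparse dim of size N stay within [0, N-1]; rnd also became optional, defaulting to NumPy's global RandomState. A tiny check of the bound arithmetic:

    import numpy

    rnd = numpy.random.RandomState(42)
    sparse_dim = 5                    # valid labels: 0 .. 4
    max_val = sparse_dim - 1          # max_val is now inclusive
    labels = rnd.randint(0, max_val + 1, size=1000)  # randint's high bound is exclusive
    assert labels.min() >= 0 and labels.max() <= sparse_dim - 1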
returnn/tf/frontend_layers/_backend.py CHANGED
@@ -45,6 +45,13 @@ class ReturnnLayersBackend(Backend[Layer]):
  """executing eagerly"""
  return False
 
+ @staticmethod
+ def should_pickle_tensor(raw_tensor: Layer) -> bool:
+ """
+ :return: whether the tensor should be included in a pickle or set to `None`.
+ """
+ return False
+
  @staticmethod
  def get_tensor_dependencies(x: Tensor[Layer]) -> Sequence[Tensor]:
  """get tensor inputs"""
@@ -1060,14 +1067,16 @@ class ReturnnLayersBackend(Backend[Layer]):
  s = filter_size[i].dimension if not strides else strides[i]
  if filter_size[i].dimension == s == 1 or (s == 1 and padding.lower() == "same"):
  out_spatial_dims[i] = in_spatial_dims[i]
- layer_dict = {
+ assert all(size.is_static() for size in filter_size)
+ layer_dict: Dict[str, Any] = {
  "class": "transposed_conv",
  "from": source,
  "in_dim": in_dim,
  "in_spatial_dims": in_spatial_dims,
  "out_dim": out_dim,
  "out_spatial_dims": out_spatial_dims,
- "filter_size": filter_size,
+ "filter_size": [size.dimension for size in filter_size],
+ "filter_perm": list(filter_size) + [out_dim, in_dim],
  "padding": padding,
  }
  if remove_padding:
returnn/tf/frontend_low_level/_backend.py CHANGED
@@ -38,6 +38,21 @@ class TFBackend(Backend[tf.Tensor]):
  """
  return tf.executing_eagerly()
 
+ @staticmethod
+ def should_pickle_tensor(raw_tensor: tf.Tensor) -> bool:
+ """
+ :return: whether the tensor should be included in a pickle or set to `None`.
+ """
+
+ from tensorflow.python.framework.ops import EagerTensor
+
+ # Can not pickle symbolic TF tensors.
+ #
+ # See for discussion:
+ # - https://github.com/rwth-i6/returnn/issues/1541
+ # - https://github.com/rwth-i6/returnn/issues/1763
+ return isinstance(raw_tensor, EagerTensor)
+
  @staticmethod
  def get_dtype_name_raw(raw_tensor: tf.Tensor) -> str:
  """
returnn/tf/layers/basic.py CHANGED
@@ -7371,7 +7371,7 @@ class TransposedConvLayer(_ConcatInputLayer):
  """
  from returnn.tf.util.basic import get_initializer, get_activation_function, get_shape
 
- super(TransposedConvLayer, self).__init__(**kwargs)
+ super(TransposedConvLayer, self).__init__(in_dim=in_dim, **kwargs)
  out_dim # noqa # via get_out_data_from_opts
  assert not self.input_data.sparse
  assert self.input_data.have_batch_axis()
@@ -7516,7 +7516,10 @@ class TransposedConvLayer(_ConcatInputLayer):
  ):
  """
  Determines output length of a transposed convolution given input length.
- Copied from conv_utils.deconv_output_length, adapted with simplification.
+
+ Copied from TF/Keras conv_utils.deconv_output_length
+ (https://github.com/tensorflow/tensorflow/blob/5912f51d580551e5cee2cfde4cb882594b4d3e60/tensorflow/python/keras/utils/conv_utils.py#L140),
+ adapted with simplification.
 
  Also see :func:`ConvLayer.calc_out_dim`.
 
@@ -7533,44 +7536,17 @@ class TransposedConvLayer(_ConcatInputLayer):
  """
  if out_dim and out_dim.is_dim_known():
  return out_dim.get_dim_value()
- assert padding in {"same", "valid", "full"}
-
- # Get the dilated kernel size
- filter_size = filter_size + (filter_size - 1) * (dilation - 1)
 
- if stride != 1:
- input_length = input_length * stride
+ import returnn.frontend as rf
 
- # Infer length if output padding is None, else compute the exact length
- if output_padding is None:
- if padding == "valid":
- if isinstance(input_length, Dim):
- length = input_length + max(filter_size - stride, 0)
- else:
- length = tf_util.simplify_add(input_length, max(filter_size - stride, 0))
- elif padding == "full":
- if isinstance(input_length, Dim):
- length = input_length - (stride + filter_size - 2)
- else:
- length = tf_util.simplify_add(input_length, -(stride + filter_size - 2))
- elif padding == "same":
- length = input_length
- else:
- raise Exception("invalid padding %r" % (padding,))
- else: # output_padding
- if padding == "same":
- pad = filter_size // 2
- elif padding == "valid":
- pad = 0
- elif padding == "full":
- pad = filter_size - 1
- else:
- raise Exception("invalid padding %r" % (padding,))
- if isinstance(input_length, Dim):
- length = input_length + (-stride + filter_size - 2 * pad + output_padding)
- else:
- length = tf_util.simplify_add(input_length, -stride + filter_size - 2 * pad + output_padding)
- return length
+ return rf.calc_transposed_conv_out_length(
+ input_length,
+ filter_size=filter_size,
+ padding=padding,
+ output_padding=output_padding,
+ stride=stride,
+ dilation_rate=dilation,
+ )
 
  @classmethod
  def get_out_data_from_opts(
@@ -10488,6 +10464,7 @@ class TopKLayer(LayerBase):
  self._sub_layers = {}
  for key, (v, a) in sub_outputs.items():
  sub_out_data = self.output.copy_template(name="%s/%s" % (self.name, key))
+ sub_out_data.feature_dim = None
  sub_out_data.dtype = "int32"
  sub_out_data.sparse_dim = a
  sub_out_data.placeholder = v
@@ -10527,6 +10504,7 @@ class TopKLayer(LayerBase):
  axis = [in_data.get_dim_tag_from_description(a) for a in axis]
  out_dims = [dim for dim in in_data.dim_tags if dim not in axis] + [k_dim]
  out_data = in_data.copy_template(name=name).copy_template_new_dim_tags(out_dims)
+ out_data.feature_dim = None
  if for_indices is not None:
  assert 0 <= for_indices < len(axis)
  out_data.dtype = "int32"
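
For reference, the arithmetic that the removed deconv_output_length code implemented (and that the layer now delegates to rf.calc_transposed_conv_out_length) is, with dilated filter size f' = f + (f - 1) * (d - 1): "same" gives L*s; "valid" gives L*s + max(f' - s, 0); "full" gives L*s - (s + f' - 2); and with an explicit output_padding, L*s - s + f' - 2*pad + output_padding, where pad is 0, f'//2 or f' - 1 for valid/same/full. A standalone sketch of exactly that formula over plain ints (not the RETURNN function itself, which also accepts Dim/symbolic lengths):

    from typing import Optional

    def transposed_conv_out_length(
        input_length: int,
        filter_size: int,
        padding: str = "valid",
        output_padding: Optional[int] = None,
        stride: int = 1,
        dilation: int = 1,
    ) -> int:
        """Same arithmetic as the removed TransposedConvLayer.deconv_output_length."""
        assert padding in {"same", "valid", "full"}
        filter_size = filter_size + (filter_size - 1) * (dilation - 1)  # dilated kernel
        length = input_length * stride
        if output_padding is None:
            if padding == "valid":
                return length + max(filter_size - stride, 0)
            if padding == "full":
                return length - (stride + filter_size - 2)
            return length  # "same"
        pad = {"valid": 0, "same": filter_size // 2, "full": filter_size - 1}[padding]
        return length - stride + filter_size - 2 * pad + output_padding

    print(transposed_conv_out_length(10, filter_size=3, padding="valid", stride=2))  # 21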
returnn/tf/native_op.py CHANGED
@@ -528,77 +528,30 @@ class OpMaker:
  def _make_mod(self):
  if self.cache_key in self.mod_cache:
  return self.mod_cache[self.cache_key]
- from returnn.util.basic import find_lib
-
- # Note about BLAS linkage:
- # TensorFlow (or its Eigen lib) likely has linked against some BLAS lib itself.
- # For our CPU code, we directly call some BLAS functions such as `sgemm_`.
- # On platforms where there is a flat namespace (e.g. Mac),
- # it probably is not needed to explicitly link it again for this module.
- # In other cases, it's probably needed, but it's not so clear which lib has the
- # right symbols (e.g. the `sgemm_` symbol).
+
+ # Note about BLAS / matmul:
+ # Earlier, we assumed that TensorFlow/Eigen used BLAS internally,
+ # and our code directly called BLAS sgemm_, so we needed to link directly to BLAS.
+ # Now, by default, we use the underlying Eigen library,
+ # which is the same code path that TF also uses for CPU matmul.
+ # Only if an explicit BLAS library is specified, we use that instead.
  ld_flags = []
- have_blas_lib = False
+ c_macro_defines = {}
 
  if self.blas_lib is not None and os.path.exists(self.blas_lib):
  path = os.path.dirname(self.blas_lib)
  if path == "":
  path = "."
  ld_flags += ["-L%s" % path, "-l:%s" % os.path.basename(self.blas_lib)]
- have_blas_lib = True
- if not have_blas_lib and self.search_for_runtime_blas:
- from returnn.util.basic import find_sgemm_libs_from_runtime
-
- libs = find_sgemm_libs_from_runtime()
- if libs:
- numpy_libs = [fn for fn in libs if "/numpy/.libs/" in fn]
- if numpy_libs:
- # Prefer Numpy; move to front.
- libs = numpy_libs + [fn for fn in libs if fn not in numpy_libs]
- if self.blas_lib is not None:
- libs = [lib for lib in libs if self.blas_lib in lib]
- for fn in libs:
- ld_flags += ["-L%s" % os.path.dirname(fn), "-l:%s" % os.path.basename(fn)]
- have_blas_lib = True
- if not have_blas_lib and self.search_for_numpy_blas:
- # Find related Numpy libs.
- # Numpy usually comes with OpenBlas, and Numpy is probably loaded anyway.
- # Even do this before the other libs below, as it is likely
- # that this OpenBlas lib is correctly initialized already.
- import numpy
-
- numpy_dir = os.path.dirname(numpy.__file__)
- if os.path.exists("%s/.libs" % numpy_dir):
- ld_flags += ["-L%s/.libs" % numpy_dir]
- from glob import glob
-
- for f in glob("%s/.libs/*.so" % numpy_dir):
- f = os.path.basename(f)
- if self.blas_lib is not None and self.blas_lib not in f:
- continue
- if f.startswith("lib"):
- f = f[3:]
- if f.endswith(".so"):
- f = f[:-3]
- ld_flags += ["-l%s" % f]
- have_blas_lib = True
- if not have_blas_lib and self.search_for_system_blas:
- # Try to just link against blas/f77blas
- # (both can potentially have the symbol) if it finds the lib.
- if find_lib("blas"):
- ld_flags += ["-lblas"]
- have_blas_lib = True
- if find_lib("f77blas"):
- ld_flags += ["-lf77blas"]
- have_blas_lib = True
- if not have_blas_lib:
- print("WARNING: OpMaker: no BLAS lib found")
+ c_macro_defines["HAVE_CUSTOM_BLAS"] = "1"
+
  comp = tf_util.OpCodeCompiler(
  base_name=self.name,
  code_version=self.description.code_version,
  code=self._make_code(),
  include_deps=[self.support_native_op_cpp_filename],
  ld_flags=ld_flags,
+ c_macro_defines=c_macro_defines,
  use_cuda_if_available=self.with_cuda,
  log_stream=self.log_stream,
  **dict(self.compiler_opts),
returnn/tf/network.py CHANGED
@@ -4428,7 +4428,7 @@ def help_on_tf_exception(
  data = extern_data.data[data_key]
  info += ", %s" % data
  print(" %r: %s" % (key, info), file=file)
- if data and data.sparse:
+ if data is not None and data.sparse:
  if v_minmax[0] < 0 or v_minmax[1] >= data.dim:
  print(" WARNING, invalid label for data", data, file=file)
  elif feed_dict is None:
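
Note on the tf/network.py change: `if data and data.sparse` relied on the truthiness of the data object, while the intent is only to guard against None; `if data is not None and data.sparse` states that explicitly and stays correct even for objects whose truth value is customized. A generic illustration (not the RETURNN Data class):

    class Template:
        """An object whose truth value is False even though it is not None."""
        def __bool__(self):
            return False

    data = Template()
    print(bool(data), data is not None)  # False True -> `if data:` would skip a valid object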