returnn 1.20251027.232712__py3-none-any.whl → 1.20260119.15400__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- returnn/PKG-INFO +2 -2
- returnn/__old_mod_loader__.py +26 -2
- returnn/_setup_info_generated.py +2 -2
- returnn/datasets/lm.py +130 -42
- returnn/datasets/meta.py +93 -43
- returnn/datasets/postprocessing.py +597 -108
- returnn/datasets/util/vocabulary.py +90 -0
- returnn/frontend/__init__.py +1 -0
- returnn/frontend/_backend.py +41 -0
- returnn/frontend/_native/__init__.py +22 -0
- returnn/frontend/_numpy_backend.py +7 -0
- returnn/frontend/_utils.py +1 -1
- returnn/frontend/array_.py +48 -2
- returnn/frontend/assert_.py +35 -0
- returnn/frontend/attention.py +54 -20
- returnn/frontend/conv.py +273 -54
- returnn/frontend/device.py +14 -1
- returnn/frontend/encoder/conformer.py +20 -0
- returnn/frontend/encoder/transformer.py +2 -0
- returnn/frontend/loss.py +222 -3
- returnn/frontend/math_.py +54 -14
- returnn/native_op.cpp +182 -172
- returnn/native_op.py +36 -31
- returnn/sprint/cache.py +12 -13
- returnn/tensor/_dim_extra.py +7 -7
- returnn/tensor/_tensor_extra.py +10 -10
- returnn/tensor/utils.py +8 -5
- returnn/tf/frontend_layers/_backend.py +7 -3
- returnn/tf/layers/basic.py +27 -40
- returnn/tf/native_op.py +27 -63
- returnn/tf/network.py +1 -1
- returnn/tf/util/basic.py +22 -197
- returnn/torch/engine.py +157 -6
- returnn/torch/frontend/_backend.py +280 -29
- returnn/torch/frontend/bridge.py +61 -0
- returnn/torch/frontend/compile_helper.py +106 -0
- returnn/torch/util/array_.py +30 -0
- returnn/torch/util/assert_.py +122 -0
- returnn/torch/util/exception_helper.py +7 -1
- returnn/torch/util/native_op.py +885 -0
- returnn/torch/util/native_op_code_compiler.py +308 -0
- returnn/util/basic.py +6 -7
- returnn/util/better_exchook.py +4 -0
- returnn/util/cuda_env.py +332 -0
- returnn/util/debug.py +12 -2
- returnn/util/file_cache.py +15 -1
- returnn/util/fsa.py +17 -13
- returnn/util/native_code_compiler.py +104 -47
- returnn/util/task_system.py +1 -1
- {returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/METADATA +2 -2
- {returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/RECORD +54 -48
- {returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/WHEEL +1 -1
- {returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/LICENSE +0 -0
- {returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/top_level.txt +0 -0
returnn/tensor/_dim_extra.py
CHANGED
@@ -858,7 +858,7 @@ class _DimMixin:
         self._make_extra()
         dim_order_default = self.dyn_size_ext.dims + (self,)
         if dim_order is not None:
-            dim_order = tuple(d for d in dim_order if d in dim_order_default)  # filter
+            dim_order = tuple([d for d in dim_order if d in dim_order_default])  # filter
         else:
             dim_order = dim_order_default
         cache_key = (device, dim_order)
@@ -2484,16 +2484,16 @@ _BinOpStrs = {
 
 def _math_get_dim_via_bin_op(dims: Sequence[Union[Dim, int]], op_kind: str) -> Dim:
     dims = [d if isinstance(d, _d.Dim) else _make_constant_static_dim(d) for d in dims]
-    if all(d.dimension is not None for d in dims):
+    if all([d.dimension is not None for d in dims]):
         op = _BinOps[op_kind]
         dim_value = dims[0].dimension
         for d in dims[1:]:
             dim_value = op(dim_value, d.dimension)
     else:
         dim_value = None
-    if all(d.is_constant_static_dim() for d in dims):
+    if all([d.is_constant_static_dim() for d in dims]):
         return _make_constant_static_dim(dim_value, kind=_get_merged_dim_kind(dims))
-    desc = _BinOpStrs[op_kind].join(_get_description(d) for d in dims)
+    desc = _BinOpStrs[op_kind].join([_get_description(d) for d in dims])
     if op_kind.startswith("ceildiv"):
         desc = f"⌈{desc}⌉"
     return _d.Dim(
@@ -2676,16 +2676,16 @@ def _get_description(dim, brackets=True):
 
 
 def _get_merged_dim_kind(dim_tags: Sequence[Dim]) -> Entity:
-    if any(tag.is_batch_dim() for tag in dim_tags):
+    if any([tag.is_batch_dim() for tag in dim_tags]):
         return DimTypes.Batch
-    elif any(tag.is_feature_dim() for tag in dim_tags):
+    elif any([tag.is_feature_dim() for tag in dim_tags]):
         return DimTypes.Feature
     else:
         return DimTypes.Spatial
 
 
 def _representative_tag(terms: Sequence[Dim]) -> Optional[Dim]:
-    if any(not term_.auto_generated for term_ in terms):
+    if any([not term_.auto_generated for term_ in terms]):
         # Always prefer non-auto-generated.
         terms = [term_ for term_ in terms if not term_.auto_generated]
         # First find any dynamic.
returnn/tensor/_tensor_extra.py
CHANGED
@@ -32,8 +32,8 @@ class _TensorExtra:
         tensor: Tensor,
         time_dim_axis=NotSpecified,
         available_for_inference=True,
-        batch=None,
-        beam=None,
+        batch: Optional[BatchInfo] = None,
+        beam: Optional[SearchBeam] = None,
         control_flow_ctx=None,
     ):
         """
@@ -41,8 +41,8 @@ class _TensorExtra:
         :param int|None|NotSpecified time_dim_axis: where we have the time dim axis, after we added the batch-dim.
             this is often 1. however, can be None if there is no time-dim.
         :param bool available_for_inference: e.g. the extern data "classes" is usually not available for inference
-        :param
-        :param
+        :param batch:
+        :param beam: the batch-dim could be extended by a beam-size,
             such that it represents the merged dims [batch, beam_size].
         :param ControlFlowContext|None control_flow_ctx:
         """
@@ -668,11 +668,11 @@ class _TensorMixin(_TensorMixinBase):
         if not perm:
             return self.copy()
         if allow_int and isinstance(perm[0], int):
-            assert all(isinstance(a, int) for a in perm), f"{self}: invalid perm {perm!r} types"
+            assert all([isinstance(a, int) for a in perm]), f"{self}: invalid perm {perm!r} types"
             assert set(perm) == set(range(len(perm))), f"{self}: invalid perm {perm!r}"
             return self._copy_compatible_to_dims_with_perm([self._dims[i] for i in perm], perm)
         else:
-            assert all(isinstance(a, Dim) for a in perm), f"{self}: invalid perm {perm!r} types"
+            assert all([isinstance(a, Dim) for a in perm]), f"{self}: invalid perm {perm!r} types"
             return self.copy_compatible_to_dims(perm)
 
     def copy_move_axis(self, old_axis, new_axis) -> _t.Tensor:
@@ -1155,7 +1155,7 @@ class _TensorMixin(_TensorMixinBase):
             )
 
             assert v.batch_ndim == data.batch_ndim
-            assert all(mapped_axes[ax] == ax for ax in range(v.batch_ndim))
+            assert all([mapped_axes[ax] == ax for ax in range(v.batch_ndim)])
 
             if self.version == 1:
                 # Ensure time_dim_axis and feature_dim_axis is same as in data
@@ -1702,7 +1702,7 @@ class _TensorMixin(_TensorMixinBase):
         """
         :return: shape with added batch-dim. e.g. (batch,time,feat) = (None,None,128)
         """
-        return tuple(tag.dimension for tag in self.dim_tags)
+        return tuple([tag.dimension for tag in self.dim_tags])
 
     # noinspection PyShadowingNames
     def get_batch_shape(self, batch_dim):
@@ -3214,7 +3214,7 @@ class _TensorMixin(_TensorMixinBase):
         if len(sources) == 1:
             return sources[0].copy_template()
         max_ndim = max([s.batch_ndim for s in sources])
-        if any(src.batch for src in sources):
+        if any([src.batch for src in sources]):
             from returnn.tf.util.data import BatchInfo
 
             common_batch = BatchInfo.get_common_batch_info([src.batch for src in sources if src.batch])
@@ -3254,7 +3254,7 @@ class _TensorMixin(_TensorMixinBase):
             else:
                 axis = common.get_default_new_axis_for_dim_tag(dim_tag)
                 common = common.copy_add_dim_by_tag(dim_tag, unbroadcast=True, axis=axis)
-        if all(s.batch_ndim < common.batch_ndim for s in sources):
+        if all([s.batch_ndim < common.batch_ndim for s in sources]):
             from returnn.util.basic import validate_broadcast_all_sources
 
             validate_broadcast_all_sources(
returnn/tensor/utils.py
CHANGED
@@ -36,11 +36,14 @@ def tensor_fill_random_numpy_(
     *,
     min_val: int = 0,
     max_val: Optional[int] = None,
-    rnd: numpy.random.RandomState,
+    rnd: Optional[numpy.random.RandomState] = None,
     dyn_dim_max_sizes: Optional[Dict[Dim, int]] = None,
     dyn_dim_min_sizes: Optional[Dict[Dim, int]] = None,
 ) -> bool:
     """fill. return whether sth was filled"""
+    if rnd is None:
+        # noinspection PyUnresolvedReferences,PyProtectedMember
+        rnd = numpy.random.mtrand._rand
     if dyn_dim_max_sizes is None:
         dyn_dim_max_sizes = {}
     if dyn_dim_min_sizes is None:
@@ -59,7 +62,7 @@ def tensor_fill_random_numpy_(
             continue
         if tensor_fill_random_numpy_(
             dim.dyn_size_ext,
-            min_val=dyn_dim_min_sizes.get(dim, 2),
+            min_val=dyn_dim_min_sizes.get(dim, min(2, dyn_dim_max_sizes.get(dim, 2))),
             max_val=dyn_dim_max_sizes.get(dim, None),
             rnd=rnd,
             dyn_dim_max_sizes=dyn_dim_max_sizes,
@@ -68,7 +71,7 @@ def tensor_fill_random_numpy_(
             # Make sure at least one of the dyn sizes matches the max size.
             i = rnd.randint(0, dim.dyn_size_ext.raw_tensor.size)
             dim.dyn_size_ext.raw_tensor.flat[i] = dyn_dim_max_sizes[dim]
-            if dim in dyn_dim_min_sizes:
+            if dim in dyn_dim_min_sizes and dim.dyn_size_ext.raw_tensor.size > 1:
                 j = rnd.randint(0, dim.dyn_size_ext.raw_tensor.size - 1)
                 if j >= i:
                     j += 1
@@ -98,8 +101,8 @@ def tensor_fill_random_numpy_(
     if max_val is None:
         max_val = rnd.randint(5, 20)
     if x.sparse_dim and x.sparse_dim.dimension is not None:
-        max_val = x.sparse_dim.dimension
-        x.raw_tensor = rnd.randint(min_val, max_val, size=shape, dtype=x.dtype)
+        max_val = x.sparse_dim.dimension - 1
+        x.raw_tensor = rnd.randint(min_val, max_val + 1, size=shape, dtype=x.dtype)
     elif x.dtype == "bool":
         x.raw_tensor = rnd.randint(0, 2, size=shape, dtype=x.dtype)
     elif x.dtype.startswith("float"):
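
With rnd now optional, tensor_fill_random_numpy_ falls back to NumPy's global RandomState (numpy.random.mtrand._rand), so seeding via numpy.random.seed still gives reproducible fills. The following is a usage sketch under that assumption; it presumes a working RETURNN installation and keeps the tensor static-shaped for simplicity.

import numpy
from returnn.tensor import Tensor, Dim
from returnn.tensor.utils import tensor_fill_random_numpy_

feat_dim = Dim(8, name="feature")
x = Tensor("x", dims=[feat_dim], dtype="float32")

numpy.random.seed(42)  # the global RandomState is what the fallback uses
filled = tensor_fill_random_numpy_(x)  # no rnd argument needed anymore
print(filled, x.raw_tensor.shape)  # expected: True (8,)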
returnn/tf/frontend_layers/_backend.py
CHANGED
@@ -465,6 +465,8 @@ class ReturnnLayersBackend(Backend[Layer]):
         targets_spatial_dim: Dim,
         blank_index: int,
         max_approx: bool = False,
+        use_native_op: Optional[bool] = None,
+        label_loop: bool = True,
     ) -> Tensor:
         """CTC"""
         assert targets.sparse_dim and targets.sparse_dim.dimension <= logits.feature_dim.dimension
@@ -482,6 +484,7 @@ class ReturnnLayersBackend(Backend[Layer]):
                 "targets": targets,
                 "blank_index": blank_index,
                 "max_approx": max_approx,
+                "label_loop": label_loop,
             },
             name="ctc_loss",
         )
@@ -944,7 +947,6 @@ class ReturnnLayersBackend(Backend[Layer]):
         """
         assert mask.dtype == "bool"
         assert set(mask.dims) == set(dims)
-        assert set(mask.dims).issubset(set(tensor.dims))
         if not out_dim:
             out_dim = Dim(None, name="mask")
         return (
@@ -1067,14 +1069,16 @@ class ReturnnLayersBackend(Backend[Layer]):
             s = filter_size[i].dimension if not strides else strides[i]
             if filter_size[i].dimension == s == 1 or (s == 1 and padding.lower() == "same"):
                 out_spatial_dims[i] = in_spatial_dims[i]
-
+        assert all(size.is_static() for size in filter_size)
+        layer_dict: Dict[str, Any] = {
             "class": "transposed_conv",
             "from": source,
             "in_dim": in_dim,
             "in_spatial_dims": in_spatial_dims,
             "out_dim": out_dim,
             "out_spatial_dims": out_spatial_dims,
-            "filter_size": filter_size,
+            "filter_size": [size.dimension for size in filter_size],
+            "filter_perm": list(filter_size) + [out_dim, in_dim],
             "padding": padding,
         }
         if remove_padding:
returnn/tf/layers/basic.py
CHANGED
@@ -2741,7 +2741,7 @@ class BooleanMaskLayer(LayerBase):
         tensor = self.sources[0].output
         remaining_dims = [d for d in tensor.dims if d not in dims]
         tensor_templ = tensor.copy_template_new_dim_tags(tuple(dims) + tuple(remaining_dims))
-        tensor = tensor.copy_compatible_to(tensor_templ,
+        tensor = tensor.copy_compatible_to(tensor_templ, unbroadcast=True)
         mask_templ = mask.output.copy_template_new_dim_tags(new_dim_tags=tuple(dims))
         mask_ = mask.output.copy_compatible_to(mask_templ, add_dims=False)
         self.output.raw_tensor = tf.boolean_mask(tensor.raw_tensor, mask=mask_.raw_tensor)
@@ -7371,7 +7371,7 @@ class TransposedConvLayer(_ConcatInputLayer):
         """
         from returnn.tf.util.basic import get_initializer, get_activation_function, get_shape
 
-        super(TransposedConvLayer, self).__init__(**kwargs)
+        super(TransposedConvLayer, self).__init__(in_dim=in_dim, **kwargs)
         out_dim  # noqa  # via get_out_data_from_opts
         assert not self.input_data.sparse
         assert self.input_data.have_batch_axis()
@@ -7516,7 +7516,10 @@ class TransposedConvLayer(_ConcatInputLayer):
     ):
         """
         Determines output length of a transposed convolution given input length.
-
+
+        Copied from TF/Keras conv_utils.deconv_output_length
+        (https://github.com/tensorflow/tensorflow/blob/5912f51d580551e5cee2cfde4cb882594b4d3e60/tensorflow/python/keras/utils/conv_utils.py#L140),
+        adapted with simplification.
 
         Also see :func:`ConvLayer.calc_out_dim`.
 
@@ -7533,44 +7536,17 @@ class TransposedConvLayer(_ConcatInputLayer):
         """
         if out_dim and out_dim.is_dim_known():
             return out_dim.get_dim_value()
-        assert padding in {"same", "valid", "full"}
-
-        # Get the dilated kernel size
-        filter_size = filter_size + (filter_size - 1) * (dilation - 1)
 
-
-        input_length = input_length * stride
+        import returnn.frontend as rf
 
-
-
-
-
-
-
-
-
-                if isinstance(input_length, Dim):
-                    length = input_length - (stride + filter_size - 2)
-                else:
-                    length = tf_util.simplify_add(input_length, -(stride + filter_size - 2))
-            elif padding == "same":
-                length = input_length
-            else:
-                raise Exception("invalid padding %r" % (padding,))
-        else:  # output_padding
-            if padding == "same":
-                pad = filter_size // 2
-            elif padding == "valid":
-                pad = 0
-            elif padding == "full":
-                pad = filter_size - 1
-            else:
-                raise Exception("invalid padding %r" % (padding,))
-            if isinstance(input_length, Dim):
-                length = input_length + (-stride + filter_size - 2 * pad + output_padding)
-            else:
-                length = tf_util.simplify_add(input_length, -stride + filter_size - 2 * pad + output_padding)
-        return length
+        return rf.calc_transposed_conv_out_length(
+            input_length,
+            filter_size=filter_size,
+            padding=padding,
+            output_padding=output_padding,
+            stride=stride,
+            dilation_rate=dilation,
+        )
 
     @classmethod
     def get_out_data_from_opts(
@@ -11562,13 +11538,23 @@ class CtcLossLayer(LayerBase):
     layer_class = "ctc_loss"
     recurrent = True  # order matters
 
-    def __init__(
+    def __init__(
+        self,
+        logits,
+        targets,
+        logits_normalized=False,
+        blank_index=-1,
+        max_approx=False,
+        label_loop: bool = True,
+        **kwargs,
+    ):
         """
         :param LayerBase logits: (before softmax). shape [B,T,D]
         :param LayerBase targets: sparse. shape [B,T]
         :param bool logits_normalized: whether the logits are already normalized (e.g. via log-softmax)
         :param int blank_index: vocab index of the blank symbol
        :param bool max_approx: if True, use max instead of sum over alignments (max approx, Viterbi)
+        :param label_loop:
         """
         from returnn.tf.native_op import ctc_loss, ctc_loss_viterbi
 
@@ -11591,6 +11577,7 @@ class CtcLossLayer(LayerBase):
             targets=targets.output.copy_as_batch_major().placeholder,
             targets_seq_lens=targets.output.get_sequence_lengths(),
             blank_index=blank_index,
+            label_loop=label_loop,
         )
 
     def get_dep_layers(self):
returnn/tf/native_op.py
CHANGED
@@ -528,77 +528,30 @@ class OpMaker:
     def _make_mod(self):
         if self.cache_key in self.mod_cache:
             return self.mod_cache[self.cache_key]
-
-
-        #
-        #
-        #
-        #
-        #
-        # In other cases, it's probably needed, but it's not so clear which lib has the
-        # right symbols (e.g. the `sgemm_` symbol).
+
+        # Note about BLAS / matmul:
+        # Earlier, we assumed that TensorFlow/Eigen used BLAS internally,
+        # and our code directly called BLAS sgemm_, so we needed to link directly to BLAS.
+        # Now, by default, we use the underlying Eigen library,
+        # which is the same code path that TF also uses for CPU matmul.
+        # Only if an explicit BLAS library is specified, we use that instead.
         ld_flags = []
-
+        c_macro_defines = {}
 
         if self.blas_lib is not None and os.path.exists(self.blas_lib):
             path = os.path.dirname(self.blas_lib)
             if path == "":
                 path = "."
             ld_flags += ["-L%s" % path, "-l:%s" % os.path.basename(self.blas_lib)]
-
-
-            from returnn.util.basic import find_sgemm_libs_from_runtime
-
-            libs = find_sgemm_libs_from_runtime()
-            if libs:
-                numpy_libs = [fn for fn in libs if "/numpy/.libs/" in fn]
-                if numpy_libs:
-                    # Prefer Numpy; move to front.
-                    libs = numpy_libs + [fn for fn in libs if fn not in numpy_libs]
-                if self.blas_lib is not None:
-                    libs = [lib for lib in libs if self.blas_lib in lib]
-                for fn in libs:
-                    ld_flags += ["-L%s" % os.path.dirname(fn), "-l:%s" % os.path.basename(fn)]
-                have_blas_lib = True
-        if not have_blas_lib and self.search_for_numpy_blas:
-            # Find related Numpy libs.
-            # Numpy usually comes with OpenBlas, and Numpy is probably loaded anyway.
-            # Even do this before the other libs below, as it is likely
-            # that this OpenBlas lib is correctly initialized already.
-            import numpy
-
-            numpy_dir = os.path.dirname(numpy.__file__)
-            if os.path.exists("%s/.libs" % numpy_dir):
-                ld_flags += ["-L%s/.libs" % numpy_dir]
-                from glob import glob
-
-                for f in glob("%s/.libs/*.so" % numpy_dir):
-                    f = os.path.basename(f)
-                    if self.blas_lib is not None and self.blas_lib not in f:
-                        continue
-                    if f.startswith("lib"):
-                        f = f[3:]
-                    if f.endswith(".so"):
-                        f = f[:-3]
-                    ld_flags += ["-l%s" % f]
-                    have_blas_lib = True
-        if not have_blas_lib and self.search_for_system_blas:
-            # Try to just link against blas/f77blas
-            # (both can potentially have the symbol) if it finds the lib.
-            if find_lib("blas"):
-                ld_flags += ["-lblas"]
-                have_blas_lib = True
-            if find_lib("f77blas"):
-                ld_flags += ["-lf77blas"]
-                have_blas_lib = True
-        if not have_blas_lib:
-            print("WARNING: OpMaker: no BLAS lib found")
+            c_macro_defines["HAVE_CUSTOM_BLAS"] = "1"
+
         comp = tf_util.OpCodeCompiler(
             base_name=self.name,
             code_version=self.description.code_version,
             code=self._make_code(),
             include_deps=[self.support_native_op_cpp_filename],
             ld_flags=ld_flags,
+            c_macro_defines=c_macro_defines,
             use_cuda_if_available=self.with_cuda,
             log_stream=self.log_stream,
             **dict(self.compiler_opts),
@@ -1520,12 +1473,14 @@ def fast_baum_welch_staircase(am_scores, seq_lens, **opts):
 
 
 def ctc_loss(
+    *,
     logits,
     logits_seq_lens,
     logits_time_major,
     targets,
     targets_seq_lens,
-
+    label_loop: Optional[bool] = None,
+    ctc_merge_repeated: Optional[bool] = None,
     logits_normalize=True,
     grad_wrt_softmax_in=True,
     blank_index=-1,
@@ -1540,7 +1495,8 @@ def ctc_loss(
    :param bool logits_time_major:
    :param tf.Tensor targets: batch-major, [batch,time]
    :param tf.Tensor targets_seq_lens: (batch,)
-    :param
+    :param label_loop:
+    :param ctc_merge_repeated: alias for label_loop
    :param bool logits_normalize: apply log_softmax on logits (default).
        if False, you might also set grad_wrt_softmax_in=False
    :param bool grad_wrt_softmax_in: assume ``p(s|x) = softmax(logits)``, and define the gradient w.r.t. logits.
@@ -1551,6 +1507,11 @@ def ctc_loss(
    :return: loss, shape (batch,)
    :rtype: tf.Tensor
    """
+    if ctc_merge_repeated is not None:
+        assert label_loop is None
+        label_loop = ctc_merge_repeated
+    if label_loop is None:
+        label_loop = True
    assert logits.get_shape().ndims == 3 and logits.get_shape().dims[-1].value
    dim = logits.get_shape().dims[-1].value
    if not logits_time_major:
@@ -1567,7 +1528,7 @@ def ctc_loss(
        blank_index += dim
    assert 0 <= blank_index < dim
    edges, weights, start_end_states = get_ctc_fsa_fast_bw(
-        targets=targets, seq_lens=targets_seq_lens, blank_idx=blank_index, label_loop=
+        targets=targets, seq_lens=targets_seq_lens, blank_idx=blank_index, label_loop=label_loop
    )
    fwdbwd, obs_scores = fast_baum_welch(
        am_scores=-log_sm, float_idx=seq_mask, edges=edges, weights=weights, start_end_states=start_end_states
@@ -1607,7 +1568,9 @@ def fast_viterbi(am_scores, am_seq_len, edges, weights, start_end_states):
    return alignment, scores
 
 
-def ctc_loss_viterbi(
+def ctc_loss_viterbi(
+    *, logits, logits_seq_lens, logits_time_major, targets, targets_seq_lens, blank_index=-1, label_loop: bool = True
+):
    """
    Similar to :func:`ctc_loss`.
    However, instead of using the full sum, we use the best path (i.e. Viterbi instead of Baum-Welch).
@@ -1619,6 +1582,7 @@ def ctc_loss_viterbi(logits, logits_seq_lens, logits_time_major, targets, target
    :param tf.Tensor targets: batch-major, [batch,time]
    :param tf.Tensor targets_seq_lens: (batch,)
    :param int blank_index: vocab index of the blank symbol
+    :param label_loop:
    :return: loss, shape (batch,)
    :rtype: tf.Tensor
    """
@@ -1632,7 +1596,7 @@ def ctc_loss_viterbi(logits, logits_seq_lens, logits_time_major, targets, target
        blank_index += dim
    assert 0 <= blank_index < dim
    edges, weights, start_end_states = get_ctc_fsa_fast_bw(
-        targets=targets, seq_lens=targets_seq_lens, blank_idx=blank_index
+        targets=targets, seq_lens=targets_seq_lens, blank_idx=blank_index, label_loop=label_loop
    )
    alignment, scores = fast_viterbi(
        am_scores=log_sm, am_seq_len=logits_seq_lens, edges=edges, weights=weights, start_end_states=start_end_states
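
A usage sketch for the now keyword-only ctc_loss signature, showing the ctc_merge_repeated alias that the new code resolves to label_loop. It assumes a RETURNN + TensorFlow setup in which the native op can be compiled; tensor shapes and values are illustrative only.

import tensorflow as tf
from returnn.tf.native_op import ctc_loss

n_time, n_batch, n_dim = 50, 4, 11           # blank is the last class (blank_index=-1)
logits = tf.random.normal([n_time, n_batch, n_dim])
logits_seq_lens = tf.constant([50, 45, 30, 20])
targets = tf.random.uniform([n_batch, 20], maxval=n_dim - 1, dtype=tf.int32)  # [batch, time]
targets_seq_lens = tf.constant([20, 18, 12, 7])

loss = ctc_loss(
    logits=logits,
    logits_seq_lens=logits_seq_lens,
    logits_time_major=True,
    targets=targets,
    targets_seq_lens=targets_seq_lens,
    ctc_merge_repeated=False,  # alias; internally sets label_loop=False per the code above
)
print(loss)  # shape (batch,)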
returnn/tf/network.py
CHANGED
@@ -4428,7 +4428,7 @@ def help_on_tf_exception(
             data = extern_data.data[data_key]
             info += ", %s" % data
         print(" %r: %s" % (key, info), file=file)
-        if data and data.sparse:
+        if data is not None and data.sparse:
             if v_minmax[0] < 0 or v_minmax[1] >= data.dim:
                 print(" WARNING, invalid label for data", data, file=file)
     elif feed_dict is None: