returnn 1.20240905.172412__tar.gz → 1.20240906.11340__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of returnn might be problematic.

Files changed (464)
  1. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/PKG-INFO +1 -1
  2. returnn-1.20240906.11340/_setup_info_generated.py +2 -0
  3. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/_utils.py +1 -1
  4. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/array_.py +9 -8
  5. returnn-1.20240906.11340/returnn/frontend/loss.py +181 -0
  6. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/parameter.py +0 -2
  7. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/torch/frontend/_backend.py +1 -2
  8. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn.egg-info/PKG-INFO +1 -1
  9. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_rf_base.py +115 -0
  10. returnn-1.20240905.172412/_setup_info_generated.py +0 -2
  11. returnn-1.20240905.172412/returnn/frontend/loss.py +0 -93
  12. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/.editorconfig +0 -0
  13. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/.gitignore +0 -0
  14. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/.gitmodules +0 -0
  15. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/.kateconfig +0 -0
  16. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/CHANGELOG.md +0 -0
  17. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/CODEOWNERS +0 -0
  18. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/CONTRIBUTING.md +0 -0
  19. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/LICENSE +0 -0
  20. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/MANIFEST.in +0 -0
  21. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/README.rst +0 -0
  22. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/__init__.py +0 -0
  23. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/12AX.cluster_map +0 -0
  24. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/_setup_returnn_env.py +0 -0
  25. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-fwd.config +0 -0
  26. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-horovod-mpi.py +0 -0
  27. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-horovod-mpi.py.sh +0 -0
  28. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-horovod-mpi.sh +0 -0
  29. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-hyper-param-tuning.config +0 -0
  30. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-iter-dataset.py +0 -0
  31. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-list-devices.py +0 -0
  32. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-lua-torch-layer.config +0 -0
  33. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-pretrain.config +0 -0
  34. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-record-and-push-to-webserver.py +0 -0
  35. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-returnn-as-framework.py +0 -0
  36. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-rf-pt-benchmark.py +0 -0
  37. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-rf.config +0 -0
  38. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-rhn-enwik8.config +0 -0
  39. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-sprint-interface.py +0 -0
  40. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-tf-att-copy.config +0 -0
  41. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-tf-attention.config +0 -0
  42. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-tf-chunking-blstm.12ax.config +0 -0
  43. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-tf-contribrnn-lstm.12ax.config +0 -0
  44. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-tf-enc-dec.config +0 -0
  45. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-tf-hard-att-copy.config +0 -0
  46. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-tf-lstm-benchmark.py +0 -0
  47. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-tf-maxgradnorm-lstm.12ax.config +0 -0
  48. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-tf-native-lstm-lowmem.12ax.config +0 -0
  49. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-tf-native-lstm.12ax.config +0 -0
  50. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-tf-native-lstm2.12ax.config +0 -0
  51. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-tf-native-lstm2.12ax.tuned.config +0 -0
  52. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-tf-neural-transducer.12ax.config +0 -0
  53. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-tf-rec-explicit-lstm.config +0 -0
  54. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-tf-rec-explicit-rnn.config +0 -0
  55. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-tf-rec-self-att.config +0 -0
  56. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-tf-search-compiled-graph.py +0 -0
  57. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-tf-vanilla-lstm.12ax.config +0 -0
  58. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-timit-lstm-ctc.config +0 -0
  59. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-torch.config +0 -0
  60. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo-upd-mult-model.lstm.12ax.config +0 -0
  61. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/demo.sh +0 -0
  62. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png +0 -0
  63. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png +0 -0
  64. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png +0 -0
  65. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/mdlstm/IAM/README.txt +0 -0
  66. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/mdlstm/IAM/chars.txt +0 -0
  67. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/mdlstm/IAM/config_demo +0 -0
  68. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/mdlstm/IAM/config_fwd +0 -0
  69. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/mdlstm/IAM/config_real +0 -0
  70. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/mdlstm/IAM/create_IAM_dataset.py +0 -0
  71. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/mdlstm/IAM/decode.py +0 -0
  72. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/mdlstm/IAM/features/raw/demo.h5 +0 -0
  73. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/mdlstm/IAM/go.sh +0 -0
  74. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/mdlstm/IAM/lines.txt +0 -0
  75. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/mdlstm/IAM/split/eval.txt +0 -0
  76. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/mdlstm/IAM/split/train.txt +0 -0
  77. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/mdlstm/IAM/split/valid.txt +0 -0
  78. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/mdlstm/README.md +0 -0
  79. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/mdlstm/artificial/create_test_h5.py +0 -0
  80. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/mdlstm/artificial/forwardconfig +0 -0
  81. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/mdlstm/artificial/go.sh +0 -0
  82. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/mdlstm/artificial/trainconfig +0 -0
  83. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/mdlstm/artificial_rgb/create_test_h5.py +0 -0
  84. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/mdlstm/artificial_rgb/forwardconfig +0 -0
  85. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/mdlstm/artificial_rgb/go.sh +0 -0
  86. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/demos/mdlstm/artificial_rgb/trainconfig +0 -0
  87. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/pyproject.toml +0 -0
  88. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/requirements.txt +0 -0
  89. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/__init__.py +0 -0
  90. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/__main__.py +0 -0
  91. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/__old_mod_loader__.py +0 -0
  92. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/__setup__.py +0 -0
  93. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/config.py +0 -0
  94. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/datasets/__init__.py +0 -0
  95. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/datasets/audio.py +0 -0
  96. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/datasets/basic.py +0 -0
  97. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/datasets/bundle_file.py +0 -0
  98. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/datasets/cached.py +0 -0
  99. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/datasets/cached2.py +0 -0
  100. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/datasets/distrib_files.py +0 -0
  101. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/datasets/generating.py +0 -0
  102. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/datasets/hdf.py +0 -0
  103. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/datasets/lm.py +0 -0
  104. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/datasets/map.py +0 -0
  105. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/datasets/meta.py +0 -0
  106. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/datasets/multi_proc.py +0 -0
  107. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/datasets/normalization_data.py +0 -0
  108. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/datasets/numpy_dump.py +0 -0
  109. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/datasets/postprocessing.py +0 -0
  110. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/datasets/raw_wav.py +0 -0
  111. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/datasets/sprint.py +0 -0
  112. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/datasets/stereo.py +0 -0
  113. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/datasets/util/__init__.py +0 -0
  114. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/datasets/util/feature_extraction.py +0 -0
  115. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/datasets/util/strings.py +0 -0
  116. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/datasets/util/vocabulary.py +0 -0
  117. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/engine/__init__.py +0 -0
  118. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/engine/base.py +0 -0
  119. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/engine/batch.py +0 -0
  120. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/WarpRna/__init__.py +0 -0
  121. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/WarpRna/__main__.py +0 -0
  122. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/WarpRna/warp-rna/.git +0 -0
  123. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/WarpRna/warp-rna/.gitignore +0 -0
  124. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/WarpRna/warp-rna/LICENSE +0 -0
  125. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/WarpRna/warp-rna/README.md +0 -0
  126. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/WarpRna/warp-rna/aligner.gif +0 -0
  127. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/WarpRna/warp-rna/check.png +0 -0
  128. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/WarpRna/warp-rna/core.cu +0 -0
  129. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/WarpRna/warp-rna/core.h +0 -0
  130. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/WarpRna/warp-rna/core_cpu.cpp +0 -0
  131. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/WarpRna/warp-rna/pytorch_binding/LICENSE +0 -0
  132. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/WarpRna/warp-rna/pytorch_binding/MANIFEST.in +0 -0
  133. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/WarpRna/warp-rna/pytorch_binding/README.md +0 -0
  134. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/WarpRna/warp-rna/pytorch_binding/binding.cpp +0 -0
  135. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.cu +0 -0
  136. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.h +0 -0
  137. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/WarpRna/warp-rna/pytorch_binding/requirements.txt +0 -0
  138. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/WarpRna/warp-rna/pytorch_binding/setup.py +0 -0
  139. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/__init__.py +0 -0
  140. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/test.py +0 -0
  141. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/WarpRna/warp-rna/ref_rna.py +0 -0
  142. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/setup.py +0 -0
  143. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op.cc +0 -0
  144. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op_kernel_tmpl.h +0 -0
  145. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/warp_rna/__init__.py +0 -0
  146. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/WarpRna/warp-rna/test.cpp +0 -0
  147. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/__init__.py +0 -0
  148. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/graph_editor/README.md +0 -0
  149. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/graph_editor/__init__.py +0 -0
  150. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/graph_editor/edit.py +0 -0
  151. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/graph_editor/reroute.py +0 -0
  152. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/graph_editor/select.py +0 -0
  153. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/graph_editor/subgraph.py +0 -0
  154. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/graph_editor/transform.py +0 -0
  155. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/extern/graph_editor/util.py +0 -0
  156. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/forward_iface.py +0 -0
  157. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/__init__.py +0 -0
  158. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/_backend.py +0 -0
  159. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/_native/__init__.py +0 -0
  160. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/_native/backend.cpp +0 -0
  161. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/_native/backend.hpp +0 -0
  162. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/_native/module.cpp +0 -0
  163. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/_native/module.hpp +0 -0
  164. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/_native/py_utils.hpp +0 -0
  165. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/_native/tensor_ops.cpp +0 -0
  166. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/_native/tensor_ops.hpp +0 -0
  167. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/_numpy_backend.py +0 -0
  168. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/_random_journal.py +0 -0
  169. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/attention.py +0 -0
  170. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/audio/__init__.py +0 -0
  171. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/audio/mel.py +0 -0
  172. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/audio/specaugment.py +0 -0
  173. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/backend.py +0 -0
  174. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/build_from_dict.py +0 -0
  175. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/cond.py +0 -0
  176. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/const.py +0 -0
  177. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/container.py +0 -0
  178. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/control_flow_ctx.py +0 -0
  179. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/conv.py +0 -0
  180. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/conversions/__init__.py +0 -0
  181. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/conversions/espnet_e_branchformer.py +0 -0
  182. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/conversions/hf_llama.py +0 -0
  183. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/conversions/torch_nn.py +0 -0
  184. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/decoder/__init__.py +0 -0
  185. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/decoder/transformer.py +0 -0
  186. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/device.py +0 -0
  187. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/dims.py +0 -0
  188. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/dropout.py +0 -0
  189. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/dtype.py +0 -0
  190. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/encoder/__init__.py +0 -0
  191. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/encoder/base.py +0 -0
  192. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/encoder/conformer.py +0 -0
  193. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/encoder/e_branchformer.py +0 -0
  194. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/gradient.py +0 -0
  195. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/graph.py +0 -0
  196. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/hooks.py +0 -0
  197. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/init.py +0 -0
  198. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/label_smoothing.py +0 -0
  199. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/linear.py +0 -0
  200. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/loop.py +0 -0
  201. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/math_.py +0 -0
  202. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/matmul.py +0 -0
  203. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/module.py +0 -0
  204. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/normalization.py +0 -0
  205. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/parametrizations.py +0 -0
  206. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/parametrize.py +0 -0
  207. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/piecewise_linear.py +0 -0
  208. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/rand.py +0 -0
  209. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/rec.py +0 -0
  210. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/reduce.py +0 -0
  211. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/run_ctx.py +0 -0
  212. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/signal.py +0 -0
  213. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/state.py +0 -0
  214. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/stepwise_scheduler.py +0 -0
  215. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/tensor_array.py +0 -0
  216. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/frontend/types.py +0 -0
  217. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/import_/__init__.py +0 -0
  218. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/import_/common.py +0 -0
  219. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/import_/git.py +0 -0
  220. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/import_/import_.py +0 -0
  221. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/learning_rate_control.py +0 -0
  222. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/log.py +0 -0
  223. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/native_op.cpp +0 -0
  224. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/native_op.py +0 -0
  225. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/pretrain.py +0 -0
  226. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/sprint/__init__.py +0 -0
  227. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/sprint/cache.py +0 -0
  228. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/sprint/control.py +0 -0
  229. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/sprint/error_signals.py +0 -0
  230. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/sprint/extern_interface.py +0 -0
  231. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/sprint/interface.py +0 -0
  232. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tensor/README.md +0 -0
  233. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tensor/__init__.py +0 -0
  234. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tensor/_dim_extra.py +0 -0
  235. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tensor/_tensor_extra.py +0 -0
  236. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tensor/_tensor_mixin_base.py +0 -0
  237. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tensor/_tensor_op_overloads.py +0 -0
  238. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tensor/control_flow_ctx.py +0 -0
  239. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tensor/dim.py +0 -0
  240. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tensor/marked_dim.py +0 -0
  241. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tensor/tensor.py +0 -0
  242. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tensor/tensor_dict.py +0 -0
  243. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tensor/utils.py +0 -0
  244. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/__init__.py +0 -0
  245. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/compat.py +0 -0
  246. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/data_pipeline.py +0 -0
  247. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/distributed.py +0 -0
  248. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/engine.py +0 -0
  249. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/frontend_layers/README.md +0 -0
  250. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/frontend_layers/__init__.py +0 -0
  251. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/frontend_layers/_backend.py +0 -0
  252. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/frontend_layers/_utils.py +0 -0
  253. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/frontend_layers/cond.py +0 -0
  254. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/frontend_layers/config_entry_points.py +0 -0
  255. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/frontend_layers/debug_eager_mode.py +0 -0
  256. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/frontend_layers/dims.py +0 -0
  257. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/frontend_layers/layer.py +0 -0
  258. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/frontend_layers/loop.py +0 -0
  259. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/frontend_layers/make_layer.py +0 -0
  260. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/frontend_layers/masked_computation.py +0 -0
  261. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/frontend_layers/parameter_assign.py +0 -0
  262. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/frontend_layers/prev_tensor_ref.py +0 -0
  263. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/frontend_low_level/__init__.py +0 -0
  264. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/frontend_low_level/_backend.py +0 -0
  265. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/horovod.py +0 -0
  266. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/hyper_param_tuning.py +0 -0
  267. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/layers/__init__.py +0 -0
  268. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/layers/base.py +0 -0
  269. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/layers/basic.py +0 -0
  270. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/layers/rec.py +0 -0
  271. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/layers/segmental_model.py +0 -0
  272. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/layers/signal_processing.py +0 -0
  273. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/layers/variable.py +0 -0
  274. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/native_op.py +0 -0
  275. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/network.py +0 -0
  276. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/sprint.py +0 -0
  277. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/updater.py +0 -0
  278. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/util/__init__.py +0 -0
  279. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/util/basic.py +0 -0
  280. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/util/data.py +0 -0
  281. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/util/gradient_checkpoint.py +0 -0
  282. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/util/ken_lm.py +0 -0
  283. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/tf/util/open_fst.py +0 -0
  284. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/torch/README.md +0 -0
  285. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/torch/__init__.py +0 -0
  286. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/torch/data/__init__.py +0 -0
  287. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/torch/data/extern_data.py +0 -0
  288. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/torch/data/pipeline.py +0 -0
  289. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/torch/data/queued_data_iter.py +0 -0
  290. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/torch/data/returnn_dataset_wrapper.py +0 -0
  291. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/torch/data/tensor_utils.py +0 -0
  292. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/torch/distributed.py +0 -0
  293. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/torch/engine.py +0 -0
  294. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/torch/frontend/__init__.py +0 -0
  295. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/torch/frontend/_rand.py +0 -0
  296. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/torch/frontend/bridge.py +0 -0
  297. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/torch/frontend/raw_ops.py +0 -0
  298. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/torch/updater.py +0 -0
  299. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/torch/util/README.md +0 -0
  300. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/torch/util/__init__.py +0 -0
  301. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/torch/util/array_.py +0 -0
  302. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/torch/util/diagnose_gpu.py +0 -0
  303. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/torch/util/gradient_checkpoint.py +0 -0
  304. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/torch/util/module.py +0 -0
  305. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/torch/util/scaled_gradient.py +0 -0
  306. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/util/__init__.py +0 -0
  307. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/util/basic.py +0 -0
  308. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/util/better_exchook.py +0 -0
  309. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/util/bpe.py +0 -0
  310. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/util/debug.py +0 -0
  311. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/util/debug_helpers.py +0 -0
  312. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/util/file_cache.py +0 -0
  313. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/util/fsa.py +0 -0
  314. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/util/literal_py_to_pickle.py +0 -0
  315. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/util/math.py +0 -0
  316. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/util/multi_proc_non_daemonic_spawn.py +0 -0
  317. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/util/native_code_compiler.py +0 -0
  318. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/util/pprint.py +0 -0
  319. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/util/py-to-pickle.cpp +0 -0
  320. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/util/py_compat.py +0 -0
  321. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/util/py_ext_mod_compiler.py +0 -0
  322. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/util/result_with_reason.py +0 -0
  323. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/util/sig_proc.py +0 -0
  324. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/util/task_system.py +0 -0
  325. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/util/train_proc_manager.py +0 -0
  326. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn/util/watch_memory.py +0 -0
  327. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn.egg-info/SOURCES.txt +0 -0
  328. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn.egg-info/dependency_links.txt +0 -0
  329. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/returnn.egg-info/top_level.txt +0 -0
  330. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/rnn.py +0 -0
  331. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/setup.cfg +0 -0
  332. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/setup.py +0 -0
  333. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/DummySprintExec.py +0 -0
  334. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/PyCharm-inspection-profile.xml +0 -0
  335. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/PyCharm.idea/.gitignore +0 -0
  336. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/PyCharm.idea/.name +0 -0
  337. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/PyCharm.idea/codeStyleSettings.xml +0 -0
  338. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/PyCharm.idea/codeStyles/Project.xml +0 -0
  339. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/PyCharm.idea/codeStyles/codeStyleConfig.xml +0 -0
  340. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/PyCharm.idea/inspectionProfiles/Project_Default.xml +0 -0
  341. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/PyCharm.idea/inspectionProfiles/profiles_settings.xml +0 -0
  342. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/PyCharm.idea/misc.xml +0 -0
  343. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/PyCharm.idea/modules.xml +0 -0
  344. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/PyCharm.idea/returnn.iml +0 -0
  345. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/PyCharm.idea/scopes/scope_settings.xml +0 -0
  346. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/_set_num_threads1.py +0 -0
  347. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/_setup_returnn_env.py +0 -0
  348. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/_setup_test_env.py +0 -0
  349. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/bpe-unicode-demo.codes +0 -0
  350. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/bpe-unicode-demo.vocab +0 -0
  351. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/lexicon_opt.fst +0 -0
  352. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/lexicon_opt.isyms +0 -0
  353. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/lexicon_opt.jpg +0 -0
  354. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/lexicon_opt.osyms +0 -0
  355. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/lint_common.py +0 -0
  356. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/pycharm-inspect.py +0 -0
  357. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/pylint.py +0 -0
  358. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/returnn-as-framework.py +0 -0
  359. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/rf_utils.py +0 -0
  360. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/spelling.dic +0 -0
  361. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_Config.py +0 -0
  362. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_Dataset.py +0 -0
  363. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_Fsa.py +0 -0
  364. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_GeneratingDataset.py +0 -0
  365. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_HDFDataset.py +0 -0
  366. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_LearningRateControl.py +0 -0
  367. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_Log.py +0 -0
  368. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_MultiProcDataset.py +0 -0
  369. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_Pretrain.py +0 -0
  370. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_ResNet.py +0 -0
  371. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_SprintDataset.py +0 -0
  372. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_SprintInterface.py +0 -0
  373. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_TFEngine.py +0 -0
  374. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_TFNativeOp.py +0 -0
  375. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_TFNetworkLayer.py +0 -0
  376. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_TFNetworkRecLayer.py +0 -0
  377. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_TFNetworkSigProcLayer.py +0 -0
  378. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_TFUpdater.py +0 -0
  379. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_TFUtil.py +0 -0
  380. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_TF_determinism.py +0 -0
  381. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_TaskSystem.py +0 -0
  382. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_TaskSystem_SharedMem.py +0 -0
  383. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_TranslationDataset.py +0 -0
  384. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_Util.py +0 -0
  385. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_demos.py +0 -0
  386. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_fork_exec.py +0 -0
  387. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_hdf_dump.py +0 -0
  388. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_rf_array.py +0 -0
  389. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_rf_attention.py +0 -0
  390. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_rf_cond.py +0 -0
  391. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_rf_const.py +0 -0
  392. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_rf_container.py +0 -0
  393. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_rf_conv.py +0 -0
  394. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_rf_decoder_transformer.py +0 -0
  395. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_rf_encoder_conformer.py +0 -0
  396. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_rf_gradient.py +0 -0
  397. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_rf_label_smoothing.py +0 -0
  398. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_rf_loop.py +0 -0
  399. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_rf_math.py +0 -0
  400. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_rf_normalization.py +0 -0
  401. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_rf_piecewise_linear.py +0 -0
  402. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_rf_rec.py +0 -0
  403. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_rf_reduce.py +0 -0
  404. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_rf_signal.py +0 -0
  405. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_tensor.py +0 -0
  406. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_tools.py +0 -0
  407. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_torch_dataset.py +0 -0
  408. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_torch_engine.py +0 -0
  409. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_torch_frontend.py +0 -0
  410. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_torch_internal_frontend.py +0 -0
  411. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/test_torch_util.py +0 -0
  412. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tests/torch_utils.py +0 -0
  413. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/_setup_returnn_env.py +0 -0
  414. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/analyze-dataset-batches.py +0 -0
  415. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/bliss-collect-seq-lens.py +0 -0
  416. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/bliss-dump-text.py +0 -0
  417. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/bliss-get-segment-names.py +0 -0
  418. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/bliss-to-ogg-zip.py +0 -0
  419. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/bpe-create-lexicon.py +0 -0
  420. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/calculate-word-error-rate.py +0 -0
  421. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/cleanup-old-models.py +0 -0
  422. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/collect-orth-symbols.py +0 -0
  423. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/collect-words.py +0 -0
  424. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/compile_native_op.py +0 -0
  425. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/compile_tf_graph.py +0 -0
  426. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/debug-dump-search-scores.py +0 -0
  427. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/debug-plot-search-scores.py +0 -0
  428. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/dump-dataset-raw-strings.py +0 -0
  429. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/dump-dataset.py +0 -0
  430. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/dump-forward-stats.py +0 -0
  431. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/dump-forward.py +0 -0
  432. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/dump-network-json.py +0 -0
  433. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/dump-pickle.py +0 -0
  434. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/extract_state_tying_from_dataset.py +0 -0
  435. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/get-attention-weights.py +0 -0
  436. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/get-best-model-epoch.py +0 -0
  437. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/hdf_dump.py +0 -0
  438. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/hdf_dump_translation_dataset.py +0 -0
  439. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/import-blocks-mt-model.py +0 -0
  440. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/import-t2t-mt-model.py +0 -0
  441. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/lattice_rescorer/.gitignore +0 -0
  442. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/lattice_rescorer/Makefile +0 -0
  443. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/lattice_rescorer/README.md +0 -0
  444. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/lattice_rescorer/example/README.md +0 -0
  445. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/lattice_rescorer/example/libs_list +0 -0
  446. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.config +0 -0
  447. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.keep_over_epoch.lstm2.config +0 -0
  448. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/lattice_rescorer/example/rescore_lattice.sh +0 -0
  449. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/lattice_rescorer/example/state_vars_list +0 -0
  450. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/lattice_rescorer/example/tensor_names_list +0 -0
  451. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/lattice_rescorer/file.h +0 -0
  452. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/lattice_rescorer/htklatticerescorer.cc +0 -0
  453. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/lattice_rescorer/htklatticerescorer.h +0 -0
  454. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/lattice_rescorer/main.cc +0 -0
  455. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/lattice_rescorer/rescorer.h +0 -0
  456. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/lattice_rescorer/vocabulary.cc +0 -0
  457. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/lattice_rescorer/vocabulary.h +0 -0
  458. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/tf_avg_checkpoints.py +0 -0
  459. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/tf_inspect_checkpoint.py +0 -0
  460. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/tf_inspect_summary_log.py +0 -0
  461. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/torch_avg_checkpoints.py +0 -0
  462. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/torch_export_to_onnx.py +0 -0
  463. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/torch_inspect_checkpoint.py +0 -0
  464. {returnn-1.20240905.172412 → returnn-1.20240906.11340}/tools/torch_inspect_checkpoint_and_opt.py +0 -0

PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20240905.172412
+Version: 1.20240906.11340
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer

_setup_info_generated.py
@@ -0,0 +1,2 @@
+version = '1.20240906.011340'
+long_version = '1.20240906.011340+git.1eb8103'

returnn/frontend/_utils.py
@@ -292,5 +292,5 @@ def _slice_value_is_reduce(v: Union[None, slice, int, numpy.number, numpy.ndarra
         return v.ndim == 0
     if isinstance(v, Tensor):
         assert len(v.dims) <= 1, f"strided_slice: expect scalar or vector, got Tensor with dims {v.dims}"
-        return v.dims == 0
+        return len(v.dims) == 0
     raise TypeError(f"strided_slice: got unexpected value of type {type(v).__name__}")
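
Note on the fix above: Tensor.dims is a tuple of Dim objects, so the old check "v.dims == 0" compared a tuple with an integer and was always False; a scalar slice value was therefore never recognized as reducing the axis. The new check tests the rank instead. A minimal plain-Python sketch of the difference (dims stands in for Tensor.dims; no RETURNN imports needed):

    dims = ()              # dims of a scalar Tensor: an empty tuple
    print(dims == 0)       # False -- the old check, wrong even for scalars
    print(len(dims) == 0)  # True  -- the new check, correctly detects a scalar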

returnn/frontend/array_.py
@@ -412,16 +412,17 @@ def pad(
     :return: padded tensor, out_dims. out dims are for each dim in axes
     """
     assert len(axes) == len(padding)
-    if not out_dims:
-        for left, right in padding:
-            if isinstance(left, Dim):
-                assert not left.need_masking(), f"padding {padding} does not support dynamic left padding"
-            if isinstance(right, Dim):
-                assert not right.need_masking(), f"padding {padding} does not support dynamic right padding"
-        # Note that even dynamic middle dims is not exactly correct...
-        out_dims = [left + middle + right for middle, (left, right) in zip(axes, padding)]
     if handle_dynamic_dims is None:
         handle_dynamic_dims = _pad_handle_dynamic_dims_default(axes, padding, mode=mode)
+    if not out_dims:
+        if handle_dynamic_dims:
+            for left, right in padding:
+                if isinstance(left, Dim):
+                    assert not left.need_masking(), f"padding {padding} does not support dynamic left padding"
+                if isinstance(right, Dim):
+                    assert not right.need_masking(), f"padding {padding} does not support dynamic right padding"
+        # Note that even dynamic middle dims is not exactly correct...
+        out_dims = [left + middle + right for middle, (left, right) in zip(axes, padding)]
     # noinspection PyProtectedMember
     return (
         source._raw_backend.pad(
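
With this reordering, the dynamic-padding assertions only apply when handle_dynamic_dims is enabled, so rf.pad accepts a dynamic Dim as left/right padding when handle_dynamic_dims=False; the new rf.edit_distance below relies on exactly that. A hedged usage sketch, assuming the eager PyTorch backend; the Tensor/Dim construction only mirrors the style of the RETURNN test suite and is illustrative, not part of this diff:

    import torch
    import returnn.frontend as rf
    from returnn.tensor import Tensor, Dim

    rf.select_backend_torch()
    batch_dim = Dim(2, name="batch")
    # Dynamic time dim with per-sequence lengths [4, 3].
    time_dim = Dim(
        Tensor("lens", dims=[batch_dim], dtype="int32", raw_tensor=torch.tensor([4, 3], dtype=torch.int32)),
        name="time",
    )
    x = rf.random_normal([batch_dim, time_dim])
    # Pad the time axis by the (dynamic) time dim itself on both sides; with
    # handle_dynamic_dims=False the per-sequence lengths are left as they are.
    padded, (padded_time_dim,) = rf.pad(
        x, axes=[time_dim], padding=[(time_dim, time_dim)], handle_dynamic_dims=False
    )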

returnn/frontend/loss.py
@@ -0,0 +1,181 @@
+"""
+Loss functions
+"""
+
+from __future__ import annotations
+from returnn.tensor import Tensor, Dim
+import returnn.frontend as rf
+
+
+__all__ = ["cross_entropy", "ctc_loss", "edit_distance"]
+
+
+def cross_entropy(
+    *,
+    estimated: Tensor,
+    target: Tensor,
+    axis: Dim,
+    estimated_type: str,
+) -> Tensor:
+    """
+    ``target`` is supposed to be in probability space (normalized). It can also be sparse, i.e. contain class indices.
+    ``estimated`` can be probs, log-probs or logits, specified via ``estimated_type``.
+
+    Assuming both are in probability space, the cross entropy is:
+
+        H(target,estimated) = -reduce_sum(target * log(estimated), axis=axis)
+                            = -matmul(target, log(estimated), reduce=axis)
+
+    In case you want label smoothing, you can use e.g.::
+
+        ce = nn.cross_entropy(
+            target=nn.label_smoothing(target, 0.1),
+            estimated=estimated)
+
+    :param estimated: probs, log-probs or logits, specified via ``estimated_type``
+    :param target: probs, normalized, can also be sparse
+    :param axis: class labels dim over which softmax is computed
+    :param estimated_type: "probs", "log-probs" or "logits"
+    :return: cross entropy (same Dims as 'estimated' but without 'axis')
+    """
+
+    if estimated_type == "logits":
+        # This is a common case and most backends provide optimized functions for it.
+        # noinspection PyProtectedMember
+        return estimated._raw_backend.softmax_cross_entropy_with_logits(logits=estimated, targets=target, axis=axis)
+    if estimated_type == "probs":
+        log_prob = rf.log(estimated)  # TODO: make numerically stable
+    elif estimated_type == "log-probs":
+        log_prob = estimated
+    else:
+        raise ValueError("estimated_type must be 'probs', 'log-probs' or 'logits'")
+    if target.sparse_dim:
+        return -rf.gather(log_prob, indices=target, axis=axis)
+    return -rf.matmul(target, log_prob, reduce=axis)
+
+
+def ctc_loss(
+    *,
+    logits: Tensor,
+    logits_normalized: bool = False,
+    targets: Tensor,
+    input_spatial_dim: Dim,
+    targets_spatial_dim: Dim,
+    blank_index: int,
+    max_approx: bool = False,
+) -> Tensor:
+    """
+    Calculates the CTC loss.
+
+    Internally, this uses :func:`returnn.tf.native_op.ctc_loss`
+    which is equivalent to tf.nn.ctc_loss but more efficient.
+
+    Output is of shape [B].
+
+    :param logits: (before softmax). shape [B...,input_spatial,C]
+    :param logits_normalized: whether the logits are already normalized (e.g. via log-softmax)
+    :param targets: sparse. shape [B...,targets_spatial] -> C
+    :param input_spatial_dim: spatial dim of input logits
+    :param targets_spatial_dim: spatial dim of targets
+    :param blank_index: vocab index of the blank symbol
+    :param max_approx: if True, use max instead of sum over alignments (max approx, Viterbi)
+    :return: loss shape [B...]
+    """
+    # noinspection PyProtectedMember
+    return logits._raw_backend.ctc_loss(
+        logits=logits,
+        logits_normalized=logits_normalized,
+        targets=targets,
+        input_spatial_dim=input_spatial_dim,
+        targets_spatial_dim=targets_spatial_dim,
+        blank_index=blank_index,
+        max_approx=max_approx,
+    )
+
+
+def edit_distance(a: Tensor, a_spatial_dim: Dim, b: Tensor, b_spatial_dim: Dim, *, dtype: str = "int32") -> Tensor:
+    """
+    :param a: [B,Ta]
+    :param a_spatial_dim: Ta
+    :param b: [B,Tb]
+    :param b_spatial_dim: Tb
+    :param dtype:
+    :return: [B]
+    """
+    import numpy  # just for iinfo on dtype to get max value
+
+    # The axis permutation is just an efficiency optimization.
+    a = a.copy_transpose([a_spatial_dim] + a.remaining_dims(a_spatial_dim))
+    b = b.copy_transpose([b_spatial_dim] + b.remaining_dims(b_spatial_dim))
+    dev = a.device
+    max_dist_err = numpy.iinfo(dtype).max
+    n_a_max_len = a_spatial_dim.get_dim_value()
+    n_b_max_len = b_spatial_dim.get_dim_value()
+    if int(n_a_max_len) < int(n_b_max_len):
+        a, b = b, a
+        a_spatial_dim, b_spatial_dim = b_spatial_dim, a_spatial_dim
+        n_a_max_len, n_b_max_len = n_b_max_len, n_a_max_len
+    # Now n_a_max_len >= n_b_max_len.
+    batch_dims = a.remaining_dims(a_spatial_dim)
+    for dim in b.remaining_dims(b_spatial_dim):
+        if dim not in batch_dims:
+            batch_dims.append(dim)
+    a_seq_len = a_spatial_dim.get_dyn_size_ext_for_device(dev)  # [B]
+    b_seq_len = b_spatial_dim.get_dyn_size_ext_for_device(dev)  # [B]
+    a_tensor_ext, (a_spatial_dim_ext,) = rf.pad(
+        a, axes=[a_spatial_dim], padding=[(b_spatial_dim, b_spatial_dim)], handle_dynamic_dims=False
+    )  # [Tb+Ta+Tb,B]
+    a_spatial_dim_ext: Dim
+    b_tensor_flipped = rf.reverse_sequence(b, axis=b_spatial_dim, handle_dynamic_dims=False)  # [Tb,B]
+    entry_idx_ = rf.range_over_dim(b_spatial_dim, device=dev)  # [Tb]->Tb
+    b_spatial_dim1 = b_spatial_dim + 1
+    buffer_dim = Dim(3 * b_spatial_dim1.get_dim_value_tensor(), name="buffer")
+    buffer = rf.Parameter([buffer_dim] + batch_dims, device=dev, dtype=dtype, auxiliary=True)  # [3*(Tb+1),B]
+    buffer_offsets = [0, b_spatial_dim1.get_dim_value_tensor(), b_spatial_dim1.get_dim_value_tensor() * 2]
+    result = rf.where((a_seq_len == 0) & (b_seq_len == 0), 0, max_dist_err)  # [B]  # noqa
+
+    # We are going diagonal over (Ta+1) and (Tb+1). (Similar as RETURNN native EditDistanceOp.)
+    # You need to draw the grid on paper to understand all the index math...
+    for u in range(1, n_a_max_len + n_b_max_len + 1):
+
+        prev2_dist, _ = rf.slice(
+            buffer, axis=buffer_dim, start=buffer_offsets[u % 3], size=b_spatial_dim1, out_dim=b_spatial_dim1
+        )  # [Tb+1,B]
+        prev_dist, _ = rf.slice(
+            buffer, axis=buffer_dim, start=buffer_offsets[(u + 1) % 3], size=b_spatial_dim1, out_dim=b_spatial_dim1
+        )  # [Tb+1,B]
+        cur_dist_start_offset = buffer_offsets[(u + 2) % 3]
+
+        del_cost = (
+            rf.slice(prev_dist, axis=b_spatial_dim1, end=b_spatial_dim.get_dim_value_tensor(), out_dim=b_spatial_dim)[0]
+            + 1
+        )  # [Tb,B]
+        ins_cost = rf.slice(prev_dist, axis=b_spatial_dim1, start=1, out_dim=b_spatial_dim)[0] + 1  # [Tb,B]
+        sub_cost = rf.slice(prev2_dist, axis=b_spatial_dim1, start=1, out_dim=b_spatial_dim)[0] + rf.cast(
+            rf.slice(a_tensor_ext, axis=a_spatial_dim_ext, start=u - 1, size=b_spatial_dim, out_dim=b_spatial_dim)[0]
+            != b_tensor_flipped,
+            dtype=dtype,
+        )
+        min_cost = rf.minimum(del_cost, ins_cost, sub_cost)  # [Tb,B]
+        t_a_gt_zero_mask = entry_idx_ > n_b_max_len - u  # [Tb]
+
+        buffer.assign_key(
+            axis=buffer_dim,
+            key=slice(cur_dist_start_offset, cur_dist_start_offset + b_spatial_dim.get_dim_value_tensor()),
+            key_dim=b_spatial_dim,
+            value=rf.where(t_a_gt_zero_mask, min_cost, u),
+        )
+        # last entry in cur_dist, that is where t_b == 0
+        buffer.assign_key(
+            axis=buffer_dim, key=cur_dist_start_offset + b_spatial_dim.get_dim_value_tensor(), key_dim=None, value=u
+        )
+
+        end_offset_a = n_b_max_len + a_seq_len - u  # [B]
+        end_offset_b = n_b_max_len - b_seq_len  # [B]
+        result = rf.where(
+            end_offset_a == end_offset_b,
+            rf.gather(buffer, axis=buffer_dim, indices=cur_dist_start_offset + end_offset_a, clip_to_valid=True),
+            result,
+        )
+
+    return result
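
For orientation, a hedged usage sketch of the new rf.edit_distance (eager PyTorch backend assumed; the Tensor/Dim construction mirrors the style of tests/test_rf_base.py and is illustrative, not part of the released diff):

    import torch
    import returnn.frontend as rf
    from returnn.tensor import Tensor, Dim

    rf.select_backend_torch()
    batch_dim = Dim(2, name="batch")
    a_time = Dim(
        Tensor("a_lens", dims=[batch_dim], dtype="int32", raw_tensor=torch.tensor([4, 3], dtype=torch.int32)),
        name="a_time",
    )
    b_time = Dim(
        Tensor("b_lens", dims=[batch_dim], dtype="int32", raw_tensor=torch.tensor([3, 3], dtype=torch.int32)),
        name="b_time",
    )
    a = Tensor(
        "a", dims=[batch_dim, a_time], dtype="int32",
        raw_tensor=torch.tensor([[1, 2, 3, 4], [1, 2, 3, 0]], dtype=torch.int32),
    )
    b = Tensor(
        "b", dims=[batch_dim, b_time], dtype="int32",
        raw_tensor=torch.tensor([[1, 3, 4], [1, 2, 3]], dtype=torch.int32),
    )
    dist = rf.edit_distance(a, a_time, b, b_time)  # Tensor of shape [batch], dtype int32
    print(dist.raw_tensor)  # per-sequence Levenshtein distances, here [1, 0]

rf.cross_entropy and rf.ctc_loss follow the same keyword-only style, e.g. rf.cross_entropy(estimated=logits, target=targets, axis=classes_dim, estimated_type="logits").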

returnn/frontend/parameter.py
@@ -75,8 +75,6 @@ class Parameter(Tensor[T]):
             raise TypeError(f"rf.Parameter: invalid type for dims_or_tensor: {type(dims_or_tensor)}")
         if not all(isinstance(dim, Dim) for dim in dims):
             raise TypeError(f"rf.Parameter: shape {dims} must be a sequence of Dim")
-        if not all(isinstance(dim.dimension, int) for dim in dims):
-            raise ValueError(f"rf.Parameter: shape {dims} must be static")
         if len(dims) != len(set((d, d.match_priority) for d in dims)):
             raise ValueError(f"rf.Parameter: shape {dims} dims must be unique")
         super(Parameter, self).__init__(
@@ -690,9 +690,8 @@ class TorchBackend(Backend[torch.Tensor]):
         """
         :return: parameter
         """
-        assert all(d.is_static() for d in tensor.dims)
         data = torch.zeros(
-            [d.dimension for d in tensor.dims],
+            [d.get_dim_value() for d in tensor.dims],
            dtype=TorchBackend.as_dtype_raw(tensor.dtype),
            device=device or rf.get_default_device(),
        )
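
Editor's note (commentary, not part of the diff): the backend now sizes the raw torch buffer from the dim's current value instead of its static size. A short illustration of the Dim accessors involved, assuming standard RETURNN Dim semantics (`.dimension` is the static size and is None for dynamic dims):

from returnn.tensor import Dim

feat_dim = Dim(7, name="feature")
assert feat_dim.dimension == 7
assert feat_dim.get_dim_value() == 7  # same as .dimension for a static dim
# For a dynamic dim (as in the Parameter sketch above), .dimension is None, while
# get_dim_value() returns the current (maximum) size -- the integer torch.zeros() needs.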
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20240905.172412
+Version: 1.20240906.11340
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer
@@ -560,3 +560,118 @@ def test_weight_noise():
     rf.weight_noise(conv, "filter", std=0.1)
     time_dim = Dim(11, name="time")
     conv(rf.random_normal([time_dim, in_dim]), in_spatial_dim=time_dim)
+
+
+def test_edit_distance():
+    import numpy
+    import torch
+    from typing import Sequence
+    from collections import namedtuple
+    import itertools
+
+    def _edit_distance_ref_b1(a: Sequence[int], b: Sequence[int]) -> int:
+        """
+        Reference implementation for edit distance.
+        """
+        n = len(a) + 1
+        m = len(b) + 1
+        d = torch.zeros((n, m), dtype=torch.int32)
+        for i in range(n):
+            d[i, 0] = i
+        for j in range(m):
+            d[0, j] = j
+        for j in range(1, m):
+            for i in range(1, n):
+                if a[i - 1] == b[j - 1]:
+                    d[i, j] = d[i - 1, j - 1]
+                else:
+                    d[i, j] = min(
+                        d[i - 1, j] + 1,  # deletion
+                        d[i, j - 1] + 1,  # insertion
+                        d[i - 1, j - 1] + 1,  # substitution
+                    )
+        return int(d[n - 1, m - 1])
+
+    # noinspection PyShadowingNames
+    def _edit_distance_ref(a: Tensor, a_spatial_dim: Dim, b: Tensor, b_spatial_dim: Dim) -> torch.Tensor:
+        """
+        Reference implementation for edit distance.
+        """
+        batch_dim = a.dims[0]
+        assert a.dims == (batch_dim, a_spatial_dim) and b.dims == (batch_dim, b_spatial_dim)
+        res = []
+        for i in range(batch_dim.dimension):
+            assert a_spatial_dim.dyn_size[i] <= a.raw_tensor.size(1)
+            assert b_spatial_dim.dyn_size[i] <= b.raw_tensor.size(1)
+            res.append(
+                _edit_distance_ref_b1(
+                    a.raw_tensor[i, : a_spatial_dim.dyn_size[i]], b.raw_tensor[i, : b_spatial_dim.dyn_size[i]]
+                )
+            )
+        return torch.tensor(res, dtype=torch.int32)
+
+    # noinspection PyShadowingNames
+    def _check_edit_distance(a: Tensor, a_spatial_dim: Dim, b: Tensor, b_spatial_dim: Dim):
+        ref = _edit_distance_ref(a, a_spatial_dim, b, b_spatial_dim)
+        res = rf.edit_distance(a, a_spatial_dim, b, b_spatial_dim)
+        assert res.raw_tensor.shape == ref.shape == a_spatial_dim.dyn_size.shape == b_spatial_dim.dyn_size.shape
+        assert len(ref.shape) == 1
+        print("ref:", ref, "res:", res.raw_tensor)
+        batch_size = ref.shape[0]
+        for i in range(batch_size):
+            assert res.raw_tensor[i] == ref[i], (
+                f"batch idx i={i}, a[i]={a.raw_tensor[i]} len {a_spatial_dim.dyn_size[i]},"
+                f" b[i]={b.raw_tensor[i]} len {b_spatial_dim.dyn_size[i]},"
+                f" ref[i]={ref[i]}, res[i]={res.raw_tensor[i]};\n"
+                f" a={a.raw_tensor} lens {a_spatial_dim.dyn_size},"
+                f" b={b.raw_tensor} lens {b_spatial_dim.dyn_size}"
+            )
+        assert (res.raw_tensor == ref).all()
+
+    SizedTensor = namedtuple("SizedTensor", ["tensor", "seq_lens"])
+
+    _SeqsB1 = [
+        SizedTensor(torch.tensor([[1, 2, 3, 4]]), torch.tensor([4])),
+        SizedTensor(torch.tensor([[1, 2, 3]]), torch.tensor([3])),
+        SizedTensor(torch.tensor([[1, 2, 4]]), torch.tensor([3])),
+        SizedTensor(torch.tensor([[1, 4]]), torch.tensor([2])),
+        SizedTensor(torch.tensor([[5, 2, 4]]), torch.tensor([3])),
+        SizedTensor(torch.tensor([[]], dtype=torch.int64), torch.tensor([0])),
+    ]
+
+    for a, b in itertools.product(_SeqsB1, _SeqsB1):
+        a: SizedTensor
+        b: SizedTensor
+        # noinspection PyShadowingNames
+        batch_dim = Dim(1, name="batch")
+        a_spatial_dim = Dim(Tensor("a_sizes", [batch_dim], dtype="int64", raw_tensor=a.seq_lens))
+        b_spatial_dim = Dim(Tensor("b_sizes", [batch_dim], dtype="int64", raw_tensor=b.seq_lens))
+        a_ = Tensor("a", [batch_dim, a_spatial_dim], dtype="int64", raw_tensor=a.tensor)
+        b_ = Tensor("b", [batch_dim, b_spatial_dim], dtype="int64", raw_tensor=b.tensor)
+        _check_edit_distance(a_, a_spatial_dim, b_, b_spatial_dim)
+
+    rnd = numpy.random.RandomState(42)
+    for a, b in itertools.product(_SeqsB1, _SeqsB1):
+        batch_size = rnd.randint(2, 11)
+        a_max_len = rnd.randint(a.seq_lens[0], a.seq_lens[0] + 5)
+        b_max_len = rnd.randint(b.seq_lens[0], b.seq_lens[0] + 5)
+        a_sizes = rnd.randint(0, a_max_len + 1, size=(batch_size,))
+        b_sizes = rnd.randint(0, b_max_len + 1, size=(batch_size,))
+        a_sizes[0] = a.seq_lens[0]
+        b_sizes[0] = b.seq_lens[0]
+        a_max_len = max(a_sizes)
+        b_max_len = max(b_sizes)
+        a_values = rnd.randint(0, 10, (batch_size, a_max_len))
+        b_values = rnd.randint(0, 10, (batch_size, b_max_len))
+        a_values[0, : a.seq_lens[0]] = a.tensor[0, : a.seq_lens[0]]
+        b_values[0, : b.seq_lens[0]] = b.tensor[0, : b.seq_lens[0]]
+        a_sizes = torch.tensor(a_sizes, dtype=torch.int32)
+        b_sizes = torch.tensor(b_sizes, dtype=torch.int32)
+
+        # noinspection PyShadowingNames
+        batch_dim = Dim(batch_size, name="batch")
+        a_spatial_dim = Dim(Tensor("a_sizes", [batch_dim], dtype="int32", raw_tensor=a_sizes))
+        b_spatial_dim = Dim(Tensor("b_sizes", [batch_dim], dtype="int32", raw_tensor=b_sizes))
+        a_ = Tensor("a", [batch_dim, a_spatial_dim], dtype="int64", raw_tensor=torch.tensor(a_values))
+        b_ = Tensor("b", [batch_dim, b_spatial_dim], dtype="int64", raw_tensor=torch.tensor(b_values))
+        _check_edit_distance(a_, a_spatial_dim, b_, b_spatial_dim)
@@ -1,2 +0,0 @@
-version = '1.20240905.172412'
-long_version = '1.20240905.172412+git.fb9d5c3'
@@ -1,93 +0,0 @@
-"""
-Loss functions
-"""
-
-from __future__ import annotations
-from returnn.tensor import Tensor, Dim
-import returnn.frontend as rf
-
-
-__all__ = ["cross_entropy", "ctc_loss"]
-
-
-def cross_entropy(
-    *,
-    estimated: Tensor,
-    target: Tensor,
-    axis: Dim,
-    estimated_type: str,
-) -> Tensor:
-    """
-    ``target`` is supposed to be in probability space (normalized). It can also be sparse, i.e. contain class indices.
-    ``estimated`` can be probs, log-probs or logits, specified via ``estimated_type``.
-
-    Assuming both are in probability space, the cross entropy is:
-
-        H(target,estimated) = -reduce_sum(target * log(estimated), axis=axis)
-                            = -matmul(target, log(estimated), reduce=axis)
-
-    In case you want label smoothing, you can use e.g.::
-
-        ce = nn.cross_entropy(
-            target=nn.label_smoothing(target, 0.1),
-            estimated=estimated)
-
-    :param estimated: probs, log-probs or logits, specified via ``estimated_type``
-    :param target: probs, normalized, can also be sparse
-    :param axis: class labels dim over which softmax is computed
-    :param estimated_type: "probs", "log-probs" or "logits"
-    :return: cross entropy (same Dims as 'estimated' but without 'axis')
-    """
-
-    if estimated_type == "logits":
-        # This is a common case and most backends provide optimized functions for it.
-        # noinspection PyProtectedMember
-        return estimated._raw_backend.softmax_cross_entropy_with_logits(logits=estimated, targets=target, axis=axis)
-    if estimated_type == "probs":
-        log_prob = rf.log(estimated)  # TODO: make numerically stable
-    elif estimated_type == "log-probs":
-        log_prob = estimated
-    else:
-        raise ValueError("estimated_type must be 'probs', 'log-probs' or 'logits'")
-    if target.sparse_dim:
-        return -rf.gather(log_prob, indices=target, axis=axis)
-    return -rf.matmul(target, log_prob, reduce=axis)
-
-
-def ctc_loss(
-    *,
-    logits: Tensor,
-    logits_normalized: bool = False,
-    targets: Tensor,
-    input_spatial_dim: Dim,
-    targets_spatial_dim: Dim,
-    blank_index: int,
-    max_approx: bool = False,
-) -> Tensor:
-    """
-    Calculates the CTC loss.
-
-    Internally, this uses :func:`returnn.tf.native_op.ctc_loss`
-    which is equivalent to tf.nn.ctc_loss but more efficient.
-
-    Output is of shape [B].
-
-    :param logits: (before softmax). shape [B...,input_spatial,C]
-    :param logits_normalized: whether the logits are already normalized (e.g. via log-softmax)
-    :param targets: sparse. shape [B...,targets_spatial] -> C
-    :param input_spatial_dim: spatial dim of input logits
-    :param targets_spatial_dim: spatial dim of targets
-    :param blank_index: vocab index of the blank symbol
-    :param max_approx: if True, use max instead of sum over alignments (max approx, Viterbi)
-    :return: loss shape [B...]
-    """
-    # noinspection PyProtectedMember
-    return logits._raw_backend.ctc_loss(
-        logits=logits,
-        logits_normalized=logits_normalized,
-        targets=targets,
-        input_spatial_dim=input_spatial_dim,
-        targets_spatial_dim=targets_spatial_dim,
-        blank_index=blank_index,
-        max_approx=max_approx,
-    )
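
Editor's note (illustrative only, not part of the package): the cross-entropy identity in the removed module's docstring, and the special-cased sparse-target path, can be checked with a tiny plain-PyTorch sketch; all names below are this note's own.

import torch

logits = torch.tensor([2.0, 0.5, -1.0])
log_probs = torch.log_softmax(logits, dim=-1)  # "estimated" in log-prob space
target = 1  # sparse target: a class index

ce_sparse = -log_probs[target]  # gather form, used when target.sparse_dim is set
one_hot = torch.nn.functional.one_hot(torch.tensor(target), num_classes=3).float()
ce_dense = -(one_hot * log_probs).sum()  # H(target, estimated) = -reduce_sum(target * log(estimated))
assert torch.allclose(ce_sparse, ce_dense)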