PyPI - returnn - Versions diffs - 1.20251027.232712__py3-none-any.whl → 1.20260119.15400__py3-none-any.whl - Mend

returnn 1.20251027.232712__py3-none-any.whl → 1.20260119.15400__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54) hide show

returnn/PKG-INFO +2 -2
returnn/__old_mod_loader__.py +26 -2
returnn/_setup_info_generated.py +2 -2
returnn/datasets/lm.py +130 -42
returnn/datasets/meta.py +93 -43
returnn/datasets/postprocessing.py +597 -108
returnn/datasets/util/vocabulary.py +90 -0
returnn/frontend/__init__.py +1 -0
returnn/frontend/_backend.py +41 -0
returnn/frontend/_native/__init__.py +22 -0
returnn/frontend/_numpy_backend.py +7 -0
returnn/frontend/_utils.py +1 -1
returnn/frontend/array_.py +48 -2
returnn/frontend/assert_.py +35 -0
returnn/frontend/attention.py +54 -20
returnn/frontend/conv.py +273 -54
returnn/frontend/device.py +14 -1
returnn/frontend/encoder/conformer.py +20 -0
returnn/frontend/encoder/transformer.py +2 -0
returnn/frontend/loss.py +222 -3
returnn/frontend/math_.py +54 -14
returnn/native_op.cpp +182 -172
returnn/native_op.py +36 -31
returnn/sprint/cache.py +12 -13
returnn/tensor/_dim_extra.py +7 -7
returnn/tensor/_tensor_extra.py +10 -10
returnn/tensor/utils.py +8 -5
returnn/tf/frontend_layers/_backend.py +7 -3
returnn/tf/layers/basic.py +27 -40
returnn/tf/native_op.py +27 -63
returnn/tf/network.py +1 -1
returnn/tf/util/basic.py +22 -197
returnn/torch/engine.py +157 -6
returnn/torch/frontend/_backend.py +280 -29
returnn/torch/frontend/bridge.py +61 -0
returnn/torch/frontend/compile_helper.py +106 -0
returnn/torch/util/array_.py +30 -0
returnn/torch/util/assert_.py +122 -0
returnn/torch/util/exception_helper.py +7 -1
returnn/torch/util/native_op.py +885 -0
returnn/torch/util/native_op_code_compiler.py +308 -0
returnn/util/basic.py +6 -7
returnn/util/better_exchook.py +4 -0
returnn/util/cuda_env.py +332 -0
returnn/util/debug.py +12 -2
returnn/util/file_cache.py +15 -1
returnn/util/fsa.py +17 -13
returnn/util/native_code_compiler.py +104 -47
returnn/util/task_system.py +1 -1
{returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/METADATA +2 -2
{returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/RECORD +54 -48
{returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/WHEEL +1 -1
{returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/LICENSE +0 -0
{returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/top_level.txt +0 -0

returnn/datasets/util/vocabulary.py CHANGED Viewed

@@ -11,6 +11,7 @@ __all__ = [
     "SentencePieces",
     "CharacterTargets",
     "Utf8ByteTargets",
+    "HuggingFaceTokenizer",
 ]
 from typing import Optional, Union, Type, Callable, List, Dict
@@ -691,3 +692,92 @@ class Utf8ByteTargets(Vocabulary):
             assert ((seq >= 0) & (seq < 256)).all(), f"invalid byte value, must be within 0-255: {seq}"
             seq = seq.astype(numpy.uint8)
         return bytearray(seq).decode(encoding="utf8")
+class HuggingFaceTokenizer(Vocabulary):
+    """
+    Uses the `AutoTokenizer` class from the `transformers` package.
+    The label list is not loaded on construction; it is only built on first access of :attr:`labels`.
+    """
+    def __init__(self, *, huggingface_repo_dir: str):
+        """
+        :param str huggingface_repo_dir: the directory containing the `tokenizer_config.json` file.
+        """
+        import transformers  # noqa
+        # Make sure it is a string. (Could be e.g. a Sis Path object.)
+        huggingface_repo_dir = str(huggingface_repo_dir)
+        self._opts = {"huggingface_repo_dir": huggingface_repo_dir}
+        self._cache_key = huggingface_repo_dir
+        # NOTE(security): trust_remote_code=True allows the tokenizer repo to run its own Python code on load.
+        # Only point this at directories you trust.
+        self.tokenizer = transformers.AutoTokenizer.from_pretrained(huggingface_repo_dir, trust_remote_code=True)
+        super().__init__(
+            vocab_file=None,
+            seq_postfix=None,
+            unknown_label=self.tokenizer.unk_token_id,
+            eos_label=self.tokenizer.eos_token_id,
+            bos_label=self.tokenizer.bos_token_id,
+            pad_label=self.tokenizer.pad_token_id,
+        )
+    def __repr__(self):
+        return "%s(%r)" % (self.__class__.__name__, self._opts)
+    def _parse_vocab(self):
+        self.num_labels = len(self.tokenizer)
+        # Do not load labels/vocab here. This is not really needed.
+    @property
+    def labels(self) -> List[str]:
+        """
+        list of labels, i.e. the token string for every id in ``range(num_labels)``.
+        Built on demand and shared via the cache, keyed by the repo dir.
+        """
+        if self._cache_key and self._cache_key in self._cache:
+            self._vocab, self._labels = self._cache[self._cache_key]
+            assert self.num_labels == len(self._vocab) == len(self._labels)
+        else:
+            self._labels = [self.tokenizer._convert_id_to_token(i) for i in range(self.num_labels)]  # noqa
+            self._vocab = {label: i for (i, label) in enumerate(self._labels)}
+            if self._cache_key:
+                self._cache[self._cache_key] = (self._vocab, self._labels)
+        return self._labels
+    def is_id_valid(self, idx: int) -> bool:
+        """
+        :param idx:
+        :return: whether ``idx`` is within ``[0, len(tokenizer))``
+        """
+        return 0 <= idx < len(self.tokenizer)
+    def id_to_label(self, idx: int, default: Union[str, Type[KeyError], None] = KeyError) -> Optional[str]:
+        """
+        :param idx:
+        :param default: returned for an invalid ``idx``. If it is ``KeyError`` (the default), a KeyError is raised.
+        :return: the token string for ``idx``
+        """
+        if not self.is_id_valid(idx):
+            # Honor the KeyError contract of the signature instead of leaving the outcome to the tokenizer.
+            if default is KeyError:
+                raise KeyError("idx %i out of range" % idx)
+            return default
+        # Pass a plain Python int, so that e.g. Numpy integer scalars are treated as a single id.
+        return self.tokenizer.convert_ids_to_tokens(int(idx))
+    def label_to_id(self, label: str, default: Union[int, Type[KeyError], None] = KeyError) -> Optional[int]:
+        """
+        :param label:
+        :param default: returned for an unknown ``label``. If it is ``KeyError`` (the default), a KeyError is raised.
+        :return: the id of ``label``
+        """
+        # `convert_tokens_to_ids` is the public tokenizer API; given a single str, it returns a single id.
+        res = self.tokenizer.convert_tokens_to_ids(label)
+        # Check for None first: comparing None with an int would raise a TypeError.
+        if res is None or res < 0 or res == self.unknown_label_id:
+            # It could be that the label really is the unknown-label, or it could be that the label is unknown.
+            unk_id = self.unknown_label_id
+            if unk_id is not None and label == self.id_to_label(unk_id, default=None):
+                return unk_id
+            if default is KeyError:
+                raise KeyError("label %r not found" % label)
+            return default
+        return res
+    def get_seq(self, sentence: str) -> List[int]:
+        """
+        :param sentence: raw text. It is passed as-is to the tokenizer, which does the segmentation.
+        :return: the ``input_ids`` as produced by the tokenizer
+        """
+        return self.tokenizer(sentence)["input_ids"]
+    def get_seq_labels(self, seq):
+        """
+        :param list[int]|numpy.ndarray seq: 1D sequence
+        :return: the decoded text, with special tokens skipped
+        :rtype: str
+        """
+        return self.tokenizer.decode(seq, skip_special_tokens=True)

returnn/frontend/__init__.py CHANGED Viewed

@@ -19,6 +19,7 @@ from .state import *
 # Now the rest, in alphabetical order.
 from .array_ import *
+from .assert_ import *
 from .attention import *
 from .backend import *
 from .build_from_dict import *

returnn/frontend/_backend.py CHANGED Viewed

@@ -42,6 +42,11 @@ class Backend(Generic[T]):
         """
         raise NotImplementedError
+    @staticmethod
+    def assert_(condition: Tensor, message: str):
+        """
+        Backend hook to assert that ``condition`` holds, reporting ``message`` otherwise.
+        This base version is only a placeholder and always raises :class:`NotImplementedError`.
+        :param condition: condition to check
+        :param message: text to report when the condition does not hold
+        """
+        raise NotImplementedError
     @staticmethod
     def get_tensor_dependencies(x: Tensor) -> Sequence[Tensor]:
         """
@@ -624,12 +629,48 @@ class Backend(Generic[T]):
         targets_spatial_dim: Dim,
         blank_index: int,
         max_approx: bool = False,
+        use_native_op: Optional[bool] = None,
+        label_loop: bool = True,
     ) -> Tensor:
         """
         Calculates the CTC loss.
         """
         raise NotImplementedError
+    @staticmethod
+    def ctc_best_path(
+        *,
+        logits: Tensor,
+        logits_normalized: bool = False,
+        targets: Tensor,
+        input_spatial_dim: Dim,
+        targets_spatial_dim: Dim,
+        blank_index: int,
+        label_loop: bool = True,
+    ) -> Tensor:
+        """
+        Calculates the CTC best path.
+        All arguments are keyword-only.
+        This base version is only a placeholder and always raises :class:`NotImplementedError`.
+        """
+        raise NotImplementedError
+    @staticmethod
+    def have_edit_distance() -> bool:
+        """
+        The base version reports False.
+        :return: whether we have an edit_distance implementation
+        """
+        return False
+    @staticmethod
+    def edit_distance(a: Tensor, a_spatial_dim: Dim, b: Tensor, b_spatial_dim: Dim) -> Tensor:
+        """
+        Edit distance between the sequences in ``a`` and the sequences in ``b``.
+        This base version is only a placeholder and always raises :class:`NotImplementedError`.
+        :param a: [B,Ta]
+        :param a_spatial_dim: Ta
+        :param b: [B,Tb]
+        :param b_spatial_dim: Tb
+        :return: [B]
+        """
+        raise NotImplementedError
     @staticmethod
     def have_sequence_mask_raw() -> bool:
         """

returnn/frontend/_native/__init__.py CHANGED Viewed

@@ -67,6 +67,24 @@ def _code_hash_md5(filename: str) -> str:
 _is_set_up = False
+_enabled = True
+def set_enabled(enabled: bool):
+    """
+    Enable or disable the native code setup.
+    While disabled, :func:`setup` and :func:`setup_torch` return early,
+    before they mark themselves as set up, so a later call after re-enabling still performs the setup.
+    This only sets the flag; it does not undo a setup which was already done.
+    :param enabled:
+    """
+    global _enabled
+    _enabled = enabled
+def is_set_up() -> bool:
+    """
+    This reports the flag of :func:`setup` only, not the separate flag of :func:`setup_torch`.
+    NOTE(review): :func:`setup` sets the flag before doing the actual work ("only try once"),
+    so True presumably means that the setup was attempted -- confirm whether it can still fail afterwards.
+    :return: whether the native code is set up
+    """
+    return _is_set_up
 def setup():
@@ -76,6 +94,8 @@ def setup():
     global _is_set_up
     if _is_set_up:
         return
+    if not _enabled:
+        return
     _is_set_up = True  # only try once
     from returnn.tensor import Tensor, Dim
@@ -177,6 +197,8 @@ def setup_torch():
     global _is_set_up_torch
     if _is_set_up_torch:
         return
+    if not _enabled:
+        return
     _is_set_up_torch = True  # only try once
     import torch

returnn/frontend/_numpy_backend.py CHANGED Viewed

@@ -26,6 +26,13 @@ class NumpyBackend(Backend[numpy.ndarray]):
         """executing eagerly"""
         return True
+    @staticmethod
+    def assert_(condition: Tensor, message: str):
+        """
+        Eagerly checks the scalar ``condition``.
+        :param condition: scalar tensor (no dims)
+        :param message: text of the :class:`AssertionError` raised when the condition is false
+        """
+        assert condition.dims == (), "condition for assert must be a scalar"
+        holds = condition.raw_tensor.item()
+        if holds:
+            return
+        raise AssertionError(message)
     @staticmethod
     def get_dtype_name_raw(raw_tensor: numpy.ndarray) -> str:
         """

returnn/frontend/_utils.py CHANGED Viewed

@@ -110,7 +110,7 @@ def bin_op_out_template(
             all_dims.extend([dim_ for dim_ in a.dims if dim_ == dim])
         else:
             all_dims.extend([dim_ for dim_ in b.dims if dim_ == dim])
-    if all(set(x.dims) != set(all_dims) for x in (a, b)):
+    if all([set(x.dims) != set(all_dims) for x in (a, b)]):
         if allow_broadcast_all_sources is False:
             raise ValueError(f"compare: sources {a!r} {b!r} not allowed with allow_broadcast_all_sources=False")
         elif allow_broadcast_all_sources is None:

returnn/frontend/array_.py CHANGED Viewed

@@ -54,6 +54,7 @@ __all__ = [
     "one_hot",
     "top_k_mask",
     "top_p_mask",
+    "repeat",
 ]
@@ -84,6 +85,10 @@ def convert_to_tensor(
     :return: tensor
     """
     if isinstance(value, Tensor):  # fast path
+        if device and value.device != device:
+            value = rf.copy_to_device(value, device)
+        if dtype and value.dtype != dtype:
+            value = rf.cast(value, dtype=dtype)
         return value
     if isinstance(value, (tuple, list)):
         value = numpy.array(value, dtype=dtype)
@@ -190,7 +195,7 @@ def merge_dims(
     if out_dim is None:
         from returnn.util.basic import prod
-        if any(d.need_masking() for d in dims[1:]):
+        if any([d.need_masking() for d in dims[1:]]):
             # The dynamic sizes as calculated via dim math would not correctly describe how the tensor looks like.
             # This would then potentially discard some of the data in the tensor in subsequent operations,
             # when masking is applied.
@@ -905,7 +910,7 @@ def scatter(
         else:
             raise ValueError(f"scatter: invalid mode {mode!r}")
     indices_dim = indices_dim if isinstance(indices_dim, (list, tuple)) else [indices_dim]
-    if any(dim.need_masking() for dim in indices_dim):
+    if any([dim.need_masking() for dim in indices_dim]):
         if use_mask is None:
             use_mask = rf.use_mask_default(
                 default=True, default_false_for_behavior_version_up_to=22, func_name="scatter"
@@ -1341,3 +1346,44 @@ def top_p_mask(
         mask = mask | (rf.range_over_dim(sorted_dim, device=mask.device) < min_tokens_to_keep)
     mask = rf.scatter(mask, indices=sorted_indices, indices_dim=sorted_dim)
     return mask
+def repeat(
+    values: Tensor, *, in_spatial_dim: Dim, repeats: Tensor, out_spatial_dim: Optional[Dim] = None
+) -> Tuple[Tensor, Dim]:
+    """
+    Repeats certain elements in a tensor along a given spatial dimension.
+    0 repeats means to remove that element.
+    This can be used to implement duration-based expansion, e.g. in text-to-speech.
+    The result is computed via index arithmetic (cumsum, scatter, cumsum) followed by a single gather on ``values``.
+    :param values: [common..., values..., in_spatial_dim]
+    :param in_spatial_dim:
+    :param repeats: [common..., repeats..., in_spatial_dim] -> int32 durations / number of repetitions for each element
+    :param out_spatial_dim: if not given, a new dim is created with the summed repeats as its dynamic size.
+        If given without dynamic size (or without its raw tensor), that size is filled in here.
+    :return: expanded_values: [common..., values..., repeats..., out_spatial_dim], out_spatial_dim
+    """
+    # Similar to masked_select
+    # Use 0 repeats in the padded area of in_spatial_dim, so padding does not contribute to the sums below
+    # (assuming copy_masked fills the masked-out positions with the given value -- TODO confirm).
+    repeats = repeats.copy_masked(0, dims=[in_spatial_dim])
+    idxs = rf.cumsum(repeats, spatial_dim=in_spatial_dim)  # [batch...,in_spatial_dim] -> idx in out_spatial_dim + 1
+    # The cumulative sum at the last position of in_spatial_dim is the total number of output frames.
+    new_size = rf.gather(idxs, indices=in_spatial_dim.get_dim_value_tensor() - 1, axis=in_spatial_dim)  # [batch...]
+    dim_dev = rf.get_default_dim_size_device()
+    if out_spatial_dim is None:
+        out_spatial_dim = Dim(rf.copy_to_device(new_size, dim_dev), name="repeat")
+    elif out_spatial_dim.dyn_size_ext is None:
+        out_spatial_dim.dyn_size_ext = rf.copy_to_device(new_size, dim_dev)
+    elif out_spatial_dim.dyn_size_ext is not None and out_spatial_dim.dyn_size_ext.raw_tensor is None:
+        out_spatial_dim.dyn_size_ext.raw_tensor = rf.copy_to_device(new_size, dim_dev).raw_tensor
+    # One extra position: the values in idxs go up to (and including) new_size, which must be a valid scatter index.
+    out_spatial_dim_ext = out_spatial_dim + 1
+    # Put a 1 at each idxs position. With the default scatter mode, multiple hits on the same position
+    # presumably add up (the comment below speaks of a count) -- TODO confirm.
+    rel_idx_counts = rf.scatter(
+        rf.expand_dims(rf.ones((), device=values.device, dtype="int32"), dims=idxs.dims),
+        indices=idxs,
+        indices_dim=in_spatial_dim,
+        out_dim=out_spatial_dim_ext,
+    )
+    # rel_idx_counts: [batch...,out_spatial_dim+1] -> count of how many times each index was selected
+    idxs_ = rf.cumsum(rel_idx_counts, spatial_dim=out_spatial_dim_ext)
+    # idxs_: [batch...,out_spatial_dim+1] -> idx in in_spatial_dim
+    idxs_, _ = rf.slice(idxs_, axis=out_spatial_dim_ext, size=out_spatial_dim)  # remove last element
+    # idxs_: [batch...,out_spatial_dim] -> idx in in_spatial_dim (potentially with invalid indices in padded area)
+    # clip_to_valid takes care of those potentially invalid indices in the padded area.
+    return rf.gather(values, indices=idxs_, axis=in_spatial_dim, clip_to_valid=True), out_spatial_dim

returnn/frontend/assert_.py ADDED Viewed

@@ -0,0 +1,35 @@
+"""
+Assertion utility functions for validating conditions in Python code.
+"""
+from __future__ import annotations
+from typing import Union
+import returnn.frontend as rf
+from returnn.tensor import Tensor
+__all__ = ["assert_"]
+def assert_(condition: Union[Tensor, bool], message: str):
+    """
+    Checks that the given condition holds.
+    A plain Python bool is checked right here, raising an AssertionError with the given message if it is False.
+    A tensor is first reduced over all its dims to a scalar, and then handed to ``assert_`` of its raw backend.
+    This runs async on GPU.
+    :param condition: bool, or tensor of any shape (all entries must hold)
+    :param message: message for the error
+    :return: nothing
+    """
+    if isinstance(condition, bool):
+        if condition:
+            return
+        raise AssertionError(message)
+    if not isinstance(condition, Tensor):
+        raise TypeError(f"Condition must be a boolean or a Tensor, got {type(condition)}")
+    scalar_cond = condition
+    if scalar_cond.dims:
+        scalar_cond = rf.reduce_all(scalar_cond, axis=scalar_cond.dims)  # reduce to scalar
+    # noinspection PyProtectedMember
+    scalar_cond._raw_backend.assert_(scalar_cond, message=message)

returnn/frontend/attention.py CHANGED Viewed

@@ -24,6 +24,7 @@ __all__ = [
     "LearnedRelativePositionalEncoding",
     "relative_positional_encoding",
     "sinusoidal_positional_encoding",
+    "sinusoidal_encoding",
 ]
@@ -454,7 +455,7 @@ class RelPosSelfAttention(SelfAttentionBase):
             pos_emb, pos_emb_spatial_dim = self.learned_pos_emb(query_spatial_dim=axis, key_value_spatial_dim=axis)
         else:
             pos_emb, pos_emb_spatial_dim = relative_positional_encoding(
-                query_spatial_dim=axis, key_value_spatial_dim=axis, feat_dim=self.pos_emb_feat_dim
+                query_spatial_dim=axis, key_value_spatial_dim=axis, feat_dim=self.pos_emb_feat_dim, device=source.device
             )
         if self.pos_emb_dropout:
             pos_emb = rf.dropout(pos_emb, self.pos_emb_dropout)
@@ -483,6 +484,7 @@ class RelPosSelfAttention(SelfAttentionBase):
         matrix_bd = _rel_pos_enc_shift(matrix_bd, axis, pos_emb_spatial_dim, hist_dim)
         scores = matrix_ac + matrix_bd  # (batch, head, time1, time2)
+        del matrix_ac, matrix_bd
         scores *= self.key_dim_per_head.dimension**-0.5
         att_weights = rf.softmax(scores, axis=hist_dim)
         att_weights = rf.dropout(att_weights, self.att_dropout, axis=self.att_dropout_broadcast and hist_dim)
@@ -609,7 +611,10 @@ class RelPosCausalSelfAttention(CausalSelfAttention):
             pos_emb, pos_emb_spatial_dim = self.learned_pos_emb(query_spatial_dim=axis, key_value_spatial_dim=hist_dim)
         else:
             pos_emb, pos_emb_spatial_dim = relative_positional_encoding(
-                query_spatial_dim=axis, key_value_spatial_dim=hist_dim, feat_dim=self.pos_emb_feat_dim
+                query_spatial_dim=axis,
+                key_value_spatial_dim=hist_dim,
+                feat_dim=self.pos_emb_feat_dim,
+                device=source.device,
             )
         # pos_emb_spatial_dim is 2*time1-1 if axis!=single_step_dim, else time1
         if self.pos_emb_dropout:
@@ -724,6 +729,7 @@ class CrossAttention(rf.Module):
         """
         Transformer encoder output. This is intended as an initial API suggestion.
         """
+        assert axis in encoder.dims
         k, v = self.forward_kv(encoder)
         return rf.State(k=k, v=v, kv_axis=axis)
@@ -811,7 +817,9 @@ class LearnedRelativePositionalEncoding(rf.Module):
         :return: tensor of shape [spatial_dim * 2 - 1, feat_dim], and the out spatial dim (spatial_dim * 2 - 1).
             In the center is the rel pos i-j=0. All to the right are for i-j>0, all to the left for i-j<0.
         """
-        indices, out_spatial_dim = _make_indices(query_spatial_dim, key_value_spatial_dim, query_offset)
+        indices, out_spatial_dim = _make_indices(
+            query_spatial_dim, key_value_spatial_dim, query_offset, device=self.pos_emb.device
+        )
         indices = rf.clip_by_value(indices, -self.clipping, 0 if self.causal else self.clipping)
         # Shift values to be >= 0. Each integer still uniquely identifies a relative position difference.
         indices = indices + self.clipping
@@ -851,8 +859,9 @@ def _make_indices(
     query_spatial_dim: Dim,
     key_value_spatial_dim: Dim,
     query_offset: Optional[Union[int, Tensor]] = None,
+    device: Optional[str] = None,
 ) -> Tuple[Tensor, Dim]:
-    kv_pos_vec = rf.range_over_dim(key_value_spatial_dim)  # [kv_len]
+    kv_pos_vec = rf.range_over_dim(key_value_spatial_dim, device=device)  # [kv_len]
     # See also RelativePositionalEncodingLayer
     if query_spatial_dim == single_step_dim:
@@ -865,7 +874,7 @@ def _make_indices(
         query_offset = key_value_spatial_dim.get_size_tensor() - 1
     else:
         query_spatial_dim_m1 = query_spatial_dim - 1
-        q_pos_vec = rf.range_over_dim(query_spatial_dim_m1)  # [q_len-1]
+        q_pos_vec = rf.range_over_dim(query_spatial_dim_m1, device=device)  # [q_len-1]
         # The masking in the output is quite custom (left+right masking), so our seq lens don't make sense,
         # and might even cause to fail some tests (that e.g. max(q_seq_len+k_seq_len-1) == shape).
@@ -902,6 +911,7 @@ def relative_positional_encoding(
     feat_dim: Dim,
     query_offset: int = 0,
     dtype: Optional[str] = None,
+    device: Optional[str] = None,
 ) -> Tuple[Tensor, Dim]:
     """
     Implements relative positional encoding, Transformer-XL style (https://arxiv.org/abs/1901.02860),
@@ -924,7 +934,9 @@ def relative_positional_encoding(
     """
     if not dtype:
         dtype = rf.get_default_float_dtype()
-    cache_key = (query_spatial_dim, key_value_spatial_dim, feat_dim, query_offset, dtype)
+    if not device:
+        device = rf.get_default_device()
+    cache_key = (query_spatial_dim, key_value_spatial_dim, feat_dim, query_offset, dtype, device)
     cache_entry = _relative_positional_encoding_cache.get(cache_key)
     if cache_entry is not None:
         return cache_entry
@@ -932,7 +944,7 @@ def relative_positional_encoding(
     with rf.control_flow_ctx(None):
         # See also RelativePositionalEncodingLayer, LearnedRelativePositionalEncoding
-        indices, out_spatial_dim = _make_indices(query_spatial_dim, key_value_spatial_dim, query_offset)
+        indices, out_spatial_dim = _make_indices(query_spatial_dim, key_value_spatial_dim, query_offset, device=device)
         feat2_dim = feat_dim.div_left(2)
         div_term = rf.exp(rf.range_over_dim(feat2_dim, dtype=dtype) * -(2.0 * math.log(1e4) / feat_dim.dimension))
@@ -986,7 +998,6 @@ def sinusoidal_positional_encoding(
     cache_entry = _sinusoidal_positional_encoding_cache.get(cache_key)
     if cache_entry is not None:
         return cache_entry
-    import math
     with rf.control_flow_ctx(None):
         # See also RelativePositionalEncodingLayer, LearnedRelativePositionalEncoding
@@ -997,26 +1008,49 @@ def sinusoidal_positional_encoding(
             indices = rf.range_over_dim(spatial_dim, device=device)  # [len]
             if offset is not None:
                 indices = indices + offset
-        indices = rf.copy_to_device(indices, device)
-        feat2_dim = feat_dim.div_left(2)
-        div_term = rf.exp(
-            rf.range_over_dim(feat2_dim, dtype=dtype, device=device) * -(math.log(base) / (feat2_dim.dimension - 1))
-        )
-        arg_sin = rf.combine_bc(rf.cast(indices, dtype), "*", div_term)
-        arg_cos = arg_sin + math.pi / 2.0
-        arg, feat_dim_ = rf.concat((arg_sin, feat2_dim), (arg_cos, feat2_dim))
-        arg, feat_dim_ = rf.replace_dim(arg, in_dim=feat_dim_, out_dim=feat_dim)
-        emb = rf.sin(arg)
+        emb = sinusoidal_encoding(indices, base=base, feat_dim=feat_dim, dtype=dtype)
         emb.verify_out_shape(
             {feat_dim} | indices.dims_set | ({spatial_dim} if spatial_dim != single_step_dim else set()),
             allow_missing_implicit_dims=True,
         )
-        emb.feature_dim = feat_dim
         _sinusoidal_positional_encoding_cache.set(cache_key, emb)
         return emb
+def sinusoidal_encoding(
+    indices: Tensor,
+    *,
+    feat_dim: Dim,
+    base: Union[int, float] = 1e4,
+    dtype: Optional[str] = None,
+) -> Tensor:
+    """
+    Encodes the given indices with sinusoids.
+    The first half of the feature dim is sin(indices * freq), the second half is cos(indices * freq)
+    (computed as sin(x + pi/2)), where the frequencies go from 1 down to 1/base in equal steps on a log scale.
+    The computation is done on the device of ``indices``.
+    :param indices: [...], to be encoded
+    :param feat_dim:
+    :param base: base for the angles
+    :param dtype: data type. if not given, the default float dtype is used
+    :return: tensor of shape [..., feat_dim], with ``feature_dim`` set to ``feat_dim``
+    """
+    import math
+    if not dtype:
+        dtype = rf.get_default_float_dtype()
+    device = indices.device
+    feat2_dim = feat_dim.div_left(2)
+    # With only a single frequency (feat2_dim of size 1), `dimension - 1` would be 0,
+    # and the division below would raise a ZeroDivisionError.
+    # The only range entry is 0 in that case, so the frequency is exp(0) = 1 for any non-zero denominator.
+    num_freq_steps = max(feat2_dim.dimension - 1, 1)
+    div_term = rf.exp(
+        rf.range_over_dim(feat2_dim, dtype=dtype, device=device) * -(math.log(base) / num_freq_steps)
+    )
+    arg_sin = rf.combine_bc(rf.cast(indices, dtype), "*", div_term)
+    # cos(x) = sin(x + pi/2), so a single sin over the concatenation covers both halves.
+    arg_cos = arg_sin + math.pi / 2.0
+    arg, feat_dim_ = rf.concat((arg_sin, feat2_dim), (arg_cos, feat2_dim))
+    arg, feat_dim_ = rf.replace_dim(arg, in_dim=feat_dim_, out_dim=feat_dim)
+    emb = rf.sin(arg)
+    emb.feature_dim = feat_dim
+    return emb
 _att_dropout_broadcast_shown_warning = False

returnn 1.20251027.232712__py3-none-any.whl → 1.20260119.15400__py3-none-any.whl

returnn 1.20251027.232712__py3-none-any.whl → 1.20260119.15400__py3-none-any.whl