returnn 1.20251027.232712__py3-none-any.whl → 1.20260105.192646__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
returnn/frontend/conv.py CHANGED
@@ -3,7 +3,7 @@ Convolution, transposed convolution, pooling
  """

  from __future__ import annotations
- from typing import Optional, Sequence, Tuple, Union
+ from typing import Optional, Union, TypeVar, Sequence, Tuple, List
  from returnn.util.basic import next_type_attrib_in_mro_chain
  from returnn.tensor import Tensor, Dim
  import returnn.frontend as rf
@@ -25,6 +25,9 @@ __all__ = [
  "pool2d",
  "pool3d",
  "make_conv_out_spatial_dims",
+ "calc_conv_out_length",
+ "make_transposed_conv_out_spatial_dims",
+ "calc_transposed_conv_out_length",
  ]

@@ -396,7 +399,11 @@ def transposed_conv(
  )
  if use_mask:
  source = source.copy_masked(0, dims=in_spatial_dims)
- if padding == "same" and _any_is_non_default(strides, default=1) and _should_use_consistent_same_padding():
+ if (
+ padding == "same"
+ and any(s != 1 for s in (strides or [fs.dimension for fs in filter_size]))
+ and _should_use_consistent_same_padding()
+ ):
  # I don't really know what this should mean here... Investigate this further...
  raise NotImplementedError("consistent same padding not implemented for transposed conv")
  # noinspection PyProtectedMember
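Note: the rewritten condition makes the default explicit: when strides is not passed, the filter sizes act as the strides, so any filter larger than 1 now counts as a non-unit stride here (the previous _any_is_non_default(strides, default=1) check presumably did not trigger when strides was left unset). A plain-Python reading of the new expression, with ints standing in for Dim.dimension, purely as an illustration:

strides = None
filter_size_dims = [4]  # hypothetical 1D transposed conv with filter size 4
effective_strides = strides or [fs for fs in filter_size_dims]
print(any(s != 1 for s in effective_strides))  # True -> with padding="same" and the behavior flag set, the NotImplementedError above is raised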
@@ -424,6 +431,39 @@ class TransposedConv1d(_TransposedConv):

  nd = 1

+ def __init__(
+ self,
+ in_dim: Dim,
+ out_dim: Dim,
+ filter_size: Union[int, Dim],
+ *,
+ padding: str,
+ remove_padding: int = 0,
+ output_padding: Optional[int] = None,
+ strides: Optional[int] = None,
+ with_bias: bool = True,
+ ):
+ """
+ :param in_dim:
+ :param out_dim:
+ :param filter_size:
+ :param strides: specifies the upscaling. by default, same as filter_size
+ :param padding: "same" or "valid"
+ :param remove_padding:
+ :param output_padding:
+ :param with_bias: whether to add a bias. enabled by default
+ """
+ super().__init__(
+ in_dim=in_dim,
+ out_dim=out_dim,
+ filter_size=[filter_size],
+ padding=padding,
+ remove_padding=remove_padding,
+ output_padding=output_padding,
+ strides=[strides] if strides is not None else None,
+ with_bias=with_bias,
+ )
+
  __call__ = _ConvOrTransposedConv._call_nd1
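Note: a minimal construction sketch for the new explicit TransposedConv1d.__init__ (assuming the class is re-exported on returnn.frontend as rf.TransposedConv1d and that Dim can be constructed this way; both details are assumptions for illustration):

import returnn.frontend as rf
from returnn.tensor import Dim

in_dim = Dim(80, name="in")    # hypothetical feature dims
out_dim = Dim(160, name="out")

# scalar filter_size/strides get wrapped into 1-element lists for the generic
# _TransposedConv base, exactly as the added __init__ does
upsample = rf.TransposedConv1d(
    in_dim=in_dim,
    out_dim=out_dim,
    filter_size=2,
    strides=2,        # upscaling factor; None defaults to filter_size
    padding="valid",
    with_bias=True,
)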
@@ -704,7 +744,7 @@ def make_conv_out_spatial_dims(
  strides: Union[Sequence[int], int] = 1,
  dilation_rate: Union[Sequence[int], int] = 1,
  description_prefix: Optional[str] = None,
- ) -> Sequence[Dim]:
+ ) -> List[Dim]:
  """create out spatial dims from in spatial dims"""
  nd = len(in_spatial_dims)
  if isinstance(filter_size, (int, Dim)):
@@ -715,84 +755,263 @@ def make_conv_out_spatial_dims(
  strides = [strides] * nd
  if isinstance(dilation_rate, int):
  dilation_rate = [dilation_rate] * nd
- assert nd == len(in_spatial_dims) == len(filter_size) == len(strides) == len(dilation_rate)
  if isinstance(padding, (int, str)):
  padding = [padding] * nd
+ assert nd == len(in_spatial_dims) == len(filter_size) == len(strides) == len(dilation_rate) == len(padding)
  padding = [p.lower() if isinstance(p, str) else p for p in padding]
  out_spatial_dims = []
  for i in range(nd):
- in_spatial_dim = in_spatial_dims[i]
- if (filter_size[i] == strides[i] == 1 and padding[i] in ("valid", "same", 0)) or (
- strides[i] == 1 and padding[i] == "same"
- ):
- out_spatial_dims.append(in_spatial_dim)
- else:
- out_spatial_dim = _calc_out_dim(
- in_dim=in_spatial_dim,
+ out_spatial_dims.append(
+ calc_conv_out_length(
+ in_spatial_dims[i],
  filter_size=filter_size[i],
+ padding=padding[i],
  stride=strides[i],
  dilation_rate=dilation_rate[i],
- padding=padding[i],
+ name=f"{description_prefix}:spatial{i}" if description_prefix else None,
  )
- assert isinstance(out_spatial_dim, Dim)
- if description_prefix and out_spatial_dim != in_spatial_dim:
- out_spatial_dim.name = f"{description_prefix}:spatial{i}"
- if in_spatial_dim.dyn_size_ext is not None and out_spatial_dim.dyn_size_ext is None:
- out_spatial_dim.dyn_size_ext = _calc_out_dim(
- in_dim=in_spatial_dim.dyn_size_ext,
- filter_size=filter_size[i],
- stride=strides[i],
- dilation_rate=dilation_rate[i],
- padding=padding[i],
- )
- out_spatial_dims.append(out_spatial_dim)
+ )
  return out_spatial_dims


- def _calc_out_dim(in_dim, filter_size, stride, padding, dilation_rate=1):
+ T = TypeVar("T", int, Dim, Tensor)
+
+
+ def calc_conv_out_length(
+ in_length: Union[T, int, Dim, Tensor],
+ *,
+ filter_size: Union[T, int, Dim, Tensor],
+ stride: int,
+ padding: Union[str, int],
+ dilation_rate: int = 1,
+ name: Optional[str] = None,
+ ) -> T:
  """
  Copied and adapted from TF ConvLayer.calc_out_dim.

- :param T|int|Tensor|torch.Tensor|tensorflow.Tensor|Dim in_dim: dimension in some axis
- :param int filter_size: e.g. 2, for the corresponding axis
- :param int stride: e.g. 1, for the corresponding axis
- :param int dilation_rate: e.g. 1
- :param str|int padding: "valid" or "same" or int
+ :param T in_length: dimension in some axis
+ :param filter_size: e.g. 2, for the corresponding axis
+ :param stride: e.g. 1, for the corresponding axis
+ :param dilation_rate: e.g. 1
+ :param padding: "valid" or "same" or int
+ :param name:
  :return: the output dimension
- :rtype: T
  """
+ padding = padding.lower() if isinstance(padding, str) else padding
+ if isinstance(filter_size, int):
+ filter_size_int = filter_size
+ elif isinstance(filter_size, Dim):
+ filter_size_int = filter_size.dimension
+ else:
+ filter_size_int = None
+ filter_size_ = filter_size_int if isinstance(filter_size_int, int) else filter_size

- def ceildiv(a, b):
- """
- :param T|int|Tensor|torch.Tensor|tensorflow.Tensor a:
- :param T|int|Tensor|torch.Tensor|tensorflow.Tensor b:
- :rtype: T
- """
- if isinstance(b, int) and b == 1:
- return a
- if isinstance(a, Tensor):
- return rf.ceil_divide(a, b)
- return -(-a // b)
+ if (filter_size_int == stride == 1 and padding in ("valid", "same", 0)) or (stride == 1 and padding == "same"):
+ return in_length

- padding = padding.lower() if isinstance(padding, str) else padding
  # See tf.compat.v1.nn.convolution() documentation for more.
  if padding == "same":
- if isinstance(in_dim, Dim):
- return in_dim.ceildiv_right(stride)
- return ceildiv(in_dim, stride)
+ if isinstance(in_length, Dim):
+ out_length = in_length.ceildiv_right(stride)
+ else:
+ out_length = _ceildiv(in_length, stride)
  elif padding == "valid" or isinstance(padding, int):
  if isinstance(padding, int) and padding != 0:
  assert padding > 0
- in_dim = padding + in_dim + padding
- if isinstance(in_dim, Dim):
- filter_left_dilated = (filter_size - 1) * dilation_rate // 2
- filter_right_dilated = (filter_size - 1) * dilation_rate - filter_left_dilated
- valid_part = in_dim.sub_left(filter_left_dilated).sub_right(filter_right_dilated)
- return valid_part.ceildiv_right(stride)
- return ceildiv(in_dim - (filter_size - 1) * dilation_rate, stride)
+ in_length = padding + in_length + padding
+
+ if filter_size_int == 1:
+ valid_part = in_length
+ elif isinstance(in_length, Dim):
+ filter_left_dilated = (filter_size_ - 1) * dilation_rate // 2
+ filter_right_dilated = (filter_size_ - 1) * dilation_rate - filter_left_dilated
+ valid_part = in_length.sub_left(filter_left_dilated).sub_right(filter_right_dilated)
+ else:
+ valid_part = in_length - (filter_size_ - 1) * dilation_rate
+
+ if isinstance(valid_part, Dim):
+ out_length = valid_part.ceildiv_right(stride)
+ else:
+ out_length = _ceildiv(valid_part, stride)
+
  else:
  raise ValueError(f"invalid padding {padding!r} (type {type(padding).__name__})")

+ if isinstance(in_length, Dim):
+ assert isinstance(out_length, Dim)
+ if name and out_length != in_length:
+ out_length.name = name
+ if in_length.dyn_size_ext is not None and out_length.dyn_size_ext is None:
+ out_dyn_size_ext = calc_conv_out_length(
+ in_length=in_length.dyn_size_ext,
+ filter_size=filter_size,
+ stride=stride,
+ dilation_rate=dilation_rate,
+ padding=padding,
+ )
+ assert isinstance(out_dyn_size_ext, Tensor)
+ out_length.dyn_size_ext = out_dyn_size_ext
+
+ return out_length
+
+
+ def make_transposed_conv_out_spatial_dims(
+ in_spatial_dims: Sequence[Dim],
+ *,
+ filter_size: Union[Sequence[Union[int, Dim]], int, Dim],
+ padding: Union[str, int, Sequence[int]],
+ output_padding: Optional[Union[Sequence[Optional[int]], int]] = None,
+ strides: Union[Sequence[Optional[int]], None, int] = None,
+ dilation_rate: Union[Sequence[int], int] = 1,
+ description_prefix: Optional[str] = None,
+ ) -> List[Dim]:
+ """create out spatial dims from in spatial dims"""
+ nd = len(in_spatial_dims)
+ if isinstance(filter_size, (int, Dim)):
+ filter_size = [filter_size] * nd
+ filter_size = [d.dimension if isinstance(d, Dim) else d for d in filter_size]
+ assert all(isinstance(s, int) for s in filter_size)
+ if isinstance(strides, int) or strides is None:
+ strides = [strides] * nd
+ if isinstance(dilation_rate, int):
+ dilation_rate = [dilation_rate] * nd
+ if isinstance(padding, (int, str)):
+ padding = [padding] * nd
+ if isinstance(output_padding, int) or output_padding is None:
+ output_padding = [output_padding] * nd
+ assert (
+ nd
+ == len(in_spatial_dims)
+ == len(filter_size)
+ == len(strides)
+ == len(dilation_rate)
+ == len(padding)
+ == len(output_padding)
+ )
+ padding = [p.lower() if isinstance(p, str) else p for p in padding]
+ out_spatial_dims = []
+ for i in range(nd):
+ out_spatial_dims.append(
+ calc_transposed_conv_out_length(
+ in_spatial_dims[i],
+ filter_size=filter_size[i],
+ padding=padding[i],
+ stride=strides[i],
+ dilation_rate=dilation_rate[i],
+ name=f"{description_prefix}:spatial{i}" if description_prefix else None,
+ )
+ )
+ return out_spatial_dims
+
+
+ def calc_transposed_conv_out_length(
+ in_length: Union[T, int, Dim, Tensor],
+ *,
+ filter_size: Union[int, Dim],
+ padding: Union[int, str],
+ output_padding: Optional[int] = None,
+ stride: Optional[int] = None,
+ dilation_rate: int = 1,
+ name: Optional[str] = None,
+ ) -> T:
+ """
+ Determines output length of a transposed convolution given input length.
+
+ Copied from TF/Keras conv_utils.deconv_output_length
+ (https://github.com/tensorflow/tensorflow/blob/5912f51d580551e5cee2cfde4cb882594b4d3e60/tensorflow/python/keras/utils/conv_utils.py#L140),
+ adapted with simplification.
+
+ Also see :func:`calc_conv_out_length`.
+
+ :param in_length:
+ :param filter_size:
+ :param padding: one of `"same"`, `"valid"`, `"full"`.
+ :param output_padding: amount of padding along the output dimension.
+ Can be set to `None` in which case the output length is inferred.
+ :param stride:
+ :param dilation_rate:
+ :param name:
+ :returns: The output length (integer)
+ """
+ assert padding in {"same", "valid", "full"} or isinstance(padding, int)
+
+ if isinstance(filter_size, int):
+ filter_size_int = filter_size
+ elif isinstance(filter_size, Dim):
+ filter_size_int = filter_size.dimension
+ else:
+ filter_size_int = None
+ filter_size_ = filter_size_int if isinstance(filter_size_int, int) else filter_size
+
+ # Get the dilated kernel size
+ if dilation_rate != 1 and filter_size_int != 1:
+ filter_size = filter_size + (filter_size_ - 1) * (dilation_rate - 1)
+
+ if stride is None:
+ assert filter_size_int is not None
+ stride = filter_size_int
+ if stride != 1:
+ in_length = in_length * stride
+
+ # Infer length if output padding is None, else compute the exact length
+ if output_padding is None:
+ if padding == "valid" or padding == 0:
+ if filter_size_int == stride:
+ out_length = in_length
+ elif filter_size_int is not None:
+ out_length = in_length + max(filter_size_int - stride, 0)
+ elif isinstance(filter_size, Tensor):
+ out_length = in_length + rf.relu(filter_size - stride)
+ elif isinstance(filter_size, Dim):
+ out_length = in_length + (filter_size - stride)
+ else:
+ raise ValueError(f"invalid filter_size {filter_size!r} type {type(filter_size)}")
+ elif padding == "full":
+ out_length = in_length - (stride + filter_size_ - 2)
+ elif padding == "same":
+ out_length = in_length
+ else:
+ raise ValueError(f"invalid padding {padding!r}")
+
+ else: # output_padding
+ if padding == "same":
+ pad = filter_size // 2
+ elif padding == "valid":
+ pad = 0
+ elif padding == "full":
+ pad = filter_size - 1
+ elif isinstance(padding, int):
+ pad = padding
+ else:
+ raise ValueError(f"invalid padding {padding!r}")
+ out_length = in_length + (filter_size - stride - 2 * pad + output_padding)
+
+ if isinstance(in_length, Dim):
+ assert isinstance(out_length, Dim)
+ if name and out_length != in_length:
+ out_length.name = name
+ if in_length.dyn_size_ext is not None and out_length.dyn_size_ext is None:
+ out_dyn_size_ext = calc_transposed_conv_out_length(
+ in_length=in_length.dyn_size_ext,
+ filter_size=filter_size,
+ padding=padding,
+ output_padding=output_padding,
+ stride=stride,
+ dilation_rate=dilation_rate,
+ )
+ assert isinstance(out_dyn_size_ext, Tensor)
+ out_length.dyn_size_ext = out_dyn_size_ext
+
+ return out_length
+
+
+ def _ceildiv(a: T, b: Union[T, int, Tensor]) -> T:
+ if isinstance(b, int) and b == 1:
+ return a
+ if isinstance(a, Tensor):
+ return rf.ceil_divide(a, b)
+ return -(-a // b)
+

  def _should_use_consistent_same_padding() -> bool:
  """
@@ -273,6 +273,7 @@ class ConformerEncoderLayer(rf.Module):
  x_mhsa = self.self_att(x_mhsa_ln, axis=spatial_dim)
  x_mhsa = rf.dropout(x_mhsa, self.dropout, axis=self.dropout_broadcast and self.out_dim)
  x_mhsa_out = x_mhsa + x_ffn1_out
+ del x_mhsa

  # Conv
  x_conv_ln = self.conv_layer_norm(x_mhsa_out)
@@ -79,6 +79,8 @@ class TransformerEncoder(rf.Module):
  self.model_dim = model_dim
  self.embed_dim = embed_dim

+ self.out_dim = self.model_dim # alias. consistency, compatibility
+
  if input_embedding is None or isinstance(input_embedding, rf.Module):
  pass
  elif isinstance(input_embedding, type):
returnn/frontend/loss.py CHANGED
@@ -3,11 +3,12 @@ Loss functions
  """

  from __future__ import annotations
+ from typing import Optional, Tuple
  from returnn.tensor import Tensor, Dim
  import returnn.frontend as rf


- __all__ = ["cross_entropy", "ctc_loss", "edit_distance"]
+ __all__ = ["cross_entropy", "ctc_loss", "ctc_greedy_decode", "edit_distance"]


  def cross_entropy(
@@ -93,6 +94,44 @@ def ctc_loss(
  )


+ def ctc_greedy_decode(
+ logits: Tensor,
+ *,
+ in_spatial_dim: Dim,
+ blank_index: int,
+ out_spatial_dim: Optional[Dim] = None,
+ target_dim: Optional[Dim] = None,
+ wb_target_dim: Optional[Dim] = None,
+ ) -> Tuple[Tensor, Dim]:
+ """
+ Greedy CTC decode.
+
+ :return: (labels, out_spatial_dim)
+ """
+ if wb_target_dim is None:
+ assert logits.feature_dim
+ wb_target_dim = logits.feature_dim
+
+ labels = rf.reduce_argmax(logits, axis=wb_target_dim)
+ labels = rf.cast(labels, "int32")
+
+ labels_shifted = rf.shift_right(labels, axis=in_spatial_dim, pad_value=blank_index)
+ mask_repeat = labels != labels_shifted
+ labels, out_spatial_dim = rf.masked_select(
+ labels,
+ mask=(labels != blank_index) & mask_repeat,
+ dims=[in_spatial_dim],
+ out_dim=out_spatial_dim,
+ )
+
+ if target_dim:
+ # Set correct sparse_dim. Only currently implemented if blank comes after.
+ assert target_dim.dimension == blank_index
+ labels.sparse_dim = target_dim
+
+ return labels, out_spatial_dim
+
+
  def edit_distance(a: Tensor, a_spatial_dim: Dim, b: Tensor, b_spatial_dim: Dim, *, dtype: str = "int32") -> Tensor:
  """
  :param a: [B,Ta]
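Note: a usage sketch for the new ctc_greedy_decode (assuming it is re-exported on returnn.frontend like the other loss functions; logits and time_dim below are placeholders for an actual model output and its spatial dim):

import returnn.frontend as rf
from returnn.tensor import Dim

target_dim = Dim(10, name="vocab")        # hypothetical: 10 real labels
wb_target_dim = Dim(11, name="vocab_wb")  # labels + blank, blank_index == 10
# logits: Tensor over (batch, time_dim, wb_target_dim), e.g. CTC model output (placeholder)

labels, out_spatial_dim = rf.ctc_greedy_decode(
    logits,
    in_spatial_dim=time_dim,
    blank_index=target_dim.dimension,  # blank comes after the real labels, as the assert requires
    target_dim=target_dim,
    wb_target_dim=wb_target_dim,
)
# labels: per-frame argmax with repeats collapsed and blanks dropped; out_spatial_dim: the shorter, dynamic spatial dim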
returnn/native_op.cpp CHANGED
@@ -206,6 +206,14 @@ Ndarray* Ndarray_Copy(const Ndarray* self) {

  #include "tensorflow/core/public/version.h"

+ #ifndef TF_MAJOR_VERSION
+ #error "TF_MAJOR_VERSION is not defined!"
+ #endif
+
+ #ifndef TF_MINOR_VERSION
+ #error "TF_MINOR_VERSION is not defined!"
+ #endif
+
  #if (TF_MAJOR_VERSION == 1 && TF_MINOR_VERSION >= 6) || (TF_MAJOR_VERSION > 1)
  #define TF_issue_6602_workaround 0
  #define TWOD_LSTM_SUPPORT 1
@@ -402,6 +410,9 @@ static void tf_cuda_sgemm_batched(


  #else // CUDA
+
+ #ifdef HAVE_CUSTOM_BLAS
+
  /*
  // matrices are in column-major form
  int sgemm_(char *transa, char *transb,
@@ -419,6 +430,75 @@ static void tf_cuda_sgemm_batched(
  sgemm_(&transa, &transb, \
  &m_, &n_, &k_, alpha, A, &lda_, B, &ldb_, beta, C, &ldc_); \
  }
+
+ #else // HAVE_CUSTOM_BLAS
+
+ template<typename T>
+ static void tf_cpu_sgemm(
+ OpKernelContext* context,
+ char transa_, char transb_,
+ int m, int n, int k,
+ const T* alpha_ptr, const T* a_ptr, int lda,
+ const T* b_ptr, int ldb, const T* beta_ptr,
+ T* c_ptr, int ldc)
+ {
+ if (m <= 0 || n <= 0 || k <= 0) return;
+
+ auto d = context->eigen_cpu_device();
+ const T alpha = *alpha_ptr;
+ const T beta = *beta_ptr;
+
+ bool transa = (transa_ == 'T' || transa_ == 't' || transa_ == 'C' || transa_ == 'c');
+ bool transb = (transb_ == 'T' || transb_ == 't' || transb_ == 'C' || transb_ == 'c');
+
+ // 1. Map as COLUMN-MAJOR
+ // Physical rows (height) for the Map is always the leading dimension (lda, ldb, ldc)
+ typedef Eigen::TensorMap<Eigen::Tensor<const T, 2, Eigen::ColMajor>, Eigen::Unaligned> ConstMap;
+ typedef Eigen::TensorMap<Eigen::Tensor<T, 2, Eigen::ColMajor>, Eigen::Unaligned> MutableMap;
+
+ // Logical height/width of slices before any transposition
+ int a_slice_rows = transa ? k : m;
+ int a_slice_cols = transa ? m : k;
+ int b_slice_rows = transb ? n : k;
+ int b_slice_cols = transb ? k : n;
+
+ // Map and Slice
+ auto a = ConstMap(a_ptr, lda, a_slice_cols).slice(
+ Eigen::array<Eigen::Index, 2>({0, 0}),
+ Eigen::array<Eigen::Index, 2>({(Eigen::Index)a_slice_rows, (Eigen::Index)a_slice_cols}));
+
+ auto b = ConstMap(b_ptr, ldb, b_slice_cols).slice(
+ Eigen::array<Eigen::Index, 2>({0, 0}),
+ Eigen::array<Eigen::Index, 2>({(Eigen::Index)b_slice_rows, (Eigen::Index)b_slice_cols}));
+
+ auto c = MutableMap(c_ptr, ldc, n).slice(
+ Eigen::array<Eigen::Index, 2>({0, 0}),
+ Eigen::array<Eigen::Index, 2>({(Eigen::Index)m, (Eigen::Index)n}));
+
+ // 2. Define Contraction Pairs based on Transposition
+ // Column-Major Matrix Mult: (M x K) * (K x N)
+ // Standard: Contract Axis 1 of A with Axis 0 of B
+ // If A is Transposed: A is (K x M), contract Axis 0 of A
+ // If B is Transposed: B is (N x K), contract Axis 1 of B
+ Eigen::array<Eigen::IndexPair<int>, 1> pairs;
+ pairs[0] = Eigen::IndexPair<int>(transa ? 0 : 1, transb ? 1 : 0);
+
+ // 3. Execution
+ if (alpha == T(1) && beta == T(0)) {
+ c.device(d) = a.contract(b, pairs);
+ } else if (alpha == T(1) && beta == T(1)) {
+ c.device(d) += a.contract(b, pairs);
+ } else {
+ c.device(d) = a.contract(b, pairs) * alpha + c * beta;
+ }
+ }
+
+ #define Ndarray_sgemm(\
+ transpose_A, transpose_B, \
+ m, n, k, alpha, A, lda, B, ldb, beta, C, ldc) \
+ tf_cpu_sgemm<float>(context, transpose_A, transpose_B, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+
+ #endif // HAVE_CUSTOM_BLAS
  #endif // CUDA

  // See Context struct below.
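Note: the new tf_cpu_sgemm fallback implements the usual BLAS sgemm contract (C = alpha * op(A) * op(B) + beta * C on column-major buffers) via an Eigen tensor contraction. A small NumPy reference of the same math, as an illustration only (not part of the package):

import numpy as np

def sgemm_reference(transa, transb, alpha, a, b, beta, c):
    # op() transposes when 'T'/'t'/'C'/'c' is given, matching the transa_/transb_ checks above
    op_a = a.T if transa in "TtCc" else a
    op_b = b.T if transb in "TtCc" else b
    return alpha * (op_a @ op_b) + beta * c

a = np.random.rand(2, 4).astype(np.float32)   # m=2, k=4
b = np.random.rand(4, 3).astype(np.float32)   # k=4, n=3
c = np.zeros((2, 3), dtype=np.float32)
out = sgemm_reference("N", "N", 1.0, a, b, 0.0, c)  # plain matmul; Eigen handles the column-major layout in the C++ code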
returnn/sprint/cache.py CHANGED
@@ -7,10 +7,9 @@ This module is about reading (maybe later also writing) the Sprint archive forma
  """

  from __future__ import annotations
- from typing import List, Optional, Tuple
+ from typing import Optional, List, Tuple, Dict
  import sys
  import os
- import typing
  import array
  from struct import pack, unpack
  import numpy
@@ -212,7 +211,7 @@ class FileArchive:
  def __init__(self, filename, must_exists=True, encoding="ascii"):
  self.encoding = encoding

- self.ft = {} # type: typing.Dict[str,FileInfo]
+ self.ft: Dict[str, FileInfo] = {}
  if os.path.exists(filename):
  self.allophones = []
  self.f = open(filename, "rb")
@@ -334,8 +333,8 @@ class FileArchive:
  # print(typ)
  assert type_ == "vector-f32"
  count = self.read_U32()
- data = [None] * count # type: typing.List[typing.Optional[numpy.ndarray]]
- time_ = [None] * count # type: typing.List[typing.Optional[numpy.ndarray]]
+ data: List[Optional[numpy.ndarray]] = [None] * count
+ time_: List[Optional[numpy.ndarray]] = [None] * count
  for i in range(count):
  size = self.read_U32()
  data[i] = self.read_v("f", size) # size x f32
@@ -450,7 +449,7 @@ class FileArchive:
  a = array.array("b")
  a.fromfile(self.f, comp)
  # unpack
- b = zlib.decompress(a.tostring(), 15 + 32)
+ b = zlib.decompress(a.tobytes(), 15 + 32)
  # substitute self.f by an anonymous memmap file object
  # restore original file handle after we're done
  backup_f = self.f
@@ -575,17 +574,17 @@ class FileArchiveBundle:
  :param str encoding: encoding used in the files
  """
  # filename -> FileArchive
- self.archives = {} # type: typing.Dict[str,FileArchive]
+ self.archives: Dict[str, FileArchive] = {}
  # archive content file -> FileArchive
- self.files = {} # type: typing.Dict[str,FileArchive]
+ self.files: Dict[str, FileArchive] = {}
  self._short_seg_names = {}
  if filename is not None:
  self.add_bundle(filename=filename, encoding=encoding)

- def add_bundle(self, filename, encoding="ascii"):
+ def add_bundle(self, filename: str, encoding: str = "ascii"):
  """
- :param str filename: bundle
- :param str encoding:
+ :param filename: bundle
+ :param encoding:
  """
  file_dir = os.path.dirname(filename) or "."
  for line in open(filename).read().splitlines():
@@ -837,7 +836,7 @@ class MixtureSet:
  """
  a = array.array("b")
  a.fromfile(self.f, length)
- return a.tostring().decode(encoding)
+ return a.tobytes().decode(encoding)

  def read_f32(self):
  """
@@ -1003,7 +1002,7 @@ class WordBoundaries:
  """
  a = array.array("b")
  a.fromfile(self.f, length)
- return a.tostring().decode(encoding)
+ return a.tobytes().decode(encoding)

  def __init__(self, filename):
  """
returnn/tensor/utils.py CHANGED
@@ -36,11 +36,14 @@ def tensor_fill_random_numpy_(
  *,
  min_val: int = 0,
  max_val: Optional[int] = None,
- rnd: numpy.random.RandomState,
+ rnd: Optional[numpy.random.RandomState] = None,
  dyn_dim_max_sizes: Optional[Dict[Dim, int]] = None,
  dyn_dim_min_sizes: Optional[Dict[Dim, int]] = None,
  ) -> bool:
  """fill. return whether sth was filled"""
+ if rnd is None:
+ # noinspection PyUnresolvedReferences,PyProtectedMember
+ rnd = numpy.random.mtrand._rand
  if dyn_dim_max_sizes is None:
  dyn_dim_max_sizes = {}
  if dyn_dim_min_sizes is None:
@@ -59,7 +62,7 @@ def tensor_fill_random_numpy_(
  continue
  if tensor_fill_random_numpy_(
  dim.dyn_size_ext,
- min_val=dyn_dim_min_sizes.get(dim, 2),
+ min_val=dyn_dim_min_sizes.get(dim, min(2, dyn_dim_max_sizes.get(dim, 2))),
  max_val=dyn_dim_max_sizes.get(dim, None),
  rnd=rnd,
  dyn_dim_max_sizes=dyn_dim_max_sizes,
@@ -98,8 +101,8 @@ def tensor_fill_random_numpy_(
  if max_val is None:
  max_val = rnd.randint(5, 20)
  if x.sparse_dim and x.sparse_dim.dimension is not None:
- max_val = x.sparse_dim.dimension
- x.raw_tensor = rnd.randint(min_val, max_val, size=shape, dtype=x.dtype)
+ max_val = x.sparse_dim.dimension - 1
+ x.raw_tensor = rnd.randint(min_val, max_val + 1, size=shape, dtype=x.dtype)
  elif x.dtype == "bool":
  x.raw_tensor = rnd.randint(0, 2, size=shape, dtype=x.dtype)
  elif x.dtype.startswith("float"):
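Note: numpy's RandomState.randint(low, high) excludes high, so the sparse-dim change above keeps the sampled range identical (0 .. sparse_dim - 1) while making max_val itself the largest drawable value. A quick standalone check, illustration only:

import numpy

rnd = numpy.random.RandomState(42)
sparse_dim = 5
old = rnd.randint(0, sparse_dim, size=1000)            # old form: high is exclusive
new = rnd.randint(0, (sparse_dim - 1) + 1, size=1000)  # new form: max_val = sparse_dim - 1, then max_val + 1
assert old.max() <= sparse_dim - 1 and new.max() <= sparse_dim - 1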