returnn 1.20250901.123052-py3-none-any.whl → 1.20260105.192646-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. returnn/PKG-INFO +2 -2
  2. returnn/_setup_info_generated.py +2 -2
  3. returnn/config.py +1 -1
  4. returnn/datasets/basic.py +29 -13
  5. returnn/datasets/distrib_files.py +61 -3
  6. returnn/datasets/generating.py +12 -21
  7. returnn/datasets/huggingface.py +434 -0
  8. returnn/datasets/lm.py +20 -0
  9. returnn/datasets/meta.py +179 -60
  10. returnn/datasets/multi_proc.py +1 -1
  11. returnn/datasets/postprocessing.py +597 -108
  12. returnn/datasets/text_dict.py +1 -1
  13. returnn/datasets/util/vocabulary.py +90 -0
  14. returnn/frontend/_backend.py +7 -0
  15. returnn/frontend/array_.py +54 -1
  16. returnn/frontend/attention.py +54 -20
  17. returnn/frontend/conv.py +273 -54
  18. returnn/frontend/decoder/transformer.py +36 -17
  19. returnn/frontend/encoder/conformer.py +1 -0
  20. returnn/frontend/encoder/transformer.py +2 -0
  21. returnn/frontend/loss.py +40 -1
  22. returnn/frontend/module.py +8 -1
  23. returnn/frontend/nested.py +9 -0
  24. returnn/native_op.cpp +80 -0
  25. returnn/sprint/cache.py +12 -13
  26. returnn/tensor/_dim_extra.py +51 -29
  27. returnn/tensor/_tensor_extra.py +6 -1
  28. returnn/tensor/utils.py +7 -4
  29. returnn/tf/frontend_layers/_backend.py +11 -2
  30. returnn/tf/frontend_low_level/_backend.py +15 -0
  31. returnn/tf/layers/basic.py +16 -38
  32. returnn/tf/native_op.py +11 -58
  33. returnn/tf/network.py +1 -1
  34. returnn/tf/util/basic.py +19 -0
  35. returnn/torch/data/returnn_dataset_wrapper.py +9 -3
  36. returnn/torch/engine.py +67 -2
  37. returnn/torch/frontend/_backend.py +119 -7
  38. returnn/torch/util/diagnose_gpu.py +65 -31
  39. returnn/torch/util/exception_helper.py +7 -1
  40. returnn/util/basic.py +6 -7
  41. returnn/util/better_exchook.py +4 -0
  42. returnn/util/collect_outputs_dict.py +79 -0
  43. returnn/util/debug.py +11 -2
  44. returnn/util/file_cache.py +42 -4
  45. returnn/util/task_system.py +1 -1
  46. {returnn-1.20250901.123052.dist-info → returnn-1.20260105.192646.dist-info}/METADATA +2 -2
  47. {returnn-1.20250901.123052.dist-info → returnn-1.20260105.192646.dist-info}/RECORD +50 -48
  48. {returnn-1.20250901.123052.dist-info → returnn-1.20260105.192646.dist-info}/LICENSE +0 -0
  49. {returnn-1.20250901.123052.dist-info → returnn-1.20260105.192646.dist-info}/WHEEL +0 -0
  50. {returnn-1.20250901.123052.dist-info → returnn-1.20260105.192646.dist-info}/top_level.txt +0 -0
returnn/native_op.cpp CHANGED
@@ -206,6 +206,14 @@ Ndarray* Ndarray_Copy(const Ndarray* self) {
 
  #include "tensorflow/core/public/version.h"
 
+ #ifndef TF_MAJOR_VERSION
+ #error "TF_MAJOR_VERSION is not defined!"
+ #endif
+
+ #ifndef TF_MINOR_VERSION
+ #error "TF_MINOR_VERSION is not defined!"
+ #endif
+
  #if (TF_MAJOR_VERSION == 1 && TF_MINOR_VERSION >= 6) || (TF_MAJOR_VERSION > 1)
  #define TF_issue_6602_workaround 0
  #define TWOD_LSTM_SUPPORT 1
@@ -402,6 +410,9 @@ static void tf_cuda_sgemm_batched(
 
 
  #else // CUDA
+
+ #ifdef HAVE_CUSTOM_BLAS
+
  /*
  // matrices are in column-major form
  int sgemm_(char *transa, char *transb,
@@ -419,6 +430,75 @@ static void tf_cuda_sgemm_batched(
  sgemm_(&transa, &transb, \
  &m_, &n_, &k_, alpha, A, &lda_, B, &ldb_, beta, C, &ldc_); \
  }
+
+ #else // HAVE_CUSTOM_BLAS
+
+ template<typename T>
+ static void tf_cpu_sgemm(
+ OpKernelContext* context,
+ char transa_, char transb_,
+ int m, int n, int k,
+ const T* alpha_ptr, const T* a_ptr, int lda,
+ const T* b_ptr, int ldb, const T* beta_ptr,
+ T* c_ptr, int ldc)
+ {
+ if (m <= 0 || n <= 0 || k <= 0) return;
+
+ auto d = context->eigen_cpu_device();
+ const T alpha = *alpha_ptr;
+ const T beta = *beta_ptr;
+
+ bool transa = (transa_ == 'T' || transa_ == 't' || transa_ == 'C' || transa_ == 'c');
+ bool transb = (transb_ == 'T' || transb_ == 't' || transb_ == 'C' || transb_ == 'c');
+
+ // 1. Map as COLUMN-MAJOR
+ // Physical rows (height) for the Map is always the leading dimension (lda, ldb, ldc)
+ typedef Eigen::TensorMap<Eigen::Tensor<const T, 2, Eigen::ColMajor>, Eigen::Unaligned> ConstMap;
+ typedef Eigen::TensorMap<Eigen::Tensor<T, 2, Eigen::ColMajor>, Eigen::Unaligned> MutableMap;
+
+ // Logical height/width of slices before any transposition
+ int a_slice_rows = transa ? k : m;
+ int a_slice_cols = transa ? m : k;
+ int b_slice_rows = transb ? n : k;
+ int b_slice_cols = transb ? k : n;
+
+ // Map and Slice
+ auto a = ConstMap(a_ptr, lda, a_slice_cols).slice(
+ Eigen::array<Eigen::Index, 2>({0, 0}),
+ Eigen::array<Eigen::Index, 2>({(Eigen::Index)a_slice_rows, (Eigen::Index)a_slice_cols}));
+
+ auto b = ConstMap(b_ptr, ldb, b_slice_cols).slice(
+ Eigen::array<Eigen::Index, 2>({0, 0}),
+ Eigen::array<Eigen::Index, 2>({(Eigen::Index)b_slice_rows, (Eigen::Index)b_slice_cols}));
+
+ auto c = MutableMap(c_ptr, ldc, n).slice(
+ Eigen::array<Eigen::Index, 2>({0, 0}),
+ Eigen::array<Eigen::Index, 2>({(Eigen::Index)m, (Eigen::Index)n}));
+
+ // 2. Define Contraction Pairs based on Transposition
+ // Column-Major Matrix Mult: (M x K) * (K x N)
+ // Standard: Contract Axis 1 of A with Axis 0 of B
+ // If A is Transposed: A is (K x M), contract Axis 0 of A
+ // If B is Transposed: B is (N x K), contract Axis 1 of B
+ Eigen::array<Eigen::IndexPair<int>, 1> pairs;
+ pairs[0] = Eigen::IndexPair<int>(transa ? 0 : 1, transb ? 1 : 0);
+
+ // 3. Execution
+ if (alpha == T(1) && beta == T(0)) {
+ c.device(d) = a.contract(b, pairs);
+ } else if (alpha == T(1) && beta == T(1)) {
+ c.device(d) += a.contract(b, pairs);
+ } else {
+ c.device(d) = a.contract(b, pairs) * alpha + c * beta;
+ }
+ }
+
+ #define Ndarray_sgemm(\
+ transpose_A, transpose_B, \
+ m, n, k, alpha, A, lda, B, ldb, beta, C, ldc) \
+ tf_cpu_sgemm<float>(context, transpose_A, transpose_B, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+
+ #endif // HAVE_CUSTOM_BLAS
  #endif // CUDA
 
  // See Context struct below.
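
Note on the native_op.cpp change above: when no explicit BLAS library is configured (HAVE_CUSTOM_BLAS not defined), Ndarray_sgemm on CPU now maps onto an Eigen tensor contraction running on TensorFlow's CPU device instead of calling an external sgemm_. A rough NumPy sketch of the GEMM semantics this implements (the real code additionally handles column-major storage and the leading dimensions lda/ldb/ldc via the Eigen maps and slices; the helper name below is illustrative only):

    import numpy

    def gemm_reference(transa: str, transb: str, alpha: float, a, b, beta: float, c):
        """C = alpha * op(A) @ op(B) + beta * C, with op() transposing if requested."""
        op_a = a.T if transa in "TtCc" else a
        op_b = b.T if transb in "TtCc" else b
        c[:] = alpha * (op_a @ op_b) + beta * c
        return c

    a = numpy.random.rand(4, 2)  # used transposed below, so op(A) is 2x4
    b = numpy.random.rand(4, 3)
    c = numpy.ones((2, 3))
    gemm_reference("T", "N", alpha=2.0, a=a, b=b, beta=0.5, c=c)

Like the Eigen code, the alpha == 1 / beta == 0 and alpha == 1 / beta == 1 cases can skip the scaling entirely; the sketch only shows the general formula.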
returnn/sprint/cache.py CHANGED
@@ -7,10 +7,9 @@ This module is about reading (maybe later also writing) the Sprint archive forma
  """
 
  from __future__ import annotations
- from typing import List, Optional, Tuple
+ from typing import Optional, List, Tuple, Dict
  import sys
  import os
- import typing
  import array
  from struct import pack, unpack
  import numpy
@@ -212,7 +211,7 @@ class FileArchive:
  def __init__(self, filename, must_exists=True, encoding="ascii"):
  self.encoding = encoding
 
- self.ft = {} # type: typing.Dict[str,FileInfo]
+ self.ft: Dict[str, FileInfo] = {}
  if os.path.exists(filename):
  self.allophones = []
  self.f = open(filename, "rb")
@@ -334,8 +333,8 @@ class FileArchive:
  # print(typ)
  assert type_ == "vector-f32"
  count = self.read_U32()
- data = [None] * count # type: typing.List[typing.Optional[numpy.ndarray]]
- time_ = [None] * count # type: typing.List[typing.Optional[numpy.ndarray]]
+ data: List[Optional[numpy.ndarray]] = [None] * count
+ time_: List[Optional[numpy.ndarray]] = [None] * count
  for i in range(count):
  size = self.read_U32()
  data[i] = self.read_v("f", size) # size x f32
@@ -450,7 +449,7 @@ class FileArchive:
  a = array.array("b")
  a.fromfile(self.f, comp)
  # unpack
- b = zlib.decompress(a.tostring(), 15 + 32)
+ b = zlib.decompress(a.tobytes(), 15 + 32)
  # substitute self.f by an anonymous memmap file object
  # restore original file handle after we're done
  backup_f = self.f
@@ -575,17 +574,17 @@ class FileArchiveBundle:
  :param str encoding: encoding used in the files
  """
  # filename -> FileArchive
- self.archives = {} # type: typing.Dict[str,FileArchive]
+ self.archives: Dict[str, FileArchive] = {}
  # archive content file -> FileArchive
- self.files = {} # type: typing.Dict[str,FileArchive]
+ self.files: Dict[str, FileArchive] = {}
  self._short_seg_names = {}
  if filename is not None:
  self.add_bundle(filename=filename, encoding=encoding)
 
- def add_bundle(self, filename, encoding="ascii"):
+ def add_bundle(self, filename: str, encoding: str = "ascii"):
  """
- :param str filename: bundle
- :param str encoding:
+ :param filename: bundle
+ :param encoding:
  """
  file_dir = os.path.dirname(filename) or "."
  for line in open(filename).read().splitlines():
@@ -837,7 +836,7 @@ class MixtureSet:
  """
  a = array.array("b")
  a.fromfile(self.f, length)
- return a.tostring().decode(encoding)
+ return a.tobytes().decode(encoding)
 
  def read_f32(self):
  """
@@ -1003,7 +1002,7 @@ class WordBoundaries:
  """
  a = array.array("b")
  a.fromfile(self.f, length)
- return a.tostring().decode(encoding)
+ return a.tobytes().decode(encoding)
 
  def __init__(self, filename):
  """
returnn/tensor/_dim_extra.py CHANGED
@@ -18,6 +18,8 @@ if TYPE_CHECKING:
  # just for type hints, otherwise use _d.Dim
  from .dim import Dim
 
+ from returnn.datasets.util.vocabulary import Vocabulary
+
  from . import dim as _d
  from . import tensor as _t
  from . import marked_dim as _m
@@ -41,54 +43,63 @@ class _DimExtra:
  self,
  *,
  dim: Dim,
- kind=DimTypes.Unspecified,
- vocab=None,
- undefined=False,
- special=False,
- auto_generated=False,
- match_priority=0,
- derived_from_tag=None,
- derived_from_op=None,
- batch=None,
- control_flow_ctx=None,
+ kind: Entity = DimTypes.Unspecified,
+ vocab: Union[None, Dict[str, Any], Vocabulary] = None,
+ undefined: bool = False,
+ special: bool = False,
+ auto_generated: bool = False,
+ match_priority: int = 0,
+ derived_from_tag: Optional[Dim] = None,
+ derived_from_op: Optional[Op] = None,
+ batch: Optional[BatchInfo] = None,
+ control_flow_ctx: Optional[ControlFlowContext] = None,
  src_data: Optional[_t.Tensor] = None,
  src_axis: Optional[int] = None,
  ):
  """
  :param dim:
- :param Entity|None kind:
- :param returnn.datasets.util.vocabulary.Vocabulary|None vocab:
- :param bool undefined: When this is specified as `None` by the user via `shape`.
- :param bool special: this can not be a dim tag of :class:`Tensor`.
+ :param kind:
+ :param vocab:
+ :param undefined: When this is specified as `None` by the user via `shape`.
+ :param special: this can not be a dim tag of :class:`Tensor`.
  But this dim tag also does not match anything except itself.
  So it can be used to represent special placeholders with special meanings like ``single_step``.
- :param bool auto_generated:
+ :param auto_generated:
  This is auto-generated by RETURNN because it was not explicitly specified by the user.
  E.g. for ConvLayer and others.
  This implies certain behavior on equality, such as comparing the description,
  to allow for several independent creations of the dim tag during template construction.
- :param Dim|None derived_from_tag:
+ :param derived_from_tag:
  Whether this new tag is reduced, down/up sampled, padded etc from this given other tag.
  In situations where dim tags are being matched (Data.get_common_data),
  the behavior is to consider them as equal,
  and assume that the chain of operations (e.g. padding + valid conv) results in the same dim.
- :param Op|None derived_from_op:
- :param int match_priority: when there is ambiguity between multiple dim tags, this value defines the order
+ :param derived_from_op:
+ :param match_priority: when there is ambiguity between multiple dim tags, this value defines the order
  in which the dimension are assigned to their matching counterparts.
  A dimension tag with a higher priority value is assigned first.
  E.g. for a square matrix used for a linear transformation,
  the reduce dim tag should have a higher priority.
- :param BatchInfo|None batch: for batch-dim, or dynamic dims per batch
- :param ControlFlowContext|None control_flow_ctx:
+ :param batch: for batch-dim, or dynamic dims per batch
+ :param control_flow_ctx:
  :param src_data:
  :param src_axis:
  """
  self.dim = dim
  assert kind is None or (isinstance(kind, Entity) and kind in DimTypes.Types)
  self.kind = kind
+ if vocab:
+ from returnn.datasets.util.vocabulary import Vocabulary
+
+ if isinstance(vocab, Vocabulary):
+ pass
+ elif isinstance(vocab, dict):
+ vocab = Vocabulary.create_vocab(**vocab)
+ else:
+ raise TypeError(f"invalid vocab {vocab!r} type {type(vocab)}")
  self.vocab = vocab
- self.same_as = None # type: Optional[_d.Dim]
- self.copy_same_as = None # type: Optional[_d.Dim]
+ self.same_as: Optional[Dim] = None
+ self.copy_same_as: Optional[Dim] = None
  self.derived_from_tag = derived_from_tag
  self.derived_from_op = derived_from_op
  if derived_from_op and not derived_from_op.output:
@@ -116,8 +127,8 @@ class _DimExtra:
  self.auto_generated = auto_generated
  # We can have different tag variants per batch info (e.g. with beam), or per control flow ctx.
  # They each have same_as = self. The same_base should have the base (global) batch info.
- self.same_for_batch_ctx = {} # type: Dict[Tuple[BatchInfo,Optional[ControlFlowContext]],_d.Dim]
- self.cache_dyn_size_ext_dev = {} # type: Dict[str,_t.Tensor] # device -> dyn_size_ext
+ self.same_for_batch_ctx: Dict[Tuple[BatchInfo, Optional[ControlFlowContext]], Dim] = {}
+ self.cache_dyn_size_ext_dev: Dict[str, _t.Tensor] = {} # device -> dyn_size_ext
  self.cache_seq_mask: Dict[Tuple[str, Optional[Tuple[Dim, ...]]], _t.Tensor] = {} # (dev,dim_order) -> seq_mask
  self.cache_dim_math = _CacheDimMath() # op (add,sub,...), operand -> Dim
 
@@ -134,6 +145,7 @@ class _DimExtra:
  def __setstate__(self, state):
  self.__dict__.update(state)
  if self.kind is not None:
+ # noinspection PyTypeChecker
  self.kind = {v.name: v for v in DimTypes.Types}[self.kind]
 
  def __sis_state__(self):
@@ -151,6 +163,9 @@ class _DimMixin:
  def _handle_extra_kwargs(self: Dim, *, dyn_size: Optional[_t.RawTensorType] = None, **kwargs):
  if kwargs:
  self._extra = _DimExtra(dim=self, **kwargs)
+ if self._extra.vocab and self.size is None:
+ self.size = self._extra.vocab.num_labels
+ self.capacity = self.capacity or self.size
  if dyn_size is not None:
  self.dyn_size = dyn_size
  if self.derived_from_op and self.is_dynamic():
@@ -2184,7 +2199,7 @@ class _DimMixin:
  other = other.dimension # makes matching easier
  if isinstance(other, int) and other == 1:
  return self
- if self.is_constant_static_dim() and isinstance(other, _d.Dim):
+ if self.is_constant_static_dim() and isinstance(other, _d.Dim) and not other.is_constant_static_dim():
  return self.dimension * other # use rmul
  cache_key = ("mul", other)
  cache = self.get_same_base()._make_extra().cache_dim_math
@@ -2571,14 +2586,19 @@ class _MathFindMatchingAdditive:
 
 
  def _math_find_matching_mult(start: Dim, other: Union[int, Dim], *, right: bool) -> Optional[Dim]:
- if (isinstance(other, int) or other.is_constant_static_dim()) and start.is_constant_static_dim():
+ # we assume, if other is Dim, then it is not constant static dim
+ if isinstance(other, int) and start.is_constant_static_dim():
  return _math_get_dim_via_bin_op([start, other] if right else [other, start], "mul")
  c_op = start.derived_from_op
  if c_op and c_op.kind == "mul" and len(c_op.inputs) == 2:
  if right:
  return c_op.inputs[0] * (c_op.inputs[1] * other)
- else:
- return (other * c_op.inputs[0]) * c_op.inputs[1]
+ # Don't do right=False -> (other * c_op.inputs[0]) * c_op.inputs[1],
+ # because this can lead to infinite recursions,
+ # and also we don't have a proper normalized form for multiplication.
+ # However, if both left-most factors are constant static dims, then we can merge it.
+ elif isinstance(other, int) and c_op.inputs[0].is_constant_static_dim():
+ return (other * c_op.inputs[0].dimension) * c_op.inputs[1]
  return None
 
 
@@ -2665,7 +2685,9 @@ def _get_merged_dim_kind(dim_tags: Sequence[Dim]) -> Entity:
 
 
  def _representative_tag(terms: Sequence[Dim]) -> Optional[Dim]:
- # Also see _OpLinearTerm.representative_tag().
+ if any(not term_.auto_generated for term_ in terms):
+ # Always prefer non-auto-generated.
+ terms = [term_ for term_ in terms if not term_.auto_generated]
  # First find any dynamic.
  for term_ in terms:
  if term_.is_dynamic_seq_length():
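
Note on the Dim/vocab handling added above: a vocab may now be given either as a Vocabulary instance or as a dict, which _DimExtra coerces via Vocabulary.create_vocab(**vocab); _handle_extra_kwargs then derives the dim size from vocab.num_labels when no size was set. A hedged, standalone sketch of that coercion in isolation (the _Vocab stand-in and the helper name are illustrative, not the RETURNN classes):

    from typing import Any, Dict, Optional, Tuple, Union

    class _Vocab:
        """Stand-in for returnn.datasets.util.vocabulary.Vocabulary."""
        def __init__(self, labels):
            self.labels = list(labels)
            self.num_labels = len(self.labels)

    def coerce_vocab(
        vocab: Union[None, Dict[str, Any], _Vocab], size: Optional[int]
    ) -> Tuple[Optional[_Vocab], Optional[int]]:
        if vocab:
            if isinstance(vocab, _Vocab):
                pass
            elif isinstance(vocab, dict):
                vocab = _Vocab(**vocab)  # real code: Vocabulary.create_vocab(**vocab)
            else:
                raise TypeError(f"invalid vocab {vocab!r} type {type(vocab)}")
        if vocab and size is None:
            size = vocab.num_labels  # new: dim size falls back to the vocab size
        return vocab, size

    print(coerce_vocab({"labels": ["<s>", "</s>", "a", "b"]}, None)[1])  # 4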
returnn/tensor/_tensor_extra.py CHANGED
@@ -588,7 +588,12 @@ class _TensorMixin(_TensorMixinBase):
 
  def __getstate__(self):
  d = {k: getattr(self, k) for k in self.__slots__}
- d["_raw_tensor"] = None # do not store the TF tensors
+ if (
+ self._raw_tensor is not None
+ and self._raw_backend is not None
+ and not self._raw_backend.should_pickle_tensor(self._raw_tensor)
+ ):
+ d["_raw_tensor"] = None
  return d
 
  def __setstate__(self, state):
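
Note on the __getstate__ change above: previously the raw tensor was dropped unconditionally when pickling; now it is kept whenever the active backend's should_pickle_tensor() allows it (eager/NumPy-style values), and only symbolic tensors are nulled out (see the TF backend overrides further below and rwth-i6/returnn issues 1541 and 1763). A generic sketch of the pattern with stand-in classes (not the RETURNN Tensor/Backend API):

    import pickle

    class SymbolicHandle:
        """Stand-in for a symbolic graph tensor whose value should not be stored."""

    class Backend:
        @staticmethod
        def should_pickle_tensor(raw) -> bool:
            return not isinstance(raw, SymbolicHandle)

    class TensorLike:
        def __init__(self, name, raw=None):
            self.name = name
            self._raw_tensor = raw
            self._raw_backend = Backend if raw is not None else None

        def __getstate__(self):
            d = dict(self.__dict__)
            # Keep raw data only if the backend allows it; otherwise drop it.
            if (
                self._raw_tensor is not None
                and self._raw_backend is not None
                and not self._raw_backend.should_pickle_tensor(self._raw_tensor)
            ):
                d["_raw_tensor"] = None
            return d

    print(pickle.loads(pickle.dumps(TensorLike("x", raw=[1, 2, 3])))._raw_tensor)  # [1, 2, 3]
    print(pickle.loads(pickle.dumps(TensorLike("y", raw=SymbolicHandle())))._raw_tensor)  # None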
returnn/tensor/utils.py CHANGED
@@ -36,11 +36,14 @@ def tensor_fill_random_numpy_(
  *,
  min_val: int = 0,
  max_val: Optional[int] = None,
- rnd: numpy.random.RandomState,
+ rnd: Optional[numpy.random.RandomState] = None,
  dyn_dim_max_sizes: Optional[Dict[Dim, int]] = None,
  dyn_dim_min_sizes: Optional[Dict[Dim, int]] = None,
  ) -> bool:
  """fill. return whether sth was filled"""
+ if rnd is None:
+ # noinspection PyUnresolvedReferences,PyProtectedMember
+ rnd = numpy.random.mtrand._rand
  if dyn_dim_max_sizes is None:
  dyn_dim_max_sizes = {}
  if dyn_dim_min_sizes is None:
@@ -59,7 +62,7 @@ def tensor_fill_random_numpy_(
  continue
  if tensor_fill_random_numpy_(
  dim.dyn_size_ext,
- min_val=dyn_dim_min_sizes.get(dim, 2),
+ min_val=dyn_dim_min_sizes.get(dim, min(2, dyn_dim_max_sizes.get(dim, 2))),
  max_val=dyn_dim_max_sizes.get(dim, None),
  rnd=rnd,
  dyn_dim_max_sizes=dyn_dim_max_sizes,
@@ -98,8 +101,8 @@ def tensor_fill_random_numpy_(
  if max_val is None:
  max_val = rnd.randint(5, 20)
  if x.sparse_dim and x.sparse_dim.dimension is not None:
- max_val = x.sparse_dim.dimension
- x.raw_tensor = rnd.randint(min_val, max_val, size=shape, dtype=x.dtype)
+ max_val = x.sparse_dim.dimension - 1
+ x.raw_tensor = rnd.randint(min_val, max_val + 1, size=shape, dtype=x.dtype)
  elif x.dtype == "bool":
  x.raw_tensor = rnd.randint(0, 2, size=shape, dtype=x.dtype)
  elif x.dtype.startswith("float"):
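
Note on the sparse fill fix above: numpy.random.RandomState.randint treats its upper bound as exclusive, and max_val is now interpreted as inclusive, so labels for a sparse dim of size N stay within [0, N-1]; rnd also became optional, defaulting to NumPy's global RandomState. A tiny check of the bound arithmetic:

    import numpy

    rnd = numpy.random.RandomState(42)
    sparse_dim = 5                    # valid labels: 0 .. 4
    max_val = sparse_dim - 1          # max_val is now inclusive
    labels = rnd.randint(0, max_val + 1, size=1000)  # randint's high bound is exclusive
    assert labels.min() >= 0 and labels.max() <= sparse_dim - 1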
returnn/tf/frontend_layers/_backend.py CHANGED
@@ -45,6 +45,13 @@ class ReturnnLayersBackend(Backend[Layer]):
  """executing eagerly"""
  return False
 
+ @staticmethod
+ def should_pickle_tensor(raw_tensor: Layer) -> bool:
+ """
+ :return: whether the tensor should be included in a pickle or set to `None`.
+ """
+ return False
+
  @staticmethod
  def get_tensor_dependencies(x: Tensor[Layer]) -> Sequence[Tensor]:
  """get tensor inputs"""
@@ -1060,14 +1067,16 @@ class ReturnnLayersBackend(Backend[Layer]):
  s = filter_size[i].dimension if not strides else strides[i]
  if filter_size[i].dimension == s == 1 or (s == 1 and padding.lower() == "same"):
  out_spatial_dims[i] = in_spatial_dims[i]
- layer_dict = {
+ assert all(size.is_static() for size in filter_size)
+ layer_dict: Dict[str, Any] = {
  "class": "transposed_conv",
  "from": source,
  "in_dim": in_dim,
  "in_spatial_dims": in_spatial_dims,
  "out_dim": out_dim,
  "out_spatial_dims": out_spatial_dims,
- "filter_size": filter_size,
+ "filter_size": [size.dimension for size in filter_size],
+ "filter_perm": list(filter_size) + [out_dim, in_dim],
  "padding": padding,
  }
  if remove_padding:
returnn/tf/frontend_low_level/_backend.py CHANGED
@@ -38,6 +38,21 @@ class TFBackend(Backend[tf.Tensor]):
  """
  return tf.executing_eagerly()
 
+ @staticmethod
+ def should_pickle_tensor(raw_tensor: tf.Tensor) -> bool:
+ """
+ :return: whether the tensor should be included in a pickle or set to `None`.
+ """
+
+ from tensorflow.python.framework.ops import EagerTensor
+
+ # Can not pickle symbolic TF tensors.
+ #
+ # See for discussion:
+ # - https://github.com/rwth-i6/returnn/issues/1541
+ # - https://github.com/rwth-i6/returnn/issues/1763
+ return isinstance(raw_tensor, EagerTensor)
+
  @staticmethod
  def get_dtype_name_raw(raw_tensor: tf.Tensor) -> str:
  """
returnn/tf/layers/basic.py CHANGED
@@ -7371,7 +7371,7 @@ class TransposedConvLayer(_ConcatInputLayer):
  """
  from returnn.tf.util.basic import get_initializer, get_activation_function, get_shape
 
- super(TransposedConvLayer, self).__init__(**kwargs)
+ super(TransposedConvLayer, self).__init__(in_dim=in_dim, **kwargs)
  out_dim # noqa # via get_out_data_from_opts
  assert not self.input_data.sparse
  assert self.input_data.have_batch_axis()
@@ -7516,7 +7516,10 @@ class TransposedConvLayer(_ConcatInputLayer):
  ):
  """
  Determines output length of a transposed convolution given input length.
- Copied from conv_utils.deconv_output_length, adapted with simplification.
+
+ Copied from TF/Keras conv_utils.deconv_output_length
+ (https://github.com/tensorflow/tensorflow/blob/5912f51d580551e5cee2cfde4cb882594b4d3e60/tensorflow/python/keras/utils/conv_utils.py#L140),
+ adapted with simplification.
 
  Also see :func:`ConvLayer.calc_out_dim`.
 
@@ -7533,44 +7536,17 @@ class TransposedConvLayer(_ConcatInputLayer):
  """
  if out_dim and out_dim.is_dim_known():
  return out_dim.get_dim_value()
- assert padding in {"same", "valid", "full"}
-
- # Get the dilated kernel size
- filter_size = filter_size + (filter_size - 1) * (dilation - 1)
 
- if stride != 1:
- input_length = input_length * stride
+ import returnn.frontend as rf
 
- # Infer length if output padding is None, else compute the exact length
- if output_padding is None:
- if padding == "valid":
- if isinstance(input_length, Dim):
- length = input_length + max(filter_size - stride, 0)
- else:
- length = tf_util.simplify_add(input_length, max(filter_size - stride, 0))
- elif padding == "full":
- if isinstance(input_length, Dim):
- length = input_length - (stride + filter_size - 2)
- else:
- length = tf_util.simplify_add(input_length, -(stride + filter_size - 2))
- elif padding == "same":
- length = input_length
- else:
- raise Exception("invalid padding %r" % (padding,))
- else: # output_padding
- if padding == "same":
- pad = filter_size // 2
- elif padding == "valid":
- pad = 0
- elif padding == "full":
- pad = filter_size - 1
- else:
- raise Exception("invalid padding %r" % (padding,))
- if isinstance(input_length, Dim):
- length = input_length + (-stride + filter_size - 2 * pad + output_padding)
- else:
- length = tf_util.simplify_add(input_length, -stride + filter_size - 2 * pad + output_padding)
- return length
+ return rf.calc_transposed_conv_out_length(
+ input_length,
+ filter_size=filter_size,
+ padding=padding,
+ output_padding=output_padding,
+ stride=stride,
+ dilation_rate=dilation,
+ )
 
  @classmethod
  def get_out_data_from_opts(
@@ -10488,6 +10464,7 @@ class TopKLayer(LayerBase):
  self._sub_layers = {}
  for key, (v, a) in sub_outputs.items():
  sub_out_data = self.output.copy_template(name="%s/%s" % (self.name, key))
+ sub_out_data.feature_dim = None
  sub_out_data.dtype = "int32"
  sub_out_data.sparse_dim = a
  sub_out_data.placeholder = v
@@ -10527,6 +10504,7 @@ class TopKLayer(LayerBase):
  axis = [in_data.get_dim_tag_from_description(a) for a in axis]
  out_dims = [dim for dim in in_data.dim_tags if dim not in axis] + [k_dim]
  out_data = in_data.copy_template(name=name).copy_template_new_dim_tags(out_dims)
+ out_data.feature_dim = None
  if for_indices is not None:
  assert 0 <= for_indices < len(axis)
  out_data.dtype = "int32"
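
For reference, the arithmetic that the removed deconv_output_length code implemented (and that the layer now delegates to rf.calc_transposed_conv_out_length) is, with dilated filter size f' = f + (f - 1) * (d - 1): "same" gives L*s; "valid" gives L*s + max(f' - s, 0); "full" gives L*s - (s + f' - 2); and with an explicit output_padding, L*s - s + f' - 2*pad + output_padding, where pad is 0, f'//2 or f' - 1 for valid/same/full. A standalone sketch of exactly that formula over plain ints (not the RETURNN function itself, which also accepts Dim/symbolic lengths):

    from typing import Optional

    def transposed_conv_out_length(
        input_length: int,
        filter_size: int,
        padding: str = "valid",
        output_padding: Optional[int] = None,
        stride: int = 1,
        dilation: int = 1,
    ) -> int:
        """Same arithmetic as the removed TransposedConvLayer.deconv_output_length."""
        assert padding in {"same", "valid", "full"}
        filter_size = filter_size + (filter_size - 1) * (dilation - 1)  # dilated kernel
        length = input_length * stride
        if output_padding is None:
            if padding == "valid":
                return length + max(filter_size - stride, 0)
            if padding == "full":
                return length - (stride + filter_size - 2)
            return length  # "same"
        pad = {"valid": 0, "same": filter_size // 2, "full": filter_size - 1}[padding]
        return length - stride + filter_size - 2 * pad + output_padding

    print(transposed_conv_out_length(10, filter_size=3, padding="valid", stride=2))  # 21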
returnn/tf/native_op.py CHANGED
@@ -528,77 +528,30 @@ class OpMaker:
  def _make_mod(self):
  if self.cache_key in self.mod_cache:
  return self.mod_cache[self.cache_key]
- from returnn.util.basic import find_lib
-
- # Note about BLAS linkage:
- # TensorFlow (or its Eigen lib) likely has linked against some BLAS lib itself.
- # For our CPU code, we directly call some BLAS functions such as `sgemm_`.
- # On platforms where there is a flat namespace (e.g. Mac),
- # it probably is not needed to explicitly link it again for this module.
- # In other cases, it's probably needed, but it's not so clear which lib has the
- # right symbols (e.g. the `sgemm_` symbol).
+
+ # Note about BLAS / matmul:
+ # Earlier, we assumed that TensorFlow/Eigen used BLAS internally,
+ # and our code directly called BLAS sgemm_, so we needed to link directly to BLAS.
+ # Now, by default, we use the underlying Eigen library,
+ # which is the same code path that TF also uses for CPU matmul.
+ # Only if an explicit BLAS library is specified, we use that instead.
  ld_flags = []
- have_blas_lib = False
+ c_macro_defines = {}
 
  if self.blas_lib is not None and os.path.exists(self.blas_lib):
  path = os.path.dirname(self.blas_lib)
  if path == "":
  path = "."
  ld_flags += ["-L%s" % path, "-l:%s" % os.path.basename(self.blas_lib)]
- have_blas_lib = True
- if not have_blas_lib and self.search_for_runtime_blas:
- from returnn.util.basic import find_sgemm_libs_from_runtime
-
- libs = find_sgemm_libs_from_runtime()
- if libs:
- numpy_libs = [fn for fn in libs if "/numpy/.libs/" in fn]
- if numpy_libs:
- # Prefer Numpy; move to front.
- libs = numpy_libs + [fn for fn in libs if fn not in numpy_libs]
- if self.blas_lib is not None:
- libs = [lib for lib in libs if self.blas_lib in lib]
- for fn in libs:
- ld_flags += ["-L%s" % os.path.dirname(fn), "-l:%s" % os.path.basename(fn)]
- have_blas_lib = True
- if not have_blas_lib and self.search_for_numpy_blas:
- # Find related Numpy libs.
- # Numpy usually comes with OpenBlas, and Numpy is probably loaded anyway.
- # Even do this before the other libs below, as it is likely
- # that this OpenBlas lib is correctly initialized already.
- import numpy
-
- numpy_dir = os.path.dirname(numpy.__file__)
- if os.path.exists("%s/.libs" % numpy_dir):
- ld_flags += ["-L%s/.libs" % numpy_dir]
- from glob import glob
-
- for f in glob("%s/.libs/*.so" % numpy_dir):
- f = os.path.basename(f)
- if self.blas_lib is not None and self.blas_lib not in f:
- continue
- if f.startswith("lib"):
- f = f[3:]
- if f.endswith(".so"):
- f = f[:-3]
- ld_flags += ["-l%s" % f]
- have_blas_lib = True
- if not have_blas_lib and self.search_for_system_blas:
- # Try to just link against blas/f77blas
- # (both can potentially have the symbol) if it finds the lib.
- if find_lib("blas"):
- ld_flags += ["-lblas"]
- have_blas_lib = True
- if find_lib("f77blas"):
- ld_flags += ["-lf77blas"]
- have_blas_lib = True
- if not have_blas_lib:
- print("WARNING: OpMaker: no BLAS lib found")
+ c_macro_defines["HAVE_CUSTOM_BLAS"] = "1"
+
  comp = tf_util.OpCodeCompiler(
  base_name=self.name,
  code_version=self.description.code_version,
  code=self._make_code(),
  include_deps=[self.support_native_op_cpp_filename],
  ld_flags=ld_flags,
+ c_macro_defines=c_macro_defines,
  use_cuda_if_available=self.with_cuda,
  log_stream=self.log_stream,
  **dict(self.compiler_opts),
returnn/tf/network.py CHANGED
@@ -4428,7 +4428,7 @@ def help_on_tf_exception(
  data = extern_data.data[data_key]
  info += ", %s" % data
  print(" %r: %s" % (key, info), file=file)
- if data and data.sparse:
+ if data is not None and data.sparse:
  if v_minmax[0] < 0 or v_minmax[1] >= data.dim:
  print(" WARNING, invalid label for data", data, file=file)
  elif feed_dict is None:
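
Note on the tf/network.py change: `if data and data.sparse` relied on the truthiness of the data object, while the intent is only to guard against None; `if data is not None and data.sparse` states that explicitly and stays correct even for objects whose truth value is customized. A generic illustration (not the RETURNN Data class):

    class Template:
        """An object whose truth value is False even though it is not None."""
        def __bool__(self):
            return False

    data = Template()
    print(bool(data), data is not None)  # False True -> `if data:` would skip a valid object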