returnn 1.20250901.123052__py3-none-any.whl → 1.20260105.192646__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- returnn/PKG-INFO +2 -2
- returnn/_setup_info_generated.py +2 -2
- returnn/config.py +1 -1
- returnn/datasets/basic.py +29 -13
- returnn/datasets/distrib_files.py +61 -3
- returnn/datasets/generating.py +12 -21
- returnn/datasets/huggingface.py +434 -0
- returnn/datasets/lm.py +20 -0
- returnn/datasets/meta.py +179 -60
- returnn/datasets/multi_proc.py +1 -1
- returnn/datasets/postprocessing.py +597 -108
- returnn/datasets/text_dict.py +1 -1
- returnn/datasets/util/vocabulary.py +90 -0
- returnn/frontend/_backend.py +7 -0
- returnn/frontend/array_.py +54 -1
- returnn/frontend/attention.py +54 -20
- returnn/frontend/conv.py +273 -54
- returnn/frontend/decoder/transformer.py +36 -17
- returnn/frontend/encoder/conformer.py +1 -0
- returnn/frontend/encoder/transformer.py +2 -0
- returnn/frontend/loss.py +40 -1
- returnn/frontend/module.py +8 -1
- returnn/frontend/nested.py +9 -0
- returnn/native_op.cpp +80 -0
- returnn/sprint/cache.py +12 -13
- returnn/tensor/_dim_extra.py +51 -29
- returnn/tensor/_tensor_extra.py +6 -1
- returnn/tensor/utils.py +7 -4
- returnn/tf/frontend_layers/_backend.py +11 -2
- returnn/tf/frontend_low_level/_backend.py +15 -0
- returnn/tf/layers/basic.py +16 -38
- returnn/tf/native_op.py +11 -58
- returnn/tf/network.py +1 -1
- returnn/tf/util/basic.py +19 -0
- returnn/torch/data/returnn_dataset_wrapper.py +9 -3
- returnn/torch/engine.py +67 -2
- returnn/torch/frontend/_backend.py +119 -7
- returnn/torch/util/diagnose_gpu.py +65 -31
- returnn/torch/util/exception_helper.py +7 -1
- returnn/util/basic.py +6 -7
- returnn/util/better_exchook.py +4 -0
- returnn/util/collect_outputs_dict.py +79 -0
- returnn/util/debug.py +11 -2
- returnn/util/file_cache.py +42 -4
- returnn/util/task_system.py +1 -1
- {returnn-1.20250901.123052.dist-info → returnn-1.20260105.192646.dist-info}/METADATA +2 -2
- {returnn-1.20250901.123052.dist-info → returnn-1.20260105.192646.dist-info}/RECORD +50 -48
- {returnn-1.20250901.123052.dist-info → returnn-1.20260105.192646.dist-info}/LICENSE +0 -0
- {returnn-1.20250901.123052.dist-info → returnn-1.20260105.192646.dist-info}/WHEEL +0 -0
- {returnn-1.20250901.123052.dist-info → returnn-1.20260105.192646.dist-info}/top_level.txt +0 -0
returnn/native_op.cpp
CHANGED

@@ -206,6 +206,14 @@ Ndarray* Ndarray_Copy(const Ndarray* self) {
 
 #include "tensorflow/core/public/version.h"
 
+#ifndef TF_MAJOR_VERSION
+#error "TF_MAJOR_VERSION is not defined!"
+#endif
+
+#ifndef TF_MINOR_VERSION
+#error "TF_MINOR_VERSION is not defined!"
+#endif
+
 #if (TF_MAJOR_VERSION == 1 && TF_MINOR_VERSION >= 6) || (TF_MAJOR_VERSION > 1)
 #define TF_issue_6602_workaround 0
 #define TWOD_LSTM_SUPPORT 1
@@ -402,6 +410,9 @@ static void tf_cuda_sgemm_batched(
 
 
 #else // CUDA
+
+#ifdef HAVE_CUSTOM_BLAS
+
 /*
 // matrices are in column-major form
 int sgemm_(char *transa, char *transb,
@@ -419,6 +430,75 @@ static void tf_cuda_sgemm_batched(
     sgemm_(&transa, &transb, \
       &m_, &n_, &k_, alpha, A, &lda_, B, &ldb_, beta, C, &ldc_); \
   }
+
+#else // HAVE_CUSTOM_BLAS
+
+template<typename T>
+static void tf_cpu_sgemm(
+    OpKernelContext* context,
+    char transa_, char transb_,
+    int m, int n, int k,
+    const T* alpha_ptr, const T* a_ptr, int lda,
+    const T* b_ptr, int ldb, const T* beta_ptr,
+    T* c_ptr, int ldc)
+{
+  if (m <= 0 || n <= 0 || k <= 0) return;
+
+  auto d = context->eigen_cpu_device();
+  const T alpha = *alpha_ptr;
+  const T beta = *beta_ptr;
+
+  bool transa = (transa_ == 'T' || transa_ == 't' || transa_ == 'C' || transa_ == 'c');
+  bool transb = (transb_ == 'T' || transb_ == 't' || transb_ == 'C' || transb_ == 'c');
+
+  // 1. Map as COLUMN-MAJOR
+  // Physical rows (height) for the Map is always the leading dimension (lda, ldb, ldc)
+  typedef Eigen::TensorMap<Eigen::Tensor<const T, 2, Eigen::ColMajor>, Eigen::Unaligned> ConstMap;
+  typedef Eigen::TensorMap<Eigen::Tensor<T, 2, Eigen::ColMajor>, Eigen::Unaligned> MutableMap;
+
+  // Logical height/width of slices before any transposition
+  int a_slice_rows = transa ? k : m;
+  int a_slice_cols = transa ? m : k;
+  int b_slice_rows = transb ? n : k;
+  int b_slice_cols = transb ? k : n;
+
+  // Map and Slice
+  auto a = ConstMap(a_ptr, lda, a_slice_cols).slice(
+      Eigen::array<Eigen::Index, 2>({0, 0}),
+      Eigen::array<Eigen::Index, 2>({(Eigen::Index)a_slice_rows, (Eigen::Index)a_slice_cols}));
+
+  auto b = ConstMap(b_ptr, ldb, b_slice_cols).slice(
+      Eigen::array<Eigen::Index, 2>({0, 0}),
+      Eigen::array<Eigen::Index, 2>({(Eigen::Index)b_slice_rows, (Eigen::Index)b_slice_cols}));
+
+  auto c = MutableMap(c_ptr, ldc, n).slice(
+      Eigen::array<Eigen::Index, 2>({0, 0}),
+      Eigen::array<Eigen::Index, 2>({(Eigen::Index)m, (Eigen::Index)n}));
+
+  // 2. Define Contraction Pairs based on Transposition
+  // Column-Major Matrix Mult: (M x K) * (K x N)
+  // Standard: Contract Axis 1 of A with Axis 0 of B
+  // If A is Transposed: A is (K x M), contract Axis 0 of A
+  // If B is Transposed: B is (N x K), contract Axis 1 of B
+  Eigen::array<Eigen::IndexPair<int>, 1> pairs;
+  pairs[0] = Eigen::IndexPair<int>(transa ? 0 : 1, transb ? 1 : 0);
+
+  // 3. Execution
+  if (alpha == T(1) && beta == T(0)) {
+    c.device(d) = a.contract(b, pairs);
+  } else if (alpha == T(1) && beta == T(1)) {
+    c.device(d) += a.contract(b, pairs);
+  } else {
+    c.device(d) = a.contract(b, pairs) * alpha + c * beta;
+  }
+}
+
+#define Ndarray_sgemm(\
+  transpose_A, transpose_B, \
+  m, n, k, alpha, A, lda, B, ldb, beta, C, ldc) \
+  tf_cpu_sgemm<float>(context, transpose_A, transpose_B, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+
+#endif // HAVE_CUSTOM_BLAS
 #endif // CUDA
 
 // See Context struct below.
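Note: the new `tf_cpu_sgemm` fallback above implements the classic BLAS sgemm contract, C = alpha * op(A) @ op(B) + beta * C with column-major operands, as an Eigen tensor contraction. For reference, the same semantics in NumPy (my own sketch, not code from the package):

    import numpy as np

    def sgemm_ref(transa, transb, alpha, a, b, beta, c):
        """C = alpha * op(A) @ op(B) + beta * C; op() transposes when the flag is 'T'/'t'/'C'/'c'."""
        op_a = a.T if transa in "TtCc" else a
        op_b = b.T if transb in "TtCc" else b
        return alpha * (op_a @ op_b) + beta * c

    a = np.random.rand(4, 3).astype(np.float32)
    b = np.random.rand(3, 5).astype(np.float32)
    c = np.zeros((4, 5), dtype=np.float32)
    assert np.allclose(sgemm_ref("N", "N", 1.0, a, b, 0.0, c), a @ b)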
returnn/sprint/cache.py
CHANGED

@@ -7,10 +7,9 @@ This module is about reading (maybe later also writing) the Sprint archive format.
 """
 
 from __future__ import annotations
-from typing import List,
+from typing import Optional, List, Tuple, Dict
 import sys
 import os
-import typing
 import array
 from struct import pack, unpack
 import numpy
@@ -212,7 +211,7 @@ class FileArchive:
    def __init__(self, filename, must_exists=True, encoding="ascii"):
        self.encoding = encoding
 
-       self.ft
+       self.ft: Dict[str, FileInfo] = {}
        if os.path.exists(filename):
            self.allophones = []
            self.f = open(filename, "rb")
@@ -334,8 +333,8 @@ class FileArchive:
        # print(typ)
        assert type_ == "vector-f32"
        count = self.read_U32()
-       data
-       time_
+       data: List[Optional[numpy.ndarray]] = [None] * count
+       time_: List[Optional[numpy.ndarray]] = [None] * count
        for i in range(count):
            size = self.read_U32()
            data[i] = self.read_v("f", size)  # size x f32
@@ -450,7 +449,7 @@ class FileArchive:
        a = array.array("b")
        a.fromfile(self.f, comp)
        # unpack
-       b = zlib.decompress(a.
+       b = zlib.decompress(a.tobytes(), 15 + 32)
        # substitute self.f by an anonymous memmap file object
        # restore original file handle after we're done
        backup_f = self.f
@@ -575,17 +574,17 @@ class FileArchiveBundle:
        :param str encoding: encoding used in the files
        """
        # filename -> FileArchive
-       self.archives
+       self.archives: Dict[str, FileArchive] = {}
        # archive content file -> FileArchive
-       self.files
+       self.files: Dict[str, FileArchive] = {}
        self._short_seg_names = {}
        if filename is not None:
            self.add_bundle(filename=filename, encoding=encoding)
 
-   def add_bundle(self, filename, encoding="ascii"):
+   def add_bundle(self, filename: str, encoding: str = "ascii"):
        """
-       :param
-       :param
+       :param filename: bundle
+       :param encoding:
        """
        file_dir = os.path.dirname(filename) or "."
        for line in open(filename).read().splitlines():
@@ -837,7 +836,7 @@ class MixtureSet:
        """
        a = array.array("b")
        a.fromfile(self.f, length)
-       return a.
+       return a.tobytes().decode(encoding)
 
    def read_f32(self):
        """
@@ -1003,7 +1002,7 @@ class WordBoundaries:
        """
        a = array.array("b")
        a.fromfile(self.f, length)
-       return a.
+       return a.tobytes().decode(encoding)
 
    def __init__(self, filename):
        """
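Side note on the `zlib.decompress(a.tobytes(), 15 + 32)` call above: the second argument is wbits, where 15 is the maximum window size and adding 32 enables automatic detection of zlib or gzip headers. A minimal illustration (my own, not from the package):

    import zlib

    payload = zlib.compress(b"sprint archive payload")
    # wbits = 15 (max window) + 32 (auto-detect zlib/gzip header)
    assert zlib.decompress(payload, 15 + 32) == b"sprint archive payload"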
returnn/tensor/_dim_extra.py
CHANGED

@@ -18,6 +18,8 @@ if TYPE_CHECKING:
     # just for type hints, otherwise use _d.Dim
     from .dim import Dim
 
+    from returnn.datasets.util.vocabulary import Vocabulary
+
 from . import dim as _d
 from . import tensor as _t
 from . import marked_dim as _m
@@ -41,54 +43,63 @@ class _DimExtra:
         self,
         *,
         dim: Dim,
-        kind=DimTypes.Unspecified,
-        vocab=None,
-        undefined=False,
-        special=False,
-        auto_generated=False,
-        match_priority=0,
-        derived_from_tag=None,
-        derived_from_op=None,
-        batch=None,
-        control_flow_ctx=None,
+        kind: Entity = DimTypes.Unspecified,
+        vocab: Union[None, Dict[str, Any], Vocabulary] = None,
+        undefined: bool = False,
+        special: bool = False,
+        auto_generated: bool = False,
+        match_priority: int = 0,
+        derived_from_tag: Optional[Dim] = None,
+        derived_from_op: Optional[Op] = None,
+        batch: Optional[BatchInfo] = None,
+        control_flow_ctx: Optional[ControlFlowContext] = None,
         src_data: Optional[_t.Tensor] = None,
         src_axis: Optional[int] = None,
     ):
         """
         :param dim:
-        :param
-        :param
-        :param
-        :param
+        :param kind:
+        :param vocab:
+        :param undefined: When this is specified as `None` by the user via `shape`.
+        :param special: this can not be a dim tag of :class:`Tensor`.
             But this dim tag also does not match anything except itself.
             So it can be used to represent special placeholders with special meanings like ``single_step``.
-        :param
+        :param auto_generated:
            This is auto-generated by RETURNN because it was not explicitly specified by the user.
            E.g. for ConvLayer and others.
            This implies certain behavior on equality, such as comparing the description,
            to allow for several independent creations of the dim tag during template construction.
-        :param
+        :param derived_from_tag:
            Whether this new tag is reduced, down/up sampled, padded etc from this given other tag.
            In situations where dim tags are being matched (Data.get_common_data),
            the behavior is to consider them as equal,
            and assume that the chain of operations (e.g. padding + valid conv) results in the same dim.
-        :param
-        :param
+        :param derived_from_op:
+        :param match_priority: when there is ambiguity between multiple dim tags, this value defines the order
            in which the dimension are assigned to their matching counterparts.
            A dimension tag with a higher priority value is assigned first.
            E.g. for a square matrix used for a linear transformation,
            the reduce dim tag should have a higher priority.
-        :param
-        :param
+        :param batch: for batch-dim, or dynamic dims per batch
+        :param control_flow_ctx:
         :param src_data:
         :param src_axis:
         """
         self.dim = dim
         assert kind is None or (isinstance(kind, Entity) and kind in DimTypes.Types)
         self.kind = kind
+        if vocab:
+            from returnn.datasets.util.vocabulary import Vocabulary
+
+            if isinstance(vocab, Vocabulary):
+                pass
+            elif isinstance(vocab, dict):
+                vocab = Vocabulary.create_vocab(**vocab)
+            else:
+                raise TypeError(f"invalid vocab {vocab!r} type {type(vocab)}")
         self.vocab = vocab
-        self.same_as
-        self.copy_same_as
+        self.same_as: Optional[Dim] = None
+        self.copy_same_as: Optional[Dim] = None
         self.derived_from_tag = derived_from_tag
         self.derived_from_op = derived_from_op
         if derived_from_op and not derived_from_op.output:
@@ -116,8 +127,8 @@ class _DimExtra:
         self.auto_generated = auto_generated
         # We can have different tag variants per batch info (e.g. with beam), or per control flow ctx.
         # They each have same_as = self. The same_base should have the base (global) batch info.
-        self.same_for_batch_ctx
-        self.cache_dyn_size_ext_dev
+        self.same_for_batch_ctx: Dict[Tuple[BatchInfo, Optional[ControlFlowContext]], Dim] = {}
+        self.cache_dyn_size_ext_dev: Dict[str, _t.Tensor] = {}  # device -> dyn_size_ext
         self.cache_seq_mask: Dict[Tuple[str, Optional[Tuple[Dim, ...]]], _t.Tensor] = {}  # (dev,dim_order) -> seq_mask
         self.cache_dim_math = _CacheDimMath()  # op (add,sub,...), operand -> Dim
 
@@ -134,6 +145,7 @@ class _DimExtra:
     def __setstate__(self, state):
         self.__dict__.update(state)
         if self.kind is not None:
+            # noinspection PyTypeChecker
            self.kind = {v.name: v for v in DimTypes.Types}[self.kind]
 
     def __sis_state__(self):
@@ -151,6 +163,9 @@ class _DimMixin:
    def _handle_extra_kwargs(self: Dim, *, dyn_size: Optional[_t.RawTensorType] = None, **kwargs):
        if kwargs:
            self._extra = _DimExtra(dim=self, **kwargs)
+           if self._extra.vocab and self.size is None:
+               self.size = self._extra.vocab.num_labels
+               self.capacity = self.capacity or self.size
        if dyn_size is not None:
            self.dyn_size = dyn_size
        if self.derived_from_op and self.is_dynamic():
@@ -2184,7 +2199,7 @@ class _DimMixin:
            other = other.dimension  # makes matching easier
        if isinstance(other, int) and other == 1:
            return self
-       if self.is_constant_static_dim() and isinstance(other, _d.Dim):
+       if self.is_constant_static_dim() and isinstance(other, _d.Dim) and not other.is_constant_static_dim():
            return self.dimension * other  # use rmul
        cache_key = ("mul", other)
        cache = self.get_same_base()._make_extra().cache_dim_math
@@ -2571,14 +2586,19 @@ class _MathFindMatchingAdditive:
 
 
 def _math_find_matching_mult(start: Dim, other: Union[int, Dim], *, right: bool) -> Optional[Dim]:
-    if
+    # we assume, if other is Dim, then it is not constant static dim
+    if isinstance(other, int) and start.is_constant_static_dim():
        return _math_get_dim_via_bin_op([start, other] if right else [other, start], "mul")
    c_op = start.derived_from_op
    if c_op and c_op.kind == "mul" and len(c_op.inputs) == 2:
        if right:
            return c_op.inputs[0] * (c_op.inputs[1] * other)
-
-
+       # Don't do right=False -> (other * c_op.inputs[0]) * c_op.inputs[1],
+       # because this can lead to infinite recursions,
+       # and also we don't have a proper normalized form for multiplication.
+       # However, if both left-most factors are constant static dims, then we can merge it.
+       elif isinstance(other, int) and c_op.inputs[0].is_constant_static_dim():
+           return (other * c_op.inputs[0].dimension) * c_op.inputs[1]
    return None
 
 
@@ -2665,7 +2685,9 @@ def _get_merged_dim_kind(dim_tags: Sequence[Dim]) -> Entity:
 
 
 def _representative_tag(terms: Sequence[Dim]) -> Optional[Dim]:
-
+    if any(not term_.auto_generated for term_ in terms):
+        # Always prefer non-auto-generated.
+        terms = [term_ for term_ in terms if not term_.auto_generated]
    # First find any dynamic.
    for term_ in terms:
        if term_.is_dynamic_seq_length():
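The new vocab handling in `_DimExtra.__init__` and `_handle_extra_kwargs` means a `Dim` can now take either a `Vocabulary` instance or a kwargs dict (forwarded to `Vocabulary.create_vocab`), and an unspecified size is filled in from `vocab.num_labels`. A hedged usage sketch (the vocab file path is made up, and the exact `Dim` constructor arguments may differ):

    from returnn.tensor import Dim
    from returnn.datasets.util.vocabulary import Vocabulary

    # Pass a Vocabulary instance directly; the dim size is inferred from num_labels.
    vocab = Vocabulary.create_vocab(vocab_file="vocab.txt", unknown_label=None)  # hypothetical file
    classes_dim = Dim(None, name="classes", vocab=vocab)

    # Or pass a dict, which is forwarded to Vocabulary.create_vocab(**vocab).
    classes_dim2 = Dim(None, name="classes", vocab={"vocab_file": "vocab.txt", "unknown_label": None})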
returnn/tensor/_tensor_extra.py
CHANGED

@@ -588,7 +588,12 @@ class _TensorMixin(_TensorMixinBase):
 
     def __getstate__(self):
         d = {k: getattr(self, k) for k in self.__slots__}
-
+        if (
+            self._raw_tensor is not None
+            and self._raw_backend is not None
+            and not self._raw_backend.should_pickle_tensor(self._raw_tensor)
+        ):
+            d["_raw_tensor"] = None
         return d
 
     def __setstate__(self, state):
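The effect of the new `should_pickle_tensor` backend hook (see the `_backend.py` changes further below): when the backend reports that the raw tensor cannot survive pickling, `__getstate__` drops it and keeps only the metadata. A standalone sketch of the pattern (my own minimal illustration, not the real RETURNN classes):

    import pickle

    class _TensorSketch:
        """Minimal stand-in for the __getstate__ pattern above; not the real RETURNN Tensor."""

        __slots__ = ("name", "_raw_tensor")

        def __init__(self, name, raw_tensor=None):
            self.name = name
            self._raw_tensor = raw_tensor

        @staticmethod
        def _should_pickle_raw(raw):
            # Stand-in for backend.should_pickle_tensor(); only plain data survives here.
            return isinstance(raw, (bytes, list))

        def __getstate__(self):
            d = {k: getattr(self, k) for k in self.__slots__}
            if self._raw_tensor is not None and not self._should_pickle_raw(self._raw_tensor):
                d["_raw_tensor"] = None  # drop the unpicklable payload, keep the metadata
            return d

        def __setstate__(self, state):
            for k, v in state.items():
                setattr(self, k, v)

    t = _TensorSketch("x", raw_tensor=object())  # object() stands in for a symbolic graph tensor
    t2 = pickle.loads(pickle.dumps(t))
    assert t2.name == "x" and t2._raw_tensor is None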
returnn/tensor/utils.py
CHANGED

@@ -36,11 +36,14 @@ def tensor_fill_random_numpy_(
     *,
     min_val: int = 0,
     max_val: Optional[int] = None,
-    rnd: numpy.random.RandomState,
+    rnd: Optional[numpy.random.RandomState] = None,
     dyn_dim_max_sizes: Optional[Dict[Dim, int]] = None,
     dyn_dim_min_sizes: Optional[Dict[Dim, int]] = None,
 ) -> bool:
     """fill. return whether sth was filled"""
+    if rnd is None:
+        # noinspection PyUnresolvedReferences,PyProtectedMember
+        rnd = numpy.random.mtrand._rand
     if dyn_dim_max_sizes is None:
         dyn_dim_max_sizes = {}
     if dyn_dim_min_sizes is None:
@@ -59,7 +62,7 @@ def tensor_fill_random_numpy_(
             continue
         if tensor_fill_random_numpy_(
             dim.dyn_size_ext,
-            min_val=dyn_dim_min_sizes.get(dim, 2),
+            min_val=dyn_dim_min_sizes.get(dim, min(2, dyn_dim_max_sizes.get(dim, 2))),
             max_val=dyn_dim_max_sizes.get(dim, None),
             rnd=rnd,
             dyn_dim_max_sizes=dyn_dim_max_sizes,
@@ -98,8 +101,8 @@ def tensor_fill_random_numpy_(
     if max_val is None:
         max_val = rnd.randint(5, 20)
     if x.sparse_dim and x.sparse_dim.dimension is not None:
-        max_val = x.sparse_dim.dimension
-        x.raw_tensor = rnd.randint(min_val, max_val, size=shape, dtype=x.dtype)
+        max_val = x.sparse_dim.dimension - 1
+        x.raw_tensor = rnd.randint(min_val, max_val + 1, size=shape, dtype=x.dtype)
     elif x.dtype == "bool":
         x.raw_tensor = rnd.randint(0, 2, size=shape, dtype=x.dtype)
     elif x.dtype.startswith("float"):
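The sparse-dim change above settles the meaning of `max_val` as inclusive while `numpy.random.randint`'s upper bound is exclusive: valid sparse labels live in `[0, dim)`, so the inclusive max is `dim - 1` and the randint bound is `max_val + 1`. A quick illustration (my own):

    import numpy

    rnd = numpy.random.RandomState(42)
    dim = 10  # stands in for x.sparse_dim.dimension
    max_val = dim - 1  # largest valid label, inclusive
    labels = rnd.randint(0, max_val + 1, size=(1000,))  # randint's upper bound is exclusive
    assert labels.min() >= 0 and labels.max() <= dim - 1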
returnn/tf/frontend_layers/_backend.py
CHANGED

@@ -45,6 +45,13 @@ class ReturnnLayersBackend(Backend[Layer]):
         """executing eagerly"""
         return False
 
+    @staticmethod
+    def should_pickle_tensor(raw_tensor: Layer) -> bool:
+        """
+        :return: whether the tensor should be included in a pickle or set to `None`.
+        """
+        return False
+
     @staticmethod
     def get_tensor_dependencies(x: Tensor[Layer]) -> Sequence[Tensor]:
         """get tensor inputs"""
@@ -1060,14 +1067,16 @@ class ReturnnLayersBackend(Backend[Layer]):
             s = filter_size[i].dimension if not strides else strides[i]
             if filter_size[i].dimension == s == 1 or (s == 1 and padding.lower() == "same"):
                 out_spatial_dims[i] = in_spatial_dims[i]
-
+        assert all(size.is_static() for size in filter_size)
+        layer_dict: Dict[str, Any] = {
             "class": "transposed_conv",
             "from": source,
             "in_dim": in_dim,
             "in_spatial_dims": in_spatial_dims,
             "out_dim": out_dim,
             "out_spatial_dims": out_spatial_dims,
-            "filter_size": filter_size,
+            "filter_size": [size.dimension for size in filter_size],
+            "filter_perm": list(filter_size) + [out_dim, in_dim],
             "padding": padding,
         }
         if remove_padding:
returnn/tf/frontend_low_level/_backend.py
CHANGED

@@ -38,6 +38,21 @@ class TFBackend(Backend[tf.Tensor]):
         """
         return tf.executing_eagerly()
 
+    @staticmethod
+    def should_pickle_tensor(raw_tensor: tf.Tensor) -> bool:
+        """
+        :return: whether the tensor should be included in a pickle or set to `None`.
+        """
+
+        from tensorflow.python.framework.ops import EagerTensor
+
+        # Can not pickle symbolic TF tensors.
+        #
+        # See for discussion:
+        # - https://github.com/rwth-i6/returnn/issues/1541
+        # - https://github.com/rwth-i6/returnn/issues/1763
+        return isinstance(raw_tensor, EagerTensor)
+
     @staticmethod
     def get_dtype_name_raw(raw_tensor: tf.Tensor) -> str:
         """
returnn/tf/layers/basic.py
CHANGED

@@ -7371,7 +7371,7 @@ class TransposedConvLayer(_ConcatInputLayer):
         """
         from returnn.tf.util.basic import get_initializer, get_activation_function, get_shape
 
-        super(TransposedConvLayer, self).__init__(**kwargs)
+        super(TransposedConvLayer, self).__init__(in_dim=in_dim, **kwargs)
         out_dim  # noqa  # via get_out_data_from_opts
         assert not self.input_data.sparse
         assert self.input_data.have_batch_axis()
@@ -7516,7 +7516,10 @@ class TransposedConvLayer(_ConcatInputLayer):
     ):
         """
         Determines output length of a transposed convolution given input length.
-
+
+        Copied from TF/Keras conv_utils.deconv_output_length
+        (https://github.com/tensorflow/tensorflow/blob/5912f51d580551e5cee2cfde4cb882594b4d3e60/tensorflow/python/keras/utils/conv_utils.py#L140),
+        adapted with simplification.
 
         Also see :func:`ConvLayer.calc_out_dim`.
 
@@ -7533,44 +7536,17 @@ class TransposedConvLayer(_ConcatInputLayer):
         """
         if out_dim and out_dim.is_dim_known():
             return out_dim.get_dim_value()
-        assert padding in {"same", "valid", "full"}
-
-        # Get the dilated kernel size
-        filter_size = filter_size + (filter_size - 1) * (dilation - 1)
 
-
-        input_length = input_length * stride
+        import returnn.frontend as rf
 
-
-
-
-
-
-
-
-
-            if isinstance(input_length, Dim):
-                length = input_length - (stride + filter_size - 2)
-            else:
-                length = tf_util.simplify_add(input_length, -(stride + filter_size - 2))
-        elif padding == "same":
-            length = input_length
-        else:
-            raise Exception("invalid padding %r" % (padding,))
-        else:  # output_padding
-            if padding == "same":
-                pad = filter_size // 2
-            elif padding == "valid":
-                pad = 0
-            elif padding == "full":
-                pad = filter_size - 1
-            else:
-                raise Exception("invalid padding %r" % (padding,))
-            if isinstance(input_length, Dim):
-                length = input_length + (-stride + filter_size - 2 * pad + output_padding)
-            else:
-                length = tf_util.simplify_add(input_length, -stride + filter_size - 2 * pad + output_padding)
-        return length
+        return rf.calc_transposed_conv_out_length(
+            input_length,
+            filter_size=filter_size,
+            padding=padding,
+            output_padding=output_padding,
+            stride=stride,
+            dilation_rate=dilation,
+        )
 
     @classmethod
     def get_out_data_from_opts(
@@ -10488,6 +10464,7 @@ class TopKLayer(LayerBase):
         self._sub_layers = {}
         for key, (v, a) in sub_outputs.items():
             sub_out_data = self.output.copy_template(name="%s/%s" % (self.name, key))
+            sub_out_data.feature_dim = None
             sub_out_data.dtype = "int32"
             sub_out_data.sparse_dim = a
             sub_out_data.placeholder = v
@@ -10527,6 +10504,7 @@ class TopKLayer(LayerBase):
         axis = [in_data.get_dim_tag_from_description(a) for a in axis]
         out_dims = [dim for dim in in_data.dim_tags if dim not in axis] + [k_dim]
         out_data = in_data.copy_template(name=name).copy_template_new_dim_tags(out_dims)
+        out_data.feature_dim = None
         if for_indices is not None:
             assert 0 <= for_indices < len(axis)
             out_data.dtype = "int32"
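For intuition on the refactored `deconv_output_length`: with `output_padding=None`, the Keras `conv_utils.deconv_output_length` semantics cited in the docstring reduce to the formulas below. A reference sketch (my own; `rf.calc_transposed_conv_out_length` is assumed to follow these semantics):

    def deconv_out_length_ref(in_len, filter_size, stride=1, dilation=1, padding="valid"):
        """Reference per Keras conv_utils.deconv_output_length, output_padding=None case."""
        dilated = filter_size + (filter_size - 1) * (dilation - 1)
        if padding == "valid":
            return in_len * stride + max(dilated - stride, 0)
        if padding == "same":
            return in_len * stride
        if padding == "full":
            return in_len * stride - (stride + dilated - 2)
        raise ValueError("invalid padding %r" % (padding,))

    assert deconv_out_length_ref(10, filter_size=3, stride=2) == 21  # (10 - 1) * 2 + 3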
returnn/tf/native_op.py
CHANGED

@@ -528,77 +528,30 @@ class OpMaker:
     def _make_mod(self):
         if self.cache_key in self.mod_cache:
             return self.mod_cache[self.cache_key]
-
-
-        #
-        #
-        #
-        #
-        #
-        # In other cases, it's probably needed, but it's not so clear which lib has the
-        # right symbols (e.g. the `sgemm_` symbol).
+
+        # Note about BLAS / matmul:
+        # Earlier, we assumed that TensorFlow/Eigen used BLAS internally,
+        # and our code directly called BLAS sgemm_, so we needed to link directly to BLAS.
+        # Now, by default, we use the underlying Eigen library,
+        # which is the same code path that TF also uses for CPU matmul.
+        # Only if an explicit BLAS library is specified, we use that instead.
         ld_flags = []
-
+        c_macro_defines = {}
 
         if self.blas_lib is not None and os.path.exists(self.blas_lib):
             path = os.path.dirname(self.blas_lib)
             if path == "":
                 path = "."
             ld_flags += ["-L%s" % path, "-l:%s" % os.path.basename(self.blas_lib)]
-
-
-        from returnn.util.basic import find_sgemm_libs_from_runtime
-
-        libs = find_sgemm_libs_from_runtime()
-        if libs:
-            numpy_libs = [fn for fn in libs if "/numpy/.libs/" in fn]
-            if numpy_libs:
-                # Prefer Numpy; move to front.
-                libs = numpy_libs + [fn for fn in libs if fn not in numpy_libs]
-            if self.blas_lib is not None:
-                libs = [lib for lib in libs if self.blas_lib in lib]
-            for fn in libs:
-                ld_flags += ["-L%s" % os.path.dirname(fn), "-l:%s" % os.path.basename(fn)]
-                have_blas_lib = True
-        if not have_blas_lib and self.search_for_numpy_blas:
-            # Find related Numpy libs.
-            # Numpy usually comes with OpenBlas, and Numpy is probably loaded anyway.
-            # Even do this before the other libs below, as it is likely
-            # that this OpenBlas lib is correctly initialized already.
-            import numpy
-
-            numpy_dir = os.path.dirname(numpy.__file__)
-            if os.path.exists("%s/.libs" % numpy_dir):
-                ld_flags += ["-L%s/.libs" % numpy_dir]
-                from glob import glob
-
-                for f in glob("%s/.libs/*.so" % numpy_dir):
-                    f = os.path.basename(f)
-                    if self.blas_lib is not None and self.blas_lib not in f:
-                        continue
-                    if f.startswith("lib"):
-                        f = f[3:]
-                    if f.endswith(".so"):
-                        f = f[:-3]
-                    ld_flags += ["-l%s" % f]
-                    have_blas_lib = True
-        if not have_blas_lib and self.search_for_system_blas:
-            # Try to just link against blas/f77blas
-            # (both can potentially have the symbol) if it finds the lib.
-            if find_lib("blas"):
-                ld_flags += ["-lblas"]
-                have_blas_lib = True
-            if find_lib("f77blas"):
-                ld_flags += ["-lf77blas"]
-                have_blas_lib = True
-        if not have_blas_lib:
-            print("WARNING: OpMaker: no BLAS lib found")
+            c_macro_defines["HAVE_CUSTOM_BLAS"] = "1"
+
         comp = tf_util.OpCodeCompiler(
             base_name=self.name,
             code_version=self.description.code_version,
             code=self._make_code(),
             include_deps=[self.support_native_op_cpp_filename],
             ld_flags=ld_flags,
+            c_macro_defines=c_macro_defines,
             use_cuda_if_available=self.with_cuda,
             log_stream=self.log_stream,
             **dict(self.compiler_opts),
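The net effect of the OpMaker change, summarized as a sketch (my own restatement, not code from the package): `HAVE_CUSTOM_BLAS` is only defined when an explicit BLAS library is configured, which selects the direct `sgemm_` macro path in native_op.cpp; otherwise the new Eigen-based `tf_cpu_sgemm` fallback is compiled.

    import os

    ld_flags = []
    c_macro_defines = {}
    blas_lib = None  # e.g. "/usr/lib/libopenblas.so" to force the direct BLAS path
    if blas_lib is not None and os.path.exists(blas_lib):
        ld_flags += ["-L%s" % (os.path.dirname(blas_lib) or "."), "-l:%s" % os.path.basename(blas_lib)]
        c_macro_defines["HAVE_CUSTOM_BLAS"] = "1"  # native_op.cpp then calls sgemm_ directly
    # else: native_op.cpp compiles the Eigen tf_cpu_sgemm fallback (same path TF uses for CPU matmul)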
returnn/tf/network.py
CHANGED

@@ -4428,7 +4428,7 @@ def help_on_tf_exception(
                 data = extern_data.data[data_key]
                 info += ", %s" % data
             print("  %r: %s" % (key, info), file=file)
-            if data and data.sparse:
+            if data is not None and data.sparse:
                 if v_minmax[0] < 0 or v_minmax[1] >= data.dim:
                     print("  WARNING, invalid label for data", data, file=file)
     elif feed_dict is None:
|