returnn 1.20251027.232712__py3-none-any.whl → 1.20260119.15400__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- returnn/PKG-INFO +2 -2
- returnn/__old_mod_loader__.py +26 -2
- returnn/_setup_info_generated.py +2 -2
- returnn/datasets/lm.py +130 -42
- returnn/datasets/meta.py +93 -43
- returnn/datasets/postprocessing.py +597 -108
- returnn/datasets/util/vocabulary.py +90 -0
- returnn/frontend/__init__.py +1 -0
- returnn/frontend/_backend.py +41 -0
- returnn/frontend/_native/__init__.py +22 -0
- returnn/frontend/_numpy_backend.py +7 -0
- returnn/frontend/_utils.py +1 -1
- returnn/frontend/array_.py +48 -2
- returnn/frontend/assert_.py +35 -0
- returnn/frontend/attention.py +54 -20
- returnn/frontend/conv.py +273 -54
- returnn/frontend/device.py +14 -1
- returnn/frontend/encoder/conformer.py +20 -0
- returnn/frontend/encoder/transformer.py +2 -0
- returnn/frontend/loss.py +222 -3
- returnn/frontend/math_.py +54 -14
- returnn/native_op.cpp +182 -172
- returnn/native_op.py +36 -31
- returnn/sprint/cache.py +12 -13
- returnn/tensor/_dim_extra.py +7 -7
- returnn/tensor/_tensor_extra.py +10 -10
- returnn/tensor/utils.py +8 -5
- returnn/tf/frontend_layers/_backend.py +7 -3
- returnn/tf/layers/basic.py +27 -40
- returnn/tf/native_op.py +27 -63
- returnn/tf/network.py +1 -1
- returnn/tf/util/basic.py +22 -197
- returnn/torch/engine.py +157 -6
- returnn/torch/frontend/_backend.py +280 -29
- returnn/torch/frontend/bridge.py +61 -0
- returnn/torch/frontend/compile_helper.py +106 -0
- returnn/torch/util/array_.py +30 -0
- returnn/torch/util/assert_.py +122 -0
- returnn/torch/util/exception_helper.py +7 -1
- returnn/torch/util/native_op.py +885 -0
- returnn/torch/util/native_op_code_compiler.py +308 -0
- returnn/util/basic.py +6 -7
- returnn/util/better_exchook.py +4 -0
- returnn/util/cuda_env.py +332 -0
- returnn/util/debug.py +12 -2
- returnn/util/file_cache.py +15 -1
- returnn/util/fsa.py +17 -13
- returnn/util/native_code_compiler.py +104 -47
- returnn/util/task_system.py +1 -1
- {returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/METADATA +2 -2
- {returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/RECORD +54 -48
- {returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/WHEEL +1 -1
- {returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/LICENSE +0 -0
- {returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/top_level.txt +0 -0
returnn/tensor/_dim_extra.py
CHANGED
@@ -858,7 +858,7 @@ class _DimMixin:
         self._make_extra()
         dim_order_default = self.dyn_size_ext.dims + (self,)
         if dim_order is not None:
-            dim_order = tuple(d for d in dim_order if d in dim_order_default)  # filter
+            dim_order = tuple([d for d in dim_order if d in dim_order_default])  # filter
         else:
             dim_order = dim_order_default
         cache_key = (device, dim_order)
@@ -2484,16 +2484,16 @@ _BinOpStrs = {
 
 def _math_get_dim_via_bin_op(dims: Sequence[Union[Dim, int]], op_kind: str) -> Dim:
     dims = [d if isinstance(d, _d.Dim) else _make_constant_static_dim(d) for d in dims]
-    if all(d.dimension is not None for d in dims):
+    if all([d.dimension is not None for d in dims]):
         op = _BinOps[op_kind]
         dim_value = dims[0].dimension
         for d in dims[1:]:
             dim_value = op(dim_value, d.dimension)
     else:
         dim_value = None
-    if all(d.is_constant_static_dim() for d in dims):
+    if all([d.is_constant_static_dim() for d in dims]):
         return _make_constant_static_dim(dim_value, kind=_get_merged_dim_kind(dims))
-    desc = _BinOpStrs[op_kind].join(_get_description(d) for d in dims)
+    desc = _BinOpStrs[op_kind].join([_get_description(d) for d in dims])
     if op_kind.startswith("ceildiv"):
         desc = f"⌈{desc}⌉"
     return _d.Dim(
@@ -2676,16 +2676,16 @@ def _get_description(dim, brackets=True):
 
 
 def _get_merged_dim_kind(dim_tags: Sequence[Dim]) -> Entity:
-    if any(tag.is_batch_dim() for tag in dim_tags):
+    if any([tag.is_batch_dim() for tag in dim_tags]):
         return DimTypes.Batch
-    elif any(tag.is_feature_dim() for tag in dim_tags):
+    elif any([tag.is_feature_dim() for tag in dim_tags]):
         return DimTypes.Feature
     else:
         return DimTypes.Spatial
 
 
 def _representative_tag(terms: Sequence[Dim]) -> Optional[Dim]:
-    if any(not term_.auto_generated for term_ in terms):
+    if any([not term_.auto_generated for term_ in terms]):
         # Always prefer non-auto-generated.
         terms = [term_ for term_ in terms if not term_.auto_generated]
         # First find any dynamic.
returnn/tensor/_tensor_extra.py
CHANGED
@@ -32,8 +32,8 @@ class _TensorExtra:
         tensor: Tensor,
         time_dim_axis=NotSpecified,
         available_for_inference=True,
-        batch=None,
-        beam=None,
+        batch: Optional[BatchInfo] = None,
+        beam: Optional[SearchBeam] = None,
         control_flow_ctx=None,
     ):
         """
@@ -41,8 +41,8 @@ class _TensorExtra:
         :param int|None|NotSpecified time_dim_axis: where we have the time dim axis, after we added the batch-dim.
             this is often 1. however, can be None if there is no time-dim.
         :param bool available_for_inference: e.g. the extern data "classes" is usually not available for inference
-        :param
-        :param
+        :param batch:
+        :param beam: the batch-dim could be extended by a beam-size,
             such that it represents the merged dims [batch, beam_size].
         :param ControlFlowContext|None control_flow_ctx:
         """
@@ -668,11 +668,11 @@ class _TensorMixin(_TensorMixinBase):
         if not perm:
             return self.copy()
         if allow_int and isinstance(perm[0], int):
-            assert all(isinstance(a, int) for a in perm), f"{self}: invalid perm {perm!r} types"
+            assert all([isinstance(a, int) for a in perm]), f"{self}: invalid perm {perm!r} types"
             assert set(perm) == set(range(len(perm))), f"{self}: invalid perm {perm!r}"
             return self._copy_compatible_to_dims_with_perm([self._dims[i] for i in perm], perm)
         else:
-            assert all(isinstance(a, Dim) for a in perm), f"{self}: invalid perm {perm!r} types"
+            assert all([isinstance(a, Dim) for a in perm]), f"{self}: invalid perm {perm!r} types"
             return self.copy_compatible_to_dims(perm)
 
     def copy_move_axis(self, old_axis, new_axis) -> _t.Tensor:
@@ -1155,7 +1155,7 @@ class _TensorMixin(_TensorMixinBase):
             )
 
             assert v.batch_ndim == data.batch_ndim
-            assert all(mapped_axes[ax] == ax for ax in range(v.batch_ndim))
+            assert all([mapped_axes[ax] == ax for ax in range(v.batch_ndim)])
 
             if self.version == 1:
                 # Ensure time_dim_axis and feature_dim_axis is same as in data
@@ -1702,7 +1702,7 @@ class _TensorMixin(_TensorMixinBase):
         """
         :return: shape with added batch-dim. e.g. (batch,time,feat) = (None,None,128)
         """
-        return tuple(tag.dimension for tag in self.dim_tags)
+        return tuple([tag.dimension for tag in self.dim_tags])
 
     # noinspection PyShadowingNames
     def get_batch_shape(self, batch_dim):
@@ -3214,7 +3214,7 @@ class _TensorMixin(_TensorMixinBase):
         if len(sources) == 1:
             return sources[0].copy_template()
         max_ndim = max([s.batch_ndim for s in sources])
-        if any(src.batch for src in sources):
+        if any([src.batch for src in sources]):
             from returnn.tf.util.data import BatchInfo
 
             common_batch = BatchInfo.get_common_batch_info([src.batch for src in sources if src.batch])
@@ -3254,7 +3254,7 @@ class _TensorMixin(_TensorMixinBase):
             else:
                 axis = common.get_default_new_axis_for_dim_tag(dim_tag)
                 common = common.copy_add_dim_by_tag(dim_tag, unbroadcast=True, axis=axis)
-        if all(s.batch_ndim < common.batch_ndim for s in sources):
+        if all([s.batch_ndim < common.batch_ndim for s in sources]):
             from returnn.util.basic import validate_broadcast_all_sources
 
             validate_broadcast_all_sources(
returnn/tensor/utils.py
CHANGED
@@ -36,11 +36,14 @@ def tensor_fill_random_numpy_(
     *,
     min_val: int = 0,
     max_val: Optional[int] = None,
-    rnd: numpy.random.RandomState,
+    rnd: Optional[numpy.random.RandomState] = None,
     dyn_dim_max_sizes: Optional[Dict[Dim, int]] = None,
     dyn_dim_min_sizes: Optional[Dict[Dim, int]] = None,
 ) -> bool:
     """fill. return whether sth was filled"""
+    if rnd is None:
+        # noinspection PyUnresolvedReferences,PyProtectedMember
+        rnd = numpy.random.mtrand._rand
     if dyn_dim_max_sizes is None:
         dyn_dim_max_sizes = {}
     if dyn_dim_min_sizes is None:
@@ -59,7 +62,7 @@ def tensor_fill_random_numpy_(
             continue
         if tensor_fill_random_numpy_(
             dim.dyn_size_ext,
-            min_val=dyn_dim_min_sizes.get(dim, 2),
+            min_val=dyn_dim_min_sizes.get(dim, min(2, dyn_dim_max_sizes.get(dim, 2))),
             max_val=dyn_dim_max_sizes.get(dim, None),
             rnd=rnd,
             dyn_dim_max_sizes=dyn_dim_max_sizes,
@@ -68,7 +71,7 @@ def tensor_fill_random_numpy_(
             # Make sure at least one of the dyn sizes matches the max size.
             i = rnd.randint(0, dim.dyn_size_ext.raw_tensor.size)
             dim.dyn_size_ext.raw_tensor.flat[i] = dyn_dim_max_sizes[dim]
-            if dim in dyn_dim_min_sizes:
+            if dim in dyn_dim_min_sizes and dim.dyn_size_ext.raw_tensor.size > 1:
                 j = rnd.randint(0, dim.dyn_size_ext.raw_tensor.size - 1)
                 if j >= i:
                     j += 1
@@ -98,8 +101,8 @@ def tensor_fill_random_numpy_(
     if max_val is None:
         max_val = rnd.randint(5, 20)
     if x.sparse_dim and x.sparse_dim.dimension is not None:
-        max_val = x.sparse_dim.dimension
-        x.raw_tensor = rnd.randint(min_val, max_val, size=shape, dtype=x.dtype)
+        max_val = x.sparse_dim.dimension - 1
+        x.raw_tensor = rnd.randint(min_val, max_val + 1, size=shape, dtype=x.dtype)
     elif x.dtype == "bool":
         x.raw_tensor = rnd.randint(0, 2, size=shape, dtype=x.dtype)
     elif x.dtype.startswith("float"):
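
With rnd now optional, tensor_fill_random_numpy_ falls back to NumPy's global RandomState (numpy.random.mtrand._rand), so seeding via numpy.random.seed still gives reproducible fills. The following is a usage sketch under that assumption; it presumes a working RETURNN installation and keeps the tensor static-shaped for simplicity.

import numpy
from returnn.tensor import Tensor, Dim
from returnn.tensor.utils import tensor_fill_random_numpy_

feat_dim = Dim(8, name="feature")
x = Tensor("x", dims=[feat_dim], dtype="float32")

numpy.random.seed(42)  # the global RandomState is what the fallback uses
filled = tensor_fill_random_numpy_(x)  # no rnd argument needed anymore
print(filled, x.raw_tensor.shape)  # expected: True (8,)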
returnn/tf/frontend_layers/_backend.py
CHANGED
@@ -465,6 +465,8 @@ class ReturnnLayersBackend(Backend[Layer]):
         targets_spatial_dim: Dim,
         blank_index: int,
         max_approx: bool = False,
+        use_native_op: Optional[bool] = None,
+        label_loop: bool = True,
     ) -> Tensor:
         """CTC"""
         assert targets.sparse_dim and targets.sparse_dim.dimension <= logits.feature_dim.dimension
@@ -482,6 +484,7 @@ class ReturnnLayersBackend(Backend[Layer]):
                 "targets": targets,
                 "blank_index": blank_index,
                 "max_approx": max_approx,
+                "label_loop": label_loop,
             },
             name="ctc_loss",
         )
@@ -944,7 +947,6 @@ class ReturnnLayersBackend(Backend[Layer]):
         """
         assert mask.dtype == "bool"
         assert set(mask.dims) == set(dims)
-        assert set(mask.dims).issubset(set(tensor.dims))
         if not out_dim:
             out_dim = Dim(None, name="mask")
         return (
@@ -1067,14 +1069,16 @@ class ReturnnLayersBackend(Backend[Layer]):
             s = filter_size[i].dimension if not strides else strides[i]
             if filter_size[i].dimension == s == 1 or (s == 1 and padding.lower() == "same"):
                 out_spatial_dims[i] = in_spatial_dims[i]
-
+        assert all(size.is_static() for size in filter_size)
+        layer_dict: Dict[str, Any] = {
             "class": "transposed_conv",
             "from": source,
             "in_dim": in_dim,
             "in_spatial_dims": in_spatial_dims,
             "out_dim": out_dim,
             "out_spatial_dims": out_spatial_dims,
-            "filter_size": filter_size,
+            "filter_size": [size.dimension for size in filter_size],
+            "filter_perm": list(filter_size) + [out_dim, in_dim],
             "padding": padding,
         }
         if remove_padding:
returnn/tf/layers/basic.py
CHANGED
@@ -2741,7 +2741,7 @@ class BooleanMaskLayer(LayerBase):
         tensor = self.sources[0].output
         remaining_dims = [d for d in tensor.dims if d not in dims]
         tensor_templ = tensor.copy_template_new_dim_tags(tuple(dims) + tuple(remaining_dims))
-        tensor = tensor.copy_compatible_to(tensor_templ,
+        tensor = tensor.copy_compatible_to(tensor_templ, unbroadcast=True)
         mask_templ = mask.output.copy_template_new_dim_tags(new_dim_tags=tuple(dims))
         mask_ = mask.output.copy_compatible_to(mask_templ, add_dims=False)
         self.output.raw_tensor = tf.boolean_mask(tensor.raw_tensor, mask=mask_.raw_tensor)
@@ -7371,7 +7371,7 @@ class TransposedConvLayer(_ConcatInputLayer):
         """
         from returnn.tf.util.basic import get_initializer, get_activation_function, get_shape
 
-        super(TransposedConvLayer, self).__init__(**kwargs)
+        super(TransposedConvLayer, self).__init__(in_dim=in_dim, **kwargs)
         out_dim  # noqa  # via get_out_data_from_opts
         assert not self.input_data.sparse
         assert self.input_data.have_batch_axis()
@@ -7516,7 +7516,10 @@ class TransposedConvLayer(_ConcatInputLayer):
     ):
         """
         Determines output length of a transposed convolution given input length.
-
+
+        Copied from TF/Keras conv_utils.deconv_output_length
+        (https://github.com/tensorflow/tensorflow/blob/5912f51d580551e5cee2cfde4cb882594b4d3e60/tensorflow/python/keras/utils/conv_utils.py#L140),
+        adapted with simplification.
 
         Also see :func:`ConvLayer.calc_out_dim`.
 
@@ -7533,44 +7536,17 @@ class TransposedConvLayer(_ConcatInputLayer):
         """
         if out_dim and out_dim.is_dim_known():
             return out_dim.get_dim_value()
-        assert padding in {"same", "valid", "full"}
-
-        # Get the dilated kernel size
-        filter_size = filter_size + (filter_size - 1) * (dilation - 1)
 
-
-        input_length = input_length * stride
+        import returnn.frontend as rf
 
-
-
-
-
-
-
-
-
-                if isinstance(input_length, Dim):
-                    length = input_length - (stride + filter_size - 2)
-                else:
-                    length = tf_util.simplify_add(input_length, -(stride + filter_size - 2))
-            elif padding == "same":
-                length = input_length
-            else:
-                raise Exception("invalid padding %r" % (padding,))
-        else:  # output_padding
-            if padding == "same":
-                pad = filter_size // 2
-            elif padding == "valid":
-                pad = 0
-            elif padding == "full":
-                pad = filter_size - 1
-            else:
-                raise Exception("invalid padding %r" % (padding,))
-            if isinstance(input_length, Dim):
-                length = input_length + (-stride + filter_size - 2 * pad + output_padding)
-            else:
-                length = tf_util.simplify_add(input_length, -stride + filter_size - 2 * pad + output_padding)
-        return length
+        return rf.calc_transposed_conv_out_length(
+            input_length,
+            filter_size=filter_size,
+            padding=padding,
+            output_padding=output_padding,
+            stride=stride,
+            dilation_rate=dilation,
+        )
 
     @classmethod
     def get_out_data_from_opts(
@@ -11562,13 +11538,23 @@ class CtcLossLayer(LayerBase):
     layer_class = "ctc_loss"
     recurrent = True  # order matters
 
-    def __init__(
+    def __init__(
+        self,
+        logits,
+        targets,
+        logits_normalized=False,
+        blank_index=-1,
+        max_approx=False,
+        label_loop: bool = True,
+        **kwargs,
+    ):
         """
         :param LayerBase logits: (before softmax). shape [B,T,D]
         :param LayerBase targets: sparse. shape [B,T]
         :param bool logits_normalized: whether the logits are already normalized (e.g. via log-softmax)
         :param int blank_index: vocab index of the blank symbol
        :param bool max_approx: if True, use max instead of sum over alignments (max approx, Viterbi)
+        :param label_loop:
         """
         from returnn.tf.native_op import ctc_loss, ctc_loss_viterbi
 
@@ -11591,6 +11577,7 @@ class CtcLossLayer(LayerBase):
             targets=targets.output.copy_as_batch_major().placeholder,
             targets_seq_lens=targets.output.get_sequence_lengths(),
             blank_index=blank_index,
+            label_loop=label_loop,
         )
 
     def get_dep_layers(self):
returnn/tf/native_op.py
CHANGED
@@ -528,77 +528,30 @@ class OpMaker:
     def _make_mod(self):
         if self.cache_key in self.mod_cache:
             return self.mod_cache[self.cache_key]
-
-
-        #
-        #
-        #
-        #
-        #
-        # In other cases, it's probably needed, but it's not so clear which lib has the
-        # right symbols (e.g. the `sgemm_` symbol).
+
+        # Note about BLAS / matmul:
+        # Earlier, we assumed that TensorFlow/Eigen used BLAS internally,
+        # and our code directly called BLAS sgemm_, so we needed to link directly to BLAS.
+        # Now, by default, we use the underlying Eigen library,
+        # which is the same code path that TF also uses for CPU matmul.
+        # Only if an explicit BLAS library is specified, we use that instead.
         ld_flags = []
-
+        c_macro_defines = {}
 
         if self.blas_lib is not None and os.path.exists(self.blas_lib):
             path = os.path.dirname(self.blas_lib)
             if path == "":
                 path = "."
             ld_flags += ["-L%s" % path, "-l:%s" % os.path.basename(self.blas_lib)]
-
-
-            from returnn.util.basic import find_sgemm_libs_from_runtime
-
-            libs = find_sgemm_libs_from_runtime()
-            if libs:
-                numpy_libs = [fn for fn in libs if "/numpy/.libs/" in fn]
-                if numpy_libs:
-                    # Prefer Numpy; move to front.
-                    libs = numpy_libs + [fn for fn in libs if fn not in numpy_libs]
-                if self.blas_lib is not None:
-                    libs = [lib for lib in libs if self.blas_lib in lib]
-                for fn in libs:
-                    ld_flags += ["-L%s" % os.path.dirname(fn), "-l:%s" % os.path.basename(fn)]
-                have_blas_lib = True
-        if not have_blas_lib and self.search_for_numpy_blas:
-            # Find related Numpy libs.
-            # Numpy usually comes with OpenBlas, and Numpy is probably loaded anyway.
-            # Even do this before the other libs below, as it is likely
-            # that this OpenBlas lib is correctly initialized already.
-            import numpy
-
-            numpy_dir = os.path.dirname(numpy.__file__)
-            if os.path.exists("%s/.libs" % numpy_dir):
-                ld_flags += ["-L%s/.libs" % numpy_dir]
-                from glob import glob
-
-                for f in glob("%s/.libs/*.so" % numpy_dir):
-                    f = os.path.basename(f)
-                    if self.blas_lib is not None and self.blas_lib not in f:
-                        continue
-                    if f.startswith("lib"):
-                        f = f[3:]
-                    if f.endswith(".so"):
-                        f = f[:-3]
-                    ld_flags += ["-l%s" % f]
-                    have_blas_lib = True
-        if not have_blas_lib and self.search_for_system_blas:
-            # Try to just link against blas/f77blas
-            # (both can potentially have the symbol) if it finds the lib.
-            if find_lib("blas"):
-                ld_flags += ["-lblas"]
-                have_blas_lib = True
-            if find_lib("f77blas"):
-                ld_flags += ["-lf77blas"]
-                have_blas_lib = True
-        if not have_blas_lib:
-            print("WARNING: OpMaker: no BLAS lib found")
+            c_macro_defines["HAVE_CUSTOM_BLAS"] = "1"
+
         comp = tf_util.OpCodeCompiler(
             base_name=self.name,
             code_version=self.description.code_version,
             code=self._make_code(),
             include_deps=[self.support_native_op_cpp_filename],
             ld_flags=ld_flags,
+            c_macro_defines=c_macro_defines,
             use_cuda_if_available=self.with_cuda,
             log_stream=self.log_stream,
             **dict(self.compiler_opts),
@@ -1520,12 +1473,14 @@ def fast_baum_welch_staircase(am_scores, seq_lens, **opts):
 
 
 def ctc_loss(
+    *,
     logits,
     logits_seq_lens,
     logits_time_major,
     targets,
     targets_seq_lens,
-
+    label_loop: Optional[bool] = None,
+    ctc_merge_repeated: Optional[bool] = None,
     logits_normalize=True,
     grad_wrt_softmax_in=True,
     blank_index=-1,
@@ -1540,7 +1495,8 @@ def ctc_loss(
    :param bool logits_time_major:
    :param tf.Tensor targets: batch-major, [batch,time]
    :param tf.Tensor targets_seq_lens: (batch,)
-    :param
+    :param label_loop:
+    :param ctc_merge_repeated: alias for label_loop
    :param bool logits_normalize: apply log_softmax on logits (default).
        if False, you might also set grad_wrt_softmax_in=False
    :param bool grad_wrt_softmax_in: assume ``p(s|x) = softmax(logits)``, and define the gradient w.r.t. logits.
@@ -1551,6 +1507,11 @@ def ctc_loss(
    :return: loss, shape (batch,)
    :rtype: tf.Tensor
    """
+    if ctc_merge_repeated is not None:
+        assert label_loop is None
+        label_loop = ctc_merge_repeated
+    if label_loop is None:
+        label_loop = True
    assert logits.get_shape().ndims == 3 and logits.get_shape().dims[-1].value
    dim = logits.get_shape().dims[-1].value
    if not logits_time_major:
@@ -1567,7 +1528,7 @@ def ctc_loss(
        blank_index += dim
    assert 0 <= blank_index < dim
    edges, weights, start_end_states = get_ctc_fsa_fast_bw(
-        targets=targets, seq_lens=targets_seq_lens, blank_idx=blank_index, label_loop=
+        targets=targets, seq_lens=targets_seq_lens, blank_idx=blank_index, label_loop=label_loop
    )
    fwdbwd, obs_scores = fast_baum_welch(
        am_scores=-log_sm, float_idx=seq_mask, edges=edges, weights=weights, start_end_states=start_end_states
@@ -1607,7 +1568,9 @@ def fast_viterbi(am_scores, am_seq_len, edges, weights, start_end_states):
    return alignment, scores
 
 
-def ctc_loss_viterbi(
+def ctc_loss_viterbi(
+    *, logits, logits_seq_lens, logits_time_major, targets, targets_seq_lens, blank_index=-1, label_loop: bool = True
+):
    """
    Similar to :func:`ctc_loss`.
    However, instead of using the full sum, we use the best path (i.e. Viterbi instead of Baum-Welch).
@@ -1619,6 +1582,7 @@ def ctc_loss_viterbi(logits, logits_seq_lens, logits_time_major, targets, target
    :param tf.Tensor targets: batch-major, [batch,time]
    :param tf.Tensor targets_seq_lens: (batch,)
    :param int blank_index: vocab index of the blank symbol
+    :param label_loop:
    :return: loss, shape (batch,)
    :rtype: tf.Tensor
    """
@@ -1632,7 +1596,7 @@ def ctc_loss_viterbi(logits, logits_seq_lens, logits_time_major, targets, target
        blank_index += dim
    assert 0 <= blank_index < dim
    edges, weights, start_end_states = get_ctc_fsa_fast_bw(
-        targets=targets, seq_lens=targets_seq_lens, blank_idx=blank_index
+        targets=targets, seq_lens=targets_seq_lens, blank_idx=blank_index, label_loop=label_loop
    )
    alignment, scores = fast_viterbi(
        am_scores=log_sm, am_seq_len=logits_seq_lens, edges=edges, weights=weights, start_end_states=start_end_states
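
A usage sketch for the now keyword-only ctc_loss signature, showing the ctc_merge_repeated alias that the new code resolves to label_loop. It assumes a RETURNN + TensorFlow setup in which the native op can be compiled; tensor shapes and values are illustrative only.

import tensorflow as tf
from returnn.tf.native_op import ctc_loss

n_time, n_batch, n_dim = 50, 4, 11           # blank is the last class (blank_index=-1)
logits = tf.random.normal([n_time, n_batch, n_dim])
logits_seq_lens = tf.constant([50, 45, 30, 20])
targets = tf.random.uniform([n_batch, 20], maxval=n_dim - 1, dtype=tf.int32)  # [batch, time]
targets_seq_lens = tf.constant([20, 18, 12, 7])

loss = ctc_loss(
    logits=logits,
    logits_seq_lens=logits_seq_lens,
    logits_time_major=True,
    targets=targets,
    targets_seq_lens=targets_seq_lens,
    ctc_merge_repeated=False,  # alias; internally sets label_loop=False per the code above
)
print(loss)  # shape (batch,)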
returnn/tf/network.py
CHANGED
@@ -4428,7 +4428,7 @@ def help_on_tf_exception(
             data = extern_data.data[data_key]
             info += ", %s" % data
         print(" %r: %s" % (key, info), file=file)
-        if data and data.sparse:
+        if data is not None and data.sparse:
             if v_minmax[0] < 0 or v_minmax[1] >= data.dim:
                 print(" WARNING, invalid label for data", data, file=file)
     elif feed_dict is None: