returnn 1.20250826.155029-py3-none-any.whl → 1.20250828.142552-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of returnn might be problematic.
- returnn/PKG-INFO +1 -1
- returnn/_setup_info_generated.py +2 -2
- returnn/frontend/_cache.py +4 -2
- returnn/frontend/array_.py +72 -18
- returnn/frontend/conv.py +2 -1
- returnn/frontend/encoder/conformer.py +32 -8
- returnn/tensor/_dim_extra.py +34 -6
- returnn/util/basic.py +8 -6
- {returnn-1.20250826.155029.dist-info → returnn-1.20250828.142552.dist-info}/METADATA +1 -1
- {returnn-1.20250826.155029.dist-info → returnn-1.20250828.142552.dist-info}/RECORD +13 -13
- {returnn-1.20250826.155029.dist-info → returnn-1.20250828.142552.dist-info}/LICENSE +0 -0
- {returnn-1.20250826.155029.dist-info → returnn-1.20250828.142552.dist-info}/WHEEL +0 -0
- {returnn-1.20250826.155029.dist-info → returnn-1.20250828.142552.dist-info}/top_level.txt +0 -0
returnn/PKG-INFO
CHANGED
returnn/_setup_info_generated.py
CHANGED
@@ -1,2 +1,2 @@
-version = '1.20250826.155029'
-long_version = '1.20250826.155029+git.…'
+version = '1.20250828.142552'
+long_version = '1.20250828.142552+git.f81cb9a'
returnn/frontend/_cache.py
CHANGED
@@ -6,7 +6,7 @@ One use case example is :func:`sinusoidal_positional_encoding` and :func:`relative_positional_encoding`.
 """
 
 from __future__ import annotations
-from typing import Optional, Union, Any, Type, Callable, Tuple, Dict
+from typing import Optional, Union, Any, Type, Callable, Tuple, Dict, List
 from weakref import ref
 import tree
 from returnn.util.lru_cache import lru_cache
@@ -59,6 +59,8 @@ class Cache:
             if isinstance(key_item_orig, DimWrapper):
                 assert isinstance(key_item, DimWrapper)
                 dim_orig = key_item_orig.dim_ref()
+                if dim_orig is None:  # orig dim could be dead. but then it would not be used anyway
+                    continue
                 dim = key_item.dim_ref()
                 assert isinstance(dim_orig, Dim) and isinstance(dim, Dim)
                 dim_map[dim_orig] = dim
@@ -103,7 +105,7 @@ def _transform_key(
     key: Any, *, finalize_callback: Optional[Callable] = None, collected_dim_map: Optional[Dict[Dim, DimWrapper]] = None
 ) -> Tuple[Union[Type[Backend], ref[rf.RunCtx], _KeyItemType], ...]:
     backend = _get_backend(key)
-    keys_flat = [backend]
+    keys_flat: List[Any] = [backend]
     if not backend.executing_eagerly():
         # See comment above: If graph-mode, the cached value becomes invalid
         # when the current run ctx goes out of scope.
returnn/frontend/array_.py
CHANGED
@@ -188,22 +188,18 @@ def merge_dims(
             return source, dims[0]
         return rf.replace_dim(source, in_dim=dims[0], out_dim=out_dim)
     if out_dim is None:
-        …
-        …
-        for d in dims[1:]:
-            reset_dyn_size |= d.need_masking() and out_dim.capacity != 1
-            out_dim = out_dim * d
-        if reset_dyn_size:
+        from returnn.util.basic import prod
+
+        if any(d.need_masking() for d in dims[1:]):
             # The dynamic sizes as calculated via dim math would not correctly describe how the tensor looks like.
             # This would then potentially discard some of the data in the tensor in subsequent operations,
             # when masking is applied.
             # Thus, discard the dynamic sizes, and just treat it as a flat dim with scalar dynamic size.
             # https://github.com/rwth-i6/returnn/issues/1694
-            …
-                for d in dims
-            …
-            …
-            out_dim.dyn_size_ext = out_dim_size
+            # See also similar logic in :func:`concat`.
+            out_dim = Dim(prod(d.get_dim_value_tensor() for d in dims), name="merged")
+        else:
+            out_dim = prod(dims)
     # noinspection PyProtectedMember
     return source._raw_backend.merge_dims(source, dims=dims, out_dim=out_dim), out_dim
 
@@ -427,13 +423,40 @@ def concat(
         dims = sources[0][0].dims_set - {sources[0][1]}
         for src, dim in sources:
             assert src.dims_set - {dim} == dims, f"concat {sources}, need allow_broadcast=True"
+    need_handle_dynamic_dims = False
+    for src, dim in sources[:-1]:
+        if dim.need_masking():
+            need_handle_dynamic_dims = True
+    if handle_dynamic_dims is None:
+        handle_dynamic_dims = need_handle_dynamic_dims
     if not out_dim:
-    …
-    …
-    …
-    …
-    …
-    …
+        if handle_dynamic_dims or not need_handle_dynamic_dims:
+            out_dim = sum(d for _, d in sources)
+        else:  # not handle_dynamic_dims but need_handle_dynamic_dims
+            # There are dynamic dims, but we don't want to handle them.
+            # So, summing the dims would be incorrect.
+            # Just add the dim values.
+            out_dim = Dim(sum(d.get_dim_value_tensor() for _, d in sources if d.dimension is not None), name="concat")
+    if handle_dynamic_dims:
+        out_non_masked_dim = Dim(sum(d.get_dim_value_tensor() for _, d in sources))
+        # noinspection PyProtectedMember
+        out = sources[0][0]._raw_backend.concat(*sources, allow_broadcast=allow_broadcast, out_dim=out_non_masked_dim)
+        masks = []
+        for _, dim in sources:
+            masks.append(
+                dim.get_mask(dim_order=(dim,) + dim.dyn_size_ext.dims, device=out.device)
+                if dim.need_masking()
+                else rf.constant(True, dims=[dim], device=out.device)
+            )
+        # noinspection PyProtectedMember
+        mask_concat = sources[0][0]._raw_backend.concat(
+            *[(mask, dim) for (_, dim), mask in zip(sources, masks)], allow_broadcast=True, out_dim=out_non_masked_dim
+        )
+        out, _ = rf.masked_select(out, mask=mask_concat, dims=[out_non_masked_dim], out_dim=out_dim)
+    else:
+        # noinspection PyProtectedMember
+        out = sources[0][0]._raw_backend.concat(*sources, allow_broadcast=allow_broadcast, out_dim=out_dim)
+    return out, out_dim
 
 
 def concat_features(*sources: Tensor, allow_broadcast=False) -> Tensor:
@@ -478,7 +501,12 @@ def pad(
     if handle_dynamic_dims is None:
         handle_dynamic_dims = _pad_handle_dynamic_dims_default(axes, padding, mode=mode)
     if not out_dims:
-        out_dims = […
+        out_dims = [
+            (left + middle + right)
+            if handle_dynamic_dims or not _pad_need_dyn_dim_handling(middle, left, right, mode=mode)
+            else _pad_sum_dims_no_dyn_dim_handling(middle, left, right)
+            for middle, (left, right) in zip(axes, padding)
+        ]
     # noinspection PyProtectedMember
     return (
         source._raw_backend.pad(
@@ -544,6 +572,32 @@ def _pad_need_dyn_dim_handling(
     return True
 
 
+def _pad_sum_dims_no_dyn_dim_handling(
+    middle: Dim, left: Union[Dim, int, Tensor], right: Union[Dim, int, Tensor]
+) -> Dim:
+    """
+    This gets called when we need to handle dyn dims, but handle_dynamic_dims=False.
+    See also the same logic in :func:`concat`.
+    """
+    if isinstance(left, Dim):
+        left = left.get_dim_value_tensor()
+    elif isinstance(left, int):
+        pass
+    elif isinstance(left, Tensor):
+        assert left.dims == ()  # scalar
+    else:
+        raise TypeError(f"invalid left pad {left}")
+    if isinstance(right, Dim):
+        right = right.get_dim_value_tensor()
+    elif isinstance(right, int):
+        pass
+    elif isinstance(right, Tensor):
+        assert right.dims == ()  # scalar
+    else:
+        raise TypeError(f"invalid right pad {right}")
+    return Dim(left + middle.get_dim_value_tensor() + right, name="pad")
+
+
 def cum_concat_step(
     source: Tensor, *, prev_accum: Tensor, axis: Dim, out_spatial_dim: Optional[Dim] = None
 ) -> Tuple[Tensor, Dim]:
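
For orientation, here is a minimal usage sketch of rf.concat as it relates to the new out_dim handling above. This is not part of the diff: the rf.constant-based tensor construction and the static example dims are assumptions for illustration, and a RETURNN frontend backend (e.g. the PyTorch one) must already be selected for the calls to run eagerly.

import returnn.frontend as rf
from returnn.tensor import Dim

# Two sources sharing a feature dim, concatenated over their (static) time dims.
feat = Dim(3, name="feat")
time_a = Dim(5, name="time_a")
time_b = Dim(7, name="time_b")
a = rf.constant(1.0, dims=[feat, time_a])
b = rf.constant(2.0, dims=[feat, time_b])

# With purely static dims, need_handle_dynamic_dims stays False and
# out_dim is simply time_a + time_b (length 12 here).
out, concat_dim = rf.concat((a, time_a), (b, time_b))
assert concat_dim.dimension == 5 + 7
assert out.dims_set == {feat, concat_dim}
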
returnn/frontend/conv.py
CHANGED
@@ -862,8 +862,9 @@ def _consistent_same_padding(
         pad_right = (s - 1) * d - pad_left
         paddings.append((pad_left, pad_right))
     # We expect that masking was already done before (or we don't care about it), thus handle_dynamic_dims=False.
+    out_dims = [(left + middle + right) for middle, (left, right) in zip(in_spatial_dims, paddings)]
     source, in_spatial_dims = rf.pad(
-        source, axes=in_spatial_dims, padding=paddings, value=pad_value, handle_dynamic_dims=False
+        source, axes=in_spatial_dims, padding=paddings, value=pad_value, handle_dynamic_dims=False, out_dims=out_dims
     )
     return source, in_spatial_dims, 0
 
returnn/frontend/encoder/conformer.py
CHANGED
@@ -8,6 +8,8 @@ https://github.com/rwth-i6/returnn_common/issues/233
 
 from __future__ import annotations
 from typing import Optional, Union, Any, Tuple, List, Dict, Callable
+from types import FunctionType
+import functools
 import copy as _copy
 from returnn.tensor import Tensor, Dim
 import returnn.frontend as rf
@@ -298,7 +300,8 @@ class ConformerEncoder(ISeqDownsamplingEncoder):
         *,
         num_layers: int,
         input_layer: Optional[Union[ConformerConvSubsample, ISeqDownsamplingEncoder, rf.Module, Any]],
-        input_embedding_scale: float = …
+        input_embedding_scale: Optional[float] = None,
+        pos_enc: Union[None, Callable, Dict[str, Any], rf.Module] = None,
         input_dropout: float = 0.1,
         ff_dim: Dim = NotSpecified,
         ff_activation: Union[Callable[[Tensor], Tensor], Dict[str, Any], rf.Module] = NotSpecified,
@@ -317,8 +320,17 @@ class ConformerEncoder(ISeqDownsamplingEncoder):
         :param num_layers: the number of encoder layers
         :param input_layer: input/frontend/prenet with potential subsampling.
             (x, in_spatial_dim) -> (y, out_spatial_dim)
-        :param input_embedding_scale: applied after input_layer.
-            …
+        :param input_embedding_scale: applied after input_layer.
+            1.0 by default for historic reasons if pos_enc is None,
+            else sqrt(out_dim) by default.
+            In std Transformer, also ESPnet E-Branchformer and Conformer, this is sqrt(out_dim),
+            which is relevant when you add positional encoding.
+        :param pos_enc: positional encoding, applied after input_embedding_scale.
+            None (no positional encoding) by default, unlike standard Transformer.
+            E.g. :func:`rf.sinusoidal_positional_encoding` for absolute pos enc.
+            Note, relative positional encoding is usually part of the attention layer,
+            e.g. :class:`rf.RelPosSelfAttention`,
+            and nothing needs to be set here.
         :param input_dropout: applied after input_projection(input_layer(x))
         :param ff_dim: the dimension of feed-forward layers. 2048 originally, or 4 times out_dim
         :param ff_activation: activation function for feed-forward network
@@ -352,12 +364,22 @@ class ConformerEncoder(ISeqDownsamplingEncoder):
         else:
             raise TypeError(f"unexpected input_layer {input_layer!r}")
         self.input_layer = input_layer
-        self.…
-            …
-            …
-            else …
-        )
+        in_dim = self.input_layer.out_dim if self.input_layer else self.in_dim
+        self.input_projection = rf.Linear(in_dim, self.out_dim, with_bias=False) if in_dim != self.out_dim else None
+        if input_embedding_scale is None:
+            input_embedding_scale = (self.out_dim.dimension**0.5) if pos_enc is not None else 1.0
         self.input_embedding_scale = input_embedding_scale
+        if pos_enc is None:
+            pass
+        elif isinstance(pos_enc, dict):
+            pos_enc = rf.build_from_dict(pos_enc, feat_dim=self.out_dim)
+        elif isinstance(pos_enc, rf.Module):
+            pass
+        elif isinstance(pos_enc, FunctionType):
+            pos_enc = functools.partial(pos_enc, feat_dim=self.out_dim)
+        else:
+            raise TypeError(f"unexpected pos_enc type {pos_enc!r}")
+        self.pos_enc = pos_enc
         self.input_dropout = input_dropout
 
         if not encoder_layer or isinstance(encoder_layer, (dict, type)):
@@ -411,6 +433,8 @@ class ConformerEncoder(ISeqDownsamplingEncoder):
         x = self.input_projection(x_subsample) if self.input_projection else x_subsample
         if self.input_embedding_scale != 1.0:
             x = x * self.input_embedding_scale
+        if self.pos_enc is not None:
+            x = x + self.pos_enc(spatial_dim=out_spatial_dim)
         x = rf.dropout(x, self.input_dropout, axis=self.dropout_broadcast and self.out_dim)
         x = self.layers(x, spatial_dim=out_spatial_dim, collected_outputs=collected_outputs)
         return x, out_spatial_dim
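
To show what the new pos_enc argument looks like from the user side, here is a hedged construction sketch. Only pos_enc, the optional input_layer, and the input_embedding_scale default behaviour come from the change above; the leading in_dim/out_dim Dim arguments, the concrete dim sizes, and the requirement that a frontend backend (e.g. PyTorch) is already selected are assumptions for illustration.

import returnn.frontend as rf
from returnn.tensor import Dim
from returnn.frontend.encoder.conformer import ConformerEncoder

in_dim = Dim(80, name="logmel")   # hypothetical input feature dim
out_dim = Dim(512, name="enc")    # hypothetical model dim
encoder = ConformerEncoder(
    in_dim,
    out_dim,
    num_layers=12,
    input_layer=None,  # no subsampling frontend here, just for brevity
    pos_enc=rf.sinusoidal_positional_encoding,  # absolute pos enc, as named in the new docstring
    # input_embedding_scale is left at None and thus resolves to sqrt(out_dim)
    # because pos_enc is set; pass 1.0 explicitly to keep the old behaviour.
)
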
returnn/tensor/_dim_extra.py
CHANGED
@@ -1264,7 +1264,6 @@ class _DimMixin:
             raise TypeError(f"complete_dyn_size: _relu: unexpected type {type(a)}")
 
         y: Optional[_t.Tensor] = None  # resulting dyn size
-        y_max_value: Optional[_t.Tensor] = None  # resulting dyn size max value
         inputs = list(op.inputs)
         assert inputs
         for x_dim in inputs:
@@ -1275,8 +1274,6 @@ class _DimMixin:
             if x_dim.dyn_size_ext is None and x_dim.dimension is None:
                 return
             y = _bin_op(y, x_dim.dimension if x_dim.dimension is not None else x_dim.dyn_size_ext)
-            if not template_only and y.raw_tensor is not None:
-                y_max_value = _bin_op(y_max_value, x_dim.get_dim_value_tensor())
         assert y is not None, f"op {op}?"
         if self.dyn_size_ext is not None:
             assert self.dyn_size_ext.dim_tags == y.dim_tags
@@ -1286,9 +1283,14 @@ class _DimMixin:
         else:
             self.batch = y.batch
         self.dyn_size_ext = y
-        if not template_only and …
-            …
-            …
+        if not template_only and y.raw_tensor is not None:
+            # Note: Earlier, we had this wrong.
+            # It is not correct to replicate the same math (bin ops)
+            # on the dim values (_dyn_size_max_value of each dim).
+            # Consider sizes1=[2,3], sizes2=[5,4], and the op is "add".
+            # Then the result sizes would be [7,7], thus its max is 7,
+            # but max(sizes1)+max(sizes2)=3+5=8.
+            self._dyn_size_max_value = rf.reduce_max(y, axis=y.dims) if y.dims else y
         if tf and y.placeholder is not None:
             self.set_tag_on_size_tensor(y.placeholder)
 
@@ -2080,6 +2082,8 @@ class _DimMixin:
         :return: self + other. note that this is not commutative, i.e. different from other + self.
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 0:
+            return self
         cache_key = ("add", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2098,6 +2102,8 @@ class _DimMixin:
         :return: other + self
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 0:
+            return self
         cache_key = ("add_left", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2115,6 +2121,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 0:
+            return self
         return self.sub_right(other)
 
     def sub_right(self: Dim, other):
@@ -2123,6 +2131,8 @@ class _DimMixin:
         :return: self - other
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 0:
+            return self
         cache_key = ("sub", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2141,6 +2151,8 @@ class _DimMixin:
         :return: (-other) + self
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 0:
+            return self
         cache_key = ("sub_left", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2158,6 +2170,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         cache_key = ("mul", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2175,6 +2189,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         cache_key = ("mul_left", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2192,6 +2208,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         cache_key = ("floordiv", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2209,6 +2227,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         return self.div_right(other)
 
     def div_left(self: Dim, other):
@@ -2216,6 +2236,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         cache_key = ("truediv_left", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2233,6 +2255,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         cache_key = ("truediv", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2250,6 +2274,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         cache_key = ("ceildiv_left", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2267,6 +2293,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         cache_key = ("ceildiv", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
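
The practical effect of these shortcuts is that trivial dim math no longer creates derived Dim objects. A small hedged check: the Dim constructions are ordinary RETURNN API, and the identity assertions simply restate the `return self` branches added above.

from returnn.tensor import Dim

time_dim = Dim(None, name="time")  # dynamic dim, size unknown here
feat_dim = Dim(512, name="feat")   # static dim

# Adding 0 or multiplying/dividing by 1 now returns the very same Dim object.
assert (time_dim + 0) is time_dim
assert (0 + time_dim) is time_dim
assert (time_dim - 0) is time_dim
assert (feat_dim * 1) is feat_dim
assert (feat_dim // 1) is feat_dim
# sum() over Dims (as used e.g. in rf.concat) starts from int 0,
# so the initial 0 + dim step is now a no-op instead of a derived dim:
assert sum([feat_dim]) is feat_dim
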
returnn/util/basic.py
CHANGED
@@ -1693,15 +1693,17 @@ def inplace_increment(x: numpy.ndarray, idx: numpy.ndarray, y: Union[numpy.ndarr…
     raise NotImplementedError("This feature was removed with dropped Theano support")
 
 
-def prod(ls):
+def prod(ls: Union[Iterable[T], numpy.ndarray]) -> Union[int, T, float]:
     """
-    :param …
-    :…
+    :param ls:
+    :return: ls[0] * ls[1] * ...
     """
-    …
+    it = iter(ls)
+    try:
+        x = next(it)
+    except StopIteration:
         return 1
-    …
-    for y in ls[1:]:
+    for y in it:
         x = x * y  # *= doesn't work because x might be a tensor, and for e.g. torch.Tensor this op is in-place
     return x
 
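
A quick sanity check of the new iterator-based prod (plain Python, directly mirroring the code above): it now accepts arbitrary iterables, not just sequences, and keeps the convention that an empty input yields 1.

from returnn.util.basic import prod

assert prod([2, 3, 4]) == 24
assert prod(()) == 1                               # empty input
assert prod(x * x for x in range(1, 4)) == 36      # generators work now, too
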
{returnn-1.20250826.155029.dist-info → returnn-1.20250828.142552.dist-info}/RECORD
CHANGED
@@ -1,9 +1,9 @@
-returnn/PKG-INFO,sha256=…
+returnn/PKG-INFO,sha256=Uvf8zgSBctl_SphH-v2ikfVE9N-jlwEAGCiUPtlx8iY,5215
 returnn/__init__.py,sha256=biBtRsM0WZ406vShaeH-9WFoqJ8XwTbn6g0EeFJ7l8E,1012
 returnn/__main__.py,sha256=lHyZcu_0yc9f7Vf_Kfdy9PmeU0T76XVXnpalHi5WKro,31740
 returnn/__old_mod_loader__.py,sha256=nvsNY-xELdS_IPNkv66Q9Rmvg4dbGW0-EBRDcCmctos,7654
 returnn/__setup__.py,sha256=22kQn2fh11iPM0hLb2Fy5sLmoU1JGvmDxXRYuRgQkwU,4659
-returnn/_setup_info_generated.py,sha256=…
+returnn/_setup_info_generated.py,sha256=QXZpIuHEV8wkbvcOm3273IFMbpbj15j4l3g_UoApY08,77
 returnn/config.py,sha256=3tmKhB6FnQZaNdtcYsiB61JnEY--iZ2qmJ4yq0b6tE0,29140
 returnn/forward_iface.py,sha256=A_OJiaXsX4MlXQRzST86ylyxSUZbC402PQL1REcqHjM,911
 returnn/learning_rate_control.py,sha256=ZvWryAn_tv9DhV8sh1LV3eE34Yltl3On3mYZAG4hR9s,34684
@@ -76,11 +76,11 @@ returnn/extern/graph_editor/transform.py,sha256=qMGSenpbAnGqdG6QP6iWjlm6_ccySYJa…
 returnn/extern/graph_editor/util.py,sha256=HfRbyQPmQ6_n5-O-096n0KeJtllQXFtaurpeJS_URZ0,18706
 returnn/frontend/__init__.py,sha256=2aS7nbxXniIrBp2DODl0xN0f3IJ_dX4Bi9ZlR7W5_DE,1472
 returnn/frontend/_backend.py,sha256=39l5MC1DaT0MPklMM8HXAW9nqisIIZQ9g2QSHOOtPQE,50741
-returnn/frontend/_cache.py,sha256=…
+returnn/frontend/_cache.py,sha256=Uao2xzfvVaKABk1fkxcpXzxKIGJaI9FwwlTvvoNUstk,8550
 returnn/frontend/_numpy_backend.py,sha256=fZjks7p3dgxVZ6tSDazTTgBxNjJqXjfqgw_7mA7rDEE,9066
 returnn/frontend/_random_journal.py,sha256=_ktP_mjgx8vtQQGX_DofdhewJj0aPiczefTWeemPkmo,5457
 returnn/frontend/_utils.py,sha256=uVQldGHyYKIyhSEmumJ04ix5eP5tjZw4CEC0w6-zhyQ,12074
-returnn/frontend/array_.py,sha256=…
+returnn/frontend/array_.py,sha256=Up5cB5kPaBgGToDdnyhJ2KsoMWKIdmuRdXq6OdBLzi4,53754
 returnn/frontend/attention.py,sha256=GKt-Xqnz8sIyXVrE0i4VCS7J2Wu7dmoH_BA0Cu8CrXQ,45769
 returnn/frontend/backend.py,sha256=iQ9w4xl8Ea7bgpb0VUaCKq50rV5Bl2E5J8Rhd-oqD_c,883
 returnn/frontend/build_from_dict.py,sha256=rfWa2rjjhIR_kIQED_nMrygrQBunS6unegzWTLVbC98,3017
@@ -88,7 +88,7 @@ returnn/frontend/cond.py,sha256=gh6wg0aSbAJQfKRv4BQAu-EfPWtWPLFjgc8IaPPFmwg,1023…
 returnn/frontend/const.py,sha256=A5fP9w6Akv56d89pPvdoZaXvC9ZTYcexepnS9O2clOc,3945
 returnn/frontend/container.py,sha256=wF3OlQN7WlOVmmdapUth_Unha3DVf6h1B7okBJAuJDA,8011
 returnn/frontend/control_flow_ctx.py,sha256=v17CsNwRnZYe8GdMtGJt2ftibfxMCGK1i0l-GX5ILu0,699
-returnn/frontend/conv.py,sha256=…
+returnn/frontend/conv.py,sha256=lca3hG0FO2IEOoe5OvOnm9NU2Ofx_RPqnCxZqxocUdM,32079
 returnn/frontend/device.py,sha256=Sjara0EmFLhu9O55cN_p6OwU0NgdNCCQjyAuQhiWpGw,1437
 returnn/frontend/dims.py,sha256=aH5FQ_m0xMD6Rj-BUWGx8lB-HkCuwZfMBf6mZbGGW5E,12611
 returnn/frontend/dropout.py,sha256=TjqZCKDIOBeHr14-NCemOm9m3p84LxQuPH1DvRAYg88,5028
@@ -138,7 +138,7 @@ returnn/frontend/decoder/__init__.py,sha256=A-koKyPVlXp_V_2bk6GKZ1Xfv4rYIcfxGMXQ…
 returnn/frontend/decoder/transformer.py,sha256=20a37hMiPbQBHx3tSbOeiAbFPVRcX_KYpPuw8tmY6GU,23658
 returnn/frontend/encoder/__init__.py,sha256=0QGLlujRIKx3zBREeShza_-xhGIxj73zbd7t-g1m-ho,17
 returnn/frontend/encoder/base.py,sha256=A759EwCYAmSi-kzXz1vaTjR2l59TvNGQlzaNdp3UOKs,2109
-returnn/frontend/encoder/conformer.py,sha256=…
+returnn/frontend/encoder/conformer.py,sha256=rWulygolesbYkLw9naSxwygaZhWqKpHKEVj-1AQbel0,21351
 returnn/frontend/encoder/conformer_v2.py,sha256=vAYdT8m2Zzg3IIZZafeccClFHU1_c9T-EgBOsHadQPA,7701
 returnn/frontend/encoder/e_branchformer.py,sha256=SZdhpb90FaQdpzgvSOtFPLbLCa0NdycbB5Z4vMoY4TM,12279
 returnn/frontend/encoder/transformer.py,sha256=Jj0mF1D2MohOk-9sGYdsLtVW_86fwoq4pKWCdPMvPR8,11580
@@ -154,7 +154,7 @@ returnn/sprint/extern_interface.py,sha256=l-v1X-Yg0UpTFe7Y3c4FwWOqpSNuv9Oy5EzqlK…
 returnn/sprint/interface.py,sha256=1j5SB0V8hSW8A5song9ciZtcBnZoKKfNipk9ezOIMuA,36491
 returnn/tensor/README.md,sha256=X6BqcRLrPLPnwF9yR69uqIFrMnNluj9pBkOPHwNgzuo,501
 returnn/tensor/__init__.py,sha256=on6j5PEOQpck50UcsR4nJzJSDmoVy34z1Oq4efv6Ax0,154
-returnn/tensor/_dim_extra.py,sha256=…
+returnn/tensor/_dim_extra.py,sha256=VN7Smn1Q0Y0DO7GSPM-aJUhp_jy5pzSMJbPkCk6JnqY,123448
 returnn/tensor/_tensor_extra.py,sha256=gbSl6HMtn8WFYloanew_RaNNwx3eCpnKv3UfCkntJiQ,164923
 returnn/tensor/_tensor_mixin_base.py,sha256=H5z86I0NejxrSgMH1c5oXQzBqS6L9HpvP4y7oegBaSc,643
 returnn/tensor/_tensor_op_overloads.py,sha256=HklwuTBjy7mH_665VKaCUdu-oC3aa7Uz1ZQiCz4jeZc,5448
@@ -233,7 +233,7 @@ returnn/torch/util/gradient_checkpoint.py,sha256=iLy-FB65DC8O6LxzmMvFjnSdpIVpko8…
 returnn/torch/util/module.py,sha256=MXHIrF9Isu575DDJIa81212ULKwdqu1oOLxDVZecVSk,1693
 returnn/torch/util/scaled_gradient.py,sha256=C5e79mpqtxdtw08OTSy413TSBSlOertRisc-ioiFIaU,3191
 returnn/util/__init__.py,sha256=UIG1qw4idqhW71BV60ha7h9PktxvEVcBIu0lYRossK8,336
-returnn/util/basic.py,sha256=…
+returnn/util/basic.py,sha256=S2ABKcP0pf2UexuMXDNHGcfAu7GDSD2mr6OIByM152M,143168
 returnn/util/better_exchook.py,sha256=39yvRecluDgYhViwSkaQ8crJ_cBWI63KeEGuK4RKe5w,70843
 returnn/util/bpe.py,sha256=LWFhICZsEOnMwNws0lybPNzKRX6rSr8yKCvP65vjl9Y,19656
 returnn/util/debug.py,sha256=wuRzdg9zB84WWCGyTjmRR_zYypu8gXxlc0nZ6si9OC8,28224
@@ -253,8 +253,8 @@ returnn/util/sig_proc.py,sha256=Tjz0VOAVyqu2qDCF5HZ1JjALjcFsHcNkcd96WgZeKfE,7265…
 returnn/util/task_system.py,sha256=y4sMVXQ25Qd2z0rx03uOlXlkE-jbCYC1Sjfn-XlraVU,26003
 returnn/util/train_proc_manager.py,sha256=Pjht28k6uz6BNQ47uW6Gf880iyq5q4wx7P_K2tmoAM8,3266
 returnn/util/watch_memory.py,sha256=BR5P2kvBN6UI81cE0_1WAA6Hd1SByLbBaiDxvLhPOew,4213
-returnn-1.20250826.155029.dist-info/LICENSE,sha256=…
-returnn-1.20250826.155029.dist-info/METADATA,sha256=…
-returnn-1.20250826.155029.dist-info/WHEEL,sha256=…
-returnn-1.20250826.155029.dist-info/top_level.txt,sha256=…
-returnn-1.20250826.155029.dist-info/RECORD,,
+returnn-1.20250828.142552.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
+returnn-1.20250828.142552.dist-info/METADATA,sha256=Uvf8zgSBctl_SphH-v2ikfVE9N-jlwEAGCiUPtlx8iY,5215
+returnn-1.20250828.142552.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
+returnn-1.20250828.142552.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
+returnn-1.20250828.142552.dist-info/RECORD,,

{returnn-1.20250826.155029.dist-info → returnn-1.20250828.142552.dist-info}/LICENSE
File without changes

{returnn-1.20250826.155029.dist-info → returnn-1.20250828.142552.dist-info}/WHEEL
File without changes

{returnn-1.20250826.155029.dist-info → returnn-1.20250828.142552.dist-info}/top_level.txt
File without changes