returnn 1.20250826.155029-py3-none-any.whl → 1.20250828.142552-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of returnn might be problematic.

returnn/PKG-INFO CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: returnn
- Version: 1.20250826.155029
+ Version: 1.20250828.142552
  Summary: The RWTH extensible training framework for universal recurrent neural networks
  Home-page: https://github.com/rwth-i6/returnn/
  Author: Albert Zeyer
@@ -1,2 +1,2 @@
- version = '1.20250826.155029'
- long_version = '1.20250826.155029+git.cca4212'
+ version = '1.20250828.142552'
+ long_version = '1.20250828.142552+git.f81cb9a'
@@ -6,7 +6,7 @@ One use case example is :func:`sinusoidal_positional_encoding` and :func:`relati
  """

  from __future__ import annotations
- from typing import Optional, Union, Any, Type, Callable, Tuple, Dict
+ from typing import Optional, Union, Any, Type, Callable, Tuple, Dict, List
  from weakref import ref
  import tree
  from returnn.util.lru_cache import lru_cache
@@ -59,6 +59,8 @@ class Cache:
  if isinstance(key_item_orig, DimWrapper):
  assert isinstance(key_item, DimWrapper)
  dim_orig = key_item_orig.dim_ref()
+ if dim_orig is None: # orig dim could be dead. but then it would not be used anyway
+ continue
  dim = key_item.dim_ref()
  assert isinstance(dim_orig, Dim) and isinstance(dim, Dim)
  dim_map[dim_orig] = dim
@@ -103,7 +105,7 @@ def _transform_key(
  key: Any, *, finalize_callback: Optional[Callable] = None, collected_dim_map: Optional[Dict[Dim, DimWrapper]] = None
  ) -> Tuple[Union[Type[Backend], ref[rf.RunCtx], _KeyItemType], ...]:
  backend = _get_backend(key)
- keys_flat = [backend]
+ keys_flat: List[Any] = [backend]
  if not backend.executing_eagerly():
  # See comment above: If graph-mode, the cached value becomes invalid
  # when the current run ctx goes out of scope.
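
Note on the new "dim_orig is None" guard above: the cache stores dims behind weakref.ref, and a weak reference returns None once its target has been garbage collected, so a dead original dim is simply skipped. A minimal stdlib-only sketch of that behavior (names are illustrative, not from the package):

    import weakref

    class _Probe:
        pass

    obj = _Probe()
    r = weakref.ref(obj)
    assert r() is obj   # referent still alive
    del obj             # drop the last strong reference
    assert r() is None  # dead ref: the same situation the new "continue" handles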
@@ -188,22 +188,18 @@ def merge_dims(
  return source, dims[0]
  return rf.replace_dim(source, in_dim=dims[0], out_dim=out_dim)
  if out_dim is None:
- out_dim = dims[0]
- reset_dyn_size = False
- for d in dims[1:]:
- reset_dyn_size |= d.need_masking() and out_dim.capacity != 1
- out_dim = out_dim * d
- if reset_dyn_size:
+ from returnn.util.basic import prod
+
+ if any(d.need_masking() for d in dims[1:]):
  # The dynamic sizes as calculated via dim math would not correctly describe how the tensor looks like.
  # This would then potentially discard some of the data in the tensor in subsequent operations,
  # when masking is applied.
  # Thus, discard the dynamic sizes, and just treat it as a flat dim with scalar dynamic size.
  # https://github.com/rwth-i6/returnn/issues/1694
- out_dim_size = dims[0].get_dim_value_tensor()
- for d in dims[1:]:
- out_dim_size *= d.get_dim_value_tensor()
- assert isinstance(out_dim_size, Tensor) and out_dim_size.dims == () # scalar
- out_dim.dyn_size_ext = out_dim_size
+ # See also similar logic in :func:`concat`.
+ out_dim = Dim(prod(d.get_dim_value_tensor() for d in dims), name="merged")
+ else:
+ out_dim = prod(dims)
  # noinspection PyProtectedMember
  return source._raw_backend.merge_dims(source, dims=dims, out_dim=out_dim), out_dim

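The merge_dims change above can be summarized with made-up numbers (an illustration only, not RETURNN code): with a dynamic time dim of per-sequence sizes [2, 3] (capacity 3) and a static feature dim 4, dim math would describe the merged axis with per-sequence sizes [8, 12], but the raw merged tensor actually holds max(sizes) * 4 = 12 slots per sequence with padding interleaved, so those sizes would mis-describe the data. The new code therefore builds a flat Dim from the product of the dim values:

    # Illustrative sizes only (see https://github.com/rwth-i6/returnn/issues/1694):
    time_sizes = [2, 3]                                 # dynamic time dim, capacity max(time_sizes) == 3
    feat = 4                                            # static feature dim
    dim_math_sizes = [t * feat for t in time_sizes]     # [8, 12], does not match the raw layout
    flat_size = max(time_sizes) * feat                  # 12, what Dim(prod(...), name="merged") uses
    assert flat_size == 12
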
@@ -427,13 +423,40 @@ def concat(
  dims = sources[0][0].dims_set - {sources[0][1]}
  for src, dim in sources:
  assert src.dims_set - {dim} == dims, f"concat {sources}, need allow_broadcast=True"
+ need_handle_dynamic_dims = False
+ for src, dim in sources[:-1]:
+ if dim.need_masking():
+ need_handle_dynamic_dims = True
+ if handle_dynamic_dims is None:
+ handle_dynamic_dims = need_handle_dynamic_dims
  if not out_dim:
- out_dim = sum(d for _, d in sources)
- if handle_dynamic_dims is None or handle_dynamic_dims:
- for src, dim in sources[:-1]:
- assert dim.is_static(), f"concat {sources}, dim {dim} is not static, not yet implemented..."
- # noinspection PyProtectedMember
- return sources[0][0]._raw_backend.concat(*sources, allow_broadcast=allow_broadcast, out_dim=out_dim), out_dim
+ if handle_dynamic_dims or not need_handle_dynamic_dims:
+ out_dim = sum(d for _, d in sources)
+ else: # not handle_dynamic_dims but need_handle_dynamic_dims
+ # There are dynamic dims, but we don't want to handle them.
+ # So, summing the dims would be incorrect.
+ # Just add the dim values.
+ out_dim = Dim(sum(d.get_dim_value_tensor() for _, d in sources if d.dimension is not None), name="concat")
+ if handle_dynamic_dims:
+ out_non_masked_dim = Dim(sum(d.get_dim_value_tensor() for _, d in sources))
+ # noinspection PyProtectedMember
+ out = sources[0][0]._raw_backend.concat(*sources, allow_broadcast=allow_broadcast, out_dim=out_non_masked_dim)
+ masks = []
+ for _, dim in sources:
+ masks.append(
+ dim.get_mask(dim_order=(dim,) + dim.dyn_size_ext.dims, device=out.device)
+ if dim.need_masking()
+ else rf.constant(True, dims=[dim], device=out.device)
+ )
+ # noinspection PyProtectedMember
+ mask_concat = sources[0][0]._raw_backend.concat(
+ *[(mask, dim) for (_, dim), mask in zip(sources, masks)], allow_broadcast=True, out_dim=out_non_masked_dim
+ )
+ out, _ = rf.masked_select(out, mask=mask_concat, dims=[out_non_masked_dim], out_dim=out_dim)
+ else:
+ # noinspection PyProtectedMember
+ out = sources[0][0]._raw_backend.concat(*sources, allow_broadcast=allow_broadcast, out_dim=out_dim)
+ return out, out_dim


  def concat_features(*sources: Tensor, allow_broadcast=False) -> Tensor:
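
A minimal NumPy illustration of the new handle_dynamic_dims path in concat (not RETURNN code; the array values are made up): concatenate first along a plain unmasked dim whose size is the sum of the dim values, concatenate the per-source masks the same way, then pack only the valid frames, which is the role rf.masked_select plays with the dim-math out_dim:

    import numpy as np

    # Batch of 2, two sources with dynamic time dims of per-sequence lengths [2, 3] and [1, 4].
    a = np.array([[1, 2, 0], [3, 4, 5]])          # padded to capacity 3
    b = np.array([[6, 0, 0, 0], [7, 8, 9, 10]])   # padded to capacity 4
    a_len, b_len = np.array([2, 3]), np.array([1, 4])

    # "out_non_masked_dim": 3 + 4 = 7 slots; naive concat leaves padding in the middle of each row.
    cat = np.concatenate([a, b], axis=1)
    # Concatenate the masks the same way, then keep only valid frames per sequence
    # (per-sequence result lengths are a_len + b_len = [3, 7], the dim-math out_dim).
    mask = np.concatenate([np.arange(3) < a_len[:, None], np.arange(4) < b_len[:, None]], axis=1)
    packed = [row[m] for row, m in zip(cat, mask)]
    assert packed[0].tolist() == [1, 2, 6]
    assert packed[1].tolist() == [3, 4, 5, 7, 8, 9, 10]
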
@@ -478,7 +501,12 @@ def pad(
  if handle_dynamic_dims is None:
  handle_dynamic_dims = _pad_handle_dynamic_dims_default(axes, padding, mode=mode)
  if not out_dims:
- out_dims = [left + middle + right for middle, (left, right) in zip(axes, padding)]
+ out_dims = [
+ (left + middle + right)
+ if handle_dynamic_dims or not _pad_need_dyn_dim_handling(middle, left, right, mode=mode)
+ else _pad_sum_dims_no_dyn_dim_handling(middle, left, right)
+ for middle, (left, right) in zip(axes, padding)
+ ]
  # noinspection PyProtectedMember
  return (
  source._raw_backend.pad(
@@ -544,6 +572,32 @@ def _pad_need_dyn_dim_handling(
  return True


+ def _pad_sum_dims_no_dyn_dim_handling(
+ middle: Dim, left: Union[Dim, int, Tensor], right: Union[Dim, int, Tensor]
+ ) -> Dim:
+ """
+ This gets called when we need to handle dyn dims, but handle_dynamic_dims=False.
+ See also the same logic in :func:`concat`.
+ """
+ if isinstance(left, Dim):
+ left = left.get_dim_value_tensor()
+ elif isinstance(left, int):
+ pass
+ elif isinstance(left, Tensor):
+ assert left.dims == () # scalar
+ else:
+ raise TypeError(f"invalid left pad {left}")
+ if isinstance(right, Dim):
+ right = right.get_dim_value_tensor()
+ elif isinstance(right, int):
+ pass
+ elif isinstance(right, Tensor):
+ assert right.dims == () # scalar
+ else:
+ raise TypeError(f"invalid right pad {right}")
+ return Dim(left + middle.get_dim_value_tensor() + right, name="pad")
+
+
  def cum_concat_step(
  source: Tensor, *, prev_accum: Tensor, axis: Dim, out_spatial_dim: Optional[Dim] = None
  ) -> Tuple[Tensor, Dim]:
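
The helper above mirrors the merge_dims/concat logic for rf.pad when a dynamic dim would need masking but handle_dynamic_dims=False: instead of dim math, which would track per-sequence sizes, the padded axis becomes a flat Dim of left + dim value + right. Illustrative numbers only:

    # Dynamic time dim with per-sequence sizes [2, 3] (capacity 3), padded by (left, right) = (1, 1).
    sizes = [2, 3]
    left, right = 1, 1
    dim_math_sizes = [left + s + right for s in sizes]   # [4, 5], would require masking/handling
    flat_size = left + max(sizes) + right                # 5, returned as Dim(..., name="pad")
    assert flat_size == 5
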
returnn/frontend/conv.py CHANGED
@@ -862,8 +862,9 @@ def _consistent_same_padding(
  pad_right = (s - 1) * d - pad_left
  paddings.append((pad_left, pad_right))
  # We expect that masking was already done before (or we don't care about it), thus handle_dynamic_dims=False.
+ out_dims = [(left + middle + right) for middle, (left, right) in zip(in_spatial_dims, paddings)]
  source, in_spatial_dims = rf.pad(
- source, axes=in_spatial_dims, padding=paddings, value=pad_value, handle_dynamic_dims=False
+ source, axes=in_spatial_dims, padding=paddings, value=pad_value, handle_dynamic_dims=False, out_dims=out_dims
  )
  return source, in_spatial_dims, 0

@@ -8,6 +8,8 @@ https://github.com/rwth-i6/returnn_common/issues/233

  from __future__ import annotations
  from typing import Optional, Union, Any, Tuple, List, Dict, Callable
+ from types import FunctionType
+ import functools
  import copy as _copy
  from returnn.tensor import Tensor, Dim
  import returnn.frontend as rf
@@ -298,7 +300,8 @@ class ConformerEncoder(ISeqDownsamplingEncoder):
  *,
  num_layers: int,
  input_layer: Optional[Union[ConformerConvSubsample, ISeqDownsamplingEncoder, rf.Module, Any]],
- input_embedding_scale: float = 1.0,
+ input_embedding_scale: Optional[float] = None,
+ pos_enc: Union[None, Callable, Dict[str, Any], rf.Module] = None,
  input_dropout: float = 0.1,
  ff_dim: Dim = NotSpecified,
  ff_activation: Union[Callable[[Tensor], Tensor], Dict[str, Any], rf.Module] = NotSpecified,
@@ -317,8 +320,17 @@ class ConformerEncoder(ISeqDownsamplingEncoder):
  :param num_layers: the number of encoder layers
  :param input_layer: input/frontend/prenet with potential subsampling.
  (x, in_spatial_dim) -> (y, out_spatial_dim)
- :param input_embedding_scale: applied after input_layer. 1.0 by default for historic reasons.
- In std Transformer, also ESPnet E-Branchformer and Conformer, this is sqrt(out_dim).
+ :param input_embedding_scale: applied after input_layer.
+ 1.0 by default for historic reasons if pos_enc is None,
+ else sqrt(out_dim) by default.
+ In std Transformer, also ESPnet E-Branchformer and Conformer, this is sqrt(out_dim),
+ which is relevant when you add positional encoding.
+ :param pos_enc: positional encoding, applied after input_embedding_scale.
+ None (no positional encoding) by default, unlike standard Transformer.
+ E.g. :func:`rf.sinusoidal_positional_encoding` for absolute pos enc.
+ Note, relative positional encoding is usually part of the attention layer,
+ e.g. :class:`rf.RelPosSelfAttention`,
+ and nothing needs to be set here.
  :param input_dropout: applied after input_projection(input_layer(x))
  :param ff_dim: the dimension of feed-forward layers. 2048 originally, or 4 times out_dim
  :param ff_activation: activation function for feed-forward network
@@ -352,12 +364,22 @@ class ConformerEncoder(ISeqDownsamplingEncoder):
  else:
  raise TypeError(f"unexpected input_layer {input_layer!r}")
  self.input_layer = input_layer
- self.input_projection = (
- rf.Linear(self.input_layer.out_dim if self.input_layer else self.in_dim, self.out_dim, with_bias=False)
- if input_layer
- else None
- )
+ in_dim = self.input_layer.out_dim if self.input_layer else self.in_dim
+ self.input_projection = rf.Linear(in_dim, self.out_dim, with_bias=False) if in_dim != self.out_dim else None
+ if input_embedding_scale is None:
+ input_embedding_scale = (self.out_dim.dimension**0.5) if pos_enc is not None else 1.0
  self.input_embedding_scale = input_embedding_scale
+ if pos_enc is None:
+ pass
+ elif isinstance(pos_enc, dict):
+ pos_enc = rf.build_from_dict(pos_enc, feat_dim=self.out_dim)
+ elif isinstance(pos_enc, rf.Module):
+ pass
+ elif isinstance(pos_enc, FunctionType):
+ pos_enc = functools.partial(pos_enc, feat_dim=self.out_dim)
+ else:
+ raise TypeError(f"unexpected pos_enc type {pos_enc!r}")
+ self.pos_enc = pos_enc
  self.input_dropout = input_dropout

  if not encoder_layer or isinstance(encoder_layer, (dict, type)):
@@ -411,6 +433,8 @@ class ConformerEncoder(ISeqDownsamplingEncoder):
  x = self.input_projection(x_subsample) if self.input_projection else x_subsample
  if self.input_embedding_scale != 1.0:
  x = x * self.input_embedding_scale
+ if self.pos_enc is not None:
+ x = x + self.pos_enc(spatial_dim=out_spatial_dim)
  x = rf.dropout(x, self.input_dropout, axis=self.dropout_broadcast and self.out_dim)
  x = self.layers(x, spatial_dim=out_spatial_dim, collected_outputs=collected_outputs)
  return x, out_spatial_dim
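
A hedged usage sketch for the new pos_enc argument, assuming the usual (in_dim, out_dim) positional constructor arguments of ConformerEncoder and using rf.sinusoidal_positional_encoding as the absolute positional encoding, as suggested by the docstring above; the dims are made up:

    import returnn.frontend as rf
    from returnn.tensor import Dim
    from returnn.frontend.encoder.conformer import ConformerEncoder

    feat_dim = Dim(80, name="mel")   # hypothetical input feature dim
    enc_dim = Dim(512, name="enc")   # hypothetical model dim

    encoder = ConformerEncoder(
        feat_dim,
        enc_dim,
        num_layers=12,
        input_layer=None,  # or e.g. a ConformerConvSubsample frontend
        # Absolute positional encoding, added after input_embedding_scale;
        # with pos_enc set, input_embedding_scale now defaults to sqrt(out_dim).
        pos_enc=rf.sinusoidal_positional_encoding,
    )
    # Relative positional encoding (e.g. rf.RelPosSelfAttention) stays inside the attention layer;
    # in that case pos_enc can remain None.
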
@@ -1264,7 +1264,6 @@ class _DimMixin:
  raise TypeError(f"complete_dyn_size: _relu: unexpected type {type(a)}")

  y: Optional[_t.Tensor] = None # resulting dyn size
- y_max_value: Optional[_t.Tensor] = None # resulting dyn size max value
  inputs = list(op.inputs)
  assert inputs
  for x_dim in inputs:
@@ -1275,8 +1274,6 @@ class _DimMixin:
  if x_dim.dyn_size_ext is None and x_dim.dimension is None:
  return
  y = _bin_op(y, x_dim.dimension if x_dim.dimension is not None else x_dim.dyn_size_ext)
- if not template_only and y.raw_tensor is not None:
- y_max_value = _bin_op(y_max_value, x_dim.get_dim_value_tensor())
  assert y is not None, f"op {op}?"
  if self.dyn_size_ext is not None:
  assert self.dyn_size_ext.dim_tags == y.dim_tags
@@ -1286,9 +1283,14 @@ class _DimMixin:
  else:
  self.batch = y.batch
  self.dyn_size_ext = y
- if not template_only and y_max_value is not None:
- assert y_max_value is not None and y_max_value.raw_tensor is not None
- self._dyn_size_max_value = y_max_value
+ if not template_only and y.raw_tensor is not None:
+ # Note: Earlier, we had this wrong.
+ # It is not correct to replicate the same math (bin ops)
+ # on the dim values (_dyn_size_max_value of each dim).
+ # Consider sizes1=[2,3], sizes2=[5,4], and the op is "add".
+ # Then the result sizes would be [7,7], thus its max is 7,
+ # but max(sizes1)+max(sizes2)=3+5=8.
+ self._dyn_size_max_value = rf.reduce_max(y, axis=y.dims) if y.dims else y
  if tf and y.placeholder is not None:
  self.set_tag_on_size_tensor(y.placeholder)

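The counterexample from the new comment, checked numerically (plain NumPy, just to spell out the arithmetic): the maximum of the resulting sizes is not the same as applying the op to the per-dim maxima, so the max has to be reduced from the resulting sizes themselves:

    import numpy as np

    sizes1 = np.array([2, 3])
    sizes2 = np.array([5, 4])
    result_sizes = sizes1 + sizes2            # [7, 7]
    assert result_sizes.max() == 7            # correct _dyn_size_max_value
    assert sizes1.max() + sizes2.max() == 8   # what the removed code effectively computed
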
@@ -2080,6 +2082,8 @@ class _DimMixin:
  :return: self + other. note that this is not commutative, i.e. different from other + self.
  :rtype: Dim
  """
+ if isinstance(other, int) and other == 0:
+ return self
  cache_key = ("add", other)
  cache = self.get_same_base()._make_extra().cache_dim_math
  cache_entry = cache.get(cache_key, None)
@@ -2098,6 +2102,8 @@ class _DimMixin:
  :return: other + self
  :rtype: Dim
  """
+ if isinstance(other, int) and other == 0:
+ return self
  cache_key = ("add_left", other)
  cache = self.get_same_base()._make_extra().cache_dim_math
  cache_entry = cache.get(cache_key, None)
@@ -2115,6 +2121,8 @@ class _DimMixin:
  :param Dim|int other:
  :rtype: Dim
  """
+ if isinstance(other, int) and other == 0:
+ return self
  return self.sub_right(other)

  def sub_right(self: Dim, other):
@@ -2123,6 +2131,8 @@ class _DimMixin:
  :return: self - other
  :rtype: Dim
  """
+ if isinstance(other, int) and other == 0:
+ return self
  cache_key = ("sub", other)
  cache = self.get_same_base()._make_extra().cache_dim_math
  cache_entry = cache.get(cache_key, None)
@@ -2141,6 +2151,8 @@ class _DimMixin:
  :return: (-other) + self
  :rtype: Dim
  """
+ if isinstance(other, int) and other == 0:
+ return self
  cache_key = ("sub_left", other)
  cache = self.get_same_base()._make_extra().cache_dim_math
  cache_entry = cache.get(cache_key, None)
@@ -2158,6 +2170,8 @@ class _DimMixin:
  :param Dim|int other:
  :rtype: Dim
  """
+ if isinstance(other, int) and other == 1:
+ return self
  cache_key = ("mul", other)
  cache = self.get_same_base()._make_extra().cache_dim_math
  cache_entry = cache.get(cache_key, None)
@@ -2175,6 +2189,8 @@ class _DimMixin:
  :param Dim|int other:
  :rtype: Dim
  """
+ if isinstance(other, int) and other == 1:
+ return self
  cache_key = ("mul_left", other)
  cache = self.get_same_base()._make_extra().cache_dim_math
  cache_entry = cache.get(cache_key, None)
@@ -2192,6 +2208,8 @@ class _DimMixin:
  :param Dim|int other:
  :rtype: Dim
  """
+ if isinstance(other, int) and other == 1:
+ return self
  cache_key = ("floordiv", other)
  cache = self.get_same_base()._make_extra().cache_dim_math
  cache_entry = cache.get(cache_key, None)
@@ -2209,6 +2227,8 @@ class _DimMixin:
  :param Dim|int other:
  :rtype: Dim
  """
+ if isinstance(other, int) and other == 1:
+ return self
  return self.div_right(other)

  def div_left(self: Dim, other):
@@ -2216,6 +2236,8 @@ class _DimMixin:
  :param Dim|int other:
  :rtype: Dim
  """
+ if isinstance(other, int) and other == 1:
+ return self
  cache_key = ("truediv_left", other)
  cache = self.get_same_base()._make_extra().cache_dim_math
  cache_entry = cache.get(cache_key, None)
@@ -2233,6 +2255,8 @@ class _DimMixin:
  :param Dim|int other:
  :rtype: Dim
  """
+ if isinstance(other, int) and other == 1:
+ return self
  cache_key = ("truediv", other)
  cache = self.get_same_base()._make_extra().cache_dim_math
  cache_entry = cache.get(cache_key, None)
@@ -2250,6 +2274,8 @@ class _DimMixin:
  :param Dim|int other:
  :rtype: Dim
  """
+ if isinstance(other, int) and other == 1:
+ return self
  cache_key = ("ceildiv_left", other)
  cache = self.get_same_base()._make_extra().cache_dim_math
  cache_entry = cache.get(cache_key, None)
@@ -2267,6 +2293,8 @@ class _DimMixin:
  :param Dim|int other:
  :rtype: Dim
  """
+ if isinstance(other, int) and other == 1:
+ return self
  cache_key = ("ceildiv", other)
  cache = self.get_same_base()._make_extra().cache_dim_math
  cache_entry = cache.get(cache_key, None)
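
The series of changes above adds early-outs to the Dim arithmetic so that adding or subtracting 0 and multiplying or dividing by 1 return the very same Dim object instead of creating (or looking up) a derived dim. A small sketch of the resulting behavior, using a made-up static dim:

    from returnn.tensor import Dim

    feat = Dim(7, name="feat")
    # No-op dim math now short-circuits to the identical object:
    assert (feat + 0) is feat
    assert (feat - 0) is feat
    assert (feat * 1) is feat
    assert (feat // 1) is feat
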
returnn/util/basic.py CHANGED
@@ -1693,15 +1693,17 @@ def inplace_increment(x: numpy.ndarray, idx: numpy.ndarray, y: Union[numpy.ndarr
  raise NotImplementedError("This feature was removed with dropped Theano support")


- def prod(ls):
+ def prod(ls: Union[Iterable[T], numpy.ndarray]) -> Union[int, T, float]:
  """
- :param list[T]|tuple[T]|numpy.ndarray ls:
- :rtype: T|int|float
+ :param ls:
+ :return: ls[0] * ls[1] * ...
  """
- if len(ls) == 0:
+ it = iter(ls)
+ try:
+ x = next(it)
+ except StopIteration:
  return 1
- x = ls[0]
- for y in ls[1:]:
+ for y in it:
  x = x * y # *= doesn't work because x might be a tensor, and for e.g. torch.Tensor this op is in-place
  return x

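prod in returnn.util.basic now consumes any iterable (the merge_dims change above passes it a generator) instead of requiring a sequence; an empty input still yields 1. A quick usage sketch:

    from returnn.util.basic import prod

    assert prod([2, 3, 4]) == 24
    assert prod(x * x for x in range(1, 4)) == 36   # generators are accepted now
    assert prod([]) == 1                            # empty input unchanged
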
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: returnn
- Version: 1.20250826.155029
+ Version: 1.20250828.142552
  Summary: The RWTH extensible training framework for universal recurrent neural networks
  Home-page: https://github.com/rwth-i6/returnn/
  Author: Albert Zeyer
@@ -1,9 +1,9 @@
- returnn/PKG-INFO,sha256=ruOzYwqndb2UaDGaPErhnL7kyTDRDm0J-R2DCrSeNyI,5215
+ returnn/PKG-INFO,sha256=Uvf8zgSBctl_SphH-v2ikfVE9N-jlwEAGCiUPtlx8iY,5215
  returnn/__init__.py,sha256=biBtRsM0WZ406vShaeH-9WFoqJ8XwTbn6g0EeFJ7l8E,1012
  returnn/__main__.py,sha256=lHyZcu_0yc9f7Vf_Kfdy9PmeU0T76XVXnpalHi5WKro,31740
  returnn/__old_mod_loader__.py,sha256=nvsNY-xELdS_IPNkv66Q9Rmvg4dbGW0-EBRDcCmctos,7654
  returnn/__setup__.py,sha256=22kQn2fh11iPM0hLb2Fy5sLmoU1JGvmDxXRYuRgQkwU,4659
- returnn/_setup_info_generated.py,sha256=ZrVDjVsQQnP6xCFRVMbLAnFYhuL76CBWniasxrCMZSw,77
+ returnn/_setup_info_generated.py,sha256=QXZpIuHEV8wkbvcOm3273IFMbpbj15j4l3g_UoApY08,77
  returnn/config.py,sha256=3tmKhB6FnQZaNdtcYsiB61JnEY--iZ2qmJ4yq0b6tE0,29140
  returnn/forward_iface.py,sha256=A_OJiaXsX4MlXQRzST86ylyxSUZbC402PQL1REcqHjM,911
  returnn/learning_rate_control.py,sha256=ZvWryAn_tv9DhV8sh1LV3eE34Yltl3On3mYZAG4hR9s,34684
@@ -76,11 +76,11 @@ returnn/extern/graph_editor/transform.py,sha256=qMGSenpbAnGqdG6QP6iWjlm6_ccySYJa
  returnn/extern/graph_editor/util.py,sha256=HfRbyQPmQ6_n5-O-096n0KeJtllQXFtaurpeJS_URZ0,18706
  returnn/frontend/__init__.py,sha256=2aS7nbxXniIrBp2DODl0xN0f3IJ_dX4Bi9ZlR7W5_DE,1472
  returnn/frontend/_backend.py,sha256=39l5MC1DaT0MPklMM8HXAW9nqisIIZQ9g2QSHOOtPQE,50741
- returnn/frontend/_cache.py,sha256=JAhi7L-raQ3A-NC3JUYDtdRTwT3BGJJGGZxrZ8MfEWQ,8403
+ returnn/frontend/_cache.py,sha256=Uao2xzfvVaKABk1fkxcpXzxKIGJaI9FwwlTvvoNUstk,8550
  returnn/frontend/_numpy_backend.py,sha256=fZjks7p3dgxVZ6tSDazTTgBxNjJqXjfqgw_7mA7rDEE,9066
  returnn/frontend/_random_journal.py,sha256=_ktP_mjgx8vtQQGX_DofdhewJj0aPiczefTWeemPkmo,5457
  returnn/frontend/_utils.py,sha256=uVQldGHyYKIyhSEmumJ04ix5eP5tjZw4CEC0w6-zhyQ,12074
- returnn/frontend/array_.py,sha256=o_NSq87pB5I2XvFUjk40Dobqx6tTfEY1wzgmaelujgM,51511
+ returnn/frontend/array_.py,sha256=Up5cB5kPaBgGToDdnyhJ2KsoMWKIdmuRdXq6OdBLzi4,53754
  returnn/frontend/attention.py,sha256=GKt-Xqnz8sIyXVrE0i4VCS7J2Wu7dmoH_BA0Cu8CrXQ,45769
  returnn/frontend/backend.py,sha256=iQ9w4xl8Ea7bgpb0VUaCKq50rV5Bl2E5J8Rhd-oqD_c,883
  returnn/frontend/build_from_dict.py,sha256=rfWa2rjjhIR_kIQED_nMrygrQBunS6unegzWTLVbC98,3017
@@ -88,7 +88,7 @@ returnn/frontend/cond.py,sha256=gh6wg0aSbAJQfKRv4BQAu-EfPWtWPLFjgc8IaPPFmwg,1023
  returnn/frontend/const.py,sha256=A5fP9w6Akv56d89pPvdoZaXvC9ZTYcexepnS9O2clOc,3945
  returnn/frontend/container.py,sha256=wF3OlQN7WlOVmmdapUth_Unha3DVf6h1B7okBJAuJDA,8011
  returnn/frontend/control_flow_ctx.py,sha256=v17CsNwRnZYe8GdMtGJt2ftibfxMCGK1i0l-GX5ILu0,699
- returnn/frontend/conv.py,sha256=Q0q90-uu9d6qV-v8_DlFGxpZtc6FjfXVpfkkXmv1Alk,31959
+ returnn/frontend/conv.py,sha256=lca3hG0FO2IEOoe5OvOnm9NU2Ofx_RPqnCxZqxocUdM,32079
  returnn/frontend/device.py,sha256=Sjara0EmFLhu9O55cN_p6OwU0NgdNCCQjyAuQhiWpGw,1437
  returnn/frontend/dims.py,sha256=aH5FQ_m0xMD6Rj-BUWGx8lB-HkCuwZfMBf6mZbGGW5E,12611
  returnn/frontend/dropout.py,sha256=TjqZCKDIOBeHr14-NCemOm9m3p84LxQuPH1DvRAYg88,5028
@@ -138,7 +138,7 @@ returnn/frontend/decoder/__init__.py,sha256=A-koKyPVlXp_V_2bk6GKZ1Xfv4rYIcfxGMXQ
  returnn/frontend/decoder/transformer.py,sha256=20a37hMiPbQBHx3tSbOeiAbFPVRcX_KYpPuw8tmY6GU,23658
  returnn/frontend/encoder/__init__.py,sha256=0QGLlujRIKx3zBREeShza_-xhGIxj73zbd7t-g1m-ho,17
  returnn/frontend/encoder/base.py,sha256=A759EwCYAmSi-kzXz1vaTjR2l59TvNGQlzaNdp3UOKs,2109
- returnn/frontend/encoder/conformer.py,sha256=ro0uzEzDbAyNGYN5ff0KmiDl4HOYQluu64mJxYzuy-M,19972
+ returnn/frontend/encoder/conformer.py,sha256=rWulygolesbYkLw9naSxwygaZhWqKpHKEVj-1AQbel0,21351
  returnn/frontend/encoder/conformer_v2.py,sha256=vAYdT8m2Zzg3IIZZafeccClFHU1_c9T-EgBOsHadQPA,7701
  returnn/frontend/encoder/e_branchformer.py,sha256=SZdhpb90FaQdpzgvSOtFPLbLCa0NdycbB5Z4vMoY4TM,12279
  returnn/frontend/encoder/transformer.py,sha256=Jj0mF1D2MohOk-9sGYdsLtVW_86fwoq4pKWCdPMvPR8,11580
@@ -154,7 +154,7 @@ returnn/sprint/extern_interface.py,sha256=l-v1X-Yg0UpTFe7Y3c4FwWOqpSNuv9Oy5EzqlK
  returnn/sprint/interface.py,sha256=1j5SB0V8hSW8A5song9ciZtcBnZoKKfNipk9ezOIMuA,36491
  returnn/tensor/README.md,sha256=X6BqcRLrPLPnwF9yR69uqIFrMnNluj9pBkOPHwNgzuo,501
  returnn/tensor/__init__.py,sha256=on6j5PEOQpck50UcsR4nJzJSDmoVy34z1Oq4efv6Ax0,154
- returnn/tensor/_dim_extra.py,sha256=oxJgPxrYQvew8MrFcYo5YjbKFC7Dd2yR1kcGWAf0afg,122380
+ returnn/tensor/_dim_extra.py,sha256=VN7Smn1Q0Y0DO7GSPM-aJUhp_jy5pzSMJbPkCk6JnqY,123448
  returnn/tensor/_tensor_extra.py,sha256=gbSl6HMtn8WFYloanew_RaNNwx3eCpnKv3UfCkntJiQ,164923
  returnn/tensor/_tensor_mixin_base.py,sha256=H5z86I0NejxrSgMH1c5oXQzBqS6L9HpvP4y7oegBaSc,643
  returnn/tensor/_tensor_op_overloads.py,sha256=HklwuTBjy7mH_665VKaCUdu-oC3aa7Uz1ZQiCz4jeZc,5448
@@ -233,7 +233,7 @@ returnn/torch/util/gradient_checkpoint.py,sha256=iLy-FB65DC8O6LxzmMvFjnSdpIVpko8
  returnn/torch/util/module.py,sha256=MXHIrF9Isu575DDJIa81212ULKwdqu1oOLxDVZecVSk,1693
  returnn/torch/util/scaled_gradient.py,sha256=C5e79mpqtxdtw08OTSy413TSBSlOertRisc-ioiFIaU,3191
  returnn/util/__init__.py,sha256=UIG1qw4idqhW71BV60ha7h9PktxvEVcBIu0lYRossK8,336
- returnn/util/basic.py,sha256=UjHujX9pSu_dOgTxozWD0ujj5eSpyj_zD5vFU6bfyms,143096
+ returnn/util/basic.py,sha256=S2ABKcP0pf2UexuMXDNHGcfAu7GDSD2mr6OIByM152M,143168
  returnn/util/better_exchook.py,sha256=39yvRecluDgYhViwSkaQ8crJ_cBWI63KeEGuK4RKe5w,70843
  returnn/util/bpe.py,sha256=LWFhICZsEOnMwNws0lybPNzKRX6rSr8yKCvP65vjl9Y,19656
  returnn/util/debug.py,sha256=wuRzdg9zB84WWCGyTjmRR_zYypu8gXxlc0nZ6si9OC8,28224
@@ -253,8 +253,8 @@ returnn/util/sig_proc.py,sha256=Tjz0VOAVyqu2qDCF5HZ1JjALjcFsHcNkcd96WgZeKfE,7265
  returnn/util/task_system.py,sha256=y4sMVXQ25Qd2z0rx03uOlXlkE-jbCYC1Sjfn-XlraVU,26003
  returnn/util/train_proc_manager.py,sha256=Pjht28k6uz6BNQ47uW6Gf880iyq5q4wx7P_K2tmoAM8,3266
  returnn/util/watch_memory.py,sha256=BR5P2kvBN6UI81cE0_1WAA6Hd1SByLbBaiDxvLhPOew,4213
- returnn-1.20250826.155029.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
- returnn-1.20250826.155029.dist-info/METADATA,sha256=ruOzYwqndb2UaDGaPErhnL7kyTDRDm0J-R2DCrSeNyI,5215
- returnn-1.20250826.155029.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
- returnn-1.20250826.155029.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
- returnn-1.20250826.155029.dist-info/RECORD,,
+ returnn-1.20250828.142552.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
+ returnn-1.20250828.142552.dist-info/METADATA,sha256=Uvf8zgSBctl_SphH-v2ikfVE9N-jlwEAGCiUPtlx8iY,5215
+ returnn-1.20250828.142552.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
+ returnn-1.20250828.142552.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
+ returnn-1.20250828.142552.dist-info/RECORD,,