returnn 1.20250225.145151-py3-none-any.whl → 1.20250226.104737-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of returnn might be problematic.

returnn/PKG-INFO CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250225.145151
+Version: 1.20250226.104737
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer
returnn/_setup_info_generated.py CHANGED
@@ -1,2 +1,2 @@
-version = '1.20250225.145151'
-long_version = '1.20250225.145151+git.8debd7e'
+version = '1.20250226.104737'
+long_version = '1.20250226.104737+git.5a134ff'
returnn/frontend/_backend.py CHANGED
@@ -391,17 +391,17 @@ class Backend(Generic[T]):
         source: Tensor,
         *,
         dims: Sequence[Dim],
-        out_dim: Optional[Dim] = None,
-    ) -> Tuple[Tensor, Dim]:
+        out_dim: Dim,
+    ) -> Tensor:
         """
         Merges a list of axes into a single one. (Flatten the dims.)
         E.g. input is (batch, width, height, dim) and dims=(width,height), then we get (batch, width*height, dim).
         Or input is (batch, time, height, dim) and axes=(height,dim), then we get (batch, time, height*dim).
 
         :param source:
-        :param dims:
-        :param out_dim:
-        :return: tensor, out_dim
+        :param dims: list of dims to merge. len(dims) >= 2
+        :param out_dim: resulting merged dim
+        :return: tensor
         """
         raise NotImplementedError
 
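
The base Backend interface above now requires out_dim and returns only the tensor; resolving a default out_dim moves into the single frontend wrapper (rf.merge_dims in returnn/frontend/array_.py, see the hunks further down). A minimal hypothetical sketch of that calling-convention split, with illustrative names only (these are not RETURNN classes):

from typing import Optional, Sequence, Tuple


def backend_merge_dims(source, *, dims: Sequence[int], out_dim: int):
    """New backend-side contract: out_dim is required, only the tensor comes back."""
    return source  # a real backend would reshape the raw tensor here


def frontend_merge_dims(source, *, dims: Sequence[int], out_dim: Optional[int] = None) -> Tuple[object, int]:
    """Public-API side keeps the old convenience: out_dim may be None and is returned."""
    if out_dim is None:
        out_dim = 1
        for d in dims:
            out_dim *= d
    return backend_merge_dims(source, dims=dims, out_dim=out_dim), out_dim
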
@@ -939,8 +939,8 @@ class Backend(Generic[T]):
         raise NotImplementedError
 
     @staticmethod
-    def flip(source: Tensor, *, axis: Dim) -> Tensor:
-        """flip"""
+    def flip_no_mask(source: Tensor, *, axis: Dim) -> Tensor:
+        """flip, ignoring masking"""
         raise NotImplementedError
 
     @staticmethod
returnn/frontend/array_.py CHANGED
@@ -182,8 +182,29 @@ def merge_dims(
         else:
             out_dim = Dim(1, name="ext")
         return rf.expand_dim(source, out_dim), out_dim
+    if len(dims) == 1:
+        if out_dim is None or out_dim == dims[0]:
+            return source, dims[0]
+        return rf.replace_dim(source, in_dim=dims[0], out_dim=out_dim)
+    if out_dim is None:
+        out_dim = dims[0]
+        reset_dyn_size = False
+        for d in dims[1:]:
+            reset_dyn_size |= d.need_masking() and out_dim.capacity != 1
+            out_dim = out_dim * d
+        if reset_dyn_size:
+            # The dynamic sizes as calculated via dim math would not correctly describe how the tensor looks like.
+            # This would then potentially discard some of the data in the tensor in subsequent operations,
+            # when masking is applied.
+            # Thus, discard the dynamic sizes, and just treat it as a flat dim with scalar dynamic size.
+            # https://github.com/rwth-i6/returnn/issues/1694
+            out_dim_size = dims[0].get_dim_value_tensor()
+            for d in dims[1:]:
+                out_dim_size *= d.get_dim_value_tensor()
+            assert isinstance(out_dim_size, Tensor) and out_dim_size.dims == ()  # scalar
+            out_dim.dyn_size_ext = out_dim_size
     # noinspection PyProtectedMember
-    return source._raw_backend.merge_dims(source, dims=dims, out_dim=out_dim)
+    return source._raw_backend.merge_dims(source, dims=dims, out_dim=out_dim), out_dim
 
 
 def split_dims(
@@ -1066,7 +1087,7 @@ def reverse_sequence(tensor: Tensor, *, axis: Dim, handle_dynamic_dims: bool = T
     """
     if not handle_dynamic_dims or not axis.need_masking():
         # noinspection PyProtectedMember
-        return tensor._raw_backend.flip(tensor, axis=axis)
+        return tensor._raw_backend.flip_no_mask(tensor, axis=axis)
     indices = rf.combine_bc(axis.get_size_tensor(), "-", rf.range_over_dim(axis)) - 1
     return rf.gather(tensor, indices=indices, axis=axis, clip_to_valid=True)
 
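
For the reverse_sequence change above, a standalone PyTorch sketch (not RETURNN code) contrasting the two paths: a plain flip (the renamed flip_no_mask fast path) versus an index-based reversal that respects per-sequence lengths, mirroring the size - range - 1 indices with clip_to_valid.

import torch

x = torch.tensor([[1, 2, 3, 0, 0], [1, 2, 3, 4, 5]])  # padded with 0
seq_lens = torch.tensor([3, 5])

# flip_no_mask equivalent: reverses padding into the front of short sequences.
print(torch.flip(x, dims=[1]))  # [[0, 0, 3, 2, 1], [5, 4, 3, 2, 1]]

# Masked reversal: position i maps to (len - 1 - i), clamped to the valid range,
# analogous to rf.gather(..., indices=size - range - 1, clip_to_valid=True).
positions = torch.arange(x.shape[1])[None, :]  # (1, time)
indices = (seq_lens[:, None] - 1 - positions).clamp(min=0)
print(torch.gather(x, 1, indices))  # [[3, 2, 1, 1, 1], [5, 4, 3, 2, 1]]
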
returnn/tf/frontend_layers/_backend.py CHANGED
@@ -241,8 +241,8 @@ class ReturnnLayersBackend(Backend[Layer]):
         source: Tensor,
         *,
         dims: Sequence[Dim],
-        out_dim: Optional[Dim] = None,
-    ) -> Tuple[Tensor, Dim]:
+        out_dim: Dim,
+    ) -> Tensor:
         """
         Merges a list of axes into a single one. (Flatten the dims.)
         E.g. input is (batch, width, height, dim) and dims=(width,height), then we get (batch, width*height, dim).
@@ -251,18 +251,14 @@ class ReturnnLayersBackend(Backend[Layer]):
         :param source:
         :param dims:
         :param out_dim:
-        :return: tensor, out_dim
+        :return: tensor
         """
         if not isinstance(source, Tensor):
             raise TypeError(f"merge_dims: unexpected type for source {source!r}, need tensor")
-        if out_dim is None:
-            out_dim = dims[0]
-            for d in dims[1:]:
-                out_dim = out_dim * d
         layer = rfl.make_layer(
             {"class": "merge_dims", "from": source, "axes": dims, "out_dim": out_dim}, name="merge_dims"
         )
-        return layer, out_dim
+        return layer
 
     @staticmethod
     def split_dims(
@@ -692,7 +688,7 @@ class ReturnnLayersBackend(Backend[Layer]):
         )
 
     @staticmethod
-    def flip(source: Tensor, *, axis: Dim) -> Tensor:
+    def flip_no_mask(source: Tensor, *, axis: Dim) -> Tensor:
         """flip"""
         return rfl.make_layer(
             {"class": "slice", "from": source, "axis": axis, "out_dim": axis, "slice_step": -1}, name="flip"
returnn/torch/frontend/_backend.py CHANGED
@@ -262,8 +262,8 @@ class TorchBackend(Backend[torch.Tensor]):
         source: Tensor,
         *,
         dims: Sequence[Dim],
-        out_dim: Optional[Dim] = None,
-    ) -> Tuple[Tensor, Dim]:
+        out_dim: Dim,
+    ) -> Tensor:
         """
         Merges a list of axes into a single one. (Flatten the dims.)
         E.g. input is (batch, width, height, dim) and dims=(width,height), then we get (batch, width*height, dim).
@@ -272,18 +272,12 @@ class TorchBackend(Backend[torch.Tensor]):
         :param source:
         :param dims:
         :param out_dim:
-        :return: tensor, out_dim
+        :return: tensor
         """
-        assert dims
-        if len(dims) == 1:
-            return source, dims[0]
+        assert len(dims) >= 2
         first_axis = min(source.dims.index(d) for d in dims)
         pre_dims = source.dims[:first_axis]
        post_dims = [d for d in source.dims if d not in dims and d not in pre_dims]
-        if out_dim is None:
-            out_dim = dims[0]
-            for d in dims[1:]:
-                out_dim = out_dim * d
         source = source.copy_transpose(tuple(pre_dims) + tuple(dims) + tuple(post_dims), allow_int=False)
         out = Tensor(
             "merge_dims",
@@ -295,7 +289,7 @@ class TorchBackend(Backend[torch.Tensor]):
         out.raw_tensor = torch.reshape(source.raw_tensor, out_shape)
         if source.feature_dim and source.feature_dim in dims:
             out.feature_dim = out_dim
-        return out, out_dim
+        return out
 
     @staticmethod
     def split_dims(
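
A standalone PyTorch sketch (not RETURNN code) of what TorchBackend.merge_dims does with the raw tensor: transpose so the dims to merge sit next to each other, then collapse them with a single reshape; the example shapes are illustrative.

import torch

x = torch.randn(4, 7, 5, 3)  # e.g. (batch, time, height, feat)

# Merge height and feat (already adjacent): a reshape suffices.
merged = x.reshape(4, 7, 5 * 3)

# Merge time and feat (not adjacent): transpose first so they are side by side,
# mirroring source.copy_transpose(pre_dims + dims + post_dims) in the code above.
x_t = x.permute(0, 2, 1, 3).contiguous()  # (batch, height, time, feat)
merged2 = x_t.reshape(4, 5, 7 * 3)
print(merged.shape, merged2.shape)  # torch.Size([4, 7, 15]) torch.Size([4, 5, 21])
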
@@ -1185,8 +1179,8 @@ class TorchBackend(Backend[torch.Tensor]):
         return out
 
     @staticmethod
-    def flip(source: Tensor, *, axis: Dim) -> Tensor:
-        """flip"""
+    def flip_no_mask(source: Tensor, *, axis: Dim) -> Tensor:
+        """flip, ignoring masking"""
         axis_int = source.get_axis_from_description(axis, allow_int=False)
         out = source.copy_template("flip")
         out.raw_tensor = torch.flip(source.raw_tensor, [axis_int])
@@ -1224,6 +1218,8 @@ class TorchBackend(Backend[torch.Tensor]):
     @staticmethod
     def sort(source: Tensor, *, axis: Dim, descending: bool, stable: bool) -> Tuple[Tensor, Tensor, Dim]:
         """sort. return values and indices"""
+        if axis.need_masking():
+            raise NotImplementedError(f"sort: dynamic axis {axis} not supported")
         axis_int = source.get_axis_from_description(axis, allow_int=False)
         # Move to last axis. Should be more efficient.
         source = source.copy_move_axis(axis_int, -1)
returnn-1.20250226.104737.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250225.145151
+Version: 1.20250226.104737
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer
returnn-1.20250226.104737.dist-info/RECORD CHANGED
@@ -1,9 +1,9 @@
-returnn/PKG-INFO,sha256=0lRqYeJkO1RAESSTHtCKMfJxt5g_UsQD6RroLBWtTcc,5215
+returnn/PKG-INFO,sha256=5lh-Rq34xX-h2hjCNLJJZRLvlGG_Il6ERH0sGNnrsn4,5215
 returnn/__init__.py,sha256=biBtRsM0WZ406vShaeH-9WFoqJ8XwTbn6g0EeFJ7l8E,1012
 returnn/__main__.py,sha256=qBFbuB1yN3adgVM5pXt2-Yq9vorjRNchNPL8kDKx44M,31752
 returnn/__old_mod_loader__.py,sha256=nvsNY-xELdS_IPNkv66Q9Rmvg4dbGW0-EBRDcCmctos,7654
 returnn/__setup__.py,sha256=22kQn2fh11iPM0hLb2Fy5sLmoU1JGvmDxXRYuRgQkwU,4659
-returnn/_setup_info_generated.py,sha256=5JJsfXcG4qAzzZoLcP7WWGdNPTtjJyUkQW4R9nn8848,77
+returnn/_setup_info_generated.py,sha256=6dgSPP2rSJRBVLNxzHUptIagp7HFYq8QefRn7VHPiOI,77
 returnn/config.py,sha256=3tmKhB6FnQZaNdtcYsiB61JnEY--iZ2qmJ4yq0b6tE0,29140
 returnn/forward_iface.py,sha256=A_OJiaXsX4MlXQRzST86ylyxSUZbC402PQL1REcqHjM,911
 returnn/learning_rate_control.py,sha256=ZvWryAn_tv9DhV8sh1LV3eE34Yltl3On3mYZAG4hR9s,34684
@@ -75,12 +75,12 @@ returnn/extern/graph_editor/subgraph.py,sha256=R3uIFqWgiL7L5S4YATm9o9a3wfEa_mSb4
 returnn/extern/graph_editor/transform.py,sha256=d9fEgu0JC342q0g9niVxRWMKzkQQA9mrrajBGcU1o_s,29349
 returnn/extern/graph_editor/util.py,sha256=QMrQeQZ7lJwsrNQub9tof0h3quEaoHiGJaZmogQ7jXE,18707
 returnn/frontend/__init__.py,sha256=2aS7nbxXniIrBp2DODl0xN0f3IJ_dX4Bi9ZlR7W5_DE,1472
-returnn/frontend/_backend.py,sha256=lRAtOT0oAkgc_WGYBUviGbgIH3Yet6D17sjlEJH56Pg,50327
+returnn/frontend/_backend.py,sha256=TNkEdj9GKxJfSM1ZMQ_SdAQzn2TU7SQbG6JGdaWhUeI,50374
 returnn/frontend/_cache.py,sha256=JAhi7L-raQ3A-NC3JUYDtdRTwT3BGJJGGZxrZ8MfEWQ,8403
 returnn/frontend/_numpy_backend.py,sha256=2oCtG0YCWL_89v4cD_jDj8em1O_Fp-_YWl5EblGi_yo,7858
 returnn/frontend/_random_journal.py,sha256=_ktP_mjgx8vtQQGX_DofdhewJj0aPiczefTWeemPkmo,5457
 returnn/frontend/_utils.py,sha256=4A3MSRM0i86J77550uR_AjcBEPu6nymLUZ9Xd1V3Fkc,12073
-returnn/frontend/array_.py,sha256=ngNWAxuiIVuOLgaYXW-iYaaofck-ZFfa5gD2aUeOQJ8,48005
+returnn/frontend/array_.py,sha256=M5vCeH0nlwJ-zrdjbZpsMLN6StOLn0iM7PnXvGLLE3g,49154
 returnn/frontend/attention.py,sha256=GKt-Xqnz8sIyXVrE0i4VCS7J2Wu7dmoH_BA0Cu8CrXQ,45769
 returnn/frontend/backend.py,sha256=iQ9w4xl8Ea7bgpb0VUaCKq50rV5Bl2E5J8Rhd-oqD_c,883
 returnn/frontend/build_from_dict.py,sha256=rfWa2rjjhIR_kIQED_nMrygrQBunS6unegzWTLVbC98,3017
@@ -177,7 +177,7 @@ returnn/tf/sprint.py,sha256=Yqjh0-6sCWHpdDPQCzHKx7TwQCOjJyjfd0KHtnYdd-8,5471
 returnn/tf/updater.py,sha256=St4Z5iBjlkWaB6CiS-K1VNc_iLaan2e6-mVMTTPldzk,72034
 returnn/tf/frontend_layers/README.md,sha256=P4vVl_EK-4jT55m40mq-K4Nr9yFY0tJR5fmDzTHSDFE,1096
 returnn/tf/frontend_layers/__init__.py,sha256=MGUn7rv6fOefbtkX-5pq6fC1T6Y5h0oh1uOPSEcv1_I,506
-returnn/tf/frontend_layers/_backend.py,sha256=8lWE6LxxdNx8FnFvp2Pnk-UqJ8oymxXwx7s9HTEgDug,47443
+returnn/tf/frontend_layers/_backend.py,sha256=U7rbRY9XgMkxxyWY2D8KG-KesSOEGLCxn-Gl6dgwmPc,47277
 returnn/tf/frontend_layers/_utils.py,sha256=ijByaDOqPDod5mZC9EoTkt8PHBEODXHsWbkwDOF9XW4,4205
 returnn/tf/frontend_layers/cond.py,sha256=yQ2h5W0sgMZndJdrWv2EE9k9yIcspQ1U0HwBSh3hOKE,14830
 returnn/tf/frontend_layers/config_entry_points.py,sha256=t01RWOiaZohzuqPXX-MLV0P5yCOfE0dz-9dZ77_pK4c,5751
@@ -216,7 +216,7 @@ returnn/torch/data/queued_data_iter.py,sha256=PoOsGHdHVZjTmcyfq_ZOw--P6hyfTdmAWI
 returnn/torch/data/returnn_dataset_wrapper.py,sha256=1Bw82-Ge_8m_DSDXZNqQ3zGDic2HQlp6jysELL0NVK0,7369
 returnn/torch/data/tensor_utils.py,sha256=-Teqi--LLbt6q_5mDRdoHZHmPgSdC83W706ukif_YiU,1284
 returnn/torch/frontend/__init__.py,sha256=AA48HZnC17ASuKA0EWy8loZ-Bib_yUtqF4T1wYvjst4,62
-returnn/torch/frontend/_backend.py,sha256=6GDHPo3hqqANUGsTX1iozonHFCeL5vSWuI9-420jAXU,101328
+returnn/torch/frontend/_backend.py,sha256=TqyDWNP4XCvJNNGn8jyxaT8BOEjVE24QCUR3qsTIS3A,101242
 returnn/torch/frontend/_rand.py,sha256=1JgIkV2XmpgJD86zXZ-NCAe-QuoP2swr6NaS1oz3Qa8,1830
 returnn/torch/frontend/bridge.py,sha256=Z2_UW8AagezC7zsXDc5PKcd8G9WwisV7j9SWGHU0m4U,7840
 returnn/torch/frontend/raw_ops.py,sha256=lF0h-KtYYsdaaqQADylVZp9qzPskOOXA4MfmYDyx5IU,296
@@ -253,8 +253,8 @@ returnn/util/sig_proc.py,sha256=Tjz0VOAVyqu2qDCF5HZ1JjALjcFsHcNkcd96WgZeKfE,7265
 returnn/util/task_system.py,sha256=y4sMVXQ25Qd2z0rx03uOlXlkE-jbCYC1Sjfn-XlraVU,26003
 returnn/util/train_proc_manager.py,sha256=Pjht28k6uz6BNQ47uW6Gf880iyq5q4wx7P_K2tmoAM8,3266
 returnn/util/watch_memory.py,sha256=BR5P2kvBN6UI81cE0_1WAA6Hd1SByLbBaiDxvLhPOew,4213
-returnn-1.20250225.145151.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
-returnn-1.20250225.145151.dist-info/METADATA,sha256=0lRqYeJkO1RAESSTHtCKMfJxt5g_UsQD6RroLBWtTcc,5215
-returnn-1.20250225.145151.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
-returnn-1.20250225.145151.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
-returnn-1.20250225.145151.dist-info/RECORD,,
+returnn-1.20250226.104737.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
+returnn-1.20250226.104737.dist-info/METADATA,sha256=5lh-Rq34xX-h2hjCNLJJZRLvlGG_Il6ERH0sGNnrsn4,5215
+returnn-1.20250226.104737.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
+returnn-1.20250226.104737.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
+returnn-1.20250226.104737.dist-info/RECORD,,