returnn 1.20251027.232712__py3-none-any.whl → 1.20260105.192646__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- returnn/PKG-INFO +2 -2
- returnn/_setup_info_generated.py +2 -2
- returnn/datasets/lm.py +20 -0
- returnn/datasets/meta.py +93 -43
- returnn/datasets/postprocessing.py +597 -108
- returnn/datasets/util/vocabulary.py +90 -0
- returnn/frontend/array_.py +45 -0
- returnn/frontend/attention.py +54 -20
- returnn/frontend/conv.py +273 -54
- returnn/frontend/encoder/conformer.py +1 -0
- returnn/frontend/encoder/transformer.py +2 -0
- returnn/frontend/loss.py +40 -1
- returnn/native_op.cpp +80 -0
- returnn/sprint/cache.py +12 -13
- returnn/tensor/utils.py +7 -4
- returnn/tf/frontend_layers/_backend.py +4 -2
- returnn/tf/layers/basic.py +14 -38
- returnn/tf/native_op.py +11 -58
- returnn/tf/network.py +1 -1
- returnn/tf/util/basic.py +19 -0
- returnn/torch/engine.py +37 -3
- returnn/torch/frontend/_backend.py +114 -7
- returnn/torch/util/exception_helper.py +7 -1
- returnn/util/basic.py +3 -6
- returnn/util/better_exchook.py +4 -0
- returnn/util/debug.py +11 -2
- returnn/util/file_cache.py +15 -1
- returnn/util/task_system.py +1 -1
- {returnn-1.20251027.232712.dist-info → returnn-1.20260105.192646.dist-info}/METADATA +2 -2
- {returnn-1.20251027.232712.dist-info → returnn-1.20260105.192646.dist-info}/RECORD +33 -33
- {returnn-1.20251027.232712.dist-info → returnn-1.20260105.192646.dist-info}/LICENSE +0 -0
- {returnn-1.20251027.232712.dist-info → returnn-1.20260105.192646.dist-info}/WHEEL +0 -0
- {returnn-1.20251027.232712.dist-info → returnn-1.20260105.192646.dist-info}/top_level.txt +0 -0
returnn/tf/frontend_layers/_backend.py
CHANGED

@@ -1067,14 +1067,16 @@ class ReturnnLayersBackend(Backend[Layer]):
             s = filter_size[i].dimension if not strides else strides[i]
             if filter_size[i].dimension == s == 1 or (s == 1 and padding.lower() == "same"):
                 out_spatial_dims[i] = in_spatial_dims[i]
-
+        assert all(size.is_static() for size in filter_size)
+        layer_dict: Dict[str, Any] = {
             "class": "transposed_conv",
             "from": source,
             "in_dim": in_dim,
             "in_spatial_dims": in_spatial_dims,
             "out_dim": out_dim,
             "out_spatial_dims": out_spatial_dims,
-            "filter_size": filter_size,
+            "filter_size": [size.dimension for size in filter_size],
+            "filter_perm": list(filter_size) + [out_dim, in_dim],
             "padding": padding,
         }
         if remove_padding:

returnn/tf/layers/basic.py
CHANGED

@@ -7371,7 +7371,7 @@ class TransposedConvLayer(_ConcatInputLayer):
         """
         from returnn.tf.util.basic import get_initializer, get_activation_function, get_shape

-        super(TransposedConvLayer, self).__init__(**kwargs)
+        super(TransposedConvLayer, self).__init__(in_dim=in_dim, **kwargs)
         out_dim  # noqa  # via get_out_data_from_opts
         assert not self.input_data.sparse
         assert self.input_data.have_batch_axis()

@@ -7516,7 +7516,10 @@
     ):
         """
         Determines output length of a transposed convolution given input length.
-
+
+        Copied from TF/Keras conv_utils.deconv_output_length
+        (https://github.com/tensorflow/tensorflow/blob/5912f51d580551e5cee2cfde4cb882594b4d3e60/tensorflow/python/keras/utils/conv_utils.py#L140),
+        adapted with simplification.

         Also see :func:`ConvLayer.calc_out_dim`.

@@ -7533,44 +7536,17 @@
         """
         if out_dim and out_dim.is_dim_known():
            return out_dim.get_dim_value()
-        assert padding in {"same", "valid", "full"}
-
-        # Get the dilated kernel size
-        filter_size = filter_size + (filter_size - 1) * (dilation - 1)

-
-        input_length = input_length * stride
+        import returnn.frontend as rf

-
-
-
-
-
-
-
-
-                if isinstance(input_length, Dim):
-                    length = input_length - (stride + filter_size - 2)
-                else:
-                    length = tf_util.simplify_add(input_length, -(stride + filter_size - 2))
-            elif padding == "same":
-                length = input_length
-            else:
-                raise Exception("invalid padding %r" % (padding,))
-        else:  # output_padding
-            if padding == "same":
-                pad = filter_size // 2
-            elif padding == "valid":
-                pad = 0
-            elif padding == "full":
-                pad = filter_size - 1
-            else:
-                raise Exception("invalid padding %r" % (padding,))
-            if isinstance(input_length, Dim):
-                length = input_length + (-stride + filter_size - 2 * pad + output_padding)
-            else:
-                length = tf_util.simplify_add(input_length, -stride + filter_size - 2 * pad + output_padding)
-        return length
+        return rf.calc_transposed_conv_out_length(
+            input_length,
+            filter_size=filter_size,
+            padding=padding,
+            output_padding=output_padding,
+            stride=stride,
+            dilation_rate=dilation,
+        )

     @classmethod
     def get_out_data_from_opts(

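Note on the refactor above: the removed branches were the Keras-derived length computation, now
delegated to rf.calc_transposed_conv_out_length. A minimal sketch of that computation, assuming it
still follows the TF/Keras conv_utils.deconv_output_length formula referenced in the new docstring
(the RF helper itself is not part of this diff, so this is an illustration, not its implementation):

    def deconv_output_length(input_length, filter_size, padding, output_padding=None, stride=1, dilation=1):
        assert padding in {"same", "valid", "full"}
        # dilated kernel size
        filter_size = filter_size + (filter_size - 1) * (dilation - 1)
        if output_padding is None:
            if padding == "valid":
                return input_length * stride + max(filter_size - stride, 0)
            if padding == "full":
                return input_length * stride - (stride + filter_size - 2)
            return input_length * stride  # "same"
        if padding == "same":
            pad = filter_size // 2
        elif padding == "valid":
            pad = 0
        else:  # "full"
            pad = filter_size - 1
        return (input_length - 1) * stride + filter_size - 2 * pad + output_padding

    # e.g. stride 2, kernel 3, padding "valid": 10 frames upsample to 10 * 2 + max(3 - 2, 0) = 21
    assert deconv_output_length(10, filter_size=3, padding="valid", stride=2) == 21
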
returnn/tf/native_op.py
CHANGED

@@ -528,77 +528,30 @@ class OpMaker:
     def _make_mod(self):
         if self.cache_key in self.mod_cache:
             return self.mod_cache[self.cache_key]
-
-
-        #
-        #
-        #
-        #
-        #
-        # In other cases, it's probably needed, but it's not so clear which lib has the
-        # right symbols (e.g. the `sgemm_` symbol).
+
+        # Note about BLAS / matmul:
+        # Earlier, we assumed that TensorFlow/Eigen used BLAS internally,
+        # and our code directly called BLAS sgemm_, so we needed to link directly to BLAS.
+        # Now, by default, we use the underlying Eigen library,
+        # which is the same code path that TF also uses for CPU matmul.
+        # Only if an explicit BLAS library is specified, we use that instead.
         ld_flags = []
-
+        c_macro_defines = {}

         if self.blas_lib is not None and os.path.exists(self.blas_lib):
             path = os.path.dirname(self.blas_lib)
             if path == "":
                 path = "."
             ld_flags += ["-L%s" % path, "-l:%s" % os.path.basename(self.blas_lib)]
-
-
-            from returnn.util.basic import find_sgemm_libs_from_runtime
-
-            libs = find_sgemm_libs_from_runtime()
-            if libs:
-                numpy_libs = [fn for fn in libs if "/numpy/.libs/" in fn]
-                if numpy_libs:
-                    # Prefer Numpy; move to front.
-                    libs = numpy_libs + [fn for fn in libs if fn not in numpy_libs]
-                if self.blas_lib is not None:
-                    libs = [lib for lib in libs if self.blas_lib in lib]
-                for fn in libs:
-                    ld_flags += ["-L%s" % os.path.dirname(fn), "-l:%s" % os.path.basename(fn)]
-                have_blas_lib = True
-        if not have_blas_lib and self.search_for_numpy_blas:
-            # Find related Numpy libs.
-            # Numpy usually comes with OpenBlas, and Numpy is probably loaded anyway.
-            # Even do this before the other libs below, as it is likely
-            # that this OpenBlas lib is correctly initialized already.
-            import numpy
-
-            numpy_dir = os.path.dirname(numpy.__file__)
-            if os.path.exists("%s/.libs" % numpy_dir):
-                ld_flags += ["-L%s/.libs" % numpy_dir]
-                from glob import glob
-
-                for f in glob("%s/.libs/*.so" % numpy_dir):
-                    f = os.path.basename(f)
-                    if self.blas_lib is not None and self.blas_lib not in f:
-                        continue
-                    if f.startswith("lib"):
-                        f = f[3:]
-                    if f.endswith(".so"):
-                        f = f[:-3]
-                    ld_flags += ["-l%s" % f]
-                    have_blas_lib = True
-        if not have_blas_lib and self.search_for_system_blas:
-            # Try to just link against blas/f77blas
-            # (both can potentially have the symbol) if it finds the lib.
-            if find_lib("blas"):
-                ld_flags += ["-lblas"]
-                have_blas_lib = True
-            if find_lib("f77blas"):
-                ld_flags += ["-lf77blas"]
-                have_blas_lib = True
-        if not have_blas_lib:
-            print("WARNING: OpMaker: no BLAS lib found")
+            c_macro_defines["HAVE_CUSTOM_BLAS"] = "1"
+
         comp = tf_util.OpCodeCompiler(
             base_name=self.name,
             code_version=self.description.code_version,
             code=self._make_code(),
             include_deps=[self.support_native_op_cpp_filename],
             ld_flags=ld_flags,
+            c_macro_defines=c_macro_defines,
             use_cuda_if_available=self.with_cuda,
             log_stream=self.log_stream,
             **dict(self.compiler_opts),

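The HAVE_CUSTOM_BLAS define and the new c_macro_defines argument replace the old runtime library
search. A small sketch of how such macro defines usually end up on the compile command line (this is
an assumption about the NativeCodeCompiler/OpCodeCompiler internals, which are not part of this diff):

    c_macro_defines = {"HAVE_CUSTOM_BLAS": "1", "TF_MAJOR_VERSION": "2"}
    flags = ["-D%s=%s" % (k, v) for k, v in sorted(c_macro_defines.items())]
    print(flags)  # ['-DHAVE_CUSTOM_BLAS=1', '-DTF_MAJOR_VERSION=2']
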
returnn/tf/network.py
CHANGED

@@ -4428,7 +4428,7 @@ def help_on_tf_exception(
                 data = extern_data.data[data_key]
                 info += ", %s" % data
             print(" %r: %s" % (key, info), file=file)
-            if data and data.sparse:
+            if data is not None and data.sparse:
                 if v_minmax[0] < 0 or v_minmax[1] >= data.dim:
                     print(" WARNING, invalid label for data", data, file=file)
     elif feed_dict is None:

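The truthiness fix above avoids relying on bool(data). A generic Python illustration of why the two
checks differ (the class here is made up for the example):

    class _FalsyButPresent:
        def __bool__(self):
            return False

    data = _FalsyButPresent()
    assert data is not None
    assert not bool(data)  # "if data:" would skip the sparse check even though data exists
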
returnn/tf/util/basic.py
CHANGED

@@ -2784,6 +2784,10 @@ class CudaEnv:
             self.cuda_path = None
             if self.verbose_find_cuda:
                 print("CUDA disabled via env DISABLE_CUDA.")
+        elif os.environ.get("CUDA_VISIBLE_DEVICES", None) in ["", "-1"]:
+            self.cuda_path = None
+            if self.verbose_find_cuda:
+                print(f"CUDA disabled via env CUDA_VISIBLE_DEVICES={os.environ['CUDA_VISIBLE_DEVICES']!r}.")
         else:
             self.cuda_path = self._find_cuda_path()
             if self.verbose_find_cuda:

@@ -3020,6 +3024,21 @@ class OpCodeCompiler(NativeCodeCompiler):
             ld_flags += tf.sysconfig.get_link_flags()
         elif have_min_tf_version((1, 4)):
             ld_flags += ["-L%s" % tf.sysconfig.get_lib(), "-ltensorflow_framework"]
+        if have_min_tf_version((2, 20)):
+            # TF 2.20 removed TF_MAJOR_VERSION and co from version.h,
+            # and one is supposed to define these macros externally.
+            # Also, release_version.h was added to define TF_VERSION_STRING based on this (if needed).
+            # https://github.com/tensorflow/tensorflow/commit/c8f0e0620e5678d0f165a07e64114024a966ab7f
+            major, minor, patch = tf.__version__.split(".", 2)
+            patch, suffix = patch.split("-", 1) if "-" in patch else (patch, "")
+            c_macro_defines.update(
+                {
+                    "TF_MAJOR_VERSION": major,
+                    "TF_MINOR_VERSION": minor,
+                    "TF_PATCH_VERSION": patch,
+                    "TF_VERSION_SUFFIX": suffix,
+                }
+            )
         use_cxx11_abi = getattr(getattr(tf, "sysconfig", tf), "CXX11_ABI_FLAG", getattr(tf, "CXX11_ABI_FLAG", False))
         super(OpCodeCompiler, self).__init__(
             include_paths=include_paths,

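The TF version macros above are derived by plain string splitting; a quick check of how the two split
steps behave for typical TensorFlow version strings (no assumptions beyond the code shown):

    for version in ["2.20.0", "2.20.0-rc1"]:
        major, minor, patch = version.split(".", 2)
        patch, suffix = patch.split("-", 1) if "-" in patch else (patch, "")
        print(major, minor, patch, repr(suffix))
    # 2 20 0 ''
    # 2 20 0 'rc1'
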
returnn/torch/engine.py
CHANGED

@@ -532,7 +532,7 @@ class Engine(EngineBase):
             for key, val in eval_info.items():
                 self._tensorboard_writer.add_scalar(f"train/{key}", val, global_step=self.global_train_step)
             self._tensorboard_writer.add_scalar(
-
+                "train/learning_rate",
                 self._updater.get_effective_learning_rate(),
                 global_step=self.global_train_step,
             )

@@ -930,7 +930,7 @@
         if not os.path.exists(filename) and os.path.exists(model_epoch_filename):
             filename = model_epoch_filename
         print("Load model %s" % (filename,), file=log.v4)
-        checkpoint_state =
+        checkpoint_state = _torch_load(filename, device=self._device)
         if epoch is None:
             epoch = checkpoint_state.get("epoch", self._start_epoch or 1)
         step = checkpoint_state.get("step", 1)

@@ -1030,7 +1030,7 @@
                 print("(No relevant parameters matching.)", file=log.v3)
                 continue
             print(f"Pre-load weights for key '{preload_key}' from {opts['filename']}", file=log.v3)
-            preload_model_state =
+            preload_model_state = _torch_load(opts["filename"], device=self._device)
             if opts.get("checkpoint_key", "model") is not None:
                 # This can be used if an external checkpoint saves a checkpoint a different structure that just the
                 # model state dict. E.g., if a checkpoint is created using

@@ -1063,6 +1063,28 @@
            preload_model_state_keys = set(preload_model_state.keys())
            loaded_state_keys.update(preload_model_state.keys())
            missing_keys.difference_update(preload_model_state.keys())
+
+            custom_missing_load_func = opts.get("custom_missing_load_func")
+            if custom_missing_load_func:
+                custom_missing_vars_map = {}
+                for var_name in missing_keys_preload:
+                    var_shape = self._pt_model.state_dict()[var_name].shape
+                    var_val = custom_missing_load_func(
+                        name=var_name,
+                        shape=var_shape,
+                        preload_model_state=preload_model_state,
+                        **util.get_fwd_compat_kwargs(),
+                    )
+                    if var_val is not None:
+                        assert var_val.shape == var_shape
+                        custom_missing_vars_map[var_name] = var_val
+                preload_model_state.update(custom_missing_vars_map)
+                missing_keys_preload, unexpected_keys_preload = self._pt_model.load_state_dict(
+                    preload_model_state, strict=False
+                )
+                loaded_state_keys.update(preload_model_state.keys())
+                missing_keys.difference_update(preload_model_state.keys())
+
            del preload_model_state
            gc.collect()

@@ -1700,3 +1722,15 @@ def _get_total_grad_norm(model: torch.nn.Module, p: float) -> float:
             p=p,
         ).item()
     )
+
+
+def _torch_load(filename: Union[str, os.PathLike], *, device: str) -> Dict[str, Any]:
+    # Might resolve PtCheckpoint or Sisyphus Path objects or so.
+    filename = os.fspath(filename)
+
+    if filename.endswith(".safetensors"):
+        from safetensors.torch import load_file as safetensors_load
+
+        return safetensors_load(filename, device=device)
+
+    return torch.load(filename, map_location=device)

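Two behaviors worth noting in the engine changes above: checkpoints ending in ".safetensors" are now
loaded via safetensors.torch.load_file, and the preload opts gain a "custom_missing_load_func" hook
for parameters absent from the preloaded state dict. A hedged usage sketch (the hook signature is
inferred from the call site above; the surrounding preload_from_files config structure is assumed):

    import torch

    def my_missing_load_func(*, name, shape, preload_model_state, **_other_fwd_compat):
        # Return a tensor of the requested shape to fill the missing parameter, or None to leave it missing.
        if name.endswith(".bias"):
            return torch.zeros(shape)
        return None

    preload_from_files = {
        "base": {
            "filename": "base-model.safetensors",  # or a regular .pt checkpoint
            "custom_missing_load_func": my_missing_load_func,
        },
    }
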
returnn/torch/frontend/_backend.py
CHANGED

@@ -1166,20 +1166,29 @@
         if start is None:
             start = 0
         if isinstance(size, Dim):
+            assert end is None
             size = size.get_dim_value()
         elif isinstance(size, Tensor):
+            assert end is None
             assert size.dims == ()  # scalar
             size = size.raw_tensor
-
-
-
-        else:
+        elif isinstance(size, int):
+            pass
+        elif size is None:
             if isinstance(end, Tensor):
                 assert end.dims == ()
                 end = end.raw_tensor
-
+            elif isinstance(end, int):
+                if end < 0:
+                    end += axis.get_dim_value()
+            elif end is None:
                 end = axis.get_dim_value()
-
+            else:
+                raise TypeError(f"slice: unsupported type for end: {type(end)}")
+            size = end - start
+        else:
+            raise TypeError(f"slice: unsupported type for size: {type(size)}")
+        out.raw_tensor = torch.narrow(source.raw_tensor, dim=axis_int, start=start, length=size)
         return out

     @staticmethod

@@ -1920,7 +1929,7 @@
         if not out_spatial_dims:
             out_spatial_dims = rf.make_conv_out_spatial_dims(
                 in_spatial_dims=in_spatial_dims,
-                filter_size=
+                filter_size=filter_size,
                 strides=strides or 1,
                 dilation_rate=dilation_rate or 1,
                 padding=padding,

@@ -2033,6 +2042,104 @@
         out.feature_dim = out_dim
         return out, out_spatial_dims

+    # noinspection PyShadowingBuiltins
+    @staticmethod
+    def transposed_conv(
+        source: Tensor,
+        *,
+        in_dim: Dim,
+        out_dim: Dim,
+        in_spatial_dims: Sequence[Dim],
+        out_spatial_dims: Optional[Sequence[Dim]] = None,
+        filter: Tensor,
+        filter_size: Sequence[Dim],
+        padding: str,
+        remove_padding: Union[Sequence[int], int] = 0,
+        output_padding: Optional[Union[Sequence[Optional[int]], int]] = None,
+        strides: Optional[Sequence[int]] = None,
+        bias: Optional[Tensor] = None,
+    ) -> Tuple[Tensor, Sequence[Dim]]:
+        """transposed convolution"""
+        if not out_spatial_dims:
+            out_spatial_dims = rf.make_transposed_conv_out_spatial_dims(
+                in_spatial_dims=in_spatial_dims,
+                filter_size=filter_size,
+                strides=strides,
+                padding=padding,
+                output_padding=output_padding,
+            )
+        assert remove_padding == 0  # not implemented yet otherwise...
+        if strides is None:
+            strides = [fs.dimension for fs in filter_size]
+        filter_dims = (in_dim, out_dim) + tuple(filter_size)
+        filter = filter.copy_transpose(filter_dims)
+        batch_dims = [d for d in source.dims if d not in (in_dim,) + tuple(in_spatial_dims)]
+        # Torch conv expects (N,C,<spatial dims>) as shape.
+        source = source.copy_transpose(batch_dims + [in_dim] + list(in_spatial_dims))
+        if len(batch_dims) == 1:
+            src_raw = source.raw_tensor
+        else:
+            src_raw = torch.reshape(
+                source.raw_tensor,
+                # potentially merge batch dims all together
+                [-1, in_dim.get_dim_value()] + [d.get_dim_value() for d in in_spatial_dims],
+            )
+        if padding == "same":
+            raise NotImplementedError("transposed_conv with padding='same' not implemented")
+        if padding == "valid":
+            padding_val = 0
+        else:
+            raise ValueError(f"invalid padding {padding!r}, expected 'same' or 'valid'")
+        if len(filter_size) == 1:
+            out_raw = torch.nn.functional.conv_transpose1d(
+                src_raw,
+                weight=filter.raw_tensor,
+                bias=bias.raw_tensor if bias is not None else None,
+                stride=strides,
+                padding=padding_val,
+                output_padding=output_padding or 0,
+            )
+        elif len(filter_size) == 2:
+            out_raw = torch.nn.functional.conv_transpose2d(
+                src_raw,
+                weight=filter.raw_tensor,
+                bias=bias.raw_tensor if bias is not None else None,
+                stride=strides,
+                padding=padding_val,
+                output_padding=output_padding or 0,
+            )
+        elif len(filter_size) == 3:
+            out_raw = torch.nn.functional.conv_transpose3d(
+                src_raw,
+                weight=filter.raw_tensor,
+                bias=bias.raw_tensor if bias is not None else None,
+                stride=strides,
+                padding=padding_val,
+                output_padding=output_padding or 0,
+            )
+        else:
+            raise ValueError(f"invalid number of filter dims {filter_size}, expected 1, 2, or 3")
+        if remove_padding:
+            if isinstance(remove_padding, int):
+                remove_padding = [remove_padding] * len(out_spatial_dims)
+            assert len(remove_padding) == len(out_spatial_dims)
+            slices = [slice(None)] * out_raw.ndim
+            for i, pad in enumerate(remove_padding):
+                if pad > 0:
+                    slices[2 + i] = slice(0, -pad)
+            out_raw = out_raw[tuple(slices)]
+        out = Tensor(
+            "transposed_conv",
+            dims=batch_dims + [out_dim] + list(out_spatial_dims),
+            dtype=TorchBackend.get_dtype_name_raw(out_raw),
+        )
+        if len(batch_dims) == 1:
+            out.raw_tensor = out_raw
+        else:
+            out.raw_tensor = torch.reshape(out_raw, [d.get_dim_value() for d in out.dims])
+        out.feature_dim = out_dim
+        return out, out_spatial_dims
+
     @staticmethod
     def pool(
         source: Tensor,

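For the new TorchBackend.transposed_conv above, the raw computation is plain torch
conv_transpose{1,2,3}d on a (batch, channels, spatial...) layout. A minimal standalone PyTorch check
of the 1D case and its "valid" output length (pure torch, independent of the RF wrapper):

    import torch

    x = torch.randn(4, 8, 10)   # (batch, in_channels, time)
    w = torch.randn(8, 16, 3)   # (in_channels, out_channels, kernel)
    y = torch.nn.functional.conv_transpose1d(x, w, stride=2, padding=0)
    print(y.shape)              # torch.Size([4, 16, 21]), i.e. (10 - 1) * 2 + 3
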
returnn/torch/util/exception_helper.py
CHANGED

@@ -71,7 +71,13 @@ def help_on_torch_exception(
    if not count_frames:
        exc_ext.append("(No module call frames.)")

-    if
+    if (
+        # KeyError formatting would be wrong, showing `KeyError: "enc_spatial_dim\n\nStep idx: 0\..."`
+        not isinstance(exc, KeyError)
+        and len(exc.args) == 1
+        and isinstance(exc.args[0], str)
+        and not always_direct_print
+    ):
        exc.args = ("\n".join([exc.args[0], ""] + exc_ext),)
    else:
        for msg in exc_ext:

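The KeyError exclusion above exists because KeyError formats its single argument with repr(), so
appending multi-line help text would show escaped "\n" sequences instead of real newlines. Small
plain-Python demonstration:

    try:
        raise KeyError("enc_spatial_dim\n\nStep idx: 0")
    except KeyError as exc:
        print(exc)  # 'enc_spatial_dim\n\nStep idx: 0'  -- newlines stay escaped, hard to read
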
returnn/util/basic.py
CHANGED

@@ -365,12 +365,9 @@ def get_checkpoint_filepattern(filepath):
    :return: CheckpointLoader compatible filepattern
    :rtype: str
    """
-
-
-
-        return filepath[: -len(".index")]
-    elif filepath.endswith(".pt"):
-        return filepath[: -len(".pt")]
+    for ext in [".meta", ".index", ".pt"]:
+        if filepath.endswith(ext):
+            return filepath[: -len(ext)]
    return filepath

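Per the simplified loop above, get_checkpoint_filepattern strips one known checkpoint extension and
otherwise returns the path unchanged, e.g.:

    assert get_checkpoint_filepattern("model.080.index") == "model.080"
    assert get_checkpoint_filepattern("epoch.100.pt") == "epoch.100"
    assert get_checkpoint_filepattern("epoch.100") == "epoch.100"
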
returnn/util/better_exchook.py
CHANGED

@@ -1093,6 +1093,7 @@ def format_tb(
    with_color=None,
    with_vars=None,
    clear_frames=True,
+    colorize=None,
):
    """
    Formats a traceback into a list of strings, each corresponding to one frame.

@@ -1110,11 +1111,14 @@
        That will potentially fix some mem leaks regarding locals, so it can be important.
        Also see https://github.com/python/cpython/issues/113939.
        However, any further access to frame locals will not work (e.g., if you want to use a debugger afterward).
+    :param colorize: for compat with Python >=3.13, currently ignored
    :return: list of strings, each corresponding to one frame in the traceback.
        Each string contains the file name, line number, function name, source code line, maybe relevant variables,
        etc., and a final newline.
    :rtype: list[str]
    """
+    if colorize is not None and with_color is None:
+        with_color = colorize
    color = Color(enable=with_color)
    output = _OutputLinesCollector(color=color)

returnn/util/debug.py
CHANGED

@@ -704,7 +704,7 @@ def check_py_traces_rf_to_pt_equal(
    """
    import random
    import torch
-    from returnn.tensor import
+    from returnn.tensor import Dim
    import returnn.frontend as rf

    # noinspection PyProtectedMember

@@ -715,9 +715,18 @@ def check_py_traces_rf_to_pt_equal(
    def _get_entry(trace, func, i, name, j):
        return trace[func][i][name][j]

+    def _get_entry_attr(trace, func, i, name, j):
+        name, attr = name.split(".", 1)
+        obj = trace[func][i][name][j]
+        return eval(f"{name}.{attr}", {name: obj})
+
    def _resolve_dim(dim: Union[Dim, str]) -> Dim:
        if isinstance(dim, Dim):
            return dim
+        elif isinstance(dim, str) and "." in dim:
+            dim = _get_entry_attr(trace_rf, *check_rf[:2], dim, -1)
+            assert isinstance(dim, Dim)
+            return dim
        elif isinstance(dim, str):
            dim = _get_entry(trace_rf, *check_rf[:2], dim, -1)
            assert isinstance(dim, Dim)

@@ -763,7 +772,7 @@ def check_py_traces_rf_to_pt_equal(
            if len(indices) > 5:
                msgs.append(" non-matching ...")
            non_matching.append("\n".join(msgs_prefix + msgs))
-            print(
+            print(" mismatch!")
            for msg in msgs:
                print(msg)

returnn/util/file_cache.py
CHANGED

@@ -426,7 +426,21 @@ class FileCache:
        orig_mtime_ns = os.stat(src_filename).st_mtime_ns
        FileInfo(mtime_ns=orig_mtime_ns).save(info_file_name)

-
+        try:
+            _copy_with_prealloc(src_filename, dst_tmp_filename)
+        except Exception:
+            # Cleanup if it was created already.
+            # That avoids some of the ambiguity of the existence of the .copy file.
+            # https://github.com/rwth-i6/returnn/issues/1785
+            try:
+                os.remove(dst_tmp_filename)
+            except FileNotFoundError:
+                pass
+            try:
+                os.remove(info_file_name)
+            except FileNotFoundError:  # not really expected here, but safe to ignore
+                pass
+            raise
        os.rename(dst_tmp_filename, dst_filename)

    @staticmethod

returnn/util/task_system.py
CHANGED

@@ -671,7 +671,7 @@ class Pickler(_BasePickler):
            return
        # For some reason, Numpy fromstring/tostring is faster than Numpy loads/dumps.
        self.save(make_numpy_ndarray_fromstring)
-        self.save((obj.
+        self.save((obj.tobytes(), str(obj.dtype), obj.shape))
        self.write(pickle.REDUCE)

    dispatch[numpy.ndarray] = save_ndarray

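The pickling change above serializes the array via tobytes(); the saved (bytes, dtype, shape) triple
round-trips as follows (small NumPy sketch, independent of the Pickler class):

    import numpy as np

    a = np.arange(6, dtype="float32").reshape(2, 3)
    data, dtype, shape = a.tobytes(), str(a.dtype), a.shape
    b = np.frombuffer(data, dtype=dtype).reshape(shape)
    assert np.array_equal(a, b)
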
{returnn-1.20251027.232712.dist-info → returnn-1.20260105.192646.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20251027.232712
+Version: 1.20260105.192646
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer

@@ -36,7 +36,7 @@ Welcome to RETURNN
 `RETURNN paper 2018 <https://arxiv.org/abs/1805.05225>`_.

 RETURNN - RWTH extensible training framework for universal recurrent neural networks,
-is a
+is a PyTorch/TensorFlow-based implementation of modern recurrent neural network architectures.
 It is optimized for fast and reliable training of recurrent neural networks in a multi-GPU environment.

 The high-level features and goals of RETURNN are:
|