returnn 1.20250820.123936__py3-none-any.whl → 1.20250821.93927__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of returnn might be problematic.

returnn/PKG-INFO CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250820.123936
+Version: 1.20250821.93927
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer
returnn/_setup_info_generated.py CHANGED
@@ -1,2 +1,2 @@
-version = '1.20250820.123936'
-long_version = '1.20250820.123936+git.9c74169'
+version = '1.20250821.093927'
+long_version = '1.20250821.093927+git.ec56958'
returnn/torch/updater.py CHANGED
@@ -95,6 +95,8 @@ class Updater:
     Wraps a torch.optim.Optimizer, and extends it by some further functionality.
     """

+    _OptimizerParamGroupsExtraOpts = ("learning_rate_multiplier",)
+
     def __init__(self, *, config, network, device, initial_learning_rate=1.0):
         """
         :param returnn.config.Config config: config defining the training conditions.
@@ -131,6 +133,7 @@ class Updater:

         self._optimizer_opts: Optional[Dict[str, Any]] = None
         self.optimizer: Optional[torch.optim.Optimizer] = None
+        self._optimizer_param_groups_extra_opts: Optional[List[Dict[str, Any]]] = None

         self._grad_clip = self.config.float("gradient_clip", 0.0)
         self._grad_clip_global_norm = self.config.float("gradient_clip_global_norm", 0.0)
@@ -189,8 +192,15 @@ class Updater:
             )
         self._effective_learning_rate = float(lr)
         if self.optimizer:
-            for param_group in self.optimizer.param_groups:
-                param_group["lr"] = self._effective_learning_rate
+            if self._optimizer_param_groups_extra_opts:
+                assert len(self.optimizer.param_groups) == len(self._optimizer_param_groups_extra_opts)
+                lr_multiplies = [
+                    opts.get("learning_rate_multiplier", 1.0) for opts in self._optimizer_param_groups_extra_opts
+                ]
+            else:
+                lr_multiplies = [1.0] * len(self.optimizer.param_groups)
+            for i, param_group in enumerate(self.optimizer.param_groups):
+                param_group["lr"] = self._effective_learning_rate * lr_multiplies[i]

     def set_current_train_step(self, *, global_train_step: int, epoch: int, epoch_continuous: Optional[float] = None):
         """
@@ -273,7 +283,7 @@ class Updater:
         if optimizer_opts is None:
             raise ValueError("config field 'optimizer' needs to be set explicitely for the Torch backend")
         self._optimizer_opts = optimizer_opts
-        self.optimizer = self._create_optimizer(optimizer_opts)
+        self.optimizer, self._optimizer_param_groups_extra_opts = self._create_optimizer(optimizer_opts)

     def load_optimizer(self, filename):
         """
@@ -421,21 +431,20 @@ class Updater:
         """
         return self.optimizer

-    def _create_optimizer(self, optimizer_opts):
+    def _create_optimizer(self, optimizer_opts) -> Tuple[torch.optim.Optimizer, Optional[List[Dict[str, Any]]]]:
         """
         Returns a valid optimizer considering the dictionary given by the user in the config.

         :param dict[str]|str optimizer_opts: Optimizer configuration specified by the user.
             If it's a dict, it must contain "class" with the optimizer name or callable.
             If it's a str, it must be the optimizer name.
-        :return: A valid optimizer.
-        :rtype: torch.optim.Optimizer
+        :return: tuple (optimizer, optional optimizer_param_groups_extra_opts).
         """
         lr = self.learning_rate

         # If the parameter is already a valid optimizer, return it without further processing
         if isinstance(optimizer_opts, torch.optim.Optimizer):
-            return optimizer_opts
+            return optimizer_opts, None
         elif callable(optimizer_opts):
             optimizer_opts: Dict[str, Any] = {"class": optimizer_opts}
         else:
@@ -461,12 +470,23 @@ class Updater:
         lr = lr * opt_kwargs.pop("learning_rate_multiplier", 1.0)
         opt_kwargs["lr"] = lr

-        params_or_param_groups = self._get_optimizer_param_groups(optim_class, opt_kwargs)
-        optimizer = optim_class(params_or_param_groups, **opt_kwargs)
+        param_groups = self._get_optimizer_param_groups(optim_class, opt_kwargs)
+        param_groups = list(param_groups)
+        assert len(param_groups) > 0, "got an empty parameter list?"
+        if not isinstance(param_groups[0], dict):
+            param_groups = [{"params": param_groups}]
+        optimizer_param_groups_extra_opts: Optional[List[Dict[str, Any]]] = None
+        if any(any(key in group for key in self._OptimizerParamGroupsExtraOpts) for group in param_groups):
+            param_groups = [dict(group) for group in param_groups]  # copy to make sure we can modify it
+            optimizer_param_groups_extra_opts = [
+                {key: group.pop(key) for key in self._OptimizerParamGroupsExtraOpts if key in group}
+                for group in param_groups
+            ]
+        optimizer = optim_class(param_groups, **opt_kwargs)
         print("Optimizer: %s" % optimizer, file=log.v1)
         assert isinstance(optimizer, torch.optim.Optimizer)

-        return optimizer
+        return optimizer, optimizer_param_groups_extra_opts

     def _create_default_optimizer(self):
         """
@@ -514,7 +534,11 @@ class Updater:
             assert callable(custom_param_groups), f"invalid param_groups_custom {custom_param_groups!r}"
             rf_model = wrapped_pt_module_to_rf_module(self.network)
             custom_param_groups_ = custom_param_groups(
-                model=self.network, rf_model=rf_model, optimizer_class=optim_class, optimizer_opts=optimizer_opts
+                model=self.network,
+                rf_model=rf_model,
+                optimizer_class=optim_class,
+                optimizer_opts=optimizer_opts,
+                **get_fwd_compat_kwargs(),
             )
             assert isinstance(custom_param_groups_, Iterable) and all(
                 isinstance(group, dict) for group in custom_param_groups_
@@ -547,11 +571,9 @@ class Updater:
         # Parameters without weight decay: biases + LayerNorm/Embedding layers.
         wd_params = set()
         no_wd_params = set()
-        blacklist_wd_modules: Any = optimizer_opts.pop("weight_decay_modules_blacklist", None)
-        if blacklist_wd_modules is None:
-            blacklist_wd_modules = (torch.nn.LayerNorm, torch.nn.Embedding)
-        else:
-            blacklist_wd_modules = _wrap_user_blacklist_wd_modules(blacklist_wd_modules)
+        blacklist_wd_modules = wrap_user_blacklist_wd_modules(
+            optimizer_opts.pop("weight_decay_modules_blacklist", None)
+        )
         custom_include_check = optimizer_opts.pop("weight_decay_custom_include_check", None)
         if custom_include_check:
             assert callable(custom_include_check), f"invalid weight_decay_custom_include_check {custom_include_check!r}"
@@ -598,9 +620,16 @@ class Updater:
         return optim_groups


-def _wrap_user_blacklist_wd_modules(
-    mods: Sequence[Union[str, Type[rf.Module], Type[torch.nn.Module]]],
+def wrap_user_blacklist_wd_modules(
+    mods: Optional[Sequence[Union[str, Type[rf.Module], Type[torch.nn.Module]]]],
 ) -> Tuple[type, ...]:
+    """
+    Wraps the user-provided blacklist_weight_decay_modules into a tuple of types.
+    This supports both pure PyTorch modules (e.g. "torch.nn.LayerNorm")
+    and RF modules (e.g. "rf.LayerNorm"), which can be specified as strings or types.
+    """
+    if mods is None:
+        return torch.nn.LayerNorm, torch.nn.Embedding
     assert isinstance(mods, (list, tuple)), f"invalid blacklist_weight_decay_modules {mods!r}"
     res = []
     for mod in mods:
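Taken together, the updater changes apparently let a RETURNN config attach a per-parameter-group learning rate multiplier: groups produced by the optimizer's param_groups_custom hook may carry a "learning_rate_multiplier" key, which the Updater strips off before building the torch optimizer and re-applies whenever the effective learning rate changes. A hedged config sketch follows; the callable's keyword arguments match the call site in the diff, while the "adamw" class name, the parameter-name check, and the 10x factor are made-up illustrations:

```python
# Hypothetical RETURNN config sketch based on the diff above, not an official example.
def my_param_groups(*, model, rf_model, optimizer_class, optimizer_opts, **_fwd_compat):
    """Split parameters into two groups; give the (assumed) output params a larger LR."""
    fast, slow = [], []
    for name, param in model.named_parameters():
        (fast if name.startswith("output") else slow).append(param)
    return [
        {"params": slow},
        {"params": fast, "learning_rate_multiplier": 10.0},  # stripped off again by the Updater
    ]

optimizer = {
    "class": "adamw",  # optimizer name goes under "class", per the _create_optimizer docstring
    "param_groups_custom": my_param_groups,
}
```

The renamed wrap_user_blacklist_wd_modules additionally accepts None (falling back to torch.nn.LayerNorm and torch.nn.Embedding) and, per its new docstring, weight_decay_modules_blacklist entries given as strings or RF module types such as "rf.LayerNorm".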
returnn/util/basic.py CHANGED
@@ -2459,8 +2459,12 @@ class DictRefKeys(Generic[K, V]):
     Like `dict`, but hash and equality of the keys
     """

-    def __init__(self):
+    def __init__(self, items: Union[None, Iterable[Tuple[K, V]], Dict[K, V]] = None, /, **kwargs):
         self._d = {}  # type: Dict[RefIdEq[K], V]
+        if items is not None:
+            self.update(items)
+        if kwargs:
+            self.update(kwargs)

     def __repr__(self):
         return "DictRefKeys(%s)" % ", ".join(["%r: %r" % (k, v) for (k, v) in self.items()])
@@ -2489,6 +2493,15 @@ class DictRefKeys(Generic[K, V]):
     def __contains__(self, item: K):
         return RefIdEq(item) in self._d

+    def update(self, other: Union[Dict[K, V], Iterable[Tuple[K, V]]], /):
+        """
+        :param other: dict or iterable of (key, value) tuples
+        """
+        if isinstance(other, dict):
+            other = other.items()
+        for k, v in other:
+            self[k] = v
+

 def make_dll_name(basename):
     """
returnn-1.20250820.123936.dist-info/METADATA → returnn-1.20250821.93927.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250820.123936
+Version: 1.20250821.93927
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer
returnn-1.20250820.123936.dist-info/RECORD → returnn-1.20250821.93927.dist-info/RECORD
@@ -1,9 +1,9 @@
-returnn/PKG-INFO,sha256=04mKkkm6MNJQzWwBZq5enBZVM1vpvfr-0W705kv7vsk,5215
+returnn/PKG-INFO,sha256=oANuix-AgPHTYt9t0MfUfM0PyjXdMuEBJrN2b99tMUI,5214
 returnn/__init__.py,sha256=biBtRsM0WZ406vShaeH-9WFoqJ8XwTbn6g0EeFJ7l8E,1012
 returnn/__main__.py,sha256=lHyZcu_0yc9f7Vf_Kfdy9PmeU0T76XVXnpalHi5WKro,31740
 returnn/__old_mod_loader__.py,sha256=nvsNY-xELdS_IPNkv66Q9Rmvg4dbGW0-EBRDcCmctos,7654
 returnn/__setup__.py,sha256=22kQn2fh11iPM0hLb2Fy5sLmoU1JGvmDxXRYuRgQkwU,4659
-returnn/_setup_info_generated.py,sha256=UEqWo5ULLq_sbbrAc2dmq5fjH29qlfAXGYB_hw8vpZQ,77
+returnn/_setup_info_generated.py,sha256=o3ap30O-BqlF3l-gxibSvi0MgnNA3LJEG6o9FVoM0no,77
 returnn/config.py,sha256=3tmKhB6FnQZaNdtcYsiB61JnEY--iZ2qmJ4yq0b6tE0,29140
 returnn/forward_iface.py,sha256=A_OJiaXsX4MlXQRzST86ylyxSUZbC402PQL1REcqHjM,911
 returnn/learning_rate_control.py,sha256=ZvWryAn_tv9DhV8sh1LV3eE34Yltl3On3mYZAG4hR9s,34684
@@ -208,7 +208,7 @@ returnn/torch/README.md,sha256=jzJ2FpOHW02vxN69yKaV97C9LI-hmvjBglKfdZXIDdc,85
 returnn/torch/__init__.py,sha256=MHEUyNHB20Vy89uKAqZoj6FxJKF1Gq3HW-i6ra1pNcI,24
 returnn/torch/distributed.py,sha256=_lyJR71HIoCHpMi5GztGM7YwrX54Am8zSkjnDkE1Lbk,7524
 returnn/torch/engine.py,sha256=JSsQZZiVs9TxRyFEJuR3iH-YZb9sRw7TzoIAIqmplZY,78275
-returnn/torch/updater.py,sha256=9Ju_LuEdXO7LMxt9rs9_6ReePG5y1h36N3coN696rVI,30285
+returnn/torch/updater.py,sha256=7lMoA01Yzp18MY5jjIFncsajTjOD713pK38nU6r-jiE,31999
 returnn/torch/data/__init__.py,sha256=6cLNEi8KoGI12PF6akN7mI_mtjlx-0hcQAfMYoExwik,132
 returnn/torch/data/extern_data.py,sha256=5al706ZaYtHWLp5VH2vS-rW69YXP3NHyOFRKY0WY714,7810
 returnn/torch/data/pipeline.py,sha256=HgIL0jQsPcgvh_SPC4wQ6BzclmrnpFja-UiboF_GPN4,29459
@@ -233,7 +233,7 @@ returnn/torch/util/gradient_checkpoint.py,sha256=iLy-FB65DC8O6LxzmMvFjnSdpIVpko8
 returnn/torch/util/module.py,sha256=MXHIrF9Isu575DDJIa81212ULKwdqu1oOLxDVZecVSk,1693
 returnn/torch/util/scaled_gradient.py,sha256=C5e79mpqtxdtw08OTSy413TSBSlOertRisc-ioiFIaU,3191
 returnn/util/__init__.py,sha256=UIG1qw4idqhW71BV60ha7h9PktxvEVcBIu0lYRossK8,336
-returnn/util/basic.py,sha256=9Ig-7XLtvXk3yfycmBEhdJG-WVNDtoND3DmDyXOl018,142627
+returnn/util/basic.py,sha256=UjHujX9pSu_dOgTxozWD0ujj5eSpyj_zD5vFU6bfyms,143096
 returnn/util/better_exchook.py,sha256=39yvRecluDgYhViwSkaQ8crJ_cBWI63KeEGuK4RKe5w,70843
 returnn/util/bpe.py,sha256=LWFhICZsEOnMwNws0lybPNzKRX6rSr8yKCvP65vjl9Y,19656
 returnn/util/debug.py,sha256=wuRzdg9zB84WWCGyTjmRR_zYypu8gXxlc0nZ6si9OC8,28224
@@ -253,8 +253,8 @@ returnn/util/sig_proc.py,sha256=Tjz0VOAVyqu2qDCF5HZ1JjALjcFsHcNkcd96WgZeKfE,7265
 returnn/util/task_system.py,sha256=y4sMVXQ25Qd2z0rx03uOlXlkE-jbCYC1Sjfn-XlraVU,26003
 returnn/util/train_proc_manager.py,sha256=Pjht28k6uz6BNQ47uW6Gf880iyq5q4wx7P_K2tmoAM8,3266
 returnn/util/watch_memory.py,sha256=BR5P2kvBN6UI81cE0_1WAA6Hd1SByLbBaiDxvLhPOew,4213
-returnn-1.20250820.123936.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
-returnn-1.20250820.123936.dist-info/METADATA,sha256=04mKkkm6MNJQzWwBZq5enBZVM1vpvfr-0W705kv7vsk,5215
-returnn-1.20250820.123936.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
-returnn-1.20250820.123936.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
-returnn-1.20250820.123936.dist-info/RECORD,,
+returnn-1.20250821.93927.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
+returnn-1.20250821.93927.dist-info/METADATA,sha256=oANuix-AgPHTYt9t0MfUfM0PyjXdMuEBJrN2b99tMUI,5214
+returnn-1.20250821.93927.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
+returnn-1.20250821.93927.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
+returnn-1.20250821.93927.dist-info/RECORD,,