PyPI - returnn - Versions diffs - 1.20240723.171654__tar.gz → 1.20240725.5736__tar.gz - Mend

returnn 1.20240723.171654__tar.gz → 1.20240725.5736__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of returnn might be problematic. Click here for more details.

Files changed (455) hide show

{returnn-1.20240723.171654 → returnn-1.20240725.5736}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20240723.171654
+Version: 1.20240725.5736
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer

returnn-1.20240725.5736/_setup_info_generated.py ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ version = '1.20240725.005736'
2	+ long_version = '1.20240725.005736+git.d5855be'

{returnn-1.20240723.171654 → returnn-1.20240725.5736}/returnn/__main__.py RENAMED Viewed

@@ -270,13 +270,13 @@ def print_task_properties():
         print("Train data:", file=log.v2)
         print("  input:", train_data.num_inputs, "x", train_data.window, file=log.v2)
         print("  output:", train_data.num_outputs, file=log.v2)
-        print(" ", train_data.len_info() or "no info", file=log.v2)
+        print(" ", train_data.len_info(fast=True) or "no info", file=log.v2)
     if dev_data:
         print("Dev data:", file=log.v2)
-        print(" ", dev_data.len_info() or "no info", file=log.v2)
+        print(" ", dev_data.len_info(fast=True) or "no info", file=log.v2)
     if eval_data:
         print("Eval data:", file=log.v2)
-        print(" ", eval_data.len_info() or "no info", file=log.v2)
+        print(" ", eval_data.len_info(fast=True) or "no info", file=log.v2)
 def init_engine():

{returnn-1.20240723.171654 → returnn-1.20240725.5736}/returnn/datasets/audio.py RENAMED Viewed

@@ -393,10 +393,12 @@ class OggZipDataset(CachedDataset2):
         self._lazy_init()
         return [self._get_tag_from_info_dict(seq) for seq in self._data]
-    def get_total_num_seqs(self):
+    def get_total_num_seqs(self, *, fast: bool = False) -> int:
         """
         :rtype: int
         """
+        if fast and self._data is None:
+            raise Exception(f"{self} not initialized")
         self._lazy_init()
         return len(self._data)

{returnn-1.20240723.171654 → returnn-1.20240725.5736}/returnn/datasets/basic.py RENAMED Viewed

@@ -812,11 +812,12 @@ class Dataset(object):
         """
         raise OptionalNotImplementedError(f"{self} get_all_tags not implemented")
-    def get_total_num_seqs(self) -> int:
+    def get_total_num_seqs(self, *, fast: bool = False) -> int:
         """
+        :param fast: if True, might raise an exception if not possible to get fast.
         :return: total number of seqs, without partition epoch.
-          Should be the same as len(self.get_all_tags()).
-          Note that this is not possible with all datasets.
+            Should be the same as len(self.get_all_tags()).
+            Note that this is not possible with all datasets.
         """
         raise OptionalNotImplementedError(f"{self} get_total_num_seqs not implemented")
@@ -994,16 +995,14 @@ class Dataset(object):
             return self.is_less_than_num_seqs(0)
         raise NotImplementedError(f"{self} have_seqs() is not implemented (and neither get_total_num_seqs())")
-    def len_info(self):
+    def len_info(self, *, fast: bool = False) -> str:
         """
-        :rtype: str
-        :returns a string to present the user as information about our len.
-        Depending on our implementation, we can give some more or some less information.
+        :return: string to present the user as information about our len.
         """
         return ", ".join(
             [
                 self.__class__.__name__,
-                "sequences: %s" % try_run(self.get_total_num_seqs, default="unknown"),
+                "sequences: %s" % try_run(self.get_total_num_seqs, kwargs=dict(fast=fast), default="unknown"),
                 "frames: %s" % try_run(self.get_num_timesteps, default="unknown"),
             ]
         )

{returnn-1.20240723.171654 → returnn-1.20240725.5736}/returnn/datasets/generating.py RENAMED Viewed

@@ -199,7 +199,7 @@ class GeneratingDataset(Dataset):
         """
         return self._num_seqs
-    def get_total_num_seqs(self) -> int:
+    def get_total_num_seqs(self, *, fast: bool = False) -> int:
         """
         :return: total num seqs
         """
@@ -1189,7 +1189,7 @@ class StaticDataset(CachedDataset2):
         """
         return self.data[0][key].dtype
-    def get_total_num_seqs(self):
+    def get_total_num_seqs(self, *, fast: bool = False) -> int:
         """
         :rtype: int
         """
@@ -2335,7 +2335,7 @@ class LibriSpeechCorpus(CachedDataset2):
         """
         return [self._get_tag(i) for i in range(len(self._reference_seq_order))]
-    def get_total_num_seqs(self):
+    def get_total_num_seqs(self, *, fast: bool = False) -> int:
         """
         :rtype: int
         """

{returnn-1.20240723.171654 → returnn-1.20240725.5736}/returnn/datasets/hdf.py RENAMED Viewed

@@ -428,7 +428,7 @@ class HDFDataset(CachedDataset):
             tags += h5_file["seqTags"][...].tolist()
         return list(map(self._decode, tags))
-    def get_total_num_seqs(self):
+    def get_total_num_seqs(self, *, fast: bool = False) -> int:
         """
         :rtype: int
         """
@@ -451,12 +451,6 @@ class HDFDataset(CachedDataset):
         """
         return self.data_dtype[key]
-    def len_info(self):
-        """
-        :rtype: str
-        """
-        return ", ".join(["HDF dataset", "sequences: %i" % self.num_seqs, "frames: %i" % self.get_num_timesteps()])
     def _get_file_index(self, real_seq_idx):
         # bisect() returns the position for which all elements to the left of the returned index are <= real_seq_idx,
         # so it actually returns the next file index in which the sequence can be found.

{returnn-1.20240723.171654 → returnn-1.20240725.5736}/returnn/datasets/lm.py RENAMED Viewed

@@ -7,6 +7,7 @@ and some related helpers.
 from __future__ import annotations
+from typing import Optional, Union, Callable, List, Tuple
 import typing
 import os
 import sys
@@ -17,7 +18,15 @@ import xml.etree.ElementTree as ElementTree
 import numpy
 from random import Random
-from returnn.util.basic import parse_orthography, parse_orthography_into_symbols, load_json, unicode, cf
+from returnn.util.basic import (
+    parse_orthography,
+    parse_orthography_into_symbols,
+    load_json,
+    unicode,
+    cf,
+    human_bytes_size,
+    hms,
+)
 from returnn.util.literal_py_to_pickle import literal_eval
 from returnn.log import log
@@ -262,7 +271,9 @@ class LmDataset(CachedDataset2):
             self.num_outputs["delayed"] = self.num_outputs["data"]
             self.labels["delayed"] = self.labels["data"]
-        self.orths = None  # will be loaded in _lazy_init
+        self._orth_tmp_file = None
+        self._orth_mmap = None
+        self._orths_offsets_and_lens: Optional[List[Tuple[int, int]]] = None  # will be loaded in _lazy_init
         self.next_orth_idx = 0
         self.next_seq_idx = 0
@@ -270,27 +281,64 @@ class LmDataset(CachedDataset2):
         self.num_unknown = 0
     def _lazy_init(self):
-        if self.orths is not None:
+        if self._orths_offsets_and_lens is not None:
             return
         corpus_file = self._corpus_file
         if callable(corpus_file):
             corpus_file = corpus_file()
         print("LmDataset, loading file", corpus_file, file=log.v4)
+        import tempfile
+        import mmap
+        tmp_file = tempfile.NamedTemporaryFile(prefix="returnn_lm_dataset_", suffix="_tmp.txt")
+        orths = []
+        offset = 0
+        self._orth_tmp_file = tmp_file
+        self._orths_offsets_and_lens = orths
+        start_time = time.time()
+        last_print_time = start_time
+        def _add_line(line: bytes):
+            nonlocal offset, last_print_time
+            orths.append((offset, len(line)))
+            tmp_file.write(line)
+            tmp_file.write(b"\n")
+            offset += len(line) + 1
+            if time.time() - last_print_time > 10:
+                print(
+                    f"  ..., loaded {len(self._orths_offsets_and_lens)} sequences,"
+                    f" {human_bytes_size(offset)},"
+                    f" after {hms(time.time() - start_time)}",
+                    file=log.v4,
+                )
+                last_print_time = time.time()
         if isinstance(corpus_file, list):  # If a list of files is provided, concatenate all.
-            self.orths = []
             for file_name in corpus_file:
                 if self._use_cache_manager:
                     file_name = cf(file_name)
-                self.orths += read_corpus(file_name, skip_empty_lines=self._skip_empty_lines)
+                iter_corpus(file_name, skip_empty_lines=self._skip_empty_lines, decode=False, callback=_add_line)
         else:
             if self._use_cache_manager:
                 corpus_file = cf(corpus_file)
-            self.orths = read_corpus(corpus_file, skip_empty_lines=self._skip_empty_lines)
-        print("  done, loaded %i sequences" % len(self.orths), file=log.v4)
+            iter_corpus(corpus_file, skip_empty_lines=self._skip_empty_lines, decode=False, callback=_add_line)
+        tmp_file.flush()
+        self._orth_mmap = mmap.mmap(tmp_file.fileno(), 0, flags=mmap.MAP_PRIVATE)
+        print(
+            f"  done, loaded {len(self._orths_offsets_and_lens)} sequences,"
+            f" {human_bytes_size(offset)},"
+            f" in {hms(time.time() - start_time)}",
+            file=log.v4,
+        )
         # It's only estimated because we might filter some out or so.
-        self._estimated_num_seqs = len(self.orths) // self.partition_epoch
+        self._estimated_num_seqs = len(self._orths_offsets_and_lens) // self.partition_epoch
     def get_data_keys(self):
         """
@@ -344,7 +392,9 @@ class LmDataset(CachedDataset2):
         else:
             self._lazy_init()
             self.seq_order = self.get_seq_order_for_epoch(
-                epoch=epoch, num_seqs=len(self.orths), get_seq_len=lambda i: len(self.orths[i])
+                epoch=epoch,
+                num_seqs=len(self._orths_offsets_and_lens),
+                get_seq_len=lambda i: self._orths_offsets_and_lens[i][1],
             )
         self.next_orth_idx = 0
         self.next_seq_idx = 0
@@ -358,10 +408,12 @@ class LmDataset(CachedDataset2):
         """supports sorting"""
         return True
-    def get_total_num_seqs(self) -> int:
+    def get_total_num_seqs(self, *, fast: bool = False) -> int:
         """total num seqs"""
+        if fast and self._orths_offsets_and_lens is None:
+            raise Exception(f"{self} not initialized")
         self._lazy_init()
-        return len(self.orths)
+        return len(self._orths_offsets_and_lens)
     def _reduce_log_skipped_seqs(self):
         if isinstance(self.log_skipped_seqs, bool):
@@ -396,7 +448,9 @@ class LmDataset(CachedDataset2):
             assert self.next_seq_idx == seq_idx, "We expect that we iterate through all seqs."
             true_idx = self.seq_order[self.next_orth_idx]
             self._lazy_init()
-            orth = self.orths[true_idx]  # get sequence for the next index given by seq_order
+            # get sequence for the next index given by seq_order
+            offset, len_ = self._orths_offsets_and_lens[true_idx]
+            orth = self._orth_mmap[offset : offset + len_].decode("utf8")
             seq_tag = self._tag_prefix + str(true_idx)
             self.next_orth_idx += 1
             if orth == "</s>":
@@ -512,10 +566,10 @@ def _is_bliss(filename):
     return False
-def _iter_bliss(filename, callback):
+def _iter_bliss(filename: str, callback: Callable[[Union[str, bytes]], None], *, decode: bool = True):
     """
-    :param str filename:
-    :param (str)->None callback:
+    :param filename:
+    :param callback:
     """
     corpus_file = open(filename, "rb")
     if filename.endswith(".gz"):
@@ -546,51 +600,68 @@ def _iter_bliss(filename, callback):
         orth_split = orth_raw.split()
         orth = " ".join(orth_split)
+        if not decode:
+            orth = orth.encode("utf8")
         callback(orth)
-def _iter_txt(filename, callback, skip_empty_lines=True):
+def _iter_txt(
+    filename: str, callback: Callable[[Union[str, bytes]], None], *, skip_empty_lines: bool = True, decode: bool = True
+) -> None:
     """
-    :param str filename:
-    :param (str)->None callback:
-    :param bool skip_empty_lines:
+    :param filename:
+    :param callback:
+    :param skip_empty_lines:
+    :param decode:
     """
     f = open(filename, "rb")
     if filename.endswith(".gz"):
         f = gzip.GzipFile(fileobj=f)
     for line in f:
-        try:
-            line = line.decode("utf8")
-        except UnicodeDecodeError:
-            line = line.decode("latin_1")  # or iso8859_15?
+        if decode:
+            try:
+                line = line.decode("utf8")
+            except UnicodeDecodeError:
+                line = line.decode("latin_1")  # or iso8859_15?
         line = line.strip()
         if skip_empty_lines and not line:
             continue
         callback(line)
-def iter_corpus(filename, callback, skip_empty_lines=True):
+def iter_corpus(
+    filename: str, callback: Callable[[Union[str, bytes]], None], *, skip_empty_lines: bool = True, decode: bool = True
+) -> None:
     """
-    :param str filename:
-    :param ((str)->None) callback:
-    :param bool skip_empty_lines:
+    :param filename:
+    :param callback:
+    :param skip_empty_lines:
+    :param decode:
     """
     if _is_bliss(filename):
-        _iter_bliss(filename=filename, callback=callback)
+        _iter_bliss(filename=filename, callback=callback, decode=decode)
     else:
-        _iter_txt(filename=filename, callback=callback, skip_empty_lines=skip_empty_lines)
-def read_corpus(filename, skip_empty_lines=True):
-    """
-    :param str filename: either Bliss XML or line-based text
-    :param bool skip_empty_lines: in case of line-based text, skip empty lines
-    :return: list of orthographies
-    :rtype: list[str]
-    """
-    out_list = []
-    iter_corpus(filename=filename, callback=out_list.append, skip_empty_lines=skip_empty_lines)
+        _iter_txt(filename=filename, callback=callback, skip_empty_lines=skip_empty_lines, decode=decode)
+def read_corpus(
+    filename: str,
+    *,
+    skip_empty_lines: bool = True,
+    decode: bool = True,
+    out_list: Optional[Union[List[str], List[bytes]]] = None,
+) -> Union[List[str], List[bytes]]:
+    """
+    :param filename: either Bliss XML or line-based text
+    :param skip_empty_lines: in case of line-based text, skip empty lines
+    :param decode: if True, return str, otherwise bytes
+    :param out_list: if given, append to this list
+    :return: out_list, list of orthographies
+    """
+    if out_list is None:
+        out_list = []
+    iter_corpus(filename=filename, callback=out_list.append, skip_empty_lines=skip_empty_lines, decode=decode)
     return out_list

{returnn-1.20240723.171654 → returnn-1.20240725.5736}/returnn/datasets/map.py RENAMED Viewed

@@ -109,7 +109,7 @@ class MapDatasetWrapper(CachedDataset2):
             raise NotImplementedError("'num_seqs' is only known after calling init_seq_order().")
         return len(self._seq_order)
-    def get_total_num_seqs(self) -> int:
+    def get_total_num_seqs(self, *, fast: bool = False) -> int:
         """
         :return: total number of seqs
         """

{returnn-1.20240723.171654 → returnn-1.20240725.5736}/returnn/datasets/meta.py RENAMED Viewed

@@ -459,7 +459,7 @@ class MetaDataset(CachedDataset2):
         """
         return self.seq_list_original[self.default_dataset_key]
-    def get_total_num_seqs(self) -> int:
+    def get_total_num_seqs(self, *, fast: bool = False) -> int:
         """
         :return: total number of seqs, without partition epoch
         """
@@ -1639,7 +1639,7 @@ class ConcatSeqsDataset(CachedDataset2):
         """
         return self.dataset.get_data_shape(key)
-    def get_total_num_seqs(self) -> int:
+    def get_total_num_seqs(self, *, fast: bool = False) -> int:
         """total num seqs"""
         return len(self.full_seq_list)
@@ -1879,9 +1879,9 @@ class VariableDataset(Dataset):
         """all tags"""
         return self._dataset.get_all_tags()
-    def get_total_num_seqs(self) -> int:
+    def get_total_num_seqs(self, *, fast: bool = False) -> int:
         """total num seqs"""
-        return self._dataset.get_total_num_seqs()
+        return self._dataset.get_total_num_seqs(fast=fast)
     def get_seq_length(self, sorted_seq_idx: int) -> NumbersDict:
         """seq len"""

{returnn-1.20240723.171654 → returnn-1.20240725.5736}/returnn/datasets/multi_proc.py RENAMED Viewed

@@ -332,7 +332,7 @@ class MultiProcDataset(CachedDataset2):
         """num seqs"""
         return self._num_seqs
-    def get_total_num_seqs(self) -> int:
+    def get_total_num_seqs(self, *, fast: bool = False) -> int:
         """total num seqs"""
         if self._total_num_seqs is not None:
             return self._total_num_seqs

{returnn-1.20240723.171654 → returnn-1.20240725.5736}/returnn/datasets/numpy_dump.py RENAMED Viewed

@@ -123,12 +123,6 @@ class NumpyDumpDataset(Dataset):
         """
         return self._num_seqs
-    def len_info(self):
-        """
-        :rtype: str
-        """
-        return "%s, %i seqs" % (self.__class__.__name__, self.num_seqs)
     # ------------ Seq cache management -----------
     def _cleanup_old_seq_cache(self, seq_end):

{returnn-1.20240723.171654 → returnn-1.20240725.5736}/returnn/datasets/sprint.py RENAMED Viewed

@@ -1223,7 +1223,7 @@ class SprintCacheDataset(CachedDataset2):
         self._num_seqs = len(self.seq_list_ordered)
         return True
-    def get_total_num_seqs(self) -> int:
+    def get_total_num_seqs(self, *, fast: bool = False) -> int:
         """total num seqs"""
         return len(self.seq_list_original)

{returnn-1.20240723.171654 → returnn-1.20240725.5736}/returnn/frontend/decoder/transformer.py RENAMED Viewed

@@ -1,5 +1,5 @@
 """
-(Label-sync) Transformer decoder, including cross attention to encoder
+(Label-sync) Transformer decoder, optionally including cross attention to encoder
 References:
@@ -30,10 +30,10 @@ class TransformerDecoder(rf.Module):
         self,
         encoder_dim: Optional[Dim],
         vocab_dim: Dim,
-        model_dim: Dim = Dim(512, name="transformer-dec-default-model-dim"),
+        model_dim: Union[Dim, int] = Dim(512, name="transformer-dec-default-model-dim"),
         *,
         num_layers: int,
-        ff_dim: Dim = NotSpecified,
+        ff_dim: Union[Dim, int] = NotSpecified,
         ff_activation: Callable[[Tensor], Tensor] = rf.relu,
         dropout: float = 0.1,
         num_heads: int = 8,
@@ -68,6 +68,13 @@ class TransformerDecoder(rf.Module):
         """
         super().__init__()
+        if not isinstance(vocab_dim, Dim):
+            raise TypeError(f"TransformerDecoder: unexpected vocab_dim {vocab_dim!r} type {type(vocab_dim)}")
+        if isinstance(model_dim, int):
+            model_dim = Dim(model_dim, name="transformer-dec-model-dim")
+        if not isinstance(model_dim, Dim):
+            raise TypeError(f"TransformerDecoder: unexpected model_dim {model_dim!r} type {type(model_dim)}")
         self.encoder_dim = encoder_dim
         self.vocab_dim = vocab_dim
         self.model_dim = model_dim
@@ -81,7 +88,7 @@ class TransformerDecoder(rf.Module):
         if embed_dim:
             self.input_embedding_proj = rf.Linear(embed_dim, model_dim, with_bias=False)
-        # This could also be configurable...
+        # TODO This should be configurable...
         self.pos_enc = functools.partial(
             rf.sinusoidal_positional_encoding, feat_dim=embed_dim or model_dim, dtype=self.input_embedding.weight.dtype
         )
@@ -210,7 +217,7 @@ class TransformerDecoderLayer(rf.Module):
         encoder_dim: Optional[Dim],
         out_dim: Dim = Dim(512, name="transformer-dec-default-out-dim"),
         *,
-        ff_dim: Dim = NotSpecified,
+        ff_dim: Union[Dim, int] = NotSpecified,
         ff_activation: Callable[[Tensor], Tensor] = rf.relu,
         dropout: float = 0.1,
         num_heads: int = 8,
@@ -236,8 +243,6 @@ class TransformerDecoderLayer(rf.Module):
         self.dropout_broadcast = rf.dropout_broadcast_default()
         self.out_dim = out_dim
-        if ff_dim is None:
-            ff_dim = 4 * out_dim
         self.ff = FeedForward(out_dim=out_dim, ff_dim=ff_dim, dropout=dropout, activation=ff_activation)
         self.ff_layer_norm = rf.LayerNorm(out_dim)
@@ -320,7 +325,7 @@ class FeedForward(rf.Module):
         self,
         out_dim: Dim,
         *,
-        ff_dim: Optional[Dim] = NotSpecified,
+        ff_dim: Optional[Union[Dim, int]] = NotSpecified,
         dropout: float,
         activation: Callable[[Tensor], Tensor],
     ):
@@ -332,8 +337,12 @@ class FeedForward(rf.Module):
         """
         super().__init__()
-        if ff_dim is NotSpecified:
+        if isinstance(ff_dim, int):
+            ff_dim = Dim(ff_dim, name="transformer-ff-dim")
+        if ff_dim is NotSpecified or ff_dim is None:
             ff_dim = out_dim * 4
+        if not isinstance(ff_dim, Dim):
+            raise TypeError(f"Transformer FeedForward: unexpected ff_dim {ff_dim!r} type {type(ff_dim)}")
         self.out_dim = out_dim
         self.dropout = dropout

{returnn-1.20240723.171654 → returnn-1.20240725.5736}/returnn/torch/engine.py RENAMED Viewed

@@ -18,6 +18,7 @@ from torch.utils.data import DataLoader
 from torch import autocast
 from torch.cuda import amp
 from random import random
+import math
 import returnn
 from returnn.config import Config
@@ -118,6 +119,7 @@ class Engine(EngineBase):
         self._log_memory_usage = config.bool("torch_log_memory_usage", False)
         self._log_batch_size = config.bool("log_batch_size", False) and log.verbose[5]
+        self._calculate_exp_loss = config.bool("calculate_exp_loss", False)
         self._reset_dev_memory_caches = config.bool("reset_dev_memory_caches", False)
         self._forward_auto_split_batch_on_oom = config.bool("forward_auto_split_batch_on_oom", False)
@@ -433,10 +435,11 @@ class Engine(EngineBase):
             accumulated_losses_dict += losses_dict
             accumulated_inv_norm_factors_dict += inv_norm_factors_dict
+            eval_info = self._maybe_extend_losses_info(losses_dict / inv_norm_factors_dict)
             _print_process(
                 f"ep {self.epoch} train",
                 step=step_idx,
-                eval_info=dict(losses_dict / inv_norm_factors_dict),
+                eval_info=dict(eval_info),
                 step_duration=step_duration,
                 batch_size_info=_get_batch_size_info(extern_data) if self._log_batch_size else None,
                 log_memory_usage_device=self._device if self._log_memory_usage else None,
@@ -463,6 +466,7 @@ class Engine(EngineBase):
         )
         accumulated_losses_dict = accumulated_losses_dict / accumulated_inv_norm_factors_dict
+        accumulated_losses_dict = self._maybe_extend_losses_info(accumulated_losses_dict)
         self.learning_rate_control.set_epoch_error(
             self.epoch, {f"train_loss_{k}": v for k, v in accumulated_losses_dict.items()}
         )
@@ -530,8 +534,8 @@ class Engine(EngineBase):
                     train_ctx = rf.get_run_ctx()
                     if score_keys is None:
-                        score_keys = [name for name, loss in train_ctx.losses.items() if not loss.as_error]
-                        error_keys = [name for name, loss in train_ctx.losses.items() if loss.as_error]
+                        score_keys = set(name for name, loss in train_ctx.losses.items() if not loss.as_error)
+                        error_keys = set(name for name, loss in train_ctx.losses.items() if loss.as_error)
                     losses_dict = NumbersDict(
                         {
@@ -549,16 +553,18 @@ class Engine(EngineBase):
                     accumulated_losses_dict += losses_dict
                     accumulated_inv_norm_factors_dict += inv_norm_factors_dict
+                    eval_info = self._maybe_extend_losses_info(losses_dict / inv_norm_factors_dict)
                     _print_process(
                         f"ep {self.epoch} {dataset_name} eval",
                         step=step_idx,
-                        eval_info=dict(losses_dict / inv_norm_factors_dict),
+                        eval_info=dict(eval_info),
                         log_memory_usage_device=self._device if self._log_memory_usage else None,
                     )
                     step_idx += 1
             assert step_idx > 0, f"No data in dataset {dataset_name!r}."
             accumulated_losses_dict = accumulated_losses_dict / accumulated_inv_norm_factors_dict
+            accumulated_losses_dict = self._maybe_extend_losses_info(accumulated_losses_dict)
             self.learning_rate_control.set_epoch_error(
                 self.epoch, {f"{dataset_name}_loss_{k}": v for k, v in accumulated_losses_dict.items()}
@@ -588,6 +594,23 @@ class Engine(EngineBase):
             assert isinstance(ls[0], self.learning_rate_control.EpochData)
             self.learning_rate_control.epoch_data[self.epoch] = ls[0]
+    def _maybe_extend_losses_info(self, losses: NumbersDict) -> NumbersDict:
+        """
+        :param losses:
+        :return: maybe extended losses
+        """
+        if self._calculate_exp_loss and losses.has_values():
+            # Assume the current run ctx still has info about the losses from the last step.
+            assert rf.get_run_ctx().losses
+            score_keys = set(k for k, v in rf.get_run_ctx().losses.items() if not v.as_error)
+            losses_ = {}
+            for key, value in losses.items():
+                losses_[key] = value
+                if key in score_keys:
+                    losses_[f"{key}:exp"] = math.exp(value)
+            losses = NumbersDict(losses_)
+        return losses
     def _create_data_loader(self, dataset: Dataset, *, train: bool = False) -> DataLoader:
         """
         :param dataset: RETURNN dataset

{returnn-1.20240723.171654 → returnn-1.20240725.5736}/returnn.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20240723.171654
+Version: 1.20240725.5736
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer

{returnn-1.20240723.171654 → returnn-1.20240725.5736}/tests/test_Dataset.py RENAMED Viewed

@@ -722,15 +722,15 @@ def test_LmDataset_pickle():
             }
         )
         assert isinstance(dataset, LmDataset)
-        assert dataset.orths is None  # not yet loaded, will be lazily loaded
+        assert dataset._orths_offsets_and_lens is None  # not yet loaded, will be lazily loaded
         s = pickle.dumps(dataset)
         dataset = pickle.loads(s)
         assert isinstance(dataset, LmDataset)
-        assert dataset.orths is None  # not yet loaded, will be lazily loaded
+        assert dataset._orths_offsets_and_lens is None  # not yet loaded, will be lazily loaded
         dataset.init_seq_order(epoch=1)
-        assert dataset.orths is not None  # loaded now
+        assert dataset._orths_offsets_and_lens is not None  # loaded now
         dataset.load_seqs(0, 2)
         orth = dataset.get_data(0, "data")
         assert orth.tolist() == [1, 2]

returnn 1.20240723.171654__tar.gz → 1.20240725.5736__tar.gz

Potentially problematic release.

returnn 1.20240723.171654__tar.gz → 1.20240725.5736__tar.gz