returnn 1.20240619.184216__tar.gz → 1.20240620.105009__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of returnn might be problematic. Click here for more details.
- {returnn-1.20240619.184216/returnn.egg-info → returnn-1.20240620.105009}/PKG-INFO +1 -1
- returnn-1.20240620.105009/_setup_info_generated.py +2 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/datasets/basic.py +18 -6
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/datasets/distrib_files.py +80 -28
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/util/bpe.py +30 -39
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/util/file_cache.py +63 -11
- {returnn-1.20240619.184216 → returnn-1.20240620.105009/returnn.egg-info}/PKG-INFO +1 -1
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_Dataset.py +3 -3
- returnn-1.20240619.184216/_setup_info_generated.py +0 -2
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/.editorconfig +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/.gitignore +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/.gitmodules +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/.kateconfig +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/CHANGELOG.md +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/CODEOWNERS +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/CONTRIBUTING.md +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/LICENSE +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/MANIFEST.in +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/README.rst +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/__init__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/12AX.cluster_map +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/_setup_returnn_env.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-fwd.config +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-horovod-mpi.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-horovod-mpi.py.sh +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-horovod-mpi.sh +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-hyper-param-tuning.config +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-iter-dataset.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-list-devices.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-lua-torch-layer.config +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-pretrain.config +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-record-and-push-to-webserver.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-returnn-as-framework.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-rf-pt-benchmark.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-rf.config +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-rhn-enwik8.config +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-sprint-interface.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-tf-att-copy.config +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-tf-attention.config +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-tf-chunking-blstm.12ax.config +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-tf-contribrnn-lstm.12ax.config +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-tf-enc-dec.config +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-tf-hard-att-copy.config +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-tf-lstm-benchmark.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-tf-maxgradnorm-lstm.12ax.config +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-tf-native-lstm-lowmem.12ax.config +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-tf-native-lstm.12ax.config +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-tf-native-lstm2.12ax.config +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-tf-native-lstm2.12ax.tuned.config +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-tf-neural-transducer.12ax.config +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-tf-rec-explicit-lstm.config +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-tf-rec-explicit-rnn.config +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-tf-rec-self-att.config +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-tf-search-compiled-graph.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-tf-vanilla-lstm.12ax.config +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-timit-lstm-ctc.config +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-torch.config +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-upd-mult-model.lstm.12ax.config +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo.sh +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/mdlstm/IAM/README.txt +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/mdlstm/IAM/chars.txt +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/mdlstm/IAM/config_demo +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/mdlstm/IAM/config_fwd +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/mdlstm/IAM/config_real +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/mdlstm/IAM/create_IAM_dataset.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/mdlstm/IAM/decode.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/mdlstm/IAM/features/raw/demo.h5 +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/mdlstm/IAM/go.sh +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/mdlstm/IAM/lines.txt +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/mdlstm/IAM/split/eval.txt +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/mdlstm/IAM/split/train.txt +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/mdlstm/IAM/split/valid.txt +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/mdlstm/README.md +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/mdlstm/artificial/create_test_h5.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/mdlstm/artificial/forwardconfig +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/mdlstm/artificial/go.sh +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/mdlstm/artificial/trainconfig +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/mdlstm/artificial_rgb/create_test_h5.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/mdlstm/artificial_rgb/forwardconfig +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/mdlstm/artificial_rgb/go.sh +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/mdlstm/artificial_rgb/trainconfig +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/pyproject.toml +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/requirements.txt +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/__init__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/__main__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/__old_mod_loader__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/__setup__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/config.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/datasets/__init__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/datasets/audio.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/datasets/bundle_file.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/datasets/cached.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/datasets/cached2.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/datasets/generating.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/datasets/hdf.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/datasets/lm.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/datasets/map.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/datasets/meta.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/datasets/multi_proc.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/datasets/normalization_data.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/datasets/numpy_dump.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/datasets/raw_wav.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/datasets/sprint.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/datasets/stereo.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/datasets/util/__init__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/datasets/util/feature_extraction.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/datasets/util/strings.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/datasets/util/vocabulary.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/engine/__init__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/engine/base.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/engine/batch.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/WarpRna/__init__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/WarpRna/__main__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/WarpRna/warp-rna/.git +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/WarpRna/warp-rna/.gitignore +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/WarpRna/warp-rna/LICENSE +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/WarpRna/warp-rna/README.md +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/WarpRna/warp-rna/aligner.gif +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/WarpRna/warp-rna/check.png +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/WarpRna/warp-rna/core.cu +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/WarpRna/warp-rna/core.h +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/WarpRna/warp-rna/core_cpu.cpp +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/WarpRna/warp-rna/pytorch_binding/LICENSE +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/WarpRna/warp-rna/pytorch_binding/MANIFEST.in +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/WarpRna/warp-rna/pytorch_binding/README.md +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/WarpRna/warp-rna/pytorch_binding/binding.cpp +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.cu +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.h +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/WarpRna/warp-rna/pytorch_binding/requirements.txt +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/WarpRna/warp-rna/pytorch_binding/setup.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/__init__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/test.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/WarpRna/warp-rna/ref_rna.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/setup.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op.cc +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op_kernel_tmpl.h +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/warp_rna/__init__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/WarpRna/warp-rna/test.cpp +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/__init__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/graph_editor/README.md +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/graph_editor/__init__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/graph_editor/edit.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/graph_editor/reroute.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/graph_editor/select.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/graph_editor/subgraph.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/graph_editor/transform.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/extern/graph_editor/util.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/forward_iface.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/__init__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/_backend.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/_native/__init__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/_native/backend.cpp +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/_native/backend.hpp +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/_native/module.cpp +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/_native/module.hpp +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/_native/py_utils.hpp +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/_native/tensor_ops.cpp +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/_native/tensor_ops.hpp +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/_numpy_backend.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/_random_journal.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/_utils.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/array_.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/attention.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/audio/__init__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/audio/mel.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/audio/specaugment.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/backend.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/build_from_dict.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/cond.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/const.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/container.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/control_flow_ctx.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/conv.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/decoder/__init__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/decoder/transformer.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/device.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/dims.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/dropout.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/dtype.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/encoder/__init__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/encoder/base.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/encoder/conformer.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/gradient.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/graph.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/hooks.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/init.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/label_smoothing.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/linear.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/loop.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/loss.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/math_.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/matmul.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/module.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/normalization.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/parameter.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/piecewise_linear.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/rand.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/rec.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/reduce.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/run_ctx.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/signal.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/state.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/stepwise_scheduler.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/tensor_array.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/frontend/types.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/import_/__init__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/import_/common.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/import_/git.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/import_/import_.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/learning_rate_control.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/log.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/native_op.cpp +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/native_op.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/pretrain.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/sprint/__init__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/sprint/cache.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/sprint/control.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/sprint/error_signals.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/sprint/extern_interface.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/sprint/interface.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tensor/README.md +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tensor/__init__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tensor/_dim_extra.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tensor/_tensor_extra.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tensor/_tensor_mixin_base.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tensor/_tensor_op_overloads.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tensor/control_flow_ctx.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tensor/dim.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tensor/marked_dim.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tensor/tensor.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tensor/tensor_dict.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tensor/utils.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/__init__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/compat.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/data_pipeline.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/distributed.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/engine.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/frontend_layers/README.md +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/frontend_layers/__init__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/frontend_layers/_backend.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/frontend_layers/_utils.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/frontend_layers/cond.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/frontend_layers/config_entry_points.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/frontend_layers/debug_eager_mode.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/frontend_layers/dims.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/frontend_layers/layer.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/frontend_layers/loop.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/frontend_layers/make_layer.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/frontend_layers/masked_computation.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/frontend_layers/parameter_assign.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/frontend_layers/prev_tensor_ref.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/frontend_low_level/__init__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/frontend_low_level/_backend.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/horovod.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/hyper_param_tuning.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/layers/__init__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/layers/base.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/layers/basic.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/layers/rec.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/layers/segmental_model.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/layers/signal_processing.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/layers/variable.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/native_op.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/network.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/sprint.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/updater.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/util/__init__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/util/basic.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/util/data.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/util/gradient_checkpoint.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/util/ken_lm.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/tf/util/open_fst.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/torch/README.md +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/torch/__init__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/torch/data/__init__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/torch/data/extern_data.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/torch/data/pipeline.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/torch/data/queued_data_iter.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/torch/data/returnn_dataset_wrapper.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/torch/data/tensor_utils.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/torch/distributed.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/torch/engine.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/torch/frontend/__init__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/torch/frontend/_backend.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/torch/frontend/_rand.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/torch/frontend/bridge.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/torch/frontend/raw_ops.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/torch/updater.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/torch/util/README.md +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/torch/util/__init__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/torch/util/diagnose_gpu.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/torch/util/scaled_gradient.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/util/__init__.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/util/basic.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/util/better_exchook.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/util/debug.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/util/debug_helpers.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/util/fsa.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/util/literal_py_to_pickle.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/util/math.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/util/multi_proc_non_daemonic_spawn.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/util/native_code_compiler.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/util/pprint.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/util/py-to-pickle.cpp +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/util/py_compat.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/util/py_ext_mod_compiler.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/util/result_with_reason.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/util/sig_proc.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/util/task_system.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/util/train_proc_manager.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn/util/watch_memory.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn.egg-info/SOURCES.txt +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn.egg-info/dependency_links.txt +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/returnn.egg-info/top_level.txt +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/rnn.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/setup.cfg +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/setup.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/DummySprintExec.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/PyCharm-inspection-profile.xml +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/PyCharm.idea/.gitignore +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/PyCharm.idea/.name +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/PyCharm.idea/codeStyleSettings.xml +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/PyCharm.idea/codeStyles/Project.xml +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/PyCharm.idea/codeStyles/codeStyleConfig.xml +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/PyCharm.idea/inspectionProfiles/Project_Default.xml +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/PyCharm.idea/inspectionProfiles/profiles_settings.xml +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/PyCharm.idea/misc.xml +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/PyCharm.idea/modules.xml +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/PyCharm.idea/returnn.iml +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/PyCharm.idea/scopes/scope_settings.xml +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/_set_num_threads1.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/_setup_returnn_env.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/_setup_test_env.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/bpe-unicode-demo.codes +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/bpe-unicode-demo.vocab +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/lexicon_opt.fst +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/lexicon_opt.isyms +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/lexicon_opt.jpg +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/lexicon_opt.osyms +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/lint_common.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/pycharm-inspect.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/pylint.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/returnn-as-framework.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/rf_utils.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/spelling.dic +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_Config.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_Fsa.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_GeneratingDataset.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_HDFDataset.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_LearningRateControl.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_Log.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_MultiProcDataset.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_Pretrain.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_ResNet.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_SprintDataset.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_SprintInterface.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_TFEngine.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_TFNativeOp.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_TFNetworkLayer.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_TFNetworkRecLayer.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_TFNetworkSigProcLayer.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_TFUpdater.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_TFUtil.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_TF_determinism.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_TaskSystem.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_TaskSystem_SharedMem.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_TranslationDataset.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_Util.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_demos.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_fork_exec.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_hdf_dump.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_rf_array.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_rf_attention.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_rf_base.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_rf_cond.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_rf_const.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_rf_container.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_rf_conv.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_rf_encoder_conformer.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_rf_gradient.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_rf_label_smoothing.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_rf_loop.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_rf_math.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_rf_normalization.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_rf_piecewise_linear.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_rf_rec.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_rf_reduce.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_rf_signal.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_tensor.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_tools.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_torch_dataset.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_torch_engine.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_torch_frontend.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tests/test_torch_internal_frontend.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/_setup_returnn_env.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/analyze-dataset-batches.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/bliss-collect-seq-lens.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/bliss-dump-text.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/bliss-get-segment-names.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/bliss-to-ogg-zip.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/bpe-create-lexicon.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/calculate-word-error-rate.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/cleanup-old-models.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/collect-orth-symbols.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/collect-words.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/compile_native_op.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/compile_tf_graph.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/debug-dump-search-scores.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/debug-plot-search-scores.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/dump-dataset-raw-strings.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/dump-dataset.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/dump-forward-stats.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/dump-forward.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/dump-network-json.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/dump-pickle.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/extract_state_tying_from_dataset.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/get-attention-weights.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/get-best-model-epoch.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/hdf_dump.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/hdf_dump_translation_dataset.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/import-blocks-mt-model.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/import-t2t-mt-model.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/lattice_rescorer/.gitignore +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/lattice_rescorer/Makefile +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/lattice_rescorer/README.md +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/lattice_rescorer/example/README.md +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/lattice_rescorer/example/libs_list +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.config +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.keep_over_epoch.lstm2.config +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/lattice_rescorer/example/rescore_lattice.sh +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/lattice_rescorer/example/state_vars_list +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/lattice_rescorer/example/tensor_names_list +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/lattice_rescorer/file.h +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/lattice_rescorer/htklatticerescorer.cc +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/lattice_rescorer/htklatticerescorer.h +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/lattice_rescorer/main.cc +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/lattice_rescorer/rescorer.h +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/lattice_rescorer/vocabulary.cc +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/lattice_rescorer/vocabulary.h +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/tf_avg_checkpoints.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/tf_inspect_checkpoint.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/tf_inspect_summary_log.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/torch_avg_checkpoints.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/torch_export_to_onnx.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/torch_inspect_checkpoint.py +0 -0
- {returnn-1.20240619.184216 → returnn-1.20240620.105009}/tools/torch_inspect_checkpoint_and_opt.py +0 -0
|
@@ -144,9 +144,7 @@ class Dataset(object):
|
|
|
144
144
|
self.window = window
|
|
145
145
|
self.seq_ordering = seq_ordering # "default", "sorted" or "random". See self.get_seq_order_for_epoch().
|
|
146
146
|
self.fixed_random_seed = fixed_random_seed
|
|
147
|
-
|
|
148
|
-
random_seed_offset = self._get_default_random_seed_offset()
|
|
149
|
-
self.random_seed_offset = random_seed_offset
|
|
147
|
+
self._random_seed_offset = random_seed_offset
|
|
150
148
|
self.partition_epoch = partition_epoch or 1
|
|
151
149
|
self.repeat_epoch = repeat_epoch or 1
|
|
152
150
|
self._seq_list_filter_file = seq_list_filter_file
|
|
@@ -242,18 +240,32 @@ class Dataset(object):
|
|
|
242
240
|
state = {attr: getattr(self, attr) for attr in ["epoch", "zpad"]}
|
|
243
241
|
return Dataset._create_from_reduce, (self.__class__, kwargs, state)
|
|
244
242
|
|
|
245
|
-
@
|
|
246
|
-
def
|
|
243
|
+
@property
|
|
244
|
+
def random_seed_offset(self) -> int:
|
|
245
|
+
""":return: random seed offset for shuffling"""
|
|
246
|
+
if self._random_seed_offset is None:
|
|
247
|
+
self._random_seed_offset = self._get_default_random_seed_offset()
|
|
248
|
+
return self._random_seed_offset
|
|
249
|
+
|
|
250
|
+
def _uses_custom_distributed_sharding(self) -> bool:
|
|
251
|
+
"""
|
|
252
|
+
:return: if dataset has its own data sharding logic independent of TF/PT.
|
|
253
|
+
Leads to a fixed random_seed_offset independent of the workers local rank.
|
|
254
|
+
"""
|
|
255
|
+
return False
|
|
256
|
+
|
|
257
|
+
def _get_default_random_seed_offset(self):
|
|
247
258
|
"""
|
|
248
259
|
:return: 0 usually
|
|
249
260
|
:rtype: int
|
|
250
261
|
"""
|
|
251
262
|
from returnn.config import get_global_config
|
|
252
263
|
|
|
264
|
+
if self._uses_custom_distributed_sharding():
|
|
265
|
+
return 0
|
|
253
266
|
config = get_global_config(raise_exception=False)
|
|
254
267
|
if not config:
|
|
255
268
|
return 0
|
|
256
|
-
|
|
257
269
|
env_val = os.environ.get(RANDOM_SEED_OFFSET_ENV_VAR)
|
|
258
270
|
if env_val is not None:
|
|
259
271
|
return int(env_val)
|
|
@@ -62,6 +62,12 @@ class DistributeFilesDataset(CachedDataset2):
|
|
|
62
62
|
Specifically, we don't want that some data might be visited more often than others
|
|
63
63
|
(at least its expected value should be the same).
|
|
64
64
|
|
|
65
|
+
In case the dataset grows so large it is unreasonable to expect one worker to
|
|
66
|
+
ever see all the data, this dataset can also shard the file list on a per-worker
|
|
67
|
+
basis before distributing across subepochs.
|
|
68
|
+
This behavior can be configured by setting the property ``"distrib_shard_files": True``.
|
|
69
|
+
The dataset attempts to split the files as evenly as possible based on the file size.
|
|
70
|
+
|
|
65
71
|
Example usage::
|
|
66
72
|
|
|
67
73
|
def get_sub_epoch_dataset(files_subepoch: List[str]) -> Dict[str, Any]:
|
|
@@ -131,6 +137,7 @@ class DistributeFilesDataset(CachedDataset2):
|
|
|
131
137
|
preload_next_n_sub_epochs: int = 1,
|
|
132
138
|
buffer_size: int = 1,
|
|
133
139
|
file_cache_opts: Optional[Dict[str, Any]] = None,
|
|
140
|
+
distrib_shard_files: bool = False,
|
|
134
141
|
_meta_info_cache: Optional[Dict[str, Any]] = None,
|
|
135
142
|
**kwargs,
|
|
136
143
|
):
|
|
@@ -140,6 +147,8 @@ class DistributeFilesDataset(CachedDataset2):
|
|
|
140
147
|
:param get_sub_epoch_dataset: callable which returns a dataset dict for a given subset of files
|
|
141
148
|
:param preload_next_n_sub_epochs: how many sub epoch datasets to preload
|
|
142
149
|
:param buffer_size: buffer size for each worker, amount of seqs to prefetch
|
|
150
|
+
:param distrib_shard_files: set to true to shard the data across worker processes in
|
|
151
|
+
distributed training scenaria
|
|
143
152
|
:param _meta_info_cache: for internal use
|
|
144
153
|
"""
|
|
145
154
|
super().__init__(**kwargs)
|
|
@@ -152,6 +161,7 @@ class DistributeFilesDataset(CachedDataset2):
|
|
|
152
161
|
self._file_sizes: Optional[Dict[str, int]] = None # key -> size. for equal distribution across sub epochs
|
|
153
162
|
self._data_keys: Optional[List[str]] = None
|
|
154
163
|
self._num_seqs: Optional[int] = None
|
|
164
|
+
self._shard_index, self._num_shards = _get_rank_and_size() if distrib_shard_files else 0, 1
|
|
155
165
|
|
|
156
166
|
self._file_cache: Optional[_FileCacheProc] = None
|
|
157
167
|
self._workers: Dict[int, _WorkerProcParent] = {} # epoch -> worker
|
|
@@ -187,6 +197,9 @@ class DistributeFilesDataset(CachedDataset2):
|
|
|
187
197
|
"file_sizes": self._file_sizes,
|
|
188
198
|
}
|
|
189
199
|
|
|
200
|
+
def _uses_custom_distributed_sharding(self) -> bool:
|
|
201
|
+
return self._num_shards > 1
|
|
202
|
+
|
|
190
203
|
def _lazy_init_num_outputs(self):
|
|
191
204
|
if self.num_outputs:
|
|
192
205
|
return
|
|
@@ -245,8 +258,9 @@ class DistributeFilesDataset(CachedDataset2):
|
|
|
245
258
|
self._lazy_init_file_sizes()
|
|
246
259
|
self._lazy_init_file_cache_proc()
|
|
247
260
|
|
|
248
|
-
# Cleanup and fill _files_order_cache.
|
|
249
261
|
full_epoch_0idx = (epoch - 1) // self.partition_epoch
|
|
262
|
+
|
|
263
|
+
# Cleanup and fill _files_order_cache, also shard files across GPU workers
|
|
250
264
|
for k in list(self._files_order_cache.keys()):
|
|
251
265
|
if k < full_epoch_0idx:
|
|
252
266
|
del self._files_order_cache[k]
|
|
@@ -257,15 +271,22 @@ class DistributeFilesDataset(CachedDataset2):
|
|
|
257
271
|
if self.seq_ordering == "default":
|
|
258
272
|
files_order_flat = self.files
|
|
259
273
|
elif self.seq_ordering == "random":
|
|
274
|
+
# when sharding, _get_random_seed_for_epoch makes sure to use a fixed
|
|
275
|
+
# random_seed_offset
|
|
260
276
|
rnd_seed = self._get_random_seed_for_epoch(full_epoch_0idx_ * self.partition_epoch + 1)
|
|
261
277
|
random_generator = numpy.random.RandomState(rnd_seed)
|
|
262
278
|
files_order_flat = list(self.files)
|
|
263
279
|
random_generator.shuffle(files_order_flat)
|
|
264
280
|
else:
|
|
265
281
|
raise ValueError(f"{self}: seq_ordering {self.seq_ordering!r} not supported")
|
|
266
|
-
|
|
267
|
-
|
|
282
|
+
file_bins = self._distribute_evenly_by_size(
|
|
283
|
+
num_bins=self._num_shards * self.partition_epoch,
|
|
284
|
+
file_sizes=self._file_sizes,
|
|
285
|
+
files_order=files_order_flat,
|
|
268
286
|
)
|
|
287
|
+
self_index_base = self.partition_epoch * self._shard_index
|
|
288
|
+
self_index_end = self_index_base + self.partition_epoch
|
|
289
|
+
self._files_order_cache[full_epoch_0idx_] = file_bins[self_index_base:self_index_end]
|
|
269
290
|
|
|
270
291
|
# Cleanup and fill _workers.
|
|
271
292
|
for k, worker in list(self._workers.items()):
|
|
@@ -308,62 +329,67 @@ class DistributeFilesDataset(CachedDataset2):
|
|
|
308
329
|
return dataset_dict, exit_hook
|
|
309
330
|
|
|
310
331
|
@staticmethod
|
|
311
|
-
def
|
|
312
|
-
*,
|
|
332
|
+
def _distribute_evenly_by_size(
|
|
333
|
+
*, num_bins: int, file_sizes: Dict[str, int], files_order: Sequence[FileTree]
|
|
313
334
|
) -> List[List[FileTree]]:
|
|
335
|
+
"""
|
|
336
|
+
Distributes the files from files_order into ``num_bins`` while attempting
|
|
337
|
+
to make every bin as evenly sized (based on ``file_sizes``) as possible.
|
|
338
|
+
"""
|
|
339
|
+
|
|
314
340
|
total_size = sum(file_sizes.values())
|
|
315
|
-
avg_size_per_sub_epoch = total_size /
|
|
316
|
-
# Now evenly distribute the files over the
|
|
341
|
+
avg_size_per_sub_epoch = total_size / num_bins
|
|
342
|
+
# Now evenly distribute the files over the bins.
|
|
317
343
|
# Note that many one-pass variants of algorithms to evenly distribute
|
|
318
|
-
# can end up with some empty
|
|
344
|
+
# can end up with some empty bins,
|
|
319
345
|
# so we need to make sure that this is not the case.
|
|
320
|
-
# E.g. consider the seqs of size [1,1,78,120] and
|
|
346
|
+
# E.g. consider the seqs of size [1,1,78,120] and num_bins=4.
|
|
321
347
|
# That has avg size per sub epoch 50.
|
|
322
348
|
# Some simple algorithms could end up with the sub epochs
|
|
323
349
|
# [[1,1], [78], [120], []] or [[1,1,78], [120], [], []].
|
|
324
|
-
# Or consider [5,5]+[10]*7,
|
|
350
|
+
# Or consider [5,5]+[10]*7, num_bins=5, which has avg size 16.
|
|
325
351
|
# A simple algorithm could end up with [[5,5,10], [10,10], [10,10], [10,10], []].
|
|
326
|
-
# See
|
|
327
|
-
assert len(files_order) >=
|
|
328
|
-
|
|
329
|
-
assert len(
|
|
330
|
-
|
|
352
|
+
# See test_DistributeFilesDataset_distribute_evenly_by_size for some test cases.
|
|
353
|
+
assert len(files_order) >= num_bins
|
|
354
|
+
files_per_bin = [[] for _ in range(num_bins)]
|
|
355
|
+
assert len(files_per_bin) == num_bins
|
|
356
|
+
bin_idx = 0
|
|
331
357
|
size_taken = 0
|
|
332
358
|
for i, f_tree in enumerate(files_order):
|
|
333
359
|
size = file_sizes[_get_key_for_file_tree(f_tree)]
|
|
334
360
|
num_remaining = len(files_order) - i
|
|
335
|
-
if num_remaining <=
|
|
361
|
+
if num_remaining <= num_bins - bin_idx - 1:
|
|
336
362
|
# All remaining sub epochs must be filled.
|
|
337
|
-
assert
|
|
338
|
-
|
|
339
|
-
|
|
363
|
+
assert files_per_bin[bin_idx]
|
|
364
|
+
bin_idx += 1
|
|
365
|
+
files_per_bin[bin_idx].append(f_tree)
|
|
340
366
|
size_taken = size
|
|
341
367
|
continue
|
|
342
|
-
if
|
|
368
|
+
if bin_idx == num_bins - 1:
|
|
343
369
|
# We are done. Just add the rest to the last sub epoch.
|
|
344
|
-
|
|
370
|
+
files_per_bin[bin_idx].append(f_tree)
|
|
345
371
|
size_taken += size
|
|
346
372
|
continue
|
|
347
373
|
assert size_taken <= avg_size_per_sub_epoch
|
|
348
374
|
if size_taken + size <= avg_size_per_sub_epoch:
|
|
349
|
-
|
|
375
|
+
files_per_bin[bin_idx].append(f_tree)
|
|
350
376
|
size_taken += size
|
|
351
377
|
continue
|
|
352
378
|
# We should increase the sub epoch index.
|
|
353
379
|
# We need to decide where to add this file, to the current or the next sub epoch.
|
|
354
|
-
if not
|
|
380
|
+
if not files_per_bin[bin_idx] or (
|
|
355
381
|
# Better to add this file to the current sub epoch?
|
|
356
382
|
abs((size_taken + size) - avg_size_per_sub_epoch)
|
|
357
383
|
<= abs(size_taken - avg_size_per_sub_epoch)
|
|
358
384
|
):
|
|
359
|
-
|
|
385
|
+
files_per_bin[bin_idx].append(f_tree)
|
|
360
386
|
size_taken = 0
|
|
361
387
|
else:
|
|
362
|
-
|
|
388
|
+
files_per_bin[bin_idx + 1].append(f_tree)
|
|
363
389
|
size_taken = size
|
|
364
|
-
|
|
365
|
-
assert all(
|
|
366
|
-
return
|
|
390
|
+
bin_idx += 1
|
|
391
|
+
assert all(files_per_bin)
|
|
392
|
+
return files_per_bin
|
|
367
393
|
|
|
368
394
|
def _collect_single_seq(self, seq_idx: int) -> Optional[DatasetSeq]:
|
|
369
395
|
if seq_idx >= self._num_seqs:
|
|
@@ -413,6 +439,32 @@ def _get_key_for_file_tree(t: FileTree) -> str:
|
|
|
413
439
|
return ":".join(tree.flatten(t))
|
|
414
440
|
|
|
415
441
|
|
|
442
|
+
def _get_rank_and_size() -> Tuple[int, int]:
|
|
443
|
+
"""
|
|
444
|
+
:return: tuple (rank, size): the global rank and size for distributed trainings
|
|
445
|
+
"""
|
|
446
|
+
|
|
447
|
+
from returnn.config import get_global_config
|
|
448
|
+
|
|
449
|
+
config = get_global_config(raise_exception=False)
|
|
450
|
+
if not config:
|
|
451
|
+
return 0, 1
|
|
452
|
+
if config.typed_value("torch_distributed") is not None:
|
|
453
|
+
import returnn.torch.distributed
|
|
454
|
+
|
|
455
|
+
ctx = returnn.torch.distributed.get_ctx(config=config)
|
|
456
|
+
return ctx.rank(), ctx.size()
|
|
457
|
+
elif config.is_true("use_horovod"):
|
|
458
|
+
assert config.bool("use_tensorflow", False) or config.value("backend", "").startswith("tensorflow")
|
|
459
|
+
|
|
460
|
+
import returnn.tf.horovod
|
|
461
|
+
|
|
462
|
+
ctx = returnn.tf.horovod.get_ctx(config=config)
|
|
463
|
+
return ctx.rank(), ctx.size()
|
|
464
|
+
else:
|
|
465
|
+
return 0, 1
|
|
466
|
+
|
|
467
|
+
|
|
416
468
|
class _WorkerProcParent:
|
|
417
469
|
def __init__(
|
|
418
470
|
self,
|
|
@@ -3,8 +3,7 @@ Provide basic Byte-Pair-Encoding (BPE) utilities.
|
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
5
|
from __future__ import annotations
|
|
6
|
-
from typing import Dict
|
|
7
|
-
import typing
|
|
6
|
+
from typing import Optional, List, Dict, Callable
|
|
8
7
|
import re
|
|
9
8
|
import numpy
|
|
10
9
|
|
|
@@ -234,10 +233,10 @@ class PrefixTree:
|
|
|
234
233
|
This class represents both a single node and the tree.
|
|
235
234
|
"""
|
|
236
235
|
|
|
237
|
-
def __init__(self, prefix="", root=None):
|
|
236
|
+
def __init__(self, prefix: str = "", root: Optional[PrefixTree] = None):
|
|
238
237
|
"""
|
|
239
|
-
:param
|
|
240
|
-
:param
|
|
238
|
+
:param prefix:
|
|
239
|
+
:param root:
|
|
241
240
|
"""
|
|
242
241
|
self.prefix = prefix
|
|
243
242
|
self.arcs: Dict[str, PrefixTree] = {} # single char (or BpePostMergeSymbol) -> sub tree
|
|
@@ -246,11 +245,10 @@ class PrefixTree:
|
|
|
246
245
|
self.is_root = not root
|
|
247
246
|
self.root = root
|
|
248
247
|
|
|
249
|
-
def add(self, postfix, root=None):
|
|
248
|
+
def add(self, postfix: str, root: Optional[PrefixTree] = None) -> PrefixTree:
|
|
250
249
|
"""
|
|
251
|
-
:param
|
|
252
|
-
:param
|
|
253
|
-
:rtype: PrefixTree
|
|
250
|
+
:param postfix:
|
|
251
|
+
:param root:
|
|
254
252
|
"""
|
|
255
253
|
if not root:
|
|
256
254
|
if self.is_root:
|
|
@@ -300,21 +298,21 @@ class CharSyncSearch:
|
|
|
300
298
|
Covers the search hyps and the search itself.
|
|
301
299
|
"""
|
|
302
300
|
|
|
303
|
-
def __init__(self, bpe, word, word_pos=0):
|
|
301
|
+
def __init__(self, bpe: PrefixTree, word: str, word_pos: int = 0):
|
|
304
302
|
"""
|
|
305
|
-
:param
|
|
306
|
-
:param
|
|
307
|
-
:param
|
|
303
|
+
:param bpe:
|
|
304
|
+
:param word:
|
|
305
|
+
:param word_pos:
|
|
308
306
|
"""
|
|
309
307
|
self.bpe = bpe
|
|
310
308
|
self.word = word
|
|
311
309
|
self.word_pos = word_pos
|
|
312
|
-
self.hyps = [Hyp(bpe_sym_history=[], cur_node=bpe)]
|
|
313
|
-
self.final_bpe_seqs
|
|
310
|
+
self.hyps: List[Hyp] = [Hyp(bpe_sym_history=[], cur_node=bpe)]
|
|
311
|
+
self.final_bpe_seqs: Optional[List[List[str]]] = None
|
|
314
312
|
|
|
315
313
|
def _get_finished(self):
|
|
316
314
|
assert self.word_pos == len(self.word)
|
|
317
|
-
finals
|
|
315
|
+
finals: List[List[str]] = []
|
|
318
316
|
for hyp in self.hyps:
|
|
319
317
|
if hyp.cur_node.finished:
|
|
320
318
|
finals.append(hyp.bpe_sym_history + [hyp.cur_node.prefix])
|
|
@@ -323,7 +321,7 @@ class CharSyncSearch:
|
|
|
323
321
|
def _expand(self):
|
|
324
322
|
assert self.word_pos < len(self.word)
|
|
325
323
|
char = self.word[self.word_pos]
|
|
326
|
-
new_hyps
|
|
324
|
+
new_hyps: List[Hyp] = []
|
|
327
325
|
for hyp in self.hyps:
|
|
328
326
|
if hyp.cur_node.bpe_finished:
|
|
329
327
|
# Start again from root node.
|
|
@@ -340,10 +338,9 @@ class CharSyncSearch:
|
|
|
340
338
|
new_hyps.append(Hyp(bpe_sym_history=hyp.bpe_sym_history, cur_node=next_node))
|
|
341
339
|
self.hyps = new_hyps
|
|
342
340
|
|
|
343
|
-
def search(self):
|
|
341
|
+
def search(self) -> List[List[str]]:
|
|
344
342
|
"""
|
|
345
343
|
:return: collection of possible BPE symbol seqs
|
|
346
|
-
:rtype: list[list[str]]
|
|
347
344
|
"""
|
|
348
345
|
while self.word_pos < len(self.word):
|
|
349
346
|
self._expand()
|
|
@@ -373,23 +370,20 @@ class DepthFirstSearch:
|
|
|
373
370
|
Depth-first search.
|
|
374
371
|
"""
|
|
375
372
|
|
|
376
|
-
def __init__(self, bpe, word, sampler=None):
|
|
373
|
+
def __init__(self, bpe: PrefixTree, word: str, sampler: Optional[Callable[[], bool]] = None):
|
|
377
374
|
"""
|
|
378
|
-
:param
|
|
379
|
-
:param
|
|
380
|
-
:param
|
|
375
|
+
:param bpe:
|
|
376
|
+
:param word:
|
|
377
|
+
:param sampler:
|
|
381
378
|
"""
|
|
382
379
|
self.bpe = bpe
|
|
383
380
|
self.word = word
|
|
384
381
|
self.sampler = sampler
|
|
385
|
-
self.hyps
|
|
386
|
-
self.final_bpe_seq
|
|
382
|
+
self.hyps: List[HypInPos] = []
|
|
383
|
+
self.final_bpe_seq: Optional[List[str]] = None
|
|
387
384
|
self._add_hyp(HypInPos(bpe_sym_history=[], cur_node=bpe, pos=0))
|
|
388
385
|
|
|
389
|
-
def _add_hyp(self, hyp):
|
|
390
|
-
"""
|
|
391
|
-
:param HypInPos hyp:
|
|
392
|
-
"""
|
|
386
|
+
def _add_hyp(self, hyp: HypInPos):
|
|
393
387
|
if hyp.pos >= len(self.word):
|
|
394
388
|
if hyp.cur_node.finished:
|
|
395
389
|
self.final_bpe_seq = hyp.bpe_sym_history + [hyp.cur_node.prefix]
|
|
@@ -401,7 +395,7 @@ class DepthFirstSearch:
|
|
|
401
395
|
|
|
402
396
|
# Now check for possible further hyps.
|
|
403
397
|
char = self.word[hyp.pos]
|
|
404
|
-
new_hyps
|
|
398
|
+
new_hyps: List[HypInPos] = []
|
|
405
399
|
if hyp.cur_node.bpe_finished:
|
|
406
400
|
# Start again from root node.
|
|
407
401
|
next_node = self.bpe.arcs.get(char)
|
|
@@ -423,10 +417,9 @@ class DepthFirstSearch:
|
|
|
423
417
|
for hyp in new_hyps:
|
|
424
418
|
self._add_hyp(hyp)
|
|
425
419
|
|
|
426
|
-
def search(self):
|
|
420
|
+
def search(self) -> Optional[List[str]]:
|
|
427
421
|
"""
|
|
428
422
|
:return: BPE symbol seq if one is found
|
|
429
|
-
:rtype: list[str]|None
|
|
430
423
|
"""
|
|
431
424
|
while self.hyps and self.final_bpe_seq is None:
|
|
432
425
|
self._expand()
|
|
@@ -459,23 +452,21 @@ class SamplingBytePairEncoder:
|
|
|
459
452
|
bpe.add(bpe_sym)
|
|
460
453
|
self._bpe_prefix_tree = bpe
|
|
461
454
|
|
|
462
|
-
def _sampler(self):
|
|
455
|
+
def _sampler(self) -> bool:
|
|
463
456
|
# When this returns true, it will differ from depth-first search.
|
|
464
457
|
return self.rnd.random_sample() <= self.breadth_prob
|
|
465
458
|
|
|
466
|
-
def get_bpe_split_for_word(self, word):
|
|
459
|
+
def get_bpe_split_for_word(self, word: str) -> Optional[List[str]]:
|
|
467
460
|
"""
|
|
468
|
-
:param
|
|
469
|
-
:rtype: list[str]|None
|
|
461
|
+
:param word:
|
|
470
462
|
"""
|
|
471
463
|
return DepthFirstSearch(bpe=self._bpe_prefix_tree, word=word, sampler=self._sampler).search()
|
|
472
464
|
|
|
473
|
-
def segment_sentence(self, sentence):
|
|
465
|
+
def segment_sentence(self, sentence: str) -> List[str]:
|
|
474
466
|
"""
|
|
475
467
|
Segment single sentence (whitespace-tokenized string) with BPE encoding.
|
|
476
468
|
|
|
477
|
-
:param
|
|
478
|
-
:rtype: list[str]
|
|
469
|
+
:param sentence:
|
|
479
470
|
"""
|
|
480
471
|
output = []
|
|
481
472
|
for word in sentence.split():
|
|
@@ -10,6 +10,7 @@ Main class is :class:`FileCache`.
|
|
|
10
10
|
"""
|
|
11
11
|
|
|
12
12
|
from typing import Any, Collection, List, Tuple
|
|
13
|
+
import errno
|
|
13
14
|
import os
|
|
14
15
|
import time
|
|
15
16
|
import shutil
|
|
@@ -56,6 +57,7 @@ class FileCache:
|
|
|
56
57
|
cleanup_files_always_older_than_days: float = 31.0,
|
|
57
58
|
cleanup_files_wanted_older_than_days: float = 7.0,
|
|
58
59
|
cleanup_disk_usage_wanted_free_ratio: float = 0.2, # try to free at least 20% disk space
|
|
60
|
+
num_tries: int = 3, # retry twice by default
|
|
59
61
|
):
|
|
60
62
|
"""
|
|
61
63
|
:param cache_directory: directory where to cache files.
|
|
@@ -64,6 +66,7 @@ class FileCache:
|
|
|
64
66
|
:param cleanup_files_wanted_older_than_days: if cleanup_disk_usage_wanted_free_ratio not reached,
|
|
65
67
|
cleanup files older than this.
|
|
66
68
|
:param cleanup_disk_usage_wanted_free_ratio: try to free at least this ratio of disk space.
|
|
69
|
+
:param num_tries: how many times to try caching a file before giving up
|
|
67
70
|
"""
|
|
68
71
|
self.cache_directory = expand_env_vars(cache_directory)
|
|
69
72
|
self._cleanup_files_always_older_than_days = cleanup_files_always_older_than_days
|
|
@@ -72,6 +75,8 @@ class FileCache:
|
|
|
72
75
|
self._touch_files_thread = _TouchFilesThread(cache_base_dir=self.cache_directory)
|
|
73
76
|
self._touch_files_thread.start()
|
|
74
77
|
self._recent_full_cleanup_time = float("-inf")
|
|
78
|
+
assert num_tries > 0
|
|
79
|
+
self._num_tries = num_tries
|
|
75
80
|
|
|
76
81
|
# Note on lock_timeout: It will check whether a potentially existing lock file is older than this timeout,
|
|
77
82
|
# and if so, then it would delete the existing lock file, assuming it is from a crashed previous run.
|
|
@@ -99,7 +104,23 @@ class FileCache:
|
|
|
99
104
|
:return: cached file path (in the cache directory)
|
|
100
105
|
"""
|
|
101
106
|
dst_filename = self._get_dst_filename(src_filename)
|
|
102
|
-
|
|
107
|
+
last_error = None
|
|
108
|
+
for try_nr in range(self._num_tries):
|
|
109
|
+
if try_nr > 0:
|
|
110
|
+
print(
|
|
111
|
+
f"FileCache: Ignoring error while copying {dst_filename}: {type(last_error).__name__}: {last_error}"
|
|
112
|
+
)
|
|
113
|
+
time.sleep(1)
|
|
114
|
+
try:
|
|
115
|
+
self._copy_file_if_needed(src_filename, dst_filename)
|
|
116
|
+
break
|
|
117
|
+
except OSError as e:
|
|
118
|
+
if e.errno == errno.ENOSPC:
|
|
119
|
+
last_error = e
|
|
120
|
+
else:
|
|
121
|
+
raise e
|
|
122
|
+
if last_error is not None:
|
|
123
|
+
raise last_error
|
|
103
124
|
self._touch_files_thread.files_extend([dst_filename])
|
|
104
125
|
return dst_filename
|
|
105
126
|
|
|
@@ -265,23 +286,21 @@ class FileCache:
|
|
|
265
286
|
os.utime(dst_filename, None) # touch
|
|
266
287
|
return
|
|
267
288
|
|
|
268
|
-
# Make sure we have enough disk space.
|
|
269
|
-
self.cleanup(need_at_least_free_space_size=os.stat(src_filename).st_size)
|
|
270
|
-
|
|
271
|
-
print(f"FileCache: Copy file {src_filename} to cache")
|
|
272
|
-
|
|
273
289
|
# Create dirs.
|
|
274
290
|
dst_dir = os.path.dirname(dst_filename)
|
|
275
291
|
os.makedirs(dst_dir, exist_ok=True)
|
|
276
292
|
|
|
277
293
|
# Copy the file, while holding a lock. See comment on lock_timeout above.
|
|
278
|
-
with LockFile(
|
|
279
|
-
directory=dst_dir, name=os.path.basename(dst_filename) + ".lock", lock_timeout=self._lock_timeout
|
|
280
|
-
) as lock:
|
|
294
|
+
with LockFile(directory=self.cache_directory, name="dir.lock", lock_timeout=self._lock_timeout) as lock:
|
|
281
295
|
# Maybe it was copied in the meantime, while waiting for the lock.
|
|
282
|
-
if
|
|
296
|
+
if self._check_existing_copied_file_maybe_cleanup(src_filename, dst_filename):
|
|
283
297
|
return
|
|
284
298
|
|
|
299
|
+
print(f"FileCache: Copy file {src_filename} to cache")
|
|
300
|
+
|
|
301
|
+
# Make sure we have enough disk space.
|
|
302
|
+
self.cleanup(need_at_least_free_space_size=os.stat(src_filename).st_size)
|
|
303
|
+
|
|
285
304
|
dst_tmp_filename = dst_filename + ".copy"
|
|
286
305
|
if os.path.exists(dst_tmp_filename):
|
|
287
306
|
# The minimum age should be at least the lock_timeout.
|
|
@@ -294,7 +313,7 @@ class FileCache:
|
|
|
294
313
|
)
|
|
295
314
|
|
|
296
315
|
with self._touch_files_thread.files_added_context([dst_dir, lock.lockfile]):
|
|
297
|
-
|
|
316
|
+
_copy_with_prealloc(src_filename, dst_tmp_filename)
|
|
298
317
|
os.rename(dst_tmp_filename, dst_filename)
|
|
299
318
|
|
|
300
319
|
@staticmethod
|
|
@@ -312,6 +331,39 @@ class FileCache:
|
|
|
312
331
|
return True
|
|
313
332
|
|
|
314
333
|
|
|
334
|
+
def _copy_with_prealloc(src: str, dst: str):
|
|
335
|
+
"""
|
|
336
|
+
Copies the file at `src` to `dst` preallocating the space at `dst` before the
|
|
337
|
+
copy to reduce the chance of race conditions w/ free-disk-space checks occuring.
|
|
338
|
+
|
|
339
|
+
Note the function preallocates `size + 1` to allow detecting incompletely copied
|
|
340
|
+
files by a mismatch in the file size, should the copy process be interrupted. The
|
|
341
|
+
additional byte is then truncated away after copying.
|
|
342
|
+
|
|
343
|
+
In practice this function is used to copy to a temporary file first, so the
|
|
344
|
+
+1-size trick is technically not necessary -- but it also does not hurt leaving
|
|
345
|
+
it in.
|
|
346
|
+
"""
|
|
347
|
+
file_size = os.stat(src).st_size
|
|
348
|
+
with open(dst, "wb") as dst_file:
|
|
349
|
+
if file_size > 0:
|
|
350
|
+
# Prealloc size + 1, see docstring for why.
|
|
351
|
+
#
|
|
352
|
+
# See also `_check_existing_copied_file_maybe_cleanup`.
|
|
353
|
+
if os.name == "posix":
|
|
354
|
+
os.posix_fallocate(dst_file.fileno(), 0, file_size + 1)
|
|
355
|
+
else:
|
|
356
|
+
dst_file.seek(file_size)
|
|
357
|
+
dst_file.write(b"\0")
|
|
358
|
+
dst_file.seek(0)
|
|
359
|
+
with open(src, "rb") as src_file:
|
|
360
|
+
if os.name == "posix":
|
|
361
|
+
os.posix_fadvise(src_file.fileno(), 0, file_size, os.POSIX_FADV_SEQUENTIAL)
|
|
362
|
+
os.posix_fadvise(dst_file.fileno(), 0, file_size, os.POSIX_FADV_SEQUENTIAL)
|
|
363
|
+
shutil.copyfileobj(src_file, dst_file)
|
|
364
|
+
dst_file.truncate(file_size)
|
|
365
|
+
|
|
366
|
+
|
|
315
367
|
@dataclass
|
|
316
368
|
class CachedFile:
|
|
317
369
|
"""
|
|
@@ -623,14 +623,14 @@ def test_MapDatasetWrapper():
|
|
|
623
623
|
assert res.features["data"].shape == (5, 3)
|
|
624
624
|
|
|
625
625
|
|
|
626
|
-
def
|
|
626
|
+
def test_DistributeFilesDataset_distribute_evenly_by_size():
|
|
627
627
|
from returnn.datasets.distrib_files import DistributeFilesDataset
|
|
628
628
|
|
|
629
629
|
def _test(sizes: List[int], partition_epoch: int, expected: List[List[int]]):
|
|
630
630
|
files = [f"file-{i}" for i in range(len(sizes))]
|
|
631
631
|
file_sizes = {f: s for f, s in zip(files, sizes)}
|
|
632
|
-
res = DistributeFilesDataset.
|
|
633
|
-
|
|
632
|
+
res = DistributeFilesDataset._distribute_evenly_by_size(
|
|
633
|
+
num_bins=partition_epoch, file_sizes=file_sizes, files_order=files
|
|
634
634
|
)
|
|
635
635
|
assert all(res) and len(res) == partition_epoch
|
|
636
636
|
assert set(sum(res, [])) == set(files)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{returnn-1.20240619.184216 → returnn-1.20240620.105009}/demos/demo-hyper-param-tuning.config
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|