PyPI - returnn - Versions diffs - 1.20250901.123052__py3-none-any.whl → 1.20250902.10950__py3-none-any.whl - Mend

returnn 1.20250901.123052__py3-none-any.whl → 1.20250902.10950__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of returnn might be problematic. Click here for more details.

Files changed (8)

returnn/PKG-INFO CHANGED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250901.123052
+Version: 1.20250902.10950
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer

returnn/_setup_info_generated.py CHANGED Viewed

@@ -1,2 +1,2 @@
-version = '1.20250901.123052'
-long_version = '1.20250901.123052+git.b2ef025'
+version = '1.20250902.010950'
+long_version = '1.20250902.010950+git.9d5debf'

returnn/torch/util/diagnose_gpu.py CHANGED Viewed

@@ -8,6 +8,10 @@ import os
 import sys
 import gc
 import subprocess
+import signal
+import time
+import contextlib
+import multiprocessing
 import torch
 from returnn.util.better_exchook import better_exchook
 from returnn.util.basic import human_bytes_size
@@ -26,36 +30,39 @@ def print_available_devices(*, file: Optional[TextIO] = None):
         print("CUDA_VISIBLE_DEVICES is set to %r." % os.environ["CUDA_VISIBLE_DEVICES"], file=file)
         cuda_visible_devs = dict(enumerate([int(d) for d in os.environ["CUDA_VISIBLE_DEVICES"].split(",") if d]))
     else:
-        if torch.cuda.is_available():
-            print("CUDA_VISIBLE_DEVICES is not set.", file=file)
-    if torch.cuda.is_available():
-        print("Available CUDA devices:")
-        count = torch.cuda.device_count()
-        if cuda_visible_devs is not None and len(cuda_visible_devs) != count:
-            print(
-                f"(Mismatch between CUDA device count {count}"
-                f" and CUDA_VISIBLE_DEVICES {cuda_visible_devs} count {len(cuda_visible_devs)}?)",
-                file=file,
-            )
-        for i in range(count):
-            print(f"  {i + 1}/{count}: cuda:{i}", file=file)
-            props = torch.cuda.get_device_properties(i)
-            print(f"       name: {props.name}", file=file)
-            print(f"       total_memory: {human_bytes_size(props.total_memory)}", file=file)
-            print(f"       capability: {props.major}.{props.minor}", file=file)
-            if cuda_visible_devs is not None:
-                if len(cuda_visible_devs) == count:
-                    dev_idx_s = cuda_visible_devs[i]
-                else:
-                    dev_idx_s = "?"
+        with timeout("torch.cuda.is_available()"):
+            if torch.cuda.is_available():
+                print("CUDA_VISIBLE_DEVICES is not set.", file=file)
+    with timeout("torch.cuda.is_available()"):
+        if not torch.cuda.is_available():
+            print("(CUDA not available)", file=file)
+            return
+    print("Available CUDA devices:", file=file)
+    count = torch.cuda.device_count()
+    if cuda_visible_devs is not None and len(cuda_visible_devs) != count:
+        print(
+            f"(Mismatch between CUDA device count {count}"
+            f" and CUDA_VISIBLE_DEVICES {cuda_visible_devs} count {len(cuda_visible_devs)}?)",
+            file=file,
+        )
+    for i in range(count):
+        print(f"  {i + 1}/{count}: cuda:{i}", file=file)
+        props = torch.cuda.get_device_properties(i)
+        print(f"       name: {props.name}", file=file)
+        print(f"       total_memory: {human_bytes_size(props.total_memory)}", file=file)
+        print(f"       capability: {props.major}.{props.minor}", file=file)
+        if cuda_visible_devs is not None:
+            if len(cuda_visible_devs) == count:
+                dev_idx_s = cuda_visible_devs[i]
             else:
-                dev_idx_s = i
-            print(f"       device_index: {dev_idx_s}", file=file)
-        if not count:
-            print("  (None)")
-    else:
-        print("(CUDA not available)")
+                dev_idx_s = "?"
+        else:
+            dev_idx_s = i
+        print(f"       device_index: {dev_idx_s}", file=file)
+    if not count:
+        print("  (None)", file=file)
 def print_using_cuda_device_report(dev: Union[str, torch.device], *, file: Optional[TextIO] = None):
@@ -108,7 +115,7 @@ def diagnose_no_gpu() -> List[str]:
     except Exception as exc:
         print("nvidia-smi failed:", exc)
         better_exchook(*sys.exc_info(), debugshell=False)
-        res.append(f"nvidia-smi failed")
+        res.append("nvidia-smi failed")
     return res
@@ -152,4 +159,31 @@ def garbage_collect():
             f"alloc {human_bytes_size(torch.cuda.memory_allocated())}",
             f"reserved {human_bytes_size(torch.cuda.memory_reserved())}",
         ]
-        print(f"CUDA memory usage after triggered GC:", " ".join(stats))
+        print("CUDA memory usage after triggered GC:", " ".join(stats))
+@contextlib.contextmanager
+def timeout(info: str, *, seconds: int = 30):
+    """
+    Note: don't use signal handlers (e.g. signal.alarm) because unfortunately
+    potential hanging funcs will block the main thread and thus block the signal handler from executing.
+    Thus, we use a subprocess.
+    :param seconds:
+    :param info:
+    """
+    proc = multiprocessing.Process(
+        target=_timeout_handler, kwargs={"seconds": seconds, "proc_id": os.getpid(), "info": info}
+    )
+    proc.start()
+    try:
+        yield
+    finally:
+        proc.terminate()
+        proc.join()
+def _timeout_handler(*, seconds: Union[float, int], proc_id: int, info: str):
+    time.sleep(seconds)
+    print(f"ERROR: {info}: Timeout handler after {seconds} seconds, killing proc {proc_id}.", file=sys.stderr)
+    os.kill(proc_id, signal.SIGABRT)

{returnn-1.20250901.123052.dist-info → returnn-1.20250902.10950.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250901.123052
+Version: 1.20250902.10950
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer

{returnn-1.20250901.123052.dist-info → returnn-1.20250902.10950.dist-info}/RECORD RENAMED Viewed

@@ -1,9 +1,9 @@
-returnn/PKG-INFO,sha256=Q09xg2cVnIca0qB_AWTi80jbjkBE5s6htDNdfNeEOYk,5215
+returnn/PKG-INFO,sha256=GVal7eVN_obo9mfdhPK2WvH2MzSm51cFZJChHEsF2XU,5214
 returnn/__init__.py,sha256=biBtRsM0WZ406vShaeH-9WFoqJ8XwTbn6g0EeFJ7l8E,1012
 returnn/__main__.py,sha256=lHyZcu_0yc9f7Vf_Kfdy9PmeU0T76XVXnpalHi5WKro,31740
 returnn/__old_mod_loader__.py,sha256=nvsNY-xELdS_IPNkv66Q9Rmvg4dbGW0-EBRDcCmctos,7654
 returnn/__setup__.py,sha256=22kQn2fh11iPM0hLb2Fy5sLmoU1JGvmDxXRYuRgQkwU,4659
-returnn/_setup_info_generated.py,sha256=5CMSFeWeRNgH2Yb1aqfRufv6wh5xGpLR06Ad-TZ4GAA,77
+returnn/_setup_info_generated.py,sha256=jTlsQFAqLqFgm0UJ0uWltcnLf69QwqOK0yV4Slt-2Is,77
 returnn/config.py,sha256=3tmKhB6FnQZaNdtcYsiB61JnEY--iZ2qmJ4yq0b6tE0,29140
 returnn/forward_iface.py,sha256=A_OJiaXsX4MlXQRzST86ylyxSUZbC402PQL1REcqHjM,911
 returnn/learning_rate_control.py,sha256=ZvWryAn_tv9DhV8sh1LV3eE34Yltl3On3mYZAG4hR9s,34684
@@ -227,7 +227,7 @@ returnn/torch/util/README.md,sha256=AW-6ueWhgcwDcm57md6sm227QXNkvLnlRLwaH7NlS-w,
 returnn/torch/util/__init__.py,sha256=AOXYUjzPm0XrzFJCPAXo9Jj_FvqD1XH3FfKtho80Vl8,26
 returnn/torch/util/array_.py,sha256=ell3VZvn01SLtF9Pw2fvPzFNO-XDQ7tSB9VCrVSKmSA,2556
 returnn/torch/util/debug_inf_nan.py,sha256=fmzSSTJJyLf7i5yDWRHLeDI0gxvadeqLE8RxMuSHx_4,6398
-returnn/torch/util/diagnose_gpu.py,sha256=PYMmSk7iQ-jC3RXKKNXlYx1Q744C0LXqz0SB6ympwQg,5844
+returnn/torch/util/diagnose_gpu.py,sha256=_yswLmwR8Q2rCsv2jI5FUQNBT__453jBmiWYwazdu20,6808
 returnn/torch/util/exception_helper.py,sha256=_SqxTD5F-GDY2eR4uRALyUTJwt0ytcbJGB_w38RJMBA,4320
 returnn/torch/util/gradient_checkpoint.py,sha256=iLy-FB65DC8O6LxzmMvFjnSdpIVpko87ppIvRKAbtpQ,27995
 returnn/torch/util/module.py,sha256=MXHIrF9Isu575DDJIa81212ULKwdqu1oOLxDVZecVSk,1693
@@ -253,8 +253,8 @@ returnn/util/sig_proc.py,sha256=Tjz0VOAVyqu2qDCF5HZ1JjALjcFsHcNkcd96WgZeKfE,7265
 returnn/util/task_system.py,sha256=y4sMVXQ25Qd2z0rx03uOlXlkE-jbCYC1Sjfn-XlraVU,26003
 returnn/util/train_proc_manager.py,sha256=Pjht28k6uz6BNQ47uW6Gf880iyq5q4wx7P_K2tmoAM8,3266
 returnn/util/watch_memory.py,sha256=BR5P2kvBN6UI81cE0_1WAA6Hd1SByLbBaiDxvLhPOew,4213
-returnn-1.20250901.123052.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
-returnn-1.20250901.123052.dist-info/METADATA,sha256=Q09xg2cVnIca0qB_AWTi80jbjkBE5s6htDNdfNeEOYk,5215
-returnn-1.20250901.123052.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
-returnn-1.20250901.123052.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
-returnn-1.20250901.123052.dist-info/RECORD,,
+returnn-1.20250902.10950.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
+returnn-1.20250902.10950.dist-info/METADATA,sha256=GVal7eVN_obo9mfdhPK2WvH2MzSm51cFZJChHEsF2XU,5214
+returnn-1.20250902.10950.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
+returnn-1.20250902.10950.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
+returnn-1.20250902.10950.dist-info/RECORD,,

{returnn-1.20250901.123052.dist-info → returnn-1.20250902.10950.dist-info}/LICENSE RENAMED Viewed

File without changes

{returnn-1.20250901.123052.dist-info → returnn-1.20250902.10950.dist-info}/WHEEL RENAMED Viewed

File without changes

{returnn-1.20250901.123052.dist-info → returnn-1.20250902.10950.dist-info}/top_level.txt RENAMED Viewed

File without changes

returnn 1.20250901.123052__py3-none-any.whl → 1.20250902.10950__py3-none-any.whl

Potentially problematic release.

returnn 1.20250901.123052__py3-none-any.whl → 1.20250902.10950__py3-none-any.whl