returnn 1.20251027.232712__py3-none-any.whl → 1.20260119.15400__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. returnn/PKG-INFO +2 -2
  2. returnn/__old_mod_loader__.py +26 -2
  3. returnn/_setup_info_generated.py +2 -2
  4. returnn/datasets/lm.py +130 -42
  5. returnn/datasets/meta.py +93 -43
  6. returnn/datasets/postprocessing.py +597 -108
  7. returnn/datasets/util/vocabulary.py +90 -0
  8. returnn/frontend/__init__.py +1 -0
  9. returnn/frontend/_backend.py +41 -0
  10. returnn/frontend/_native/__init__.py +22 -0
  11. returnn/frontend/_numpy_backend.py +7 -0
  12. returnn/frontend/_utils.py +1 -1
  13. returnn/frontend/array_.py +48 -2
  14. returnn/frontend/assert_.py +35 -0
  15. returnn/frontend/attention.py +54 -20
  16. returnn/frontend/conv.py +273 -54
  17. returnn/frontend/device.py +14 -1
  18. returnn/frontend/encoder/conformer.py +20 -0
  19. returnn/frontend/encoder/transformer.py +2 -0
  20. returnn/frontend/loss.py +222 -3
  21. returnn/frontend/math_.py +54 -14
  22. returnn/native_op.cpp +182 -172
  23. returnn/native_op.py +36 -31
  24. returnn/sprint/cache.py +12 -13
  25. returnn/tensor/_dim_extra.py +7 -7
  26. returnn/tensor/_tensor_extra.py +10 -10
  27. returnn/tensor/utils.py +8 -5
  28. returnn/tf/frontend_layers/_backend.py +7 -3
  29. returnn/tf/layers/basic.py +27 -40
  30. returnn/tf/native_op.py +27 -63
  31. returnn/tf/network.py +1 -1
  32. returnn/tf/util/basic.py +22 -197
  33. returnn/torch/engine.py +157 -6
  34. returnn/torch/frontend/_backend.py +280 -29
  35. returnn/torch/frontend/bridge.py +61 -0
  36. returnn/torch/frontend/compile_helper.py +106 -0
  37. returnn/torch/util/array_.py +30 -0
  38. returnn/torch/util/assert_.py +122 -0
  39. returnn/torch/util/exception_helper.py +7 -1
  40. returnn/torch/util/native_op.py +885 -0
  41. returnn/torch/util/native_op_code_compiler.py +308 -0
  42. returnn/util/basic.py +6 -7
  43. returnn/util/better_exchook.py +4 -0
  44. returnn/util/cuda_env.py +332 -0
  45. returnn/util/debug.py +12 -2
  46. returnn/util/file_cache.py +15 -1
  47. returnn/util/fsa.py +17 -13
  48. returnn/util/native_code_compiler.py +104 -47
  49. returnn/util/task_system.py +1 -1
  50. {returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/METADATA +2 -2
  51. {returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/RECORD +54 -48
  52. {returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/WHEEL +1 -1
  53. {returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/LICENSE +0 -0
  54. {returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/top_level.txt +0 -0
returnn/util/debug.py CHANGED
@@ -309,6 +309,7 @@ def _get_native_signal_handler_lib_filename() -> str:
309
309
  old_signal_handler[SIGILL] = signal(SIGILL, signal_handler);
310
310
  old_signal_handler[SIGABRT] = signal(SIGABRT, signal_handler);
311
311
  old_signal_handler[SIGFPE] = signal(SIGFPE, signal_handler);
312
+ old_signal_handler[SIGUSR1] = signal(SIGUSR1, signal_handler);
312
313
  }
313
314
  """
314
315
  ),
@@ -704,7 +705,7 @@ def check_py_traces_rf_to_pt_equal(
704
705
  """
705
706
  import random
706
707
  import torch
707
- from returnn.tensor import Tensor, Dim
708
+ from returnn.tensor import Dim
708
709
  import returnn.frontend as rf
709
710
 
710
711
  # noinspection PyProtectedMember
@@ -715,9 +716,18 @@ def check_py_traces_rf_to_pt_equal(
715
716
  def _get_entry(trace, func, i, name, j):
716
717
  return trace[func][i][name][j]
717
718
 
719
+ def _get_entry_attr(trace, func, i, name, j):
720
+ name, attr = name.split(".", 1)
721
+ obj = trace[func][i][name][j]
722
+ return eval(f"{name}.{attr}", {name: obj})
723
+
718
724
  def _resolve_dim(dim: Union[Dim, str]) -> Dim:
719
725
  if isinstance(dim, Dim):
720
726
  return dim
727
+ elif isinstance(dim, str) and "." in dim:
728
+ dim = _get_entry_attr(trace_rf, *check_rf[:2], dim, -1)
729
+ assert isinstance(dim, Dim)
730
+ return dim
721
731
  elif isinstance(dim, str):
722
732
  dim = _get_entry(trace_rf, *check_rf[:2], dim, -1)
723
733
  assert isinstance(dim, Dim)
@@ -763,7 +773,7 @@ def check_py_traces_rf_to_pt_equal(
763
773
  if len(indices) > 5:
764
774
  msgs.append(" non-matching ...")
765
775
  non_matching.append("\n".join(msgs_prefix + msgs))
766
- print(f" mismatch!")
776
+ print(" mismatch!")
767
777
  for msg in msgs:
768
778
  print(msg)
769
779
 
returnn/util/file_cache.py CHANGED
@@ -426,7 +426,21 @@ class FileCache:
426
426
  orig_mtime_ns = os.stat(src_filename).st_mtime_ns
427
427
  FileInfo(mtime_ns=orig_mtime_ns).save(info_file_name)
428
428
 
429
- _copy_with_prealloc(src_filename, dst_tmp_filename)
429
+ try:
430
+ _copy_with_prealloc(src_filename, dst_tmp_filename)
431
+ except Exception:
432
+ # Cleanup if it was created already.
433
+ # That avoids some of the ambiguity of the existence of the .copy file.
434
+ # https://github.com/rwth-i6/returnn/issues/1785
435
+ try:
436
+ os.remove(dst_tmp_filename)
437
+ except FileNotFoundError:
438
+ pass
439
+ try:
440
+ os.remove(info_file_name)
441
+ except FileNotFoundError: # not really expected here, but safe to ignore
442
+ pass
443
+ raise
430
444
  os.rename(dst_tmp_filename, dst_filename)
431
445
 
432
446
  @staticmethod
returnn/util/fsa.py CHANGED
@@ -10,7 +10,7 @@ from __future__ import annotations
10
10
  import numpy
11
11
  import pickle
12
12
  import itertools
13
- import typing
13
+ from typing import Optional, List, Tuple
14
14
  from copy import deepcopy
15
15
  from os.path import isfile
16
16
  from returnn.log import log
@@ -397,7 +397,7 @@ class Ctc:
397
397
  e_end_3 = Edge(self.fsa.num_states, self.fsa.num_states + 1, self.fsa.lem_list[-1][-1], 1.0)
398
398
  self.fsa.edges.append(e_end_3)
399
399
  self.fsa.num_states += 3
400
- # add node nuber of final state
400
+ # add node number of final state
401
401
  self.final_states.append(self.fsa.num_states - 1)
402
402
 
403
403
  elif self.fsa.lem_edges is not None:
@@ -806,7 +806,7 @@ class Ngram:
806
806
  :param int n: size of the gram (1, 2, 3)
807
807
  """
808
808
  self.n = n
809
- self.lexicon = None # type: typing.Optional[Lexicon]
809
+ self.lexicon: Optional[Lexicon] = None
810
810
  # lexicon consists of 3 entries: phoneme_list, phonemes and lemmas
811
811
  # phoneme_list: list of string phonemes in the lexicon
812
812
  # phonemes: dict of dict of str {phone: {index: , symbol: , variation:}}
@@ -1059,7 +1059,7 @@ class FastBwFsaShared:
1059
1059
 
1060
1060
  def __init__(self):
1061
1061
  self.num_states = 1
1062
- self.edges = [] # type: typing.List[Edge]
1062
+ self.edges: List[Edge] = []
1063
1063
 
1064
1064
  def add_edge(self, source_state_idx, target_state_idx, emission_idx, weight=0.0):
1065
1065
  """
@@ -1148,17 +1148,20 @@ class FastBwFsaShared:
1148
1148
  )
1149
1149
 
1150
1150
 
1151
- def get_ctc_fsa_fast_bw(targets, seq_lens, blank_idx):
1151
+ def get_ctc_fsa_fast_bw(
1152
+ *, targets: numpy.ndarray, seq_lens: numpy.ndarray, blank_idx: int, label_loop: bool = True
1153
+ ) -> FastBaumWelchBatchFsa:
1152
1154
  """
1153
- :param numpy.ndarray targets: shape (batch,time)
1154
- :param numpy.ndarray seq_lens: shape (batch)
1155
- :param int blank_idx:
1156
- :rtype: FastBaumWelchBatchFsa
1155
+ :param targets: shape (batch,time)
1156
+ :param seq_lens: shape (batch)
1157
+ :param blank_idx:
1158
+ :param label_loop:
1159
+ :return: FSA
1157
1160
  """
1158
1161
  n_batch, n_time = targets.shape
1159
1162
  assert seq_lens.shape == (n_batch,)
1160
- edges = [] # type: typing.List[typing.Tuple[int,int,int,int]] # list of (from,to,emission_idx,sequence_idx)
1161
- start_end_states = [] # type: typing.List[typing.Tuple[int,int]] # list of (start,end), same len as batch
1163
+ edges: List[Tuple[int, int, int, int]] = [] # list of (from,to,emission_idx,sequence_idx)
1164
+ start_end_states: List[Tuple[int, int]] = [] # list of (start,end), same len as batch
1162
1165
  state_idx = 0
1163
1166
  # Note: We don't use weights on the edges, i.e. they are all set to zero.
1164
1167
  # I.e. we want that all strings for some given length T have the same probability.
@@ -1188,9 +1191,10 @@ def get_ctc_fsa_fast_bw(targets, seq_lens, blank_idx):
1188
1191
  # Skip directly to final state (state_idx + 3).
1189
1192
  edges.append((state_idx, state_idx + 3, label_idx, batch_idx)) # label
1190
1193
  state_idx += 1
1191
- edges.append((state_idx, state_idx, label_idx, batch_idx)) # label loop
1194
+ if label_loop:
1195
+ edges.append((state_idx, state_idx, label_idx, batch_idx)) # label loop
1192
1196
  edges.append((state_idx, state_idx + 1, blank_idx, batch_idx)) # blank
1193
- if not is_final_label and label_idx != next_label_idx:
1197
+ if not is_final_label and (not label_loop or label_idx != next_label_idx):
1194
1198
  # Skip over blank is allowed in this case.
1195
1199
  edges.append((state_idx, state_idx + 2, next_label_idx, batch_idx)) # next label
1196
1200
  if next_is_final_label:
returnn/util/native_code_compiler.py CHANGED
@@ -3,10 +3,11 @@ Native code compiler
3
3
  """
4
4
 
5
5
  from __future__ import annotations
6
- from typing import Optional, List
6
+ from typing import Optional, Union, Sequence, List, Tuple, Dict
7
7
  import typing
8
8
  import os
9
9
  import sys
10
+ import shutil
10
11
 
11
12
  from . import basic as util
12
13
 
@@ -17,47 +18,50 @@ class NativeCodeCompiler:
17
18
  """
18
19
 
19
20
  CacheDirName = "returnn_native"
20
- CollectedCompilers = None # type: Optional[List[NativeCodeCompiler]]
21
+ CollectedCompilers: Optional[List[NativeCodeCompiler]] = None
21
22
 
22
23
  def __init__(
23
24
  self,
24
- base_name,
25
- code_version,
26
- code,
27
- is_cpp=True,
28
- c_macro_defines=None,
29
- ld_flags=None,
30
- include_paths=(),
31
- include_deps=None,
32
- static_version_name=None,
33
- should_cleanup_old_all=True,
34
- should_cleanup_old_mydir=False,
35
- use_cxx11_abi=False,
36
- log_stream=None,
37
- verbose=False,
25
+ base_name: str,
26
+ *,
27
+ code_version: Union[int, Tuple[int, ...]] = 1,
28
+ code: str,
29
+ is_cpp: bool = True,
30
+ c_macro_defines: Optional[Dict[str, Union[str, int, None]]] = None,
31
+ ld_flags: Optional[Sequence[str]] = None,
32
+ include_paths: Optional[Sequence[str]] = (),
33
+ include_deps: Optional[Sequence[str]] = None,
34
+ static_version_name: Optional[str] = None,
35
+ should_cleanup_old_all: bool = True,
36
+ should_cleanup_old_mydir: bool = False,
37
+ use_cxx11_abi: bool = False,
38
+ log_stream: Optional[typing.TextIO] = None,
39
+ verbose: Optional[bool] = None,
38
40
  ):
39
41
  """
40
- :param str base_name: base name for the module, e.g. "zero_out"
41
- :param int|tuple[int] code_version: check for the cache whether to reuse
42
- :param str code: the source code itself
43
- :param bool is_cpp: if False, C is assumed
44
- :param dict[str,str|int|None]|None c_macro_defines: e.g. {"TENSORFLOW": 1}
45
- :param list[str]|None ld_flags: e.g. ["-lblas"]
46
- :param list[str]|tuple[str] include_paths:
47
- :param list[str]|None include_deps: if provided and an existing lib file,
42
+ :param base_name: base name for the module, e.g. "zero_out"
43
+ :param code_version: check for the cache whether to reuse
44
+ :param code: the source code itself
45
+ :param is_cpp: if False, C is assumed
46
+ :param c_macro_defines: e.g. {"TENSORFLOW": 1}
47
+ :param ld_flags: e.g. ["-lblas"]
48
+ :param include_paths:
49
+ :param include_deps: if provided and an existing lib file,
48
50
  we will check if any dependency is newer
49
51
  and we need to recompile. we could also do it automatically via -MD but that seems overkill and too slow.
50
- :param str|None static_version_name: normally, we use .../base_name/hash as the dir
52
+ :param static_version_name: normally, we use .../base_name/hash as the dir
51
53
  but this would use .../base_name/static_version_name.
52
- :param bool should_cleanup_old_all: whether we should look in the cache dir
54
+ :param should_cleanup_old_all: whether we should look in the cache dir
53
55
  and check all ops if we can delete some old ones which are older than some limit
54
56
  (self._cleanup_time_limit_days)
55
- :param bool should_cleanup_old_mydir: whether we should delete our op dir before we compile there.
56
- :param typing.TextIO|None log_stream: file stream for print statements
57
- :param bool verbose: be slightly more verbose
57
+ :param should_cleanup_old_mydir: whether we should delete our op dir before we compile there.
58
+ :param log_stream: file stream for print statements
59
+ :param verbose: be slightly more verbose
58
60
  """
59
61
  if self.CollectedCompilers is not None:
60
62
  self.CollectedCompilers.append(self)
63
+ if verbose is None:
64
+ verbose = os.environ.get("RETURNN_NATIVE_CODE_COMPILER_VERBOSE") == "1"
61
65
  self.verbose = verbose
62
66
  self.cache_dir = "%s/%s" % (util.get_cache_dir(), self.CacheDirName)
63
67
  self._include_paths = list(include_paths)
@@ -69,6 +73,7 @@ class NativeCodeCompiler:
69
73
  self.ld_flags = ld_flags or []
70
74
  self.include_deps = include_deps
71
75
  self.static_version_name = static_version_name
76
+ self.use_cxx11_abi = use_cxx11_abi
72
77
  self._code_hash = self._make_code_hash()
73
78
  self._info_dict = self._make_info_dict()
74
79
  self._hash = self._make_hash()
@@ -76,7 +81,6 @@ class NativeCodeCompiler:
76
81
  if should_cleanup_old_all:
77
82
  self._cleanup_old()
78
83
  self._should_cleanup_old_mydir = should_cleanup_old_mydir
79
- self.use_cxx11_abi = use_cxx11_abi
80
84
  self._log_stream = log_stream
81
85
  if self.verbose:
82
86
  print("%s: %r" % (self.__class__.__name__, self), file=log_stream)
@@ -157,7 +161,16 @@ class NativeCodeCompiler:
157
161
  assert isinstance(res, dict)
158
162
  return res
159
163
 
160
- _relevant_info_keys = ("code_version", "code_hash", "c_macro_defines", "ld_flags", "compiler_bin", "platform")
164
+ _relevant_info_keys = (
165
+ "code_version",
166
+ "code_hash",
167
+ "c_macro_defines",
168
+ "ld_flags",
169
+ "compiler_bin",
170
+ "platform",
171
+ "use_cxx11_abi",
172
+ "cpp_version",
173
+ )
161
174
 
162
175
  def _make_info_dict(self):
163
176
  """
@@ -174,6 +187,8 @@ class NativeCodeCompiler:
174
187
  "ld_flags": self.ld_flags,
175
188
  "compiler_bin": self._get_compiler_bin(),
176
189
  "platform": platform.platform(),
190
+ "use_cxx11_abi": self.use_cxx11_abi,
191
+ "cpp_version": self.cpp_version,
177
192
  }
178
193
 
179
194
  def _make_code_hash(self):
@@ -251,8 +266,8 @@ class NativeCodeCompiler:
251
266
  :rtype: str
252
267
  """
253
268
  if self.is_cpp:
254
- return "g++"
255
- return "gcc"
269
+ return get_cpp_bin()
270
+ return get_cc_bin()
256
271
 
257
272
  def _transform_compiler_opts(self, opts):
258
273
  """
@@ -261,27 +276,35 @@ class NativeCodeCompiler:
261
276
  """
262
277
  return opts
263
278
 
279
+ cpp_version = 11
280
+
264
281
  def _extra_common_opts(self):
265
282
  """
266
283
  :rtype: list[str]
267
284
  """
268
285
  if self.is_cpp:
269
- return ["-std=c++11"]
286
+ return [f"-std=c++{self.cpp_version}"]
270
287
  return []
271
288
 
272
- @classmethod
273
- def _transform_ld_flag(cls, opt):
274
- """
275
- :param str opt:
276
- :rtype: str
277
- """
289
+ def _transform_ld_flags(self, opts: Sequence[str]) -> Sequence[str]:
290
+ res = []
291
+ for opt in opts:
292
+ if opt.startswith("-l") or opt.startswith("-L"):
293
+ res.append(opt)
294
+ else:
295
+ res.append("-Wl," + opt)
296
+ opts = res
278
297
  if sys.platform == "darwin":
279
- # It seems some versions of MacOS ld cannot handle the `-l:filename` argument correctly.
280
- # E.g. TensorFlow 1.14 incorrectly uses this.
281
- # https://github.com/tensorflow/tensorflow/issues/30564
282
- if opt.startswith("-l:lib") and opt.endswith(".dylib"):
283
- return "-l%s" % opt[len("-l:lib") : -len(".dylib")]
284
- return opt
298
+ res = []
299
+ for opt in opts:
300
+ # It seems some versions of MacOS ld cannot handle the `-l:filename` argument correctly.
301
+ # E.g. TensorFlow 1.14 incorrectly uses this.
302
+ # https://github.com/tensorflow/tensorflow/issues/30564
303
+ if opt.startswith("-l:lib") and opt.endswith(".dylib"):
304
+ opt = "-l%s" % opt[len("-l:lib") : -len(".dylib")]
305
+ res.append(opt)
306
+ return res
307
+ return opts
285
308
 
286
309
  def _maybe_compile_inner(self):
287
310
  # Directory should be created by the locking mechanism.
@@ -300,7 +323,7 @@ class NativeCodeCompiler:
300
323
  common_opts += ["-D%s=%s" % item for item in sorted(self.c_macro_defines.items())]
301
324
  common_opts += ["-g"]
302
325
  opts = common_opts + [self._c_filename, "-o", self._so_filename]
303
- opts += list(map(self._transform_ld_flag, self.ld_flags))
326
+ opts += self._transform_ld_flags(self.ld_flags)
304
327
  cmd_bin = self._get_compiler_bin()
305
328
  cmd_args = [cmd_bin] + opts
306
329
  from subprocess import Popen, PIPE, STDOUT, CalledProcessError
@@ -348,3 +371,37 @@ class NativeCodeCompiler:
348
371
  """
349
372
  self._maybe_compile()
350
373
  return self._so_filename
374
+
375
+
376
+ def get_cc_bin() -> str:
377
+ """
378
+ :return: path
379
+ """
380
+ cc_bin = os.environ.get("CC", "")
381
+ if cc_bin:
382
+ if cc_bin.startswith("/"):
383
+ return cc_bin
384
+ cc_bin = shutil.which(cc_bin)
385
+ if cc_bin:
386
+ return cc_bin
387
+ cc_bin = shutil.which("cc") or shutil.which("clang") or shutil.which("gcc")
388
+ if not cc_bin:
389
+ raise RuntimeError("Cannot find C compiler (cc, clang, gcc) in PATH")
390
+ return cc_bin
391
+
392
+
393
+ def get_cpp_bin() -> str:
394
+ """
395
+ :return: path
396
+ """
397
+ cpp_bin = os.environ.get("CXX", "")
398
+ if cpp_bin:
399
+ if cpp_bin.startswith("/"):
400
+ return cpp_bin
401
+ cpp_bin = shutil.which(cpp_bin)
402
+ if cpp_bin:
403
+ return cpp_bin
404
+ cpp_bin = shutil.which("c++") or shutil.which("cpp") or shutil.which("clang++") or shutil.which("g++")
405
+ if not cpp_bin:
406
+ raise RuntimeError("Cannot find C++ compiler (c++, cpp, clang++, g++) in PATH")
407
+ return cpp_bin
returnn/util/task_system.py CHANGED
@@ -671,7 +671,7 @@ class Pickler(_BasePickler):
671
671
  return
672
672
  # For some reason, Numpy fromstring/tostring is faster than Numpy loads/dumps.
673
673
  self.save(make_numpy_ndarray_fromstring)
674
- self.save((obj.tostring(), str(obj.dtype), obj.shape))
674
+ self.save((obj.tobytes(), str(obj.dtype), obj.shape))
675
675
  self.write(pickle.REDUCE)
676
676
 
677
677
  dispatch[numpy.ndarray] = save_ndarray
{returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: returnn
3
- Version: 1.20251027.232712
3
+ Version: 1.20260119.15400
4
4
  Summary: The RWTH extensible training framework for universal recurrent neural networks
5
5
  Home-page: https://github.com/rwth-i6/returnn/
6
6
  Author: Albert Zeyer
@@ -36,7 +36,7 @@ Welcome to RETURNN
36
36
  `RETURNN paper 2018 <https://arxiv.org/abs/1805.05225>`_.
37
37
 
38
38
  RETURNN - RWTH extensible training framework for universal recurrent neural networks,
39
- is a Theano/TensorFlow-based implementation of modern recurrent neural network architectures.
39
+ is a PyTorch/TensorFlow-based implementation of modern recurrent neural network architectures.
40
40
  It is optimized for fast and reliable training of recurrent neural networks in a multi-GPU environment.
41
41
 
42
42
  The high-level features and goals of RETURNN are: