returnn 1.20251027.232712-py3-none-any.whl → 1.20260119.15400-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. returnn/PKG-INFO +2 -2
  2. returnn/__old_mod_loader__.py +26 -2
  3. returnn/_setup_info_generated.py +2 -2
  4. returnn/datasets/lm.py +130 -42
  5. returnn/datasets/meta.py +93 -43
  6. returnn/datasets/postprocessing.py +597 -108
  7. returnn/datasets/util/vocabulary.py +90 -0
  8. returnn/frontend/__init__.py +1 -0
  9. returnn/frontend/_backend.py +41 -0
  10. returnn/frontend/_native/__init__.py +22 -0
  11. returnn/frontend/_numpy_backend.py +7 -0
  12. returnn/frontend/_utils.py +1 -1
  13. returnn/frontend/array_.py +48 -2
  14. returnn/frontend/assert_.py +35 -0
  15. returnn/frontend/attention.py +54 -20
  16. returnn/frontend/conv.py +273 -54
  17. returnn/frontend/device.py +14 -1
  18. returnn/frontend/encoder/conformer.py +20 -0
  19. returnn/frontend/encoder/transformer.py +2 -0
  20. returnn/frontend/loss.py +222 -3
  21. returnn/frontend/math_.py +54 -14
  22. returnn/native_op.cpp +182 -172
  23. returnn/native_op.py +36 -31
  24. returnn/sprint/cache.py +12 -13
  25. returnn/tensor/_dim_extra.py +7 -7
  26. returnn/tensor/_tensor_extra.py +10 -10
  27. returnn/tensor/utils.py +8 -5
  28. returnn/tf/frontend_layers/_backend.py +7 -3
  29. returnn/tf/layers/basic.py +27 -40
  30. returnn/tf/native_op.py +27 -63
  31. returnn/tf/network.py +1 -1
  32. returnn/tf/util/basic.py +22 -197
  33. returnn/torch/engine.py +157 -6
  34. returnn/torch/frontend/_backend.py +280 -29
  35. returnn/torch/frontend/bridge.py +61 -0
  36. returnn/torch/frontend/compile_helper.py +106 -0
  37. returnn/torch/util/array_.py +30 -0
  38. returnn/torch/util/assert_.py +122 -0
  39. returnn/torch/util/exception_helper.py +7 -1
  40. returnn/torch/util/native_op.py +885 -0
  41. returnn/torch/util/native_op_code_compiler.py +308 -0
  42. returnn/util/basic.py +6 -7
  43. returnn/util/better_exchook.py +4 -0
  44. returnn/util/cuda_env.py +332 -0
  45. returnn/util/debug.py +12 -2
  46. returnn/util/file_cache.py +15 -1
  47. returnn/util/fsa.py +17 -13
  48. returnn/util/native_code_compiler.py +104 -47
  49. returnn/util/task_system.py +1 -1
  50. {returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/METADATA +2 -2
  51. {returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/RECORD +54 -48
  52. {returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/WHEEL +1 -1
  53. {returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/LICENSE +0 -0
  54. {returnn-1.20251027.232712.dist-info → returnn-1.20260119.15400.dist-info}/top_level.txt +0 -0
returnn/tensor/_dim_extra.py CHANGED
@@ -858,7 +858,7 @@ class _DimMixin:
         self._make_extra()
         dim_order_default = self.dyn_size_ext.dims + (self,)
         if dim_order is not None:
-            dim_order = tuple(d for d in dim_order if d in dim_order_default)  # filter
+            dim_order = tuple([d for d in dim_order if d in dim_order_default])  # filter
         else:
             dim_order = dim_order_default
         cache_key = (device, dim_order)
@@ -2484,16 +2484,16 @@ _BinOpStrs = {

 def _math_get_dim_via_bin_op(dims: Sequence[Union[Dim, int]], op_kind: str) -> Dim:
     dims = [d if isinstance(d, _d.Dim) else _make_constant_static_dim(d) for d in dims]
-    if all(d.dimension is not None for d in dims):
+    if all([d.dimension is not None for d in dims]):
         op = _BinOps[op_kind]
         dim_value = dims[0].dimension
         for d in dims[1:]:
             dim_value = op(dim_value, d.dimension)
     else:
         dim_value = None
-    if all(d.is_constant_static_dim() for d in dims):
+    if all([d.is_constant_static_dim() for d in dims]):
         return _make_constant_static_dim(dim_value, kind=_get_merged_dim_kind(dims))
-    desc = _BinOpStrs[op_kind].join(_get_description(d) for d in dims)
+    desc = _BinOpStrs[op_kind].join([_get_description(d) for d in dims])
     if op_kind.startswith("ceildiv"):
         desc = f"⌈{desc}⌉"
     return _d.Dim(
@@ -2676,16 +2676,16 @@ def _get_description(dim, brackets=True):


 def _get_merged_dim_kind(dim_tags: Sequence[Dim]) -> Entity:
-    if any(tag.is_batch_dim() for tag in dim_tags):
+    if any([tag.is_batch_dim() for tag in dim_tags]):
         return DimTypes.Batch
-    elif any(tag.is_feature_dim() for tag in dim_tags):
+    elif any([tag.is_feature_dim() for tag in dim_tags]):
         return DimTypes.Feature
     else:
         return DimTypes.Spatial


 def _representative_tag(terms: Sequence[Dim]) -> Optional[Dim]:
-    if any(not term_.auto_generated for term_ in terms):
+    if any([not term_.auto_generated for term_ in terms]):
         # Always prefer non-auto-generated.
         terms = [term_ for term_ in terms if not term_.auto_generated]
     # First find any dynamic.
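Note on the recurring pattern in this and the following hunks: generator expressions inside all()/any() are replaced by list comprehensions. A minimal standalone timeit sketch (not part of the diff) of why this micro-optimization pays off in CPython, where building a small temporary list is cheaper than creating and resuming a generator frame:

import timeit

dims = list(range(8))  # stand-in for a short sequence of Dim objects

gen = timeit.timeit(lambda: all(d is not None for d in dims), number=200_000)
lst = timeit.timeit(lambda: all([d is not None for d in dims]), number=200_000)
print(f"generator: {gen:.3f}s, list comp: {lst:.3f}s")  # list comp is typically faster for short inputs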
returnn/tensor/_tensor_extra.py CHANGED
@@ -32,8 +32,8 @@ class _TensorExtra:
         tensor: Tensor,
         time_dim_axis=NotSpecified,
         available_for_inference=True,
-        batch=None,
-        beam=None,
+        batch: Optional[BatchInfo] = None,
+        beam: Optional[SearchBeam] = None,
         control_flow_ctx=None,
     ):
         """
@@ -41,8 +41,8 @@ class _TensorExtra:
         :param int|None|NotSpecified time_dim_axis: where we have the time dim axis, after we added the batch-dim.
             this is often 1. however, can be None if there is no time-dim.
         :param bool available_for_inference: e.g. the extern data "classes" is usually not available for inference
-        :param BatchInfo|None batch:
-        :param SearchBeam|None beam: the batch-dim could be extended by a beam-size,
+        :param batch:
+        :param beam: the batch-dim could be extended by a beam-size,
             such that it represents the merged dims [batch, beam_size].
         :param ControlFlowContext|None control_flow_ctx:
         """
@@ -668,11 +668,11 @@ class _TensorMixin(_TensorMixinBase):
         if not perm:
             return self.copy()
         if allow_int and isinstance(perm[0], int):
-            assert all(isinstance(a, int) for a in perm), f"{self}: invalid perm {perm!r} types"
+            assert all([isinstance(a, int) for a in perm]), f"{self}: invalid perm {perm!r} types"
             assert set(perm) == set(range(len(perm))), f"{self}: invalid perm {perm!r}"
             return self._copy_compatible_to_dims_with_perm([self._dims[i] for i in perm], perm)
         else:
-            assert all(isinstance(a, Dim) for a in perm), f"{self}: invalid perm {perm!r} types"
+            assert all([isinstance(a, Dim) for a in perm]), f"{self}: invalid perm {perm!r} types"
             return self.copy_compatible_to_dims(perm)

     def copy_move_axis(self, old_axis, new_axis) -> _t.Tensor:
@@ -1155,7 +1155,7 @@ class _TensorMixin(_TensorMixinBase):
         )

         assert v.batch_ndim == data.batch_ndim
-        assert all(mapped_axes[ax] == ax for ax in range(v.batch_ndim))
+        assert all([mapped_axes[ax] == ax for ax in range(v.batch_ndim)])

         if self.version == 1:
             # Ensure time_dim_axis and feature_dim_axis is same as in data
@@ -1702,7 +1702,7 @@ class _TensorMixin(_TensorMixinBase):
         """
         :return: shape with added batch-dim. e.g. (batch,time,feat) = (None,None,128)
         """
-        return tuple(tag.dimension for tag in self.dim_tags)
+        return tuple([tag.dimension for tag in self.dim_tags])

     # noinspection PyShadowingNames
     def get_batch_shape(self, batch_dim):
@@ -3214,7 +3214,7 @@ class _TensorMixin(_TensorMixinBase):
         if len(sources) == 1:
             return sources[0].copy_template()
         max_ndim = max([s.batch_ndim for s in sources])
-        if any(src.batch for src in sources):
+        if any([src.batch for src in sources]):
             from returnn.tf.util.data import BatchInfo

             common_batch = BatchInfo.get_common_batch_info([src.batch for src in sources if src.batch])
@@ -3254,7 +3254,7 @@ class _TensorMixin(_TensorMixinBase):
             else:
                 axis = common.get_default_new_axis_for_dim_tag(dim_tag)
                 common = common.copy_add_dim_by_tag(dim_tag, unbroadcast=True, axis=axis)
-        if all(s.batch_ndim < common.batch_ndim for s in sources):
+        if all([s.batch_ndim < common.batch_ndim for s in sources]):
             from returnn.util.basic import validate_broadcast_all_sources

             validate_broadcast_all_sources(
returnn/tensor/utils.py CHANGED
@@ -36,11 +36,14 @@ def tensor_fill_random_numpy_(
     *,
     min_val: int = 0,
     max_val: Optional[int] = None,
-    rnd: numpy.random.RandomState,
+    rnd: Optional[numpy.random.RandomState] = None,
     dyn_dim_max_sizes: Optional[Dict[Dim, int]] = None,
     dyn_dim_min_sizes: Optional[Dict[Dim, int]] = None,
 ) -> bool:
     """fill. return whether sth was filled"""
+    if rnd is None:
+        # noinspection PyUnresolvedReferences,PyProtectedMember
+        rnd = numpy.random.mtrand._rand
     if dyn_dim_max_sizes is None:
         dyn_dim_max_sizes = {}
     if dyn_dim_min_sizes is None:
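A standalone sketch (not part of the diff) of what the new default does: numpy.random.mtrand._rand is the module-level RandomState instance behind the numpy.random.* functions, so omitting rnd now behaves like calling those module functions directly.

import numpy

# noinspection PyUnresolvedReferences,PyProtectedMember
rnd = numpy.random.mtrand._rand  # the global RandomState behind numpy.random.*

numpy.random.seed(123)
a = rnd.randint(0, 10)
numpy.random.seed(123)
b = numpy.random.randint(0, 10)
assert a == b  # same underlying generator state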
@@ -59,7 +62,7 @@ def tensor_fill_random_numpy_(
                 continue
             if tensor_fill_random_numpy_(
                 dim.dyn_size_ext,
-                min_val=dyn_dim_min_sizes.get(dim, 2),
+                min_val=dyn_dim_min_sizes.get(dim, min(2, dyn_dim_max_sizes.get(dim, 2))),
                 max_val=dyn_dim_max_sizes.get(dim, None),
                 rnd=rnd,
                 dyn_dim_max_sizes=dyn_dim_max_sizes,
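The new default min(2, dyn_dim_max_sizes.get(dim, 2)) keeps the lower bound consistent with a requested maximum; a sketch (not part of the diff):

max_size = 1                # e.g. dyn_dim_max_sizes[dim] = 1
old_min = 2                 # previous default, could exceed max_size
new_min = min(2, max_size)  # clamped default: 1
assert new_min <= max_size  # sampling range stays non-empty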
@@ -68,7 +71,7 @@ def tensor_fill_random_numpy_(
                 # Make sure at least one of the dyn sizes matches the max size.
                 i = rnd.randint(0, dim.dyn_size_ext.raw_tensor.size)
                 dim.dyn_size_ext.raw_tensor.flat[i] = dyn_dim_max_sizes[dim]
-                if dim in dyn_dim_min_sizes:
+                if dim in dyn_dim_min_sizes and dim.dyn_size_ext.raw_tensor.size > 1:
                     j = rnd.randint(0, dim.dyn_size_ext.raw_tensor.size - 1)
                     if j >= i:
                         j += 1
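The added "size > 1" guard avoids asking randint for a second distinct index when only one exists; a sketch (not part of the diff) of the failure it prevents:

import numpy

rnd = numpy.random.RandomState(0)
try:
    rnd.randint(0, 0)  # what "j = rnd.randint(0, size - 1)" did for size == 1
except ValueError as exc:
    print("randint(0, 0) raises:", exc)  # low >= high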
@@ -98,8 +101,8 @@ def tensor_fill_random_numpy_(
     if max_val is None:
         max_val = rnd.randint(5, 20)
     if x.sparse_dim and x.sparse_dim.dimension is not None:
-        max_val = x.sparse_dim.dimension
-        x.raw_tensor = rnd.randint(min_val, max_val, size=shape, dtype=x.dtype)
+        max_val = x.sparse_dim.dimension - 1
+        x.raw_tensor = rnd.randint(min_val, max_val + 1, size=shape, dtype=x.dtype)
     elif x.dtype == "bool":
         x.raw_tensor = rnd.randint(0, 2, size=shape, dtype=x.dtype)
     elif x.dtype.startswith("float"):
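Note on the sparse branch: NumPy's randint(low, high) excludes high, so both the old and the new lines sample labels 0 .. dimension - 1; the rewrite makes max_val consistently mean an inclusive upper bound, matching the max_val + 1 convention of the other branches. A sketch (not part of the diff):

import numpy

rnd = numpy.random.RandomState(0)
dim = 5  # stand-in for x.sparse_dim.dimension
old = rnd.randint(0, dim, size=10_000)            # old form: exclusive high
new = rnd.randint(0, (dim - 1) + 1, size=10_000)  # new form: inclusive max_val, then + 1
assert set(old) == set(new) == set(range(dim))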
returnn/tf/frontend_layers/_backend.py CHANGED
@@ -465,6 +465,8 @@ class ReturnnLayersBackend(Backend[Layer]):
         targets_spatial_dim: Dim,
         blank_index: int,
         max_approx: bool = False,
+        use_native_op: Optional[bool] = None,
+        label_loop: bool = True,
     ) -> Tensor:
         """CTC"""
         assert targets.sparse_dim and targets.sparse_dim.dimension <= logits.feature_dim.dimension
@@ -482,6 +484,7 @@ class ReturnnLayersBackend(Backend[Layer]):
                 "targets": targets,
                 "blank_index": blank_index,
                 "max_approx": max_approx,
+                "label_loop": label_loop,
             },
             name="ctc_loss",
         )
@@ -944,7 +947,6 @@ class ReturnnLayersBackend(Backend[Layer]):
         """
         assert mask.dtype == "bool"
         assert set(mask.dims) == set(dims)
-        assert set(mask.dims).issubset(set(tensor.dims))
         if not out_dim:
             out_dim = Dim(None, name="mask")
         return (
@@ -1067,14 +1069,16 @@ class ReturnnLayersBackend(Backend[Layer]):
             s = filter_size[i].dimension if not strides else strides[i]
             if filter_size[i].dimension == s == 1 or (s == 1 and padding.lower() == "same"):
                 out_spatial_dims[i] = in_spatial_dims[i]
-        layer_dict = {
+        assert all(size.is_static() for size in filter_size)
+        layer_dict: Dict[str, Any] = {
             "class": "transposed_conv",
             "from": source,
             "in_dim": in_dim,
             "in_spatial_dims": in_spatial_dims,
             "out_dim": out_dim,
             "out_spatial_dims": out_spatial_dims,
-            "filter_size": filter_size,
+            "filter_size": [size.dimension for size in filter_size],
+            "filter_perm": list(filter_size) + [out_dim, in_dim],
             "padding": padding,
         }
         if remove_padding:
returnn/tf/layers/basic.py CHANGED
@@ -2741,7 +2741,7 @@ class BooleanMaskLayer(LayerBase):
         tensor = self.sources[0].output
         remaining_dims = [d for d in tensor.dims if d not in dims]
         tensor_templ = tensor.copy_template_new_dim_tags(tuple(dims) + tuple(remaining_dims))
-        tensor = tensor.copy_compatible_to(tensor_templ, add_dims=False)
+        tensor = tensor.copy_compatible_to(tensor_templ, unbroadcast=True)
         mask_templ = mask.output.copy_template_new_dim_tags(new_dim_tags=tuple(dims))
         mask_ = mask.output.copy_compatible_to(mask_templ, add_dims=False)
         self.output.raw_tensor = tf.boolean_mask(tensor.raw_tensor, mask=mask_.raw_tensor)
@@ -7371,7 +7371,7 @@ class TransposedConvLayer(_ConcatInputLayer):
         """
         from returnn.tf.util.basic import get_initializer, get_activation_function, get_shape

-        super(TransposedConvLayer, self).__init__(**kwargs)
+        super(TransposedConvLayer, self).__init__(in_dim=in_dim, **kwargs)
         out_dim  # noqa  # via get_out_data_from_opts
         assert not self.input_data.sparse
         assert self.input_data.have_batch_axis()
@@ -7516,7 +7516,10 @@ class TransposedConvLayer(_ConcatInputLayer):
     ):
         """
         Determines output length of a transposed convolution given input length.
-        Copied from conv_utils.deconv_output_length, adapted with simplification.
+
+        Copied from TF/Keras conv_utils.deconv_output_length
+        (https://github.com/tensorflow/tensorflow/blob/5912f51d580551e5cee2cfde4cb882594b4d3e60/tensorflow/python/keras/utils/conv_utils.py#L140),
+        adapted with simplification.

         Also see :func:`ConvLayer.calc_out_dim`.

@@ -7533,44 +7536,17 @@ class TransposedConvLayer(_ConcatInputLayer):
         """
         if out_dim and out_dim.is_dim_known():
             return out_dim.get_dim_value()
-        assert padding in {"same", "valid", "full"}
-
-        # Get the dilated kernel size
-        filter_size = filter_size + (filter_size - 1) * (dilation - 1)

-        if stride != 1:
-            input_length = input_length * stride
+        import returnn.frontend as rf

-        # Infer length if output padding is None, else compute the exact length
-        if output_padding is None:
-            if padding == "valid":
-                if isinstance(input_length, Dim):
-                    length = input_length + max(filter_size - stride, 0)
-                else:
-                    length = tf_util.simplify_add(input_length, max(filter_size - stride, 0))
-            elif padding == "full":
-                if isinstance(input_length, Dim):
-                    length = input_length - (stride + filter_size - 2)
-                else:
-                    length = tf_util.simplify_add(input_length, -(stride + filter_size - 2))
-            elif padding == "same":
-                length = input_length
-            else:
-                raise Exception("invalid padding %r" % (padding,))
-        else:  # output_padding
-            if padding == "same":
-                pad = filter_size // 2
-            elif padding == "valid":
-                pad = 0
-            elif padding == "full":
-                pad = filter_size - 1
-            else:
-                raise Exception("invalid padding %r" % (padding,))
-            if isinstance(input_length, Dim):
-                length = input_length + (-stride + filter_size - 2 * pad + output_padding)
-            else:
-                length = tf_util.simplify_add(input_length, -stride + filter_size - 2 * pad + output_padding)
-        return length
+        return rf.calc_transposed_conv_out_length(
+            input_length,
+            filter_size=filter_size,
+            padding=padding,
+            output_padding=output_padding,
+            stride=stride,
+            dilation_rate=dilation,
+        )

     @classmethod
     def get_out_data_from_opts(
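The removed per-branch arithmetic is the classic Keras deconv_output_length formula, now delegated to the shared frontend helper. A standalone integer-only sketch (not part of the diff; the real helper also accepts Dim objects):

def deconv_out_length(input_length, filter_size, padding, output_padding=None, stride=1, dilation=1):
    """Transposed-conv output length, mirroring the removed TF-layer code."""
    filter_size = filter_size + (filter_size - 1) * (dilation - 1)  # dilated kernel size
    input_length = input_length * stride
    if output_padding is None:
        if padding == "valid":
            return input_length + max(filter_size - stride, 0)
        if padding == "full":
            return input_length - (stride + filter_size - 2)
        if padding == "same":
            return input_length
        raise ValueError(f"invalid padding {padding!r}")
    pad = {"same": filter_size // 2, "valid": 0, "full": filter_size - 1}[padding]
    return input_length - stride + filter_size - 2 * pad + output_padding

assert deconv_out_length(10, filter_size=3, padding="valid", stride=2) == 21
assert deconv_out_length(10, filter_size=3, padding="same", stride=2) == 20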
@@ -11562,13 +11538,23 @@ class CtcLossLayer(LayerBase):
     layer_class = "ctc_loss"
     recurrent = True  # order matters

-    def __init__(self, logits, targets, logits_normalized=False, blank_index=-1, max_approx=False, **kwargs):
+    def __init__(
+        self,
+        logits,
+        targets,
+        logits_normalized=False,
+        blank_index=-1,
+        max_approx=False,
+        label_loop: bool = True,
+        **kwargs,
+    ):
         """
         :param LayerBase logits: (before softmax). shape [B,T,D]
         :param LayerBase targets: sparse. shape [B,T]
         :param bool logits_normalized: whether the logits are already normalized (e.g. via log-softmax)
         :param int blank_index: vocab index of the blank symbol
         :param bool max_approx: if True, use max instead of sum over alignments (max approx, Viterbi)
+        :param label_loop:
         """
         from returnn.tf.native_op import ctc_loss, ctc_loss_viterbi

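A hypothetical net-dict usage sketch of the new option (layer names made up, not from the diff); label_loop=False selects a CTC topology without label loops, while the default True reproduces the old ctc_merge_repeated behavior:

network = {
    "ctc": {
        "class": "ctc_loss",
        "logits": "encoder_logits",  # hypothetical layer name
        "targets": "data:classes",
        "blank_index": 0,
        "label_loop": False,
    },
}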
@@ -11591,6 +11577,7 @@ class CtcLossLayer(LayerBase):
             targets=targets.output.copy_as_batch_major().placeholder,
             targets_seq_lens=targets.output.get_sequence_lengths(),
             blank_index=blank_index,
+            label_loop=label_loop,
         )

     def get_dep_layers(self):
returnn/tf/native_op.py CHANGED
@@ -528,77 +528,30 @@ class OpMaker:
     def _make_mod(self):
         if self.cache_key in self.mod_cache:
             return self.mod_cache[self.cache_key]
-        from returnn.util.basic import find_lib
-
-        # Note about BLAS linkage:
-        # TensorFlow (or its Eigen lib) likely has linked against some BLAS lib itself.
-        # For our CPU code, we directly call some BLAS functions such as `sgemm_`.
-        # On platforms where there is a flat namespace (e.g. Mac),
-        # it probably is not needed to explicitly link it again for this module.
-        # In other cases, it's probably needed, but it's not so clear which lib has the
-        # right symbols (e.g. the `sgemm_` symbol).
+
+        # Note about BLAS / matmul:
+        # Earlier, we assumed that TensorFlow/Eigen used BLAS internally,
+        # and our code directly called BLAS sgemm_, so we needed to link directly to BLAS.
+        # Now, by default, we use the underlying Eigen library,
+        # which is the same code path that TF also uses for CPU matmul.
+        # Only if an explicit BLAS library is specified, we use that instead.
         ld_flags = []
-        have_blas_lib = False
+        c_macro_defines = {}

         if self.blas_lib is not None and os.path.exists(self.blas_lib):
             path = os.path.dirname(self.blas_lib)
             if path == "":
                 path = "."
             ld_flags += ["-L%s" % path, "-l:%s" % os.path.basename(self.blas_lib)]
-            have_blas_lib = True
-        if not have_blas_lib and self.search_for_runtime_blas:
-            from returnn.util.basic import find_sgemm_libs_from_runtime
-
-            libs = find_sgemm_libs_from_runtime()
-            if libs:
-                numpy_libs = [fn for fn in libs if "/numpy/.libs/" in fn]
-                if numpy_libs:
-                    # Prefer Numpy; move to front.
-                    libs = numpy_libs + [fn for fn in libs if fn not in numpy_libs]
-                if self.blas_lib is not None:
-                    libs = [lib for lib in libs if self.blas_lib in lib]
-                for fn in libs:
-                    ld_flags += ["-L%s" % os.path.dirname(fn), "-l:%s" % os.path.basename(fn)]
-                have_blas_lib = True
-        if not have_blas_lib and self.search_for_numpy_blas:
-            # Find related Numpy libs.
-            # Numpy usually comes with OpenBlas, and Numpy is probably loaded anyway.
-            # Even do this before the other libs below, as it is likely
-            # that this OpenBlas lib is correctly initialized already.
-            import numpy
-
-            numpy_dir = os.path.dirname(numpy.__file__)
-            if os.path.exists("%s/.libs" % numpy_dir):
-                ld_flags += ["-L%s/.libs" % numpy_dir]
-                from glob import glob
-
-                for f in glob("%s/.libs/*.so" % numpy_dir):
-                    f = os.path.basename(f)
-                    if self.blas_lib is not None and self.blas_lib not in f:
-                        continue
-                    if f.startswith("lib"):
-                        f = f[3:]
-                    if f.endswith(".so"):
-                        f = f[:-3]
-                    ld_flags += ["-l%s" % f]
-                    have_blas_lib = True
-        if not have_blas_lib and self.search_for_system_blas:
-            # Try to just link against blas/f77blas
-            # (both can potentially have the symbol) if it finds the lib.
-            if find_lib("blas"):
-                ld_flags += ["-lblas"]
-                have_blas_lib = True
-            if find_lib("f77blas"):
-                ld_flags += ["-lf77blas"]
-                have_blas_lib = True
-        if not have_blas_lib:
-            print("WARNING: OpMaker: no BLAS lib found")
+            c_macro_defines["HAVE_CUSTOM_BLAS"] = "1"
+
         comp = tf_util.OpCodeCompiler(
             base_name=self.name,
             code_version=self.description.code_version,
             code=self._make_code(),
             include_deps=[self.support_native_op_cpp_filename],
             ld_flags=ld_flags,
+            c_macro_defines=c_macro_defines,
             use_cuda_if_available=self.with_cuda,
             log_stream=self.log_stream,
             **dict(self.compiler_opts),
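A sketch of the macro-define mechanism (an assumption about OpCodeCompiler, not code from the diff): a c_macro_defines dict typically becomes -D compiler flags, which the native code can test with #ifdef HAVE_CUSTOM_BLAS to choose direct BLAS sgemm_ calls over the default Eigen path.

def macro_defines_to_flags(c_macro_defines):
    # e.g. {"HAVE_CUSTOM_BLAS": "1"} -> ["-DHAVE_CUSTOM_BLAS=1"]
    return ["-D%s=%s" % (k, v) for k, v in sorted(c_macro_defines.items())]

assert macro_defines_to_flags({"HAVE_CUSTOM_BLAS": "1"}) == ["-DHAVE_CUSTOM_BLAS=1"]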
@@ -1520,12 +1473,14 @@ def fast_baum_welch_staircase(am_scores, seq_lens, **opts):


 def ctc_loss(
+    *,
     logits,
     logits_seq_lens,
     logits_time_major,
     targets,
     targets_seq_lens,
-    ctc_merge_repeated=True,
+    label_loop: Optional[bool] = None,
+    ctc_merge_repeated: Optional[bool] = None,
     logits_normalize=True,
     grad_wrt_softmax_in=True,
     blank_index=-1,
@@ -1540,7 +1495,8 @@ def ctc_loss(
     :param bool logits_time_major:
     :param tf.Tensor targets: batch-major, [batch,time]
     :param tf.Tensor targets_seq_lens: (batch,)
-    :param bool ctc_merge_repeated:
+    :param label_loop:
+    :param ctc_merge_repeated: alias for label_loop
     :param bool logits_normalize: apply log_softmax on logits (default).
         if False, you might also set grad_wrt_softmax_in=False
     :param bool grad_wrt_softmax_in: assume ``p(s|x) = softmax(logits)``, and define the gradient w.r.t. logits.
@@ -1551,6 +1507,11 @@ def ctc_loss(
     :return: loss, shape (batch,)
     :rtype: tf.Tensor
     """
+    if ctc_merge_repeated is not None:
+        assert label_loop is None
+        label_loop = ctc_merge_repeated
+    if label_loop is None:
+        label_loop = True
     assert logits.get_shape().ndims == 3 and logits.get_shape().dims[-1].value
     dim = logits.get_shape().dims[-1].value
     if not logits_time_major:
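The alias handling above, as a standalone sketch (not RETURNN code): ctc_merge_repeated stays usable as a backwards-compatible alias for label_loop, passing both is rejected, and the default remains True.

from typing import Optional

def resolve_label_loop(label_loop: Optional[bool], ctc_merge_repeated: Optional[bool]) -> bool:
    if ctc_merge_repeated is not None:
        assert label_loop is None, "pass either label_loop or ctc_merge_repeated, not both"
        label_loop = ctc_merge_repeated
    return True if label_loop is None else label_loop

assert resolve_label_loop(None, None) is True    # default
assert resolve_label_loop(False, None) is False  # new name
assert resolve_label_loop(None, False) is False  # legacy alias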
@@ -1567,7 +1528,7 @@ def ctc_loss(
         blank_index += dim
     assert 0 <= blank_index < dim
     edges, weights, start_end_states = get_ctc_fsa_fast_bw(
-        targets=targets, seq_lens=targets_seq_lens, blank_idx=blank_index, label_loop=ctc_merge_repeated
+        targets=targets, seq_lens=targets_seq_lens, blank_idx=blank_index, label_loop=label_loop
     )
     fwdbwd, obs_scores = fast_baum_welch(
         am_scores=-log_sm, float_idx=seq_mask, edges=edges, weights=weights, start_end_states=start_end_states
@@ -1607,7 +1568,9 @@ def fast_viterbi(am_scores, am_seq_len, edges, weights, start_end_states):
     return alignment, scores


-def ctc_loss_viterbi(logits, logits_seq_lens, logits_time_major, targets, targets_seq_lens, blank_index=-1):
+def ctc_loss_viterbi(
+    *, logits, logits_seq_lens, logits_time_major, targets, targets_seq_lens, blank_index=-1, label_loop: bool = True
+):
     """
     Similar to :func:`ctc_loss`.
     However, instead of using the full sum, we use the best path (i.e. Viterbi instead of Baum-Welch).
@@ -1619,6 +1582,7 @@ def ctc_loss_viterbi(logits, logits_seq_lens, logits_time_major, targets, target
     :param tf.Tensor targets: batch-major, [batch,time]
     :param tf.Tensor targets_seq_lens: (batch,)
     :param int blank_index: vocab index of the blank symbol
+    :param label_loop:
     :return: loss, shape (batch,)
     :rtype: tf.Tensor
     """
@@ -1632,7 +1596,7 @@ def ctc_loss_viterbi(logits, logits_seq_lens, logits_time_major, targets, target
         blank_index += dim
     assert 0 <= blank_index < dim
     edges, weights, start_end_states = get_ctc_fsa_fast_bw(
-        targets=targets, seq_lens=targets_seq_lens, blank_idx=blank_index
+        targets=targets, seq_lens=targets_seq_lens, blank_idx=blank_index, label_loop=label_loop
     )
     alignment, scores = fast_viterbi(
         am_scores=log_sm, am_seq_len=logits_seq_lens, edges=edges, weights=weights, start_end_states=start_end_states
returnn/tf/network.py CHANGED
@@ -4428,7 +4428,7 @@ def help_on_tf_exception(
                 data = extern_data.data[data_key]
                 info += ", %s" % data
             print("  %r: %s" % (key, info), file=file)
-            if data and data.sparse:
+            if data is not None and data.sparse:
                 if v_minmax[0] < 0 or v_minmax[1] >= data.dim:
                     print("  WARNING, invalid label for data", data, file=file)
         elif feed_dict is None:
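Why "if data" became "if data is not None" (a sketch, not from the diff): truthiness of a tensor-like object may be False or may raise, even though the object exists; the explicit None check only tests presence.

class TensorLike:
    def __bool__(self):
        raise TypeError("truth value of a tensor is ambiguous")

data = TensorLike()
assert data is not None  # presence check, always safe
try:
    if data:  # what the old "if data and data.sparse" effectively evaluated
        pass
except TypeError as exc:
    print("bool(data) raised:", exc)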