returnn 1.20250513.145447__py3-none-any.whl → 1.20250514.101430__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Potentially problematic release.


This version of returnn might be problematic. Click here for more details.

returnn/PKG-INFO CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: returnn
3
- Version: 1.20250513.145447
3
+ Version: 1.20250514.101430
4
4
  Summary: The RWTH extensible training framework for universal recurrent neural networks
5
5
  Home-page: https://github.com/rwth-i6/returnn/
6
6
  Author: Albert Zeyer
@@ -1,2 +1,2 @@
1
- version = '1.20250513.145447'
2
- long_version = '1.20250513.145447+git.9cdc2a4'
1
+ version = '1.20250514.101430'
2
+ long_version = '1.20250514.101430+git.c557cc3'
@@ -15,6 +15,7 @@ __all__ = [
15
15
 
16
16
  from typing import Optional, Union, Type, Callable, List, Dict
17
17
  import sys
18
+ import re
18
19
  import numpy
19
20
 
20
21
  from returnn.util.basic import NotSpecified
@@ -58,6 +59,7 @@ class Vocabulary:
58
59
  num_labels: Optional[int] = None,
59
60
  seq_postfix: Optional[List[int]] = None,
60
61
  labels: Optional[Union[List[str], Callable[[], List[str]]]] = None,
62
+ single_whitespace_split: bool = False,
61
63
  ):
62
64
  """
63
65
  :param vocab_file:
@@ -76,6 +78,11 @@ class Vocabulary:
76
78
  :param num_labels: just for verification
77
79
  :param seq_postfix: labels will be added to the seq in self.get_seq
78
80
  :param labels:
81
+ :param single_whitespace_split:
82
+ Assume that the given text is encoded using ``" ".join(labels[i] for i in seq)``,
83
+ and this will undo that.
84
+ This makes a difference when there is whitespace itself in the vocab (in ``labels``).
85
+ If not enabled (the default), this will simply use ``str.split()``.
79
86
  """
80
87
  if vocab_file and not isinstance(vocab_file, str): # sometimes it is a Path
81
88
  vocab_file = str(vocab_file)
@@ -131,6 +138,12 @@ class Vocabulary:
131
138
  self.control_symbol_ids = {name: self.to_id(label) for name, label in (control_symbols or {}).items()}
132
139
  self.user_defined_symbol_ids = {name: self.to_id(label) for name, label in (user_defined_symbols or {}).items()}
133
140
  self.seq_postfix = seq_postfix or []
141
+ # To be used with findall in get_seq.
142
+ self.decode_seq_token_re = (
143
+ re.compile("(%s|\\S+)(?: |$)" % "|".join(re.escape(v) for v in self.labels))
144
+ if single_whitespace_split
145
+ else None
146
+ )
134
147
 
135
148
  def __repr__(self):
136
149
  parts = [repr(self.vocab_file), "num_labels=%s" % self.num_labels]
@@ -317,7 +330,10 @@ class Vocabulary:
317
330
  :param sentence: assumed to be seq of vocab entries separated by whitespace
318
331
  :return: seq of label indices
319
332
  """
320
- segments = sentence.split()
333
+ if self.decode_seq_token_re is not None:
334
+ segments = self.decode_seq_token_re.findall(sentence)
335
+ else:
336
+ segments = sentence.split()
321
337
  return self.get_seq_indices(segments) + self.seq_postfix
322
338
 
323
339
  def get_seq_indices(self, seq: List[str]) -> List[int]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: returnn
3
- Version: 1.20250513.145447
3
+ Version: 1.20250514.101430
4
4
  Summary: The RWTH extensible training framework for universal recurrent neural networks
5
5
  Home-page: https://github.com/rwth-i6/returnn/
6
6
  Author: Albert Zeyer
@@ -1,9 +1,9 @@
1
- returnn/PKG-INFO,sha256=EAThvppPVTS1QgrQiT4pYNDRTslbXgFJo7fxZvoDH7M,5215
1
+ returnn/PKG-INFO,sha256=AEKRzwj7-1_1NcUCAPSSEPkMoIrXK-7K5NtSOZBfJvk,5215
2
2
  returnn/__init__.py,sha256=biBtRsM0WZ406vShaeH-9WFoqJ8XwTbn6g0EeFJ7l8E,1012
3
3
  returnn/__main__.py,sha256=qBFbuB1yN3adgVM5pXt2-Yq9vorjRNchNPL8kDKx44M,31752
4
4
  returnn/__old_mod_loader__.py,sha256=nvsNY-xELdS_IPNkv66Q9Rmvg4dbGW0-EBRDcCmctos,7654
5
5
  returnn/__setup__.py,sha256=22kQn2fh11iPM0hLb2Fy5sLmoU1JGvmDxXRYuRgQkwU,4659
6
- returnn/_setup_info_generated.py,sha256=98tw07uGc71PVFbmlHMzIg0bM0GI8-sDkoG6ohO4Fx4,77
6
+ returnn/_setup_info_generated.py,sha256=CfrKH5EWL08ucEeXafiSxPiV-BUoBw--NGoCK_ERZnw,77
7
7
  returnn/config.py,sha256=3tmKhB6FnQZaNdtcYsiB61JnEY--iZ2qmJ4yq0b6tE0,29140
8
8
  returnn/forward_iface.py,sha256=A_OJiaXsX4MlXQRzST86ylyxSUZbC402PQL1REcqHjM,911
9
9
  returnn/learning_rate_control.py,sha256=ZvWryAn_tv9DhV8sh1LV3eE34Yltl3On3mYZAG4hR9s,34684
@@ -34,7 +34,7 @@ returnn/datasets/text_dict.py,sha256=BPE73nh6-vtSLy3SiDf4dpFl9RJorE7oO6l5y2FU3MI
34
34
  returnn/datasets/util/__init__.py,sha256=rEKhSD6fyhDiQF-x7dUQMwa29JZu72SDm7mYcCcLghY,52
35
35
  returnn/datasets/util/feature_extraction.py,sha256=axtXDb9wcNpOmyhmW3WJUj5xda29TKkKvOcGGvq7ExA,23923
36
36
  returnn/datasets/util/strings.py,sha256=pP8pmXhArkssYqmPOLuxEG9gsko891ZxrWiai86qbLE,412
37
- returnn/datasets/util/vocabulary.py,sha256=HKEbUmYPiW9hsrxf3eE-bNXBOOUcQ7vHkXB4BPtxmEA,27182
37
+ returnn/datasets/util/vocabulary.py,sha256=1W13FgxPVP9XSIyhkt4I7CXK5lj99zT97R-gLa_xnAU,27964
38
38
  returnn/engine/__init__.py,sha256=br7hpn8i_hIBi2uTQfnN3BF9g5DREYa_mQi0_Nvlu6o,228
39
39
  returnn/engine/base.py,sha256=0n4FtB_B2H3W_9KdoLr0P7YPER-hVkbk69pwFqsqmqw,18467
40
40
  returnn/engine/batch.py,sha256=amXW8mGspuSQjo00JdisE2eOLy5Ij1weWWzkE-lXSJM,9912
@@ -253,8 +253,8 @@ returnn/util/sig_proc.py,sha256=Tjz0VOAVyqu2qDCF5HZ1JjALjcFsHcNkcd96WgZeKfE,7265
253
253
  returnn/util/task_system.py,sha256=y4sMVXQ25Qd2z0rx03uOlXlkE-jbCYC1Sjfn-XlraVU,26003
254
254
  returnn/util/train_proc_manager.py,sha256=Pjht28k6uz6BNQ47uW6Gf880iyq5q4wx7P_K2tmoAM8,3266
255
255
  returnn/util/watch_memory.py,sha256=BR5P2kvBN6UI81cE0_1WAA6Hd1SByLbBaiDxvLhPOew,4213
256
- returnn-1.20250513.145447.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
257
- returnn-1.20250513.145447.dist-info/METADATA,sha256=EAThvppPVTS1QgrQiT4pYNDRTslbXgFJo7fxZvoDH7M,5215
258
- returnn-1.20250513.145447.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
259
- returnn-1.20250513.145447.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
260
- returnn-1.20250513.145447.dist-info/RECORD,,
256
+ returnn-1.20250514.101430.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
257
+ returnn-1.20250514.101430.dist-info/METADATA,sha256=AEKRzwj7-1_1NcUCAPSSEPkMoIrXK-7K5NtSOZBfJvk,5215
258
+ returnn-1.20250514.101430.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
259
+ returnn-1.20250514.101430.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
260
+ returnn-1.20250514.101430.dist-info/RECORD,,