batchalign 0.7.6a32__tar.gz → 0.7.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. {batchalign-0.7.6a32/batchalign.egg-info → batchalign-0.7.7}/PKG-INFO +5 -1
  2. {batchalign-0.7.6a32 → batchalign-0.7.7}/README.md +4 -0
  3. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/formats/chat/utils.py +1 -1
  4. batchalign-0.7.7/batchalign/models/utils.py +199 -0
  5. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/analysis/eval.py +44 -6
  6. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/morphosyntax/ud.py +14 -7
  7. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/utterance/ud_utterance.py +2 -1
  8. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/utils/utils.py +3 -0
  9. batchalign-0.7.7/batchalign/version +3 -0
  10. {batchalign-0.7.6a32 → batchalign-0.7.7/batchalign.egg-info}/PKG-INFO +5 -1
  11. batchalign-0.7.6a32/batchalign/models/utils.py +0 -86
  12. batchalign-0.7.6a32/batchalign/version +0 -3
  13. {batchalign-0.7.6a32 → batchalign-0.7.7}/LICENSE +0 -0
  14. {batchalign-0.7.6a32 → batchalign-0.7.7}/MANIFEST.in +0 -0
  15. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/__init__.py +0 -0
  16. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/__main__.py +0 -0
  17. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/cli/__init__.py +0 -0
  18. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/cli/cli.py +0 -0
  19. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/cli/dispatch.py +0 -0
  20. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/constants.py +0 -0
  21. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/document.py +0 -0
  22. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/errors.py +0 -0
  23. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/formats/__init__.py +0 -0
  24. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/formats/base.py +0 -0
  25. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/formats/chat/__init__.py +0 -0
  26. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/formats/chat/file.py +0 -0
  27. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/formats/chat/generator.py +0 -0
  28. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/formats/chat/lexer.py +0 -0
  29. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/formats/chat/parser.py +0 -0
  30. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/formats/textgrid/__init__.py +0 -0
  31. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/formats/textgrid/file.py +0 -0
  32. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/formats/textgrid/generator.py +0 -0
  33. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/formats/textgrid/parser.py +0 -0
  34. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/models/__init__.py +0 -0
  35. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/models/resolve.py +0 -0
  36. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/models/speaker/__init__.py +0 -0
  37. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/models/speaker/config.yaml +0 -0
  38. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/models/speaker/infer.py +0 -0
  39. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/models/speaker/utils.py +0 -0
  40. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/models/training/__init__.py +0 -0
  41. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/models/training/run.py +0 -0
  42. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/models/training/utils.py +0 -0
  43. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/models/utterance/__init__.py +0 -0
  44. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/models/utterance/dataset.py +0 -0
  45. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/models/utterance/execute.py +0 -0
  46. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/models/utterance/infer.py +0 -0
  47. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/models/utterance/prep.py +0 -0
  48. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/models/utterance/train.py +0 -0
  49. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/models/whisper/__init__.py +0 -0
  50. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/models/whisper/infer_asr.py +0 -0
  51. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/models/whisper/infer_fa.py +0 -0
  52. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/__init__.py +0 -0
  53. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/analysis/__init__.py +0 -0
  54. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/asr/__init__.py +0 -0
  55. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/asr/rev.py +0 -0
  56. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/asr/utils.py +0 -0
  57. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/asr/whisper.py +0 -0
  58. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/asr/whisperx.py +0 -0
  59. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/base.py +0 -0
  60. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/cleanup/__init__.py +0 -0
  61. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  62. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  63. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  64. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/cleanup/retrace.py +0 -0
  65. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  66. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  67. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/cleanup/support/test.test +0 -0
  68. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/dispatch.py +0 -0
  69. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/fa/__init__.py +0 -0
  70. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  71. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  72. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  73. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  74. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  75. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  76. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  77. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  78. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/pipeline.py +0 -0
  79. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/speaker/__init__.py +0 -0
  80. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  81. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/utr/__init__.py +0 -0
  82. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/utr/rev_utr.py +0 -0
  83. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/utr/utils.py +0 -0
  84. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  85. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/pipelines/utterance/__init__.py +0 -0
  86. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/tests/__init__.py +0 -0
  87. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/tests/conftest.py +0 -0
  88. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  89. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  90. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  91. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  92. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  93. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  94. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  95. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  96. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  97. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  98. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  99. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  100. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/tests/pipelines/fixures.py +0 -0
  101. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  102. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  103. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/tests/test_document.py +0 -0
  104. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/utils/__init__.py +0 -0
  105. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/utils/config.py +0 -0
  106. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign/utils/dp.py +0 -0
  107. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign.egg-info/SOURCES.txt +0 -0
  108. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign.egg-info/dependency_links.txt +0 -0
  109. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign.egg-info/entry_points.txt +0 -0
  110. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign.egg-info/requires.txt +0 -0
  111. {batchalign-0.7.6a32 → batchalign-0.7.7}/batchalign.egg-info/top_level.txt +0 -0
  112. {batchalign-0.7.6a32 → batchalign-0.7.7}/setup.cfg +0 -0
  113. {batchalign-0.7.6a32 → batchalign-0.7.7}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.6a32
3
+ Version: 0.7.7
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -59,6 +59,8 @@ The following instructions provide a quick start to installing Batchalign. For m
59
59
  1. Install Python 3.11: [via this link](https://www.python.org/ftp/python/3.11.7/python-3.11.7-amd64.exe)
60
60
  2. If later commands report `pip module not found`, [this page may help](https://github.com/TalkBank/batchalign2/wiki/Troubleshooting-Tips#get-pip-on-windows)
61
61
  - your distribution's instructions for Linux
62
+
63
+ For first-time users of Python, note that if you didn't install Python 3.11 (as we recommended above), it may be complex to change Python versions later, and doing so may cause additional problems. We recommend installing Python 3.11 explicitly by specifying the version number, as shown above.
62
64
 
63
65
  ### Install and Update the Package
64
66
  You can get Batchalign from PyPi, and you can update the package in the same way:
@@ -75,6 +77,8 @@ Windows:
75
77
  py -m pip install -U batchalign
76
78
  ```
77
79
 
80
+ Note that if your system reports `pip: command not found`, replace every use of `pip` in the instructions with `pip3` and try again.
81
+
78
82
  ### Rock and Roll
79
83
  There are two main ways of interacting with Batchalign. Batchalign can be used as a program to batch-process CHAT (hence the name), or as a Python LSA library.
80
84
 
@@ -21,6 +21,8 @@ The following instructions provide a quick start to installing Batchalign. For m
21
21
  1. Install Python 3.11: [via this link](https://www.python.org/ftp/python/3.11.7/python-3.11.7-amd64.exe)
22
22
  2. If later commands report `pip module not found`, [this page may help](https://github.com/TalkBank/batchalign2/wiki/Troubleshooting-Tips#get-pip-on-windows)
23
23
  - your distribution's instructions for Linux
24
+
25
+ For first-time users of Python, note that if you didn't install Python 3.11 (as we recommended above), it may be complex to change Python versions later, and doing so may cause additional problems. We recommend installing Python 3.11 explicitly by specifying the version number, as shown above.
24
26
 
25
27
  ### Install and Update the Package
26
28
  You can get Batchalign from PyPi, and you can update the package in the same way:
@@ -37,6 +39,8 @@ Windows:
37
39
  py -m pip install -U batchalign
38
40
  ```
39
41
 
42
+ Note that if your system reports `pip: command not found`, replace every use of `pip` in the instructions with `pip3` and try again.
43
+
40
44
  ### Rock and Roll
41
45
  There are two main ways of interacting with Batchalign. Batchalign can be used as a program to batch-process CHAT (hence the name), or as a Python LSA library.
42
46
 
@@ -108,7 +108,7 @@ def annotation_clean(content, special=False):
108
108
  cleaned_word = re.sub(r"\x15\d+_\d+\x15", '', cleaned_word)
109
109
  if not special:
110
110
  cleaned_word = re.sub(r"&~\w+", '', cleaned_word)
111
- cleaned_word = cleaned_word.replace("(","").replace(")","")
111
+ # cleaned_word = cleaned_word.replace("(","").replace(")","")
112
112
  cleaned_word = cleaned_word.replace("[","").replace("]","")
113
113
  cleaned_word = cleaned_word.replace("<","").replace(">","")
114
114
  cleaned_word = cleaned_word.replace("“","").replace("”","")
@@ -0,0 +1,199 @@
1
+ import torch
2
+ from transformers.models.whisper.generation_whisper import _dynamic_time_warping as _dynamic_time_warping
3
+ from transformers.models.whisper.generation_whisper import _median_filter as _median_filter
4
+
5
+ from dataclasses import dataclass
6
+ import numpy as np
7
+
8
+ def _extract_token_timestamps(
9
+ self, generate_outputs, alignment_heads, time_precision=0.02, num_frames=None, num_input_ids=None
10
+ ):
11
+ """
12
+ Calculates token-level timestamps using the encoder-decoder cross-attentions and dynamic time-warping (DTW) to
13
+ map each output token to a position in the input audio. If `num_frames` is specified, the encoder-decoder
14
+ cross-attentions will be cropped before applying DTW.
15
+
16
+ Returns:
17
+ tensor containing the timestamps in seconds for each predicted token
18
+ """
19
+ # Create a list with `decoder_layers` elements, each a tensor of shape
20
+ # (batch size, attention_heads, output length, input length).
21
+ cross_attentions = []
22
+ for i in range(self.config.decoder_layers):
23
+ cross_attentions.append(torch.cat([x[i] for x in generate_outputs.cross_attentions], dim=2))
24
+
25
+ # Select specific cross-attention layers and heads. This is a tensor
26
+ # of shape (batch size, num selected, output length, input length).
27
+ weights = torch.stack([cross_attentions[l][:, h] for l, h in alignment_heads])
28
+ weights = weights.permute([1, 0, 2, 3])
29
+
30
+ weight_length = None
31
+
32
+ if "beam_indices" in generate_outputs:
33
+ # If beam search has been used, the output sequences may have been generated for more timesteps than their sequence_lengths
34
+ # since the beam search strategy chooses the most probable sequences at the end of the search.
35
+ # In that case, the cross_attentions weights are too long and we have to make sure that they have the right output_length
36
+ weight_length = (generate_outputs.beam_indices != -1).sum(-1).max()
37
+ weight_length = weight_length if num_input_ids is None else weight_length + num_input_ids
38
+
39
+ # beam search takes `decoder_input_ids` into account in the `beam_indices` length
40
+ # but forgot to shift the beam_indices by the number of `decoder_input_ids`
41
+ beam_indices = torch.zeros_like(generate_outputs.beam_indices[:, :weight_length], dtype=torch.float32)
42
+ # we actually shift the beam indices here
43
+ beam_indices[:, num_input_ids:] = generate_outputs.beam_indices[:, : weight_length - num_input_ids]
44
+
45
+ weights = weights[:, :, :weight_length]
46
+
47
+ # If beam index is still -1, it means that the associated token id is EOS
48
+ # We need to replace the index with 0 since index_select gives an error if any of the indexes is -1.
49
+ beam_indices = beam_indices.masked_fill(beam_indices == -1, 0)
50
+
51
+ # Select the cross attention from the right beam for each output sequences
52
+ weights = torch.stack(
53
+ [
54
+ torch.index_select(weights[:, :, i, :], dim=0, index=beam_indices[:, i])
55
+ for i in range(beam_indices.shape[1])
56
+ ],
57
+ dim=2,
58
+ )
59
+
60
+ # make sure timestamps are as long as weights
61
+ input_length = weight_length or cross_attentions[0].shape[2]
62
+ batch_size = generate_outputs.sequences.shape[0]
63
+ timestamps = torch.zeros(
64
+ (batch_size, input_length + 1), dtype=torch.float32, device=generate_outputs.sequences.device
65
+ )
66
+
67
+ if num_frames is not None:
68
+ # two cases:
69
+ # 1. num_frames is the same for each sample -> compute the DTW matrix for each sample in parallel
70
+ # 2. num_frames is different, compute the DTW matrix for each sample sequentially
71
+
72
+ # we're using np.unique because num_frames can be int/list/tuple
73
+ if isinstance(num_frames, int):
74
+ weights = weights[..., : num_frames // 2]
75
+
76
+ elif isinstance(num_frames, (list, tuple, np.ndarray)) and len(np.unique(num_frames)) == 1:
77
+ weights = weights[..., : num_frames[0] // 2]
78
+
79
+ elif isinstance(num_frames, (torch.Tensor)) and len(torch.unique(num_frames)) == 1:
80
+ weights = weights[..., : num_frames[0] // 2]
81
+
82
+ else:
83
+ # num_frames is of shape (batch_size,) whereas batch_size is truly batch_size*num_return_sequences
84
+ repeat_time = batch_size if isinstance(num_frames, int) else batch_size // len(num_frames)
85
+ num_frames = num_frames.cpu() if isinstance(num_frames, (torch.Tensor)) else num_frames
86
+ num_frames = np.repeat(num_frames, repeat_time)
87
+
88
+ if num_frames is None or isinstance(num_frames, int):
89
+ # Normalize and smoothen the weights.
90
+ std = torch.std(weights, dim=-2, keepdim=True, unbiased=False)
91
+ mean = torch.mean(weights, dim=-2, keepdim=True)
92
+ weights = (weights - mean) / std
93
+ weights = _median_filter(weights, self.config.median_filter_width)
94
+
95
+ # Average the different cross-attention heads.
96
+ weights = weights.mean(dim=1)
97
+
98
+ # Perform dynamic time warping on each element of the batch.
99
+ for batch_idx in range(batch_size):
100
+ if num_frames is not None and isinstance(num_frames, (tuple, list, np.ndarray, torch.Tensor)):
101
+ matrix = weights[batch_idx, ..., : num_frames[batch_idx] // 2]
102
+
103
+ # Normalize and smoothen the weights.
104
+ std = torch.std(matrix, dim=-2, keepdim=True, unbiased=False)
105
+ mean = torch.mean(matrix, dim=-2, keepdim=True)
106
+ matrix = (matrix - mean) / std
107
+ matrix = _median_filter(matrix, self.config.median_filter_width)
108
+
109
+ # Average the different cross-attention heads.
110
+ matrix = matrix.mean(dim=0)
111
+ else:
112
+ matrix = weights[batch_idx]
113
+
114
+ text_indices, time_indices = _dynamic_time_warping(-matrix.cpu().double().numpy())
115
+ jumps = np.pad(np.diff(text_indices), (1, 0), constant_values=1).astype(bool)
116
+ jump_times = time_indices[jumps] * time_precision
117
+ timestamps[batch_idx, 1:] = torch.tensor(jump_times)
118
+
119
+ return timestamps
120
+
121
+ # def _extract_token_timestamps(self, generate_outputs, alignment_heads, time_precision=0.02, num_frames=None):
122
+ # """
123
+ # Calculates token-level timestamps using the encoder-decoder cross-attentions and dynamic time-warping (DTW) to
124
+ # map each output token to a position in the input audio. If `num_frames` is specified, the encoder-decoder
125
+ # cross-attentions will be cropped before applying DTW.
126
+
127
+ # Returns:
128
+ # tensor containing the timestamps in seconds for each predicted token
129
+ # """
130
+ # # Create a list with `decoder_layers` elements, each a tensor of shape
131
+ # # (batch size, attention_heads, output length, input length).
132
+ # cross_attentions = []
133
+ # for i in range(self.config.decoder_layers):
134
+ # cross_attentions.append(torch.cat([x[i] for x in generate_outputs.cross_attentions], dim=2))
135
+
136
+ # # Select specific cross-attention layers and heads. This is a tensor
137
+ # # of shape (batch size, num selected, output length, input length).
138
+ # weights = torch.stack([cross_attentions[l][:, h] for l, h in alignment_heads])
139
+ # weights = weights.permute([1, 0, 2, 3])
140
+ # if num_frames is not None:
141
+ # weights = weights[..., : num_frames // 2]
142
+
143
+ # # Normalize and smoothen the weights.
144
+ # std, mean = torch.std_mean(weights, dim=-2, keepdim=True, unbiased=False)
145
+ # weights = (weights - mean) / std
146
+ # weights = _median_filter(weights, self.config.median_filter_width)
147
+
148
+ # # Average the different cross-attention heads.
149
+ # matrix = weights.mean(dim=1)
150
+
151
+ # timestamps = torch.zeros_like(generate_outputs.sequences, dtype=torch.float32)
152
+
153
+ # # Perform dynamic time warping on each element of the batch.
154
+ # for batch_idx in range(timestamps.shape[0]):
155
+ # text_indices, time_indices = _dynamic_time_warping(-matrix[batch_idx].float().cpu().numpy())
156
+ # jumps = np.pad(np.diff(text_indices), (1, 0), constant_values=1).astype(bool)
157
+ # jump_times = time_indices[jumps] * time_precision
158
+ # timestamps[batch_idx, 1:] = torch.tensor(jump_times)
159
+
160
+ # return timestamps
161
+
162
+
163
+ @dataclass
164
+ class ASRAudioFile:
165
+ file : str
166
+ tensor : torch.Tensor
167
+ rate : int
168
+
169
+ def chunk(self,begin_ms, end_ms):
170
+ """Get a chunk of the audio.
171
+
172
+ Parameters
173
+ ----------
174
+ begin_ms : int
175
+ Milliseconds of the start of the slice.
176
+ end_ms : int
177
+ Milliseconds of the end of the slice.
178
+
179
+ Returns
180
+ -------
181
+ torch.Tensor
182
+ The returned chunk to supply to the ASR engine.
183
+ """
184
+
185
+ data = self.tensor[int(round((begin_ms/1000)*self.rate)):
186
+ int(round((end_ms/1000)*self.rate))]
187
+
188
+ return data
189
+
190
+ def all(self):
191
+ """Get the audio in its entirety
192
+
193
+ Notes
194
+ -----
195
+ like `chunk()` but all of the audio
196
+ """
197
+
198
+ return self.tensor
199
+
@@ -3,6 +3,7 @@ eval.py
3
3
  Engines for transcript evaluation
4
4
  """
5
5
 
6
+ import re
6
7
  from batchalign.document import *
7
8
  from batchalign.pipelines.base import *
8
9
  from batchalign.pipelines.asr.utils import *
@@ -22,11 +23,34 @@ class EvaluationEngine(BatchalignEngine):
22
23
  forms = [ j.text.lower() for i in doc.content for j in i.content if isinstance(i, Utterance)]
23
24
  gold_forms = [ j.text.lower() for i in gold.content for j in i.content if isinstance(i, Utterance)]
24
25
 
25
- forms = [i for i in forms if i.strip() not in MOR_PUNCT+ENDING_PUNCT]
26
- gold_forms = [i for i in gold_forms if i.strip() not in MOR_PUNCT+ENDING_PUNCT]
26
+ forms = [i.replace("-", "") for i in forms if i.strip() not in MOR_PUNCT+ENDING_PUNCT]
27
+ gold_forms = [i.replace("-", "") for i in gold_forms if i.strip() not in MOR_PUNCT+ENDING_PUNCT]
28
+
29
+ forms = [re.sub(r"\((.*)\)",r"", i) for i in forms]
30
+ gold_forms = [re.sub(r"\((.*)\)",r"", i) for i in gold_forms]
31
+
32
+ # if there are single letter frames, we combine them together
33
+ # until the utterance is done or there isn't any left
34
+ forms_finished = []
35
+
36
+ single_sticky = ""
37
+ is_single = False
38
+
39
+ for i in forms:
40
+ if len(i) == 1:
41
+ single_sticky += i
42
+ else:
43
+ if single_sticky != "":
44
+ forms_finished.append(single_sticky)
45
+ single_sticky = ""
46
+ forms_finished.append(i)
47
+
48
+ if single_sticky != "":
49
+ forms_finished.append(single_sticky)
50
+ single_sticky = ""
27
51
 
28
52
  # dp!
29
- alignment = align(forms, gold_forms, False)
53
+ alignment = align(forms_finished, gold_forms, False)
30
54
 
31
55
  # calculate each type of error
32
56
  sub = 0
@@ -39,14 +63,28 @@ class EvaluationEngine(BatchalignEngine):
39
63
  # but if we have <extra.reference> <extra.reference> this is 2 insertions
40
64
 
41
65
  cleaned_alignment = []
66
+ # whether we had a "firstname" in reference document and hence are
67
+ # anticipating a payload for it (the actual name) in the next entry in the
68
+ # alignment
69
+ anticipating_payload = False
42
70
 
43
71
  for i in alignment:
44
72
 
45
73
  if isinstance(i, Extra):
46
- if len(cleaned_alignment) > 0 and i.extra_type == ExtraType.REFERENCE and "name" in i.key and i.key[:4] != "name":
47
- cleaned_alignment.pop(-1)
74
+
75
+ if i.extra_type == ExtraType.REFERENCE and "name" in i.key and i.key[:4] != "name":
76
+ if (isinstance(cleaned_alignment[-1], Extra) and
77
+ cleaned_alignment[-1].extra_type == ExtraType.PAYLOAD and
78
+ len(cleaned_alignment) > 0):
79
+ cleaned_alignment.pop(-1)
80
+ else:
81
+ anticipating_payload = True
48
82
  cleaned_alignment.append(Match(i.key, None, None))
49
83
  continue
84
+ elif i.extra_type == ExtraType.PAYLOAD and anticipating_payload:
85
+ anticipating_payload = False
86
+ continue
87
+
50
88
 
51
89
  if prev_error != None and prev_error != i.extra_type:
52
90
  # this is a substitution: we have different "extra"s in
@@ -75,7 +113,7 @@ class EvaluationEngine(BatchalignEngine):
75
113
  cleaned_alignment.append(i)
76
114
 
77
115
  diff = []
78
- for i in alignment:
116
+ for i in cleaned_alignment:
79
117
  if isinstance(i, Extra):
80
118
  diff.append(f"{'+' if i.extra_type == ExtraType.REFERENCE else '-'} {i.key}")
81
119
  else:
@@ -18,6 +18,7 @@ from stanza import DownloadMethod
18
18
  from torch import heaviside
19
19
 
20
20
  from stanza.pipeline.processor import ProcessorVariant, register_processor_variant
21
+ from stanza.resources.common import download_resources_json, load_resources_json, get_language_resources
21
22
 
22
23
  # the loading bar
23
24
  from tqdm import tqdm
@@ -115,6 +116,7 @@ def handler(word, lang=None):
115
116
  target = target.replace('/100', '')
116
117
  target = target.replace('/r', '')
117
118
  target = target.replace('(', '')
119
+ target = target.replace("(","").replace(")","")
118
120
 
119
121
  # remove attachments
120
122
  if "|" in target:
@@ -217,9 +219,9 @@ def handler__NOUN(word, lang=None):
217
219
  type = feats.get("PronType", "")
218
220
 
219
221
  apm = ""
220
- if lang == "fr":
222
+ if lang == "fr" and number_str == "-Plur":
221
223
  from batchalign.pipelines.morphosyntax.fr.apm import is_apm_noun
222
- apm = "apm" if is_apm_noun(word.text) else ""
224
+ apm = "Apm" if is_apm_noun(word.text) else ""
223
225
 
224
226
 
225
227
  if word.deprel == "obj" and case.strip() == "":
@@ -738,13 +740,17 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
738
740
  else:
739
741
  config["tokenize_postprocessor"] = lambda x:adlist_processor(x)
740
742
 
743
+ download_resources_json()
744
+ resources = load_resources_json()
745
+ mwt_exclusion = ["hr", "zh", "zh-hans", "zh-hant", "ja", "ko",
746
+ "sl", "sr", "bg", "ru", "et", "hu",
747
+ "eu", "el", "he", "af", "ga", "da", "ro"]
748
+
741
749
  if "zh" in lang:
742
750
  lang.pop(lang.index("zh"))
743
751
  lang.append("zh-hans")
744
-
745
- elif not any([i in ["hr", "zh", "zh-hans", "zh-hant", "ja", "ko",
746
- "sl", "sr", "bg", "ru", "et", "hu",
747
- "eu", "el", "he", "af", "ga", "da", "ro"] for i in lang]):
752
+
753
+ elif not any(i in mwt_exclusion or "mwt" not in get_language_resources(resources, i) for i in lang):
748
754
  if "en" in lang:
749
755
  config["processors"]["mwt"] = "gum"
750
756
  else:
@@ -848,7 +854,7 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
848
854
  inputs.append(line_cut)
849
855
 
850
856
  try:
851
- sents = nlp(line_cut.strip()).sentences
857
+ sents = nlp(line_cut.replace("(","").replace(")","").strip()).sentences
852
858
 
853
859
  if len(sents) == 0:
854
860
  continue
@@ -958,6 +964,7 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
958
964
  retokenized_ut = re.sub(r"⁎[⁎ ]*(.*?)[⁎ ]*⁎", r"⁎\1⁎ ", retokenized_ut)
959
965
  retokenized_ut = re.sub(r"\[\*(.)\]", r"[* \1]", retokenized_ut)
960
966
  retokenized_ut = re.sub(r" +", r" ", retokenized_ut)
967
+ retokenized_ut = re.sub(r"⁎ @", r"⁎@", retokenized_ut)
961
968
 
962
969
  # pray to everyone that it works---this will simply crash and ignore
963
970
  # the utterance if it didn't work, so we are doing this as a sanity
@@ -84,6 +84,7 @@ def parse_tree(subtree):
84
84
  for i in stack]
85
85
 
86
86
  def process_ut(ut, nlp):
87
+
87
88
  # remove punct
88
89
  if (ut.content[-1].type == TokenType.PUNCT or
89
90
  ut.content[-1].text in ENDING_PUNCT):
@@ -142,7 +143,7 @@ def process_ut(ut, nlp):
142
143
  if isinstance(i, Match):
143
144
  matches.append(i)
144
145
  elif i.extra_type == ExtraType.REFERENCE:
145
- new_refs.append(ReferenceTarget(key=i.key, payload=i.payload))
146
+ new_refs.append(ReferenceTarget(key=i.key, payload=i.payload if i.payload else -1))
146
147
 
147
148
  # we now sort the references based on their orignial utterance order
148
149
  matches = matches + new_refs
@@ -29,6 +29,7 @@ def word_tokenize(str):
29
29
  return tmp.tokenize(str)
30
30
  except LookupError:
31
31
  nltk.download("punkt")
32
+ nltk.download("punkt_tab")
32
33
  return tmp.tokenize(str)
33
34
 
34
35
  def sent_tokenize(str):
@@ -49,6 +50,7 @@ def sent_tokenize(str):
49
50
  return ST(str)
50
51
  except LookupError:
51
52
  nltk.download("punkt")
53
+ nltk.download("punkt_tab")
52
54
  return ST(str)
53
55
 
54
56
  def detokenize(tokens):
@@ -69,6 +71,7 @@ def detokenize(tokens):
69
71
  return TreebankWordDetokenizer().detokenize(tokens)
70
72
  except LookupError:
71
73
  nltk.download("punkt")
74
+ nltk.download("punkt_tab")
72
75
  return TreebankWordDetokenizer().detokenize(tokens)
73
76
 
74
77
  def correct_timing(doc):
@@ -0,0 +1,3 @@
1
+ 0.7.7
2
+ January 3, 2025
3
+ releasing new full version
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.6a32
3
+ Version: 0.7.7
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -59,6 +59,8 @@ The following instructions provide a quick start to installing Batchalign. For m
59
59
  1. Install Python 3.11: [via this link](https://www.python.org/ftp/python/3.11.7/python-3.11.7-amd64.exe)
60
60
  2. If later commands report `pip module not found`, [this page may help](https://github.com/TalkBank/batchalign2/wiki/Troubleshooting-Tips#get-pip-on-windows)
61
61
  - your distribution's instructions for Linux
62
+
63
+ For first-time users of Python, note that if you didn't install Python 3.11 (as we recommended above), it may be complex to change Python versions later, and doing so may cause additional problems. We recommend installing Python 3.11 explicitly by specifying the version number, as shown above.
62
64
 
63
65
  ### Install and Update the Package
64
66
  You can get Batchalign from PyPi, and you can update the package in the same way:
@@ -75,6 +77,8 @@ Windows:
75
77
  py -m pip install -U batchalign
76
78
  ```
77
79
 
80
+ Note that if your system reports `pip: command not found`, replace every use of `pip` in the instructions with `pip3` and try again.
81
+
78
82
  ### Rock and Roll
79
83
  There are two main ways of interacting with Batchalign. Batchalign can be used as a program to batch-process CHAT (hence the name), or as a Python LSA library.
80
84
 
@@ -1,86 +0,0 @@
1
- import torch
2
- from transformers.models.whisper.generation_whisper import _dynamic_time_warping as _dynamic_time_warping
3
- from transformers.models.whisper.generation_whisper import _median_filter as _median_filter
4
-
5
- from dataclasses import dataclass
6
- import numpy as np
7
-
8
- def _extract_token_timestamps(self, generate_outputs, alignment_heads, time_precision=0.02, num_frames=None):
9
- """
10
- Calculates token-level timestamps using the encoder-decoder cross-attentions and dynamic time-warping (DTW) to
11
- map each output token to a position in the input audio. If `num_frames` is specified, the encoder-decoder
12
- cross-attentions will be cropped before applying DTW.
13
-
14
- Returns:
15
- tensor containing the timestamps in seconds for each predicted token
16
- """
17
- # Create a list with `decoder_layers` elements, each a tensor of shape
18
- # (batch size, attention_heads, output length, input length).
19
- cross_attentions = []
20
- for i in range(self.config.decoder_layers):
21
- cross_attentions.append(torch.cat([x[i] for x in generate_outputs.cross_attentions], dim=2))
22
-
23
- # Select specific cross-attention layers and heads. This is a tensor
24
- # of shape (batch size, num selected, output length, input length).
25
- weights = torch.stack([cross_attentions[l][:, h] for l, h in alignment_heads])
26
- weights = weights.permute([1, 0, 2, 3])
27
- if num_frames is not None:
28
- weights = weights[..., : num_frames // 2]
29
-
30
- # Normalize and smoothen the weights.
31
- std, mean = torch.std_mean(weights, dim=-2, keepdim=True, unbiased=False)
32
- weights = (weights - mean) / std
33
- weights = _median_filter(weights, self.config.median_filter_width)
34
-
35
- # Average the different cross-attention heads.
36
- matrix = weights.mean(dim=1)
37
-
38
- timestamps = torch.zeros_like(generate_outputs.sequences, dtype=torch.float32)
39
-
40
- # Perform dynamic time warping on each element of the batch.
41
- for batch_idx in range(timestamps.shape[0]):
42
- text_indices, time_indices = _dynamic_time_warping(-matrix[batch_idx].float().cpu().numpy())
43
- jumps = np.pad(np.diff(text_indices), (1, 0), constant_values=1).astype(bool)
44
- jump_times = time_indices[jumps] * time_precision
45
- timestamps[batch_idx, 1:] = torch.tensor(jump_times)
46
-
47
- return timestamps
48
-
49
-
50
- @dataclass
51
- class ASRAudioFile:
52
- file : str
53
- tensor : torch.Tensor
54
- rate : int
55
-
56
- def chunk(self,begin_ms, end_ms):
57
- """Get a chunk of the audio.
58
-
59
- Parameters
60
- ----------
61
- begin_ms : int
62
- Milliseconds of the start of the slice.
63
- end_ms : int
64
- Milliseconds of the end of the slice.
65
-
66
- Returns
67
- -------
68
- torch.Tensor
69
- The returned chunk to supply to the ASR engine.
70
- """
71
-
72
- data = self.tensor[int(round((begin_ms/1000)*self.rate)):
73
- int(round((end_ms/1000)*self.rate))]
74
-
75
- return data
76
-
77
- def all(self):
78
- """Get the audio in its entirety
79
-
80
- Notes
81
- -----
82
- like `chunk()` but all of the audio
83
- """
84
-
85
- return self.tensor
86
-
@@ -1,3 +0,0 @@
1
- 0.7.6-alpha.32
2
- November 26, 2024
3
- French APM
File without changes
File without changes
File without changes
File without changes