batchalign 0.7.7a3__tar.gz → 0.7.7a5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. {batchalign-0.7.7a3/batchalign.egg-info → batchalign-0.7.7a5}/PKG-INFO +5 -1
  2. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/README.md +4 -0
  3. batchalign-0.7.7a5/batchalign/models/utils.py +199 -0
  4. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/morphosyntax/ud.py +10 -5
  5. batchalign-0.7.7a5/batchalign/version +3 -0
  6. {batchalign-0.7.7a3 → batchalign-0.7.7a5/batchalign.egg-info}/PKG-INFO +5 -1
  7. batchalign-0.7.7a3/batchalign/models/utils.py +0 -86
  8. batchalign-0.7.7a3/batchalign/version +0 -3
  9. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/LICENSE +0 -0
  10. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/MANIFEST.in +0 -0
  11. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/__init__.py +0 -0
  12. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/__main__.py +0 -0
  13. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/cli/__init__.py +0 -0
  14. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/cli/cli.py +0 -0
  15. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/cli/dispatch.py +0 -0
  16. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/constants.py +0 -0
  17. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/document.py +0 -0
  18. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/errors.py +0 -0
  19. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/formats/__init__.py +0 -0
  20. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/formats/base.py +0 -0
  21. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/formats/chat/__init__.py +0 -0
  22. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/formats/chat/file.py +0 -0
  23. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/formats/chat/generator.py +0 -0
  24. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/formats/chat/lexer.py +0 -0
  25. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/formats/chat/parser.py +0 -0
  26. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/formats/chat/utils.py +0 -0
  27. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/formats/textgrid/__init__.py +0 -0
  28. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/formats/textgrid/file.py +0 -0
  29. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/formats/textgrid/generator.py +0 -0
  30. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/formats/textgrid/parser.py +0 -0
  31. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/models/__init__.py +0 -0
  32. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/models/resolve.py +0 -0
  33. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/models/speaker/__init__.py +0 -0
  34. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/models/speaker/config.yaml +0 -0
  35. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/models/speaker/infer.py +0 -0
  36. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/models/speaker/utils.py +0 -0
  37. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/models/training/__init__.py +0 -0
  38. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/models/training/run.py +0 -0
  39. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/models/training/utils.py +0 -0
  40. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/models/utterance/__init__.py +0 -0
  41. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/models/utterance/dataset.py +0 -0
  42. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/models/utterance/execute.py +0 -0
  43. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/models/utterance/infer.py +0 -0
  44. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/models/utterance/prep.py +0 -0
  45. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/models/utterance/train.py +0 -0
  46. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/models/whisper/__init__.py +0 -0
  47. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/models/whisper/infer_asr.py +0 -0
  48. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/models/whisper/infer_fa.py +0 -0
  49. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/__init__.py +0 -0
  50. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/analysis/__init__.py +0 -0
  51. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/analysis/eval.py +0 -0
  52. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/asr/__init__.py +0 -0
  53. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/asr/rev.py +0 -0
  54. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/asr/utils.py +0 -0
  55. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/asr/whisper.py +0 -0
  56. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/asr/whisperx.py +0 -0
  57. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/base.py +0 -0
  58. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/cleanup/__init__.py +0 -0
  59. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  60. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  61. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  62. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/cleanup/retrace.py +0 -0
  63. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  64. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  65. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/cleanup/support/test.test +0 -0
  66. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/dispatch.py +0 -0
  67. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/fa/__init__.py +0 -0
  68. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  69. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  70. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  71. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  72. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  73. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  74. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  75. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  76. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/pipeline.py +0 -0
  77. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/speaker/__init__.py +0 -0
  78. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  79. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/utr/__init__.py +0 -0
  80. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/utr/rev_utr.py +0 -0
  81. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/utr/utils.py +0 -0
  82. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  83. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/utterance/__init__.py +0 -0
  84. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  85. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/tests/__init__.py +0 -0
  86. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/tests/conftest.py +0 -0
  87. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  88. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  89. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  90. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  91. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  92. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  93. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  94. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  95. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  96. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  97. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  98. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  99. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/tests/pipelines/fixures.py +0 -0
  100. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  101. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  102. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/tests/test_document.py +0 -0
  103. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/utils/__init__.py +0 -0
  104. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/utils/config.py +0 -0
  105. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/utils/dp.py +0 -0
  106. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign/utils/utils.py +0 -0
  107. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign.egg-info/SOURCES.txt +0 -0
  108. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign.egg-info/dependency_links.txt +0 -0
  109. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign.egg-info/entry_points.txt +0 -0
  110. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign.egg-info/requires.txt +0 -0
  111. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/batchalign.egg-info/top_level.txt +0 -0
  112. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/setup.cfg +0 -0
  113. {batchalign-0.7.7a3 → batchalign-0.7.7a5}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.7a3
3
+ Version: 0.7.7a5
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -59,6 +59,8 @@ The following instructions provide a quick start to installing Batchalign. For m
59
59
  1. Install Python 3.11: [via this link](https://www.python.org/ftp/python/3.11.7/python-3.11.7-amd64.exe)
60
60
  2. If later commands report `pip module not found`, [this page may help](https://github.com/TalkBank/batchalign2/wiki/Troubleshooting-Tips#get-pip-on-windows)
61
61
  - your distribution's instructions for Linux
62
+
63
+ For first-time users of Python, note that if you didn't install Python 3.11 (as we recommended above), it may be complex to change Python versions later, and doing so may cause additional problems. We recommend installing Python 3.11 explicitly by specifying the version number, as shown above.
62
64
 
63
65
  ### Install and Update the Package
64
66
  You can get Batchalign from PyPi, and you can update the package in the same way:
@@ -75,6 +77,8 @@ Windows:
75
77
  py -m pip install -U batchalign
76
78
  ```
77
79
 
80
+ Note that if your system reports `pip: command not found`, replace every use of `pip` in the instructions with `pip3` and try again.
81
+
78
82
  ### Rock and Roll
79
83
  There are two main ways of interacting with Batchalign. Batchalign can be used as a program to batch-process CHAT (hence the name), or as a Python LSA library.
80
84
 
@@ -21,6 +21,8 @@ The following instructions provide a quick start to installing Batchalign. For m
21
21
  1. Install Python 3.11: [via this link](https://www.python.org/ftp/python/3.11.7/python-3.11.7-amd64.exe)
22
22
  2. If later commands report `pip module not found`, [this page may help](https://github.com/TalkBank/batchalign2/wiki/Troubleshooting-Tips#get-pip-on-windows)
23
23
  - your distribution's instructions for Linux
24
+
25
+ For first-time users of Python, note that if you didn't install Python 3.11 (as we recommended above), it may be complex to change Python versions later, and doing so may cause additional problems. We recommend installing Python 3.11 explicitly by specifying the version number, as shown above.
24
26
 
25
27
  ### Install and Update the Package
26
28
  You can get Batchalign from PyPi, and you can update the package in the same way:
@@ -37,6 +39,8 @@ Windows:
37
39
  py -m pip install -U batchalign
38
40
  ```
39
41
 
42
+ Note that if your system reports `pip: command not found`, replace every use of `pip` in the instructions with `pip3` and try again.
43
+
40
44
  ### Rock and Roll
41
45
  There are two main ways of interacting with Batchalign. Batchalign can be used as a program to batch-process CHAT (hence the name), or as a Python LSA library.
42
46
 
@@ -0,0 +1,199 @@
1
+ import torch
2
+ from transformers.models.whisper.generation_whisper import _dynamic_time_warping as _dynamic_time_warping
3
+ from transformers.models.whisper.generation_whisper import _median_filter as _median_filter
4
+
5
+ from dataclasses import dataclass
6
+ import numpy as np
7
+
8
+ def _extract_token_timestamps(
9
+ self, generate_outputs, alignment_heads, time_precision=0.02, num_frames=None, num_input_ids=None
10
+ ):
11
+ """
12
+ Calculates token-level timestamps using the encoder-decoder cross-attentions and dynamic time-warping (DTW) to
13
+ map each output token to a position in the input audio. If `num_frames` is specified, the encoder-decoder
14
+ cross-attentions will be cropped before applying DTW.
15
+
16
+ Returns:
17
+ tensor containing the timestamps in seconds for each predicted token
18
+ """
19
+ # Create a list with `decoder_layers` elements, each a tensor of shape
20
+ # (batch size, attention_heads, output length, input length).
21
+ cross_attentions = []
22
+ for i in range(self.config.decoder_layers):
23
+ cross_attentions.append(torch.cat([x[i] for x in generate_outputs.cross_attentions], dim=2))
24
+
25
+ # Select specific cross-attention layers and heads. This is a tensor
26
+ # of shape (batch size, num selected, output length, input length).
27
+ weights = torch.stack([cross_attentions[l][:, h] for l, h in alignment_heads])
28
+ weights = weights.permute([1, 0, 2, 3])
29
+
30
+ weight_length = None
31
+
32
+ if "beam_indices" in generate_outputs:
33
+ # If beam search has been used, the output sequences may have been generated for more timesteps than their sequence_lengths
34
+ # since the beam search strategy chooses the most probable sequences at the end of the search.
35
+ # In that case, the cross_attentions weights are too long and we have to make sure that they have the right output_length
36
+ weight_length = (generate_outputs.beam_indices != -1).sum(-1).max()
37
+ weight_length = weight_length if num_input_ids is None else weight_length + num_input_ids
38
+
39
+ # beam search takes `decoder_input_ids` into account in the `beam_indices` length
40
+ # but forgot to shift the beam_indices by the number of `decoder_input_ids`
41
+ beam_indices = torch.zeros_like(generate_outputs.beam_indices[:, :weight_length], dtype=torch.float32)
42
+ # we actually shift the beam indices here
43
+ beam_indices[:, num_input_ids:] = generate_outputs.beam_indices[:, : weight_length - num_input_ids]
44
+
45
+ weights = weights[:, :, :weight_length]
46
+
47
+ # If beam index is still -1, it means that the associated token id is EOS
48
+ # We need to replace the index with 0 since index_select gives an error if any of the indexes is -1.
49
+ beam_indices = beam_indices.masked_fill(beam_indices == -1, 0)
50
+
51
+ # Select the cross attention from the right beam for each output sequences
52
+ weights = torch.stack(
53
+ [
54
+ torch.index_select(weights[:, :, i, :], dim=0, index=beam_indices[:, i])
55
+ for i in range(beam_indices.shape[1])
56
+ ],
57
+ dim=2,
58
+ )
59
+
60
+ # make sure timestamps are as long as weights
61
+ input_length = weight_length or cross_attentions[0].shape[2]
62
+ batch_size = generate_outputs.sequences.shape[0]
63
+ timestamps = torch.zeros(
64
+ (batch_size, input_length + 1), dtype=torch.float32, device=generate_outputs.sequences.device
65
+ )
66
+
67
+ if num_frames is not None:
68
+ # two cases:
69
+ # 1. num_frames is the same for each sample -> compute the DTW matrix for each sample in parallel
70
+ # 2. num_frames is different, compute the DTW matrix for each sample sequentially
71
+
72
+ # we're using np.unique because num_frames can be int/list/tuple
73
+ if isinstance(num_frames, int):
74
+ weights = weights[..., : num_frames // 2]
75
+
76
+ elif isinstance(num_frames, (list, tuple, np.ndarray)) and len(np.unique(num_frames)) == 1:
77
+ weights = weights[..., : num_frames[0] // 2]
78
+
79
+ elif isinstance(num_frames, (torch.Tensor)) and len(torch.unique(num_frames)) == 1:
80
+ weights = weights[..., : num_frames[0] // 2]
81
+
82
+ else:
83
+ # num_frames is of shape (batch_size,) whereas batch_size is truly batch_size*num_return_sequences
84
+ repeat_time = batch_size if isinstance(num_frames, int) else batch_size // len(num_frames)
85
+ num_frames = num_frames.cpu() if isinstance(num_frames, (torch.Tensor)) else num_frames
86
+ num_frames = np.repeat(num_frames, repeat_time)
87
+
88
+ if num_frames is None or isinstance(num_frames, int):
89
+ # Normalize and smoothen the weights.
90
+ std = torch.std(weights, dim=-2, keepdim=True, unbiased=False)
91
+ mean = torch.mean(weights, dim=-2, keepdim=True)
92
+ weights = (weights - mean) / std
93
+ weights = _median_filter(weights, self.config.median_filter_width)
94
+
95
+ # Average the different cross-attention heads.
96
+ weights = weights.mean(dim=1)
97
+
98
+ # Perform dynamic time warping on each element of the batch.
99
+ for batch_idx in range(batch_size):
100
+ if num_frames is not None and isinstance(num_frames, (tuple, list, np.ndarray, torch.Tensor)):
101
+ matrix = weights[batch_idx, ..., : num_frames[batch_idx] // 2]
102
+
103
+ # Normalize and smoothen the weights.
104
+ std = torch.std(matrix, dim=-2, keepdim=True, unbiased=False)
105
+ mean = torch.mean(matrix, dim=-2, keepdim=True)
106
+ matrix = (matrix - mean) / std
107
+ matrix = _median_filter(matrix, self.config.median_filter_width)
108
+
109
+ # Average the different cross-attention heads.
110
+ matrix = matrix.mean(dim=0)
111
+ else:
112
+ matrix = weights[batch_idx]
113
+
114
+ text_indices, time_indices = _dynamic_time_warping(-matrix.cpu().double().numpy())
115
+ jumps = np.pad(np.diff(text_indices), (1, 0), constant_values=1).astype(bool)
116
+ jump_times = time_indices[jumps] * time_precision
117
+ timestamps[batch_idx, 1:] = torch.tensor(jump_times)
118
+
119
+ return timestamps
120
+
121
+ # def _extract_token_timestamps(self, generate_outputs, alignment_heads, time_precision=0.02, num_frames=None):
122
+ # """
123
+ # Calculates token-level timestamps using the encoder-decoder cross-attentions and dynamic time-warping (DTW) to
124
+ # map each output token to a position in the input audio. If `num_frames` is specified, the encoder-decoder
125
+ # cross-attentions will be cropped before applying DTW.
126
+
127
+ # Returns:
128
+ # tensor containing the timestamps in seconds for each predicted token
129
+ # """
130
+ # # Create a list with `decoder_layers` elements, each a tensor of shape
131
+ # # (batch size, attention_heads, output length, input length).
132
+ # cross_attentions = []
133
+ # for i in range(self.config.decoder_layers):
134
+ # cross_attentions.append(torch.cat([x[i] for x in generate_outputs.cross_attentions], dim=2))
135
+
136
+ # # Select specific cross-attention layers and heads. This is a tensor
137
+ # # of shape (batch size, num selected, output length, input length).
138
+ # weights = torch.stack([cross_attentions[l][:, h] for l, h in alignment_heads])
139
+ # weights = weights.permute([1, 0, 2, 3])
140
+ # if num_frames is not None:
141
+ # weights = weights[..., : num_frames // 2]
142
+
143
+ # # Normalize and smoothen the weights.
144
+ # std, mean = torch.std_mean(weights, dim=-2, keepdim=True, unbiased=False)
145
+ # weights = (weights - mean) / std
146
+ # weights = _median_filter(weights, self.config.median_filter_width)
147
+
148
+ # # Average the different cross-attention heads.
149
+ # matrix = weights.mean(dim=1)
150
+
151
+ # timestamps = torch.zeros_like(generate_outputs.sequences, dtype=torch.float32)
152
+
153
+ # # Perform dynamic time warping on each element of the batch.
154
+ # for batch_idx in range(timestamps.shape[0]):
155
+ # text_indices, time_indices = _dynamic_time_warping(-matrix[batch_idx].float().cpu().numpy())
156
+ # jumps = np.pad(np.diff(text_indices), (1, 0), constant_values=1).astype(bool)
157
+ # jump_times = time_indices[jumps] * time_precision
158
+ # timestamps[batch_idx, 1:] = torch.tensor(jump_times)
159
+
160
+ # return timestamps
161
+
162
+
163
+ @dataclass
164
+ class ASRAudioFile:
165
+ file : str
166
+ tensor : torch.Tensor
167
+ rate : int
168
+
169
+ def chunk(self,begin_ms, end_ms):
170
+ """Get a chunk of the audio.
171
+
172
+ Parameters
173
+ ----------
174
+ begin_ms : int
175
+ Milliseconds of the start of the slice.
176
+ end_ms : int
177
+ Milliseconds of the end of the slice.
178
+
179
+ Returns
180
+ -------
181
+ torch.Tensor
182
+ The returned chunk to supply to the ASR engine.
183
+ """
184
+
185
+ data = self.tensor[int(round((begin_ms/1000)*self.rate)):
186
+ int(round((end_ms/1000)*self.rate))]
187
+
188
+ return data
189
+
190
+ def all(self):
191
+ """Get the audio in its entirety
192
+
193
+ Notes
194
+ -----
195
+ like `chunk()` but all of the audio
196
+ """
197
+
198
+ return self.tensor
199
+
@@ -18,6 +18,7 @@ from stanza import DownloadMethod
18
18
  from torch import heaviside
19
19
 
20
20
  from stanza.pipeline.processor import ProcessorVariant, register_processor_variant
21
+ from stanza.resources.common import download_resources_json, load_resources_json, get_language_resources
21
22
 
22
23
  # the loading bar
23
24
  from tqdm import tqdm
@@ -739,13 +740,17 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
739
740
  else:
740
741
  config["tokenize_postprocessor"] = lambda x:adlist_processor(x)
741
742
 
743
+ download_resources_json()
744
+ resources = load_resources_json()
745
+ mwt_exclusion = ["hr", "zh", "zh-hans", "zh-hant", "ja", "ko",
746
+ "sl", "sr", "bg", "ru", "et", "hu",
747
+ "eu", "el", "he", "af", "ga", "da", "ro"]
748
+
742
749
  if "zh" in lang:
743
750
  lang.pop(lang.index("zh"))
744
751
  lang.append("zh-hans")
745
-
746
- elif not any([i in ["hr", "zh", "zh-hans", "zh-hant", "ja", "ko",
747
- "sl", "sr", "bg", "ru", "et", "hu",
748
- "eu", "el", "he", "af", "ga", "da", "ro"] for i in lang]):
752
+
753
+ elif not any(i in mwt_exclusion or "mwt" not in get_language_resources(resources, i) for i in lang):
749
754
  if "en" in lang:
750
755
  config["processors"]["mwt"] = "gum"
751
756
  else:
@@ -959,12 +964,12 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
959
964
  retokenized_ut = re.sub(r"⁎[⁎ ]*(.*?)[⁎ ]*⁎", r"⁎\1⁎ ", retokenized_ut)
960
965
  retokenized_ut = re.sub(r"\[\*(.)\]", r"[* \1]", retokenized_ut)
961
966
  retokenized_ut = re.sub(r" +", r" ", retokenized_ut)
967
+ retokenized_ut = re.sub(r"⁎ @", r"⁎@", retokenized_ut)
962
968
 
963
969
  # pray to everyone that it works---this will simply crash and ignore
964
970
  # the utterance if it didn't work, so we are doing this as a sanity
965
971
  # check rather than needing the parsed result
966
972
  _1, _2 = chat_parse_utterance(retokenized_ut, mor, gra, None, None)
967
- retokenized_ut = re.sub(r"⁎ @", r"⁎@", retokenized_ut)
968
973
  doc.content[indx] = Utterance(content=ut,
969
974
  text=retokenized_ut,
970
975
  tier=doc.content[indx].tier,
@@ -0,0 +1,3 @@
1
+ 0.7.7-alpha.5
2
+ January 1st, 2025
3
+ whisper fix?
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.7a3
3
+ Version: 0.7.7a5
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -59,6 +59,8 @@ The following instructions provide a quick start to installing Batchalign. For m
59
59
  1. Install Python 3.11: [via this link](https://www.python.org/ftp/python/3.11.7/python-3.11.7-amd64.exe)
60
60
  2. If later commands report `pip module not found`, [this page may help](https://github.com/TalkBank/batchalign2/wiki/Troubleshooting-Tips#get-pip-on-windows)
61
61
  - your distribution's instructions for Linux
62
+
63
+ For first-time users of Python, note that if you didn't install Python 3.11 (as we recommended above), it may be complex to change Python versions later, and doing so may cause additional problems. We recommend installing Python 3.11 explicitly by specifying the version number, as shown above.
62
64
 
63
65
  ### Install and Update the Package
64
66
  You can get Batchalign from PyPi, and you can update the package in the same way:
@@ -75,6 +77,8 @@ Windows:
75
77
  py -m pip install -U batchalign
76
78
  ```
77
79
 
80
+ Note that if your system reports `pip: command not found`, replace every use of `pip` in the instructions with `pip3` and try again.
81
+
78
82
  ### Rock and Roll
79
83
  There are two main ways of interacting with Batchalign. Batchalign can be used as a program to batch-process CHAT (hence the name), or as a Python LSA library.
80
84
 
@@ -1,86 +0,0 @@
1
- import torch
2
- from transformers.models.whisper.generation_whisper import _dynamic_time_warping as _dynamic_time_warping
3
- from transformers.models.whisper.generation_whisper import _median_filter as _median_filter
4
-
5
- from dataclasses import dataclass
6
- import numpy as np
7
-
8
- def _extract_token_timestamps(self, generate_outputs, alignment_heads, time_precision=0.02, num_frames=None):
9
- """
10
- Calculates token-level timestamps using the encoder-decoder cross-attentions and dynamic time-warping (DTW) to
11
- map each output token to a position in the input audio. If `num_frames` is specified, the encoder-decoder
12
- cross-attentions will be cropped before applying DTW.
13
-
14
- Returns:
15
- tensor containing the timestamps in seconds for each predicted token
16
- """
17
- # Create a list with `decoder_layers` elements, each a tensor of shape
18
- # (batch size, attention_heads, output length, input length).
19
- cross_attentions = []
20
- for i in range(self.config.decoder_layers):
21
- cross_attentions.append(torch.cat([x[i] for x in generate_outputs.cross_attentions], dim=2))
22
-
23
- # Select specific cross-attention layers and heads. This is a tensor
24
- # of shape (batch size, num selected, output length, input length).
25
- weights = torch.stack([cross_attentions[l][:, h] for l, h in alignment_heads])
26
- weights = weights.permute([1, 0, 2, 3])
27
- if num_frames is not None:
28
- weights = weights[..., : num_frames // 2]
29
-
30
- # Normalize and smoothen the weights.
31
- std, mean = torch.std_mean(weights, dim=-2, keepdim=True, unbiased=False)
32
- weights = (weights - mean) / std
33
- weights = _median_filter(weights, self.config.median_filter_width)
34
-
35
- # Average the different cross-attention heads.
36
- matrix = weights.mean(dim=1)
37
-
38
- timestamps = torch.zeros_like(generate_outputs.sequences, dtype=torch.float32)
39
-
40
- # Perform dynamic time warping on each element of the batch.
41
- for batch_idx in range(timestamps.shape[0]):
42
- text_indices, time_indices = _dynamic_time_warping(-matrix[batch_idx].float().cpu().numpy())
43
- jumps = np.pad(np.diff(text_indices), (1, 0), constant_values=1).astype(bool)
44
- jump_times = time_indices[jumps] * time_precision
45
- timestamps[batch_idx, 1:] = torch.tensor(jump_times)
46
-
47
- return timestamps
48
-
49
-
50
- @dataclass
51
- class ASRAudioFile:
52
- file : str
53
- tensor : torch.Tensor
54
- rate : int
55
-
56
- def chunk(self,begin_ms, end_ms):
57
- """Get a chunk of the audio.
58
-
59
- Parameters
60
- ----------
61
- begin_ms : int
62
- Milliseconds of the start of the slice.
63
- end_ms : int
64
- Milliseconds of the end of the slice.
65
-
66
- Returns
67
- -------
68
- torch.Tensor
69
- The returned chunk to supply to the ASR engine.
70
- """
71
-
72
- data = self.tensor[int(round((begin_ms/1000)*self.rate)):
73
- int(round((end_ms/1000)*self.rate))]
74
-
75
- return data
76
-
77
- def all(self):
78
- """Get the audio in its entirety
79
-
80
- Notes
81
- -----
82
- like `chunk()` but all of the audio
83
- """
84
-
85
- return self.tensor
86
-
@@ -1,3 +0,0 @@
1
- 0.7.7-alpha.3
2
- December 06, 2024
3
- japanese lower-astrick special case
File without changes
File without changes
File without changes
File without changes