batchalign 0.7.6a15__tar.gz → 0.7.6a16__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. {batchalign-0.7.6a15/batchalign.egg-info → batchalign-0.7.6a16}/PKG-INFO +1 -1
  2. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/cli/dispatch.py +1 -1
  3. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/whisper/infer_asr.py +10 -79
  4. batchalign-0.7.6a16/batchalign/version +3 -0
  5. {batchalign-0.7.6a15 → batchalign-0.7.6a16/batchalign.egg-info}/PKG-INFO +1 -1
  6. batchalign-0.7.6a15/batchalign/version +0 -3
  7. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/LICENSE +0 -0
  8. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/MANIFEST.in +0 -0
  9. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/README.md +0 -0
  10. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/__init__.py +0 -0
  11. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/__main__.py +0 -0
  12. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/cli/__init__.py +0 -0
  13. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/cli/cli.py +0 -0
  14. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/constants.py +0 -0
  15. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/document.py +0 -0
  16. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/errors.py +0 -0
  17. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/formats/__init__.py +0 -0
  18. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/formats/base.py +0 -0
  19. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/formats/chat/__init__.py +0 -0
  20. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/formats/chat/file.py +0 -0
  21. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/formats/chat/generator.py +0 -0
  22. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/formats/chat/lexer.py +0 -0
  23. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/formats/chat/parser.py +0 -0
  24. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/formats/chat/utils.py +0 -0
  25. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/formats/textgrid/__init__.py +0 -0
  26. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/formats/textgrid/file.py +0 -0
  27. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/formats/textgrid/generator.py +0 -0
  28. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/formats/textgrid/parser.py +0 -0
  29. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/__init__.py +0 -0
  30. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/resolve.py +0 -0
  31. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/speaker/__init__.py +0 -0
  32. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/speaker/config.yaml +0 -0
  33. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/speaker/infer.py +0 -0
  34. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/speaker/utils.py +0 -0
  35. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/training/__init__.py +0 -0
  36. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/training/run.py +0 -0
  37. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/training/utils.py +0 -0
  38. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/utils.py +0 -0
  39. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/utterance/__init__.py +0 -0
  40. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/utterance/dataset.py +0 -0
  41. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/utterance/execute.py +0 -0
  42. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/utterance/infer.py +0 -0
  43. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/utterance/prep.py +0 -0
  44. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/utterance/train.py +0 -0
  45. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/whisper/__init__.py +0 -0
  46. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/whisper/infer_fa.py +0 -0
  47. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/__init__.py +0 -0
  48. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/analysis/__init__.py +0 -0
  49. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/analysis/eval.py +0 -0
  50. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/asr/__init__.py +0 -0
  51. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/asr/rev.py +0 -0
  52. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/asr/utils.py +0 -0
  53. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/asr/whisper.py +0 -0
  54. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/asr/whisperx.py +0 -0
  55. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/base.py +0 -0
  56. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/__init__.py +0 -0
  57. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  58. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  59. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  60. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/retrace.py +0 -0
  61. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  62. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  63. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/support/test.test +0 -0
  64. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/dispatch.py +0 -0
  65. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/fa/__init__.py +0 -0
  66. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  67. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  68. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  69. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  70. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  71. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/morphosyntax/ud.py +0 -0
  72. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/pipeline.py +0 -0
  73. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/speaker/__init__.py +0 -0
  74. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  75. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/utr/__init__.py +0 -0
  76. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/utr/rev_utr.py +0 -0
  77. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/utr/utils.py +0 -0
  78. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  79. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/utterance/__init__.py +0 -0
  80. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  81. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/__init__.py +0 -0
  82. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/conftest.py +0 -0
  83. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  84. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  85. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  86. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  87. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  88. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  89. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  90. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  91. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  92. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  93. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  94. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  95. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/pipelines/fixures.py +0 -0
  96. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  97. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  98. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/test_document.py +0 -0
  99. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/utils/__init__.py +0 -0
  100. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/utils/config.py +0 -0
  101. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/utils/dp.py +0 -0
  102. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/utils/utils.py +0 -0
  103. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign.egg-info/SOURCES.txt +0 -0
  104. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign.egg-info/dependency_links.txt +0 -0
  105. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign.egg-info/entry_points.txt +0 -0
  106. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign.egg-info/requires.txt +0 -0
  107. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign.egg-info/top_level.txt +0 -0
  108. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/setup.cfg +0 -0
  109. {batchalign-0.7.6a15 → batchalign-0.7.6a16}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.6a15
3
+ Version: 0.7.6a16
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -65,7 +65,7 @@ def _dispatch(command, lang, num_speakers,
65
65
  for basedir, _, fs in os.walk(in_dir):
66
66
  for f in fs:
67
67
  path = Path(os.path.join(basedir, f))
68
- ext = path.suffix.strip(".").strip()
68
+ ext = path.suffix.strip(".").strip().lower()
69
69
 
70
70
  # calculate input path, convert if needed
71
71
  inp_path = str(path)
@@ -67,85 +67,16 @@ class WhisperASRModel(object):
67
67
  self.__config = GenerationConfig.from_pretrained(base)
68
68
  self.__config.no_repeat_ngram_size = 4
69
69
 
70
- if language == "Cantonese":
71
- self.pipe = pipeline(
72
- "automatic-speech-recognition",
73
- model=model,
74
- # tokenizer=WhisperTokenizer.from_pretrained(base),
75
- chunk_length_s=30,
76
- # stride_length_s=3,
77
- device=DEVICE,
78
- # torch_dtype=torch.float32,
79
- return_timestamps="word",
80
- )
81
- self.__config = GenerationConfig.from_model_config(self.pipe.model.config)
82
- self.__config.no_repeat_ngram_size = 4
83
- self.__config.use_cache = False
84
-
85
- forced_decoder_ids = self.pipe.tokenizer.get_decoder_prompt_ids(language="yue", task="transcribe")
86
-
87
- suppress_tokens = []
88
-
89
- # Define other parameters
90
- return_attention_mask = False
91
- pad_token_id = 50257
92
- bos_token_id = 50257
93
- eos_token_id = 50257
94
- decoder_start_token_id = 50258
95
- begin_suppress_tokens = [
96
- 220,
97
- 50257
98
- ],
99
- alignment_heads = [
100
- [5, 3],
101
- [5, 9],
102
- [8, 0],
103
- [8, 4],
104
- [8, 8],
105
- [9, 0],
106
- [9, 7],
107
- [9, 9],
108
- [10, 5]
109
- ]
110
- lang_to_id = {"<|yue|>": 50325}
111
- task_to_id = {"transcribe": 50359}
112
- is_multilingual = True
113
- max_initial_timestamp_index = 50
114
- no_timestamps_token_id = 50363
115
- prev_sot_token_id = 50361
116
- max_length = 448
117
-
118
- # Assign values to generation config
119
- self.__config.forced_decoder_ids = forced_decoder_ids
120
- self.__config.suppress_tokens = suppress_tokens
121
- self.__config.pad_token_id = pad_token_id
122
- self.__config.bos_token_id = bos_token_id
123
- self.__config.eos_token_id = eos_token_id
124
- self.__config.decoder_start_token_id = decoder_start_token_id
125
- self.__config.lang_to_id = lang_to_id
126
- self.__config.task_to_id = task_to_id
127
- self.__config.alignment_heads = alignment_heads
128
- self.__config.alignment_heads = alignment_heads
129
- self.__config.begin_suppress_tokens = begin_suppress_tokens
130
- self.__config.is_multilingual = is_multilingual
131
- self.__config.max_initial_timestamp_index = max_initial_timestamp_index
132
- self.__config.no_timestamps_token_id = no_timestamps_token_id
133
- self.__config.prev_sot_token_id = prev_sot_token_id
134
- self.__config.max_length =max_length
135
-
136
- self.pipe.model.generation_config = self.__config
137
-
138
- else:
139
- self.pipe = pipeline(
140
- "automatic-speech-recognition",
141
- model=model,
142
- tokenizer=WhisperTokenizer.from_pretrained(base),
143
- chunk_length_s=25,
144
- stride_length_s=3,
145
- device=DEVICE,
146
- torch_dtype=torch.float32,
147
- return_timestamps="word",
148
- )
70
+ self.pipe = pipeline(
71
+ "automatic-speech-recognition",
72
+ model=model,
73
+ tokenizer=WhisperTokenizer.from_pretrained(base),
74
+ chunk_length_s=25,
75
+ stride_length_s=3,
76
+ device=DEVICE,
77
+ torch_dtype=torch.float32,
78
+ return_timestamps="word",
79
+ )
149
80
  L.debug("Done, initalizing processor and config...")
150
81
  processor = WhisperProcessor.from_pretrained(base)
151
82
  L.debug("Whisper initialization done.")
@@ -0,0 +1,3 @@
1
+ 0.7.6-alpha.16
2
+ October 16, 2024
3
+ fixing asr for file names
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.6a15
3
+ Version: 0.7.6a16
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -1,3 +0,0 @@
1
- 0.7.6-alpha.15
2
- October 16, 2024
3
- gerund support
File without changes
File without changes
File without changes
File without changes
File without changes