batchalign 0.7.6a14__tar.gz → 0.7.6a16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. {batchalign-0.7.6a14/batchalign.egg-info → batchalign-0.7.6a16}/PKG-INFO +1 -1
  2. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/cli/dispatch.py +1 -1
  3. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/whisper/infer_asr.py +10 -79
  4. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/morphosyntax/ud.py +5 -1
  5. batchalign-0.7.6a16/batchalign/version +3 -0
  6. {batchalign-0.7.6a14 → batchalign-0.7.6a16/batchalign.egg-info}/PKG-INFO +1 -1
  7. batchalign-0.7.6a14/batchalign/version +0 -3
  8. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/LICENSE +0 -0
  9. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/MANIFEST.in +0 -0
  10. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/README.md +0 -0
  11. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/__init__.py +0 -0
  12. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/__main__.py +0 -0
  13. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/cli/__init__.py +0 -0
  14. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/cli/cli.py +0 -0
  15. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/constants.py +0 -0
  16. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/document.py +0 -0
  17. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/errors.py +0 -0
  18. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/formats/__init__.py +0 -0
  19. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/formats/base.py +0 -0
  20. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/formats/chat/__init__.py +0 -0
  21. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/formats/chat/file.py +0 -0
  22. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/formats/chat/generator.py +0 -0
  23. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/formats/chat/lexer.py +0 -0
  24. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/formats/chat/parser.py +0 -0
  25. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/formats/chat/utils.py +0 -0
  26. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/formats/textgrid/__init__.py +0 -0
  27. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/formats/textgrid/file.py +0 -0
  28. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/formats/textgrid/generator.py +0 -0
  29. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/formats/textgrid/parser.py +0 -0
  30. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/__init__.py +0 -0
  31. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/resolve.py +0 -0
  32. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/speaker/__init__.py +0 -0
  33. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/speaker/config.yaml +0 -0
  34. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/speaker/infer.py +0 -0
  35. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/speaker/utils.py +0 -0
  36. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/training/__init__.py +0 -0
  37. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/training/run.py +0 -0
  38. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/training/utils.py +0 -0
  39. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/utils.py +0 -0
  40. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/utterance/__init__.py +0 -0
  41. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/utterance/dataset.py +0 -0
  42. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/utterance/execute.py +0 -0
  43. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/utterance/infer.py +0 -0
  44. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/utterance/prep.py +0 -0
  45. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/utterance/train.py +0 -0
  46. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/whisper/__init__.py +0 -0
  47. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/whisper/infer_fa.py +0 -0
  48. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/__init__.py +0 -0
  49. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/analysis/__init__.py +0 -0
  50. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/analysis/eval.py +0 -0
  51. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/asr/__init__.py +0 -0
  52. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/asr/rev.py +0 -0
  53. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/asr/utils.py +0 -0
  54. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/asr/whisper.py +0 -0
  55. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/asr/whisperx.py +0 -0
  56. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/base.py +0 -0
  57. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/__init__.py +0 -0
  58. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  59. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  60. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  61. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/retrace.py +0 -0
  62. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  63. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  64. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/support/test.test +0 -0
  65. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/dispatch.py +0 -0
  66. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/fa/__init__.py +0 -0
  67. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  68. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  69. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  70. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  71. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  72. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/pipeline.py +0 -0
  73. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/speaker/__init__.py +0 -0
  74. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  75. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/utr/__init__.py +0 -0
  76. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/utr/rev_utr.py +0 -0
  77. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/utr/utils.py +0 -0
  78. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  79. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/utterance/__init__.py +0 -0
  80. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  81. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/__init__.py +0 -0
  82. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/conftest.py +0 -0
  83. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  84. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  85. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  86. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  87. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  88. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  89. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  90. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  91. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  92. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  93. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  94. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  95. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/pipelines/fixures.py +0 -0
  96. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  97. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  98. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/test_document.py +0 -0
  99. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/utils/__init__.py +0 -0
  100. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/utils/config.py +0 -0
  101. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/utils/dp.py +0 -0
  102. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/utils/utils.py +0 -0
  103. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign.egg-info/SOURCES.txt +0 -0
  104. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign.egg-info/dependency_links.txt +0 -0
  105. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign.egg-info/entry_points.txt +0 -0
  106. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign.egg-info/requires.txt +0 -0
  107. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign.egg-info/top_level.txt +0 -0
  108. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/setup.cfg +0 -0
  109. {batchalign-0.7.6a14 → batchalign-0.7.6a16}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.6a14
3
+ Version: 0.7.6a16
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -65,7 +65,7 @@ def _dispatch(command, lang, num_speakers,
65
65
  for basedir, _, fs in os.walk(in_dir):
66
66
  for f in fs:
67
67
  path = Path(os.path.join(basedir, f))
68
- ext = path.suffix.strip(".").strip()
68
+ ext = path.suffix.strip(".").strip().lower()
69
69
 
70
70
  # calculate input path, convert if needed
71
71
  inp_path = str(path)
@@ -67,85 +67,16 @@ class WhisperASRModel(object):
67
67
  self.__config = GenerationConfig.from_pretrained(base)
68
68
  self.__config.no_repeat_ngram_size = 4
69
69
 
70
- if language == "Cantonese":
71
- self.pipe = pipeline(
72
- "automatic-speech-recognition",
73
- model=model,
74
- # tokenizer=WhisperTokenizer.from_pretrained(base),
75
- chunk_length_s=30,
76
- # stride_length_s=3,
77
- device=DEVICE,
78
- # torch_dtype=torch.float32,
79
- return_timestamps="word",
80
- )
81
- self.__config = GenerationConfig.from_model_config(self.pipe.model.config)
82
- self.__config.no_repeat_ngram_size = 4
83
- self.__config.use_cache = False
84
-
85
- forced_decoder_ids = self.pipe.tokenizer.get_decoder_prompt_ids(language="yue", task="transcribe")
86
-
87
- suppress_tokens = []
88
-
89
- # Define other parameters
90
- return_attention_mask = False
91
- pad_token_id = 50257
92
- bos_token_id = 50257
93
- eos_token_id = 50257
94
- decoder_start_token_id = 50258
95
- begin_suppress_tokens = [
96
- 220,
97
- 50257
98
- ],
99
- alignment_heads = [
100
- [5, 3],
101
- [5, 9],
102
- [8, 0],
103
- [8, 4],
104
- [8, 8],
105
- [9, 0],
106
- [9, 7],
107
- [9, 9],
108
- [10, 5]
109
- ]
110
- lang_to_id = {"<|yue|>": 50325}
111
- task_to_id = {"transcribe": 50359}
112
- is_multilingual = True
113
- max_initial_timestamp_index = 50
114
- no_timestamps_token_id = 50363
115
- prev_sot_token_id = 50361
116
- max_length = 448
117
-
118
- # Assign values to generation config
119
- self.__config.forced_decoder_ids = forced_decoder_ids
120
- self.__config.suppress_tokens = suppress_tokens
121
- self.__config.pad_token_id = pad_token_id
122
- self.__config.bos_token_id = bos_token_id
123
- self.__config.eos_token_id = eos_token_id
124
- self.__config.decoder_start_token_id = decoder_start_token_id
125
- self.__config.lang_to_id = lang_to_id
126
- self.__config.task_to_id = task_to_id
127
- self.__config.alignment_heads = alignment_heads
128
- self.__config.alignment_heads = alignment_heads
129
- self.__config.begin_suppress_tokens = begin_suppress_tokens
130
- self.__config.is_multilingual = is_multilingual
131
- self.__config.max_initial_timestamp_index = max_initial_timestamp_index
132
- self.__config.no_timestamps_token_id = no_timestamps_token_id
133
- self.__config.prev_sot_token_id = prev_sot_token_id
134
- self.__config.max_length =max_length
135
-
136
- self.pipe.model.generation_config = self.__config
137
-
138
- else:
139
- self.pipe = pipeline(
140
- "automatic-speech-recognition",
141
- model=model,
142
- tokenizer=WhisperTokenizer.from_pretrained(base),
143
- chunk_length_s=25,
144
- stride_length_s=3,
145
- device=DEVICE,
146
- torch_dtype=torch.float32,
147
- return_timestamps="word",
148
- )
70
+ self.pipe = pipeline(
71
+ "automatic-speech-recognition",
72
+ model=model,
73
+ tokenizer=WhisperTokenizer.from_pretrained(base),
74
+ chunk_length_s=25,
75
+ stride_length_s=3,
76
+ device=DEVICE,
77
+ torch_dtype=torch.float32,
78
+ return_timestamps="word",
79
+ )
149
80
  L.debug("Done, initalizing processor and config...")
150
81
  processor = WhisperProcessor.from_pretrained(base)
151
82
  L.debug("Whisper initialization done.")
@@ -213,11 +213,15 @@ def handler__NOUN(word, lang=None):
213
213
  if word.deprel == "obj" and case.strip() == "":
214
214
  case = "Acc"
215
215
 
216
+ ger = ""
217
+ if word.text.endswith("ing") and lang == "en":
218
+ ger += "-Ger"
219
+
216
220
  # clear defaults
217
221
  if gender_str == "-Com,Neut" or gender_str == "-Com" or gender_str == "-ComNeut": gender_str=""
218
222
  if number_str == "-Sing": number_str=""
219
223
 
220
- return handler(word, lang)+gender_str+number_str+stringify_feats(case, type)
224
+ return handler(word, lang)+gender_str+number_str+stringify_feats(case, type)+ger
221
225
 
222
226
  def handler__PROPN(word, lang=None):
223
227
  # code as noun
@@ -0,0 +1,3 @@
1
+ 0.7.6-alpha.16
2
+ October 16, 2024
3
+ fixing asr for file names
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.6a14
3
+ Version: 0.7.6a16
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -1,3 +0,0 @@
1
- 0.7.6-alpha.14
2
- October 14, 2024
3
- creaky
File without changes
File without changes
File without changes
File without changes
File without changes