batchalign 0.7.10.post4__tar.gz → 0.7.11a0__tar.gz

This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (112)
  1. {batchalign-0.7.10.post4/batchalign.egg-info → batchalign-0.7.11a0}/PKG-INFO +1 -1
  2. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/whisper/infer_fa.py +1 -1
  3. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/fa/whisper_fa.py +13 -3
  4. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/morphosyntax/ud.py +2 -1
  5. batchalign-0.7.11a0/batchalign/version +3 -0
  6. {batchalign-0.7.10.post4 → batchalign-0.7.11a0/batchalign.egg-info}/PKG-INFO +1 -1
  7. batchalign-0.7.10.post4/batchalign/version +0 -3
  8. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/LICENSE +0 -0
  9. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/MANIFEST.in +0 -0
  10. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/README.md +0 -0
  11. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/__init__.py +0 -0
  12. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/__main__.py +0 -0
  13. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/cli/__init__.py +0 -0
  14. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/cli/cli.py +0 -0
  15. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/cli/dispatch.py +0 -0
  16. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/constants.py +0 -0
  17. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/document.py +0 -0
  18. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/errors.py +0 -0
  19. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/formats/__init__.py +0 -0
  20. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/formats/base.py +0 -0
  21. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/formats/chat/__init__.py +0 -0
  22. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/formats/chat/file.py +0 -0
  23. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/formats/chat/generator.py +0 -0
  24. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/formats/chat/lexer.py +0 -0
  25. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/formats/chat/parser.py +0 -0
  26. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/formats/chat/utils.py +0 -0
  27. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/formats/textgrid/__init__.py +0 -0
  28. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/formats/textgrid/file.py +0 -0
  29. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/formats/textgrid/generator.py +0 -0
  30. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/formats/textgrid/parser.py +0 -0
  31. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/__init__.py +0 -0
  32. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/resolve.py +0 -0
  33. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/speaker/__init__.py +0 -0
  34. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/speaker/config.yaml +0 -0
  35. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/speaker/infer.py +0 -0
  36. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/speaker/utils.py +0 -0
  37. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/training/__init__.py +0 -0
  38. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/training/run.py +0 -0
  39. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/training/utils.py +0 -0
  40. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/utils.py +0 -0
  41. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/utterance/__init__.py +0 -0
  42. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/utterance/dataset.py +0 -0
  43. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/utterance/execute.py +0 -0
  44. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/utterance/infer.py +0 -0
  45. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/utterance/prep.py +0 -0
  46. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/utterance/train.py +0 -0
  47. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/whisper/__init__.py +0 -0
  48. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/whisper/infer_asr.py +0 -0
  49. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/__init__.py +0 -0
  50. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/analysis/__init__.py +0 -0
  51. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/analysis/eval.py +0 -0
  52. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/asr/__init__.py +0 -0
  53. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/asr/rev.py +0 -0
  54. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/asr/utils.py +0 -0
  55. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/asr/whisper.py +0 -0
  56. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/asr/whisperx.py +0 -0
  57. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/base.py +0 -0
  58. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/cleanup/__init__.py +0 -0
  59. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  60. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  61. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  62. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/cleanup/retrace.py +0 -0
  63. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  64. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  65. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/cleanup/support/test.test +0 -0
  66. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/dispatch.py +0 -0
  67. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/fa/__init__.py +0 -0
  68. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  69. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  70. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  71. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  72. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  73. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  74. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  75. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/pipeline.py +0 -0
  76. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/speaker/__init__.py +0 -0
  77. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  78. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/utr/__init__.py +0 -0
  79. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/utr/rev_utr.py +0 -0
  80. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/utr/utils.py +0 -0
  81. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  82. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/utterance/__init__.py +0 -0
  83. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  84. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/__init__.py +0 -0
  85. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/conftest.py +0 -0
  86. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  87. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  88. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  89. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  90. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  91. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  92. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  93. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  94. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  95. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  96. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  97. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  98. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/pipelines/fixures.py +0 -0
  99. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  100. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  101. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/test_document.py +0 -0
  102. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/utils/__init__.py +0 -0
  103. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/utils/config.py +0 -0
  104. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/utils/dp.py +0 -0
  105. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/utils/utils.py +0 -0
  106. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign.egg-info/SOURCES.txt +0 -0
  107. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign.egg-info/dependency_links.txt +0 -0
  108. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign.egg-info/entry_points.txt +0 -0
  109. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign.egg-info/requires.txt +0 -0
  110. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign.egg-info/top_level.txt +0 -0
  111. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/setup.cfg +0 -0
  112. {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.10.post4
3
+ Version: 0.7.11a0
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -82,7 +82,7 @@ class WhisperFAModel(object):
82
82
  def __call__(self, audio, text):
83
83
  L.debug("Whisper Preprocessing...")
84
84
  # input features
85
- features = self.__processor(audio=audio, text=text,
85
+ features = self.__processor(audio=audio, text=" ".join(list(text)),
86
86
  sampling_rate=self.sample_rate,
87
87
  return_tensors='pt')
88
88
  tokens = features["labels"][0]
@@ -107,11 +107,20 @@ class WhisperFAEngine(BatchalignEngine):
107
107
  # we do this BACKWARDS because we went to have the first timestamp
108
108
  # we get about a word first
109
109
  alignments.reverse()
110
- for elem in alignments:
110
+ for indx,elem in enumerate(alignments):
111
111
  if isinstance(elem, Match):
112
+ next_elem = indx - 1 # remember this is backwards, see above
113
+ while next_elem >= 0 and alignments[next_elem].payload == elem.payload:
114
+ next_elem -= 1
115
+ if next_elem < 0:
116
+ next_elem = None
117
+ else:
118
+ next_elem = alignments[next_elem]
112
119
  grp[elem.reference_payload][0].time = (int(round((timings[elem.payload]*1000 +
113
120
  grp[0][1][0]))),
114
121
  int(round((timings[elem.payload]*1000 +
122
+ grp[0][1][0])))+500 if next_elem == None else
123
+ int(round((timings[next_elem.payload]*1000 +
115
124
  grp[0][1][0]))))
116
125
 
117
126
  L.debug(f"Correcting text...")
@@ -144,8 +153,9 @@ class WhisperFAEngine(BatchalignEngine):
144
153
  w.time = (w.time[0], doc.content[next_ut].alignment[0])
145
154
  else:
146
155
  w.time = (w.time[0], w.time[0]+500) # give half a second because we don't know
147
- else:
148
- w.time = (w.time[0], ut.content[tmp].time[0])
156
+ # else:
157
+ # w.time = (w.time[0], ut.content[tmp].time[0])
158
+
149
159
  # just in case, bound the time by the utterance derived timings
150
160
  if ut.alignment and ut.alignment[0] != None:
151
161
  w.time = (max(w.time[0], ut.alignment[0]), min(w.time[1], ut.alignment[1]))
@@ -990,7 +990,8 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
990
990
  content.dependency = form.dependency
991
991
 
992
992
  except Exception as e:
993
- warnings.warn(f"Utterance failed parsing, skipping ud tagging... line='{line}', error='{e}'.\n")
993
+ pass
994
+ # warnings.warn(f"Utterance failed parsing, skipping ud tagging... line='{line}', error='{e}'.\n")
994
995
 
995
996
  L.debug("Stanza done.")
996
997
  return doc
@@ -0,0 +1,3 @@
1
+ 0.7.11-alpha.0
2
+ Janurary 22th, 2025
3
+ Whisper inference with spaces
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.10.post4
3
+ Version: 0.7.11a0
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -1,3 +0,0 @@
1
- 0.7.10-post.4
2
- Janurary 28th, 2025
3
- numbers
File without changes