batchalign 0.7.1b5__tar.gz → 0.7.1b7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. {batchalign-0.7.1b5/batchalign.egg-info → batchalign-0.7.1b7}/PKG-INFO +2 -2
  2. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/README.md +1 -1
  3. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/cli/cli.py +9 -6
  4. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/formats/chat/parser.py +1 -1
  5. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/dispatch.py +1 -1
  6. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/morphosyntax/ud.py +20 -0
  7. batchalign-0.7.1b7/batchalign/version +3 -0
  8. {batchalign-0.7.1b5 → batchalign-0.7.1b7/batchalign.egg-info}/PKG-INFO +2 -2
  9. batchalign-0.7.1b5/batchalign/version +0 -3
  10. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/LICENSE +0 -0
  11. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/MANIFEST.in +0 -0
  12. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/__init__.py +0 -0
  13. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/__main__.py +0 -0
  14. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/cli/__init__.py +0 -0
  15. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/cli/dispatch.py +0 -0
  16. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/constants.py +0 -0
  17. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/document.py +0 -0
  18. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/errors.py +0 -0
  19. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/formats/__init__.py +0 -0
  20. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/formats/base.py +0 -0
  21. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/formats/chat/__init__.py +0 -0
  22. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/formats/chat/file.py +0 -0
  23. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/formats/chat/generator.py +0 -0
  24. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/formats/chat/lexer.py +0 -0
  25. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/formats/chat/utils.py +0 -0
  26. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/formats/textgrid/__init__.py +0 -0
  27. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/formats/textgrid/file.py +0 -0
  28. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/formats/textgrid/generator.py +0 -0
  29. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/formats/textgrid/parser.py +0 -0
  30. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/__init__.py +0 -0
  31. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/resolve.py +0 -0
  32. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/speaker/__init__.py +0 -0
  33. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/speaker/config.yaml +0 -0
  34. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/speaker/infer.py +0 -0
  35. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/speaker/utils.py +0 -0
  36. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/training/__init__.py +0 -0
  37. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/training/run.py +0 -0
  38. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/training/utils.py +0 -0
  39. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/utils.py +0 -0
  40. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/utterance/__init__.py +0 -0
  41. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/utterance/dataset.py +0 -0
  42. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/utterance/execute.py +0 -0
  43. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/utterance/infer.py +0 -0
  44. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/utterance/prep.py +0 -0
  45. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/utterance/train.py +0 -0
  46. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/whisper/__init__.py +0 -0
  47. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/whisper/infer_asr.py +0 -0
  48. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/whisper/infer_fa.py +0 -0
  49. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/__init__.py +0 -0
  50. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/analysis/__init__.py +0 -0
  51. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/analysis/eval.py +0 -0
  52. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/asr/__init__.py +0 -0
  53. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/asr/rev.py +0 -0
  54. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/asr/utils.py +0 -0
  55. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/asr/whisper.py +0 -0
  56. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/asr/whisperx.py +0 -0
  57. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/base.py +0 -0
  58. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/cleanup/__init__.py +0 -0
  59. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  60. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  61. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  62. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/cleanup/retrace.py +0 -0
  63. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  64. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  65. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/cleanup/support/test.test +0 -0
  66. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/fa/__init__.py +0 -0
  67. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  68. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  69. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  70. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/pipeline.py +0 -0
  71. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/speaker/__init__.py +0 -0
  72. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  73. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/utr/__init__.py +0 -0
  74. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/utr/rev_utr.py +0 -0
  75. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/utr/utils.py +0 -0
  76. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  77. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/utterance/__init__.py +0 -0
  78. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  79. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/__init__.py +0 -0
  80. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/conftest.py +0 -0
  81. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  82. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  83. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  84. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  85. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  86. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  87. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  88. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  89. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  90. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  91. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  92. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  93. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/pipelines/fixures.py +0 -0
  94. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  95. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  96. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/test_document.py +0 -0
  97. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/utils/__init__.py +0 -0
  98. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/utils/config.py +0 -0
  99. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/utils/dp.py +0 -0
  100. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/utils/utils.py +0 -0
  101. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign.egg-info/SOURCES.txt +0 -0
  102. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign.egg-info/dependency_links.txt +0 -0
  103. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign.egg-info/entry_points.txt +0 -0
  104. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign.egg-info/requires.txt +0 -0
  105. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign.egg-info/top_level.txt +0 -0
  106. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/setup.cfg +0 -0
  107. {batchalign-0.7.1b5 → batchalign-0.7.1b7}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.1b5
3
+ Version: 0.7.1b7
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -82,7 +82,7 @@ The TalkBank Project, of which Batchalign is a part, is supported by NIH grant H
82
82
 
83
83
  ## Quick Start
84
84
 
85
- The following instructions is a quick start to install Batchalign. **For most users, we recommend you [visit this detailed guide](https://talkbank.org/info/batchalign2.pdf) for more detailed instructions.** The remaining instructions on this page provides a very rough overview of the primary functionality of `batchalign`, and assumes familiarity with Python and the terminal.
85
+ The following instructions are a quick start for installing Batchalign.
86
86
 
87
87
  ### Get Python
88
88
  - We support Python versions 3.9, 3.10, and 3.11.
@@ -8,7 +8,7 @@ The TalkBank Project, of which Batchalign is a part, is supported by NIH grant H
8
8
 
9
9
  ## Quick Start
10
10
 
11
- The following instructions is a quick start to install Batchalign. **For most users, we recommend you [visit this detailed guide](https://talkbank.org/info/batchalign2.pdf) for more detailed instructions.** The remaining instructions on this page provides a very rough overview of the primary functionality of `batchalign`, and assumes familiarity with Python and the terminal.
11
+ The following instructions are a quick start for installing Batchalign.
12
12
 
13
13
  ### Get Python
14
14
  - We support Python versions 3.9, 3.10, and 3.11.
@@ -149,19 +149,22 @@ def transcribe(ctx, in_dir, out_dir, lang, num_speakers, **kwargs):
149
149
  def loader(file):
150
150
  return file
151
151
 
152
+ asr = "rev"
153
+ if kwargs["whisper"]:
154
+ asr = "whisper"
155
+ if kwargs["whisperx"]:
156
+ asr = "whisperx"
157
+
158
+
152
159
  def writer(doc, output):
160
+ doc.content.insert(0, CustomLine(id="Comment", type=CustomLineType.INDEPENDENT,
161
+ content=f"Batchalign {VERSION_NUMBER.strip()}, ASR Engine {asr}"))
153
162
  CHATFile(doc=doc, special_mor_=True).write(output
154
163
  .replace(".wav", ".cha")
155
164
  .replace(".mp4", ".cha")
156
165
  .replace(".mp3", ".cha"),
157
166
  write_wor=kwargs.get("wor", False))
158
167
 
159
- asr = "rev"
160
- if kwargs["whisper"]:
161
- asr = "whisper"
162
- if kwargs["whisperx"]:
163
- asr = "whisperx"
164
-
165
168
  if kwargs.get("diarize"):
166
169
  _dispatch("transcribe_s",
167
170
  lang, num_speakers, ["mp3", "mp4", "wav"], ctx,
@@ -222,7 +222,7 @@ def chat_parse_doc(lines, special_mor=False):
222
222
  continue
223
223
  # we split because there are multiple languages possible
224
224
  elif "@Languages" in line.strip():
225
- results["langs"] = [i.strip() for i in line.strip("@Languages:").strip().split(",")]
225
+ results["langs"] = [i.strip() for i in line.strip("@Languages:").strip().replace(" ", ",").strip().split(",") if i.strip() != ""]
226
226
  if len(results["langs"]) > 0 and results["langs"][0] == "eng" and special_mor:
227
227
  use_special_mor = True
228
228
  # parse participants; the number of | delineates the metadata field
@@ -73,7 +73,7 @@ def dispatch_pipeline(pkg_str, lang, num_speakers=None, **arg_overrides):
73
73
  packages.append("disfluency")
74
74
  if "retracing" not in packages:
75
75
  packages.append("retracing")
76
- if "utterance" not in packages and resolve("utterance", lang) == None:
76
+ if "utterance" not in packages and resolve("utterance", lang) == None and lang not in ["heb"]:
77
77
  packages.append("utterance")
78
78
  if "fa" in packages:
79
79
  if "utr" not in packages:
@@ -808,7 +808,27 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
808
808
  ut, end = chat_parse_utterance(" ".join([i.text for i in sents[0].tokens])+" "+ending,
809
809
  mor, gra,
810
810
  None, None)
811
+ # JANK add a space after every form being analyzed
812
+ text_fixed = []
813
+ text_orig = i.text
814
+ # we do this to force one replacement of the token
815
+ # for every input token
816
+ for i in sents[0].tokens:
817
+ try:
818
+ before, after = text_orig.split(i.text, 1)
819
+ text_fixed.append(before.strip())
820
+ text_fixed.append(i.text.strip())
821
+ text_orig = after
822
+ except ValueError:
823
+ # we give up on that token; likely not found
824
+ # because there were tokenization issues (i.e.
825
+ # existing tokenization)
826
+ continue
827
+ text_fixed.append(text_orig.strip())
828
+ text_fixed = " ".join(text_fixed).strip()
829
+ text_fixed = re.sub(r" +", " ", text_fixed)
811
830
  doc.content[indx] = Utterance(content=ut,
831
+ text=text_fixed,
812
832
  tier=doc.content[indx].tier,
813
833
  time=doc.content[indx].time,
814
834
  custom_dependencies=doc.content[indx].custom_dependencies)
@@ -0,0 +1,3 @@
1
+ 0.7.1-beta.7
2
+ May 21st, 2024
3
+ insert debug info to transcribe file
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.1b5
3
+ Version: 0.7.1b7
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -82,7 +82,7 @@ The TalkBank Project, of which Batchalign is a part, is supported by NIH grant H
82
82
 
83
83
  ## Quick Start
84
84
 
85
- The following instructions is a quick start to install Batchalign. **For most users, we recommend you [visit this detailed guide](https://talkbank.org/info/batchalign2.pdf) for more detailed instructions.** The remaining instructions on this page provides a very rough overview of the primary functionality of `batchalign`, and assumes familiarity with Python and the terminal.
85
+ The following instructions are a quick start for installing Batchalign.
86
86
 
87
87
  ### Get Python
88
88
  - We support Python versions 3.9, 3.10, and 3.11.
@@ -1,3 +0,0 @@
1
- 0.7.1-beta.5
2
- Apr 20th, 2024
3
- minor retrace bug fix
File without changes
File without changes
File without changes
File without changes