batchalign 0.7.5a5__tar.gz → 0.7.5a7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108)
  1. {batchalign-0.7.5a5/batchalign.egg-info → batchalign-0.7.5a7}/PKG-INFO +1 -1
  2. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/utterance/infer.py +22 -12
  3. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/analysis/eval.py +14 -1
  4. batchalign-0.7.5a7/batchalign/version +3 -0
  5. {batchalign-0.7.5a5 → batchalign-0.7.5a7/batchalign.egg-info}/PKG-INFO +1 -1
  6. batchalign-0.7.5a5/batchalign/version +0 -3
  7. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/LICENSE +0 -0
  8. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/MANIFEST.in +0 -0
  9. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/README.md +0 -0
  10. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/__init__.py +0 -0
  11. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/__main__.py +0 -0
  12. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/cli/__init__.py +0 -0
  13. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/cli/cli.py +0 -0
  14. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/cli/dispatch.py +0 -0
  15. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/constants.py +0 -0
  16. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/document.py +0 -0
  17. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/errors.py +0 -0
  18. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/formats/__init__.py +0 -0
  19. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/formats/base.py +0 -0
  20. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/formats/chat/__init__.py +0 -0
  21. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/formats/chat/file.py +0 -0
  22. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/formats/chat/generator.py +0 -0
  23. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/formats/chat/lexer.py +0 -0
  24. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/formats/chat/parser.py +0 -0
  25. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/formats/chat/utils.py +0 -0
  26. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/formats/textgrid/__init__.py +0 -0
  27. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/formats/textgrid/file.py +0 -0
  28. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/formats/textgrid/generator.py +0 -0
  29. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/formats/textgrid/parser.py +0 -0
  30. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/__init__.py +0 -0
  31. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/resolve.py +0 -0
  32. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/speaker/__init__.py +0 -0
  33. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/speaker/config.yaml +0 -0
  34. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/speaker/infer.py +0 -0
  35. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/speaker/utils.py +0 -0
  36. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/training/__init__.py +0 -0
  37. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/training/run.py +0 -0
  38. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/training/utils.py +0 -0
  39. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/utils.py +0 -0
  40. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/utterance/__init__.py +0 -0
  41. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/utterance/dataset.py +0 -0
  42. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/utterance/execute.py +0 -0
  43. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/utterance/prep.py +0 -0
  44. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/utterance/train.py +0 -0
  45. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/whisper/__init__.py +0 -0
  46. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/whisper/infer_asr.py +0 -0
  47. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/whisper/infer_fa.py +0 -0
  48. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/__init__.py +0 -0
  49. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/analysis/__init__.py +0 -0
  50. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/asr/__init__.py +0 -0
  51. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/asr/rev.py +0 -0
  52. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/asr/utils.py +0 -0
  53. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/asr/whisper.py +0 -0
  54. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/asr/whisperx.py +0 -0
  55. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/base.py +0 -0
  56. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/cleanup/__init__.py +0 -0
  57. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  58. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  59. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  60. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/cleanup/retrace.py +0 -0
  61. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  62. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  63. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/cleanup/support/test.test +0 -0
  64. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/dispatch.py +0 -0
  65. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/fa/__init__.py +0 -0
  66. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  67. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  68. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  69. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  70. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/morphosyntax/ud.py +0 -0
  71. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/pipeline.py +0 -0
  72. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/speaker/__init__.py +0 -0
  73. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  74. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/utr/__init__.py +0 -0
  75. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/utr/rev_utr.py +0 -0
  76. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/utr/utils.py +0 -0
  77. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  78. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/utterance/__init__.py +0 -0
  79. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  80. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/__init__.py +0 -0
  81. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/conftest.py +0 -0
  82. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  83. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  84. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  85. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  86. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  87. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  88. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  89. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  90. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  91. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  92. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  93. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  94. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/pipelines/fixures.py +0 -0
  95. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  96. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  97. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/test_document.py +0 -0
  98. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/utils/__init__.py +0 -0
  99. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/utils/config.py +0 -0
  100. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/utils/dp.py +0 -0
  101. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/utils/utils.py +0 -0
  102. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign.egg-info/SOURCES.txt +0 -0
  103. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign.egg-info/dependency_links.txt +0 -0
  104. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign.egg-info/entry_points.txt +0 -0
  105. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign.egg-info/requires.txt +0 -0
  106. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign.egg-info/top_level.txt +0 -0
  107. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/setup.cfg +0 -0
  108. {batchalign-0.7.5a5 → batchalign-0.7.5a7}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.5a5
3
+ Version: 0.7.5a7
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -35,6 +35,7 @@ class BertUtteranceModel(object):
35
35
  self.model.eval()
36
36
 
37
37
  def __call__(self, passage):
38
+ print(passage)
38
39
  # input passage words removed of all preexisting punctuation
39
40
  passage = passage.lower()
40
41
  passage = passage.replace('.','')
@@ -67,7 +68,8 @@ class BertUtteranceModel(object):
67
68
  prev_word_idx = None
68
69
 
69
70
  # for each word, perform the action
70
- for indx, elem in enumerate(tokd.word_ids(0)):
71
+ wids = tokd.word_ids(0)
72
+ for indx, elem in enumerate(wids):
71
73
  # if its none, append nothing or if we have
72
74
  # seen it before, do nothing
73
75
  if elem is None or elem == prev_word_idx:
@@ -81,23 +83,31 @@ class BertUtteranceModel(object):
81
83
  # set the working variable
82
84
  w = input_tokenized[elem]
83
85
 
84
- # perform the edit actions
85
- if action == 1:
86
- w = w[0].upper() + w[1:]
87
- elif action == 2:
88
- w = w+'.'
89
- elif action == 3:
90
- w = w+'?'
91
- elif action == 4:
92
- w = w+'!'
93
- elif action == 5:
94
- w = w+','
86
+ # fix one word hanging issue
87
+ will_action = False
88
+ if indx < len(wids)-2 and classified_targets[0][indx+1] > 0:
89
+ will_action = True
90
+
91
+ if not will_action:
92
+ # perform the edit actions
93
+ if action == 1:
94
+ w = w[0].upper() + w[1:]
95
+ elif action == 2:
96
+ w = w+'.'
97
+ elif action == 3:
98
+ w = w+'?'
99
+ elif action == 4:
100
+ w = w+'!'
101
+ elif action == 5:
102
+ w = w+','
103
+
95
104
 
96
105
  # append
97
106
  res_toks.append(w)
98
107
 
99
108
  # compose final passage
100
109
  final_passage = self.tokenizer.convert_tokens_to_string(res_toks)
110
+ print(final_passage)
101
111
  try:
102
112
  split_passage = sent_tokenize(final_passage)
103
113
  except LookupError:
@@ -8,7 +8,7 @@ from batchalign.pipelines.base import *
8
8
  from batchalign.pipelines.asr.utils import *
9
9
  from batchalign.utils.config import config_read
10
10
 
11
- from batchalign.utils.dp import align, ExtraType, Extra
11
+ from batchalign.utils.dp import align, ExtraType, Extra, Match
12
12
 
13
13
  import logging
14
14
  L = logging.getLogger("batchalign")
@@ -22,6 +22,9 @@ class EvaluationEngine(BatchalignEngine):
22
22
  forms = [ j.text.lower() for i in doc.content for j in i.content if isinstance(i, Utterance)]
23
23
  gold_forms = [ j.text.lower() for i in gold.content for j in i.content if isinstance(i, Utterance)]
24
24
 
25
+ forms = [i for i in forms if i.strip() not in MOR_PUNCT+ENDING_PUNCT]
26
+ gold_forms = [i for i in gold_forms if i.strip() not in MOR_PUNCT+ENDING_PUNCT]
27
+
25
28
  # dp!
26
29
  alignment = align(forms, gold_forms, False)
27
30
 
@@ -35,8 +38,16 @@ class EvaluationEngine(BatchalignEngine):
35
38
  # ie: if we have <extra.payload> <extra.reference> +> substitution
36
39
  # but if we have <extra.reference> <extra.reference> this is 2 insertions
37
40
 
41
+ cleaned_alignment = []
42
+
38
43
  for i in alignment:
44
+
39
45
  if isinstance(i, Extra):
46
+ if len(cleaned_alignment) > 0 and i.extra_type == ExtraType.REFERENCE and "name" in i.key and i.key[:4] != "name":
47
+ cleaned_alignment.pop(-1)
48
+ cleaned_alignment.append(Match(i.key, None, None))
49
+ continue
50
+
40
51
  if prev_error != None and prev_error != i.extra_type:
41
52
  # this is a substitution: we have different "extra"s in
42
53
  # reference vs. payload
@@ -61,6 +72,8 @@ class EvaluationEngine(BatchalignEngine):
61
72
  else:
62
73
  prev_error = None
63
74
 
75
+ cleaned_alignment.append(i)
76
+
64
77
  diff = []
65
78
  for i in alignment:
66
79
  if isinstance(i, Extra):
@@ -0,0 +1,3 @@
1
+ 0.7.5-alpha.7
2
+ September 7th, 2024
3
+ batch hanging utterance bug
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.5a5
3
+ Version: 0.7.5a7
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -1,3 +0,0 @@
1
- 0.7.5-alpha.5
2
- September 3nd, 2024
3
- fix benchmark command, part 2
File without changes
File without changes
File without changes
File without changes
File without changes