batchalign 0.7.7.post1__tar.gz → 0.7.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. {batchalign-0.7.7.post1/batchalign.egg-info → batchalign-0.7.9}/PKG-INFO +1 -1
  2. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/constants.py +1 -1
  3. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/formats/chat/generator.py +3 -3
  4. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/formats/chat/lexer.py +1 -1
  5. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/formats/chat/parser.py +7 -3
  6. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/morphosyntax/ud.py +1 -1
  7. batchalign-0.7.9/batchalign/version +3 -0
  8. {batchalign-0.7.7.post1 → batchalign-0.7.9/batchalign.egg-info}/PKG-INFO +1 -1
  9. batchalign-0.7.7.post1/batchalign/version +0 -3
  10. {batchalign-0.7.7.post1 → batchalign-0.7.9}/LICENSE +0 -0
  11. {batchalign-0.7.7.post1 → batchalign-0.7.9}/MANIFEST.in +0 -0
  12. {batchalign-0.7.7.post1 → batchalign-0.7.9}/README.md +0 -0
  13. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/__init__.py +0 -0
  14. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/__main__.py +0 -0
  15. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/cli/__init__.py +0 -0
  16. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/cli/cli.py +0 -0
  17. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/cli/dispatch.py +0 -0
  18. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/document.py +0 -0
  19. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/errors.py +0 -0
  20. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/formats/__init__.py +0 -0
  21. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/formats/base.py +0 -0
  22. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/formats/chat/__init__.py +0 -0
  23. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/formats/chat/file.py +0 -0
  24. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/formats/chat/utils.py +0 -0
  25. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/formats/textgrid/__init__.py +0 -0
  26. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/formats/textgrid/file.py +0 -0
  27. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/formats/textgrid/generator.py +0 -0
  28. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/formats/textgrid/parser.py +0 -0
  29. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/models/__init__.py +0 -0
  30. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/models/resolve.py +0 -0
  31. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/models/speaker/__init__.py +0 -0
  32. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/models/speaker/config.yaml +0 -0
  33. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/models/speaker/infer.py +0 -0
  34. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/models/speaker/utils.py +0 -0
  35. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/models/training/__init__.py +0 -0
  36. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/models/training/run.py +0 -0
  37. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/models/training/utils.py +0 -0
  38. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/models/utils.py +0 -0
  39. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/models/utterance/__init__.py +0 -0
  40. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/models/utterance/dataset.py +0 -0
  41. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/models/utterance/execute.py +0 -0
  42. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/models/utterance/infer.py +0 -0
  43. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/models/utterance/prep.py +0 -0
  44. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/models/utterance/train.py +0 -0
  45. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/models/whisper/__init__.py +0 -0
  46. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/models/whisper/infer_asr.py +0 -0
  47. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/models/whisper/infer_fa.py +0 -0
  48. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/__init__.py +0 -0
  49. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/analysis/__init__.py +0 -0
  50. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/analysis/eval.py +0 -0
  51. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/asr/__init__.py +0 -0
  52. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/asr/rev.py +0 -0
  53. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/asr/utils.py +0 -0
  54. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/asr/whisper.py +0 -0
  55. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/asr/whisperx.py +0 -0
  56. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/base.py +0 -0
  57. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/cleanup/__init__.py +0 -0
  58. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  59. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  60. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  61. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/cleanup/retrace.py +0 -0
  62. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  63. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  64. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/cleanup/support/test.test +0 -0
  65. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/dispatch.py +0 -0
  66. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/fa/__init__.py +0 -0
  67. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  68. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  69. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  70. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  71. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  72. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  73. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  74. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  75. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/pipeline.py +0 -0
  76. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/speaker/__init__.py +0 -0
  77. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  78. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/utr/__init__.py +0 -0
  79. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/utr/rev_utr.py +0 -0
  80. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/utr/utils.py +0 -0
  81. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  82. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/utterance/__init__.py +0 -0
  83. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  84. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/tests/__init__.py +0 -0
  85. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/tests/conftest.py +0 -0
  86. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  87. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  88. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  89. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  90. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  91. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  92. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  93. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  94. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  95. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  96. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  97. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  98. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/tests/pipelines/fixures.py +0 -0
  99. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  100. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  101. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/tests/test_document.py +0 -0
  102. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/utils/__init__.py +0 -0
  103. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/utils/config.py +0 -0
  104. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/utils/dp.py +0 -0
  105. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign/utils/utils.py +0 -0
  106. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign.egg-info/SOURCES.txt +0 -0
  107. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign.egg-info/dependency_links.txt +0 -0
  108. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign.egg-info/entry_points.txt +0 -0
  109. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign.egg-info/requires.txt +0 -0
  110. {batchalign-0.7.7.post1 → batchalign-0.7.9}/batchalign.egg-info/top_level.txt +0 -0
  111. {batchalign-0.7.7.post1 → batchalign-0.7.9}/setup.cfg +0 -0
  112. {batchalign-0.7.7.post1 → batchalign-0.7.9}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.7.post1
3
+ Version: 0.7.9
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -1,5 +1,5 @@
1
1
  # CHAT punctuation specifications
2
- ENDING_PUNCT = [".", "?", "!", "+//.", "+/.", "+...", "+\"/.", "+..?", "+\".", "+//?", "+.", "+!?", "+/?", "..."]
2
+ ENDING_PUNCT = [".", "?", "!", "+//.", "+/.", "+...", "+\"/.", "+..?", "+\".", "+//?", "+.", "+!?", "+/?", "...", "(.)"]
3
3
  MOR_PUNCT = ["‡", "„", ","]
4
4
  CHAT_IGNORE = ["xxx", "yyy", "www"]
5
5
 
@@ -41,9 +41,9 @@ def generate_chat_utterance(utterance: Utterance, special_mor=False, write_wor=T
41
41
  gras.append(i.dependency)
42
42
  if i.time:
43
43
  has_wor = True
44
- wor_elems.append(f"{i.text} \x15{str(i.time[0])}_{str(i.time[1])}\x15")
44
+ wor_elems.append(re.sub(r"@\w+", "", f"{i.text} \x15{str(i.time[0])}_{str(i.time[1])}\x15"))
45
45
  else:
46
- wor_elems.append(i.text)
46
+ wor_elems.append(re.sub(r"@\w+", "", i.text))
47
47
 
48
48
  if i.coreference:
49
49
  has_coref = True
@@ -124,7 +124,7 @@ def generate_chat_preamble(doc, birthdays=[]):
124
124
  header = []
125
125
  header.append("@Languages:\t"+", ".join(doc.langs))
126
126
  header.append("@Participants:\t"+", ".join([f"{i.id} {i.name}" for i in doc.tiers]))
127
- header.append("@Options:\tmulti")
127
+ # header.append("@Options:\tmulti")
128
128
  header.append("\n".join([f"@ID:\t{i.lang}|{i.corpus}|{i.id}|{i.birthday}|{i.additional[0]}|{i.additional[1]}|{i.additional[2]}|{i.name}|{i.additional[3]}|{i.additional[4]}|" for i in doc.tiers]))
129
129
  for i in birthdays:
130
130
  header.append(f"@{i.id}:\t{i.content}")
@@ -197,7 +197,7 @@ class UtteranceLexer:
197
197
  while True:
198
198
  res = self.__pull()
199
199
  try:
200
- if res == "" or res == False or res in ENDING_PUNCT or (res[-1] in ENDING_PUNCT and re.findall("\w", res)):
200
+ if res == "" or res == False or res in ENDING_PUNCT or (res[-1] in ENDING_PUNCT and re.findall(r"\w", res)):
201
201
  break
202
202
  except IndexError:
203
203
  raise CHATValidationException(f"Lexer failed! Utterance ended without ending punct. Utterance: {self.raw}")
@@ -39,13 +39,13 @@ def chat_parse_utterance(text, mor, gra, wor, additional):
39
39
 
40
40
  # scan the timing
41
41
  # lex the utterance
42
- to_lex = re.compile("\x15\d+_\d+\x15").sub("", text).strip()
42
+ to_lex = re.compile(r"\x15\d+_\d+\x15").sub("", text).strip()
43
43
 
44
44
  # if the first form has a < in it and has no words,
45
45
  # its probably a beginning delimiter which we do not lex
46
46
  if (len(to_lex) > 0 and
47
47
  ("<" in to_lex.split(" ")[0] or "+" in to_lex.split(" ")[0] )
48
- and not re.findall("\w", to_lex.split(" ")[0])):
48
+ and not re.findall(r"\w", to_lex.split(" ")[0])):
49
49
  beg = to_lex.split(" ")[0]
50
50
  to_lex = to_lex.replace(beg, "", 1)
51
51
 
@@ -64,9 +64,11 @@ def chat_parse_utterance(text, mor, gra, wor, additional):
64
64
  # fix commas for people that don't annotate commas with a space
65
65
  to_lex = to_lex.replace(",", " ,")
66
66
 
67
- to_lex = re.sub(r"\([\d.:]+\)", "", to_lex)
67
+ to_lex = re.sub(r"\([\d.:]+\)(?!$)", "", to_lex)
68
68
  to_lex = re.sub(r"↫.*?↫", "", to_lex)
69
69
 
70
+ to_lex = re.sub(r"\(.\)$", r"$END_SPC$", to_lex)
71
+
70
72
  # if there is a punct, move it
71
73
  for end in sorted(ENDING_PUNCT, key=len, reverse=True):
72
74
  if end in to_lex:
@@ -76,6 +78,8 @@ def chat_parse_utterance(text, mor, gra, wor, additional):
76
78
  to_lex = to_lex.replace(" ", " ")
77
79
 
78
80
  tokens = lex(to_lex)
81
+ if tokens[-1][0] == "END_SPC":
82
+ tokens = tokens[:-1] + [("(.)", TokenType.PUNCT)]
79
83
 
80
84
  # correct 0 forms
81
85
  res = []
@@ -793,7 +793,7 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
793
793
  except AttributeError:
794
794
  breakpoint()
795
795
 
796
- if re.findall("\w", ending):
796
+ if re.findall(r"\w", ending):
797
797
  ending = "."
798
798
  line_cut = i.strip(join_with_spaces=True)
799
799
  else:
@@ -0,0 +1,3 @@
1
+ 0.7.9
2
+ Janurary 8st, 2025
3
+ align fixes
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.7.post1
3
+ Version: 0.7.9
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -1,3 +0,0 @@
1
- 0.7.7-post.1
2
- Janurary 3st, 2025
3
- releasing new full version
File without changes
File without changes
File without changes
File without changes
File without changes