batchalign 0.7.17.post12__tar.gz → 0.7.17.post14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. {batchalign-0.7.17.post12/batchalign.egg-info → batchalign-0.7.17.post14}/PKG-INFO +1 -1
  2. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/formats/chat/generator.py +2 -2
  3. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/morphosyntax/ud.py +4 -4
  4. batchalign-0.7.17.post14/batchalign/version +3 -0
  5. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14/batchalign.egg-info}/PKG-INFO +1 -1
  6. batchalign-0.7.17.post12/batchalign/version +0 -3
  7. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/LICENSE +0 -0
  8. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/MANIFEST.in +0 -0
  9. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/README.md +0 -0
  10. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/__init__.py +0 -0
  11. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/__main__.py +0 -0
  12. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/cli/__init__.py +0 -0
  13. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/cli/cli.py +0 -0
  14. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/cli/dispatch.py +0 -0
  15. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/constants.py +0 -0
  16. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/document.py +0 -0
  17. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/errors.py +0 -0
  18. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/formats/__init__.py +0 -0
  19. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/formats/base.py +0 -0
  20. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/formats/chat/__init__.py +0 -0
  21. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/formats/chat/file.py +0 -0
  22. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/formats/chat/lexer.py +0 -0
  23. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/formats/chat/parser.py +0 -0
  24. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/formats/chat/utils.py +0 -0
  25. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/formats/textgrid/__init__.py +0 -0
  26. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/formats/textgrid/file.py +0 -0
  27. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/formats/textgrid/generator.py +0 -0
  28. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/formats/textgrid/parser.py +0 -0
  29. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/models/__init__.py +0 -0
  30. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/models/resolve.py +0 -0
  31. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/models/speaker/__init__.py +0 -0
  32. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/models/speaker/config.yaml +0 -0
  33. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/models/speaker/infer.py +0 -0
  34. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/models/speaker/utils.py +0 -0
  35. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/models/training/__init__.py +0 -0
  36. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/models/training/run.py +0 -0
  37. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/models/training/utils.py +0 -0
  38. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/models/utils.py +0 -0
  39. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/models/utterance/__init__.py +0 -0
  40. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/models/utterance/cantonese_infer.py +0 -0
  41. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/models/utterance/dataset.py +0 -0
  42. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/models/utterance/execute.py +0 -0
  43. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/models/utterance/infer.py +0 -0
  44. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/models/utterance/prep.py +0 -0
  45. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/models/utterance/train.py +0 -0
  46. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/models/wave2vec/__init__.py +0 -0
  47. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/models/wave2vec/infer_fa.py +0 -0
  48. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/models/whisper/__init__.py +0 -0
  49. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/models/whisper/infer_asr.py +0 -0
  50. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/models/whisper/infer_fa.py +0 -0
  51. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/__init__.py +0 -0
  52. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/analysis/__init__.py +0 -0
  53. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/analysis/eval.py +0 -0
  54. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/asr/__init__.py +0 -0
  55. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/asr/num2chinese.py +0 -0
  56. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/asr/rev.py +0 -0
  57. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/asr/utils.py +0 -0
  58. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/asr/whisper.py +0 -0
  59. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/asr/whisperx.py +0 -0
  60. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/base.py +0 -0
  61. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/cleanup/__init__.py +0 -0
  62. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  63. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  64. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  65. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/cleanup/retrace.py +0 -0
  66. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  67. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  68. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/cleanup/support/test.test +0 -0
  69. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/dispatch.py +0 -0
  70. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/fa/__init__.py +0 -0
  71. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  72. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  73. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  74. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  75. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  76. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  77. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  78. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  79. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  80. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/pipeline.py +0 -0
  81. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/speaker/__init__.py +0 -0
  82. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  83. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/translate/__init__.py +0 -0
  84. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/translate/seamless.py +0 -0
  85. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/utr/__init__.py +0 -0
  86. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/utr/rev_utr.py +0 -0
  87. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/utr/utils.py +0 -0
  88. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  89. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/utterance/__init__.py +0 -0
  90. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  91. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/tests/__init__.py +0 -0
  92. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/tests/conftest.py +0 -0
  93. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  94. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  95. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  96. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  97. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  98. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  99. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  100. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  101. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  102. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  103. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  104. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  105. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/tests/pipelines/fixures.py +0 -0
  106. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  107. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  108. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/tests/test_document.py +0 -0
  109. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/utils/__init__.py +0 -0
  110. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/utils/config.py +0 -0
  111. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/utils/dp.py +0 -0
  112. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign/utils/utils.py +0 -0
  113. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign.egg-info/SOURCES.txt +0 -0
  114. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign.egg-info/dependency_links.txt +0 -0
  115. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign.egg-info/entry_points.txt +0 -0
  116. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign.egg-info/requires.txt +0 -0
  117. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/batchalign.egg-info/top_level.txt +0 -0
  118. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/setup.cfg +0 -0
  119. {batchalign-0.7.17.post12 → batchalign-0.7.17.post14}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: batchalign
3
- Version: 0.7.17.post12
3
+ Version: 0.7.17.post14
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -57,7 +57,7 @@ def generate_chat_utterance(utterance: Utterance, special_mor=False, write_wor=T
57
57
  if j.end:
58
58
  coref_str += ")"
59
59
  coref_str_form.append(coref_str)
60
- coref_elems.append(",".join(coref_str_form))
60
+ coref_elems.append(" ".join(coref_str_form))
61
61
  else:
62
62
  coref_elems.append("-")
63
63
 
@@ -94,7 +94,7 @@ def generate_chat_utterance(utterance: Utterance, special_mor=False, write_wor=T
94
94
  if has_wor and write_wor:
95
95
  result.append("%wor:\t"+" ".join(wor_elems))
96
96
  if has_coref:
97
- result.append("%coref:\t"+" ".join(coref_elems))
97
+ result.append("%coref:\t"+(", ".join(coref_elems)))
98
98
  if utterance.translation != None:
99
99
  result.append("%xtra:\t"+utterance.translation)
100
100
 
@@ -462,7 +462,7 @@ def parse_sentence(sentence, delimiter=".", special_forms=[], lang="$nospecial$"
462
462
  # specivl forms: recall the special form marker is xbxxx
463
463
  if "xbxxx" in word.text.strip():
464
464
  form = special_forms.pop(0)
465
- mor.append(f"x|{form.strip().replace(',', 'cm')}")
465
+ mor.append(f"{form[1].strip()}|{form[0].strip().replace(',', 'cm')}")
466
466
  special_form_ids.append(word.id)
467
467
  else:
468
468
  mor.append(mor_word)
@@ -555,7 +555,6 @@ def parse_sentence(sentence, delimiter=".", special_forms=[], lang="$nospecial$"
555
555
  # add a deliminator
556
556
  mor_str = mor_str + " " + delimiter
557
557
 
558
-
559
558
  mor_str = mor_str.replace("<UNK>", "")
560
559
  gra_str = gra_str.replace("<UNK>", "")
561
560
 
@@ -843,7 +842,7 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
843
842
  special_forms_cleaned = []
844
843
  for form in special_forms:
845
844
  line_cut = line_cut.replace(form, "xbxxx")
846
- special_forms_cleaned.append(re.sub(r"@[\w\:]+", "", form).strip())
845
+ special_forms_cleaned.append(form.split("@"))
847
846
 
848
847
  # if line cut is still nothing, we get very angry
849
848
  if line_cut == "":
@@ -942,7 +941,7 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
942
941
  if ut[i.payload].text != ",":
943
942
  ut[i.payload].morphology = [Morphology(
944
943
  lemma = sents[0].tokens[i.payload].text if len(sents) > 0 and len(sents[0].tokens) > i.payload and sents[0].tokens[i.payload].text != "xbxxx" else ut[i.payload].text,
945
- pos = "x",
944
+ pos = ut[i.payload].morphology[0].pos if (ut[i.payload].morphology and len(ut[i.payload].morphology) > 0) else "x",
946
945
  feats = ""
947
946
  )]
948
947
  poses = [i.morphology[0].pos.upper() for i in ut
@@ -999,6 +998,7 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
999
998
  content.dependency = form.dependency
1000
999
 
1001
1000
  except Exception as e:
1001
+ raise e
1002
1002
  pass
1003
1003
  # warnings.warn(f"Utterance failed parsing, skipping ud tagging... line='{line}', error='{e}'.\n")
1004
1004
 
@@ -0,0 +1,3 @@
1
+ 0.7.17-post.14
2
+ April 06th, 2025
3
+ Japanese form fixes
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: batchalign
3
- Version: 0.7.17.post12
3
+ Version: 0.7.17.post14
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -1,3 +0,0 @@
1
- 0.7.17-post.12
2
- March 26th, 2025
3
- better coref model