batchalign 0.7.17.post13__tar.gz → 0.7.17.post15__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. {batchalign-0.7.17.post13/batchalign.egg-info → batchalign-0.7.17.post15}/PKG-INFO +3 -2
  2. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/formats/chat/generator.py +3 -1
  3. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/morphosyntax/ud.py +4 -4
  4. batchalign-0.7.17.post15/batchalign/version +3 -0
  5. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15/batchalign.egg-info}/PKG-INFO +3 -2
  6. batchalign-0.7.17.post13/batchalign/version +0 -3
  7. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/LICENSE +0 -0
  8. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/MANIFEST.in +0 -0
  9. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/README.md +0 -0
  10. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/__init__.py +0 -0
  11. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/__main__.py +0 -0
  12. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/cli/__init__.py +0 -0
  13. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/cli/cli.py +0 -0
  14. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/cli/dispatch.py +0 -0
  15. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/constants.py +0 -0
  16. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/document.py +0 -0
  17. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/errors.py +0 -0
  18. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/formats/__init__.py +0 -0
  19. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/formats/base.py +0 -0
  20. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/formats/chat/__init__.py +0 -0
  21. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/formats/chat/file.py +0 -0
  22. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/formats/chat/lexer.py +0 -0
  23. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/formats/chat/parser.py +0 -0
  24. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/formats/chat/utils.py +0 -0
  25. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/formats/textgrid/__init__.py +0 -0
  26. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/formats/textgrid/file.py +0 -0
  27. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/formats/textgrid/generator.py +0 -0
  28. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/formats/textgrid/parser.py +0 -0
  29. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/models/__init__.py +0 -0
  30. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/models/resolve.py +0 -0
  31. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/models/speaker/__init__.py +0 -0
  32. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/models/speaker/config.yaml +0 -0
  33. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/models/speaker/infer.py +0 -0
  34. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/models/speaker/utils.py +0 -0
  35. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/models/training/__init__.py +0 -0
  36. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/models/training/run.py +0 -0
  37. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/models/training/utils.py +0 -0
  38. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/models/utils.py +0 -0
  39. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/models/utterance/__init__.py +0 -0
  40. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/models/utterance/cantonese_infer.py +0 -0
  41. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/models/utterance/dataset.py +0 -0
  42. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/models/utterance/execute.py +0 -0
  43. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/models/utterance/infer.py +0 -0
  44. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/models/utterance/prep.py +0 -0
  45. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/models/utterance/train.py +0 -0
  46. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/models/wave2vec/__init__.py +0 -0
  47. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/models/wave2vec/infer_fa.py +0 -0
  48. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/models/whisper/__init__.py +0 -0
  49. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/models/whisper/infer_asr.py +0 -0
  50. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/models/whisper/infer_fa.py +0 -0
  51. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/__init__.py +0 -0
  52. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/analysis/__init__.py +0 -0
  53. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/analysis/eval.py +0 -0
  54. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/asr/__init__.py +0 -0
  55. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/asr/num2chinese.py +0 -0
  56. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/asr/rev.py +0 -0
  57. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/asr/utils.py +0 -0
  58. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/asr/whisper.py +0 -0
  59. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/asr/whisperx.py +0 -0
  60. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/base.py +0 -0
  61. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/cleanup/__init__.py +0 -0
  62. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  63. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  64. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  65. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/cleanup/retrace.py +0 -0
  66. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  67. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  68. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/cleanup/support/test.test +0 -0
  69. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/dispatch.py +0 -0
  70. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/fa/__init__.py +0 -0
  71. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  72. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  73. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  74. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  75. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  76. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  77. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  78. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  79. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  80. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/pipeline.py +0 -0
  81. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/speaker/__init__.py +0 -0
  82. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  83. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/translate/__init__.py +0 -0
  84. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/translate/seamless.py +0 -0
  85. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/utr/__init__.py +0 -0
  86. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/utr/rev_utr.py +0 -0
  87. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/utr/utils.py +0 -0
  88. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  89. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/utterance/__init__.py +0 -0
  90. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  91. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/tests/__init__.py +0 -0
  92. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/tests/conftest.py +0 -0
  93. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  94. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  95. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  96. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  97. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  98. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  99. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  100. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  101. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  102. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  103. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  104. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  105. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/tests/pipelines/fixures.py +0 -0
  106. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  107. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  108. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/tests/test_document.py +0 -0
  109. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/utils/__init__.py +0 -0
  110. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/utils/config.py +0 -0
  111. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/utils/dp.py +0 -0
  112. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign/utils/utils.py +0 -0
  113. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign.egg-info/SOURCES.txt +0 -0
  114. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign.egg-info/dependency_links.txt +0 -0
  115. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign.egg-info/entry_points.txt +0 -0
  116. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign.egg-info/requires.txt +0 -0
  117. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/batchalign.egg-info/top_level.txt +0 -0
  118. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/setup.cfg +0 -0
  119. {batchalign-0.7.17.post13 → batchalign-0.7.17.post15}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: batchalign
3
- Version: 0.7.17.post13
3
+ Version: 0.7.17.post15
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -45,6 +45,7 @@ Dynamic: author-email
45
45
  Dynamic: classifier
46
46
  Dynamic: description
47
47
  Dynamic: description-content-type
48
+ Dynamic: license-file
48
49
  Dynamic: provides-extra
49
50
  Dynamic: requires-dist
50
51
  Dynamic: summary
@@ -28,6 +28,8 @@ def generate_chat_utterance(utterance: Utterance, special_mor=False, write_wor=T
28
28
  """
29
29
 
30
30
  main_line = str(utterance)
31
+ # last minut ecorrections
32
+ main_line = re.sub(r"<([\w ]+) \[\/", r"<\1> [/", main_line)
31
33
  tier = utterance.tier
32
34
 
33
35
  mors = []
@@ -94,7 +96,7 @@ def generate_chat_utterance(utterance: Utterance, special_mor=False, write_wor=T
94
96
  if has_wor and write_wor:
95
97
  result.append("%wor:\t"+" ".join(wor_elems))
96
98
  if has_coref:
97
- result.append("%coref:\t"+", ".join(coref_elems))
99
+ result.append("%coref:\t"+(", ".join(coref_elems)))
98
100
  if utterance.translation != None:
99
101
  result.append("%xtra:\t"+utterance.translation)
100
102
 
@@ -462,7 +462,7 @@ def parse_sentence(sentence, delimiter=".", special_forms=[], lang="$nospecial$"
462
462
  # specivl forms: recall the special form marker is xbxxx
463
463
  if "xbxxx" in word.text.strip():
464
464
  form = special_forms.pop(0)
465
- mor.append(f"x|{form.strip().replace(',', 'cm')}")
465
+ mor.append(f"{form[1].strip()}|{form[0].strip().replace(',', 'cm')}")
466
466
  special_form_ids.append(word.id)
467
467
  else:
468
468
  mor.append(mor_word)
@@ -555,7 +555,6 @@ def parse_sentence(sentence, delimiter=".", special_forms=[], lang="$nospecial$"
555
555
  # add a deliminator
556
556
  mor_str = mor_str + " " + delimiter
557
557
 
558
-
559
558
  mor_str = mor_str.replace("<UNK>", "")
560
559
  gra_str = gra_str.replace("<UNK>", "")
561
560
 
@@ -843,7 +842,7 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
843
842
  special_forms_cleaned = []
844
843
  for form in special_forms:
845
844
  line_cut = line_cut.replace(form, "xbxxx")
846
- special_forms_cleaned.append(re.sub(r"@[\w\:]+", "", form).strip())
845
+ special_forms_cleaned.append(form.split("@"))
847
846
 
848
847
  # if line cut is still nothing, we get very angry
849
848
  if line_cut == "":
@@ -942,7 +941,7 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
942
941
  if ut[i.payload].text != ",":
943
942
  ut[i.payload].morphology = [Morphology(
944
943
  lemma = sents[0].tokens[i.payload].text if len(sents) > 0 and len(sents[0].tokens) > i.payload and sents[0].tokens[i.payload].text != "xbxxx" else ut[i.payload].text,
945
- pos = "x",
944
+ pos = ut[i.payload].morphology[0].pos if (ut[i.payload].morphology and len(ut[i.payload].morphology) > 0) else "x",
946
945
  feats = ""
947
946
  )]
948
947
  poses = [i.morphology[0].pos.upper() for i in ut
@@ -999,6 +998,7 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
999
998
  content.dependency = form.dependency
1000
999
 
1001
1000
  except Exception as e:
1001
+ raise e
1002
1002
  pass
1003
1003
  # warnings.warn(f"Utterance failed parsing, skipping ud tagging... line='{line}', error='{e}'.\n")
1004
1004
 
@@ -0,0 +1,3 @@
1
+ 0.7.17-post.15
2
+ April 09th, 2025
3
+ Fix retraces
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: batchalign
3
- Version: 0.7.17.post13
3
+ Version: 0.7.17.post15
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -45,6 +45,7 @@ Dynamic: author-email
45
45
  Dynamic: classifier
46
46
  Dynamic: description
47
47
  Dynamic: description-content-type
48
+ Dynamic: license-file
48
49
  Dynamic: provides-extra
49
50
  Dynamic: requires-dist
50
51
  Dynamic: summary
@@ -1,3 +0,0 @@
1
- 0.7.17-post.13
2
- March 26th, 2025
3
- better coref model