batchalign 0.7.19.post15__tar.gz → 0.7.20__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of batchalign might be problematic. Click here for more details.

Files changed (125)
  1. {batchalign-0.7.19.post15/batchalign.egg-info → batchalign-0.7.20}/PKG-INFO +2 -3
  2. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/morphosyntax/ud.py +3 -2
  3. batchalign-0.7.20/batchalign/version +3 -0
  4. {batchalign-0.7.19.post15 → batchalign-0.7.20/batchalign.egg-info}/PKG-INFO +2 -3
  5. batchalign-0.7.19.post15/batchalign/version +0 -3
  6. {batchalign-0.7.19.post15 → batchalign-0.7.20}/LICENSE +0 -0
  7. {batchalign-0.7.19.post15 → batchalign-0.7.20}/MANIFEST.in +0 -0
  8. {batchalign-0.7.19.post15 → batchalign-0.7.20}/README.md +0 -0
  9. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/__init__.py +0 -0
  10. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/__main__.py +0 -0
  11. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/cli/__init__.py +0 -0
  12. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/cli/cli.py +0 -0
  13. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/cli/dispatch.py +0 -0
  14. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/constants.py +0 -0
  15. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/document.py +0 -0
  16. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/errors.py +0 -0
  17. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/formats/__init__.py +0 -0
  18. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/formats/base.py +0 -0
  19. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/formats/chat/__init__.py +0 -0
  20. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/formats/chat/file.py +0 -0
  21. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/formats/chat/generator.py +0 -0
  22. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/formats/chat/lexer.py +0 -0
  23. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/formats/chat/parser.py +0 -0
  24. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/formats/chat/utils.py +0 -0
  25. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/formats/textgrid/__init__.py +0 -0
  26. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/formats/textgrid/file.py +0 -0
  27. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/formats/textgrid/generator.py +0 -0
  28. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/formats/textgrid/parser.py +0 -0
  29. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/models/__init__.py +0 -0
  30. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/models/resolve.py +0 -0
  31. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/models/speaker/__init__.py +0 -0
  32. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/models/speaker/config.yaml +0 -0
  33. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/models/speaker/infer.py +0 -0
  34. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/models/speaker/utils.py +0 -0
  35. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/models/training/__init__.py +0 -0
  36. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/models/training/run.py +0 -0
  37. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/models/training/utils.py +0 -0
  38. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/models/utils.py +0 -0
  39. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/models/utterance/__init__.py +0 -0
  40. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/models/utterance/cantonese_infer.py +0 -0
  41. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/models/utterance/dataset.py +0 -0
  42. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/models/utterance/execute.py +0 -0
  43. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/models/utterance/infer.py +0 -0
  44. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/models/utterance/prep.py +0 -0
  45. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/models/utterance/train.py +0 -0
  46. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/models/wave2vec/__init__.py +0 -0
  47. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/models/wave2vec/infer_fa.py +0 -0
  48. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/models/whisper/__init__.py +0 -0
  49. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/models/whisper/infer_asr.py +0 -0
  50. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/models/whisper/infer_fa.py +0 -0
  51. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/__init__.py +0 -0
  52. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/analysis/__init__.py +0 -0
  53. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/analysis/eval.py +0 -0
  54. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/asr/__init__.py +0 -0
  55. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/asr/num2chinese.py +0 -0
  56. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/asr/oai_whisper.py +0 -0
  57. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/asr/rev.py +0 -0
  58. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/asr/utils.py +0 -0
  59. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/asr/whisper.py +0 -0
  60. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/asr/whisperx.py +0 -0
  61. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/base.py +0 -0
  62. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/cleanup/__init__.py +0 -0
  63. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  64. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  65. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  66. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/cleanup/retrace.py +0 -0
  67. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  68. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  69. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/cleanup/support/test.test +0 -0
  70. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/dispatch.py +0 -0
  71. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/fa/__init__.py +0 -0
  72. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  73. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  74. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  75. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  76. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  77. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  78. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  79. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  80. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  81. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/pipeline.py +0 -0
  82. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/speaker/__init__.py +0 -0
  83. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  84. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/translate/__init__.py +0 -0
  85. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/translate/gtrans.py +0 -0
  86. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/translate/seamless.py +0 -0
  87. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/translate/utils.py +0 -0
  88. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/utr/__init__.py +0 -0
  89. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/utr/rev_utr.py +0 -0
  90. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/utr/utils.py +0 -0
  91. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  92. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/utterance/__init__.py +0 -0
  93. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  94. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/tests/__init__.py +0 -0
  95. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/tests/conftest.py +0 -0
  96. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  97. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  98. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  99. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  100. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  101. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  102. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  103. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  104. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  105. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  106. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  107. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  108. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/tests/pipelines/fixures.py +0 -0
  109. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  110. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  111. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/tests/test_document.py +0 -0
  112. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/utils/__init__.py +0 -0
  113. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/utils/abbrev.py +0 -0
  114. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/utils/compounds.py +0 -0
  115. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/utils/config.py +0 -0
  116. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/utils/dp.py +0 -0
  117. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/utils/names.py +0 -0
  118. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign/utils/utils.py +0 -0
  119. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign.egg-info/SOURCES.txt +0 -0
  120. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign.egg-info/dependency_links.txt +0 -0
  121. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign.egg-info/entry_points.txt +0 -0
  122. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign.egg-info/requires.txt +0 -0
  123. {batchalign-0.7.19.post15 → batchalign-0.7.20}/batchalign.egg-info/top_level.txt +0 -0
  124. {batchalign-0.7.19.post15 → batchalign-0.7.20}/setup.cfg +0 -0
  125. {batchalign-0.7.19.post15 → batchalign-0.7.20}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.4
1
+ Metadata-Version: 2.2
2
2
  Name: batchalign
3
- Version: 0.7.19.post15
3
+ Version: 0.7.20
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -47,7 +47,6 @@ Dynamic: author-email
47
47
  Dynamic: classifier
48
48
  Dynamic: description
49
49
  Dynamic: description-content-type
50
- Dynamic: license-file
51
50
  Dynamic: provides-extra
52
51
  Dynamic: requires-dist
53
52
  Dynamic: summary
@@ -72,6 +72,8 @@ def handler(word, lang=None):
72
72
  # if the lemma is ", return the word
73
73
  # not sure what errors are coming along?
74
74
  target = word.lemma
75
+ if target.strip() == "」" or target.strip() == "「":
76
+ target = word.text
75
77
 
76
78
  if target == '"':
77
79
  target = word.text
@@ -885,7 +887,6 @@ def morphoanalyze(doc: Document, retokenize:bool, skipmultilang:bool, status_hoo
885
887
  L.debug(f"Encountered an utterance that's likely devoid of morphological information; skipping... utterance='{doc.content[indx]}'")
886
888
  continue
887
889
 
888
-
889
890
  if retokenize:
890
891
  # rewrite the sentence with our desired tokenizations
891
892
  ut, end = chat_parse_utterance(" ".join([i.text for i in sents[0].tokens])+" "+ending,
@@ -944,7 +945,7 @@ def morphoanalyze(doc: Document, retokenize:bool, skipmultilang:bool, status_hoo
944
945
  # we want to replace the morphology of forms that are not actually
945
946
  # supposed to be analyzed
946
947
  elif isinstance(i, Extra) and i.extra_type == ExtraType.REFERENCE:
947
- if ut[i.payload].text != ",":
948
+ if ut[i.payload].text not in MOR_PUNCT:
948
949
  ut[i.payload].morphology = [Morphology(
949
950
  lemma = sents[0].tokens[i.payload].text if len(sents) > 0 and len(sents[0].tokens) > i.payload and sents[0].tokens[i.payload].text != "xbxxx" else ut[i.payload].text,
950
951
  pos = ut[i.payload].morphology[0].pos if (ut[i.payload].morphology and len(ut[i.payload].morphology) > 0) else "x",
@@ -0,0 +1,3 @@
1
+ 0.7.20
2
+ July 17th, 2025
3
+ UD Japanese Fixes
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.4
1
+ Metadata-Version: 2.2
2
2
  Name: batchalign
3
- Version: 0.7.19.post15
3
+ Version: 0.7.20
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -47,7 +47,6 @@ Dynamic: author-email
47
47
  Dynamic: classifier
48
48
  Dynamic: description
49
49
  Dynamic: description-content-type
50
- Dynamic: license-file
51
50
  Dynamic: provides-extra
52
51
  Dynamic: requires-dist
53
52
  Dynamic: summary
@@ -1,3 +0,0 @@
1
- 0.7.19-post.15
2
- July 12nd, 2025
3
- compound word list bencmarking
File without changes
File without changes