BatchalignHK 0.7.17.post18__tar.gz → 0.7.17.post19__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/BatchalignHK.egg-info/PKG-INFO +2 -3
  2. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/PKG-INFO +2 -3
  3. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/morphosyntax/ud.py +4 -4
  4. batchalignhk-0.7.17.post19/batchalign/version +3 -0
  5. batchalignhk-0.7.17.post18/batchalign/version +0 -3
  6. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/BatchalignHK.egg-info/SOURCES.txt +0 -0
  7. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/BatchalignHK.egg-info/dependency_links.txt +0 -0
  8. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/BatchalignHK.egg-info/entry_points.txt +0 -0
  9. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/BatchalignHK.egg-info/requires.txt +0 -0
  10. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/BatchalignHK.egg-info/top_level.txt +0 -0
  11. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/LICENSE +0 -0
  12. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/MANIFEST.in +0 -0
  13. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/README.md +0 -0
  14. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/__init__.py +0 -0
  15. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/__main__.py +0 -0
  16. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/cli/__init__.py +0 -0
  17. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/cli/cli.py +0 -0
  18. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/cli/dispatch.py +0 -0
  19. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/constants.py +0 -0
  20. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/document.py +0 -0
  21. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/errors.py +0 -0
  22. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/formats/__init__.py +0 -0
  23. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/formats/base.py +0 -0
  24. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/formats/chat/__init__.py +0 -0
  25. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/formats/chat/file.py +0 -0
  26. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/formats/chat/generator.py +0 -0
  27. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/formats/chat/lexer.py +0 -0
  28. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/formats/chat/parser.py +0 -0
  29. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/formats/chat/utils.py +0 -0
  30. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/formats/textgrid/__init__.py +0 -0
  31. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/formats/textgrid/file.py +0 -0
  32. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/formats/textgrid/generator.py +0 -0
  33. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/formats/textgrid/parser.py +0 -0
  34. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/models/__init__.py +0 -0
  35. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/models/resolve.py +0 -0
  36. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/models/speaker/__init__.py +0 -0
  37. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/models/speaker/config.yaml +0 -0
  38. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/models/speaker/infer.py +0 -0
  39. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/models/speaker/utils.py +0 -0
  40. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/models/training/__init__.py +0 -0
  41. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/models/training/run.py +0 -0
  42. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/models/training/utils.py +0 -0
  43. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/models/utils.py +0 -0
  44. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/models/utterance/__init__.py +0 -0
  45. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/models/utterance/cantonese_infer.py +0 -0
  46. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/models/utterance/dataset.py +0 -0
  47. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/models/utterance/execute.py +0 -0
  48. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/models/utterance/infer.py +0 -0
  49. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/models/utterance/prep.py +0 -0
  50. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/models/utterance/train.py +0 -0
  51. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/models/wave2vec/__init__.py +0 -0
  52. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/models/wave2vec/infer_fa.py +0 -0
  53. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/models/whisper/__init__.py +0 -0
  54. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/models/whisper/infer_asr.py +0 -0
  55. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/models/whisper/infer_fa.py +0 -0
  56. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/__init__.py +0 -0
  57. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/analysis/__init__.py +0 -0
  58. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/analysis/eval.py +0 -0
  59. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/asr/__init__.py +0 -0
  60. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/asr/num2chinese.py +0 -0
  61. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/asr/rev.py +0 -0
  62. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/asr/tencent.py +0 -0
  63. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/asr/utils.py +0 -0
  64. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/asr/whisper.py +0 -0
  65. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/asr/whisperx.py +0 -0
  66. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/base.py +0 -0
  67. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/cleanup/__init__.py +0 -0
  68. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  69. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  70. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  71. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/cleanup/retrace.py +0 -0
  72. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  73. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  74. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/cleanup/support/test.test +0 -0
  75. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/dispatch.py +0 -0
  76. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/fa/__init__.py +0 -0
  77. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  78. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  79. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  80. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  81. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  82. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  83. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  84. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  85. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  86. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/pipeline.py +0 -0
  87. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/speaker/__init__.py +0 -0
  88. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  89. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/translate/__init__.py +0 -0
  90. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/translate/seamless.py +0 -0
  91. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/utr/__init__.py +0 -0
  92. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/utr/rev_utr.py +0 -0
  93. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/utr/utils.py +0 -0
  94. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  95. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/utterance/__init__.py +0 -0
  96. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  97. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/tests/__init__.py +0 -0
  98. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/tests/conftest.py +0 -0
  99. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  100. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  101. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  102. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  103. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  104. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  105. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  106. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  107. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  108. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  109. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  110. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  111. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/tests/pipelines/fixures.py +0 -0
  112. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  113. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  114. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/tests/test_document.py +0 -0
  115. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/utils/__init__.py +0 -0
  116. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/utils/config.py +0 -0
  117. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/utils/dp.py +0 -0
  118. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/batchalign/utils/utils.py +0 -0
  119. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/setup.cfg +0 -0
  120. {batchalignhk-0.7.17.post18 → batchalignhk-0.7.17.post19}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.4
1
+ Metadata-Version: 2.2
2
2
  Name: BatchalignHK
3
- Version: 0.7.17.post18
3
+ Version: 0.7.17.post19
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -48,7 +48,6 @@ Dynamic: author-email
48
48
  Dynamic: classifier
49
49
  Dynamic: description
50
50
  Dynamic: description-content-type
51
- Dynamic: license-file
52
51
  Dynamic: provides-extra
53
52
  Dynamic: requires-dist
54
53
  Dynamic: summary
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.4
1
+ Metadata-Version: 2.2
2
2
  Name: BatchalignHK
3
- Version: 0.7.17.post18
3
+ Version: 0.7.17.post19
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -48,7 +48,6 @@ Dynamic: author-email
48
48
  Dynamic: classifier
49
49
  Dynamic: description
50
50
  Dynamic: description-content-type
51
- Dynamic: license-file
52
51
  Dynamic: provides-extra
53
52
  Dynamic: requires-dist
54
53
  Dynamic: summary
@@ -462,7 +462,7 @@ def parse_sentence(sentence, delimiter=".", special_forms=[], lang="$nospecial$"
462
462
  # special forms: recall the special form marker is xbxxx
463
463
  if "xbxxx" in word.text.strip():
464
464
  form = special_forms.pop(0)
465
- mor.append(f"x|{form.strip().replace(',', 'cm')}")
465
+ mor.append(f"{form[1].strip()}|{form[0].strip().replace(',', 'cm')}")
466
466
  special_form_ids.append(word.id)
467
467
  else:
468
468
  mor.append(mor_word)
@@ -555,7 +555,6 @@ def parse_sentence(sentence, delimiter=".", special_forms=[], lang="$nospecial$"
555
555
  # add a delimiter
556
556
  mor_str = mor_str + " " + delimiter
557
557
 
558
-
559
558
  mor_str = mor_str.replace("<UNK>", "")
560
559
  gra_str = gra_str.replace("<UNK>", "")
561
560
 
@@ -843,7 +842,7 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
843
842
  special_forms_cleaned = []
844
843
  for form in special_forms:
845
844
  line_cut = line_cut.replace(form, "xbxxx")
846
- special_forms_cleaned.append(re.sub(r"@[\w\:]+", "", form).strip())
845
+ special_forms_cleaned.append(form.split("@"))
847
846
 
848
847
  # if line cut is still nothing, we get very angry
849
848
  if line_cut == "":
@@ -942,7 +941,7 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
942
941
  if ut[i.payload].text != ",":
943
942
  ut[i.payload].morphology = [Morphology(
944
943
  lemma = sents[0].tokens[i.payload].text if len(sents) > 0 and len(sents[0].tokens) > i.payload and sents[0].tokens[i.payload].text != "xbxxx" else ut[i.payload].text,
945
- pos = "x",
944
+ pos = ut[i.payload].morphology[0].pos if (ut[i.payload].morphology and len(ut[i.payload].morphology) > 0) else "x",
946
945
  feats = ""
947
946
  )]
948
947
  poses = [i.morphology[0].pos.upper() for i in ut
@@ -999,6 +998,7 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
999
998
  content.dependency = form.dependency
1000
999
 
1001
1000
  except Exception as e:
1001
+ raise e
1002
1002
  pass
1003
1003
  # warnings.warn(f"Utterance failed parsing, skipping ud tagging... line='{line}', error='{e}'.\n")
1004
1004
 
@@ -0,0 +1,3 @@
1
+ 0.7.17-post.19
2
+ April 06th, 2025
3
+ Japanese form fixes
@@ -1,3 +0,0 @@
1
- 0.7.17-post.18
2
- March 26th, 2025
3
- cantonese ASR with general fixes for inputting a list