batchalign 0.7.3b15__tar.gz → 0.7.3b17__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. {batchalign-0.7.3b15/batchalign.egg-info → batchalign-0.7.3b17}/PKG-INFO +62 -2
  2. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/formats/chat/utils.py +27 -15
  3. batchalign-0.7.3b17/batchalign/pipelines/morphosyntax/ja/verbforms.py +118 -0
  4. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/morphosyntax/ud.py +7 -1
  5. batchalign-0.7.3b17/batchalign/version +3 -0
  6. {batchalign-0.7.3b15 → batchalign-0.7.3b17/batchalign.egg-info}/PKG-INFO +62 -2
  7. batchalign-0.7.3b15/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -56
  8. batchalign-0.7.3b15/batchalign/version +0 -3
  9. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/LICENSE +0 -0
  10. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/MANIFEST.in +0 -0
  11. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/README.md +0 -0
  12. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/__init__.py +0 -0
  13. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/__main__.py +0 -0
  14. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/cli/__init__.py +0 -0
  15. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/cli/cli.py +0 -0
  16. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/cli/dispatch.py +0 -0
  17. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/constants.py +0 -0
  18. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/document.py +0 -0
  19. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/errors.py +0 -0
  20. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/formats/__init__.py +0 -0
  21. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/formats/base.py +0 -0
  22. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/formats/chat/__init__.py +0 -0
  23. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/formats/chat/file.py +0 -0
  24. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/formats/chat/generator.py +0 -0
  25. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/formats/chat/lexer.py +0 -0
  26. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/formats/chat/parser.py +0 -0
  27. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/formats/textgrid/__init__.py +0 -0
  28. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/formats/textgrid/file.py +0 -0
  29. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/formats/textgrid/generator.py +0 -0
  30. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/formats/textgrid/parser.py +0 -0
  31. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/__init__.py +0 -0
  32. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/resolve.py +0 -0
  33. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/speaker/__init__.py +0 -0
  34. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/speaker/config.yaml +0 -0
  35. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/speaker/infer.py +0 -0
  36. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/speaker/utils.py +0 -0
  37. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/training/__init__.py +0 -0
  38. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/training/run.py +0 -0
  39. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/training/utils.py +0 -0
  40. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/utils.py +0 -0
  41. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/utterance/__init__.py +0 -0
  42. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/utterance/dataset.py +0 -0
  43. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/utterance/execute.py +0 -0
  44. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/utterance/infer.py +0 -0
  45. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/utterance/prep.py +0 -0
  46. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/utterance/train.py +0 -0
  47. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/whisper/__init__.py +0 -0
  48. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/whisper/infer_asr.py +0 -0
  49. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/whisper/infer_fa.py +0 -0
  50. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/__init__.py +0 -0
  51. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/analysis/__init__.py +0 -0
  52. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/analysis/eval.py +0 -0
  53. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/asr/__init__.py +0 -0
  54. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/asr/rev.py +0 -0
  55. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/asr/utils.py +0 -0
  56. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/asr/whisper.py +0 -0
  57. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/asr/whisperx.py +0 -0
  58. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/base.py +0 -0
  59. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/cleanup/__init__.py +0 -0
  60. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  61. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  62. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  63. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/cleanup/retrace.py +0 -0
  64. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  65. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  66. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/cleanup/support/test.test +0 -0
  67. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/dispatch.py +0 -0
  68. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/fa/__init__.py +0 -0
  69. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  70. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  71. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  72. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/pipeline.py +0 -0
  73. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/speaker/__init__.py +0 -0
  74. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  75. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/utr/__init__.py +0 -0
  76. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/utr/rev_utr.py +0 -0
  77. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/utr/utils.py +0 -0
  78. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  79. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/utterance/__init__.py +0 -0
  80. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  81. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/__init__.py +0 -0
  82. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/conftest.py +0 -0
  83. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  84. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  85. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  86. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  87. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  88. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  89. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  90. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  91. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  92. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  93. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  94. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  95. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/pipelines/fixures.py +0 -0
  96. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  97. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  98. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/test_document.py +0 -0
  99. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/utils/__init__.py +0 -0
  100. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/utils/config.py +0 -0
  101. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/utils/dp.py +0 -0
  102. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/utils/utils.py +0 -0
  103. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign.egg-info/SOURCES.txt +0 -0
  104. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign.egg-info/dependency_links.txt +0 -0
  105. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign.egg-info/entry_points.txt +0 -0
  106. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign.egg-info/requires.txt +0 -0
  107. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign.egg-info/top_level.txt +0 -0
  108. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/setup.cfg +0 -0
  109. {batchalign-0.7.3b15 → batchalign-0.7.3b17}/setup.py +0 -0
@@ -1,16 +1,76 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.3b15
3
+ Version: 0.7.3b17
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
7
7
  Classifier: Development Status :: 3 - Alpha
8
8
  Classifier: Topic :: Utilities
9
9
  Description-Content-Type: text/markdown
10
+ License-File: LICENSE
11
+ Requires-Dist: pydantic>=2.4
12
+ Requires-Dist: nltk>=3.8
13
+ Requires-Dist: praatio<6.1.0,>=6.0.0
14
+ Requires-Dist: torch<2.2.0,>=2.0.1
15
+ Requires-Dist: torchaudio<2.2.0,>=2.1.0
16
+ Requires-Dist: pyAudioAnalysis==0.3.14
17
+ Requires-Dist: hmmlearn==0.3.0
18
+ Requires-Dist: eyed3~=0.9.7
19
+ Requires-Dist: pydub<0.26.0,>=0.25.1
20
+ Requires-Dist: imblearn
21
+ Requires-Dist: plotly>=5.18.0
22
+ Requires-Dist: transformers~=4.37
23
+ Requires-Dist: tokenizers>=0.14.1
24
+ Requires-Dist: pycountry>=22.3
25
+ Requires-Dist: stanza>=1.7
26
+ Requires-Dist: scipy~=1.11
27
+ Requires-Dist: rev_ai>=2.18.0
28
+ Requires-Dist: rich~=13.6
29
+ Requires-Dist: click~=8.1
30
+ Requires-Dist: matplotlib<4.0.0,>=3.8.0
31
+ Requires-Dist: pyfiglet==1.0.2
32
+ Requires-Dist: soundfile~=0.12.0
33
+ Requires-Dist: rich-click>=1.7.0
34
+ Requires-Dist: typing-extensions
10
35
  Provides-Extra: dev
36
+ Requires-Dist: pytest; extra == "dev"
11
37
  Provides-Extra: train
38
+ Requires-Dist: accelerate~=0.27; extra == "train"
12
39
  Provides-Extra: speaker
13
- License-File: LICENSE
40
+ Requires-Dist: nemo-toolkit~=1.21.0; extra == "speaker"
41
+ Requires-Dist: omegaconf~=2.3.0; extra == "speaker"
42
+ Requires-Dist: pydub~=0.25.0; extra == "speaker"
43
+ Requires-Dist: braceexpand; extra == "speaker"
44
+ Requires-Dist: editdistance; extra == "speaker"
45
+ Requires-Dist: g2p_en; extra == "speaker"
46
+ Requires-Dist: ipywidgets; extra == "speaker"
47
+ Requires-Dist: jiwer; extra == "speaker"
48
+ Requires-Dist: kaldi-python-io; extra == "speaker"
49
+ Requires-Dist: kaldiio; extra == "speaker"
50
+ Requires-Dist: lhotse>=1.20.0; extra == "speaker"
51
+ Requires-Dist: librosa>=0.10.0; extra == "speaker"
52
+ Requires-Dist: marshmallow; extra == "speaker"
53
+ Requires-Dist: matplotlib; extra == "speaker"
54
+ Requires-Dist: packaging; extra == "speaker"
55
+ Requires-Dist: pyannote.core; extra == "speaker"
56
+ Requires-Dist: pyannote.metrics; extra == "speaker"
57
+ Requires-Dist: pydub; extra == "speaker"
58
+ Requires-Dist: pyloudnorm; extra == "speaker"
59
+ Requires-Dist: resampy; extra == "speaker"
60
+ Requires-Dist: ruamel.yaml; extra == "speaker"
61
+ Requires-Dist: scipy>=0.14; extra == "speaker"
62
+ Requires-Dist: soundfile; extra == "speaker"
63
+ Requires-Dist: sox; extra == "speaker"
64
+ Requires-Dist: texterrors; extra == "speaker"
65
+ Requires-Dist: hydra-core<=1.3.2,>1.3; extra == "speaker"
66
+ Requires-Dist: omegaconf<=2.3; extra == "speaker"
67
+ Requires-Dist: pytorch-lightning>=2.2.1; extra == "speaker"
68
+ Requires-Dist: torchmetrics>=0.11.0; extra == "speaker"
69
+ Requires-Dist: transformers>=4.36.0; extra == "speaker"
70
+ Requires-Dist: wandb; extra == "speaker"
71
+ Requires-Dist: webdataset>=0.2.86; extra == "speaker"
72
+ Requires-Dist: sentencepiece; extra == "speaker"
73
+ Requires-Dist: youtokentome; extra == "speaker"
14
74
 
15
75
  # TalkBank | Batchalign2
16
76
 
@@ -43,17 +43,8 @@ def chat_parse_mor(mor_str):
43
43
  if mor_str in ENDING_PUNCT:
44
44
  return [Morphology(lemma=mor_str, pos="PUNCT", feats="")]
45
45
 
46
- # JANK handle + forms
47
- if "+" in mor_str:
48
- pos, rest = mor_str.split("+", maxsplit=1)
49
- return [Morphology.model_validate({
50
- "lemma": "+"+rest,
51
- "pos": pos.strip("|"),
52
- "feats": "",
53
- })]
54
-
55
46
  try:
56
- mors = [i.split("|") for i in re.split("[~$]", mor_str)]
47
+ mors = [i.split("|", maxsplit=1) for i in re.split("[~$]", mor_str)]
57
48
  # TODO epic jank: backwards compatibility check: if a form
58
49
  # uses a lot of dashes, its probably because its old-style
59
50
  # dash seperated; if it doesn't; it probably is new-style
@@ -67,11 +58,32 @@ def chat_parse_mor(mor_str):
67
58
  except:
68
59
  raise CHATValidationException(f"mor parser recieved invalid mor string: '{mor_str}'")
69
60
 
70
- mors = [Morphology.model_validate({
71
- "lemma": l,
72
- "pos": p,
73
- "feats": f,
74
- }) for p,l,f in zip(pos, lemmas, feats)]
61
+
62
+ mors = []
63
+ for p,l,f in zip(pos, lemmas, feats):
64
+ # if "+" not in mor_str:
65
+ mors.append(Morphology.model_validate({
66
+ "lemma": l,
67
+ "pos": p,
68
+ "feats": f,
69
+ }))
70
+ # else:
71
+ # breakpoint()
72
+ # pos, rest = mor_str.split("+", maxsplit=1)
73
+
74
+
75
+
76
+ # # JANK handle + forms
77
+ # if "+" in mor_str:
78
+ # pos, rest = mor_str.split("+", maxsplit=1)
79
+ # return [Morphology.model_validate({
80
+ # "lemma": "+"+rest,
81
+ # "pos": pos.strip("|"),
82
+ # "feats": "",
83
+ # })]
84
+
85
+
86
+ # mors = [ ]
75
87
 
76
88
  return mors
77
89
 
@@ -0,0 +1,118 @@
1
+ """
2
+ verbforms.py
3
+ Fix Japanese verb forms.
4
+ """
5
+
6
+ def verbform(upos, target, text):
7
+ if "撮る" in text:
8
+ return "verb", "撮る"
9
+ if "貼る" in text:
10
+ return "verb", "貼る"
11
+ if "混ぜ" in text:
12
+ return "verb", "混ぜる"
13
+ if "釣る" in text:
14
+ return "verb", "釣る"
15
+ if "速い" in text and upos == "adj":
16
+ return "adj", "速い"
17
+ if "治ま" in text:
18
+ return "verb", "治まる"
19
+ if "刺す" in text:
20
+ return "verb", "刺す"
21
+ if "降り" in text:
22
+ return "verb", "降りる"
23
+ if "降" in text:
24
+ return "verb", "降る"
25
+ if "載せ" in text:
26
+ return "verb", "載せる"
27
+ if "帰" in text:
28
+ return "verb", "帰る"
29
+ if "はい" in text:
30
+ return "intj", "はい"
31
+ if "うん" in text:
32
+ return "intj", "うん"
33
+ if "おっ" in text:
34
+ return "intj", "おっ"
35
+ if "ほら" in text:
36
+ return "intj", "ほら"
37
+ if "ヤッホー" in text:
38
+ return "intj", "ヤッホー"
39
+ if "ただいま" in text:
40
+ return "intj", "ただいま"
41
+ if "あたし" in text:
42
+ return "pron", "あたし"
43
+ if "舐め" in text:
44
+ return "verb", "舐める"
45
+ if "バツ" in text:
46
+ return "noun", "バツ"
47
+ if "ブラシ" in text:
48
+ return "noun", "ブラシ"
49
+ if "引き出し" in text:
50
+ return "noun", "引き出し"
51
+ if "下さい" in text:
52
+ return "noun", "下さい"
53
+ if target in ["シャャミー", "物コャミ"]:
54
+ return "noun", "クシャミ"
55
+ if "マヨネーズ" in text:
56
+ return "noun", "マヨネーズ"
57
+ if "マヨ" in text:
58
+ return "noun", "マヨ"
59
+ if "チップス" in text:
60
+ return "noun", "チップス"
61
+ if "ゴロンっ" in text:
62
+ return "noun", "ゴロンっ"
63
+ if "モチーンっ" in text:
64
+ return "noun", "モチーンっ"
65
+ if "人っ" == text:
66
+ return "noun", "人"
67
+ if text == "掻く":
68
+ return "part", "かい"
69
+ if "遣" in text and upos == "noun":
70
+ return "verb", "遣る"
71
+ if "死" in text:
72
+ return "verb", "死ぬ"
73
+ if "立" in text:
74
+ return "verb", "立つ"
75
+ if "引" in text:
76
+ return "verb", "引く"
77
+ if "出" in text:
78
+ return "verb", "出す"
79
+ if "引" in text:
80
+ return "verb", "引く"
81
+ if "飲" in text:
82
+ return "verb", "飲む"
83
+ if "呼" in text:
84
+ return "verb", "呼ぶ"
85
+ if "脱" in text:
86
+ return "verb", "脱ぐ"
87
+ if text == "な" and upos == "part":
88
+ return "aux", "な"
89
+ if text == "呼ん":
90
+ return "verb", "呼ぶ"
91
+ if text == "な" and upos == "aux":
92
+ return "aux", "な"
93
+ if text == "だり":
94
+ return "aux", "たり"
95
+ if text == "たり":
96
+ return "aux", "たり"
97
+ if text == "たら":
98
+ return "sconj", "たら"
99
+ if text == "たっ":
100
+ return "sconj", "たって"
101
+ # if text == "て" and upos == "sconj":
102
+ # return "aux", "て"
103
+ if text == "なさい" and target == "為さる":
104
+ return "aux", "為さい"
105
+ if text == "な" and upos == "part":
106
+ return "aux", "な"
107
+ if text == "脱" and upos == "noun":
108
+ return "verb", "脱"
109
+ if text == "よう" and upos == "aux":
110
+ return "aux", "よう"
111
+ if text == "ろ" and upos == "aux" and target == "為る":
112
+ return "aux", "ろ"
113
+ # if upos == "verb" and "る" in target:
114
+ # return "verb", target.replace("る","").strip()
115
+
116
+ return upos,target
117
+
118
+
@@ -237,6 +237,8 @@ def handler__VERB(word, lang=None):
237
237
  res = handler(word, lang)
238
238
  if "sconj" in res:
239
239
  return res
240
+ elif "verb" not in res:
241
+ return res
240
242
  else:
241
243
  return res+flag+stringify_feats(aspect, mood,
242
244
  tense, polarity, polite,
@@ -266,7 +268,10 @@ def handler__PUNCT(word, lang=None):
266
268
  return "noun|da"
267
269
  elif re.match(r"^['\w-]+$", word.text): # we match text here because .text is the ultumate content
268
270
  # instead of the lemma, which maybe entirely weird
269
- return f"x|{word.text}"
271
+ if word.text == "もん":
272
+ return f"part|{word.text}"
273
+ else:
274
+ return f"x|{word.text}"
270
275
 
271
276
  # Register handlers
272
277
  HANDLERS = {
@@ -885,6 +890,7 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
885
890
  retokenized_ut = retokenized_ut.replace(" :", ":")
886
891
  retokenized_ut = retokenized_ut.replace("+ ,", "+,")
887
892
  retokenized_ut = retokenized_ut.replace(": <", ": <")
893
+ # retokenized_ut = retokenized_ut.replace("[ *", "[*")
888
894
  retokenized_ut = retokenized_ut.replace(" ↑", "↑")
889
895
  retokenized_ut = re.sub(r"@ ?w ?p", "@wp", retokenized_ut)
890
896
  retokenized_ut = retokenized_ut.replace(" @", "@")
@@ -0,0 +1,3 @@
1
+ 0.7.3-beta.17
2
+ August 21st, 2024
3
+ manual rules with mor
@@ -1,16 +1,76 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.3b15
3
+ Version: 0.7.3b17
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
7
7
  Classifier: Development Status :: 3 - Alpha
8
8
  Classifier: Topic :: Utilities
9
9
  Description-Content-Type: text/markdown
10
+ License-File: LICENSE
11
+ Requires-Dist: pydantic>=2.4
12
+ Requires-Dist: nltk>=3.8
13
+ Requires-Dist: praatio<6.1.0,>=6.0.0
14
+ Requires-Dist: torch<2.2.0,>=2.0.1
15
+ Requires-Dist: torchaudio<2.2.0,>=2.1.0
16
+ Requires-Dist: pyAudioAnalysis==0.3.14
17
+ Requires-Dist: hmmlearn==0.3.0
18
+ Requires-Dist: eyed3~=0.9.7
19
+ Requires-Dist: pydub<0.26.0,>=0.25.1
20
+ Requires-Dist: imblearn
21
+ Requires-Dist: plotly>=5.18.0
22
+ Requires-Dist: transformers~=4.37
23
+ Requires-Dist: tokenizers>=0.14.1
24
+ Requires-Dist: pycountry>=22.3
25
+ Requires-Dist: stanza>=1.7
26
+ Requires-Dist: scipy~=1.11
27
+ Requires-Dist: rev_ai>=2.18.0
28
+ Requires-Dist: rich~=13.6
29
+ Requires-Dist: click~=8.1
30
+ Requires-Dist: matplotlib<4.0.0,>=3.8.0
31
+ Requires-Dist: pyfiglet==1.0.2
32
+ Requires-Dist: soundfile~=0.12.0
33
+ Requires-Dist: rich-click>=1.7.0
34
+ Requires-Dist: typing-extensions
10
35
  Provides-Extra: dev
36
+ Requires-Dist: pytest; extra == "dev"
11
37
  Provides-Extra: train
38
+ Requires-Dist: accelerate~=0.27; extra == "train"
12
39
  Provides-Extra: speaker
13
- License-File: LICENSE
40
+ Requires-Dist: nemo-toolkit~=1.21.0; extra == "speaker"
41
+ Requires-Dist: omegaconf~=2.3.0; extra == "speaker"
42
+ Requires-Dist: pydub~=0.25.0; extra == "speaker"
43
+ Requires-Dist: braceexpand; extra == "speaker"
44
+ Requires-Dist: editdistance; extra == "speaker"
45
+ Requires-Dist: g2p_en; extra == "speaker"
46
+ Requires-Dist: ipywidgets; extra == "speaker"
47
+ Requires-Dist: jiwer; extra == "speaker"
48
+ Requires-Dist: kaldi-python-io; extra == "speaker"
49
+ Requires-Dist: kaldiio; extra == "speaker"
50
+ Requires-Dist: lhotse>=1.20.0; extra == "speaker"
51
+ Requires-Dist: librosa>=0.10.0; extra == "speaker"
52
+ Requires-Dist: marshmallow; extra == "speaker"
53
+ Requires-Dist: matplotlib; extra == "speaker"
54
+ Requires-Dist: packaging; extra == "speaker"
55
+ Requires-Dist: pyannote.core; extra == "speaker"
56
+ Requires-Dist: pyannote.metrics; extra == "speaker"
57
+ Requires-Dist: pydub; extra == "speaker"
58
+ Requires-Dist: pyloudnorm; extra == "speaker"
59
+ Requires-Dist: resampy; extra == "speaker"
60
+ Requires-Dist: ruamel.yaml; extra == "speaker"
61
+ Requires-Dist: scipy>=0.14; extra == "speaker"
62
+ Requires-Dist: soundfile; extra == "speaker"
63
+ Requires-Dist: sox; extra == "speaker"
64
+ Requires-Dist: texterrors; extra == "speaker"
65
+ Requires-Dist: hydra-core<=1.3.2,>1.3; extra == "speaker"
66
+ Requires-Dist: omegaconf<=2.3; extra == "speaker"
67
+ Requires-Dist: pytorch-lightning>=2.2.1; extra == "speaker"
68
+ Requires-Dist: torchmetrics>=0.11.0; extra == "speaker"
69
+ Requires-Dist: transformers>=4.36.0; extra == "speaker"
70
+ Requires-Dist: wandb; extra == "speaker"
71
+ Requires-Dist: webdataset>=0.2.86; extra == "speaker"
72
+ Requires-Dist: sentencepiece; extra == "speaker"
73
+ Requires-Dist: youtokentome; extra == "speaker"
14
74
 
15
75
  # TalkBank | Batchalign2
16
76
 
@@ -1,56 +0,0 @@
1
- """
2
- verbforms.py
3
- Fix Japanese verb forms.
4
- """
5
-
6
- def verbform(upos, target, text):
7
- if "遣" in text and upos == "noun":
8
- return "verb", "遣る"
9
- if "死" in text:
10
- return "verb", "死ぬ"
11
- if "立" in text:
12
- return "verb", "立つ"
13
- if "引" in text:
14
- return "verb", "引く"
15
- if "出" in text:
16
- return "verb", "出す"
17
- if "引" in text:
18
- return "verb", "引く"
19
- if "飲" in text:
20
- return "verb", "飲む"
21
- if "呼" in text:
22
- return "verb", "呼ぶ"
23
- if "脱" in text:
24
- return "verb", "脱ぐ"
25
- if text == "な" and upos == "part":
26
- return "aux", "な"
27
- if text == "呼ん":
28
- return "verb", "呼ぶ"
29
- if text == "な" and upos == "aux":
30
- return "aux", "な"
31
- if text == "だり":
32
- return "aux", "たり"
33
- if text == "たり":
34
- return "aux", "たり"
35
- if text == "たら":
36
- return "sconj", "たら"
37
- if text == "たっ":
38
- return "sconj", "たって"
39
- # if text == "て" and upos == "sconj":
40
- # return "aux", "て"
41
- if text == "なさい" and target == "為さる":
42
- return "aux", "為さい"
43
- if text == "な" and upos == "part":
44
- return "aux", "な"
45
- if text == "脱" and upos == "noun":
46
- return "verb", "脱"
47
- if text == "よう" and upos == "aux":
48
- return "aux", "よう"
49
- if text == "ろ" and upos == "aux" and target == "為る":
50
- return "aux", "ろ"
51
- # if upos == "verb" and "る" in target:
52
- # return "verb", target.replace("る","").strip()
53
-
54
- return upos,target
55
-
56
-
@@ -1,3 +0,0 @@
1
- 0.7.3-beta.15
2
- July 29th, 2024
3
- Correct Unicode Escapes?
File without changes
File without changes
File without changes
File without changes
File without changes