batchalign 0.7.3b14__tar.gz → 0.7.3b16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. {batchalign-0.7.3b14/batchalign.egg-info → batchalign-0.7.3b16}/PKG-INFO +4 -64
  2. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/README.md +2 -2
  3. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/formats/chat/file.py +7 -1
  4. batchalign-0.7.3b16/batchalign/pipelines/morphosyntax/ja/verbforms.py +118 -0
  5. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/morphosyntax/ud.py +6 -1
  6. batchalign-0.7.3b16/batchalign/version +3 -0
  7. {batchalign-0.7.3b14 → batchalign-0.7.3b16/batchalign.egg-info}/PKG-INFO +4 -64
  8. batchalign-0.7.3b14/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -56
  9. batchalign-0.7.3b14/batchalign/version +0 -3
  10. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/LICENSE +0 -0
  11. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/MANIFEST.in +0 -0
  12. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/__init__.py +0 -0
  13. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/__main__.py +0 -0
  14. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/cli/__init__.py +0 -0
  15. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/cli/cli.py +0 -0
  16. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/cli/dispatch.py +0 -0
  17. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/constants.py +0 -0
  18. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/document.py +0 -0
  19. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/errors.py +0 -0
  20. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/formats/__init__.py +0 -0
  21. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/formats/base.py +0 -0
  22. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/formats/chat/__init__.py +0 -0
  23. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/formats/chat/generator.py +0 -0
  24. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/formats/chat/lexer.py +0 -0
  25. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/formats/chat/parser.py +0 -0
  26. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/formats/chat/utils.py +0 -0
  27. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/formats/textgrid/__init__.py +0 -0
  28. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/formats/textgrid/file.py +0 -0
  29. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/formats/textgrid/generator.py +0 -0
  30. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/formats/textgrid/parser.py +0 -0
  31. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/__init__.py +0 -0
  32. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/resolve.py +0 -0
  33. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/speaker/__init__.py +0 -0
  34. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/speaker/config.yaml +0 -0
  35. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/speaker/infer.py +0 -0
  36. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/speaker/utils.py +0 -0
  37. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/training/__init__.py +0 -0
  38. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/training/run.py +0 -0
  39. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/training/utils.py +0 -0
  40. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/utils.py +0 -0
  41. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/utterance/__init__.py +0 -0
  42. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/utterance/dataset.py +0 -0
  43. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/utterance/execute.py +0 -0
  44. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/utterance/infer.py +0 -0
  45. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/utterance/prep.py +0 -0
  46. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/utterance/train.py +0 -0
  47. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/whisper/__init__.py +0 -0
  48. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/whisper/infer_asr.py +0 -0
  49. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/whisper/infer_fa.py +0 -0
  50. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/__init__.py +0 -0
  51. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/analysis/__init__.py +0 -0
  52. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/analysis/eval.py +0 -0
  53. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/asr/__init__.py +0 -0
  54. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/asr/rev.py +0 -0
  55. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/asr/utils.py +0 -0
  56. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/asr/whisper.py +0 -0
  57. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/asr/whisperx.py +0 -0
  58. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/base.py +0 -0
  59. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/cleanup/__init__.py +0 -0
  60. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  61. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  62. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  63. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/cleanup/retrace.py +0 -0
  64. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  65. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  66. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/cleanup/support/test.test +0 -0
  67. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/dispatch.py +0 -0
  68. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/fa/__init__.py +0 -0
  69. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  70. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  71. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  72. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/pipeline.py +0 -0
  73. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/speaker/__init__.py +0 -0
  74. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  75. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/utr/__init__.py +0 -0
  76. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/utr/rev_utr.py +0 -0
  77. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/utr/utils.py +0 -0
  78. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  79. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/utterance/__init__.py +0 -0
  80. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  81. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/__init__.py +0 -0
  82. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/conftest.py +0 -0
  83. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  84. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  85. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  86. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  87. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  88. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  89. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  90. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  91. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  92. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  93. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  94. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  95. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/pipelines/fixures.py +0 -0
  96. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  97. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  98. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/test_document.py +0 -0
  99. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/utils/__init__.py +0 -0
  100. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/utils/config.py +0 -0
  101. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/utils/dp.py +0 -0
  102. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/utils/utils.py +0 -0
  103. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign.egg-info/SOURCES.txt +0 -0
  104. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign.egg-info/dependency_links.txt +0 -0
  105. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign.egg-info/entry_points.txt +0 -0
  106. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign.egg-info/requires.txt +0 -0
  107. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign.egg-info/top_level.txt +0 -0
  108. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/setup.cfg +0 -0
  109. {batchalign-0.7.3b14 → batchalign-0.7.3b16}/setup.py +0 -0
@@ -1,76 +1,16 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.3b14
3
+ Version: 0.7.3b16
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
7
7
  Classifier: Development Status :: 3 - Alpha
8
8
  Classifier: Topic :: Utilities
9
9
  Description-Content-Type: text/markdown
10
- License-File: LICENSE
11
- Requires-Dist: pydantic>=2.4
12
- Requires-Dist: nltk>=3.8
13
- Requires-Dist: praatio<6.1.0,>=6.0.0
14
- Requires-Dist: torch<2.2.0,>=2.0.1
15
- Requires-Dist: torchaudio<2.2.0,>=2.1.0
16
- Requires-Dist: pyAudioAnalysis==0.3.14
17
- Requires-Dist: hmmlearn==0.3.0
18
- Requires-Dist: eyed3~=0.9.7
19
- Requires-Dist: pydub<0.26.0,>=0.25.1
20
- Requires-Dist: imblearn
21
- Requires-Dist: plotly>=5.18.0
22
- Requires-Dist: transformers~=4.37
23
- Requires-Dist: tokenizers>=0.14.1
24
- Requires-Dist: pycountry>=22.3
25
- Requires-Dist: stanza>=1.7
26
- Requires-Dist: scipy~=1.11
27
- Requires-Dist: rev_ai>=2.18.0
28
- Requires-Dist: rich~=13.6
29
- Requires-Dist: click~=8.1
30
- Requires-Dist: matplotlib<4.0.0,>=3.8.0
31
- Requires-Dist: pyfiglet==1.0.2
32
- Requires-Dist: soundfile~=0.12.0
33
- Requires-Dist: rich-click>=1.7.0
34
- Requires-Dist: typing-extensions
35
10
  Provides-Extra: dev
36
- Requires-Dist: pytest; extra == "dev"
37
11
  Provides-Extra: train
38
- Requires-Dist: accelerate~=0.27; extra == "train"
39
12
  Provides-Extra: speaker
40
- Requires-Dist: nemo-toolkit~=1.21.0; extra == "speaker"
41
- Requires-Dist: omegaconf~=2.3.0; extra == "speaker"
42
- Requires-Dist: pydub~=0.25.0; extra == "speaker"
43
- Requires-Dist: braceexpand; extra == "speaker"
44
- Requires-Dist: editdistance; extra == "speaker"
45
- Requires-Dist: g2p_en; extra == "speaker"
46
- Requires-Dist: ipywidgets; extra == "speaker"
47
- Requires-Dist: jiwer; extra == "speaker"
48
- Requires-Dist: kaldi-python-io; extra == "speaker"
49
- Requires-Dist: kaldiio; extra == "speaker"
50
- Requires-Dist: lhotse>=1.20.0; extra == "speaker"
51
- Requires-Dist: librosa>=0.10.0; extra == "speaker"
52
- Requires-Dist: marshmallow; extra == "speaker"
53
- Requires-Dist: matplotlib; extra == "speaker"
54
- Requires-Dist: packaging; extra == "speaker"
55
- Requires-Dist: pyannote.core; extra == "speaker"
56
- Requires-Dist: pyannote.metrics; extra == "speaker"
57
- Requires-Dist: pydub; extra == "speaker"
58
- Requires-Dist: pyloudnorm; extra == "speaker"
59
- Requires-Dist: resampy; extra == "speaker"
60
- Requires-Dist: ruamel.yaml; extra == "speaker"
61
- Requires-Dist: scipy>=0.14; extra == "speaker"
62
- Requires-Dist: soundfile; extra == "speaker"
63
- Requires-Dist: sox; extra == "speaker"
64
- Requires-Dist: texterrors; extra == "speaker"
65
- Requires-Dist: hydra-core<=1.3.2,>1.3; extra == "speaker"
66
- Requires-Dist: omegaconf<=2.3; extra == "speaker"
67
- Requires-Dist: pytorch-lightning>=2.2.1; extra == "speaker"
68
- Requires-Dist: torchmetrics>=0.11.0; extra == "speaker"
69
- Requires-Dist: transformers>=4.36.0; extra == "speaker"
70
- Requires-Dist: wandb; extra == "speaker"
71
- Requires-Dist: webdataset>=0.2.86; extra == "speaker"
72
- Requires-Dist: sentencepiece; extra == "speaker"
73
- Requires-Dist: youtokentome; extra == "speaker"
13
+ License-File: LICENSE
74
14
 
75
15
  # TalkBank | Batchalign2
76
16
 
@@ -102,13 +42,13 @@ You can get Batchalign from PyPi, and you can update the package in the same way
102
42
  macOS/Linux:
103
43
 
104
44
  ```
105
- pip3 install -U batchalign
45
+ pip install -U batchalign
106
46
  ```
107
47
 
108
48
  Windows:
109
49
 
110
50
  ```
111
- py -m pip3 install -U batchalign
51
+ py -m pip install -U batchalign
112
52
  ```
113
53
 
114
54
  ### Rock and Roll
@@ -28,13 +28,13 @@ You can get Batchalign from PyPi, and you can update the package in the same way
28
28
  macOS/Linux:
29
29
 
30
30
  ```
31
- pip3 install -U batchalign
31
+ pip install -U batchalign
32
32
  ```
33
33
 
34
34
  Windows:
35
35
 
36
36
  ```
37
- py -m pip3 install -U batchalign
37
+ py -m pip install -U batchalign
38
38
  ```
39
39
 
40
40
  ### Rock and Roll
@@ -129,7 +129,13 @@ class CHATFile(BaseFormat):
129
129
  write_wor=write_wor))
130
130
  main.append("@End\n")
131
131
 
132
- return "\n".join(main)
132
+ raw = "\n".join(main)
133
+
134
+ # correct for unicode problems
135
+ corrected = raw
136
+ corrected = corrected.replace(u"\u202b", u"\u200f")
137
+
138
+ return corrected
133
139
 
134
140
  @property
135
141
  def doc(self):
@@ -0,0 +1,118 @@
1
+ """
2
+ verbforms.py
3
+ Fix Japanese verb forms.
4
+ """
5
+
6
+ def verbform(upos, target, text):
7
+ if "撮る" in text:
8
+ return "verb", "撮る"
9
+ if "貼る" in text:
10
+ return "verb", "貼る"
11
+ if "混ぜ" in text:
12
+ return "verb", "混ぜる"
13
+ if "釣る" in text:
14
+ return "verb", "釣る"
15
+ if "速い" in text and upos == "adj":
16
+ return "adj", "速い"
17
+ if "治ま" in text:
18
+ return "verb", "治まる"
19
+ if "刺す" in text:
20
+ return "verb", "刺す"
21
+ if "降り" in text:
22
+ return "verb", "降りる"
23
+ if "降" in text:
24
+ return "verb", "降る"
25
+ if "載せ" in text:
26
+ return "verb", "載せる"
27
+ if "帰" in text:
28
+ return "verb", "帰る"
29
+ if "はい" in text:
30
+ return "intj", "はい"
31
+ if "うん" in text:
32
+ return "intj", "うん"
33
+ if "おっ" in text:
34
+ return "intj", "おっ"
35
+ if "ほら" in text:
36
+ return "intj", "ほら"
37
+ if "ヤッホー" in text:
38
+ return "intj", "ヤッホー"
39
+ if "ただいま" in text:
40
+ return "intj", "ただいま"
41
+ if "あたし" in text:
42
+ return "pron", "あたし"
43
+ if "舐め" in text:
44
+ return "verb", "舐める"
45
+ if "バツ" in text:
46
+ return "noun", "バツ"
47
+ if "ブラシ" in text:
48
+ return "noun", "ブラシ"
49
+ if "引き出し" in text:
50
+ return "noun", "引き出し"
51
+ if "下さい" in text:
52
+ return "noun", "下さい"
53
+ if target in ["シャャミー", "物コャミ"]:
54
+ return "noun", "クシャミ"
55
+ if "マヨネーズ" in text:
56
+ return "noun", "マヨネーズ"
57
+ if "マヨ" in text:
58
+ return "noun", "マヨ"
59
+ if "チップス" in text:
60
+ return "noun", "チップス"
61
+ if "ゴロンっ" in text:
62
+ return "noun", "ゴロンっ"
63
+ if "モチーンっ" in text:
64
+ return "noun", "モチーンっ"
65
+ if "人っ" == text:
66
+ return "noun", "人"
67
+ if text == "掻く":
68
+ return "part", "かい"
69
+ if "遣" in text and upos == "noun":
70
+ return "verb", "遣る"
71
+ if "死" in text:
72
+ return "verb", "死ぬ"
73
+ if "立" in text:
74
+ return "verb", "立つ"
75
+ if "引" in text:
76
+ return "verb", "引く"
77
+ if "出" in text:
78
+ return "verb", "出す"
79
+ if "引" in text:
80
+ return "verb", "引く"
81
+ if "飲" in text:
82
+ return "verb", "飲む"
83
+ if "呼" in text:
84
+ return "verb", "呼ぶ"
85
+ if "脱" in text:
86
+ return "verb", "脱ぐ"
87
+ if text == "な" and upos == "part":
88
+ return "aux", "な"
89
+ if text == "呼ん":
90
+ return "verb", "呼ぶ"
91
+ if text == "な" and upos == "aux":
92
+ return "aux", "な"
93
+ if text == "だり":
94
+ return "aux", "たり"
95
+ if text == "たり":
96
+ return "aux", "たり"
97
+ if text == "たら":
98
+ return "sconj", "たら"
99
+ if text == "たっ":
100
+ return "sconj", "たって"
101
+ # if text == "て" and upos == "sconj":
102
+ # return "aux", "て"
103
+ if text == "なさい" and target == "為さる":
104
+ return "aux", "為さい"
105
+ if text == "な" and upos == "part":
106
+ return "aux", "な"
107
+ if text == "脱" and upos == "noun":
108
+ return "verb", "脱"
109
+ if text == "よう" and upos == "aux":
110
+ return "aux", "よう"
111
+ if text == "ろ" and upos == "aux" and target == "為る":
112
+ return "aux", "ろ"
113
+ # if upos == "verb" and "る" in target:
114
+ # return "verb", target.replace("る","").strip()
115
+
116
+ return upos,target
117
+
118
+
@@ -237,6 +237,8 @@ def handler__VERB(word, lang=None):
237
237
  res = handler(word, lang)
238
238
  if "sconj" in res:
239
239
  return res
240
+ elif "verb" not in res:
241
+ return res
240
242
  else:
241
243
  return res+flag+stringify_feats(aspect, mood,
242
244
  tense, polarity, polite,
@@ -266,7 +268,10 @@ def handler__PUNCT(word, lang=None):
266
268
  return "noun|da"
267
269
  elif re.match(r"^['\w-]+$", word.text): # we match text here because .text is the ultimate content
268
270
  # instead of the lemma, which may be entirely weird
269
- return f"x|{word.text}"
271
+ if word.text == "もん":
272
+ return f"part|{word.text}"
273
+ else:
274
+ return f"x|{word.text}"
270
275
 
271
276
  # Register handlers
272
277
  HANDLERS = {
@@ -0,0 +1,3 @@
1
+ 0.7.3-beta.16
2
+ August 3rd, 2024
3
+ more Japanese hand-parse rules
@@ -1,76 +1,16 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.3b14
3
+ Version: 0.7.3b16
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
7
7
  Classifier: Development Status :: 3 - Alpha
8
8
  Classifier: Topic :: Utilities
9
9
  Description-Content-Type: text/markdown
10
- License-File: LICENSE
11
- Requires-Dist: pydantic>=2.4
12
- Requires-Dist: nltk>=3.8
13
- Requires-Dist: praatio<6.1.0,>=6.0.0
14
- Requires-Dist: torch<2.2.0,>=2.0.1
15
- Requires-Dist: torchaudio<2.2.0,>=2.1.0
16
- Requires-Dist: pyAudioAnalysis==0.3.14
17
- Requires-Dist: hmmlearn==0.3.0
18
- Requires-Dist: eyed3~=0.9.7
19
- Requires-Dist: pydub<0.26.0,>=0.25.1
20
- Requires-Dist: imblearn
21
- Requires-Dist: plotly>=5.18.0
22
- Requires-Dist: transformers~=4.37
23
- Requires-Dist: tokenizers>=0.14.1
24
- Requires-Dist: pycountry>=22.3
25
- Requires-Dist: stanza>=1.7
26
- Requires-Dist: scipy~=1.11
27
- Requires-Dist: rev_ai>=2.18.0
28
- Requires-Dist: rich~=13.6
29
- Requires-Dist: click~=8.1
30
- Requires-Dist: matplotlib<4.0.0,>=3.8.0
31
- Requires-Dist: pyfiglet==1.0.2
32
- Requires-Dist: soundfile~=0.12.0
33
- Requires-Dist: rich-click>=1.7.0
34
- Requires-Dist: typing-extensions
35
10
  Provides-Extra: dev
36
- Requires-Dist: pytest; extra == "dev"
37
11
  Provides-Extra: train
38
- Requires-Dist: accelerate~=0.27; extra == "train"
39
12
  Provides-Extra: speaker
40
- Requires-Dist: nemo-toolkit~=1.21.0; extra == "speaker"
41
- Requires-Dist: omegaconf~=2.3.0; extra == "speaker"
42
- Requires-Dist: pydub~=0.25.0; extra == "speaker"
43
- Requires-Dist: braceexpand; extra == "speaker"
44
- Requires-Dist: editdistance; extra == "speaker"
45
- Requires-Dist: g2p_en; extra == "speaker"
46
- Requires-Dist: ipywidgets; extra == "speaker"
47
- Requires-Dist: jiwer; extra == "speaker"
48
- Requires-Dist: kaldi-python-io; extra == "speaker"
49
- Requires-Dist: kaldiio; extra == "speaker"
50
- Requires-Dist: lhotse>=1.20.0; extra == "speaker"
51
- Requires-Dist: librosa>=0.10.0; extra == "speaker"
52
- Requires-Dist: marshmallow; extra == "speaker"
53
- Requires-Dist: matplotlib; extra == "speaker"
54
- Requires-Dist: packaging; extra == "speaker"
55
- Requires-Dist: pyannote.core; extra == "speaker"
56
- Requires-Dist: pyannote.metrics; extra == "speaker"
57
- Requires-Dist: pydub; extra == "speaker"
58
- Requires-Dist: pyloudnorm; extra == "speaker"
59
- Requires-Dist: resampy; extra == "speaker"
60
- Requires-Dist: ruamel.yaml; extra == "speaker"
61
- Requires-Dist: scipy>=0.14; extra == "speaker"
62
- Requires-Dist: soundfile; extra == "speaker"
63
- Requires-Dist: sox; extra == "speaker"
64
- Requires-Dist: texterrors; extra == "speaker"
65
- Requires-Dist: hydra-core<=1.3.2,>1.3; extra == "speaker"
66
- Requires-Dist: omegaconf<=2.3; extra == "speaker"
67
- Requires-Dist: pytorch-lightning>=2.2.1; extra == "speaker"
68
- Requires-Dist: torchmetrics>=0.11.0; extra == "speaker"
69
- Requires-Dist: transformers>=4.36.0; extra == "speaker"
70
- Requires-Dist: wandb; extra == "speaker"
71
- Requires-Dist: webdataset>=0.2.86; extra == "speaker"
72
- Requires-Dist: sentencepiece; extra == "speaker"
73
- Requires-Dist: youtokentome; extra == "speaker"
13
+ License-File: LICENSE
74
14
 
75
15
  # TalkBank | Batchalign2
76
16
 
@@ -102,13 +42,13 @@ You can get Batchalign from PyPi, and you can update the package in the same way
102
42
  macOS/Linux:
103
43
 
104
44
  ```
105
- pip3 install -U batchalign
45
+ pip install -U batchalign
106
46
  ```
107
47
 
108
48
  Windows:
109
49
 
110
50
  ```
111
- py -m pip3 install -U batchalign
51
+ py -m pip install -U batchalign
112
52
  ```
113
53
 
114
54
  ### Rock and Roll
@@ -1,56 +0,0 @@
1
- """
2
- verbforms.py
3
- Fix Japanese verb forms.
4
- """
5
-
6
- def verbform(upos, target, text):
7
- if "遣" in text and upos == "noun":
8
- return "verb", "遣る"
9
- if "死" in text:
10
- return "verb", "死ぬ"
11
- if "立" in text:
12
- return "verb", "立つ"
13
- if "引" in text:
14
- return "verb", "引く"
15
- if "出" in text:
16
- return "verb", "出す"
17
- if "引" in text:
18
- return "verb", "引く"
19
- if "飲" in text:
20
- return "verb", "飲む"
21
- if "呼" in text:
22
- return "verb", "呼ぶ"
23
- if "脱" in text:
24
- return "verb", "脱ぐ"
25
- if text == "な" and upos == "part":
26
- return "aux", "な"
27
- if text == "呼ん":
28
- return "verb", "呼ぶ"
29
- if text == "な" and upos == "aux":
30
- return "aux", "な"
31
- if text == "だり":
32
- return "aux", "たり"
33
- if text == "たり":
34
- return "aux", "たり"
35
- if text == "たら":
36
- return "sconj", "たら"
37
- if text == "たっ":
38
- return "sconj", "たって"
39
- # if text == "て" and upos == "sconj":
40
- # return "aux", "て"
41
- if text == "なさい" and target == "為さる":
42
- return "aux", "為さい"
43
- if text == "な" and upos == "part":
44
- return "aux", "な"
45
- if text == "脱" and upos == "noun":
46
- return "verb", "脱"
47
- if text == "よう" and upos == "aux":
48
- return "aux", "よう"
49
- if text == "ろ" and upos == "aux" and target == "為る":
50
- return "aux", "ろ"
51
- # if upos == "verb" and "る" in target:
52
- # return "verb", target.replace("る","").strip()
53
-
54
- return upos,target
55
-
56
-
@@ -1,3 +0,0 @@
1
- 0.7.3-beta.14
2
- July 6th, 2024
3
- UD Fixes
File without changes
File without changes
File without changes
File without changes