batchalign 0.7.6a2__tar.gz → 0.7.6a3__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (109)
  1. {batchalign-0.7.6a2/batchalign.egg-info → batchalign-0.7.6a3}/PKG-INFO +2 -62
  2. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/models/whisper/infer_asr.py +2 -0
  3. batchalign-0.7.6a3/batchalign/version +3 -0
  4. {batchalign-0.7.6a2 → batchalign-0.7.6a3/batchalign.egg-info}/PKG-INFO +2 -62
  5. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign.egg-info/SOURCES.txt +0 -1
  6. batchalign-0.7.6a2/batchalign/pipelines/morphosyntax/coref.py +0 -45
  7. batchalign-0.7.6a2/batchalign/version +0 -3
  8. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/LICENSE +0 -0
  9. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/MANIFEST.in +0 -0
  10. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/README.md +0 -0
  11. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/__init__.py +0 -0
  12. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/__main__.py +0 -0
  13. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/cli/__init__.py +0 -0
  14. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/cli/cli.py +0 -0
  15. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/cli/dispatch.py +0 -0
  16. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/constants.py +0 -0
  17. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/document.py +0 -0
  18. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/errors.py +0 -0
  19. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/formats/__init__.py +0 -0
  20. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/formats/base.py +0 -0
  21. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/formats/chat/__init__.py +0 -0
  22. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/formats/chat/file.py +0 -0
  23. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/formats/chat/generator.py +0 -0
  24. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/formats/chat/lexer.py +0 -0
  25. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/formats/chat/parser.py +0 -0
  26. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/formats/chat/utils.py +0 -0
  27. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/formats/textgrid/__init__.py +0 -0
  28. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/formats/textgrid/file.py +0 -0
  29. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/formats/textgrid/generator.py +0 -0
  30. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/formats/textgrid/parser.py +0 -0
  31. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/models/__init__.py +0 -0
  32. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/models/resolve.py +0 -0
  33. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/models/speaker/__init__.py +0 -0
  34. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/models/speaker/config.yaml +0 -0
  35. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/models/speaker/infer.py +0 -0
  36. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/models/speaker/utils.py +0 -0
  37. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/models/training/__init__.py +0 -0
  38. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/models/training/run.py +0 -0
  39. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/models/training/utils.py +0 -0
  40. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/models/utils.py +0 -0
  41. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/models/utterance/__init__.py +0 -0
  42. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/models/utterance/dataset.py +0 -0
  43. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/models/utterance/execute.py +0 -0
  44. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/models/utterance/infer.py +0 -0
  45. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/models/utterance/prep.py +0 -0
  46. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/models/utterance/train.py +0 -0
  47. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/models/whisper/__init__.py +0 -0
  48. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/models/whisper/infer_fa.py +0 -0
  49. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/__init__.py +0 -0
  50. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/analysis/__init__.py +0 -0
  51. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/analysis/eval.py +0 -0
  52. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/asr/__init__.py +0 -0
  53. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/asr/rev.py +0 -0
  54. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/asr/utils.py +0 -0
  55. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/asr/whisper.py +0 -0
  56. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/asr/whisperx.py +0 -0
  57. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/base.py +0 -0
  58. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/cleanup/__init__.py +0 -0
  59. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  60. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  61. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  62. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/cleanup/retrace.py +0 -0
  63. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  64. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  65. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/cleanup/support/test.test +0 -0
  66. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/dispatch.py +0 -0
  67. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/fa/__init__.py +0 -0
  68. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  69. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  70. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  71. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  72. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/morphosyntax/ud.py +0 -0
  73. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/pipeline.py +0 -0
  74. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/speaker/__init__.py +0 -0
  75. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  76. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/utr/__init__.py +0 -0
  77. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/utr/rev_utr.py +0 -0
  78. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/utr/utils.py +0 -0
  79. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  80. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/utterance/__init__.py +0 -0
  81. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  82. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/tests/__init__.py +0 -0
  83. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/tests/conftest.py +0 -0
  84. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  85. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  86. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  87. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  88. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  89. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  90. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  91. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  92. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  93. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  94. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  95. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  96. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/tests/pipelines/fixures.py +0 -0
  97. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  98. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  99. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/tests/test_document.py +0 -0
  100. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/utils/__init__.py +0 -0
  101. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/utils/config.py +0 -0
  102. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/utils/dp.py +0 -0
  103. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign/utils/utils.py +0 -0
  104. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign.egg-info/dependency_links.txt +0 -0
  105. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign.egg-info/entry_points.txt +0 -0
  106. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign.egg-info/requires.txt +0 -0
  107. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/batchalign.egg-info/top_level.txt +0 -0
  108. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/setup.cfg +0 -0
  109. {batchalign-0.7.6a2 → batchalign-0.7.6a3}/setup.py +0 -0
@@ -1,76 +1,16 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.6a2
3
+ Version: 0.7.6a3
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
7
7
  Classifier: Development Status :: 3 - Alpha
8
8
  Classifier: Topic :: Utilities
9
9
  Description-Content-Type: text/markdown
10
- License-File: LICENSE
11
- Requires-Dist: pydantic>=2.4
12
- Requires-Dist: nltk>=3.8
13
- Requires-Dist: praatio<6.1.0,>=6.0.0
14
- Requires-Dist: torch>=2.0.1
15
- Requires-Dist: torchaudio>=2.1.0
16
- Requires-Dist: pyAudioAnalysis==0.3.14
17
- Requires-Dist: hmmlearn==0.3.0
18
- Requires-Dist: eyed3~=0.9.7
19
- Requires-Dist: pydub<0.26.0,>=0.25.1
20
- Requires-Dist: imblearn
21
- Requires-Dist: plotly>=5.18.0
22
- Requires-Dist: transformers>=4.37
23
- Requires-Dist: tokenizers>=0.14.1
24
- Requires-Dist: pycountry>=22.3
25
- Requires-Dist: stanza[transformers]>=1.9.1
26
- Requires-Dist: scipy~=1.11
27
- Requires-Dist: rev_ai>=2.18.0
28
- Requires-Dist: rich~=13.6
29
- Requires-Dist: click~=8.1
30
- Requires-Dist: matplotlib<4.0.0,>=3.8.0
31
- Requires-Dist: pyfiglet==1.0.2
32
- Requires-Dist: soundfile~=0.12.0
33
- Requires-Dist: rich-click>=1.7.0
34
- Requires-Dist: typing-extensions
35
10
  Provides-Extra: dev
36
- Requires-Dist: pytest; extra == "dev"
37
11
  Provides-Extra: train
38
- Requires-Dist: accelerate~=0.27; extra == "train"
39
12
  Provides-Extra: speaker
40
- Requires-Dist: nemo-toolkit~=1.21.0; extra == "speaker"
41
- Requires-Dist: omegaconf~=2.3.0; extra == "speaker"
42
- Requires-Dist: pydub~=0.25.0; extra == "speaker"
43
- Requires-Dist: braceexpand; extra == "speaker"
44
- Requires-Dist: editdistance; extra == "speaker"
45
- Requires-Dist: g2p_en; extra == "speaker"
46
- Requires-Dist: ipywidgets; extra == "speaker"
47
- Requires-Dist: jiwer; extra == "speaker"
48
- Requires-Dist: kaldi-python-io; extra == "speaker"
49
- Requires-Dist: kaldiio; extra == "speaker"
50
- Requires-Dist: lhotse>=1.20.0; extra == "speaker"
51
- Requires-Dist: librosa>=0.10.0; extra == "speaker"
52
- Requires-Dist: marshmallow; extra == "speaker"
53
- Requires-Dist: matplotlib; extra == "speaker"
54
- Requires-Dist: packaging; extra == "speaker"
55
- Requires-Dist: pyannote.core; extra == "speaker"
56
- Requires-Dist: pyannote.metrics; extra == "speaker"
57
- Requires-Dist: pydub; extra == "speaker"
58
- Requires-Dist: pyloudnorm; extra == "speaker"
59
- Requires-Dist: resampy; extra == "speaker"
60
- Requires-Dist: ruamel.yaml; extra == "speaker"
61
- Requires-Dist: scipy>=0.14; extra == "speaker"
62
- Requires-Dist: soundfile; extra == "speaker"
63
- Requires-Dist: sox; extra == "speaker"
64
- Requires-Dist: texterrors; extra == "speaker"
65
- Requires-Dist: hydra-core<=1.3.2,>1.3; extra == "speaker"
66
- Requires-Dist: omegaconf<=2.3; extra == "speaker"
67
- Requires-Dist: pytorch-lightning>=2.2.1; extra == "speaker"
68
- Requires-Dist: torchmetrics>=0.11.0; extra == "speaker"
69
- Requires-Dist: transformers>=4.36.0; extra == "speaker"
70
- Requires-Dist: wandb; extra == "speaker"
71
- Requires-Dist: webdataset>=0.2.86; extra == "speaker"
72
- Requires-Dist: sentencepiece; extra == "speaker"
73
- Requires-Dist: youtokentome; extra == "speaker"
13
+ License-File: LICENSE
74
14
 
75
15
  # TalkBank | Batchalign2
76
16
 
@@ -191,6 +191,8 @@ class WhisperASRModel(object):
191
191
 
192
192
  if element["type"] == "text":
193
193
  pl = element["payload"].strip()
194
+ pl = pl.replace("「", "")
195
+ pl = pl.replace("」", "")
194
196
  before = re.findall(r"^\W+", pl)
195
197
  after = re.findall(r"\W+$", pl)
196
198
  texts = []
@@ -0,0 +1,3 @@
1
+ 0.7.6-alpha.3
2
+ September 28th, 2024
3
+ erase quote marks for cantonese
@@ -1,76 +1,16 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.6a2
3
+ Version: 0.7.6a3
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
7
7
  Classifier: Development Status :: 3 - Alpha
8
8
  Classifier: Topic :: Utilities
9
9
  Description-Content-Type: text/markdown
10
- License-File: LICENSE
11
- Requires-Dist: pydantic>=2.4
12
- Requires-Dist: nltk>=3.8
13
- Requires-Dist: praatio<6.1.0,>=6.0.0
14
- Requires-Dist: torch>=2.0.1
15
- Requires-Dist: torchaudio>=2.1.0
16
- Requires-Dist: pyAudioAnalysis==0.3.14
17
- Requires-Dist: hmmlearn==0.3.0
18
- Requires-Dist: eyed3~=0.9.7
19
- Requires-Dist: pydub<0.26.0,>=0.25.1
20
- Requires-Dist: imblearn
21
- Requires-Dist: plotly>=5.18.0
22
- Requires-Dist: transformers>=4.37
23
- Requires-Dist: tokenizers>=0.14.1
24
- Requires-Dist: pycountry>=22.3
25
- Requires-Dist: stanza[transformers]>=1.9.1
26
- Requires-Dist: scipy~=1.11
27
- Requires-Dist: rev_ai>=2.18.0
28
- Requires-Dist: rich~=13.6
29
- Requires-Dist: click~=8.1
30
- Requires-Dist: matplotlib<4.0.0,>=3.8.0
31
- Requires-Dist: pyfiglet==1.0.2
32
- Requires-Dist: soundfile~=0.12.0
33
- Requires-Dist: rich-click>=1.7.0
34
- Requires-Dist: typing-extensions
35
10
  Provides-Extra: dev
36
- Requires-Dist: pytest; extra == "dev"
37
11
  Provides-Extra: train
38
- Requires-Dist: accelerate~=0.27; extra == "train"
39
12
  Provides-Extra: speaker
40
- Requires-Dist: nemo-toolkit~=1.21.0; extra == "speaker"
41
- Requires-Dist: omegaconf~=2.3.0; extra == "speaker"
42
- Requires-Dist: pydub~=0.25.0; extra == "speaker"
43
- Requires-Dist: braceexpand; extra == "speaker"
44
- Requires-Dist: editdistance; extra == "speaker"
45
- Requires-Dist: g2p_en; extra == "speaker"
46
- Requires-Dist: ipywidgets; extra == "speaker"
47
- Requires-Dist: jiwer; extra == "speaker"
48
- Requires-Dist: kaldi-python-io; extra == "speaker"
49
- Requires-Dist: kaldiio; extra == "speaker"
50
- Requires-Dist: lhotse>=1.20.0; extra == "speaker"
51
- Requires-Dist: librosa>=0.10.0; extra == "speaker"
52
- Requires-Dist: marshmallow; extra == "speaker"
53
- Requires-Dist: matplotlib; extra == "speaker"
54
- Requires-Dist: packaging; extra == "speaker"
55
- Requires-Dist: pyannote.core; extra == "speaker"
56
- Requires-Dist: pyannote.metrics; extra == "speaker"
57
- Requires-Dist: pydub; extra == "speaker"
58
- Requires-Dist: pyloudnorm; extra == "speaker"
59
- Requires-Dist: resampy; extra == "speaker"
60
- Requires-Dist: ruamel.yaml; extra == "speaker"
61
- Requires-Dist: scipy>=0.14; extra == "speaker"
62
- Requires-Dist: soundfile; extra == "speaker"
63
- Requires-Dist: sox; extra == "speaker"
64
- Requires-Dist: texterrors; extra == "speaker"
65
- Requires-Dist: hydra-core<=1.3.2,>1.3; extra == "speaker"
66
- Requires-Dist: omegaconf<=2.3; extra == "speaker"
67
- Requires-Dist: pytorch-lightning>=2.2.1; extra == "speaker"
68
- Requires-Dist: torchmetrics>=0.11.0; extra == "speaker"
69
- Requires-Dist: transformers>=4.36.0; extra == "speaker"
70
- Requires-Dist: wandb; extra == "speaker"
71
- Requires-Dist: webdataset>=0.2.86; extra == "speaker"
72
- Requires-Dist: sentencepiece; extra == "speaker"
73
- Requires-Dist: youtokentome; extra == "speaker"
13
+ License-File: LICENSE
74
14
 
75
15
  # TalkBank | Batchalign2
76
16
 
@@ -70,7 +70,6 @@ batchalign/pipelines/cleanup/support/test.test
70
70
  batchalign/pipelines/fa/__init__.py
71
71
  batchalign/pipelines/fa/whisper_fa.py
72
72
  batchalign/pipelines/morphosyntax/__init__.py
73
- batchalign/pipelines/morphosyntax/coref.py
74
73
  batchalign/pipelines/morphosyntax/ud.py
75
74
  batchalign/pipelines/morphosyntax/fr/case.py
76
75
  batchalign/pipelines/morphosyntax/ja/verbforms.py
@@ -1,45 +0,0 @@
1
- import stanza
2
- from batchalign.utils.dp import PayloadTarget, ReferenceTarget, Match, align
3
- from warnings import warn
4
- from batchalign.document import *
5
- from batchalign.constants import *
6
- from batchalign.pipelines.base import *
7
- from batchalign.formats.chat.parser import chat_parse_utterance
8
-
9
- from batchalign.utils.dp import *
10
-
11
-
12
-
13
- class CorefEngine(BatchalignEngine):
14
- tasks = [ Task.COREF ]
15
-
16
- def process(self, doc, **kwargs):
17
- if "eng" not in doc.langs:
18
- warn("Coreference resolution is only supported for English documents.")
19
- return
20
-
21
- detokenized = " ".join([i.strip(include_retrace=True, include_fp=True) for i in doc.content if isinstance(i, Utterance)])
22
- pipeline = stanza.Pipeline(lang="en", processors="tokenize, coref")
23
-
24
- coref_chains = pipeline(detokenized).sentences
25
- coref_chains = [(j.text,
26
- [Coref(start=chain.is_start,
27
- end=chain.is_end,
28
- chain=chain.chain.index) for chain in j.coref_chains])
29
- for i in coref_chains
30
- for j in i.words]
31
-
32
- payloads = [PayloadTarget(i[0], i[1]) for i in coref_chains]
33
- references = [ReferenceTarget(j.text, (ut_id, form_id)) for ut_id, i in enumerate(doc.content)
34
- if isinstance(i, Utterance)
35
- for form_id, j in enumerate(i.content)]
36
- alignment = align(payloads, references, tqdm=False)
37
-
38
- for i in alignment:
39
- if isinstance(i, Match):
40
- (ut, form) = i.reference_payload
41
- doc.content[ut].content[form].coreference = i.payload
42
-
43
- return doc
44
-
45
-
@@ -1,3 +0,0 @@
1
- 0.7.6-alpha.2
2
- September 27th, 2024
3
- skip extra tokenizer output
File without changes
File without changes
File without changes
File without changes
File without changes