ryokai 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ryokai-0.1.0/LICENSE +21 -0
- ryokai-0.1.0/PKG-INFO +117 -0
- ryokai-0.1.0/README.md +77 -0
- ryokai-0.1.0/pyproject.toml +71 -0
- ryokai-0.1.0/ryokai/__init__.py +283 -0
- ryokai-0.1.0/ryokai/_langs.py +16 -0
- ryokai-0.1.0/ryokai/cli.py +87 -0
- ryokai-0.1.0/ryokai/data/labelconfig.yaml +314 -0
- ryokai-0.1.0/ryokai/data/stopwords.yaml +553 -0
- ryokai-0.1.0/ryokai/eval.py +133 -0
- ryokai-0.1.0/ryokai/graph.py +32 -0
- ryokai-0.1.0/ryokai/labelconfig.py +45 -0
- ryokai-0.1.0/ryokai/match.py +24 -0
- ryokai-0.1.0/ryokai/scorer.py +309 -0
- ryokai-0.1.0/ryokai/sim/__init__.py +12 -0
- ryokai-0.1.0/ryokai/sim/contextual.py +250 -0
- ryokai-0.1.0/ryokai/sim/embeddings.py +90 -0
- ryokai-0.1.0/ryokai/sim/static.py +224 -0
- ryokai-0.1.0/ryokai/srl/__init__.py +24 -0
- ryokai-0.1.0/ryokai/srl/hf_backend.py +61 -0
- ryokai-0.1.0/ryokai/srl/hf_pos.py +201 -0
- ryokai-0.1.0/ryokai/stopwords.py +40 -0
- ryokai-0.1.0/ryokai.egg-info/PKG-INFO +117 -0
- ryokai-0.1.0/ryokai.egg-info/SOURCES.txt +39 -0
- ryokai-0.1.0/ryokai.egg-info/dependency_links.txt +1 -0
- ryokai-0.1.0/ryokai.egg-info/entry_points.txt +2 -0
- ryokai-0.1.0/ryokai.egg-info/requires.txt +17 -0
- ryokai-0.1.0/ryokai.egg-info/top_level.txt +1 -0
- ryokai-0.1.0/setup.cfg +4 -0
- ryokai-0.1.0/tests/test_api.py +63 -0
- ryokai-0.1.0/tests/test_contextual_aligner.py +70 -0
- ryokai-0.1.0/tests/test_distortion_layer.py +23 -0
- ryokai-0.1.0/tests/test_eval.py +68 -0
- ryokai-0.1.0/tests/test_labelconfig.py +60 -0
- ryokai-0.1.0/tests/test_languages.py +169 -0
- ryokai-0.1.0/tests/test_match.py +45 -0
- ryokai-0.1.0/tests/test_presets.py +42 -0
- ryokai-0.1.0/tests/test_scorer_nosrl.py +95 -0
- ryokai-0.1.0/tests/test_scorer_unit.py +89 -0
- ryokai-0.1.0/tests/test_static_backend.py +83 -0
- ryokai-0.1.0/tests/test_stopwords.py +39 -0
ryokai-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Liling Tan
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
ryokai-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ryokai
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: 了解 — unified semantic MT evaluation: MEANT, XMEANT, YiSi, WOLVESAAR, and SimAlign-style word alignment over modern multilingual embeddings.
|
|
5
|
+
Author: Liling Tan
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/alvations/ryokai
|
|
8
|
+
Project-URL: Repository, https://github.com/alvations/ryokai
|
|
9
|
+
Project-URL: Issues, https://github.com/alvations/ryokai/issues
|
|
10
|
+
Keywords: machine-translation,evaluation,metric,meant,xmeant,yisi,wolvesaar,simalign,word-alignment,semantic-roles,huggingface,multilingual
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Classifier: Topic :: Text Processing :: Linguistic
|
|
21
|
+
Requires-Python: >=3.9
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: torch>=2.0
|
|
25
|
+
Requires-Dist: transformers>=4.30
|
|
26
|
+
Requires-Dist: sentence-transformers>=2.2
|
|
27
|
+
Requires-Dist: scipy>=1.10
|
|
28
|
+
Requires-Dist: numpy>=1.24
|
|
29
|
+
Requires-Dist: pyyaml>=6.0
|
|
30
|
+
Provides-Extra: test
|
|
31
|
+
Requires-Dist: pytest>=7; extra == "test"
|
|
32
|
+
Requires-Dist: pytest-xdist; extra == "test"
|
|
33
|
+
Provides-Extra: dev
|
|
34
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
35
|
+
Requires-Dist: pytest-xdist; extra == "dev"
|
|
36
|
+
Requires-Dist: ruff; extra == "dev"
|
|
37
|
+
Requires-Dist: build; extra == "dev"
|
|
38
|
+
Requires-Dist: twine; extra == "dev"
|
|
39
|
+
Dynamic: license-file
|
|
40
|
+
|
|
41
|
+
# ryokai 了解
|
|
42
|
+
|
|
43
|
+
> *Ryokai* (了解, "understood / got it") — a unified Python library for **semantic machine-translation evaluation**, combining the strengths of MEANT 2.0, XMEANT, YiSi-1/2, WOLVESAAR, and SimAlign behind one clean API on top of modern multilingual embeddings.
|
|
44
|
+
|
|
45
|
+
Pure PyTorch + HuggingFace `transformers` — no Stanza, no spaCy, no external parsers. Two HF models cover all 13 supported languages (`en`, `de`, `fr`, `es`, `cs`, `fi`, `hi`, `lv`, `pl`, `ro`, `ru`, `tr`, `zh`) in a single install:
|
|
46
|
+
|
|
47
|
+
- POS / shallow SRL: [`wietsedv/xlm-roberta-base-ft-udpos28`](https://huggingface.co/wietsedv/xlm-roberta-base-ft-udpos28) — ~1.1 GB, downloaded once.
|
|
48
|
+
- Multilingual embeddings: [`sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2`](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) — ~110 MB, used for both same-language and cross-language similarity.
|
|
49
|
+
|
|
50
|
+
Both are one-line swappable for any modern multilingual encoder (Qwen3-Embedding, Jina v3, BGE-M3, Nemotron-8B…) — see [Embedding backbones](DOCUMENTATION.md#embedding-backbones) in `DOCUMENTATION.md`.
|
|
51
|
+
|
|
52
|
+
## Install
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
pip install ryokai
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Quickstart
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
from ryokai import Ryokai
|
|
62
|
+
|
|
63
|
+
scorer = Ryokai()
|
|
64
|
+
src_lang, tgt_lang = "en", "de"
|
|
65
|
+
|
|
66
|
+
# Most common: reference-free, word alignment + embedding
|
|
67
|
+
# (XMEANT-lite / YiSi-2 / Doc-embedding adequacy cross-lingual)
|
|
68
|
+
scorer.score(source=src, hypothesis=hyp,
|
|
69
|
+
source_lang=src_lang, target_lang=tgt_lang)
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## Variants
|
|
73
|
+
|
|
74
|
+
One `.score()` call, four modes, dispatched by which arguments you pass. `srl=False` is the default — `ryokai` is no longer MEANT-first.
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
from ryokai import Ryokai
|
|
78
|
+
scorer = Ryokai()
|
|
79
|
+
src_lang, tgt_lang = "en", "de"
|
|
80
|
+
|
|
81
|
+
# Reference-free, word alignment + embedding (default, most common)
|
|
82
|
+
# E.g. Doc-embedding adequacy / YiSi-2 / XMEANT-lite
|
|
83
|
+
scorer.score(source=src, hypothesis=hyp,
|
|
84
|
+
source_lang=src_lang, target_lang=tgt_lang)
|
|
85
|
+
|
|
86
|
+
# Reference-based, word alignment + embedding
|
|
87
|
+
# E.g. Doc-embedding adequacy / WOLVESAAR / YiSi-1 / SimAlign style
|
|
88
|
+
scorer.score(reference=ref, hypothesis=hyp, target_lang=tgt_lang)
|
|
89
|
+
|
|
90
|
+
# Reference-free, frame-based — XMEANT proper
|
|
91
|
+
scorer.score(source=src, hypothesis=hyp,
|
|
92
|
+
source_lang=src_lang, target_lang=tgt_lang, srl=True)
|
|
93
|
+
|
|
94
|
+
# Reference-based, frame-based — MEANT 2.0
|
|
95
|
+
scorer.score(reference=ref, hypothesis=hyp, target_lang=tgt_lang, srl=True)
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
See [`DOCUMENTATION.md`](DOCUMENTATION.md) for flags, aligner choices, embedding-backbone swaps, AER evaluation harness, CLI, architecture, and custom role weights.
|
|
99
|
+
|
|
100
|
+
## References
|
|
101
|
+
|
|
102
|
+
Ryokai is glue around several published techniques — credit belongs to their authors.
|
|
103
|
+
|
|
104
|
+
| Technique | Year | Citation | Category |
|
|
105
|
+
| --------- | ---- | -------- | -------- |
|
|
106
|
+
| MEANT | 2011 | Lo & Wu. [*MEANT: An inexpensive, high-accuracy, semi-automatic metric for evaluating translation utility based on semantic roles*](https://aclanthology.org/P11-2042/). ACL 2011. | Semantic-frame MT evaluation |
|
|
107
|
+
| XMEANT | 2014 | Lo, Beloucif, Saers & Wu. [*XMEANT: Better semantic MT evaluation without reference translations*](https://aclanthology.org/P14-2124/). ACL 2014 (Short Papers). | Semantic-frame MT evaluation |
|
|
108
|
+
| MEANT 2.0 | 2017 | Lo. [*MEANT 2.0: Accurate semantic MT evaluation for any output language*](https://aclanthology.org/W17-4767/). WMT 2017. | Semantic-frame MT evaluation |
|
|
109
|
+
| Doc-embedding adequacy | 2015 | Vela & Tan. [*Predicting Machine Translation Adequacy with Document Embeddings*](https://aclanthology.org/W15-3051/). WMT 2015. | Embedding-based MT evaluation |
|
|
110
|
+
| WOLVESAAR | 2016 | Bechara, Gupta, Tan, Orăsan, Mitkov & van Genabith. [*WOLVESAAR at SemEval-2016 Task 1: Replicating the Success of Monolingual Word Alignment and Neural Embeddings for Semantic Textual Similarity*](https://aclanthology.org/S16-1096/). SemEval-2016. | Embedding-based MT evaluation |
|
|
111
|
+
| YiSi | 2019 | Lo. [*YiSi — a Unified Semantic MT Quality Evaluation and Estimation Metric for Languages with Different Levels of Available Resources*](https://aclanthology.org/W19-5358/). WMT 2019. | Embedding-based MT evaluation |
|
|
112
|
+
| Monolingual aligner | 2014 | Sultan, Bethard & Sumner. [*Back to Basics for Monolingual Alignment: Exploiting Word Similarity and Contextual Evidence*](https://aclanthology.org/Q14-1018/). TACL 2014. | Word alignment |
|
|
113
|
+
| SimAlign | 2020 | Jalili Sabet, Dufter, Yvon & Schütze. [*SimAlign: High Quality Word Alignments without Parallel Training Data using Static and Contextualized Embeddings*](https://aclanthology.org/2020.findings-emnlp.147/). Findings of EMNLP 2020. | Word alignment |
|
|
114
|
+
|
|
115
|
+
## License
|
|
116
|
+
|
|
117
|
+
MIT — see [`LICENSE`](LICENSE).
|
ryokai-0.1.0/README.md
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# ryokai 了解
|
|
2
|
+
|
|
3
|
+
> *Ryokai* (了解, "understood / got it") — a unified Python library for **semantic machine-translation evaluation**, combining the strengths of MEANT 2.0, XMEANT, YiSi-1/2, WOLVESAAR, and SimAlign behind one clean API on top of modern multilingual embeddings.
|
|
4
|
+
|
|
5
|
+
Pure PyTorch + HuggingFace `transformers` — no Stanza, no spaCy, no external parsers. Two HF models cover all 13 supported languages (`en`, `de`, `fr`, `es`, `cs`, `fi`, `hi`, `lv`, `pl`, `ro`, `ru`, `tr`, `zh`) in a single install:
|
|
6
|
+
|
|
7
|
+
- POS / shallow SRL: [`wietsedv/xlm-roberta-base-ft-udpos28`](https://huggingface.co/wietsedv/xlm-roberta-base-ft-udpos28) — ~1.1 GB, downloaded once.
|
|
8
|
+
- Multilingual embeddings: [`sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2`](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) — ~110 MB, used for both same-language and cross-language similarity.
|
|
9
|
+
|
|
10
|
+
Both are one-line swappable for any modern multilingual encoder (Qwen3-Embedding, Jina v3, BGE-M3, Nemotron-8B…) — see [Embedding backbones](DOCUMENTATION.md#embedding-backbones) in `DOCUMENTATION.md`.
|
|
11
|
+
|
|
12
|
+
## Install
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
pip install ryokai
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
## Quickstart
|
|
19
|
+
|
|
20
|
+
```python
|
|
21
|
+
from ryokai import Ryokai
|
|
22
|
+
|
|
23
|
+
scorer = Ryokai()
|
|
24
|
+
src_lang, tgt_lang = "en", "de"
|
|
25
|
+
|
|
26
|
+
# Most common: reference-free, word alignment + embedding
|
|
27
|
+
# (XMEANT-lite / YiSi-2 / Doc-embedding adequacy cross-lingual)
|
|
28
|
+
scorer.score(source=src, hypothesis=hyp,
|
|
29
|
+
source_lang=src_lang, target_lang=tgt_lang)
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Variants
|
|
33
|
+
|
|
34
|
+
One `.score()` call, four modes, dispatched by which arguments you pass. `srl=False` is the default — `ryokai` is no longer MEANT-first.
|
|
35
|
+
|
|
36
|
+
```python
|
|
37
|
+
from ryokai import Ryokai
|
|
38
|
+
scorer = Ryokai()
|
|
39
|
+
src_lang, tgt_lang = "en", "de"
|
|
40
|
+
|
|
41
|
+
# Reference-free, word alignment + embedding (default, most common)
|
|
42
|
+
# E.g. Doc-embedding adequacy / YiSi-2 / XMEANT-lite
|
|
43
|
+
scorer.score(source=src, hypothesis=hyp,
|
|
44
|
+
source_lang=src_lang, target_lang=tgt_lang)
|
|
45
|
+
|
|
46
|
+
# Reference-based, word alignment + embedding
|
|
47
|
+
# E.g. Doc-embedding adequacy / WOLVESAAR / YiSi-1 / SimAlign style
|
|
48
|
+
scorer.score(reference=ref, hypothesis=hyp, target_lang=tgt_lang)
|
|
49
|
+
|
|
50
|
+
# Reference-free, frame-based — XMEANT proper
|
|
51
|
+
scorer.score(source=src, hypothesis=hyp,
|
|
52
|
+
source_lang=src_lang, target_lang=tgt_lang, srl=True)
|
|
53
|
+
|
|
54
|
+
# Reference-based, frame-based — MEANT 2.0
|
|
55
|
+
scorer.score(reference=ref, hypothesis=hyp, target_lang=tgt_lang, srl=True)
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
See [`DOCUMENTATION.md`](DOCUMENTATION.md) for flags, aligner choices, embedding-backbone swaps, AER evaluation harness, CLI, architecture, and custom role weights.
|
|
59
|
+
|
|
60
|
+
## References
|
|
61
|
+
|
|
62
|
+
Ryokai is glue around several published techniques — credit belongs to their authors.
|
|
63
|
+
|
|
64
|
+
| Technique | Year | Citation | Category |
|
|
65
|
+
| --------- | ---- | -------- | -------- |
|
|
66
|
+
| MEANT | 2011 | Lo & Wu. [*MEANT: An inexpensive, high-accuracy, semi-automatic metric for evaluating translation utility based on semantic roles*](https://aclanthology.org/P11-2042/). ACL 2011. | Semantic-frame MT evaluation |
|
|
67
|
+
| XMEANT | 2014 | Lo, Beloucif, Saers & Wu. [*XMEANT: Better semantic MT evaluation without reference translations*](https://aclanthology.org/P14-2124/). ACL 2014 (Short Papers). | Semantic-frame MT evaluation |
|
|
68
|
+
| MEANT 2.0 | 2017 | Lo. [*MEANT 2.0: Accurate semantic MT evaluation for any output language*](https://aclanthology.org/W17-4767/). WMT 2017. | Semantic-frame MT evaluation |
|
|
69
|
+
| Doc-embedding adequacy | 2015 | Vela & Tan. [*Predicting Machine Translation Adequacy with Document Embeddings*](https://aclanthology.org/W15-3051/). WMT 2015. | Embedding-based MT evaluation |
|
|
70
|
+
| WOLVESAAR | 2016 | Bechara, Gupta, Tan, Orăsan, Mitkov & van Genabith. [*WOLVESAAR at SemEval-2016 Task 1: Replicating the Success of Monolingual Word Alignment and Neural Embeddings for Semantic Textual Similarity*](https://aclanthology.org/S16-1096/). SemEval-2016. | Embedding-based MT evaluation |
|
|
71
|
+
| YiSi | 2019 | Lo. [*YiSi — a Unified Semantic MT Quality Evaluation and Estimation Metric for Languages with Different Levels of Available Resources*](https://aclanthology.org/W19-5358/). WMT 2019. | Embedding-based MT evaluation |
|
|
72
|
+
| Monolingual aligner | 2014 | Sultan, Bethard & Sumner. [*Back to Basics for Monolingual Alignment: Exploiting Word Similarity and Contextual Evidence*](https://aclanthology.org/Q14-1018/). TACL 2014. | Word alignment |
|
|
73
|
+
| SimAlign | 2020 | Jalili Sabet, Dufter, Yvon & Schütze. [*SimAlign: High Quality Word Alignments without Parallel Training Data using Static and Contextualized Embeddings*](https://aclanthology.org/2020.findings-emnlp.147/). Findings of EMNLP 2020. | Word alignment |
|
|
74
|
+
|
|
75
|
+
## License
|
|
76
|
+
|
|
77
|
+
MIT — see [`LICENSE`](LICENSE).
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "ryokai"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "了解 — unified semantic MT evaluation: MEANT, XMEANT, YiSi, WOLVESAAR, and SimAlign-style word alignment over modern multilingual embeddings."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { text = "MIT" }
|
|
11
|
+
requires-python = ">=3.9"
|
|
12
|
+
authors = [{ name = "Liling Tan" }]
|
|
13
|
+
keywords = [
|
|
14
|
+
"machine-translation",
|
|
15
|
+
"evaluation",
|
|
16
|
+
"metric",
|
|
17
|
+
"meant",
|
|
18
|
+
"xmeant",
|
|
19
|
+
"yisi",
|
|
20
|
+
"wolvesaar",
|
|
21
|
+
"simalign",
|
|
22
|
+
"word-alignment",
|
|
23
|
+
"semantic-roles",
|
|
24
|
+
"huggingface",
|
|
25
|
+
"multilingual",
|
|
26
|
+
]
|
|
27
|
+
classifiers = [
|
|
28
|
+
"Development Status :: 3 - Alpha",
|
|
29
|
+
"Intended Audience :: Science/Research",
|
|
30
|
+
"License :: OSI Approved :: MIT License",
|
|
31
|
+
"Programming Language :: Python :: 3",
|
|
32
|
+
"Programming Language :: Python :: 3.9",
|
|
33
|
+
"Programming Language :: Python :: 3.10",
|
|
34
|
+
"Programming Language :: Python :: 3.11",
|
|
35
|
+
"Programming Language :: Python :: 3.12",
|
|
36
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
37
|
+
"Topic :: Text Processing :: Linguistic",
|
|
38
|
+
]
|
|
39
|
+
dependencies = [
|
|
40
|
+
"torch>=2.0",
|
|
41
|
+
"transformers>=4.30",
|
|
42
|
+
"sentence-transformers>=2.2",
|
|
43
|
+
"scipy>=1.10",
|
|
44
|
+
"numpy>=1.24",
|
|
45
|
+
"pyyaml>=6.0",
|
|
46
|
+
]
|
|
47
|
+
|
|
48
|
+
[project.optional-dependencies]
|
|
49
|
+
test = ["pytest>=7", "pytest-xdist"]
|
|
50
|
+
dev = ["pytest>=7", "pytest-xdist", "ruff", "build", "twine"]
|
|
51
|
+
|
|
52
|
+
[project.scripts]
|
|
53
|
+
ryokai = "ryokai.cli:main"
|
|
54
|
+
|
|
55
|
+
[project.urls]
|
|
56
|
+
Homepage = "https://github.com/alvations/ryokai"
|
|
57
|
+
Repository = "https://github.com/alvations/ryokai"
|
|
58
|
+
Issues = "https://github.com/alvations/ryokai/issues"
|
|
59
|
+
|
|
60
|
+
[tool.setuptools]
|
|
61
|
+
packages = ["ryokai", "ryokai.srl", "ryokai.sim"]
|
|
62
|
+
include-package-data = true
|
|
63
|
+
|
|
64
|
+
[tool.setuptools.package-data]
|
|
65
|
+
ryokai = ["data/*.yaml", "data/**/*.yaml"]
|
|
66
|
+
|
|
67
|
+
[tool.pytest.ini_options]
|
|
68
|
+
testpaths = ["tests"]
|
|
69
|
+
markers = [
|
|
70
|
+
"slow: tests that download HF models (deselect with -m 'not slow')",
|
|
71
|
+
]
|
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
"""ryokai — 了解.
|
|
2
|
+
|
|
3
|
+
Unified semantic machine-translation evaluation. Reference-free or
|
|
4
|
+
reference-based, word-alignment + embedding by default, frame-based
|
|
5
|
+
MEANT 2.0 on opt-in — all behind a single `.score()` call.
|
|
6
|
+
|
|
7
|
+
Quickstart
|
|
8
|
+
----------
|
|
9
|
+
>>> from ryokai import Ryokai
|
|
10
|
+
>>> scorer = Ryokai()
|
|
11
|
+
>>>
|
|
12
|
+
>>> # Reference-free (most common; XMEANT / YiSi-2 / Doc-embedding adequacy)
|
|
13
|
+
>>> scorer.score(source=src, hypothesis=hyp,
|
|
14
|
+
... source_lang="en", target_lang="de")
|
|
15
|
+
>>>
|
|
16
|
+
>>> # Reference-based, word alignment + embeddings
|
|
17
|
+
>>> # (Doc-embedding adequacy / WOLVESAAR / YiSi-1 / SimAlign)
|
|
18
|
+
>>> scorer.score(reference=ref, hypothesis=hyp, target_lang="de")
|
|
19
|
+
>>>
|
|
20
|
+
>>> # Frame-based MEANT 2.0 — opt in with srl=True
|
|
21
|
+
>>> scorer.score(reference=ref, hypothesis=hyp, target_lang="de", srl=True)
|
|
22
|
+
>>>
|
|
23
|
+
>>> # Frame-based reference-free (XMEANT proper)
|
|
24
|
+
>>> scorer.score(source=src, hypothesis=hyp,
|
|
25
|
+
... source_lang="en", target_lang="de", srl=True)
|
|
26
|
+
"""
|
|
27
|
+
from __future__ import annotations
|
|
28
|
+
|
|
29
|
+
from ._langs import SUPPORTED_LANGS
|
|
30
|
+
from .graph import Argument, Frame, SRLGraph
|
|
31
|
+
from .labelconfig import LabelConfig
|
|
32
|
+
from .scorer import DEFAULT_WEIGHTS, MEANTScore, Score, score_nosrl, score_pair
|
|
33
|
+
from .sim import ContextualTokenSimBackend, StaticEmbeddingSimBackend
|
|
34
|
+
from .sim.embeddings import EmbeddingSimBackend
|
|
35
|
+
from .srl import HFPOSHeuristicSRLBackend, HFTokenClassifierSRLBackend, SRLBackend
|
|
36
|
+
|
|
37
|
+
__version__ = "0.1.0"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class Ryokai:
    """Unified MT-eval scorer.

    Construct once with the backend / alignment configuration you want;
    call ``.score(...)`` per-pair. The mode is chosen by which arguments
    you pass:

    +------------------------+--------------+--------------------------------------------+
    | source / reference     | srl          | Mode                                       |
    +========================+==============+============================================+
    | ``source=``            | False (def.) | Reference-free, word alignment + embedding |
    |                        |              | (XMEANT-lite / YiSi-2 / Doc-embedding adq) |
    +------------------------+--------------+--------------------------------------------+
    | ``reference=``         | False (def.) | Reference-based, word alignment + embedding|
    |                        |              | (WOLVESAAR / YiSi-1 / SimAlign style)      |
    +------------------------+--------------+--------------------------------------------+
    | ``source=``            | True         | Reference-free, frame-based (XMEANT)       |
    +------------------------+--------------+--------------------------------------------+
    | ``reference=``         | True         | Reference-based, frame-based (MEANT 2.0)   |
    +------------------------+--------------+--------------------------------------------+

    Constructor parameters configure the *how* (which models, which
    aligner, weights, etc.); per-call parameters configure the *what*
    (which sentences, which languages, ref- vs source-based, with or
    without SRL).

    Parameters
    ----------
    srl_backend : SRLBackend | None
        SRL backend used whenever a `.score(..., srl=True)` call is made.
        Defaults to `HFPOSHeuristicSRLBackend` — a multilingual XLM-R
        UDPOS heuristic covering all 13 supported languages with one
        model. The default is only instantiated on first use (see the
        ``srl`` property).
    sim_backend : EmbeddingSimBackend | None
        Sentence / argument similarity backend. Defaults to multilingual
        MiniLM. Swap to any preset (``"qwen3-0.6b"``, ``"jina-v3"``,
        ``"nemotron-8b"``…) or any HF model id. Only ``None`` triggers
        the default; any object you pass is used as-is.
    weights : dict[str, float] | None
        Per-role weights for frame-based F-score. Defaults to
        `DEFAULT_WEIGHTS`; an empty mapping also falls back to the
        defaults.
    label_config : LabelConfig | None
        SRL label aliasing. Defaults to bundled `data/labelconfig.yaml`.
        Only ``None`` triggers the default.
    content_only : bool
        No-SRL only: drop stopwords + punctuation before alignment
        (WOLVESAAR style).
    aggregation : str
        No-SRL only: ``"f1"`` (default) or ``"harmonic"``.
    aligner : str
        No-SRL only: ``"sentence"`` (fast, default), ``"hungarian"``,
        ``"argmax"``, ``"itermax"``, or ``"mai"``.
    threshold : float
        No-SRL contextual aligners: cosine floor below which a pair is
        discarded. Default 0.5.
    exact_match_shortcut : bool
        No-SRL contextual aligners: case-insensitive surface equality
        boosts a pair's similarity to 1.0 (Sultan-style prior).
    """

    def __init__(
        self,
        srl_backend: SRLBackend | None = None,
        sim_backend: EmbeddingSimBackend | None = None,
        weights: dict[str, float] | None = None,
        label_config: LabelConfig | None = None,
        content_only: bool = False,
        aggregation: str = "f1",
        aligner: str = "sentence",
        threshold: float = 0.5,
        exact_match_shortcut: bool = True,
    ) -> None:
        # Kept as-is (possibly None); the `srl` property fills in the default lazily.
        self._srl_backend = srl_backend
        # Compare against None explicitly: a caller-supplied backend/config that
        # happens to be falsy (e.g. defines __len__ and is empty) must not be
        # silently replaced by a freshly constructed default.
        self.sim = sim_backend if sim_backend is not None else EmbeddingSimBackend()
        # An empty weights mapping deliberately falls back to the defaults.
        self.weights = weights or DEFAULT_WEIGHTS
        self.label_config = label_config if label_config is not None else LabelConfig()
        self.content_only = content_only
        self.aggregation = aggregation
        self.aligner = aligner
        self.threshold = threshold
        self.exact_match_shortcut = exact_match_shortcut

    @property
    def srl(self) -> SRLBackend:
        """Lazily instantiate the default SRL backend so users who never
        call with ``srl=True`` never pay for the SRL model download."""
        if self._srl_backend is None:
            self._srl_backend = HFPOSHeuristicSRLBackend()
        return self._srl_backend

    @staticmethod
    def _check_lang(name: str, value: str | None) -> None:
        """Raise ``ValueError`` if *value* is set but not a key of `SUPPORTED_LANGS`.

        *name* is the argument name used in the error message. ``None``
        is accepted and means "not specified".
        """
        if value is None:
            return
        if value not in SUPPORTED_LANGS:
            raise ValueError(
                f"{name}={value!r} not supported. "
                f"Supported: {sorted(SUPPORTED_LANGS)}"
            )

    def score(
        self,
        *,
        hypothesis: str,
        target_lang: str,
        source: str | None = None,
        source_lang: str | None = None,
        reference: str | None = None,
        srl: bool = False,
    ) -> Score:
        """Score one MT pair. See class docstring for the dispatch table.

        Exactly one of ``source=`` or ``reference=`` must be given.
        Higher returned ``f1`` = more adequate.

        Raises
        ------
        ValueError
            If both or neither of ``source=`` / ``reference=`` are given,
            or if ``target_lang`` / ``source_lang`` is not a supported
            language code.
        """
        if (source is None) == (reference is None):
            raise ValueError(
                "pass exactly one of source= (reference-free) or "
                "reference= (reference-based)"
            )
        self._check_lang("target_lang", target_lang)
        self._check_lang("source_lang", source_lang)

        scorer = self._score_with_srl if srl else self._score_nosrl
        return scorer(
            hypothesis=hypothesis,
            target_lang=target_lang,
            source=source,
            source_lang=source_lang,
            reference=reference,
        )

    def _score_nosrl(
        self,
        *,
        hypothesis: str,
        target_lang: str,
        source: str | None,
        source_lang: str | None,
        reference: str | None,
    ) -> Score:
        """Word-alignment + embedding scoring via `score_nosrl`."""
        # The "other side" of the comparison is either the source (in its own
        # language) or the reference (in the target language).
        if source is not None:
            other, other_lang = source, source_lang
        else:
            other, other_lang = reference, target_lang
        return score_nosrl(
            other,
            hypothesis,
            self.sim,
            # A missing source_lang falls back to the target language.
            lang=other_lang or target_lang,
            content_only=self.content_only,
            aggregation=self.aggregation,
            aligner=self.aligner,
            threshold=self.threshold,
            exact_match_shortcut=self.exact_match_shortcut,
        )

    def _score_with_srl(
        self,
        *,
        hypothesis: str,
        target_lang: str,
        source: str | None,
        source_lang: str | None,
        reference: str | None,
    ) -> Score:
        """Frame-based scoring: parse both sides with the SRL backend, then `score_pair`."""
        hyp_graph = self.srl.parse(hypothesis, target_lang)
        if source is not None:
            # A missing source_lang falls back to the target language.
            ref_graph = self.srl.parse(source, source_lang or target_lang)
        else:
            ref_graph = self.srl.parse(reference, target_lang)
        return score_pair(
            ref_graph, hyp_graph, self.sim, self.weights, self.label_config,
        )

    def score_corpus(
        self,
        *,
        hypotheses: list[str],
        target_lang: str,
        sources: list[str] | None = None,
        source_lang: str | None = None,
        references: list[str] | None = None,
        srl: bool = False,
    ) -> list[Score]:
        """Score a parallel corpus — one Score per pair.

        Same dispatch rules as `.score()`: pass either `sources=` or
        `references=`, not both.

        Raises
        ------
        ValueError
            If both or neither of ``sources=`` / ``references=`` are
            given, if their length differs from ``hypotheses``, or for
            any reason `.score()` raises on an individual pair.
        """
        if (sources is None) == (references is None):
            raise ValueError(
                "pass exactly one of sources= or references="
            )
        reference_free = sources is not None
        others = sources if reference_free else references
        n = len(hypotheses)
        if len(others) != n:
            raise ValueError(
                f"length mismatch: hypotheses={n}, "
                f"{'sources' if reference_free else 'references'}={len(others)}"
            )
        # Keyword under which the "other side" is forwarded to .score().
        other_key = "source" if reference_free else "reference"
        return [
            self.score(
                hypothesis=hyp,
                target_lang=target_lang,
                source_lang=source_lang,
                srl=srl,
                **{other_key: other},
            )
            for hyp, other in zip(hypotheses, others)
        ]
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
# Backwards-compatible alias — most MT-eval literature calls this MEANT.
|
|
262
|
+
MEANT = Ryokai
|
|
263
|
+
|
|
264
|
+
__all__ = [
|
|
265
|
+
"Ryokai",
|
|
266
|
+
"MEANT",
|
|
267
|
+
"Score",
|
|
268
|
+
"MEANTScore",
|
|
269
|
+
"SRLGraph",
|
|
270
|
+
"Frame",
|
|
271
|
+
"Argument",
|
|
272
|
+
"LabelConfig",
|
|
273
|
+
"SRLBackend",
|
|
274
|
+
"HFPOSHeuristicSRLBackend",
|
|
275
|
+
"HFTokenClassifierSRLBackend",
|
|
276
|
+
"EmbeddingSimBackend",
|
|
277
|
+
"ContextualTokenSimBackend",
|
|
278
|
+
"StaticEmbeddingSimBackend",
|
|
279
|
+
"DEFAULT_WEIGHTS",
|
|
280
|
+
"SUPPORTED_LANGS",
|
|
281
|
+
"score_pair",
|
|
282
|
+
"score_nosrl",
|
|
283
|
+
]
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""Registry of the 13 languages MEANT 2.0 originally supported."""
|
|
2
|
+
SUPPORTED_LANGS: dict[str, str] = {
|
|
3
|
+
"en": "English",
|
|
4
|
+
"de": "German",
|
|
5
|
+
"fr": "French",
|
|
6
|
+
"es": "Spanish",
|
|
7
|
+
"cs": "Czech",
|
|
8
|
+
"fi": "Finnish",
|
|
9
|
+
"hi": "Hindi",
|
|
10
|
+
"lv": "Latvian",
|
|
11
|
+
"pl": "Polish",
|
|
12
|
+
"ro": "Romanian",
|
|
13
|
+
"ru": "Russian",
|
|
14
|
+
"tr": "Turkish",
|
|
15
|
+
"zh": "Chinese",
|
|
16
|
+
}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""ryokai CLI — score a parallel hyp file vs reference or source file."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import argparse
|
|
5
|
+
import sys
|
|
6
|
+
|
|
7
|
+
from . import Ryokai, SUPPORTED_LANGS
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _read_segments(path: str) -> list[str]:
    """Return the lines of the UTF-8 text file at *path*, trailing newline removed."""
    with open(path, encoding="utf-8") as handle:
        return [line.rstrip("\n") for line in handle]


def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: score a hypothesis file against a reference or source file.

    Parameters
    ----------
    argv : list[str] | None
        Argument vector to parse; ``None`` lets argparse read ``sys.argv``.

    Returns
    -------
    int
        ``0`` on success, ``2`` when the hypothesis file and the
        reference / source file have different line counts (a message is
        printed to stderr in that case).
    """
    p = argparse.ArgumentParser(
        prog="ryokai",
        description=(
            "Score MT output. Pass --ref for reference-based scoring "
            "(MEANT / WOLVESAAR / YiSi-1 / SimAlign style), or --src "
            "for reference-free scoring (XMEANT / YiSi-2 / Doc-embedding "
            "adequacy)."
        ),
    )
    group = p.add_mutually_exclusive_group(required=True)
    group.add_argument("--ref", help="Path to reference text (one segment per line).")
    group.add_argument("--src", help="Path to source text (one segment per line).")
    p.add_argument("--hyp", required=True, help="Path to hypothesis / MT output (one segment per line).")
    p.add_argument(
        "--target-lang", required=True, choices=sorted(SUPPORTED_LANGS),
        help="Two-letter language code of the hypothesis.",
    )
    p.add_argument(
        "--source-lang", choices=sorted(SUPPORTED_LANGS),
        help="Language of --src. Defaults to --target-lang. Ignored if --ref is used.",
    )
    p.add_argument(
        "--srl", action="store_true",
        help="Use the frame-based MEANT 2.0 / XMEANT scorer. Default: word alignment + embedding.",
    )
    p.add_argument(
        "--aligner", default="sentence",
        choices=["sentence", "hungarian", "argmax", "itermax", "mai"],
        help="No-SRL aligner. Ignored when --srl is set.",
    )
    p.add_argument(
        "--sim-model", default=None,
        help="Override the multilingual embedding model id or preset.",
    )
    p.add_argument(
        "--no-corpus-avg", action="store_true",
        help="Print one F1 per line instead of corpus mean.",
    )
    args = p.parse_args(argv)

    # The group is required and mutually exclusive, so exactly one of
    # --ref / --src is not None. Test identity rather than truthiness so an
    # empty-string path is reported as a file error for the option the user
    # actually passed instead of falling through to open(None).
    use_ref = args.ref is not None
    hyps = _read_segments(args.hyp)
    if use_ref:
        others = _read_segments(args.ref)
        kind = "references"
    else:
        others = _read_segments(args.src)
        kind = "sources"
    if len(others) != len(hyps):
        print(
            f"{kind} and hypotheses line counts differ: {len(others)} vs {len(hyps)}",
            file=sys.stderr,
        )
        return 2

    # Imported here, as in the original, so it is only loaded once the inputs
    # have been read and validated.
    from .sim.embeddings import EmbeddingSimBackend
    sim = EmbeddingSimBackend(args.sim_model) if args.sim_model else None
    scorer = Ryokai(sim_backend=sim, aligner=args.aligner)
    scores = scorer.score_corpus(
        hypotheses=hyps,
        target_lang=args.target_lang,
        sources=None if use_ref else others,
        references=others if use_ref else None,
        source_lang=args.source_lang,
        srl=args.srl,
    )

    if args.no_corpus_avg:
        for s in scores:
            print(f"{s.f1:.4f}")
    else:
        # Guard the empty corpus so the mean does not divide by zero.
        mean = sum(s.f1 for s in scores) / len(scores) if scores else 0.0
        print(f"ryokai = {mean:.4f} (n={len(scores)})")
    return 0
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
if __name__ == "__main__":
|
|
87
|
+
raise SystemExit(main())
|