ryokai 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. ryokai-0.1.0/LICENSE +21 -0
  2. ryokai-0.1.0/PKG-INFO +117 -0
  3. ryokai-0.1.0/README.md +77 -0
  4. ryokai-0.1.0/pyproject.toml +71 -0
  5. ryokai-0.1.0/ryokai/__init__.py +283 -0
  6. ryokai-0.1.0/ryokai/_langs.py +16 -0
  7. ryokai-0.1.0/ryokai/cli.py +87 -0
  8. ryokai-0.1.0/ryokai/data/labelconfig.yaml +314 -0
  9. ryokai-0.1.0/ryokai/data/stopwords.yaml +553 -0
  10. ryokai-0.1.0/ryokai/eval.py +133 -0
  11. ryokai-0.1.0/ryokai/graph.py +32 -0
  12. ryokai-0.1.0/ryokai/labelconfig.py +45 -0
  13. ryokai-0.1.0/ryokai/match.py +24 -0
  14. ryokai-0.1.0/ryokai/scorer.py +309 -0
  15. ryokai-0.1.0/ryokai/sim/__init__.py +12 -0
  16. ryokai-0.1.0/ryokai/sim/contextual.py +250 -0
  17. ryokai-0.1.0/ryokai/sim/embeddings.py +90 -0
  18. ryokai-0.1.0/ryokai/sim/static.py +224 -0
  19. ryokai-0.1.0/ryokai/srl/__init__.py +24 -0
  20. ryokai-0.1.0/ryokai/srl/hf_backend.py +61 -0
  21. ryokai-0.1.0/ryokai/srl/hf_pos.py +201 -0
  22. ryokai-0.1.0/ryokai/stopwords.py +40 -0
  23. ryokai-0.1.0/ryokai.egg-info/PKG-INFO +117 -0
  24. ryokai-0.1.0/ryokai.egg-info/SOURCES.txt +39 -0
  25. ryokai-0.1.0/ryokai.egg-info/dependency_links.txt +1 -0
  26. ryokai-0.1.0/ryokai.egg-info/entry_points.txt +2 -0
  27. ryokai-0.1.0/ryokai.egg-info/requires.txt +17 -0
  28. ryokai-0.1.0/ryokai.egg-info/top_level.txt +1 -0
  29. ryokai-0.1.0/setup.cfg +4 -0
  30. ryokai-0.1.0/tests/test_api.py +63 -0
  31. ryokai-0.1.0/tests/test_contextual_aligner.py +70 -0
  32. ryokai-0.1.0/tests/test_distortion_layer.py +23 -0
  33. ryokai-0.1.0/tests/test_eval.py +68 -0
  34. ryokai-0.1.0/tests/test_labelconfig.py +60 -0
  35. ryokai-0.1.0/tests/test_languages.py +169 -0
  36. ryokai-0.1.0/tests/test_match.py +45 -0
  37. ryokai-0.1.0/tests/test_presets.py +42 -0
  38. ryokai-0.1.0/tests/test_scorer_nosrl.py +95 -0
  39. ryokai-0.1.0/tests/test_scorer_unit.py +89 -0
  40. ryokai-0.1.0/tests/test_static_backend.py +83 -0
  41. ryokai-0.1.0/tests/test_stopwords.py +39 -0
ryokai-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Liling Tan
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
ryokai-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,117 @@
1
+ Metadata-Version: 2.4
2
+ Name: ryokai
3
+ Version: 0.1.0
4
+ Summary: 了解 — unified semantic MT evaluation: MEANT, XMEANT, YiSi, WOLVESAAR, and SimAlign-style word alignment over modern multilingual embeddings.
5
+ Author: Liling Tan
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/alvations/ryokai
8
+ Project-URL: Repository, https://github.com/alvations/ryokai
9
+ Project-URL: Issues, https://github.com/alvations/ryokai/issues
10
+ Keywords: machine-translation,evaluation,metric,meant,xmeant,yisi,wolvesaar,simalign,word-alignment,semantic-roles,huggingface,multilingual
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Classifier: Topic :: Text Processing :: Linguistic
21
+ Requires-Python: >=3.9
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: torch>=2.0
25
+ Requires-Dist: transformers>=4.30
26
+ Requires-Dist: sentence-transformers>=2.2
27
+ Requires-Dist: scipy>=1.10
28
+ Requires-Dist: numpy>=1.24
29
+ Requires-Dist: pyyaml>=6.0
30
+ Provides-Extra: test
31
+ Requires-Dist: pytest>=7; extra == "test"
32
+ Requires-Dist: pytest-xdist; extra == "test"
33
+ Provides-Extra: dev
34
+ Requires-Dist: pytest>=7; extra == "dev"
35
+ Requires-Dist: pytest-xdist; extra == "dev"
36
+ Requires-Dist: ruff; extra == "dev"
37
+ Requires-Dist: build; extra == "dev"
38
+ Requires-Dist: twine; extra == "dev"
39
+ Dynamic: license-file
40
+
41
+ # ryokai 了解
42
+
43
+ > *Ryokai* (了解, "understood / got it") — a unified Python library for **semantic machine-translation evaluation**, combining the strengths of MEANT 2.0, XMEANT, YiSi-1/2, WOLVESAAR, and SimAlign behind one clean API on top of modern multilingual embeddings.
44
+
45
+ Pure PyTorch + HuggingFace `transformers` — no Stanza, no spaCy, no external parsers. Two HF models cover all 13 supported languages (`en`, `de`, `fr`, `es`, `cs`, `fi`, `hi`, `lv`, `pl`, `ro`, `ru`, `tr`, `zh`) in a single install:
46
+
47
+ - POS / shallow SRL: [`wietsedv/xlm-roberta-base-ft-udpos28`](https://huggingface.co/wietsedv/xlm-roberta-base-ft-udpos28) — ~1.1 GB, downloaded once.
48
+ - Multilingual embeddings: [`sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2`](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) — ~110 MB, used for both same-language and cross-language similarity.
49
+
50
+ Both are one-line swappable for any modern multilingual encoder (Qwen3-Embedding, Jina v3, BGE-M3, Nemotron-8B…) — see [Embedding backbones](DOCUMENTATION.md#embedding-backbones) in `DOCUMENTATION.md`.
51
+
52
+ ## Install
53
+
54
+ ```bash
55
+ pip install ryokai
56
+ ```
57
+
58
+ ## Quickstart
59
+
60
+ ```python
61
+ from ryokai import Ryokai
62
+
63
+ scorer = Ryokai()
64
+ src_lang, tgt_lang = "en", "zh"
65
+
66
+ # Most common: reference-free, word alignment + embedding
67
+ # (XMEANT-lite / YiSi-2 / Doc-embedding adequacy cross-lingual)
68
+ scorer.score(source=src, hypothesis=hyp,
69
+ source_lang=src_lang, target_lang=tgt_lang)
70
+ ```
71
+
72
+ ## Variants
73
+
74
+ One `.score()` call, four modes, dispatched by which arguments you pass. `srl=False` is the default — `ryokai` is no longer MEANT-first.
75
+
76
+ ```python
77
+ from ryokai import Ryokai
78
+ scorer = Ryokai()
79
+ src_lang, tgt_lang = "en", "zh"
80
+
81
+ # Reference-free, word alignment + embedding (default, most common)
82
+ # E.g. Doc-embedding adequacy / YiSi-2 / XMEANT-lite
83
+ scorer.score(source=src, hypothesis=hyp,
84
+ source_lang=src_lang, target_lang=tgt_lang)
85
+
86
+ # Reference-based, word alignment + embedding
87
+ # E.g. Doc-embedding adequacy / WOLVESAAR / YiSi-1 / SimAlign style
88
+ scorer.score(reference=ref, hypothesis=hyp, target_lang=tgt_lang)
89
+
90
+ # Reference-free, frame-based — XMEANT proper
91
+ scorer.score(source=src, hypothesis=hyp,
92
+ source_lang=src_lang, target_lang=tgt_lang, srl=True)
93
+
94
+ # Reference-based, frame-based — MEANT 2.0
95
+ scorer.score(reference=ref, hypothesis=hyp, target_lang=tgt_lang, srl=True)
96
+ ```
97
+
98
+ See [`DOCUMENTATION.md`](DOCUMENTATION.md) for flags, aligner choices, embedding-backbone swaps, AER evaluation harness, CLI, architecture, and custom role weights.
99
+
100
+ ## References
101
+
102
+ Ryokai is glue around several published techniques — credit belongs to their authors.
103
+
104
+ | Technique | Year | Citation | Category |
105
+ | --------- | ---- | -------- | -------- |
106
+ | MEANT | 2011 | Lo & Wu. [*MEANT: An inexpensive, high-accuracy, semi-automatic metric for evaluating translation utility based on semantic roles*](https://aclanthology.org/P11-2042/). ACL 2011. | Semantic-frame MT evaluation |
107
+ | XMEANT | 2014 | Lo, Beloucif, Saers & Wu. [*XMEANT: Better semantic MT evaluation without reference translations*](https://aclanthology.org/P14-2124/). ACL 2014 (Short Papers). | Semantic-frame MT evaluation |
108
+ | MEANT 2.0 | 2017 | Lo. [*MEANT 2.0: Accurate semantic MT evaluation for any output language*](https://aclanthology.org/W17-4767/). WMT 2017. | Semantic-frame MT evaluation |
109
+ | Doc-embedding adequacy | 2015 | Vela & Tan. [*Predicting Machine Translation Adequacy with Document Embeddings*](https://aclanthology.org/W15-3051/). WMT 2015. | Embedding-based MT evaluation |
110
+ | WOLVESAAR | 2016 | Bechara, Gupta, Tan, Orăsan, Mitkov & van Genabith. [*WOLVESAAR at SemEval-2016 Task 1: Replicating the Success of Monolingual Word Alignment and Neural Embeddings for Semantic Textual Similarity*](https://aclanthology.org/S16-1096/). SemEval-2016. | Embedding-based MT evaluation |
111
+ | YiSi | 2019 | Lo. [*YiSi — a Unified Semantic MT Quality Evaluation and Estimation Metric for Languages with Different Levels of Available Resources*](https://aclanthology.org/W19-5358/). WMT 2019. | Embedding-based MT evaluation |
112
+ | Monolingual aligner | 2014 | Sultan, Bethard & Sumner. [*Back to Basics for Monolingual Alignment: Exploiting Word Similarity and Contextual Evidence*](https://aclanthology.org/Q14-1018/). TACL 2014. | Word alignment |
113
+ | SimAlign | 2020 | Jalili Sabet, Dufter, Yvon & Schütze. [*SimAlign: High Quality Word Alignments without Parallel Training Data using Static and Contextualized Embeddings*](https://aclanthology.org/2020.findings-emnlp.147/). Findings of EMNLP 2020. | Word alignment |
114
+
115
+ ## License
116
+
117
+ MIT — see [`LICENSE`](LICENSE).
ryokai-0.1.0/README.md ADDED
@@ -0,0 +1,77 @@
1
+ # ryokai 了解
2
+
3
+ > *Ryokai* (了解, "understood / got it") — a unified Python library for **semantic machine-translation evaluation**, combining the strengths of MEANT 2.0, XMEANT, YiSi-1/2, WOLVESAAR, and SimAlign behind one clean API on top of modern multilingual embeddings.
4
+
5
+ Pure PyTorch + HuggingFace `transformers` — no Stanza, no spaCy, no external parsers. Two HF models cover all 13 supported languages (`en`, `de`, `fr`, `es`, `cs`, `fi`, `hi`, `lv`, `pl`, `ro`, `ru`, `tr`, `zh`) in a single install:
6
+
7
+ - POS / shallow SRL: [`wietsedv/xlm-roberta-base-ft-udpos28`](https://huggingface.co/wietsedv/xlm-roberta-base-ft-udpos28) — ~1.1 GB, downloaded once.
8
+ - Multilingual embeddings: [`sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2`](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) — ~110 MB, used for both same-language and cross-language similarity.
9
+
10
+ Both are one-line swappable for any modern multilingual encoder (Qwen3-Embedding, Jina v3, BGE-M3, Nemotron-8B…) — see [Embedding backbones](DOCUMENTATION.md#embedding-backbones) in `DOCUMENTATION.md`.
11
+
12
+ ## Install
13
+
14
+ ```bash
15
+ pip install ryokai
16
+ ```
17
+
18
+ ## Quickstart
19
+
20
+ ```python
21
+ from ryokai import Ryokai
22
+
23
+ scorer = Ryokai()
24
+ src_lang, tgt_lang = "en", "zh"
25
+
26
+ # Most common: reference-free, word alignment + embedding
27
+ # (XMEANT-lite / YiSi-2 / Doc-embedding adequacy cross-lingual)
28
+ scorer.score(source=src, hypothesis=hyp,
29
+ source_lang=src_lang, target_lang=tgt_lang)
30
+ ```
31
+
32
+ ## Variants
33
+
34
+ One `.score()` call, four modes, dispatched by which arguments you pass. `srl=False` is the default — `ryokai` is no longer MEANT-first.
35
+
36
+ ```python
37
+ from ryokai import Ryokai
38
+ scorer = Ryokai()
39
+ src_lang, tgt_lang = "en", "zh"
40
+
41
+ # Reference-free, word alignment + embedding (default, most common)
42
+ # E.g. Doc-embedding adequacy / YiSi-2 / XMEANT-lite
43
+ scorer.score(source=src, hypothesis=hyp,
44
+ source_lang=src_lang, target_lang=tgt_lang)
45
+
46
+ # Reference-based, word alignment + embedding
47
+ # E.g. Doc-embedding adequacy / WOLVESAAR / YiSi-1 / SimAlign style
48
+ scorer.score(reference=ref, hypothesis=hyp, target_lang=tgt_lang)
49
+
50
+ # Reference-free, frame-based — XMEANT proper
51
+ scorer.score(source=src, hypothesis=hyp,
52
+ source_lang=src_lang, target_lang=tgt_lang, srl=True)
53
+
54
+ # Reference-based, frame-based — MEANT 2.0
55
+ scorer.score(reference=ref, hypothesis=hyp, target_lang=tgt_lang, srl=True)
56
+ ```
57
+
58
+ See [`DOCUMENTATION.md`](DOCUMENTATION.md) for flags, aligner choices, embedding-backbone swaps, AER evaluation harness, CLI, architecture, and custom role weights.
59
+
60
+ ## References
61
+
62
+ Ryokai is glue around several published techniques — credit belongs to their authors.
63
+
64
+ | Technique | Year | Citation | Category |
65
+ | --------- | ---- | -------- | -------- |
66
+ | MEANT | 2011 | Lo & Wu. [*MEANT: An inexpensive, high-accuracy, semi-automatic metric for evaluating translation utility based on semantic roles*](https://aclanthology.org/P11-2042/). ACL 2011. | Semantic-frame MT evaluation |
67
+ | XMEANT | 2014 | Lo, Beloucif, Saers & Wu. [*XMEANT: Better semantic MT evaluation without reference translations*](https://aclanthology.org/P14-2124/). ACL 2014 (Short Papers). | Semantic-frame MT evaluation |
68
+ | MEANT 2.0 | 2017 | Lo. [*MEANT 2.0: Accurate semantic MT evaluation for any output language*](https://aclanthology.org/W17-4767/). WMT 2017. | Semantic-frame MT evaluation |
69
+ | Doc-embedding adequacy | 2015 | Vela & Tan. [*Predicting Machine Translation Adequacy with Document Embeddings*](https://aclanthology.org/W15-3051/). WMT 2015. | Embedding-based MT evaluation |
70
+ | WOLVESAAR | 2016 | Bechara, Gupta, Tan, Orăsan, Mitkov & van Genabith. [*WOLVESAAR at SemEval-2016 Task 1: Replicating the Success of Monolingual Word Alignment and Neural Embeddings for Semantic Textual Similarity*](https://aclanthology.org/S16-1096/). SemEval-2016. | Embedding-based MT evaluation |
71
+ | YiSi | 2019 | Lo. [*YiSi — a Unified Semantic MT Quality Evaluation and Estimation Metric for Languages with Different Levels of Available Resources*](https://aclanthology.org/W19-5358/). WMT 2019. | Embedding-based MT evaluation |
72
+ | Monolingual aligner | 2014 | Sultan, Bethard & Sumner. [*Back to Basics for Monolingual Alignment: Exploiting Word Similarity and Contextual Evidence*](https://aclanthology.org/Q14-1018/). TACL 2014. | Word alignment |
73
+ | SimAlign | 2020 | Jalili Sabet, Dufter, Yvon & Schütze. [*SimAlign: High Quality Word Alignments without Parallel Training Data using Static and Contextualized Embeddings*](https://aclanthology.org/2020.findings-emnlp.147/). Findings of EMNLP 2020. | Word alignment |
74
+
75
+ ## License
76
+
77
+ MIT — see [`LICENSE`](LICENSE).
ryokai-0.1.0/pyproject.toml ADDED
@@ -0,0 +1,71 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "ryokai"
7
+ version = "0.1.0"
8
+ description = "了解 — unified semantic MT evaluation: MEANT, XMEANT, YiSi, WOLVESAAR, and SimAlign-style word alignment over modern multilingual embeddings."
9
+ readme = "README.md"
10
+ license = { text = "MIT" }
11
+ requires-python = ">=3.9"
12
+ authors = [{ name = "Liling Tan" }]
13
+ keywords = [
14
+ "machine-translation",
15
+ "evaluation",
16
+ "metric",
17
+ "meant",
18
+ "xmeant",
19
+ "yisi",
20
+ "wolvesaar",
21
+ "simalign",
22
+ "word-alignment",
23
+ "semantic-roles",
24
+ "huggingface",
25
+ "multilingual",
26
+ ]
27
+ classifiers = [
28
+ "Development Status :: 3 - Alpha",
29
+ "Intended Audience :: Science/Research",
30
+ "License :: OSI Approved :: MIT License",
31
+ "Programming Language :: Python :: 3",
32
+ "Programming Language :: Python :: 3.9",
33
+ "Programming Language :: Python :: 3.10",
34
+ "Programming Language :: Python :: 3.11",
35
+ "Programming Language :: Python :: 3.12",
36
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
37
+ "Topic :: Text Processing :: Linguistic",
38
+ ]
39
+ dependencies = [
40
+ "torch>=2.0",
41
+ "transformers>=4.30",
42
+ "sentence-transformers>=2.2",
43
+ "scipy>=1.10",
44
+ "numpy>=1.24",
45
+ "pyyaml>=6.0",
46
+ ]
47
+
48
+ [project.optional-dependencies]
49
+ test = ["pytest>=7", "pytest-xdist"]
50
+ dev = ["pytest>=7", "pytest-xdist", "ruff", "build", "twine"]
51
+
52
+ [project.scripts]
53
+ ryokai = "ryokai.cli:main"
54
+
55
+ [project.urls]
56
+ Homepage = "https://github.com/alvations/ryokai"
57
+ Repository = "https://github.com/alvations/ryokai"
58
+ Issues = "https://github.com/alvations/ryokai/issues"
59
+
60
+ [tool.setuptools]
61
+ packages = ["ryokai", "ryokai.srl", "ryokai.sim"]
62
+ include-package-data = true
63
+
64
+ [tool.setuptools.package-data]
65
+ ryokai = ["data/*.yaml", "data/**/*.yaml"]
66
+
67
+ [tool.pytest.ini_options]
68
+ testpaths = ["tests"]
69
+ markers = [
70
+ "slow: tests that download HF models (deselect with -m 'not slow')",
71
+ ]
ryokai-0.1.0/ryokai/__init__.py ADDED
@@ -0,0 +1,283 @@
1
+ """ryokai — 了解.
2
+
3
+ Unified semantic machine-translation evaluation. Reference-free or
4
+ reference-based, word-alignment + embedding by default, frame-based
5
+ MEANT 2.0 on opt-in — all behind a single `.score()` call.
6
+
7
+ Quickstart
8
+ ----------
9
+ >>> from ryokai import Ryokai
10
+ >>> scorer = Ryokai()
11
+ >>>
12
+ >>> # Reference-free (most common; XMEANT / YiSi-2 / Doc-embedding adequacy)
13
+ >>> scorer.score(source=src, hypothesis=hyp,
14
+ ... source_lang="en", target_lang="zh")
15
+ >>>
16
+ >>> # Reference-based, word alignment + embeddings
17
+ >>> # (Doc-embedding adequacy / WOLVESAAR / YiSi-1 / SimAlign)
18
+ >>> scorer.score(reference=ref, hypothesis=hyp, target_lang="zh")
19
+ >>>
20
+ >>> # Frame-based MEANT 2.0 — opt in with srl=True
21
+ >>> scorer.score(reference=ref, hypothesis=hyp, target_lang="zh", srl=True)
22
+ >>>
23
+ >>> # Frame-based reference-free (XMEANT proper)
24
+ >>> scorer.score(source=src, hypothesis=hyp,
25
+ ... source_lang="en", target_lang="zh", srl=True)
26
+ """
27
+ from __future__ import annotations
28
+
29
+ from ._langs import SUPPORTED_LANGS
30
+ from .graph import Argument, Frame, SRLGraph
31
+ from .labelconfig import LabelConfig
32
+ from .scorer import DEFAULT_WEIGHTS, MEANTScore, Score, score_nosrl, score_pair
33
+ from .sim import ContextualTokenSimBackend, StaticEmbeddingSimBackend
34
+ from .sim.embeddings import EmbeddingSimBackend
35
+ from .srl import HFPOSHeuristicSRLBackend, HFTokenClassifierSRLBackend, SRLBackend
36
+
37
+ __version__ = "0.1.0"
38
+
39
+
40
class Ryokai:
    """One scorer object for every supported MT-evaluation mode.

    Build an instance once with the models and alignment settings you
    need, then call ``.score(...)`` for each segment pair. The mode that
    runs is decided purely by the arguments of that call:

    +------------------------+--------------+--------------------------------------------+
    | source / reference     | srl          | Mode                                       |
    +========================+==============+============================================+
    | ``source=``            | False (def.) | Reference-free, word alignment + embedding |
    |                        |              | (XMEANT-lite / YiSi-2 / Doc-embedding adq) |
    +------------------------+--------------+--------------------------------------------+
    | ``reference=``         | False (def.) | Reference-based, word alignment + embedding|
    |                        |              | (WOLVESAAR / YiSi-1 / SimAlign style)      |
    +------------------------+--------------+--------------------------------------------+
    | ``source=``            | True         | Reference-free, frame-based (XMEANT)       |
    +------------------------+--------------+--------------------------------------------+
    | ``reference=``         | True         | Reference-based, frame-based (MEANT 2.0)   |
    +------------------------+--------------+--------------------------------------------+

    The constructor fixes *how* scoring is done (models, aligner,
    weights, ...); each call supplies *what* is scored (the sentences,
    their languages, source- vs reference-based, with or without SRL).

    Parameters
    ----------
    srl_backend : SRLBackend | None
        Backend used by calls made with ``srl=True``. When omitted, an
        `HFPOSHeuristicSRLBackend` (multilingual XLM-R UDPOS heuristic,
        one model for all 13 supported languages) is created on first
        use.
    sim_backend : EmbeddingSimBackend | None
        Sentence / argument similarity backend. When omitted, a default
        `EmbeddingSimBackend()` (multilingual MiniLM) is created. Any
        preset (``"qwen3-0.6b"``, ``"jina-v3"``, ``"nemotron-8b"``…) or
        HF model id can be swapped in.
    weights : dict[str, float] | None
        Per-role weights for the frame-based F-score; falls back to
        `DEFAULT_WEIGHTS`.
    label_config : LabelConfig | None
        SRL label aliasing; falls back to the bundled
        `data/labelconfig.yaml`.
    content_only : bool
        No-SRL only: remove stopwords and punctuation before alignment
        (WOLVESAAR style).
    aggregation : str
        No-SRL only: ``"f1"`` (default) or ``"harmonic"``.
    aligner : str
        No-SRL only: ``"sentence"`` (fast, default), ``"hungarian"``,
        ``"argmax"``, ``"itermax"``, or ``"mai"``.
    threshold : float
        No-SRL contextual aligners: pairs whose cosine falls below this
        floor are discarded. Default 0.5.
    exact_match_shortcut : bool
        No-SRL contextual aligners: case-insensitive surface equality
        lifts a pair's similarity to 1.0 (Sultan-style prior).
    """

    def __init__(
        self,
        srl_backend: SRLBackend | None = None,
        sim_backend: EmbeddingSimBackend | None = None,
        weights: dict[str, float] | None = None,
        label_config: LabelConfig | None = None,
        content_only: bool = False,
        aggregation: str = "f1",
        aligner: str = "sentence",
        threshold: float = 0.5,
        exact_match_shortcut: bool = True,
    ) -> None:
        # May stay None here: the `srl` property fills it in on demand.
        self._srl_backend = srl_backend

        # Any falsy argument (not just None) is replaced by its default.
        if not sim_backend:
            sim_backend = EmbeddingSimBackend()
        self.sim = sim_backend

        self.weights = weights if weights else DEFAULT_WEIGHTS

        if not label_config:
            label_config = LabelConfig()
        self.label_config = label_config

        # Settings forwarded verbatim to the no-SRL scorer.
        self.content_only = content_only
        self.aggregation = aggregation
        self.aligner = aligner
        self.threshold = threshold
        self.exact_match_shortcut = exact_match_shortcut

    @property
    def srl(self) -> SRLBackend:
        """Return the SRL backend, building the default one on first access.

        Deferring construction means callers that never score with
        ``srl=True`` never trigger the SRL model download.
        """
        backend = self._srl_backend
        if backend is None:
            backend = HFPOSHeuristicSRLBackend()
            self._srl_backend = backend
        return backend

    @staticmethod
    def _check_lang(name: str, value: str | None) -> None:
        """Raise ``ValueError`` if *value* is set but not a supported language.

        *name* is the argument name to quote in the error message; a
        ``None`` value is accepted silently.
        """
        if value is not None and value not in SUPPORTED_LANGS:
            raise ValueError(
                f"{name}={value!r} not supported. "
                f"Supported: {sorted(SUPPORTED_LANGS)}"
            )

    def score(
        self,
        *,
        hypothesis: str,
        target_lang: str,
        source: str | None = None,
        source_lang: str | None = None,
        reference: str | None = None,
        srl: bool = False,
    ) -> Score:
        """Score a single MT segment; the class docstring lists the modes.

        Supply ``source=`` or ``reference=`` — one of them, never both
        and never neither — otherwise ``ValueError`` is raised. A higher
        ``f1`` on the result means a more adequate translation.
        """
        given_source = source is not None
        given_reference = reference is not None
        if given_source == given_reference:
            raise ValueError(
                "pass exactly one of source= (reference-free) or "
                "reference= (reference-based)"
            )

        for arg_name, code in (
            ("target_lang", target_lang),
            ("source_lang", source_lang),
        ):
            self._check_lang(arg_name, code)

        # Both private scorers take the same keyword set, so pick one
        # and make a single call.
        run = self._score_with_srl if srl else self._score_nosrl
        return run(
            hypothesis=hypothesis,
            target_lang=target_lang,
            source=source,
            source_lang=source_lang,
            reference=reference,
        )

    def _score_nosrl(
        self,
        *,
        hypothesis: str,
        target_lang: str,
        source: str | None,
        source_lang: str | None,
        reference: str | None,
    ) -> Score:
        """Score via word alignment + embeddings (no semantic frames)."""
        # The hypothesis is compared against the source when one was
        # given, otherwise against the reference (which is in the target
        # language).
        if source is None:
            other_text, other_lang = reference, target_lang
        else:
            other_text, other_lang = source, source_lang
        return score_nosrl(
            other_text,
            hypothesis,
            self.sim,
            # An unset source language falls back to the target language.
            lang=other_lang or target_lang,
            content_only=self.content_only,
            aggregation=self.aggregation,
            aligner=self.aligner,
            threshold=self.threshold,
            exact_match_shortcut=self.exact_match_shortcut,
        )

    def _score_with_srl(
        self,
        *,
        hypothesis: str,
        target_lang: str,
        source: str | None,
        source_lang: str | None,
        reference: str | None,
    ) -> Score:
        """Score by parsing both sides with the SRL backend (frame-based)."""
        hyp_graph = self.srl.parse(hypothesis, target_lang)
        if source is None:
            other_text, other_lang = reference, target_lang
        else:
            # An unset source language falls back to the target language.
            other_text, other_lang = source, source_lang or target_lang
        ref_graph = self.srl.parse(other_text, other_lang)
        return score_pair(
            ref_graph, hyp_graph, self.sim, self.weights, self.label_config,
        )

    def score_corpus(
        self,
        *,
        hypotheses: list[str],
        target_lang: str,
        sources: list[str] | None = None,
        source_lang: str | None = None,
        references: list[str] | None = None,
        srl: bool = False,
    ) -> list[Score]:
        """Score aligned lists segment by segment, returning one Score each.

        Dispatch mirrors `.score()`: give `sources=` or `references=`,
        exactly one of the two. ``ValueError`` is raised for a bad
        combination or when the two lists differ in length.
        """
        use_sources = sources is not None
        if use_sources == (references is not None):
            raise ValueError(
                "pass exactly one of sources= or references="
            )

        others = sources if use_sources else references
        n = len(hypotheses)
        if len(others) != n:
            label = "sources" if use_sources else "references"
            raise ValueError(
                f"length mismatch: hypotheses={n}, "
                f"{label}={len(others)}"
            )

        # Name of the per-segment keyword that carries the "other side".
        other_kw = "source" if use_sources else "reference"
        return [
            self.score(
                hypothesis=hyp,
                target_lang=target_lang,
                source_lang=source_lang,
                srl=srl,
                **{other_kw: other},
            )
            for hyp, other in zip(hypotheses, others)
        ]
260
+
261
+ # Backwards-compatible alias — most MT-eval literature calls this MEANT.
262
+ MEANT = Ryokai
263
+
264
+ __all__ = [
265
+ "Ryokai",
266
+ "MEANT",
267
+ "Score",
268
+ "MEANTScore",
269
+ "SRLGraph",
270
+ "Frame",
271
+ "Argument",
272
+ "LabelConfig",
273
+ "SRLBackend",
274
+ "HFPOSHeuristicSRLBackend",
275
+ "HFTokenClassifierSRLBackend",
276
+ "EmbeddingSimBackend",
277
+ "ContextualTokenSimBackend",
278
+ "StaticEmbeddingSimBackend",
279
+ "DEFAULT_WEIGHTS",
280
+ "SUPPORTED_LANGS",
281
+ "score_pair",
282
+ "score_nosrl",
283
+ ]
ryokai-0.1.0/ryokai/_langs.py ADDED
@@ -0,0 +1,16 @@
1
+ """Registry of the 13 languages MEANT 2.0 originally supported."""
2
+ SUPPORTED_LANGS: dict[str, str] = {
3
+ "en": "English",
4
+ "de": "German",
5
+ "fr": "French",
6
+ "es": "Spanish",
7
+ "cs": "Czech",
8
+ "fi": "Finnish",
9
+ "hi": "Hindi",
10
+ "lv": "Latvian",
11
+ "pl": "Polish",
12
+ "ro": "Romanian",
13
+ "ru": "Russian",
14
+ "tr": "Turkish",
15
+ "zh": "Chinese",
16
+ }
ryokai-0.1.0/ryokai/cli.py ADDED
@@ -0,0 +1,87 @@
1
+ """ryokai CLI — score a parallel hyp file vs reference or source file."""
2
+ from __future__ import annotations
3
+
4
+ import argparse
5
+ import sys
6
+
7
+ from . import Ryokai, SUPPORTED_LANGS
8
+
9
+
10
def _read_lines(path: str) -> list[str]:
    """Return the lines of the UTF-8 text file at *path*, newline-stripped.

    The file is opened in a ``with`` block so the handle is closed as soon
    as the lines have been read, including when reading raises.
    """
    with open(path, encoding="utf-8") as handle:
        return [line.rstrip("\n") for line in handle]


def main(argv: list[str] | None = None) -> int:
    """Run the ``ryokai`` command-line scorer.

    Reads a hypothesis file plus either a reference file (``--ref``) or a
    source file (``--src``), one segment per line, scores every pair with
    `Ryokai.score_corpus`, and prints either the corpus mean F1 or, with
    ``--no-corpus-avg``, one F1 per line.

    Parameters
    ----------
    argv : list[str] | None
        Argument vector to parse; ``None`` lets ``argparse`` read
        ``sys.argv[1:]``.

    Returns
    -------
    int
        ``0`` on success, ``2`` when the two input files have different
        line counts (a message is written to stderr in that case).
    """
    p = argparse.ArgumentParser(
        prog="ryokai",
        description=(
            "Score MT output. Pass --ref for reference-based scoring "
            "(MEANT / WOLVESAAR / YiSi-1 / SimAlign style), or --src "
            "for reference-free scoring (XMEANT / YiSi-2 / Doc-embedding "
            "adequacy)."
        ),
    )
    group = p.add_mutually_exclusive_group(required=True)
    group.add_argument("--ref", help="Path to reference text (one segment per line).")
    group.add_argument("--src", help="Path to source text (one segment per line).")
    p.add_argument("--hyp", required=True, help="Path to hypothesis / MT output (one segment per line).")
    p.add_argument(
        "--target-lang", required=True, choices=sorted(SUPPORTED_LANGS),
        help="Two-letter language code of the hypothesis.",
    )
    p.add_argument(
        "--source-lang", choices=sorted(SUPPORTED_LANGS),
        help="Language of --src. Defaults to --target-lang. Ignored if --ref is used.",
    )
    p.add_argument(
        "--srl", action="store_true",
        help="Use the frame-based MEANT 2.0 / XMEANT scorer. Default: word alignment + embedding.",
    )
    p.add_argument(
        "--aligner", default="sentence",
        choices=["sentence", "hungarian", "argmax", "itermax", "mai"],
        help="No-SRL aligner. Ignored when --srl is set.",
    )
    p.add_argument(
        "--sim-model", default=None,
        help="Override the multilingual embedding model id or preset.",
    )
    p.add_argument(
        "--no-corpus-avg", action="store_true",
        help="Print one F1 per line instead of corpus mean.",
    )
    args = p.parse_args(argv)

    # Read through the helper so every input file handle is closed
    # deterministically instead of being left to the garbage collector.
    hyps = _read_lines(args.hyp)
    if args.ref:
        others = _read_lines(args.ref)
        kind = "references"
    else:
        others = _read_lines(args.src)
        kind = "sources"
    if len(others) != len(hyps):
        print(
            f"{kind} and hypotheses line counts differ: {len(others)} vs {len(hyps)}",
            file=sys.stderr,
        )
        return 2

    from .sim.embeddings import EmbeddingSimBackend
    # Passing None lets Ryokai build its default similarity backend.
    sim = EmbeddingSimBackend(args.sim_model) if args.sim_model else None
    scorer = Ryokai(sim_backend=sim, aligner=args.aligner)
    scores = scorer.score_corpus(
        hypotheses=hyps,
        target_lang=args.target_lang,
        sources=others if args.src else None,
        references=others if args.ref else None,
        source_lang=args.source_lang,
        srl=args.srl,
    )

    if args.no_corpus_avg:
        for s in scores:
            print(f"{s.f1:.4f}")
    else:
        # Guard the division so two empty input files report 0.0.
        mean = sum(s.f1 for s in scores) / len(scores) if scores else 0.0
        print(f"ryokai = {mean:.4f} (n={len(scores)})")
    return 0
84
+
85
+
86
+ if __name__ == "__main__":
87
+ raise SystemExit(main())