simulstream 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. docs/source/conf.py +47 -0
  2. simulstream/__init__.py +15 -0
  3. simulstream/client/__init__.py +0 -0
  4. simulstream/client/wav_reader_client.py +228 -0
  5. simulstream/config.py +31 -0
  6. simulstream/inference.py +170 -0
  7. simulstream/metrics/__init__.py +0 -0
  8. simulstream/metrics/detokenizers.py +71 -0
  9. simulstream/metrics/logger.py +32 -0
  10. simulstream/metrics/readers.py +348 -0
  11. simulstream/metrics/score_latency.py +130 -0
  12. simulstream/metrics/score_quality.py +169 -0
  13. simulstream/metrics/scorers/__init__.py +0 -0
  14. simulstream/metrics/scorers/latency/__init__.py +115 -0
  15. simulstream/metrics/scorers/latency/mwersegmenter.py +136 -0
  16. simulstream/metrics/scorers/latency/stream_laal.py +119 -0
  17. simulstream/metrics/scorers/quality/__init__.py +132 -0
  18. simulstream/metrics/scorers/quality/comet.py +57 -0
  19. simulstream/metrics/scorers/quality/mwersegmenter.py +93 -0
  20. simulstream/metrics/scorers/quality/sacrebleu.py +59 -0
  21. simulstream/metrics/stats.py +184 -0
  22. simulstream/server/__init__.py +0 -0
  23. simulstream/server/http_server.py +95 -0
  24. simulstream/server/message_processor.py +156 -0
  25. simulstream/server/speech_processors/__init__.py +173 -0
  26. simulstream/server/speech_processors/base.py +135 -0
  27. simulstream/server/speech_processors/base_streamatt.py +320 -0
  28. simulstream/server/speech_processors/canary_sliding_window_retranslation.py +73 -0
  29. simulstream/server/speech_processors/hf_sliding_window_retranslation.py +87 -0
  30. simulstream/server/speech_processors/incremental_output.py +85 -0
  31. simulstream/server/speech_processors/seamless_sliding_window_retranslation.py +84 -0
  32. simulstream/server/speech_processors/seamless_streamatt.py +268 -0
  33. simulstream/server/speech_processors/simuleval_wrapper.py +165 -0
  34. simulstream/server/speech_processors/sliding_window_retranslation.py +135 -0
  35. simulstream/server/speech_processors/vad_wrapper.py +180 -0
  36. simulstream/server/websocket_server.py +236 -0
  37. simulstream-0.1.0.dist-info/METADATA +465 -0
  38. simulstream-0.1.0.dist-info/RECORD +48 -0
  39. simulstream-0.1.0.dist-info/WHEEL +5 -0
  40. simulstream-0.1.0.dist-info/entry_points.txt +8 -0
  41. simulstream-0.1.0.dist-info/licenses/LICENSE +201 -0
  42. simulstream-0.1.0.dist-info/top_level.txt +3 -0
  43. uts/__init__.py +0 -0
  44. uts/metrics/__init__.py +0 -0
  45. uts/metrics/log_reader.py +50 -0
  46. uts/speech_processors/__init__.py +0 -0
  47. uts/speech_processors/test_simuleval_wrapper.py +88 -0
  48. uts/utils.py +5 -0
@@ -0,0 +1,115 @@
1
+ # Copyright 2025 FBK
2
+
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License
14
+
15
+ import argparse
16
+ import importlib
17
+ import pkgutil
18
+ from abc import abstractmethod
19
+ from dataclasses import dataclass
20
+ from typing import List, Optional
21
+
22
+ from simulstream.metrics.readers import OutputWithDelays, ReferenceSentenceDefinition
23
+
24
+
25
LATENCY_SCORER_REGISTRY = {}


def register_latency_scorer(name):
    """
    Create a class decorator that adds a latency scorer to the registry.

    Args:
        name (str): The unique identifier under which the scorer is stored.

    Raises:
        TypeError: If the decorated class does not inherit from
            :class:`LatencyScorer`.

    Example:
        >>> @register_latency_scorer("stream_laal")
        ... class StreamLAALScorer(LatencyScorer):
        ...     ...
    """
    def _decorator(scorer_cls):
        # Reject anything that does not implement the LatencyScorer interface.
        if not issubclass(scorer_cls, LatencyScorer):
            raise TypeError(
                f"Cannot register {scorer_cls.__name__}: must be a subclass of LatencyScorer")
        LATENCY_SCORER_REGISTRY[name] = scorer_cls
        return scorer_cls

    return _decorator
51
+
52
+
53
@dataclass
class LatencyScoringSample:
    """
    Data structure representing a single latency evaluation sample.

    Attributes:
        audio_name (str): The identifier of the audio file.
        hypothesis (OutputWithDelays): The system-generated hypothesis text together with its
            ideal and computational-aware delays.
        reference (Optional[List[ReferenceSentenceDefinition]]): One or more reference sentences,
            including the text, start time and duration, or ``None`` if not required.
    """
    audio_name: str
    hypothesis: OutputWithDelays
    reference: Optional[List[ReferenceSentenceDefinition]] = None
67
+
68
+
69
@dataclass
class LatencyScores:
    """
    Data structure representing a latency score.

    Attributes:
        ideal_latency (float): The latency score in ideal conditions, which does not include
            computational costs.
        computational_aware_latency (Optional[float]): The latency score in computational-aware
            conditions, which includes computational costs, or ``None`` when not computed.
    """
    ideal_latency: float
    computational_aware_latency: Optional[float] = None
82
+
83
+
84
class LatencyScorer:
    """
    Abstract base class for all latency scorers.

    A latency scorer evaluates system hypotheses against references and returns a
    :class:`LatencyScores` object that represents the latency scores.

    Subclasses must implement the abstract methods defined here and should be registered via
    :func:`register_latency_scorer`.

    NOTE(review): the class does not inherit from :class:`abc.ABC`, so the ``@abstractmethod``
    markers are not enforced at instantiation time — confirm whether this is intentional.

    Args:
        args (argparse.Namespace): Parsed command-line arguments.
    """
    def __init__(self, args: argparse.Namespace):
        self.args = args

    @abstractmethod
    def score(self, samples: List[LatencyScoringSample]) -> LatencyScores:
        """
        Compute latency scores over a list of samples.

        Args:
            samples (List[LatencyScoringSample]): Samples to be evaluated.

        Returns:
            LatencyScores: The computed latency metrics.
        """
        ...

    @classmethod
    @abstractmethod
    def add_arguments(cls, parser: argparse.ArgumentParser) -> None:
        """
        Add scorer-specific arguments to the CLI parser.

        Args:
            parser (argparse.ArgumentParser): The parser to extend.
        """
        ...

    @abstractmethod
    def requires_reference(self) -> bool:
        """
        Indicate whether this scorer requires reference sentences.

        Returns:
            bool: True if references are required, False otherwise.
        """
        ...
112
+
113
+
114
# Import every module in this package so that each @register_latency_scorer decorator
# runs and populates LATENCY_SCORER_REGISTRY as an import side effect.
for loader, name, is_pkg in pkgutil.walk_packages(__path__, __name__ + "."):
    importlib.import_module(name)
@@ -0,0 +1,136 @@
1
+ # Copyright 2025 FBK
2
+
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License
14
+
15
+ from abc import abstractmethod
16
+ from dataclasses import dataclass
17
+ from typing import List
18
+
19
+ from mweralign import mweralign
20
+
21
+ from simulstream.metrics.readers import ReferenceSentenceDefinition, OutputWithDelays, text_items
22
+ from simulstream.metrics.scorers.latency import LatencyScorer, LatencyScoringSample, LatencyScores
23
+
24
+
25
@dataclass
class ResegmentedLatencyScoringSample:
    """
    A sample containing realigned hypotheses and references.

    Attributes:
        audio_name (str): The identifier of the audio file.
        hypothesis (List[OutputWithDelays]): Hypothesis segments after realignment, each carrying
            the delays of its own text items.
        reference (List[ReferenceSentenceDefinition]): Reference sentences aligned one-to-one
            with the hypothesis segments.
    """
    audio_name: str
    hypothesis: List[OutputWithDelays]
    reference: List[ReferenceSentenceDefinition]
38
+
39
+
40
class MWERSegmenterBasedLatencyScorer(LatencyScorer):
    """
    Abstract base class for scorers that require aligned system outputs and references through
    MWER Segmenter alignment.

    This class wraps a latency scorer and applies the MWER Segmenter alignment by `"Effects of
    automatic alignment on speech translation metrics"
    <https://aclanthology.org/2025.iwslt-1.7/>`_ to hypotheses before scoring.

    Subclasses must implement :meth:`_do_score`, which operates on
    :class:`ResegmentedLatencyScoringSample` instances where hypotheses and references are aligned.

    Example:
        >>> class CustomLatencyScorer(MWERSegmenterBasedLatencyScorer):
        ...     def _do_score(self, samples):
        ...         # Compute a custom latency score
        ...         return LatencyScores(...)
    """
    def __init__(self, args):
        super().__init__(args)
        # Unit used by text_items() to split hypothesis text when redistributing delays
        # (presumably word- or character-level — confirm against text_items).
        self.latency_unit = args.latency_unit

    def requires_reference(self) -> bool:
        # Realignment is performed against the reference, so references are always needed.
        return True

    @abstractmethod
    def _do_score(self, samples: List[ResegmentedLatencyScoringSample]) -> LatencyScores:
        """
        Compute latency scores on resegmented samples.

        Subclasses must override this method.

        Args:
            samples (List[ResegmentedLatencyScoringSample]): Aligned
                hypothesis–reference pairs with delay information.

        Returns:
            LatencyScores: The computed latency metrics.
        """
        ...

    def _split_delays_by_segmented_text(
            self, delays: List[float], segmented_text: List[str]) -> List[List[float]]:
        """
        Assign delay values to the corresponding segmented hypotheses.

        Args:
            delays (List[float]): Delay values (per token or per char).
            segmented_text (List[str]): Segmented hypothesis strings.

        Returns:
            List[List[float]]: Delays split per segment.
        """
        segmented_delays = []
        index = 0

        # Consume the flat delay list in order: each segment takes exactly as many delays
        # as it has text items under the configured latency unit.
        for segment in segmented_text:
            segment_len = len(text_items(segment, self.latency_unit))
            segmented_delays.append(delays[index:index + segment_len])
            index += segment_len
        # Resegmentation only moves line boundaries, so every delay must be consumed.
        assert len(delays) == index, \
            f"Index {index} should have reached end of delays ({len(delays)})"
        return segmented_delays

    def score(self, samples: List[LatencyScoringSample]) -> LatencyScores:
        """
        Realign each hypothesis to its reference sentences, redistribute the delays over the
        resegmented lines, and delegate scoring to :meth:`_do_score`.
        """
        resegmented_samples = []
        for sample in samples:
            assert sample.reference is not None, "Cannot realign hypothesis to missing reference"

            # mweralign resegments the hypothesis so it has the same number of
            # newline-separated lines as the reference.
            resegmented_hypos = mweralign.align_texts(
                "\n".join([sentence_def.content for sentence_def in sample.reference]),
                sample.hypothesis.final_text).split("\n")

            assert len(resegmented_hypos) == len(sample.reference), \
                f"Reference ({sample.audio_name}) has mismatched number of target " \
                f"({len(sample.reference)}) and resegmented lines ({len(resegmented_hypos)})"

            # Split both delay tracks along the new line boundaries.
            ideal_delays_splits = self._split_delays_by_segmented_text(
                sample.hypothesis.ideal_delays,
                resegmented_hypos)
            computational_aware_delays_splits = self._split_delays_by_segmented_text(
                sample.hypothesis.computational_aware_delays,
                resegmented_hypos)
            assert len(ideal_delays_splits) == len(computational_aware_delays_splits)

            # Re-pack each resegmented line with its slice of both delay tracks.
            resegmented_hypos_with_delays = []
            for text, ideal_delay, computational_aware_delay in zip(
                    resegmented_hypos, ideal_delays_splits, computational_aware_delays_splits):
                resegmented_hypos_with_delays.append(
                    OutputWithDelays(text, ideal_delay, computational_aware_delay))

            resegmented_samples.append(ResegmentedLatencyScoringSample(
                sample.audio_name,
                resegmented_hypos_with_delays,
                sample.reference,
            ))
        return self._do_score(resegmented_samples)
@@ -0,0 +1,119 @@
1
+ # Copyright 2025 FBK
2
+
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License
14
+
15
+ import argparse
16
+ import logging
17
+ import statistics
18
+ from typing import List
19
+
20
+ from simulstream.metrics.readers import text_items
21
+ from simulstream.metrics.scorers.latency import register_latency_scorer, LatencyScores
22
+ from simulstream.metrics.scorers.latency.mwersegmenter import MWERSegmenterBasedLatencyScorer, \
23
+ ResegmentedLatencyScoringSample
24
+
25
+
26
+ LOGGER = logging.getLogger('simulstream.metrics.scorers.latency.stream_laal')
27
+
28
+
29
@register_latency_scorer("stream_laal")
class StreamLaal(MWERSegmenterBasedLatencyScorer):
    """
    Computes StreamLAAL version 2.0, as proposed in
    `StreamAtt: Direct Streaming Speech-to-Text Translation with Attention-based
    Audio History Selection <https://aclanthology.org/2024.acl-long.202.pdf>`_.


    The main difference with version 1 is the different segmentation of
    the text (uses mwerSegmenter python package instead of Matusov's executable).
    """

    @staticmethod
    def _sentence_level_laal(
            delays: List[float], source_length: float, target_length: int) -> float:
        """
        Function to compute Length Adaptive Average Lagging (LAAL) on one sentence as proposed in
        `CUNI-KIT System for Simultaneous Speech Translation Task at IWSLT 2022
        <https://arxiv.org/abs/2204.06028>`_ and
        `Length-Adaptive Average Lagging for Simultaneous Speech Translation
        <https://arxiv.org/abs/2206.05807>`_.
        It is the original Average Lagging as proposed in
        `Controllable Latency using Prefix-to-Prefix Framework
        <https://arxiv.org/abs/1810.08398>`_
        but is robust to the length difference between the hypothesis and reference.

        The implementation is derived by that available in SimulEval (see ``latency_scorer.py``
        in <https://github.com/facebookresearch/SimulEval/>).

        Args:
            delays (List[float]): Delay of each generated item, offset from the sentence start
                (must be non-empty; the caller skips empty hypotheses).
            source_length (float): Duration of the source sentence.
            target_length (int): Number of items in the reference sentence.

        Returns:
            float: the latency score on one sentence.
        """
        # If the very first output arrives after the source has ended, the lag is that delay.
        if delays[0] > source_length:
            return delays[0]

        LAAL = 0
        # Length-adaptive emission rate: the longer of hypothesis and reference lengths
        # over the source duration (this is the "LA" part of LAAL).
        gamma = max(len(delays), target_length) / source_length
        tau = 0
        for t_minus_1, d in enumerate(delays):
            # Lag of item t is its delay minus the position an ideal policy would reach.
            LAAL += d - t_minus_1 / gamma
            tau = t_minus_1 + 1

            # Items emitted after the source has finished do not contribute further.
            if d >= source_length:
                break
        LAAL /= tau
        return LAAL

    def _do_score(self, samples: List[ResegmentedLatencyScoringSample]) -> LatencyScores:
        """
        Average sentence-level LAAL over all resegmented sentences of all samples.

        Returns:
            LatencyScores: mean ideal LAAL and mean computational-aware LAAL.
        """
        sentence_level_ideal_scores = []
        sentence_level_ca_scores = []
        skipped_sentences = 0
        for sample in samples:
            for sentence_output, sentence_reference in zip(sample.hypothesis, sample.reference):
                # offset delays with respect to reference start of the utterance
                delays_from_sentence_start = [
                    delay - sentence_reference.start_time
                    for delay in sentence_output.ideal_delays]
                ca_delays_from_sentence_start = [
                    delay - sentence_reference.start_time
                    for delay in sentence_output.computational_aware_delays]
                assert len(delays_from_sentence_start) == len(ca_delays_from_sentence_start)

                target_length = len(text_items(sentence_reference.content, self.latency_unit))

                # Empty hypothesis segments cannot be scored (LAAL reads delays[0]).
                if len(delays_from_sentence_start) > 0:
                    sentence_level_ideal_scores.append(
                        self._sentence_level_laal(
                            delays_from_sentence_start,
                            sentence_reference.duration,
                            target_length)
                    )
                    sentence_level_ca_scores.append(
                        self._sentence_level_laal(
                            ca_delays_from_sentence_start,
                            sentence_reference.duration,
                            target_length)
                    )
                else:
                    skipped_sentences += 1

        if skipped_sentences > 0:
            LOGGER.warning(
                f"{skipped_sentences} sentences have been skipped in LAAL computation as they "
                "were empty")
        # NOTE(review): statistics.mean raises StatisticsError if every sentence was skipped.
        return LatencyScores(
            statistics.mean(sentence_level_ideal_scores),
            statistics.mean(sentence_level_ca_scores))

    @classmethod
    def add_arguments(cls, parser: argparse.ArgumentParser) -> None:
        # StreamLAAL has no scorer-specific CLI options.
        pass
@@ -0,0 +1,132 @@
1
+ # Copyright 2025 FBK
2
+
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License
14
+
15
+ import argparse
16
+ import importlib
17
+ import pkgutil
18
+ from abc import abstractmethod
19
+ from dataclasses import dataclass
20
+ from typing import List, Optional
21
+
22
+
23
QUALITY_SCORER_REGISTRY = {}


def register_quality_scorer(name):
    """
    Create a class decorator that adds a quality scorer to the registry.

    Args:
        name (str): The unique identifier under which the scorer is stored.

    Raises:
        TypeError: If the decorated class does not inherit from
            :class:`QualityScorer`.

    Example:
        >>> @register_quality_scorer("bleu")
        ... class BLEUScorer(QualityScorer):
        ...     ...
    """
    def _decorator(scorer_cls):
        # Reject anything that does not implement the QualityScorer interface.
        if not issubclass(scorer_cls, QualityScorer):
            raise TypeError(
                f"Cannot register {scorer_cls.__name__}: must be a subclass of QualityScorer")
        QUALITY_SCORER_REGISTRY[name] = scorer_cls
        return scorer_cls

    return _decorator
49
+
50
+
51
@dataclass
class QualityScoringSample:
    """
    Data structure representing a single quality evaluation sample.

    Attributes:
        audio_name (str): The identifier of the audio file.
        hypothesis (str): The system-generated hypothesis text.
        reference (Optional[List[str]]): One or more reference translations, or ``None`` if not
            required.
        source (Optional[List[str]]): The source transcription or text, or ``None`` if not
            required.
    """
    audio_name: str
    hypothesis: str
    reference: Optional[List[str]] = None
    source: Optional[List[str]] = None
68
+
69
+
70
class QualityScorer:
    """
    Abstract base class for all quality scorers.

    A quality scorer evaluates system hypotheses against references and/or source sentences
    and returns a numerical score.

    Subclasses must implement the abstract methods defined here and should be registered via
    :func:`register_quality_scorer`.

    NOTE(review): the class does not inherit from :class:`abc.ABC`, so the ``@abstractmethod``
    markers are not enforced at instantiation time — confirm whether this is intentional.

    Args:
        args (argparse.Namespace): Parsed command-line arguments.
    """
    def __init__(self, args: argparse.Namespace):
        self.args = args

    @abstractmethod
    def score(self, samples: List[QualityScoringSample]) -> float:
        """
        Compute a quality score over a list of samples.

        Args:
            samples (List[QualityScoringSample]): Samples to be evaluated.

        Returns:
            float: The computed quality score.
        """
        ...

    @classmethod
    @abstractmethod
    def add_arguments(cls, parser: argparse.ArgumentParser) -> None:
        """
        Add scorer-specific arguments to the CLI parser.

        Args:
            parser (argparse.ArgumentParser): The parser to extend.
        """
        ...

    @abstractmethod
    def requires_source(self) -> bool:
        """
        Indicate whether this scorer requires the source text.

        Returns:
            bool: True if source sentences are required, False otherwise.
        """
        ...

    @abstractmethod
    def requires_reference(self) -> bool:
        """
        Indicate whether this scorer requires reference translations.

        Returns:
            bool: True if references are required, False otherwise.
        """
        ...
129
+
130
+
131
# Import every module in this package so that each @register_quality_scorer decorator
# runs and populates QUALITY_SCORER_REGISTRY as an import side effect.
for loader, name, is_pkg in pkgutil.walk_packages(__path__, __name__ + "."):
    importlib.import_module(name)
@@ -0,0 +1,57 @@
1
+ # Copyright 2025 FBK
2
+
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License
14
+
15
+ import argparse
16
+ import sys
17
+ from typing import List
18
+
19
+ from simulstream.metrics.scorers.quality import register_quality_scorer
20
+ from simulstream.metrics.scorers.quality.mwersegmenter import MWERSegmenterBasedQualityScorer, \
21
+ ResegmentedQualityScoringSample
22
+
23
+ try:
24
+ from comet import download_model, load_from_checkpoint
25
+ except ImportError:
26
+ sys.exit("Please install comet first with `pip install unbabel-comet`.")
27
+
28
+
29
@register_quality_scorer("comet")
class CometScorer(MWERSegmenterBasedQualityScorer):
    """
    Quality scorer computing the COMET neural metric on MWER-resegmented hypotheses.

    Requires the ``unbabel-comet`` package. The model checkpoint is resolved (downloaded or
    taken from cache) at construction time.

    Args:
        args (argparse.Namespace): Parsed command-line arguments; uses ``args.model`` and
            ``args.batch_size``.
    """
    def __init__(self, args: argparse.Namespace):
        super().__init__(args)
        self.batch_size = args.batch_size
        # Resolve the model name (e.g. a HuggingFace identifier) to a local checkpoint path.
        model_path = download_model(args.model)
        self.model = load_from_checkpoint(model_path)
        # Inference-only usage.
        self.model.eval()

    def _do_score(self, samples: List[ResegmentedQualityScoringSample]) -> float:
        """
        Compute the corpus-level COMET system score over all resegmented samples.

        Args:
            samples (List[ResegmentedQualityScoringSample]): Aligned
                hypothesis/reference/source triplets.

        Returns:
            float: The COMET system score.
        """
        comet_data = []
        for sample in samples:
            # NOTE(review): assumes sample.source is not None; requires_source() returns
            # True, so the caller is expected to always provide sources — confirm upstream.
            for hyp, ref, src in zip(sample.hypothesis, sample.reference, sample.source):
                comet_data.append({
                    "src": src.strip(),
                    "mt": hyp.strip(),
                    "ref": ref.strip()
                })

        metric = self.model.predict(comet_data, batch_size=self.batch_size)
        return metric.system_score

    @classmethod
    def add_arguments(cls, parser: argparse.ArgumentParser) -> None:
        """
        Add COMET-specific arguments: the model name and the prediction batch size.
        """
        parser.add_argument("--model", type=str, default="Unbabel/wmt22-comet-da")
        parser.add_argument("--batch-size", type=int, default=16)

    def requires_source(self) -> bool:
        # COMET is a source-aware metric.
        return True
@@ -0,0 +1,93 @@
1
+ # Copyright 2025 FBK
2
+
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License
14
+
15
+ from abc import abstractmethod
16
+ from dataclasses import dataclass
17
+ from typing import List, Optional
18
+
19
+ from mweralign import mweralign
20
+
21
+ from simulstream.metrics.scorers.quality import QualityScorer, QualityScoringSample
22
+
23
+
24
@dataclass
class ResegmentedQualityScoringSample:
    """
    A sample containing realigned hypotheses and references.

    Attributes:
        audio_name (str): The identifier of the audio file.
        hypothesis (List[str]): Hypothesis lines after realignment.
        reference (List[str]): Reference lines aligned one-to-one with the hypothesis lines.
        source (Optional[List[str]]): Source text, or ``None`` if not available.
    """
    audio_name: str
    hypothesis: List[str]
    reference: List[str]
    source: Optional[List[str]] = None
39
+
40
+
41
class MWERSegmenterBasedQualityScorer(QualityScorer):
    """
    Abstract base class for scorers that require aligned system outputs and references through
    MWER Segmenter alignment.

    This class wraps a quality scorer and applies the MWER Segmenter alignment by `"Effects of
    automatic alignment on speech translation metrics"
    <https://aclanthology.org/2025.iwslt-1.7/>`_ to hypotheses before scoring.

    Subclasses must implement :meth:`_do_score`, which receives
    :class:`ResegmentedQualityScoringSample` instances, where output and references are aligned.

    Example:
        >>> class CustomQualityScorer(MWERSegmenterBasedQualityScorer):
        ...     def _do_score(self, samples):
        ...         # Compute a custom quality score
        ...         return ...
    """
    def requires_reference(self) -> bool:
        # Realignment is performed against the reference, so references are always needed.
        return True

    @abstractmethod
    def _do_score(self, samples: List[ResegmentedQualityScoringSample]) -> float:
        """
        Compute the final score on resegmented samples.

        This method must be implemented by subclasses.

        Args:
            samples (List[ResegmentedQualityScoringSample]): The aligned
                hypothesis–reference pairs, plus optional sources.

        Returns:
            float: The computed score.
        """
        ...

    def score(self, samples: List[QualityScoringSample]) -> float:
        """
        Realign each hypothesis to its reference sentences with mweralign, then delegate
        scoring to :meth:`_do_score`.
        """
        resegmented_samples = []
        for sample in samples:
            assert sample.reference is not None, "Cannot realign hypothesis to missing reference"
            # mweralign resegments the hypothesis so it has as many newline-separated
            # lines as the reference.
            resegmented_hypos = mweralign.align_texts(
                "\n".join(sample.reference), sample.hypothesis).split("\n")
            assert len(sample.reference) == len(resegmented_hypos), \
                f"Reference ({sample.audio_name}) has mismatched number of target " \
                f"({len(sample.reference)}) and resegmented lines ({len(resegmented_hypos)})"
            resegmented_samples.append(ResegmentedQualityScoringSample(
                sample.audio_name,
                resegmented_hypos,
                sample.reference,
                sample.source
            ))
        return self._do_score(resegmented_samples)