pearmut 0.3.3__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pearmut/constants.py ADDED
@@ -0,0 +1,93 @@
1
+ """Default instructions for different annotation protocols."""
2
+
3
+ # Default instruction text (HTML fragments) for each annotation protocol, keyed by protocol name: "DA", "ESA", "MQM".
4
+ # These are used when no custom instructions are provided. "ESA" and "MQM" repeat the "DA" scoring rubric verbatim, so keep the copies in sync when editing.
5
+ PROTOCOL_INSTRUCTIONS = {
6
+ "DA": """
7
+ <ul>
8
+ <li>Score each translation using the slider based on meaning preservation and quality.
9
+ <strong>Important:</strong> The relative order of scores matters; ensure better translations have higher
10
+ scores than worse ones.
11
+ <ul>
12
+ <li>0: <strong>Nonsense</strong>: most information is lost.</li>
13
+ <li>33%: <strong>Broken</strong>: major gaps and narrative issues.</li>
14
+ <li>66%: <strong>Middling</strong>: minor issues with grammar or consistency.</li>
15
+ <li>100%: <strong>Perfect</strong>: meaning and grammar align completely with the source.</li>
16
+ </ul>
17
+ </li>
18
+ </ul>
19
+ """,
20
+ "ESA": """
21
+ <ul>
22
+ <li>Error spans:
23
+ <ul>
24
+ <li><strong>Click</strong> on the start of an error, then <strong>click</strong> on the end to mark an
25
+ error span.</li>
26
+ <li><strong>Hover</strong> over an existing highlight to change error severity (minor/major) or remove it.
27
+ </li>
28
+ </ul>
29
+ Error severity:
30
+ <ul>
31
+ <li><span class="instruction_sev" id="instruction_sev_minor">Minor:</span> Style, grammar, or word choice
32
+ could be better.</li>
33
+ <li><span class="instruction_sev" id="instruction_sev_major">Major:</span> Meaning is significantly
34
+ changed or is hard to understand.</li>
35
+ </ul>
36
+ <strong>Tip</strong>: Mark the general area of the error (doesn't need to be exact). Use separate highlights
37
+ for different errors.
38
+ Use <code>[missing]</code> at the end of a sentence for omitted content.<br>
39
+ </li>
40
+ <li>Score each translation using the slider based on meaning preservation and quality.
41
+ <strong>Important:</strong> The relative order of scores matters; ensure better translations have higher
42
+ scores than worse ones.
43
+ <ul>
44
+ <li>0: <strong>Nonsense</strong>: most information is lost.</li>
45
+ <li>33%: <strong>Broken</strong>: major gaps and narrative issues.</li>
46
+ <li>66%: <strong>Middling</strong>: minor issues with grammar or consistency.</li>
47
+ <li>100%: <strong>Perfect</strong>: meaning and grammar align completely with the source.</li>
48
+ </ul>
49
+ </li>
50
+ </ul>
51
+ """,
52
+ "MQM": """
53
+ <ul>
54
+ <li>Error spans:
55
+ <ul>
56
+ <li><strong>Click</strong> on the start of an error, then <strong>click</strong> on the end to mark an
57
+ error span.</li>
58
+ <li><strong>Hover</strong> over an existing highlight to change error severity (minor/major) or remove it.
59
+ </li>
60
+ </ul>
61
+ Error severity:
62
+ <ul>
63
+ <li><span class="instruction_sev" id="instruction_sev_minor">Minor:</span> Style, grammar, or word choice
64
+ could be better.</li>
65
+ <li><span class="instruction_sev" id="instruction_sev_major">Major:</span> Meaning is significantly
66
+ changed or is hard to understand.</li>
67
+ </ul>
68
+ <strong>Tip</strong>: Mark the general area of the error (doesn't need to be exact). Use separate highlights
69
+ for different errors.
70
+ Use <code>[missing]</code> at the end of a sentence for omitted content.<br>
71
+ </li>
72
+ <li>Score each translation using the slider based on meaning preservation and quality.
73
+ <strong>Important:</strong> The relative order of scores matters; ensure better translations have higher
74
+ scores than worse ones.
75
+ <ul>
76
+ <li>0: <strong>Nonsense</strong>: most information is lost.</li>
77
+ <li>33%: <strong>Broken</strong>: major gaps and narrative issues.</li>
78
+ <li>66%: <strong>Middling</strong>: minor issues with grammar or consistency.</li>
79
+ <li>100%: <strong>Perfect</strong>: meaning and grammar align completely with the source.</li>
80
+ </ul>
81
+ </li>
82
+ <li>
83
+ Error types:
84
+ After highlighting an error fragment, you will be asked to select the specific error type (main category and
85
+ subcategory).
86
+ If you are unsure about which errors fall under which categories, please consult the <a
87
+ href="https://themqm.org/the-mqm-typology/"
88
+ style="font-weight: bold; text-decoration: none; color: black;">typology
89
+ definitions</a>.
90
+ </li>
91
+ </ul>
92
+ """,
93
+ }
@@ -0,0 +1,210 @@
1
+ import collections
2
+ import json
3
+ import os
4
+ import statistics
5
+
6
+ from .utils import get_db_log
7
+
8
+
9
def comparison_significant(
    scores1: dict[str, float], scores2: dict[str, float]
) -> bool:
    """Report whether two paired score sets differ significantly.

    Only keys present in both mappings are compared, using a two-sided
    paired t-test (``scipy.stats.ttest_rel``) with a 0.05 threshold.
    Callers are expected to pass the higher-scoring set as ``scores1``;
    the test itself does not check the direction of the difference.

    Args:
        scores1: Mapping from item key to score for the first system.
        scores2: Mapping from item key to score for the second system.

    Returns:
        True when the p-value is below 0.05. False otherwise, including
        when fewer than two keys are shared.
    """
    # Local import: scipy is only needed when this function actually runs.
    from scipy import stats

    shared_keys = scores1.keys() & scores2.keys()
    if len(shared_keys) < 2:
        return False

    # Build both samples from one pass so the pairing is explicit.
    pairs = [(scores1[key], scores2[key]) for key in shared_keys]
    first_sample = [left for left, _ in pairs]
    second_sample = [right for _, right in pairs]

    outcome = stats.ttest_rel(first_sample, second_sample, alternative="two-sided")
    return bool(outcome.pvalue < 0.05)
29
+
30
+
31
def compute_model_scores(campaign_id):
    """
    Aggregate per-model annotation scores for a campaign.

    Reads the campaign log via ``get_db_log``. For every log entry that has
    both ``item`` and ``annotation``, each model's non-null ``score`` is
    recorded per item; a later entry for the same model and item overwrites
    an earlier one.

    Returns:
        List of dicts with keys: model, score (mean over the model's scored
        items), count (number of scored items), sig_better_than_next (result
        of ``comparison_significant`` against the next-ranked model; always
        False for the last model).
        Sorted by score in descending order
    """
    # model name -> {item key -> score}
    per_model = collections.defaultdict(dict)

    for entry in get_db_log(campaign_id):
        if "item" not in entry or "annotation" not in entry:
            continue
        for item, item_annotations in zip(entry["item"], entry["annotation"]):
            for model, model_annotation in item_annotations.items():
                if "score" not in model_annotation or model_annotation["score"] is None:
                    continue
                # Items without an explicit id are keyed by their JSON dump
                # with "tgt" blanked out.
                item_key = item.get("item_id") or json.dumps(item | {"tgt": None})
                per_model[model][item_key] = model_annotation["score"]

    averages = {
        model: statistics.mean(scores.values()) for model, scores in per_model.items()
    }
    ranked = sorted(
        per_model.items(), key=lambda pair: averages[pair[0]], reverse=True
    )

    results = []
    last_position = len(ranked) - 1
    for position, (model, scores) in enumerate(ranked):
        if position < last_position:
            # Compare against the model ranked directly below this one.
            sig_better = comparison_significant(scores, ranked[position + 1][1])
        else:
            sig_better = False
        results.append(
            {
                "model": model,
                "score": averages[model],
                "count": len(scores),
                "sig_better_than_next": sig_better,
            }
        )
    return results
75
+
76
+
77
# Characters that would otherwise be interpreted as Typst markup. Each is
# emitted with a leading backslash. Beyond emphasis, code and bracket
# characters, this covers "$" (math), "@" (reference), "<" / ">" (label),
# "~" (non-breaking space) and "/" (so "//" cannot start a comment).
_TYPST_ESCAPES = str.maketrans({ch: "\\" + ch for ch in "\\#*_`[]$@<>~/"})


def escape_typst(s: str) -> str:
    """Escape characters that have special meaning in Typst markup.

    The text is processed in a single pass, so backslashes inserted by the
    escaping are never escaped again.

    Args:
        s: Plain text (e.g. a model name or campaign id) to embed in markup.

    Returns:
        The text with every special character prefixed by a backslash.
    """
    return s.translate(_TYPST_ESCAPES)
87
+
88
+
89
def generate_typst_table(results):
    """
    Render ranking results as a two-column Typst table (model, score).

    Args:
        results: List of dicts with keys: model, score, sig_better_than_next
            (other keys, such as count, are not read)

    Returns:
        String containing Typst table markup, or a Typst comment line when
        results is empty
    """
    if not results:
        return "// No results available"

    pieces = [
        "#table(\n",
        "columns: (auto, auto),\n",
        "align: (left, right),\n",
        "stroke: none,\n",
        "table.hline(),\n",
        "[*Model*], [*Score*],\n",
        "table.hline(),\n",
    ]

    for row in results:
        # Model names are free text, so neutralise Typst markup characters.
        name = escape_typst(row["model"])
        formatted = f"{row['score']:.1f}"
        pieces.append(f" [{name}], [{formatted}],\n")
        if row["sig_better_than_next"]:
            # Extra rule after a row flagged as significantly better than the next.
            pieces.append(" table.hline(end: 1),\n")

    pieces.append(" table.hline(),\n")
    pieces.append(")\n")
    return "".join(pieces)
122
+
123
+
124
def generate_latex_table(results):
    """
    Render ranking results as a booktabs two-column LaTeX table.

    Args:
        results: List of dicts with keys: model, score, sig_better_than_next
            (other keys, such as count, are not read)

    Returns:
        String containing LaTeX table markup, or a LaTeX comment line when
        results is empty
    """
    if not results:
        return "% No results available"

    # Single-pass equivalent of replacing the backslash first and then each
    # remaining LaTeX special character in turn.
    escapes = str.maketrans(
        {
            "\\": "\\textbackslash ",
            "_": "\\_",
            "&": "\\&",
            "%": "\\%",
            "$": "\\$",
            "#": "\\#",
            "{": "\\{",
            "}": "\\}",
            "~": "\\textasciitilde ",
            "^": "\\textasciicircum ",
        }
    )

    parts = [
        "\\begin{table}[h]\n",
        "\\centering\n",
        "\\begin{tabular}{lr}\n",
        "\\toprule\n",
        "\\textbf{Model} & \\textbf{Score} \\\\\n",
        "\\midrule\n",
    ]

    for row in results:
        name = row["model"].translate(escapes)
        formatted = f"{row['score']:.1f}"
        parts.append(f"{name} & {formatted} \\\\\n")
        if row["sig_better_than_next"]:
            # Partial rule after a row flagged as significantly better than the next.
            parts.append("\\cmidrule{1-1}\n")

    parts.extend(
        [
            "\\bottomrule\n",
            "\\end{tabular}\n",
            "\\caption{Model ranking results}\n",
            "\\label{tab:results}\n",
            "\\end{table}\n",
        ]
    )
    return "".join(parts)
171
+
172
+
173
def generate_pdf(results, campaign_id):
    """
    Generate PDF from Typst code using typst-py.

    Args:
        results: List of dicts with keys: model, score, sig_better_than_next
        campaign_id: Campaign identifier; escaped and used as the heading
            above the table (not used when results is empty)

    Returns:
        bytes containing the PDF
    """

    import tempfile

    import typst

    if not results:
        # Return a PDF that only carries a message
        typst_code = "[No results available]"
    else:
        typst_code = (
            "\n"
            "#set page(width: auto, height: auto, margin: 1.5pt)\n"
            f"== {escape_typst(campaign_id)}\n"
        ) + generate_typst_table(results)

    # Typst sources are UTF-8, so write with an explicit encoding instead of
    # the locale default; otherwise non-ASCII names can fail or be mangled.
    source = tempfile.NamedTemporaryFile(
        mode="w", suffix=".typ", delete=False, encoding="utf-8"
    )
    try:
        # Close the file before compiling so the compiler sees flushed content.
        with source:
            source.write(typst_code)
        return typst.compile(source.name)
    finally:
        # Clean up even when writing or compiling fails.
        os.unlink(source.name)