pearmut 0.3.3__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,210 @@
1
+ import collections
2
+ import json
3
+ import os
4
+ import statistics
5
+
6
+ from .utils import get_db_log
7
+
8
+
9
def comparison_significant(
    scores1: dict[str, float], scores2: dict[str, float]
) -> bool:
    """Tell whether two systems' per-item scores differ significantly.

    Only items present in both mappings are compared, using a paired,
    two-sided t-test at the 0.05 level. With fewer than two shared items
    the difference is reported as not significant.

    Callers are expected to pass the higher-scoring system as ``scores1``.
    """

    from scipy import stats

    # Restrict the comparison to items scored for both systems.
    shared = set(scores1.keys()).intersection(set(scores2.keys()))
    if len(shared) < 2:
        return False

    # Walk the shared items once so both samples stay aligned item by item.
    paired = [(scores1[key], scores2[key]) for key in shared]
    first = [left for left, _ in paired]
    second = [right for _, right in paired]

    outcome = stats.ttest_rel(first, second, alternative="two-sided")
    return bool(outcome.pvalue < 0.05)
29
+
30
+
31
def compute_model_scores(campaign_id):
    """
    Aggregate per-model scores from a campaign's annotation log.

    Every log entry carrying both "item" and "annotation" is walked pairwise.
    Each non-None "score" is stored per model under the item's id, so a later
    annotation of the same item for the same model replaces the earlier one.

    Returns:
        List of dicts with keys: model, score (mean over scored items),
        count (number of scored items), sig_better_than_next (result of
        comparison_significant against the model ranked directly below;
        always False for the last model).
        Sorted by score in descending order
    """
    per_model = collections.defaultdict(dict)

    for entry in get_db_log(campaign_id):
        if not ("item" in entry and "annotation" in entry):
            continue
        for item, item_annotations in zip(entry["item"], entry["annotation"]):
            for model, model_annotation in item_annotations.items():
                if "score" not in model_annotation or model_annotation["score"] is None:
                    continue
                # Items without an explicit id are keyed by their JSON form
                # with "tgt" blanked out.
                item_key = item.get("item_id") or json.dumps(item | {"tgt": None})
                per_model[model][item_key] = model_annotation["score"]

    ranked = sorted(
        per_model.items(),
        key=lambda pair: statistics.mean(pair[1].values()),
        reverse=True,
    )

    results = []
    last_position = len(ranked) - 1
    for position, (model, scores) in enumerate(ranked):
        mean_score = statistics.mean(scores.values())
        # The lowest-ranked model has no successor to be compared with.
        beats_next = position < last_position and comparison_significant(
            scores, ranked[position + 1][1]
        )
        results.append(
            {
                "model": model,
                "score": mean_score,
                "count": len(scores),
                "sig_better_than_next": beats_next,
            }
        )
    return results
75
+
76
+
77
# Characters with special meaning in Typst markup, each mapped to its
# backslash-escaped form. Besides the emphasis/code/bracket characters this
# covers "$" (math mode), "@" (references), "<" (labels), "~" (non-breaking
# space) and "/" (so that "//" is not read as a comment).
_TYPST_ESCAPES = str.maketrans({ch: "\\" + ch for ch in "\\#*_`[]$@<~/"})


def escape_typst(s: str) -> str:
    """Escape a plain string so Typst renders it literally in markup.

    Each special character is prefixed with a backslash in a single pass,
    so backslashes added by the escaping are never escaped a second time.

    Args:
        s: Arbitrary text, e.g. a model name or campaign id.

    Returns:
        The text with Typst markup characters backslash-escaped.
    """
    return s.translate(_TYPST_ESCAPES)
87
+
88
+
89
def generate_typst_table(results):
    """
    Render ranked results as a two-column (model, score) Typst table.

    A partial rule under the model column is drawn after every row whose
    "sig_better_than_next" flag is set.

    Args:
        results: List of dicts; the keys read here are model, score and
            sig_better_than_next

    Returns:
        String containing Typst table markup, or a Typst comment when
        results is empty
    """
    if not results:
        return "// No results available"

    pieces = [
        "#table(\n",
        " columns: (auto, auto),\n",
        " align: (left, right),\n",
        " stroke: none,\n",
        " table.hline(),\n",
        " [*Model*], [*Score*],\n",
        " table.hline(),\n",
    ]

    for row in results:
        # Model names are free text, so neutralise Typst markup in them.
        name = escape_typst(row["model"])
        value = f"{row['score']:.1f}"
        pieces.append(f" [{name}], [{value}],\n")
        if row["sig_better_than_next"]:
            pieces.append(" table.hline(end: 1),\n")

    pieces.append(" table.hline(),\n")
    pieces.append(")\n")
    return "".join(pieces)
122
+
123
+
124
# LaTeX special characters mapped to their escaped forms. The text-macro
# replacements end in "{}" rather than a space: LaTeX swallows spaces after a
# control word, so a real space following the character would otherwise vanish.
# Applied in a single pass so the braces and backslashes introduced here are
# not escaped again.
_LATEX_ESCAPES = str.maketrans(
    {
        "\\": "\\textbackslash{}",
        "_": "\\_",
        "&": "\\&",
        "%": "\\%",
        "$": "\\$",
        "#": "\\#",
        "{": "\\{",
        "}": "\\}",
        "~": "\\textasciitilde{}",
        "^": "\\textasciicircum{}",
    }
)


def generate_latex_table(results):
    """
    Generate LaTeX code for a booktabs two-column table with results.

    A \\cmidrule under the model column is emitted after every row whose
    "sig_better_than_next" flag is set.

    Args:
        results: List of dicts; the keys read here are model, score and
            sig_better_than_next

    Returns:
        String containing LaTeX table markup, or a LaTeX comment when
        results is empty
    """
    if not results:
        return "% No results available"

    lines = [
        "\\begin{table}[h]",
        "\\centering",
        "\\begin{tabular}{lr}",
        "\\toprule",
        "\\textbf{Model} & \\textbf{Score} \\\\",
        "\\midrule",
    ]

    for result in results:
        # Model names are free text, so escape LaTeX special characters.
        model = result["model"].translate(_LATEX_ESCAPES)
        score = f"{result['score']:.1f}"
        lines.append(f"{model} & {score} \\\\")
        if result["sig_better_than_next"]:
            lines.append("\\cmidrule{1-1}")

    lines += [
        "\\bottomrule",
        "\\end{tabular}",
        "\\caption{Model ranking results}",
        "\\label{tab:results}",
        "\\end{table}",
    ]
    return "\n".join(lines) + "\n"
171
+
172
+
173
def generate_pdf(results, campaign_id):
    """
    Generate PDF from Typst code using typst-py.

    The Typst source is written to a temporary ``.typ`` file, compiled, and
    the file is removed again whether or not compilation succeeds.

    Args:
        results: List of dicts as consumed by generate_typst_table
        campaign_id: Campaign identifier; rendered (escaped) as the heading
            above the table. Unused when results is empty.

    Returns:
        bytes containing the PDF
    """

    import tempfile

    import typst

    if not results:
        # Return empty PDF with message
        typst_code = "[No results available]"
    else:
        typst_code = (
            "\n"
            "#set page(width: auto, height: auto, margin: 1.5pt)\n"
            f"== {escape_typst(campaign_id)}\n"
        ) + generate_typst_table(results)

    # Typst expects UTF-8 sources; write with an explicit encoding so that
    # non-ASCII model names do not depend on the platform's locale encoding.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".typ", delete=False, encoding="utf-8"
    ) as f:
        f.write(typst_code)
        typst_file = f.name

    try:
        # Compile to PDF
        return typst.compile(typst_file)
    finally:
        # delete=False above means the file must be removed by hand.
        os.unlink(typst_file)