pearmut 0.1.3.tar.gz → 0.2.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pearmut-0.1.3 → pearmut-0.2.0}/PKG-INFO +30 -6
- {pearmut-0.1.3 → pearmut-0.2.0}/README.md +29 -5
- {pearmut-0.1.3 → pearmut-0.2.0}/pearmut.egg-info/PKG-INFO +30 -6
- {pearmut-0.1.3 → pearmut-0.2.0}/pyproject.toml +2 -1
- {pearmut-0.1.3 → pearmut-0.2.0}/server/app.py +20 -4
- {pearmut-0.1.3 → pearmut-0.2.0}/server/assignment.py +1 -5
- {pearmut-0.1.3 → pearmut-0.2.0}/server/cli.py +1 -1
- {pearmut-0.1.3 → pearmut-0.2.0}/server/static/assets/style.css +7 -0
- {pearmut-0.1.3 → pearmut-0.2.0}/server/static/dashboard.bundle.js +1 -1
- {pearmut-0.1.3 → pearmut-0.2.0}/server/static/dashboard.html +1 -1
- {pearmut-0.1.3 → pearmut-0.2.0}/server/static/listwise.bundle.js +1 -1
- {pearmut-0.1.3 → pearmut-0.2.0}/server/static/listwise.html +1 -1
- {pearmut-0.1.3 → pearmut-0.2.0}/server/static/pointwise.bundle.js +1 -1
- {pearmut-0.1.3 → pearmut-0.2.0}/server/static/pointwise.html +1 -1
- {pearmut-0.1.3 → pearmut-0.2.0}/LICENSE +0 -0
- {pearmut-0.1.3 → pearmut-0.2.0}/pearmut.egg-info/SOURCES.txt +0 -0
- {pearmut-0.1.3 → pearmut-0.2.0}/pearmut.egg-info/dependency_links.txt +0 -0
- {pearmut-0.1.3 → pearmut-0.2.0}/pearmut.egg-info/entry_points.txt +0 -0
- {pearmut-0.1.3 → pearmut-0.2.0}/pearmut.egg-info/requires.txt +0 -0
- {pearmut-0.1.3 → pearmut-0.2.0}/pearmut.egg-info/top_level.txt +0 -0
- {pearmut-0.1.3 → pearmut-0.2.0}/server/static/assets/favicon.svg +0 -0
- {pearmut-0.1.3 → pearmut-0.2.0}/server/static/index.html +0 -0
- {pearmut-0.1.3 → pearmut-0.2.0}/server/utils.py +0 -0
- {pearmut-0.1.3 → pearmut-0.2.0}/setup.cfg +0 -0

{pearmut-0.1.3 → pearmut-0.2.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pearmut
-Version: 0.1.3
+Version: 0.2.0
 Summary: A tool for evaluation of model outputs, primarily MT.
 Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
 License: apache-2.0
@@ -42,11 +42,11 @@ You do not need to clone this repository. Simply install with pip and run locally.
 # install the package
 pip install pearmut
 # download two campaign definitions
-wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/
-wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/
+wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/esa_encs.json
+wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/da_enuk.json
 # load them into pearmut
-pearmut add
-pearmut add
+pearmut add esa_encs.json
+pearmut add da_enuk.json
 # start pearmut (will show management links)
 pearmut run
 ```
@@ -145,6 +145,30 @@ For **listwise** template, `error_spans` is a 2D array where each inner array co
 
 See [examples/esaai_prefilled.json](examples/esaai_prefilled.json) for a complete example.
 
+## Tutorial and Attention Checks
+
+You can add validation rules to items for tutorials or attention checks. Items with a `validation` field will be checked before submission:
+
+```python
+{
+    "src": "The quick brown fox jumps.",
+    "tgt": "Rychlá hnědá liška skáče.",
+    "validation": {
+        "warning": "Please set score between 70-80.",  # shown on failure (omit for silent logging)
+        "score": [70, 80],  # required score range [min, max]
+        "error_spans": [{"start_i": [0, 2], "end_i": [4, 8], "severity": "minor"}],  # expected spans
+        "allow_skip": true  # show "skip tutorial" button
+    }
+}
+```
+
+- Tutorial items: Include `allow_skip: true` and `warning` to let users skip after seeing the feedback
+- Loud attention checks: Include `warning` without `allow_skip` to force users to retry
+- Silent attention checks: Omit `warning` to silently log failures without user notification (useful for quality control with bad translations)
+For listwise template, `validation` is an array where each element corresponds to a candidate.
+The dashboard shows failed/total validation checks per user.
+See [examples/tutorial_pointwise.json](examples/tutorial_pointwise.json) and [examples/tutorial_listwise.json](examples/tutorial_listwise.json) for complete examples.
+
 ## Single-stream Assignment
 
 We also support a simple allocation where all annotators draw from the same pool (`single-stream`). Items are randomly assigned to annotators from the pool of unfinished items:
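The documentation section added above shows the `validation` schema only for a pointwise item. As a hedged sketch of the listwise case it mentions (one `validation` entry per candidate), an item might look like the following; representing the candidates as a `tgt` list is an assumption here, not something this diff confirms, so consult examples/tutorial_listwise.json for the actual schema:

```python
{
    "src": "The quick brown fox jumps.",
    # a list of candidate translations is assumed here; see examples/tutorial_listwise.json
    "tgt": ["Rychlá hnědá liška skáče.", "Hnědá liška skáče rychle."],
    "validation": [
        # one entry per candidate, in the same order as the candidates
        {"score": [70, 80], "warning": "Please set score between 70-80."},
        {"score": [0, 30], "warning": "This candidate should score low."}
    ]
}
```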
@@ -260,7 +284,7 @@ If you use this work in your paper, please cite as:
 ```bibtex
 @misc{zouhar2025pearmut,
 author={Vilém Zouhar},
-title={Pearmut
+title={Pearmut: Platform for Evaluating and Reviewing of Multilingual Tasks},
 url={https://github.com/zouharvi/pearmut/},
 year={2025},
 }

{pearmut-0.1.3 → pearmut-0.2.0}/README.md

@@ -21,11 +21,11 @@ You do not need to clone this repository. Simply install with pip and run locally.
 # install the package
 pip install pearmut
 # download two campaign definitions
-wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/
-wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/
+wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/esa_encs.json
+wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/da_enuk.json
 # load them into pearmut
-pearmut add
-pearmut add
+pearmut add esa_encs.json
+pearmut add da_enuk.json
 # start pearmut (will show management links)
 pearmut run
 ```
@@ -124,6 +124,30 @@ For **listwise** template, `error_spans` is a 2D array where each inner array co
 
 See [examples/esaai_prefilled.json](examples/esaai_prefilled.json) for a complete example.
 
+## Tutorial and Attention Checks
+
+You can add validation rules to items for tutorials or attention checks. Items with a `validation` field will be checked before submission:
+
+```python
+{
+    "src": "The quick brown fox jumps.",
+    "tgt": "Rychlá hnědá liška skáče.",
+    "validation": {
+        "warning": "Please set score between 70-80.",  # shown on failure (omit for silent logging)
+        "score": [70, 80],  # required score range [min, max]
+        "error_spans": [{"start_i": [0, 2], "end_i": [4, 8], "severity": "minor"}],  # expected spans
+        "allow_skip": true  # show "skip tutorial" button
+    }
+}
+```
+
+- Tutorial items: Include `allow_skip: true` and `warning` to let users skip after seeing the feedback
+- Loud attention checks: Include `warning` without `allow_skip` to force users to retry
+- Silent attention checks: Omit `warning` to silently log failures without user notification (useful for quality control with bad translations)
+For listwise template, `validation` is an array where each element corresponds to a candidate.
+The dashboard shows failed/total validation checks per user.
+See [examples/tutorial_pointwise.json](examples/tutorial_pointwise.json) and [examples/tutorial_listwise.json](examples/tutorial_listwise.json) for complete examples.
+
 ## Single-stream Assignment
 
 We also support a simple allocation where all annotators draw from the same pool (`single-stream`). Items are randomly assigned to annotators from the pool of unfinished items:
@@ -239,7 +263,7 @@ If you use this work in your paper, please cite as:
 ```bibtex
 @misc{zouhar2025pearmut,
 author={Vilém Zouhar},
-title={Pearmut
+title={Pearmut: Platform for Evaluating and Reviewing of Multilingual Tasks},
 url={https://github.com/zouharvi/pearmut/},
 year={2025},
 }

{pearmut-0.1.3 → pearmut-0.2.0}/pearmut.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pearmut
-Version: 0.1.3
+Version: 0.2.0
 Summary: A tool for evaluation of model outputs, primarily MT.
 Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
 License: apache-2.0
@@ -42,11 +42,11 @@ You do not need to clone this repository. Simply install with pip and run locally.
 # install the package
 pip install pearmut
 # download two campaign definitions
-wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/
-wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/
+wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/esa_encs.json
+wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/da_enuk.json
 # load them into pearmut
-pearmut add
-pearmut add
+pearmut add esa_encs.json
+pearmut add da_enuk.json
 # start pearmut (will show management links)
 pearmut run
 ```
@@ -145,6 +145,30 @@ For **listwise** template, `error_spans` is a 2D array where each inner array co
 
 See [examples/esaai_prefilled.json](examples/esaai_prefilled.json) for a complete example.
 
+## Tutorial and Attention Checks
+
+You can add validation rules to items for tutorials or attention checks. Items with a `validation` field will be checked before submission:
+
+```python
+{
+    "src": "The quick brown fox jumps.",
+    "tgt": "Rychlá hnědá liška skáče.",
+    "validation": {
+        "warning": "Please set score between 70-80.",  # shown on failure (omit for silent logging)
+        "score": [70, 80],  # required score range [min, max]
+        "error_spans": [{"start_i": [0, 2], "end_i": [4, 8], "severity": "minor"}],  # expected spans
+        "allow_skip": true  # show "skip tutorial" button
+    }
+}
+```
+
+- Tutorial items: Include `allow_skip: true` and `warning` to let users skip after seeing the feedback
+- Loud attention checks: Include `warning` without `allow_skip` to force users to retry
+- Silent attention checks: Omit `warning` to silently log failures without user notification (useful for quality control with bad translations)
+For listwise template, `validation` is an array where each element corresponds to a candidate.
+The dashboard shows failed/total validation checks per user.
+See [examples/tutorial_pointwise.json](examples/tutorial_pointwise.json) and [examples/tutorial_listwise.json](examples/tutorial_listwise.json) for complete examples.
+
 ## Single-stream Assignment
 
 We also support a simple allocation where all annotators draw from the same pool (`single-stream`). Items are randomly assigned to annotators from the pool of unfinished items:
@@ -260,7 +284,7 @@ If you use this work in your paper, please cite as:
 ```bibtex
 @misc{zouhar2025pearmut,
 author={Vilém Zouhar},
-title={Pearmut
+title={Pearmut: Platform for Evaluating and Reviewing of Multilingual Tasks},
 url={https://github.com/zouharvi/pearmut/},
 year={2025},
 }

{pearmut-0.1.3 → pearmut-0.2.0}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "pearmut"
-version = "0.1.3"
+version = "0.2.0"
 description = "A tool for evaluation of model outputs, primarily MT."
 readme = "README.md"
 license = { text = "apache-2.0" }
@@ -40,6 +40,7 @@ build-backend = "setuptools.build_meta"
 [tool.setuptools.package-data]
 "pearmut" = ["static/**"]
 
+# managed by .github/workflows/publish.yml now but still can be built and pushed locally
 # rm -rf server/static/; npm install web/ --prefix web/; npm run build --prefix web/
 # rm -rf {build,dist,pearmut.egg-info}/*; python3 -m build
 # python3 -m twine upload dist/* -u __token__

{pearmut-0.1.3 → pearmut-0.2.0}/server/app.py

@@ -53,7 +53,8 @@ async def _log_response(request: LogResponseRequest):
         return JSONResponse(content={"error": "Unknown user ID"}, status_code=400)
 
     # append response to the output log
-    save_db_payload(campaign_id, request.payload | {"user_id": user_id, "item_i": item_i})
+    save_db_payload(campaign_id, request.payload | {
+        "user_id": user_id, "item_i": item_i})
 
     # if actions were submitted, we can log time data
     if "actions" in request.payload:
@@ -68,7 +69,16 @@ async def _log_response(request: LogResponseRequest):
             for a, b in zip(times, times[1:])
         ])
 
-    update_progress(campaign_id, user_id, tasks_data, progress_data, request.item_i, request.payload)
+    # Initialize validation_checks if it doesn't exist
+    print(request.payload.keys())
+    if "validations" in request.payload:
+        if "validations" not in progress_data[campaign_id][user_id]:
+            progress_data[campaign_id][user_id]["validations"] = {}
+
+        progress_data[campaign_id][user_id]["validations"][request.item_i] = request.payload["validations"]
+
+    update_progress(campaign_id, user_id, tasks_data,
+                    progress_data, request.item_i, request.payload)
     save_progress_data(progress_data)
 
     return JSONResponse(content={"status": "ok"}, status_code=200)
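Read together with the dashboard hunk that follows, the new bookkeeping stores one list of per-check booleans per validated item and later collapses each list into a single pass/fail flag. A minimal sketch of that data flow, with sample values assumed rather than taken from the diff:

```python
# _log_response stores the client-reported check results per item (sample values assumed):
progress_data = {
    "campaign1": {
        "user1": {
            "validations": {
                3: [True, True],   # item 3 passed both checks
                7: [True, False],  # item 7 failed one check
            }
        }
    }
}

# _dashboard_data then reduces each item's checks with all(), as in the next hunk:
entry = dict(progress_data["campaign1"]["user1"])
entry["validations"] = [all(v) for v in list(entry.get("validations", {}).values())]
print(entry["validations"])  # [True, False] -> the dashboard can show 1 failed / 2 total
```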
@@ -145,6 +155,11 @@ async def _dashboard_data(request: DashboardDataRequest):
     for user_id, user_val in progress_data[campaign_id].items():
         # shallow copy
         entry = dict(user_val)
+        entry["validations"] = [
+            all(v)
+            for v in list(entry.get("validations", {}).values())
+        ]
+
 
         if not is_privileged:
             entry["token_correct"] = None
@@ -229,10 +244,11 @@ async def _download_progress(
 
 static_dir = f"{os.path.dirname(os.path.abspath(__file__))}/static/"
 if not os.path.exists(static_dir + "index.html"):
-    raise FileNotFoundError("Static directory not found. Please build the frontend first.")
+    raise FileNotFoundError(
+        "Static directory not found. Please build the frontend first.")
 
 app.mount(
     "/",
     StaticFiles(directory=static_dir, html=True, follow_symlink=True),
     name="static",
-)
+)

{pearmut-0.1.3 → pearmut-0.2.0}/server/assignment.py

@@ -76,8 +76,6 @@ def get_i_item_taskbased(
     Get specific item for task-based protocol.
     """
     user_progress = progress_data[campaign_id][user_id]
-    if all(user_progress["progress"]):
-        return _completed_response(progress_data, campaign_id, user_id)
 
     # try to get existing annotations if any
     items_existing = get_db_log_item(campaign_id, user_id, item_i)
@@ -120,8 +118,6 @@ def get_i_item_singlestream(
     Get specific item for single-stream assignment.
     """
     user_progress = progress_data[campaign_id][user_id]
-    if all(user_progress["progress"]):
-        return _completed_response(progress_data, campaign_id, user_id)
 
     # try to get existing annotations if any
     # note the None user_id since it is shared
@@ -254,6 +250,7 @@ def _reset_user_time(progress_data: dict, campaign_id: str, user_id: str) -> None:
     progress_data[campaign_id][user_id]["time"] = 0.0
     progress_data[campaign_id][user_id]["time_start"] = None
     progress_data[campaign_id][user_id]["time_end"] = None
+    progress_data[campaign_id][user_id]["validations"] = {}
 
 
 def reset_task(
@@ -299,7 +296,6 @@ def update_progress(
     if assignment == "task-based":
         # even if it's already set it should be fine
         progress_data[campaign_id][user_id]["progress"][item_i] = True
-        # TODO: log attention checks/quality?
         return JSONResponse(content={"status": "ok"}, status_code=200)
     elif assignment == "single-stream":
         # progress all users

{pearmut-0.1.3 → pearmut-0.2.0}/server/cli.py

@@ -214,7 +214,7 @@ def main():
         import shutil
 
         confirm = input(
-            "Are you sure you want to purge all campaign data? This action cannot be undone. [y/n]"
+            "Are you sure you want to purge all campaign data? This action cannot be undone. [y/n] "
         )
         if confirm.lower() == 'y':
             shutil.rmtree(f"{ROOT}/data/tasks", ignore_errors=True)

{pearmut-0.1.3 → pearmut-0.2.0}/server/static/assets/style.css

@@ -225,4 +225,11 @@ input[type="button"].error_delete:hover {
 
 #progress span.progress_incomplete:hover {
     background: #aaa;
+}
+
+/* Validation warning indicator */
+.validation_warning {
+    margin-right: 5px;
+    position: relative;
+    top: -5px;
 }