pearmut 0.1.3__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. {pearmut-0.1.3 → pearmut-0.2.1}/PKG-INFO +30 -6
  2. {pearmut-0.1.3 → pearmut-0.2.1}/README.md +29 -5
  3. {pearmut-0.1.3 → pearmut-0.2.1}/pearmut.egg-info/PKG-INFO +30 -6
  4. {pearmut-0.1.3 → pearmut-0.2.1}/pyproject.toml +2 -1
  5. {pearmut-0.1.3 → pearmut-0.2.1}/server/app.py +19 -4
  6. {pearmut-0.1.3 → pearmut-0.2.1}/server/assignment.py +1 -5
  7. {pearmut-0.1.3 → pearmut-0.2.1}/server/cli.py +1 -1
  8. {pearmut-0.1.3 → pearmut-0.2.1}/server/static/assets/style.css +7 -0
  9. {pearmut-0.1.3 → pearmut-0.2.1}/server/static/dashboard.bundle.js +1 -1
  10. {pearmut-0.1.3 → pearmut-0.2.1}/server/static/dashboard.html +1 -1
  11. {pearmut-0.1.3 → pearmut-0.2.1}/server/static/listwise.bundle.js +1 -1
  12. {pearmut-0.1.3 → pearmut-0.2.1}/server/static/listwise.html +1 -1
  13. {pearmut-0.1.3 → pearmut-0.2.1}/server/static/pointwise.bundle.js +1 -1
  14. {pearmut-0.1.3 → pearmut-0.2.1}/server/static/pointwise.html +1 -1
  15. {pearmut-0.1.3 → pearmut-0.2.1}/LICENSE +0 -0
  16. {pearmut-0.1.3 → pearmut-0.2.1}/pearmut.egg-info/SOURCES.txt +0 -0
  17. {pearmut-0.1.3 → pearmut-0.2.1}/pearmut.egg-info/dependency_links.txt +0 -0
  18. {pearmut-0.1.3 → pearmut-0.2.1}/pearmut.egg-info/entry_points.txt +0 -0
  19. {pearmut-0.1.3 → pearmut-0.2.1}/pearmut.egg-info/requires.txt +0 -0
  20. {pearmut-0.1.3 → pearmut-0.2.1}/pearmut.egg-info/top_level.txt +0 -0
  21. {pearmut-0.1.3 → pearmut-0.2.1}/server/static/assets/favicon.svg +0 -0
  22. {pearmut-0.1.3 → pearmut-0.2.1}/server/static/index.html +0 -0
  23. {pearmut-0.1.3 → pearmut-0.2.1}/server/utils.py +0 -0
  24. {pearmut-0.1.3 → pearmut-0.2.1}/setup.cfg +0 -0
{pearmut-0.1.3 → pearmut-0.2.1}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pearmut
-Version: 0.1.3
+Version: 0.2.1
 Summary: A tool for evaluation of model outputs, primarily MT.
 Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
 License: apache-2.0
@@ -42,11 +42,11 @@ You do not need to clone this repository. Simply install with pip and run locall
 # install the package
 pip install pearmut
 # download two campaign definitions
-wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/wmt25_%23_en-cs_CZ.json
-wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/wmt25_%23_cs-de_DE.json
+wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/esa_encs.json
+wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/da_enuk.json
 # load them into pearmut
-pearmut add wmt25_#_en-cs_CZ.json
-pearmut add wmt25_#_cs-de_DE.json
+pearmut add esa_encs.json
+pearmut add da_enuk.json
 # start pearmut (will show management links)
 pearmut run
 ```
@@ -145,6 +145,30 @@ For **listwise** template, `error_spans` is a 2D array where each inner array co
 
 See [examples/esaai_prefilled.json](examples/esaai_prefilled.json) for a complete example.
 
+## Tutorial and Attention Checks
+
+You can add validation rules to items for tutorials or attention checks. Items with a `validation` field will be checked before submission:
+
+```python
+{
+    "src": "The quick brown fox jumps.",
+    "tgt": "Rychlá hnědá liška skáče.",
+    "validation": {
+        "warning": "Please set score between 70-80.",  # shown on failure (omit for silent logging)
+        "score": [70, 80],  # required score range [min, max]
+        "error_spans": [{"start_i": [0, 2], "end_i": [4, 8], "severity": "minor"}],  # expected spans
+        "allow_skip": true  # show "skip tutorial" button
+    }
+}
+```
+
+- Tutorial items: include `allow_skip: true` and `warning` to let users skip after seeing the feedback
+- Loud attention checks: include `warning` without `allow_skip` to force users to retry
+- Silent attention checks: omit `warning` to log failures silently, without notifying the user (useful for quality control with bad translations)
+For the listwise template, `validation` is an array where each element corresponds to a candidate.
+The dashboard shows failed/total validation checks per user.
+See [examples/tutorial_pointwise.json](examples/tutorial_pointwise.json) and [examples/tutorial_listwise.json](examples/tutorial_listwise.json) for complete examples.
+
 ## Single-stream Assignment
 
 We also support a simple allocation where all annotators draw from the same pool (`single-stream`). Items are randomly assigned to annotators from the pool of unfinished items:
@@ -260,7 +284,7 @@ If you use this work in your paper, please cite as:
 ```bibtex
 @misc{zouhar2025pearmut,
   author={Vilém Zouhar},
-  title={Pearmut🍐 Platform for Evaluation and Reviewing of Multilingual Tasks},
+  title={Pearmut: Platform for Evaluating and Reviewing of Multilingual Tasks},
   url={https://github.com/zouharvi/pearmut/},
   year={2025},
 }
{pearmut-0.1.3 → pearmut-0.2.1}/README.md
@@ -21,11 +21,11 @@ You do not need to clone this repository. Simply install with pip and run locall
 # install the package
 pip install pearmut
 # download two campaign definitions
-wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/wmt25_%23_en-cs_CZ.json
-wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/wmt25_%23_cs-de_DE.json
+wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/esa_encs.json
+wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/da_enuk.json
 # load them into pearmut
-pearmut add wmt25_#_en-cs_CZ.json
-pearmut add wmt25_#_cs-de_DE.json
+pearmut add esa_encs.json
+pearmut add da_enuk.json
 # start pearmut (will show management links)
 pearmut run
 ```
@@ -124,6 +124,30 @@ For **listwise** template, `error_spans` is a 2D array where each inner array co
 
 See [examples/esaai_prefilled.json](examples/esaai_prefilled.json) for a complete example.
 
+## Tutorial and Attention Checks
+
+You can add validation rules to items for tutorials or attention checks. Items with a `validation` field will be checked before submission:
+
+```python
+{
+    "src": "The quick brown fox jumps.",
+    "tgt": "Rychlá hnědá liška skáče.",
+    "validation": {
+        "warning": "Please set score between 70-80.",  # shown on failure (omit for silent logging)
+        "score": [70, 80],  # required score range [min, max]
+        "error_spans": [{"start_i": [0, 2], "end_i": [4, 8], "severity": "minor"}],  # expected spans
+        "allow_skip": true  # show "skip tutorial" button
+    }
+}
+```
+
+- Tutorial items: include `allow_skip: true` and `warning` to let users skip after seeing the feedback
+- Loud attention checks: include `warning` without `allow_skip` to force users to retry
+- Silent attention checks: omit `warning` to log failures silently, without notifying the user (useful for quality control with bad translations)
+For the listwise template, `validation` is an array where each element corresponds to a candidate.
+The dashboard shows failed/total validation checks per user.
+See [examples/tutorial_pointwise.json](examples/tutorial_pointwise.json) and [examples/tutorial_listwise.json](examples/tutorial_listwise.json) for complete examples.
+
 ## Single-stream Assignment
 
 We also support a simple allocation where all annotators draw from the same pool (`single-stream`). Items are randomly assigned to annotators from the pool of unfinished items:
@@ -239,7 +263,7 @@ If you use this work in your paper, please cite as:
 ```bibtex
 @misc{zouhar2025pearmut,
   author={Vilém Zouhar},
-  title={Pearmut🍐 Platform for Evaluation and Reviewing of Multilingual Tasks},
+  title={Pearmut: Platform for Evaluating and Reviewing of Multilingual Tasks},
   url={https://github.com/zouharvi/pearmut/},
   year={2025},
 }
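The listwise form of `validation` mentioned above ("an array where each element corresponds to a candidate") has no inline example in this diff. Below is a minimal sketch of what such an item could look like, assuming a two-candidate listwise item; the field values are hypothetical and the authoritative shape lives in examples/tutorial_listwise.json, which is not reproduced here:

```python
{
    "src": "The quick brown fox jumps.",
    "tgt": ["Rychlá hnědá liška skáče.", "Liška skáče."],  # hypothetical candidates
    "validation": [
        {"score": [70, 80], "warning": "Candidate 1 should score 70-80."},  # loud check for candidate 1
        {"score": [0, 20]}  # silent check for candidate 2 (no warning, failures are only logged)
    ]
}
```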
{pearmut-0.1.3 → pearmut-0.2.1}/pearmut.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pearmut
-Version: 0.1.3
+Version: 0.2.1
 Summary: A tool for evaluation of model outputs, primarily MT.
 Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
 License: apache-2.0
@@ -42,11 +42,11 @@ You do not need to clone this repository. Simply install with pip and run locall
 # install the package
 pip install pearmut
 # download two campaign definitions
-wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/wmt25_%23_en-cs_CZ.json
-wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/wmt25_%23_cs-de_DE.json
+wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/esa_encs.json
+wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/da_enuk.json
 # load them into pearmut
-pearmut add wmt25_#_en-cs_CZ.json
-pearmut add wmt25_#_cs-de_DE.json
+pearmut add esa_encs.json
+pearmut add da_enuk.json
 # start pearmut (will show management links)
 pearmut run
 ```
@@ -145,6 +145,30 @@ For **listwise** template, `error_spans` is a 2D array where each inner array co
 
 See [examples/esaai_prefilled.json](examples/esaai_prefilled.json) for a complete example.
 
+## Tutorial and Attention Checks
+
+You can add validation rules to items for tutorials or attention checks. Items with a `validation` field will be checked before submission:
+
+```python
+{
+    "src": "The quick brown fox jumps.",
+    "tgt": "Rychlá hnědá liška skáče.",
+    "validation": {
+        "warning": "Please set score between 70-80.",  # shown on failure (omit for silent logging)
+        "score": [70, 80],  # required score range [min, max]
+        "error_spans": [{"start_i": [0, 2], "end_i": [4, 8], "severity": "minor"}],  # expected spans
+        "allow_skip": true  # show "skip tutorial" button
+    }
+}
+```
+
+- Tutorial items: include `allow_skip: true` and `warning` to let users skip after seeing the feedback
+- Loud attention checks: include `warning` without `allow_skip` to force users to retry
+- Silent attention checks: omit `warning` to log failures silently, without notifying the user (useful for quality control with bad translations)
+For the listwise template, `validation` is an array where each element corresponds to a candidate.
+The dashboard shows failed/total validation checks per user.
+See [examples/tutorial_pointwise.json](examples/tutorial_pointwise.json) and [examples/tutorial_listwise.json](examples/tutorial_listwise.json) for complete examples.
+
 ## Single-stream Assignment
 
 We also support a simple allocation where all annotators draw from the same pool (`single-stream`). Items are randomly assigned to annotators from the pool of unfinished items:
@@ -260,7 +284,7 @@ If you use this work in your paper, please cite as:
 ```bibtex
 @misc{zouhar2025pearmut,
   author={Vilém Zouhar},
-  title={Pearmut🍐 Platform for Evaluation and Reviewing of Multilingual Tasks},
+  title={Pearmut: Platform for Evaluating and Reviewing of Multilingual Tasks},
   url={https://github.com/zouharvi/pearmut/},
   year={2025},
 }
{pearmut-0.1.3 → pearmut-0.2.1}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "pearmut"
-version = "0.1.3"
+version = "0.2.1"
 description = "A tool for evaluation of model outputs, primarily MT."
 readme = "README.md"
 license = { text = "apache-2.0" }
@@ -40,6 +40,7 @@ build-backend = "setuptools.build_meta"
 [tool.setuptools.package-data]
 "pearmut" = ["static/**"]
 
+# managed by .github/workflows/publish.yml now, but can still be built and pushed locally:
 # rm -rf server/static/; npm install web/ --prefix web/; npm run build --prefix web/
 # rm -rf {build,dist,pearmut.egg-info}/*; python3 -m build
 # python3 -m twine upload dist/* -u __token__
{pearmut-0.1.3 → pearmut-0.2.1}/server/app.py
@@ -53,7 +53,8 @@ async def _log_response(request: LogResponseRequest):
         return JSONResponse(content={"error": "Unknown user ID"}, status_code=400)
 
     # append response to the output log
-    save_db_payload(campaign_id, request.payload | {"user_id": user_id, "item_i": item_i})
+    save_db_payload(
+        campaign_id, request.payload | {"user_id": user_id, "item_i": item_i})
 
     # if actions were submitted, we can log time data
     if "actions" in request.payload:
@@ -68,7 +69,15 @@ async def _log_response(request: LogResponseRequest):
             for a, b in zip(times, times[1:])
         ])
 
-    update_progress(campaign_id, user_id, tasks_data, progress_data, request.item_i, request.payload)
+    # initialize the per-user validations dict if it does not exist yet
+    if "validations" in request.payload:
+        if "validations" not in progress_data[campaign_id][user_id]:
+            progress_data[campaign_id][user_id]["validations"] = {}
+
+        progress_data[campaign_id][user_id]["validations"][request.item_i] = request.payload["validations"]
+
+    update_progress(campaign_id, user_id, tasks_data,
+                    progress_data, request.item_i, request.payload)
     save_progress_data(progress_data)
 
     return JSONResponse(content={"status": "ok"}, status_code=200)
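The new block above persists per-item validation results under `progress_data[campaign_id][user_id]["validations"]`, keyed by item index. A self-contained sketch of that bookkeeping, assuming the client sends `validations` as a list of per-check booleans (the handler stores the payload value verbatim, so the element type is actually up to the frontend):

```python
# names mirror the diff; the data below is made up for illustration
progress_data = {"camp1": {"user7": {}}}

def store_validations(campaign_id: str, user_id: str, item_i: int, payload: dict) -> None:
    if "validations" in payload:
        user = progress_data[campaign_id][user_id]
        # create the per-user dict lazily, then index by item
        user.setdefault("validations", {})[item_i] = payload["validations"]

store_validations("camp1", "user7", 3, {"validations": [True, False]})
store_validations("camp1", "user7", 5, {"validations": [True]})
print(progress_data["camp1"]["user7"]["validations"])  # {3: [True, False], 5: [True]}
```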
@@ -145,6 +154,11 @@ async def _dashboard_data(request: DashboardDataRequest):
     for user_id, user_val in progress_data[campaign_id].items():
         # shallow copy
         entry = dict(user_val)
+        entry["validations"] = [
+            all(v)
+            for v in list(entry.get("validations", {}).values())
+        ]
+
 
         if not is_privileged:
             entry["token_correct"] = None
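For the dashboard, each item's stored list of check results is collapsed into one pass/fail flag with `all(v)`. A small illustration of that reduction, under the same assumption of boolean lists; note that `all([])` is `True`, so an item with no recorded checks counts as passed:

```python
# per-item check results as they would sit under "validations"
stored = {3: [True, False], 5: [True], 8: []}

# mirrors the list comprehension added in _dashboard_data
flags = [all(v) for v in stored.values()]
print(flags)  # [False, True, True]
```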
@@ -229,10 +243,11 @@ async def _download_progress(
 
 static_dir = f"{os.path.dirname(os.path.abspath(__file__))}/static/"
 if not os.path.exists(static_dir + "index.html"):
-    raise FileNotFoundError("Static directory not found. Please build the frontend first.")
+    raise FileNotFoundError(
+        "Static directory not found. Please build the frontend first.")
 
 app.mount(
     "/",
     StaticFiles(directory=static_dir, html=True, follow_symlink=True),
     name="static",
-)
+)
{pearmut-0.1.3 → pearmut-0.2.1}/server/assignment.py
@@ -76,8 +76,6 @@ def get_i_item_taskbased(
     Get specific item for task-based protocol.
     """
     user_progress = progress_data[campaign_id][user_id]
-    if all(user_progress["progress"]):
-        return _completed_response(progress_data, campaign_id, user_id)
 
     # try to get existing annotations if any
     items_existing = get_db_log_item(campaign_id, user_id, item_i)
@@ -120,8 +118,6 @@ def get_i_item_singlestream(
     Get specific item for single-stream assignment.
     """
     user_progress = progress_data[campaign_id][user_id]
-    if all(user_progress["progress"]):
-        return _completed_response(progress_data, campaign_id, user_id)
 
     # try to get existing annotations if any
     # note the None user_id since it is shared
@@ -254,6 +250,7 @@ def _reset_user_time(progress_data: dict, campaign_id: str, user_id: str) -> Non
     progress_data[campaign_id][user_id]["time"] = 0.0
     progress_data[campaign_id][user_id]["time_start"] = None
     progress_data[campaign_id][user_id]["time_end"] = None
+    progress_data[campaign_id][user_id]["validations"] = {}
 
 
 def reset_task(
@@ -299,7 +296,6 @@ def update_progress(
     if assignment == "task-based":
         # even if it's already set it should be fine
         progress_data[campaign_id][user_id]["progress"][item_i] = True
-        # TODO: log attention checks/quality?
         return JSONResponse(content={"status": "ok"}, status_code=200)
     elif assignment == "single-stream":
         # progress all users
{pearmut-0.1.3 → pearmut-0.2.1}/server/cli.py
@@ -214,7 +214,7 @@ def main():
     import shutil
 
     confirm = input(
-        "Are you sure you want to purge all campaign data? This action cannot be undone. [y/n]"
+        "Are you sure you want to purge all campaign data? This action cannot be undone. [y/n] "
     )
     if confirm.lower() == 'y':
         shutil.rmtree(f"{ROOT}/data/tasks", ignore_errors=True)
{pearmut-0.1.3 → pearmut-0.2.1}/server/static/assets/style.css
@@ -225,4 +225,11 @@ input[type="button"].error_delete:hover {
 
 #progress span.progress_incomplete:hover {
     background: #aaa;
+}
+
+/* Validation warning indicator */
+.validation_warning {
+    margin-right: 5px;
+    position: relative;
+    top: -5px;
 }