pearmut 0.1.3__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. {pearmut-0.1.3 → pearmut-0.2.1}/PKG-INFO +30 -6
  2. {pearmut-0.1.3 → pearmut-0.2.1}/README.md +29 -5
  3. {pearmut-0.1.3 → pearmut-0.2.1}/pearmut.egg-info/PKG-INFO +30 -6
  4. {pearmut-0.1.3 → pearmut-0.2.1}/pyproject.toml +2 -1
  5. {pearmut-0.1.3 → pearmut-0.2.1}/server/app.py +19 -4
  6. {pearmut-0.1.3 → pearmut-0.2.1}/server/assignment.py +1 -5
  7. {pearmut-0.1.3 → pearmut-0.2.1}/server/cli.py +1 -1
  8. {pearmut-0.1.3 → pearmut-0.2.1}/server/static/assets/style.css +7 -0
  9. {pearmut-0.1.3 → pearmut-0.2.1}/server/static/dashboard.bundle.js +1 -1
  10. {pearmut-0.1.3 → pearmut-0.2.1}/server/static/dashboard.html +1 -1
  11. {pearmut-0.1.3 → pearmut-0.2.1}/server/static/listwise.bundle.js +1 -1
  12. {pearmut-0.1.3 → pearmut-0.2.1}/server/static/listwise.html +1 -1
  13. {pearmut-0.1.3 → pearmut-0.2.1}/server/static/pointwise.bundle.js +1 -1
  14. {pearmut-0.1.3 → pearmut-0.2.1}/server/static/pointwise.html +1 -1
  15. {pearmut-0.1.3 → pearmut-0.2.1}/LICENSE +0 -0
  16. {pearmut-0.1.3 → pearmut-0.2.1}/pearmut.egg-info/SOURCES.txt +0 -0
  17. {pearmut-0.1.3 → pearmut-0.2.1}/pearmut.egg-info/dependency_links.txt +0 -0
  18. {pearmut-0.1.3 → pearmut-0.2.1}/pearmut.egg-info/entry_points.txt +0 -0
  19. {pearmut-0.1.3 → pearmut-0.2.1}/pearmut.egg-info/requires.txt +0 -0
  20. {pearmut-0.1.3 → pearmut-0.2.1}/pearmut.egg-info/top_level.txt +0 -0
  21. {pearmut-0.1.3 → pearmut-0.2.1}/server/static/assets/favicon.svg +0 -0
  22. {pearmut-0.1.3 → pearmut-0.2.1}/server/static/index.html +0 -0
  23. {pearmut-0.1.3 → pearmut-0.2.1}/server/utils.py +0 -0
  24. {pearmut-0.1.3 → pearmut-0.2.1}/setup.cfg +0 -0
{pearmut-0.1.3 → pearmut-0.2.1}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pearmut
-Version: 0.1.3
+Version: 0.2.1
 Summary: A tool for evaluation of model outputs, primarily MT.
 Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
 License: apache-2.0
@@ -42,11 +42,11 @@ You do not need to clone this repository. Simply install with pip and run locall
 # install the package
 pip install pearmut
 # download two campaign definitions
-wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/wmt25_%23_en-cs_CZ.json
-wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/wmt25_%23_cs-de_DE.json
+wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/esa_encs.json
+wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/da_enuk.json
 # load them into pearmut
-pearmut add wmt25_#_en-cs_CZ.json
-pearmut add wmt25_#_cs-de_DE.json
+pearmut add esa_encs.json
+pearmut add da_enuk.json
 # start pearmut (will show management links)
 pearmut run
 ```
@@ -145,6 +145,30 @@ For **listwise** template, `error_spans` is a 2D array where each inner array co
 
 See [examples/esaai_prefilled.json](examples/esaai_prefilled.json) for a complete example.
 
+## Tutorial and Attention Checks
+
+You can add validation rules to items for tutorials or attention checks. Items with a `validation` field will be checked before submission:
+
+```python
+{
+    "src": "The quick brown fox jumps.",
+    "tgt": "Rychlá hnědá liška skáče.",
+    "validation": {
+        "warning": "Please set score between 70-80.",  # shown on failure (omit for silent logging)
+        "score": [70, 80],  # required score range [min, max]
+        "error_spans": [{"start_i": [0, 2], "end_i": [4, 8], "severity": "minor"}],  # expected spans
+        "allow_skip": true  # show "skip tutorial" button
+    }
+}
+```
+
+- Tutorial items: include `allow_skip: true` and `warning` to let users skip after seeing the feedback
+- Loud attention checks: include `warning` without `allow_skip` to force users to retry
+- Silent attention checks: omit `warning` to log failures silently, without notifying the user (useful for quality control with bad translations)
+For the listwise template, `validation` is an array where each element corresponds to a candidate.
+The dashboard shows failed/total validation checks per user.
+See [examples/tutorial_pointwise.json](examples/tutorial_pointwise.json) and [examples/tutorial_listwise.json](examples/tutorial_listwise.json) for complete examples.
+
 ## Single-stream Assignment
 
 We also support a simple allocation where all annotators draw from the same pool (`single-stream`). Items are randomly assigned to annotators from the pool of unfinished items:
@@ -260,7 +284,7 @@ If you use this work in your paper, please cite as:
 ```bibtex
 @misc{zouhar2025pearmut,
   author={Vilém Zouhar},
-  title={Pearmut🍐 Platform for Evaluation and Reviewing of Multilingual Tasks},
+  title={Pearmut: Platform for Evaluating and Reviewing of Multilingual Tasks},
   url={https://github.com/zouharvi/pearmut/},
   year={2025},
 }
{pearmut-0.1.3 → pearmut-0.2.1}/README.md
@@ -21,11 +21,11 @@ You do not need to clone this repository. Simply install with pip and run locall
 # install the package
 pip install pearmut
 # download two campaign definitions
-wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/wmt25_%23_en-cs_CZ.json
-wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/wmt25_%23_cs-de_DE.json
+wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/esa_encs.json
+wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/da_enuk.json
 # load them into pearmut
-pearmut add wmt25_#_en-cs_CZ.json
-pearmut add wmt25_#_cs-de_DE.json
+pearmut add esa_encs.json
+pearmut add da_enuk.json
 # start pearmut (will show management links)
 pearmut run
 ```
@@ -124,6 +124,30 @@ For **listwise** template, `error_spans` is a 2D array where each inner array co
 
 See [examples/esaai_prefilled.json](examples/esaai_prefilled.json) for a complete example.
 
+## Tutorial and Attention Checks
+
+You can add validation rules to items for tutorials or attention checks. Items with a `validation` field will be checked before submission:
+
+```python
+{
+    "src": "The quick brown fox jumps.",
+    "tgt": "Rychlá hnědá liška skáče.",
+    "validation": {
+        "warning": "Please set score between 70-80.",  # shown on failure (omit for silent logging)
+        "score": [70, 80],  # required score range [min, max]
+        "error_spans": [{"start_i": [0, 2], "end_i": [4, 8], "severity": "minor"}],  # expected spans
+        "allow_skip": true  # show "skip tutorial" button
+    }
+}
+```
+
+- Tutorial items: include `allow_skip: true` and `warning` to let users skip after seeing the feedback
+- Loud attention checks: include `warning` without `allow_skip` to force users to retry
+- Silent attention checks: omit `warning` to log failures silently, without notifying the user (useful for quality control with bad translations)
+For the listwise template, `validation` is an array where each element corresponds to a candidate.
+The dashboard shows failed/total validation checks per user.
+See [examples/tutorial_pointwise.json](examples/tutorial_pointwise.json) and [examples/tutorial_listwise.json](examples/tutorial_listwise.json) for complete examples.
+
 ## Single-stream Assignment
 
 We also support a simple allocation where all annotators draw from the same pool (`single-stream`). Items are randomly assigned to annotators from the pool of unfinished items:
@@ -239,7 +263,7 @@ If you use this work in your paper, please cite as:
 ```bibtex
 @misc{zouhar2025pearmut,
   author={Vilém Zouhar},
-  title={Pearmut🍐 Platform for Evaluation and Reviewing of Multilingual Tasks},
+  title={Pearmut: Platform for Evaluating and Reviewing of Multilingual Tasks},
   url={https://github.com/zouharvi/pearmut/},
   year={2025},
 }
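The listwise form of `validation` mentioned above ("an array where each element corresponds to a candidate") has no inline example in this diff. Below is a minimal sketch of what such an item could look like, assuming a two-candidate listwise item; the field values are hypothetical and the authoritative shape lives in examples/tutorial_listwise.json, which is not reproduced here:

```python
{
    "src": "The quick brown fox jumps.",
    "tgt": ["Rychlá hnědá liška skáče.", "Liška skáče."],  # hypothetical candidates
    "validation": [
        {"score": [70, 80], "warning": "Candidate 1 should score 70-80."},  # loud check for candidate 1
        {"score": [0, 20]}  # silent check for candidate 2 (no warning, failures are only logged)
    ]
}
```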
{pearmut-0.1.3 → pearmut-0.2.1}/pearmut.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pearmut
-Version: 0.1.3
+Version: 0.2.1
 Summary: A tool for evaluation of model outputs, primarily MT.
 Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
 License: apache-2.0
@@ -42,11 +42,11 @@ You do not need to clone this repository. Simply install with pip and run locall
 # install the package
 pip install pearmut
 # download two campaign definitions
-wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/wmt25_%23_en-cs_CZ.json
-wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/wmt25_%23_cs-de_DE.json
+wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/esa_encs.json
+wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/da_enuk.json
 # load them into pearmut
-pearmut add wmt25_#_en-cs_CZ.json
-pearmut add wmt25_#_cs-de_DE.json
+pearmut add esa_encs.json
+pearmut add da_enuk.json
 # start pearmut (will show management links)
 pearmut run
 ```
@@ -145,6 +145,30 @@ For **listwise** template, `error_spans` is a 2D array where each inner array co
 
 See [examples/esaai_prefilled.json](examples/esaai_prefilled.json) for a complete example.
 
+## Tutorial and Attention Checks
+
+You can add validation rules to items for tutorials or attention checks. Items with a `validation` field will be checked before submission:
+
+```python
+{
+    "src": "The quick brown fox jumps.",
+    "tgt": "Rychlá hnědá liška skáče.",
+    "validation": {
+        "warning": "Please set score between 70-80.",  # shown on failure (omit for silent logging)
+        "score": [70, 80],  # required score range [min, max]
+        "error_spans": [{"start_i": [0, 2], "end_i": [4, 8], "severity": "minor"}],  # expected spans
+        "allow_skip": true  # show "skip tutorial" button
+    }
+}
+```
+
+- Tutorial items: include `allow_skip: true` and `warning` to let users skip after seeing the feedback
+- Loud attention checks: include `warning` without `allow_skip` to force users to retry
+- Silent attention checks: omit `warning` to log failures silently, without notifying the user (useful for quality control with bad translations)
+For the listwise template, `validation` is an array where each element corresponds to a candidate.
+The dashboard shows failed/total validation checks per user.
+See [examples/tutorial_pointwise.json](examples/tutorial_pointwise.json) and [examples/tutorial_listwise.json](examples/tutorial_listwise.json) for complete examples.
+
 ## Single-stream Assignment
 
 We also support a simple allocation where all annotators draw from the same pool (`single-stream`). Items are randomly assigned to annotators from the pool of unfinished items:
@@ -260,7 +284,7 @@ If you use this work in your paper, please cite as:
 ```bibtex
 @misc{zouhar2025pearmut,
   author={Vilém Zouhar},
-  title={Pearmut🍐 Platform for Evaluation and Reviewing of Multilingual Tasks},
+  title={Pearmut: Platform for Evaluating and Reviewing of Multilingual Tasks},
   url={https://github.com/zouharvi/pearmut/},
   year={2025},
 }
{pearmut-0.1.3 → pearmut-0.2.1}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "pearmut"
-version = "0.1.3"
+version = "0.2.1"
 description = "A tool for evaluation of model outputs, primarily MT."
 readme = "README.md"
 license = { text = "apache-2.0" }
@@ -40,6 +40,7 @@ build-backend = "setuptools.build_meta"
 [tool.setuptools.package-data]
 "pearmut" = ["static/**"]
 
+# managed by .github/workflows/publish.yml now, but can still be built and pushed locally:
 # rm -rf server/static/; npm install web/ --prefix web/; npm run build --prefix web/
 # rm -rf {build,dist,pearmut.egg-info}/*; python3 -m build
 # python3 -m twine upload dist/* -u __token__
{pearmut-0.1.3 → pearmut-0.2.1}/server/app.py
@@ -53,7 +53,8 @@ async def _log_response(request: LogResponseRequest):
         return JSONResponse(content={"error": "Unknown user ID"}, status_code=400)
 
     # append response to the output log
-    save_db_payload(campaign_id, request.payload | {"user_id": user_id, "item_i": item_i})
+    save_db_payload(
+        campaign_id, request.payload | {"user_id": user_id, "item_i": item_i})
 
     # if actions were submitted, we can log time data
     if "actions" in request.payload:
@@ -68,7 +69,15 @@ async def _log_response(request: LogResponseRequest):
             for a, b in zip(times, times[1:])
         ])
 
-    update_progress(campaign_id, user_id, tasks_data, progress_data, request.item_i, request.payload)
+    # initialize the per-user validations dict if it does not exist yet
+    if "validations" in request.payload:
+        if "validations" not in progress_data[campaign_id][user_id]:
+            progress_data[campaign_id][user_id]["validations"] = {}
+
+        progress_data[campaign_id][user_id]["validations"][request.item_i] = request.payload["validations"]
+
+    update_progress(campaign_id, user_id, tasks_data,
+                    progress_data, request.item_i, request.payload)
     save_progress_data(progress_data)
 
     return JSONResponse(content={"status": "ok"}, status_code=200)
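The new block above persists per-item validation results under `progress_data[campaign_id][user_id]["validations"]`, keyed by item index. A self-contained sketch of that bookkeeping, assuming the client sends `validations` as a list of per-check booleans (the handler stores the payload value verbatim, so the element type is actually up to the frontend):

```python
# names mirror the diff; the data below is made up for illustration
progress_data = {"camp1": {"user7": {}}}

def store_validations(campaign_id: str, user_id: str, item_i: int, payload: dict) -> None:
    if "validations" in payload:
        user = progress_data[campaign_id][user_id]
        # create the per-user dict lazily, then index by item
        user.setdefault("validations", {})[item_i] = payload["validations"]

store_validations("camp1", "user7", 3, {"validations": [True, False]})
store_validations("camp1", "user7", 5, {"validations": [True]})
print(progress_data["camp1"]["user7"]["validations"])  # {3: [True, False], 5: [True]}
```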
@@ -145,6 +154,11 @@ async def _dashboard_data(request: DashboardDataRequest):
     for user_id, user_val in progress_data[campaign_id].items():
         # shallow copy
         entry = dict(user_val)
+        entry["validations"] = [
+            all(v)
+            for v in list(entry.get("validations", {}).values())
+        ]
+
 
         if not is_privileged:
             entry["token_correct"] = None
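For the dashboard, each item's stored list of check results is collapsed into one pass/fail flag with `all(v)`. A small illustration of that reduction, under the same assumption of boolean lists; note that `all([])` is `True`, so an item with no recorded checks counts as passed:

```python
# per-item check results as they would sit under "validations"
stored = {3: [True, False], 5: [True], 8: []}

# mirrors the list comprehension added in _dashboard_data
flags = [all(v) for v in stored.values()]
print(flags)  # [False, True, True]
```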
@@ -229,10 +243,11 @@ async def _download_progress(
 
 static_dir = f"{os.path.dirname(os.path.abspath(__file__))}/static/"
 if not os.path.exists(static_dir + "index.html"):
-    raise FileNotFoundError("Static directory not found. Please build the frontend first.")
+    raise FileNotFoundError(
+        "Static directory not found. Please build the frontend first.")
 
 app.mount(
     "/",
     StaticFiles(directory=static_dir, html=True, follow_symlink=True),
     name="static",
-)
+)
{pearmut-0.1.3 → pearmut-0.2.1}/server/assignment.py
@@ -76,8 +76,6 @@ def get_i_item_taskbased(
     Get specific item for task-based protocol.
     """
     user_progress = progress_data[campaign_id][user_id]
-    if all(user_progress["progress"]):
-        return _completed_response(progress_data, campaign_id, user_id)
 
     # try to get existing annotations if any
     items_existing = get_db_log_item(campaign_id, user_id, item_i)
@@ -120,8 +118,6 @@ def get_i_item_singlestream(
     Get specific item for single-stream assignment.
     """
     user_progress = progress_data[campaign_id][user_id]
-    if all(user_progress["progress"]):
-        return _completed_response(progress_data, campaign_id, user_id)
 
     # try to get existing annotations if any
     # note the None user_id since it is shared
@@ -254,6 +250,7 @@ def _reset_user_time(progress_data: dict, campaign_id: str, user_id: str) -> Non
     progress_data[campaign_id][user_id]["time"] = 0.0
     progress_data[campaign_id][user_id]["time_start"] = None
     progress_data[campaign_id][user_id]["time_end"] = None
+    progress_data[campaign_id][user_id]["validations"] = {}
 
 
 def reset_task(
@@ -299,7 +296,6 @@ def update_progress(
     if assignment == "task-based":
         # even if it's already set it should be fine
         progress_data[campaign_id][user_id]["progress"][item_i] = True
-        # TODO: log attention checks/quality?
         return JSONResponse(content={"status": "ok"}, status_code=200)
     elif assignment == "single-stream":
         # progress all users
{pearmut-0.1.3 → pearmut-0.2.1}/server/cli.py
@@ -214,7 +214,7 @@ def main():
     import shutil
 
     confirm = input(
-        "Are you sure you want to purge all campaign data? This action cannot be undone. [y/n]"
+        "Are you sure you want to purge all campaign data? This action cannot be undone. [y/n] "
     )
     if confirm.lower() == 'y':
         shutil.rmtree(f"{ROOT}/data/tasks", ignore_errors=True)
{pearmut-0.1.3 → pearmut-0.2.1}/server/static/assets/style.css
@@ -225,4 +225,11 @@ input[type="button"].error_delete:hover {
 
 #progress span.progress_incomplete:hover {
     background: #aaa;
+}
+
+/* Validation warning indicator */
+.validation_warning {
+    margin-right: 5px;
+    position: relative;
+    top: -5px;
 }