pearmut 0.3.2__tar.gz → 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pearmut-0.3.2 → pearmut-1.0.0}/PKG-INFO +56 -27
- {pearmut-0.3.2 → pearmut-1.0.0}/README.md +54 -26
- {pearmut-0.3.2 → pearmut-1.0.0}/pearmut.egg-info/PKG-INFO +56 -27
- {pearmut-0.3.2 → pearmut-1.0.0}/pearmut.egg-info/SOURCES.txt +2 -0
- {pearmut-0.3.2 → pearmut-1.0.0}/pearmut.egg-info/requires.txt +1 -0
- {pearmut-0.3.2 → pearmut-1.0.0}/pyproject.toml +2 -1
- {pearmut-0.3.2 → pearmut-1.0.0}/server/app.py +52 -29
- pearmut-1.0.0/server/assignment.py +552 -0
- {pearmut-0.3.2 → pearmut-1.0.0}/server/cli.py +104 -25
- pearmut-1.0.0/server/results_export.py +210 -0
- pearmut-1.0.0/server/static/basic.bundle.js +1 -0
- pearmut-1.0.0/server/static/basic.html +97 -0
- pearmut-1.0.0/server/static/dashboard.bundle.js +1 -0
- pearmut-1.0.0/server/static/dashboard.html +96 -0
- pearmut-1.0.0/server/static/index.bundle.js +1 -0
- pearmut-1.0.0/server/static/index.html +1 -0
- {pearmut-0.3.2 → pearmut-1.0.0}/server/static/style.css +1 -1
- {pearmut-0.3.2 → pearmut-1.0.0}/server/utils.py +16 -2
- pearmut-0.3.2/server/assignment.py +0 -342
- pearmut-0.3.2/server/static/basic.bundle.js +0 -1
- pearmut-0.3.2/server/static/basic.html +0 -74
- pearmut-0.3.2/server/static/dashboard.bundle.js +0 -1
- pearmut-0.3.2/server/static/dashboard.html +0 -81
- pearmut-0.3.2/server/static/index.html +0 -1
- {pearmut-0.3.2 → pearmut-1.0.0}/LICENSE +0 -0
- {pearmut-0.3.2 → pearmut-1.0.0}/pearmut.egg-info/dependency_links.txt +0 -0
- {pearmut-0.3.2 → pearmut-1.0.0}/pearmut.egg-info/entry_points.txt +0 -0
- {pearmut-0.3.2 → pearmut-1.0.0}/pearmut.egg-info/top_level.txt +0 -0
- {pearmut-0.3.2 → pearmut-1.0.0}/server/static/favicon.svg +0 -0
- {pearmut-0.3.2 → pearmut-1.0.0}/setup.cfg +0 -0
--- pearmut-0.3.2/PKG-INFO
+++ pearmut-1.0.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pearmut
-Version: 0.3.2
+Version: 1.0.0
 Summary: A tool for evaluation of model outputs, primarily MT.
 Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
 License: MIT
@@ -14,6 +14,7 @@ Requires-Dist: fastapi>=0.110.0
 Requires-Dist: uvicorn>=0.29.0
 Requires-Dist: wonderwords>=3.0.0
 Requires-Dist: psutil>=7.1.0
+Requires-Dist: typst>=0.14.4
 Provides-Extra: dev
 Requires-Dist: pytest; extra == "dev"
 Dynamic: license-file
@@ -30,7 +31,8 @@ Dynamic: license-file
 
 [](https://github.com/zouharvi/pearmut/actions/workflows/test.yml)
 
-<img width="1000" alt="Screenshot of ESA/MQM interface" src="https://github.com/user-attachments/assets/
+<img width="1000" alt="Screenshot of ESA/MQM interface" src="https://github.com/user-attachments/assets/71334238-300b-4ffc-b777-7f3c242b1630" />
+
 
 ## Table of Contents
 
@@ -45,6 +47,7 @@ Dynamic: license-file
 - [Multimodal Annotations](#multimodal-annotations)
 - [Hosting Assets](#hosting-assets)
 - [Campaign Management](#campaign-management)
+- [Custom Completion Messages](#custom-completion-messages)
 - [CLI Commands](#cli-commands)
 - [Terminology](#terminology)
 - [Development](#development)
@@ -86,11 +89,13 @@ Campaigns are defined in JSON files (see [examples/](examples/)). The simplest c
     {
       "instructions": "Evaluate translation from en to cs_CZ", # message to show to users above the first item
       "src": "This will be the year that Guinness loses its cool. Cheers to that!",
-      "tgt": {"modelA": "Nevím přesně, kdy jsem to poprvé zaznamenal. Možná to bylo ve chvíli, ..."}
+      "tgt": {"modelA": "Nevím přesně, kdy jsem to poprvé zaznamenal. Možná to bylo ve chvíli, ..."},
+      "item_id": "first item in first document"
     },
     {
       "src": "I'm not sure I can remember exactly when I sensed it. Maybe it was when some...",
-      "tgt": {"modelA": "Tohle bude rok, kdy Guinness přijde o svůj „cool“ faktor. Na zdraví!"}
+      "tgt": {"modelA": "Tohle bude rok, kdy Guinness přijde o svůj „cool“ faktor. Na zdraví!"},
+      "item_id": "second item in first document"
     }
     ...
   ],
@@ -105,20 +110,10 @@ Campaigns are defined in JSON files (see [examples/](examples/)). The simplest c
   ]
 }
 ```
-
-
-
-
-      "src": "A najednou se všechna tato voda naplnila dalšími lidmi a dalšími věcmi.", # required
-      "tgt": {"modelA": "And suddenly all the water became full of other people and other people."} # required (dict)
-    },
-    {
-      "src": "toto je pokračování stejného dokumentu",
-      "tgt": {"modelA": "this is a continuation of the same document"}
-      # Additional keys stored for analysis
-    }
-  ]
-```
+
+Each item has to have `src` (string) and `tgt` (dictionary from model names to strings, even for a single model evaluation).
+For full Pearmut functionality (e.g. automatic statistical analysis), add `item_id` as well.
+Any other keys that you add will simply be stored in the logs.
 
 Load campaigns and start the server:
 ```bash
@@ -130,7 +125,7 @@ pearmut run
 
 - **`task-based`**: Each user has predefined items
 - **`single-stream`**: All users draw from a shared pool (random assignment)
-- **`dynamic`**:
+- **`dynamic`**: Items are dynamically assigned based on current model performance (see [Dynamic Assignment](#dynamic-assignment))
 
 ## Advanced Features
 
@@ -223,7 +218,8 @@ The `validation` field is an array (one per candidate). Dashboard shows ✅/❌
 }
 ```
 The `score_greaterthan` field specifies the index of the candidate that must have a lower score than the current candidate.
-See [examples/
+See [examples/tutorial/esa_deen.json](examples/tutorial/esa_deen.json) for a mock campaign with a fully prepared ESA tutorial.
+To use it, simply extract the `data` attribute and prefix it to each task in your campaign.
 
 ### Single-stream Assignment
 
@@ -243,6 +239,36 @@ All annotators draw from a shared pool with random assignment:
 }
 ```
 
+### Dynamic Assignment
+
+The `dynamic` assignment type intelligently selects items based on current model performance to focus annotation effort on top-performing models using contrastive comparisons.
+All items must contain outputs from all models for this assignment type to work properly.
+
+```python
+{
+    "campaign_id": "my dynamic campaign",
+    "info": {
+        "assignment": "dynamic",
+        "protocol": "ESA",
+        "users": 10, # number of annotators
+        "dynamic_top": 3, # how many top models to consider (required)
+        "dynamic_contrastive_models": 2, # how many models to compare per item (optional, default: 1)
+        "dynamic_first": 5, # annotations per model before dynamic kicks in (optional, default: 5)
+        "dynamic_backoff": 0.1, # probability of uniform sampling (optional, default: 0)
+    },
+    "data": [...], # list of all items (shared among all annotators)
+}
+```
+
+**How it works:**
+1. Initial phase: Each model gets `dynamic_first` annotations with fully random contrastive evaluation
+2. Dynamic phase: After the initial phase, top `dynamic_top` models (by average score) are identified
+3. Contrastive evaluation: From the top N models, `dynamic_contrastive_models` models are randomly selected for each item
+4. Item prioritization: Items with the fewest annotations for the selected models are prioritized
+5. Backoff: With probability `dynamic_backoff`, uniform random selection is used instead to maintain exploration
+
+This approach efficiently focuses annotation resources on distinguishing between the best-performing models while ensuring all models get adequate baseline coverage. The contrastive evaluation allows for direct comparison of multiple models simultaneously.
+For an example, see [examples/dynamic.json](examples/dynamic.json).
 
 ### Pre-defined User IDs and Tokens
 
@@ -316,6 +342,10 @@ Completion tokens are shown at annotation end for verification (download correct
 
 When tokens are supplied, the dashboard will try to show model rankings based on the names in the dictionaries.
 
+### Custom Completion Messages
+
+Customize the goodbye message shown to users when they complete all annotations using the `instructions_goodbye` field in campaign info. Supports arbitrary HTML for styling and formatting with variable replacement: `${TOKEN}` (completion token) and `${USER_ID}` (user ID). Default: `"If someone asks you for a token of completion, show them: ${TOKEN}"`.
+
 ## Terminology
 
 - **Campaign**: An annotation project that contains configuration, data, and user assignments. Each campaign has a unique identifier and is defined in a JSON file.
@@ -343,7 +373,7 @@ When tokens are supplied, the dashboard will try to show model rankings based on
 - **Assignment**: The method for distributing items to users:
   - **Task-based**: Each user has predefined items
   - **Single-stream**: Users draw from a shared pool with random assignment
-  - **Dynamic**:
+  - **Dynamic**: Items are intelligently assigned based on model performance to focus on top models
 
 ## Development
 
@@ -376,15 +406,14 @@ See [web/src/basic.ts](web/src/basic.ts) for example.
 
 Run on public server or tunnel local port to public IP/domain and run locally.
 
-## 
+## Citation
 
 If you use this work in your paper, please cite as following.
 ```bibtex
-@misc{
-
-
-
-  year={2026},
+@misc{zouhar2026pearmut,
+  author = {Zouhar, Vilém},
+  title = {Pearmut: Human Evaluation of Translation Made Trivial},
+  year = {2026}
 }
 ```
 
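The new Dynamic Assignment section above describes the selection procedure only in prose. Below is a minimal sketch of that loop for illustration; the function name, data layout, and helpers are hypothetical and are not taken from `server/assignment.py`, which holds the actual implementation.

```python
import random
import statistics


def pick_next_item(items, annotations, dynamic_top, dynamic_contrastive_models=1,
                   dynamic_first=5, dynamic_backoff=0.0):
    """Hypothetical sketch of the described phases: return (item index, models to show).

    `annotations` maps model name -> item index -> list of scores collected so far.
    """
    models = list(items[0]["tgt"].keys())

    # Backoff: with probability dynamic_backoff fall back to uniform sampling.
    if random.random() < dynamic_backoff:
        chosen = random.sample(models, min(dynamic_contrastive_models, len(models)))
        return random.randrange(len(items)), chosen

    # Initial phase: every model first collects dynamic_first annotations.
    pool = [
        m for m in models
        if sum(len(s) for s in annotations.get(m, {}).values()) < dynamic_first
    ]
    if not pool:
        # Dynamic phase: keep only the top dynamic_top models by average score.
        avg = {
            m: statistics.mean(x for scores in annotations[m].values() for x in scores)
            for m in models
        }
        pool = sorted(models, key=avg.get, reverse=True)[:dynamic_top]

    # Contrastive evaluation: compare dynamic_contrastive_models models on one item.
    chosen = random.sample(pool, min(dynamic_contrastive_models, len(pool)))

    # Item prioritization: prefer the item with the fewest annotations for the chosen models.
    def coverage(i):
        return sum(len(annotations.get(m, {}).get(i, [])) for m in chosen)

    return min(range(len(items)), key=coverage), chosen


# Tiny demo with no annotations yet: all models are still in the initial phase.
items = [{"src": "src text", "tgt": {"modelA": "a", "modelB": "b", "modelC": "c"}}] * 4
print(pick_next_item(items, {}, dynamic_top=2, dynamic_contrastive_models=2))
```

Prioritizing the least-covered item keeps per-model annotation counts balanced among the models that are still in contention.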
--- pearmut-0.3.2/README.md
+++ pearmut-1.0.0/README.md
@@ -10,7 +10,8 @@
 
 [](https://github.com/zouharvi/pearmut/actions/workflows/test.yml)
 
-<img width="1000" alt="Screenshot of ESA/MQM interface" src="https://github.com/user-attachments/assets/
+<img width="1000" alt="Screenshot of ESA/MQM interface" src="https://github.com/user-attachments/assets/71334238-300b-4ffc-b777-7f3c242b1630" />
+
 
 ## Table of Contents
 
@@ -25,6 +26,7 @@
 - [Multimodal Annotations](#multimodal-annotations)
 - [Hosting Assets](#hosting-assets)
 - [Campaign Management](#campaign-management)
+- [Custom Completion Messages](#custom-completion-messages)
 - [CLI Commands](#cli-commands)
 - [Terminology](#terminology)
 - [Development](#development)
@@ -66,11 +68,13 @@ Campaigns are defined in JSON files (see [examples/](examples/)). The simplest c
     {
       "instructions": "Evaluate translation from en to cs_CZ", # message to show to users above the first item
       "src": "This will be the year that Guinness loses its cool. Cheers to that!",
-      "tgt": {"modelA": "Nevím přesně, kdy jsem to poprvé zaznamenal. Možná to bylo ve chvíli, ..."}
+      "tgt": {"modelA": "Nevím přesně, kdy jsem to poprvé zaznamenal. Možná to bylo ve chvíli, ..."},
+      "item_id": "first item in first document"
     },
     {
       "src": "I'm not sure I can remember exactly when I sensed it. Maybe it was when some...",
-      "tgt": {"modelA": "Tohle bude rok, kdy Guinness přijde o svůj „cool“ faktor. Na zdraví!"}
+      "tgt": {"modelA": "Tohle bude rok, kdy Guinness přijde o svůj „cool“ faktor. Na zdraví!"},
+      "item_id": "second item in first document"
     }
     ...
   ],
@@ -85,20 +89,10 @@ Campaigns are defined in JSON files (see [examples/](examples/)). The simplest c
   ]
 }
 ```
-
-
-
-
-      "src": "A najednou se všechna tato voda naplnila dalšími lidmi a dalšími věcmi.", # required
-      "tgt": {"modelA": "And suddenly all the water became full of other people and other people."} # required (dict)
-    },
-    {
-      "src": "toto je pokračování stejného dokumentu",
-      "tgt": {"modelA": "this is a continuation of the same document"}
-      # Additional keys stored for analysis
-    }
-  ]
-```
+
+Each item has to have `src` (string) and `tgt` (dictionary from model names to strings, even for a single model evaluation).
+For full Pearmut functionality (e.g. automatic statistical analysis), add `item_id` as well.
+Any other keys that you add will simply be stored in the logs.
 
 Load campaigns and start the server:
 ```bash
@@ -110,7 +104,7 @@ pearmut run
 
 - **`task-based`**: Each user has predefined items
 - **`single-stream`**: All users draw from a shared pool (random assignment)
-- **`dynamic`**:
+- **`dynamic`**: Items are dynamically assigned based on current model performance (see [Dynamic Assignment](#dynamic-assignment))
 
 ## Advanced Features
 
@@ -203,7 +197,8 @@ The `validation` field is an array (one per candidate). Dashboard shows ✅/❌
 }
 ```
 The `score_greaterthan` field specifies the index of the candidate that must have a lower score than the current candidate.
-See [examples/
+See [examples/tutorial/esa_deen.json](examples/tutorial/esa_deen.json) for a mock campaign with a fully prepared ESA tutorial.
+To use it, simply extract the `data` attribute and prefix it to each task in your campaign.
 
 ### Single-stream Assignment
 
@@ -223,6 +218,36 @@ All annotators draw from a shared pool with random assignment:
 }
 ```
 
+### Dynamic Assignment
+
+The `dynamic` assignment type intelligently selects items based on current model performance to focus annotation effort on top-performing models using contrastive comparisons.
+All items must contain outputs from all models for this assignment type to work properly.
+
+```python
+{
+    "campaign_id": "my dynamic campaign",
+    "info": {
+        "assignment": "dynamic",
+        "protocol": "ESA",
+        "users": 10, # number of annotators
+        "dynamic_top": 3, # how many top models to consider (required)
+        "dynamic_contrastive_models": 2, # how many models to compare per item (optional, default: 1)
+        "dynamic_first": 5, # annotations per model before dynamic kicks in (optional, default: 5)
+        "dynamic_backoff": 0.1, # probability of uniform sampling (optional, default: 0)
+    },
+    "data": [...], # list of all items (shared among all annotators)
+}
+```
+
+**How it works:**
+1. Initial phase: Each model gets `dynamic_first` annotations with fully random contrastive evaluation
+2. Dynamic phase: After the initial phase, top `dynamic_top` models (by average score) are identified
+3. Contrastive evaluation: From the top N models, `dynamic_contrastive_models` models are randomly selected for each item
+4. Item prioritization: Items with the fewest annotations for the selected models are prioritized
+5. Backoff: With probability `dynamic_backoff`, uniform random selection is used instead to maintain exploration
+
+This approach efficiently focuses annotation resources on distinguishing between the best-performing models while ensuring all models get adequate baseline coverage. The contrastive evaluation allows for direct comparison of multiple models simultaneously.
+For an example, see [examples/dynamic.json](examples/dynamic.json).
 
 ### Pre-defined User IDs and Tokens
 
@@ -296,6 +321,10 @@ Completion tokens are shown at annotation end for verification (download correct
 
 When tokens are supplied, the dashboard will try to show model rankings based on the names in the dictionaries.
 
+### Custom Completion Messages
+
+Customize the goodbye message shown to users when they complete all annotations using the `instructions_goodbye` field in campaign info. Supports arbitrary HTML for styling and formatting with variable replacement: `${TOKEN}` (completion token) and `${USER_ID}` (user ID). Default: `"If someone asks you for a token of completion, show them: ${TOKEN}"`.
+
 ## Terminology
 
 - **Campaign**: An annotation project that contains configuration, data, and user assignments. Each campaign has a unique identifier and is defined in a JSON file.
@@ -323,7 +352,7 @@ When tokens are supplied, the dashboard will try to show model rankings based on
 - **Assignment**: The method for distributing items to users:
   - **Task-based**: Each user has predefined items
   - **Single-stream**: Users draw from a shared pool with random assignment
-  - **Dynamic**:
+  - **Dynamic**: Items are intelligently assigned based on model performance to focus on top models
 
 ## Development
 
@@ -356,15 +385,14 @@ See [web/src/basic.ts](web/src/basic.ts) for example.
 
 Run on public server or tunnel local port to public IP/domain and run locally.
 
-## 
+## Citation
 
 If you use this work in your paper, please cite as following.
 ```bibtex
-@misc{
-
-
-
-  year={2026},
+@misc{zouhar2026pearmut,
+  author = {Zouhar, Vilém},
+  title = {Pearmut: Human Evaluation of Translation Made Trivial},
+  year = {2026}
 }
 ```
 
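The new Custom Completion Messages section mentions the `${TOKEN}` and `${USER_ID}` placeholders. The snippet below only illustrates that placeholder syntax with Python's `string.Template` on a made-up campaign fragment; it is not the server's own rendering code, and the token and user ID values are invented.

```python
from string import Template

# Made-up campaign "info" fragment; `instructions_goodbye` may contain arbitrary HTML.
info = {
    "instructions_goodbye": (
        "<b>Thank you!</b> Your completion token is <code>${TOKEN}</code> "
        "(annotator ${USER_ID})."
    ),
}

# Illustrative substitution only -- the server performs the actual replacement.
message = Template(info["instructions_goodbye"]).safe_substitute(
    TOKEN="mellow-walrus-318",
    USER_ID="annotator-07",
)
print(message)
```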
--- pearmut-0.3.2/pearmut.egg-info/PKG-INFO
+++ pearmut-1.0.0/pearmut.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pearmut
-Version: 0.3.2
+Version: 1.0.0
 Summary: A tool for evaluation of model outputs, primarily MT.
 Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
 License: MIT
@@ -14,6 +14,7 @@ Requires-Dist: fastapi>=0.110.0
 Requires-Dist: uvicorn>=0.29.0
 Requires-Dist: wonderwords>=3.0.0
 Requires-Dist: psutil>=7.1.0
+Requires-Dist: typst>=0.14.4
 Provides-Extra: dev
 Requires-Dist: pytest; extra == "dev"
 Dynamic: license-file
@@ -30,7 +31,8 @@ Dynamic: license-file
 
 [](https://github.com/zouharvi/pearmut/actions/workflows/test.yml)
 
-<img width="1000" alt="Screenshot of ESA/MQM interface" src="https://github.com/user-attachments/assets/
+<img width="1000" alt="Screenshot of ESA/MQM interface" src="https://github.com/user-attachments/assets/71334238-300b-4ffc-b777-7f3c242b1630" />
+
 
 ## Table of Contents
 
@@ -45,6 +47,7 @@ Dynamic: license-file
 - [Multimodal Annotations](#multimodal-annotations)
 - [Hosting Assets](#hosting-assets)
 - [Campaign Management](#campaign-management)
+- [Custom Completion Messages](#custom-completion-messages)
 - [CLI Commands](#cli-commands)
 - [Terminology](#terminology)
 - [Development](#development)
@@ -86,11 +89,13 @@ Campaigns are defined in JSON files (see [examples/](examples/)). The simplest c
     {
       "instructions": "Evaluate translation from en to cs_CZ", # message to show to users above the first item
       "src": "This will be the year that Guinness loses its cool. Cheers to that!",
-      "tgt": {"modelA": "Nevím přesně, kdy jsem to poprvé zaznamenal. Možná to bylo ve chvíli, ..."}
+      "tgt": {"modelA": "Nevím přesně, kdy jsem to poprvé zaznamenal. Možná to bylo ve chvíli, ..."},
+      "item_id": "first item in first document"
     },
     {
       "src": "I'm not sure I can remember exactly when I sensed it. Maybe it was when some...",
-      "tgt": {"modelA": "Tohle bude rok, kdy Guinness přijde o svůj „cool“ faktor. Na zdraví!"}
+      "tgt": {"modelA": "Tohle bude rok, kdy Guinness přijde o svůj „cool“ faktor. Na zdraví!"},
+      "item_id": "second item in first document"
     }
     ...
   ],
@@ -105,20 +110,10 @@ Campaigns are defined in JSON files (see [examples/](examples/)). The simplest c
   ]
 }
 ```
-
-
-
-
-      "src": "A najednou se všechna tato voda naplnila dalšími lidmi a dalšími věcmi.", # required
-      "tgt": {"modelA": "And suddenly all the water became full of other people and other people."} # required (dict)
-    },
-    {
-      "src": "toto je pokračování stejného dokumentu",
-      "tgt": {"modelA": "this is a continuation of the same document"}
-      # Additional keys stored for analysis
-    }
-  ]
-```
+
+Each item has to have `src` (string) and `tgt` (dictionary from model names to strings, even for a single model evaluation).
+For full Pearmut functionality (e.g. automatic statistical analysis), add `item_id` as well.
+Any other keys that you add will simply be stored in the logs.
 
 Load campaigns and start the server:
 ```bash
@@ -130,7 +125,7 @@ pearmut run
 
 - **`task-based`**: Each user has predefined items
 - **`single-stream`**: All users draw from a shared pool (random assignment)
-- **`dynamic`**:
+- **`dynamic`**: Items are dynamically assigned based on current model performance (see [Dynamic Assignment](#dynamic-assignment))
 
 ## Advanced Features
 
@@ -223,7 +218,8 @@ The `validation` field is an array (one per candidate). Dashboard shows ✅/❌
 }
 ```
 The `score_greaterthan` field specifies the index of the candidate that must have a lower score than the current candidate.
-See [examples/
+See [examples/tutorial/esa_deen.json](examples/tutorial/esa_deen.json) for a mock campaign with a fully prepared ESA tutorial.
+To use it, simply extract the `data` attribute and prefix it to each task in your campaign.
 
 ### Single-stream Assignment
 
@@ -243,6 +239,36 @@ All annotators draw from a shared pool with random assignment:
 }
 ```
 
+### Dynamic Assignment
+
+The `dynamic` assignment type intelligently selects items based on current model performance to focus annotation effort on top-performing models using contrastive comparisons.
+All items must contain outputs from all models for this assignment type to work properly.
+
+```python
+{
+    "campaign_id": "my dynamic campaign",
+    "info": {
+        "assignment": "dynamic",
+        "protocol": "ESA",
+        "users": 10, # number of annotators
+        "dynamic_top": 3, # how many top models to consider (required)
+        "dynamic_contrastive_models": 2, # how many models to compare per item (optional, default: 1)
+        "dynamic_first": 5, # annotations per model before dynamic kicks in (optional, default: 5)
+        "dynamic_backoff": 0.1, # probability of uniform sampling (optional, default: 0)
+    },
+    "data": [...], # list of all items (shared among all annotators)
+}
+```
+
+**How it works:**
+1. Initial phase: Each model gets `dynamic_first` annotations with fully random contrastive evaluation
+2. Dynamic phase: After the initial phase, top `dynamic_top` models (by average score) are identified
+3. Contrastive evaluation: From the top N models, `dynamic_contrastive_models` models are randomly selected for each item
+4. Item prioritization: Items with the fewest annotations for the selected models are prioritized
+5. Backoff: With probability `dynamic_backoff`, uniform random selection is used instead to maintain exploration
+
+This approach efficiently focuses annotation resources on distinguishing between the best-performing models while ensuring all models get adequate baseline coverage. The contrastive evaluation allows for direct comparison of multiple models simultaneously.
+For an example, see [examples/dynamic.json](examples/dynamic.json).
 
 ### Pre-defined User IDs and Tokens
 
@@ -316,6 +342,10 @@ Completion tokens are shown at annotation end for verification (download correct
 
 When tokens are supplied, the dashboard will try to show model rankings based on the names in the dictionaries.
 
+### Custom Completion Messages
+
+Customize the goodbye message shown to users when they complete all annotations using the `instructions_goodbye` field in campaign info. Supports arbitrary HTML for styling and formatting with variable replacement: `${TOKEN}` (completion token) and `${USER_ID}` (user ID). Default: `"If someone asks you for a token of completion, show them: ${TOKEN}"`.
+
 ## Terminology
 
 - **Campaign**: An annotation project that contains configuration, data, and user assignments. Each campaign has a unique identifier and is defined in a JSON file.
@@ -343,7 +373,7 @@ When tokens are supplied, the dashboard will try to show model rankings based on
 - **Assignment**: The method for distributing items to users:
   - **Task-based**: Each user has predefined items
   - **Single-stream**: Users draw from a shared pool with random assignment
-  - **Dynamic**:
+  - **Dynamic**: Items are intelligently assigned based on model performance to focus on top models
 
 ## Development
 
@@ -376,15 +406,14 @@ See [web/src/basic.ts](web/src/basic.ts) for example.
 
 Run on public server or tunnel local port to public IP/domain and run locally.
 
-## 
+## Citation
 
 If you use this work in your paper, please cite as following.
 ```bibtex
-@misc{
-
-
-
-  year={2026},
+@misc{zouhar2026pearmut,
+  author = {Zouhar, Vilém},
+  title = {Pearmut: Human Evaluation of Translation Made Trivial},
+  year = {2026}
 }
 ```
 
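The campaign format described in the README changes above requires `src` and `tgt` on every item, with `item_id` recommended for the automatic statistical analysis. A small pre-flight check along those lines might look like the following; the file path is a placeholder and the handling of `data` (either a flat item list or a list of per-task item lists) is an assumption, not a statement about Pearmut's own loader.

```python
import json

# Illustrative pre-flight check for a campaign file, not part of Pearmut itself.
with open("campaign.json") as f:  # placeholder path
    campaign = json.load(f)

for task in campaign["data"]:
    # Tolerate both a flat list of items and a list of per-task item lists (assumption).
    items = task if isinstance(task, list) else [task]
    for item in items:
        assert isinstance(item.get("src"), str), "every item needs a string `src`"
        assert isinstance(item.get("tgt"), dict) and item["tgt"], \
            "every item needs a `tgt` dict mapping model names to outputs"
        if "item_id" not in item:
            print("note: item without item_id:", item["src"][:40])
```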
--- pearmut-0.3.2/pearmut.egg-info/SOURCES.txt
+++ pearmut-1.0.0/pearmut.egg-info/SOURCES.txt
@@ -10,11 +10,13 @@ pearmut.egg-info/top_level.txt
 server/app.py
 server/assignment.py
 server/cli.py
+server/results_export.py
 server/utils.py
 server/static/basic.bundle.js
 server/static/basic.html
 server/static/dashboard.bundle.js
 server/static/dashboard.html
 server/static/favicon.svg
+server/static/index.bundle.js
 server/static/index.html
 server/static/style.css
--- pearmut-0.3.2/pyproject.toml
+++ pearmut-1.0.0/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "pearmut"
-version = "0.3.2"
+version = "1.0.0"
 description = "A tool for evaluation of model outputs, primarily MT."
 readme = "README.md"
 license = { text = "MIT" }
@@ -17,6 +17,7 @@ dependencies = [
     "uvicorn >= 0.29.0",
     "wonderwords >= 3.0.0",
     "psutil >= 7.1.0",
+    "typst >= 0.14.4",
 ]
 
 [project.optional-dependencies]
--- pearmut-0.3.2/server/app.py
+++ pearmut-1.0.0/server/app.py
@@ -1,20 +1,23 @@
-import collections
 import json
 import os
-import statistics
 from typing import Any
 
 from fastapi import FastAPI, Query
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import JSONResponse
+from fastapi.responses import JSONResponse, Response
 from fastapi.staticfiles import StaticFiles
 from pydantic import BaseModel
 
 from .assignment import get_i_item, get_next_item, reset_task, update_progress
+from .results_export import (
+    compute_model_scores,
+    generate_latex_table,
+    generate_pdf,
+    generate_typst_table,
+)
 from .utils import (
     ROOT,
     check_validation_threshold,
-    get_db_log,
     load_progress_data,
     save_db_payload,
     save_progress_data,
@@ -159,7 +162,7 @@ async def _dashboard_data(request: DashboardDataRequest):
 
     progress_new = {}
     assignment = tasks_data[campaign_id]["info"]["assignment"]
-    if assignment not in ["task-based", "single-stream"]:
+    if assignment not in ["task-based", "single-stream", "dynamic"]:
        return JSONResponse(
            content="Unsupported campaign assignment type", status_code=400
        )
@@ -211,31 +214,47 @@ async def _dashboard_results(request: DashboardResultsRequest):
     if token != tasks_data[campaign_id]["token"]:
         return JSONResponse(content="Invalid token", status_code=400)
 
-
-    model_scores = collections.defaultdict(dict)
-
-    # Iterate through all tasks to find items with 'models' field (basic template)
-    log = get_db_log(campaign_id)
-    for entry in log:
-        if "item" not in entry or "annotation" not in entry:
-            continue
-        for item, annotation in zip(entry["item"], entry["annotation"]):
-            for model, annotation in annotation.items():
-                if "score" in annotation:
-                    model_scores[model][json.dumps(item)] = annotation["score"]
-
-    results = [
-        {
-            "model": model,
-            "score": statistics.mean(scores.values()),
-            "count": len(scores),
-        }
-        for model, scores in model_scores.items()
-    ]
-    results.sort(key=lambda x: x["score"], reverse=True)
+    results = compute_model_scores(campaign_id)
     return JSONResponse(content=results, status_code=200)
 
 
+@app.get("/export-results")
+async def _export_results(
+    campaign_id: str = Query(),
+    token: str = Query(),
+    format: str = Query(),
+):
+    if campaign_id not in progress_data:
+        return JSONResponse(content="Unknown campaign ID", status_code=400)
+
+    # Check if token is valid
+    if token != tasks_data[campaign_id]["token"]:
+        return JSONResponse(content="Invalid token", status_code=400)
+
+    results = compute_model_scores(campaign_id)
+
+    if format == "typst":
+        content = generate_typst_table(results)
+        return Response(
+            content=content,
+            media_type="text/plain",
+        )
+    elif format == "latex":
+        content = generate_latex_table(results)
+        return Response(
+            content=content,
+            media_type="text/plain",
+        )
+    elif format == "pdf":
+        pdf_bytes = generate_pdf(results, campaign_id)
+        return Response(
+            content=pdf_bytes,
+            media_type="application/pdf",
+        )
+    else:
+        return JSONResponse(content="Invalid export format", status_code=400)
+
+
 class ResetTaskRequest(BaseModel):
     campaign_id: str
     user_id: str
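The inline scoring logic removed from `_dashboard_results` above is what moved into the new `server/results_export.py` (alongside the table and PDF generators). The sketch below reconstructs that logic from the deleted lines only, so it shows what `compute_model_scores` presumably computes rather than the new module's actual code; it takes the log as an argument to stay self-contained, whereas the real function takes a campaign ID.

```python
import collections
import json
import statistics


def compute_model_scores_sketch(log):
    """Reconstruction of the deleted inline logic; the real compute_model_scores
    in server/results_export.py takes a campaign_id and reads the log itself."""
    model_scores = collections.defaultdict(dict)
    for entry in log:
        if "item" not in entry or "annotation" not in entry:
            continue
        for item, annotations in zip(entry["item"], entry["annotation"]):
            for model, annotation in annotations.items():
                if "score" in annotation:
                    # One score per (model, item), keyed by the serialized item.
                    model_scores[model][json.dumps(item)] = annotation["score"]

    results = [
        {"model": model, "score": statistics.mean(scores.values()), "count": len(scores)}
        for model, scores in model_scores.items()
    ]
    results.sort(key=lambda x: x["score"], reverse=True)
    return results


# Tiny demo log entry with one item annotated for one model.
example_log = [{"item": [{"src": "x"}], "annotation": [{"modelA": {"score": 80}}]}]
print(compute_model_scores_sketch(example_log))
```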
@@ -284,7 +303,9 @@ async def _download_annotations(
     return JSONResponse(
         content=output,
         status_code=200,
-        headers={
+        headers={
+            "Content-Disposition": 'attachment; filename="annotations.json"',
+        },
     )
 
 
@@ -312,7 +333,9 @@ async def _download_progress(
     return JSONResponse(
         content=output,
         status_code=200,
-        headers={
+        headers={
+            "Content-Disposition": 'attachment; filename="progress.json"',
+        },
     )
 
 
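For reference, the `/export-results` endpoint added in this version takes `campaign_id`, `token`, and `format` (`typst`, `latex`, or `pdf`) as query parameters. A client-side example is sketched below; the host, port, campaign ID, and token are placeholders, and the `requests` library is assumed to be installed.

```python
import requests

# Placeholder server address and credentials; only the parameter names come from the diff above.
resp = requests.get(
    "http://localhost:8000/export-results",
    params={
        "campaign_id": "my dynamic campaign",
        "token": "DASHBOARD-TOKEN",
        "format": "pdf",  # or "typst" / "latex" for a plain-text table
    },
)
resp.raise_for_status()

with open("results.pdf", "wb") as f:
    f.write(resp.content)
```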