pearmut 0.3.2__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,19 +1,41 @@
1
- <html><head><meta charset="UTF-8"><meta name="viewport" content="width=900px"><title>Pearmut Evaluation</title><link rel="icon" type="image/svg+xml" href="favicon.svg"><link rel="stylesheet" href="style.css"><style>.white-box {
2
- margin-bottom: 20px;
1
+ <html><head><meta charset="UTF-8"><meta name="viewport" content="width=900px"><title>Pearmut Evaluation</title><link rel="icon" type="image/svg+xml" href="favicon.svg"><style>.white-box {
2
+ margin-bottom: 10px;
3
3
  width: max-content;
4
+ display: inline-block;
4
5
  }
5
6
 
6
7
  .abutton {
7
- background-color: white;
8
+ background-color: #eee !important;
8
9
  text-decoration: none;
9
10
  color: black;
10
11
  padding: 5px 10px;
11
12
  border-radius: 8px;
12
13
  box-shadow: 0 4px 6px #0003;
14
+ border: none;
15
+ font-size: smaller !important;
13
16
  }
14
17
 
15
18
  .abutton:hover {
16
- background-color: #ffd;
19
+ background-color: #ffd !important;
20
+ cursor: pointer;
21
+ }
22
+
23
+ .ranking-content input[type="button"]:not(:last-child) {
24
+ margin-right: 5px;
25
+ }
26
+
27
+ .ranking-content input[type="button"] {
28
+ margin-bottom: 8px;
29
+ padding: 3px 5px;
30
+ box-shadow: 0 2px 2px #0003 !important;
31
+ }
32
+
33
+ .ranking-content {
34
+ display: inline-block;
35
+ vertical-align: top;
36
+ }
37
+
38
+ .show-ranking-btn:hover {
17
39
  cursor: pointer;
18
40
  }
19
41
 
@@ -54,17 +76,10 @@
54
76
  }
55
77
 
56
78
  .dashboard-content {
57
- display: flex;
58
- gap: 30px;
59
- align-items: flex-start;
60
- }
61
-
62
- .dashboard-table {
63
- flex-shrink: 0;
79
+ display: inline-block;
64
80
  }
65
81
 
66
82
  .results-section {
67
- flex-shrink: 0;
68
83
  min-width: 300px;
69
84
  }
70
85
 
@@ -78,4 +93,4 @@
78
93
  .white-box {
79
94
  overflow-x: auto;
80
95
  }
81
- }</style><script defer="defer" src="dashboard.bundle.js"></script></head><body style="padding-top: 50px; padding-bottom: 50px;"><div id="main_div" style="width: calc(100% - 200px); min-width: 1300px; margin-left: auto; margin-right: auto;"><div style="margin-bottom: 15px; font-size: 0.9em; color: #555;">💤not started &nbsp;&nbsp; ✍️in progress &nbsp;&nbsp; ✅completed & passed &nbsp;&nbsp;❌completed & failed &nbsp;&nbsp;&nbsp;&nbsp; 🔗annotator link &nbsp;&nbsp; 👁️anotator link (view-only) &nbsp;&nbsp; 🗑️reset annotator progress</div><div id="dashboard_div"></div><br><a class="abutton" id="download_progress" style="width: 300px; margin-right: 20px;">Download progress metadata 💾</a> <a class="abutton" id="download_annotations" style="width: 300px; margin-right: 20px;">Download all annotations 💾</a></div></body></html>
96
+ }</style><script defer="defer" src="dashboard.bundle.js?148e44d47bac0dd405e1"></script><link href="style.css?148e44d47bac0dd405e1" rel="stylesheet"></head><body style="padding-top: 50px; padding-bottom: 50px;"><div id="main_div" style="width: calc(100% - 200px); min-width: 1300px; margin-left: auto; margin-right: auto;"><div style="margin-bottom: 15px; font-size: 0.9em; color: #555;">💤not started &nbsp;&nbsp; ✍️in progress &nbsp;&nbsp; ✅completed & passed &nbsp;&nbsp;❌completed & failed &nbsp;&nbsp; 🔗annotator link &nbsp;&nbsp; 👁️anotator link (view-only) &nbsp;&nbsp; 🗑️reset annotator progress &nbsp;&nbsp; ⚖️show model ranking</div><div id="dashboard_div"></div><br><a class="abutton" id="download_progress" style="width: 300px; margin-right: 20px;">Download progress metadata 💾</a> <a class="abutton" id="download_annotations" style="width: 300px; margin-right: 20px;">Download all annotations 💾</a></div></body></html>
@@ -0,0 +1 @@
1
+ (()=>{"use strict";var e={284(e,r,t){t.r(r)}},r={};function t(o){var n=r[o];if(void 0!==n)return n.exports;var i=r[o]={exports:{}};return e[o](i,i.exports,t),i.exports}t.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})};t(284)})();
pearmut/static/index.html CHANGED
@@ -1 +1 @@
1
- <!doctype html><html lang="en" style="height: 100%;"><head><meta charset="UTF-8"><meta name="viewport" content="width=900px"><title>Pearmut Evaluation</title><link rel="icon" type="image/svg+xml" href="favicon.svg"><link rel="stylesheet" href="style.css"></head><body><div class="white-box" style="width: max-content; font-size: large; position: absolute; top: 50%; left: 50%; transform: translate(-50%, -50%);">You have reached the Pearmut🍐 evaluation interface.<ul><li>If you are an annotator, you should have received a specialized link that takes you to the annotations.</li><li>If you are annotation manager, then you should distribute these links.</li></ul><br><br>See the <a href="https://github.com/zouharvi/pearmut">Pearmut project on GitHub</a>. Made with 💚 by Vilém Zouhar and others in 2025-2026.</div></body></html>
1
+ <!doctype html><html lang="en" style="height: 100%;"><head><meta charset="UTF-8"><meta name="viewport" content="width=900px"><title>Pearmut Evaluation</title><link rel="icon" type="image/svg+xml" href="favicon.svg"><script defer="defer" src="index.bundle.js?148e44d47bac0dd405e1"></script><link href="style.css?148e44d47bac0dd405e1" rel="stylesheet"></head><body><div class="white-box" style="width: max-content; font-size: large; position: absolute; top: 50%; left: 50%; transform: translate(-50%, -50%);">You have reached the Pearmut🍐 evaluation interface.<ul><li>If you are an annotator, you should have received a specialized link that takes you to the annotations.</li><li>If you are annotation manager, then you should distribute these links.</li></ul><br><br>See the <a href="https://github.com/zouharvi/pearmut">Pearmut project on GitHub</a>. Made with 💚 by Vilém Zouhar and others in 2025-2026.</div></body></html>
pearmut/static/style.css CHANGED
@@ -235,4 +235,4 @@ input[type="button"].error_delete:hover {
235
235
 
236
236
  .char_missing {
237
237
  font-family: monospace;
238
- }
238
+ }
pearmut/utils.py CHANGED
@@ -14,12 +14,26 @@ def load_progress_data(warn: str | None = None):
14
14
  with open(f"{ROOT}/data/progress.json", "w") as f:
15
15
  f.write(json.dumps({}))
16
16
  with open(f"{ROOT}/data/progress.json", "r") as f:
17
- return json.load(f)
17
+ data = json.load(f)
18
+
19
+ return data
18
20
 
19
21
 
20
22
  def save_progress_data(data):
23
+ # Convert sets to lists for JSON serialization
24
+ def convert_sets(obj):
25
+ if isinstance(obj, dict):
26
+ return {k: convert_sets(v) for k, v in obj.items()}
27
+ elif isinstance(obj, list):
28
+ return [convert_sets(item) for item in obj]
29
+ elif isinstance(obj, set):
30
+ return list(obj)
31
+ else:
32
+ return obj
33
+
34
+ serializable_data = convert_sets(data)
21
35
  with open(f"{ROOT}/data/progress.json", "w") as f:
22
- json.dump(data, f, indent=2)
36
+ json.dump(serializable_data, f, indent=2)
23
37
 
24
38
 
25
39
  _logs = {}
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pearmut
3
- Version: 0.3.2
3
+ Version: 1.0.0
4
4
  Summary: A tool for evaluation of model outputs, primarily MT.
5
5
  Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
6
6
  License: MIT
@@ -14,6 +14,7 @@ Requires-Dist: fastapi>=0.110.0
14
14
  Requires-Dist: uvicorn>=0.29.0
15
15
  Requires-Dist: wonderwords>=3.0.0
16
16
  Requires-Dist: psutil>=7.1.0
17
+ Requires-Dist: typst>=0.14.4
17
18
  Provides-Extra: dev
18
19
  Requires-Dist: pytest; extra == "dev"
19
20
  Dynamic: license-file
@@ -30,7 +31,8 @@ Dynamic: license-file
30
31
  &nbsp;
31
32
  [![build status](https://github.com/zouharvi/pearmut/actions/workflows/test.yml/badge.svg)](https://github.com/zouharvi/pearmut/actions/workflows/test.yml)
32
33
 
33
- <img width="1000" alt="Screenshot of ESA/MQM interface" src="https://github.com/user-attachments/assets/4fb9a1cb-78ac-47e0-99cd-0870a368a0ad" />
34
+ <img width="1000" alt="Screenshot of ESA/MQM interface" src="https://github.com/user-attachments/assets/71334238-300b-4ffc-b777-7f3c242b1630" />
35
+
34
36
 
35
37
  ## Table of Contents
36
38
 
@@ -45,6 +47,7 @@ Dynamic: license-file
45
47
  - [Multimodal Annotations](#multimodal-annotations)
46
48
  - [Hosting Assets](#hosting-assets)
47
49
  - [Campaign Management](#campaign-management)
50
+ - [Custom Completion Messages](#custom-completion-messages)
48
51
  - [CLI Commands](#cli-commands)
49
52
  - [Terminology](#terminology)
50
53
  - [Development](#development)
@@ -86,11 +89,13 @@ Campaigns are defined in JSON files (see [examples/](examples/)). The simplest c
86
89
  {
87
90
  "instructions": "Evaluate translation from en to cs_CZ", # message to show to users above the first item
88
91
  "src": "This will be the year that Guinness loses its cool. Cheers to that!",
89
- "tgt": {"modelA": "Nevím přesně, kdy jsem to poprvé zaznamenal. Možná to bylo ve chvíli, ..."}
92
+ "tgt": {"modelA": "Nevím přesně, kdy jsem to poprvé zaznamenal. Možná to bylo ve chvíli, ..."},
93
+ "item_id": "first item in first document"
90
94
  },
91
95
  {
92
96
  "src": "I'm not sure I can remember exactly when I sensed it. Maybe it was when some...",
93
- "tgt": {"modelA": "Tohle bude rok, kdy Guinness přijde o svůj „cool“ faktor. Na zdraví!"}
97
+ "tgt": {"modelA": "Tohle bude rok, kdy Guinness přijde o svůj „cool“ faktor. Na zdraví!"},
98
+ "item_id": "second item in first document"
94
99
  }
95
100
  ...
96
101
  ],
@@ -105,20 +110,10 @@ Campaigns are defined in JSON files (see [examples/](examples/)). The simplest c
105
110
  ]
106
111
  }
107
112
  ```
108
- Task items are protocol-specific. For ESA/DA/MQM protocols, each item is a dictionary representing a document unit:
109
- ```python
110
- [
111
- {
112
- "src": "A najednou se všechna tato voda naplnila dalšími lidmi a dalšími věcmi.", # required
113
- "tgt": {"modelA": "And suddenly all the water became full of other people and other people."} # required (dict)
114
- },
115
- {
116
- "src": "toto je pokračování stejného dokumentu",
117
- "tgt": {"modelA": "this is a continuation of the same document"}
118
- # Additional keys stored for analysis
119
- }
120
- ]
121
- ```
113
+
114
+ Each item has to have `src` (string) and `tgt` (dictionary from model names to strings, even for a single model evaluation).
115
+ For full Pearmut functionality (e.g. automatic statistical analysis), add `item_id` as well.
116
+ Any other keys that you add will simply be stored in the logs.
122
117
 
123
118
  Load campaigns and start the server:
124
119
  ```bash
@@ -130,7 +125,7 @@ pearmut run
130
125
 
131
126
  - **`task-based`**: Each user has predefined items
132
127
  - **`single-stream`**: All users draw from a shared pool (random assignment)
133
- - **`dynamic`**: work in progress ⚠️
128
+ - **`dynamic`**: Items are dynamically assigned based on current model performance (see [Dynamic Assignment](#dynamic-assignment))
134
129
 
135
130
  ## Advanced Features
136
131
 
@@ -223,7 +218,8 @@ The `validation` field is an array (one per candidate). Dashboard shows ✅/❌
223
218
  }
224
219
  ```
225
220
  The `score_greaterthan` field specifies the index of the candidate that must have a lower score than the current candidate.
226
- See [examples/tutorial_kway.json](examples/tutorial_kway.json).
221
+ See [examples/tutorial/esa_deen.json](examples/tutorial/esa_deen.json) for a mock campaign with a fully prepared ESA tutorial.
222
+ To use it, simply extract the `data` attribute and prefix it to each task in your campaign.
227
223
 
228
224
  ### Single-stream Assignment
229
225
 
@@ -243,6 +239,36 @@ All annotators draw from a shared pool with random assignment:
243
239
  }
244
240
  ```
245
241
 
242
+ ### Dynamic Assignment
243
+
244
+ The `dynamic` assignment type intelligently selects items based on current model performance to focus annotation effort on top-performing models using contrastive comparisons.
245
+ All items must contain outputs from all models for this assignment type to work properly.
246
+
247
+ ```python
248
+ {
249
+ "campaign_id": "my dynamic campaign",
250
+ "info": {
251
+ "assignment": "dynamic",
252
+ "protocol": "ESA",
253
+ "users": 10, # number of annotators
254
+ "dynamic_top": 3, # how many top models to consider (required)
255
+ "dynamic_contrastive_models": 2, # how many models to compare per item (optional, default: 1)
256
+ "dynamic_first": 5, # annotations per model before dynamic kicks in (optional, default: 5)
257
+ "dynamic_backoff": 0.1, # probability of uniform sampling (optional, default: 0)
258
+ },
259
+ "data": [...], # list of all items (shared among all annotators)
260
+ }
261
+ ```
262
+
263
+ **How it works:**
264
+ 1. Initial phase: Each model gets `dynamic_first` annotations with fully random contrastive evaluation
265
+ 2. Dynamic phase: After the initial phase, top `dynamic_top` models (by average score) are identified
266
+ 3. Contrastive evaluation: From the top N models, `dynamic_contrastive_models` models are randomly selected for each item
267
+ 4. Item prioritization: Items with the least annotations for the selected models are prioritized
268
+ 5. Backoff: With probability `dynamic_backoff`, uniform random selection is used instead to maintain exploration
269
+
270
+ This approach efficiently focuses annotation resources on distinguishing between the best-performing models while ensuring all models get adequate baseline coverage. The contrastive evaluation allows for direct comparison of multiple models simultaneously.
271
+ For an example, see [examples/dynamic.json](examples/dynamic.json).
246
272
 
247
273
  ### Pre-defined User IDs and Tokens
248
274
 
@@ -316,6 +342,10 @@ Completion tokens are shown at annotation end for verification (download correct
316
342
 
317
343
  When tokens are supplied, the dashboard will try to show model rankings based on the names in the dictionaries.
318
344
 
345
+ ### Custom Completion Messages
346
+
347
+ Customize the goodbye message shown to users when they complete all annotations using the `instructions_goodbye` field in campaign info. Supports arbitrary HTML for styling and formatting with variable replacement: `${TOKEN}` (completion token) and `${USER_ID}` (user ID). Default: `"If someone asks you for a token of completion, show them: ${TOKEN}"`.
348
+
319
349
  ## Terminology
320
350
 
321
351
  - **Campaign**: An annotation project that contains configuration, data, and user assignments. Each campaign has a unique identifier and is defined in a JSON file.
@@ -343,7 +373,7 @@ When tokens are supplied, the dashboard will try to show model rankings based on
343
373
  - **Assignment**: The method for distributing items to users:
344
374
  - **Task-based**: Each user has predefined items
345
375
  - **Single-stream**: Users draw from a shared pool with random assignment
346
- - **Dynamic**: Work in progress
376
+ - **Dynamic**: Items are intelligently assigned based on model performance to focus on top models
347
377
 
348
378
  ## Development
349
379
 
@@ -376,15 +406,14 @@ See [web/src/basic.ts](web/src/basic.ts) for example.
376
406
 
377
407
  Run on public server or tunnel local port to public IP/domain and run locally.
378
408
 
379
- ## Misc.
409
+ ## Citation
380
410
 
381
411
  If you use this work in your paper, please cite it as follows.
382
412
  ```bibtex
383
- @misc{zouhar2025pearmut,
384
- author={Vilém Zouhar},
385
- title={Pearmut: Platform for Evaluating and Reviewing of Multilingual Tasks},
386
- url={https://github.com/zouharvi/pearmut/},
387
- year={2026},
413
+ @misc{zouhar2026pearmut,
414
+ author = {Zouhar, Vilém},
415
+ title = {Pearmut: Human Evaluation of Translation Made Trivial},
416
+ year = {2026}
388
417
  }
389
418
  ```
390
419
 
@@ -0,0 +1,19 @@
1
+ pearmut/app.py,sha256=eZgJjQfBi5WuNYyc91JB_wo_8dEklhNbzmbDh228f_0,10635
2
+ pearmut/assignment.py,sha256=wtOyiEycm-yiYPt9NfSnOLa52bz6vDj7M4_6jaHoMi4,20011
3
+ pearmut/cli.py,sha256=-79930TRcNqBDOhWvxGJnhEX8mYH0OA1EgpfYz3H_uI,24711
4
+ pearmut/results_export.py,sha256=UxtbbqbrqVJDBSYQf-aT3M7lMgQgQiepHkHy4TSZrGg,5743
5
+ pearmut/utils.py,sha256=7CUemQHlQnOhc_a07QXVivdd8DodVykI6dek151ftTs,4798
6
+ pearmut/static/basic.bundle.js,sha256=LnPSRoU-05MGLzS6tYT50eAxWpXIYj8rtjdfT4biyGc,110682
7
+ pearmut/static/basic.html,sha256=s1Y9Qsn-VuQNoW4Pi9BNQ03z7W5wOTPRi7dY88qT59U,5998
8
+ pearmut/static/dashboard.bundle.js,sha256=jtSI2-UehI_tMJMWJLRAopr1nlXUMQD7ohCGU6JGQEo,102109
9
+ pearmut/static/dashboard.html,sha256=T-4J82egtpuEhjF3LVET0JENJ2vPMiFA351W9bnCgsg,3187
10
+ pearmut/static/favicon.svg,sha256=gVPxdBlyfyJVkiMfh8WLaiSyH4lpwmKZs8UiOeX8YW4,7347
11
+ pearmut/static/index.bundle.js,sha256=-koQkaoRCei-H40wozYnvf0PnrAoZbtOXHotJcTn5OM,346
12
+ pearmut/static/index.html,sha256=r4PHyLh0JZ99nAZVlfcq70XIBRzoI4_C5MMXiL9kktw,930
13
+ pearmut/static/style.css,sha256=NKdwsugsS946w3pREfLab2Rf3Av9hNk0fvkTxmhyGrQ,4102
14
+ pearmut-1.0.0.dist-info/licenses/LICENSE,sha256=GtR6RcTdRn-P23h5pKFuWSLZrLPD0ytHAwSOBt7aLpI,1071
15
+ pearmut-1.0.0.dist-info/METADATA,sha256=MaOqnfxUJgsyFXiFIFOutjNsy3TuDm1auuVSI74YiSw,18060
16
+ pearmut-1.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
17
+ pearmut-1.0.0.dist-info/entry_points.txt,sha256=eEA9LVWsS3neQbMvL_nMvEw8I0oFudw8nQa1iqxOiWM,45
18
+ pearmut-1.0.0.dist-info/top_level.txt,sha256=CdgtUM-SKQDt6o5g0QreO-_7XTBP9_wnHMS1P-Rl5Go,8
19
+ pearmut-1.0.0.dist-info/RECORD,,
@@ -1,17 +0,0 @@
1
- pearmut/app.py,sha256=IZNmeKTAuLcf9FggvlHktWDbIGxfykjSRM-sI8Byfik,10179
2
- pearmut/assignment.py,sha256=_0hNXtA-Mgn6bRyRVjgeGxERKRvBezR3NmEwx2uME38,11685
3
- pearmut/cli.py,sha256=eMiuNcEztWDuEsfCA9n_Jj0wDDetzXFwCN6rOOZHySw,20805
4
- pearmut/utils.py,sha256=Rl_i-WCaJN3p_VG5iVL0fSeI481jcJUUEZO6HKx62PE,4347
5
- pearmut/static/basic.bundle.js,sha256=9cz_5Jq0KgnWTwkuGqRT2eAY3FHQJM2f2OP1RnNi0s4,110582
6
- pearmut/static/basic.html,sha256=Nm0t3uGsbUUso_lFpIpMMEe9iBEDS_Og4tz5vdWhJGo,5473
7
- pearmut/static/dashboard.bundle.js,sha256=djacPNoKpxtSP0CzAdEmgPocDyBO0ihFUriCw_RJOhQ,100630
8
- pearmut/static/dashboard.html,sha256=HXZzoz44f7LYtAfuP7uQioxTkNmo2_fAN0v2C2s1lAs,2680
9
- pearmut/static/favicon.svg,sha256=gVPxdBlyfyJVkiMfh8WLaiSyH4lpwmKZs8UiOeX8YW4,7347
10
- pearmut/static/index.html,sha256=yMttallApd0T7sxngUrdwCDrtTQpRIFF0-4W0jfXejU,835
11
- pearmut/static/style.css,sha256=hI_Mbvq6BbXfsp-WMpx73tsOL_6QflgrSV1um-3c-hU,4101
12
- pearmut-0.3.2.dist-info/licenses/LICENSE,sha256=GtR6RcTdRn-P23h5pKFuWSLZrLPD0ytHAwSOBt7aLpI,1071
13
- pearmut-0.3.2.dist-info/METADATA,sha256=hZqSfG_P5fZxAt1-WyIvkuj8o9DJT0tGqFyVRWmAW3A,15606
14
- pearmut-0.3.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
15
- pearmut-0.3.2.dist-info/entry_points.txt,sha256=eEA9LVWsS3neQbMvL_nMvEw8I0oFudw8nQa1iqxOiWM,45
16
- pearmut-0.3.2.dist-info/top_level.txt,sha256=CdgtUM-SKQDt6o5g0QreO-_7XTBP9_wnHMS1P-Rl5Go,8
17
- pearmut-0.3.2.dist-info/RECORD,,