pearmut 0.0.3__tar.gz → 0.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. pearmut-0.0.5/PKG-INFO +185 -0
  2. pearmut-0.0.5/README.md +165 -0
  3. pearmut-0.0.5/pearmut.egg-info/PKG-INFO +185 -0
  4. {pearmut-0.0.3 → pearmut-0.0.5}/pyproject.toml +1 -1
  5. {pearmut-0.0.3 → pearmut-0.0.5}/server/static/pointwise.bundle.js +1 -1
  6. {pearmut-0.0.3 → pearmut-0.0.5}/server/static/pointwise.html +1 -1
  7. pearmut-0.0.3/PKG-INFO +0 -149
  8. pearmut-0.0.3/README.md +0 -129
  9. pearmut-0.0.3/pearmut.egg-info/PKG-INFO +0 -149
  10. {pearmut-0.0.3 → pearmut-0.0.5}/LICENSE +0 -0
  11. {pearmut-0.0.3 → pearmut-0.0.5}/pearmut.egg-info/SOURCES.txt +0 -0
  12. {pearmut-0.0.3 → pearmut-0.0.5}/pearmut.egg-info/dependency_links.txt +0 -0
  13. {pearmut-0.0.3 → pearmut-0.0.5}/pearmut.egg-info/entry_points.txt +0 -0
  14. {pearmut-0.0.3 → pearmut-0.0.5}/pearmut.egg-info/requires.txt +0 -0
  15. {pearmut-0.0.3 → pearmut-0.0.5}/pearmut.egg-info/top_level.txt +0 -0
  16. {pearmut-0.0.3 → pearmut-0.0.5}/server/cli.py +0 -0
  17. {pearmut-0.0.3 → pearmut-0.0.5}/server/model.py +0 -0
  18. {pearmut-0.0.3 → pearmut-0.0.5}/server/protocols.py +0 -0
  19. {pearmut-0.0.3 → pearmut-0.0.5}/server/run.py +0 -0
  20. {pearmut-0.0.3 → pearmut-0.0.5}/server/static/assets/favicon.svg +0 -0
  21. {pearmut-0.0.3 → pearmut-0.0.5}/server/static/assets/style.css +0 -0
  22. {pearmut-0.0.3 → pearmut-0.0.5}/server/static/dashboard.bundle.js +0 -0
  23. {pearmut-0.0.3 → pearmut-0.0.5}/server/static/dashboard.html +0 -0
  24. {pearmut-0.0.3 → pearmut-0.0.5}/server/static/index.html +0 -0
  25. {pearmut-0.0.3 → pearmut-0.0.5}/server/utils.py +0 -0
  26. {pearmut-0.0.3 → pearmut-0.0.5}/setup.cfg +0 -0
pearmut-0.0.5/PKG-INFO ADDED
@@ -0,0 +1,185 @@
+ Metadata-Version: 2.4
+ Name: pearmut
+ Version: 0.0.5
+ Summary: A tool for evaluation of model outputs, primarily MT.
+ Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
+ License: MIT
+ Project-URL: Repository, https://github.com/zouharvi/pearmut
+ Project-URL: Issues, https://github.com/zouharvi/pearmut/issues
+ Keywords: evaluation,machine translation,human evaluation,annotation
+ Requires-Python: >=3.12
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: fastapi>=0.110.0
+ Requires-Dist: uvicorn>=0.29.0
+ Requires-Dist: wonderwords>=3.0.0
+ Provides-Extra: dev
+ Requires-Dist: pytest; extra == "dev"
+ Requires-Dist: pynpm>=0.3.0; extra == "dev"
+ Dynamic: license-file
+
+ # Pearmut 🍐
+
+ Pearmut is a **Platform for Evaluation and Reviewing of Multilingual Tasks**.
+ It evaluates model outputs, primarily machine translation but also various other NLP tasks.
+ It supports multimodality (text, video, audio, images) and a variety of annotation protocols (DA, ESA, MQM, paired ESA, etc.).
+
+ [![PyPI version](https://badgen.net/pypi/v/pearmut/)](https://pypi.org/project/pearmut)
+ &nbsp;
+ [![PyPI downloads/month](https://img.shields.io/pypi/dm/pearmut.svg)](https://pypi.python.org/pypi/pearmut/)
+ &nbsp;
+ [![PyPI license](https://badgen.net/pypi/license/pearmut/)](https://pypi.org/project/pearmut/)
+ &nbsp;
+ [![build status](https://github.com/zouharvi/pearmut/actions/workflows/ci.yml/badge.svg)](https://github.com/zouharvi/pearmut/actions/workflows/ci.yml)
+
+ <img width="1334" alt="Screenshot of ESA/MQM interface" src="https://github.com/user-attachments/assets/dde04b98-c724-4226-b926-011a89e9ce31" />
+
+ ## Getting started fast
+ ```bash
+ # install the package
+ pip install pearmut
+ # download two campaign definitions
+ wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/wmt25_%23_en-cs_CZ.json
+ wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/wmt25_%23_cs-de_DE.json
+ # load them into pearmut
+ pearmut add wmt25_#_en-cs_CZ.json
+ pearmut add wmt25_#_cs-de_DE.json
+ # start pearmut (will show management links)
+ pearmut run
+ ```
+
+ ## Starting a campaign
+
+ First, install the package:
+ ```bash
+ pip install pearmut
+ ```
+
+ A campaign is described in a single JSON file (see [examples/](examples/)!).
+ One of the simplest setups, where each user has a pre-defined list of tasks (`task-based`), is:
+ ```python
+ {
+     "info": {
+         "type": "task-based",
+         "template": "pointwise",
+         "protocol_score": true,  # we want scores [0...100] for each segment
+         "protocol_error_spans": true,  # we want error spans
+         "protocol_error_categories": false,  # we do not want error span categories
+         "status_message": "Evaluate translation from en to cs_CZ",  # message to show to users
+         "url": "http://localhost:8001"  # where the server will be accessible
+     },
+     "campaign_id": "wmt25_#_en-cs_CZ",
+     "data": [
+         # data for the first task/user
+         [
+             {
+                 # each evaluation item is a document
+                 "src": [
+                     "This will be the year that Guinness loses its cool. Cheers to that!",
+                     "I'm not sure I can remember exactly when I sensed it. Maybe it was when some...",
+                 ],
+                 "tgt": [
+                     "Tohle bude rok, kdy Guinness přijde o svůj „cool“ faktor. Na zdraví!",
+                     "Nevím přesně, kdy jsem to poprvé zaznamenal. Možná to bylo ve chvíli, ...",
+                 ]
+             },
+             ...
+         ],
+         # data for the second task/user
+         [
+             ...
+         ],
+         # arbitrary number of users (each corresponds to a single URL to be shared)
+     ]
+ }
+ ```
+ In general, a task item can be anything and is handled by the specific protocol template.
+ For the standard ones (ESA, DA, MQM), we expect each item to be a dictionary (corresponding to a single document unit) that looks as follows:
+ ```python
+ {  # single document definition
+     "src": ["A najednou se všechna tato voda naplnila dalšími lidmi a dalšími věcmi.", "toto je pokračování stejného dokumentu"],  # mandatory for ESA/MQM/DA
+     "tgt": ["And suddenly all the water became full of other people and other people.", "this is a continuation of the same document"],  # mandatory for ESA/MQM/DA
+     ...  # all other keys will be stored, useful for your analysis
+ },
+ ...  # definition of another item
+ ```
+
+ We also support a super simple allocation of annotations (`task-single`, not yet implemented ⚠️), where you simply pass a list of all examples to be evaluated and they are processed in parallel by all annotators:
+ ```python
+ {
+     "campaign_id": "my campaign 6",
+     "info": {
+         "type": "task-single",
+         "template": "pointwise",
+         "protocol_score": true,  # collect scores
+         "protocol_error_spans": true,  # collect error spans
+         "protocol_error_categories": false,  # do not collect MQM categories, so ESA
+         "users": 50,
+     },
+     "data": [...],  # list of all items
+ }
+ ```
+
+ We also support dynamic allocation of annotations (`dynamic`, not yet implemented ⚠️), which is more complex and can be ignored for now:
+ ```python
+ {
+     "campaign_id": "my campaign 6",
+     "info": {
+         "type": "dynamic",
+         "template": "kway",
+         "protocol_k": 5,
+         "users": 50,
+     },
+     "data": [...],  # list of all items
+ }
+ ```
+
+ To load a campaign into the server, run `pearmut add`.
+ It will fail if a campaign with the same `campaign_id` already exists, unless you specify `-o/--overwrite`.
+ It will also output a secret management link. Then, launch the server with `pearmut run`:
+ ```bash
+ pearmut add my_campaign_4.json
+ pearmut run
+ ```
+
+ ## Campaign management
+
+ When you add a new campaign or launch pearmut, a management link is shown that gives an overview of annotator progress, as well as easy access to the annotation links and to resetting task progress (no data will be lost).
+
+ <img width="800" alt="Management dashboard" src="https://github.com/user-attachments/assets/057899d7-2291-46c7-876f-407c4050a9cb" />
+
+ Additionally, at the end of an annotation, a completion token is shown, which can be compared to the correct one that you can download as metadata from the dashboard.
+ An intentionally incorrect token can be shown if the annotations don't pass quality control.
+
+ <img width="500" alt="Token on completion" src="https://github.com/user-attachments/assets/4b4d2aa9-7bab-44d6-894b-6c789cd3bc6e" />
+
+ ## Development
+
+ To run the server and frontend locally:
+
+ ```bash
+ # watch the frontend for changes (in a separate terminal)
+ npm install web/ --prefix web/
+ npm run watch --prefix web/
+
+ # install the local package as editable
+ pip3 install -e .
+ # add existing data from WMT25; this generates annotation links
+ # and sets up progress/log files in the current working folder
+ pearmut add examples/wmt25_#_en-cs_CZ.json
+ pearmut add examples/wmt25_#_cs-de_DE.json
+ # show a management link for all loaded campaigns
+ pearmut run
+ ```
+
+ ## Citation
+
+ If you use this work in your paper, please cite it as:
+ ```bibtex
+ @misc{zouhar2025pearmut,
+     author={Vilém Zouhar and others},
+     title={Pearmut🍐 Platform for Evaluation and Reviewing of Multilingual Tasks},
+     url={https://github.com/zouharvi/pearmut/},
+     year={2025},
+ }
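
The campaign examples in the README above use `#` comments for exposition; the actual file passed to `pearmut add` has to be plain JSON. Below is a minimal sketch of generating such a `task-based` campaign programmatically, using only fields shown in the example; the campaign ID, file name, and segments are placeholders.

```python
import json

# Minimal `task-based` campaign using only fields from the README example;
# the segments below are placeholders, not real data.
campaign = {
    "campaign_id": "my_campaign",
    "info": {
        "type": "task-based",
        "template": "pointwise",
        "protocol_score": True,  # Python True serializes to JSON true
        "protocol_error_spans": True,
        "protocol_error_categories": False,
        "status_message": "Evaluate translation from en to cs_CZ",
        "url": "http://localhost:8001",
    },
    # one inner list per task/user; each item is a document with
    # parallel src/tgt segment lists
    "data": [
        [
            {
                "src": ["Source segment 1.", "Source segment 2."],
                "tgt": ["Target segment 1.", "Target segment 2."],
            },
        ],
    ],
}

with open("my_campaign.json", "w", encoding="utf-8") as f:
    json.dump(campaign, f, ensure_ascii=False, indent=2)
# then: pearmut add my_campaign.json && pearmut run
```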
pearmut-0.0.5/README.md ADDED
@@ -0,0 +1,165 @@
+ # Pearmut 🍐
+
+ Pearmut is a **Platform for Evaluation and Reviewing of Multilingual Tasks**.
+ It evaluates model outputs, primarily machine translation but also various other NLP tasks.
+ It supports multimodality (text, video, audio, images) and a variety of annotation protocols (DA, ESA, MQM, paired ESA, etc.).
+
+ [![PyPI version](https://badgen.net/pypi/v/pearmut/)](https://pypi.org/project/pearmut)
+ &nbsp;
+ [![PyPI downloads/month](https://img.shields.io/pypi/dm/pearmut.svg)](https://pypi.python.org/pypi/pearmut/)
+ &nbsp;
+ [![PyPI license](https://badgen.net/pypi/license/pearmut/)](https://pypi.org/project/pearmut/)
+ &nbsp;
+ [![build status](https://github.com/zouharvi/pearmut/actions/workflows/ci.yml/badge.svg)](https://github.com/zouharvi/pearmut/actions/workflows/ci.yml)
+
+ <img width="1334" alt="Screenshot of ESA/MQM interface" src="https://github.com/user-attachments/assets/dde04b98-c724-4226-b926-011a89e9ce31" />
+
+ ## Getting started fast
+ ```bash
+ # install the package
+ pip install pearmut
+ # download two campaign definitions
+ wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/wmt25_%23_en-cs_CZ.json
+ wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/wmt25_%23_cs-de_DE.json
+ # load them into pearmut
+ pearmut add wmt25_#_en-cs_CZ.json
+ pearmut add wmt25_#_cs-de_DE.json
+ # start pearmut (will show management links)
+ pearmut run
+ ```
+
+ ## Starting a campaign
+
+ First, install the package:
+ ```bash
+ pip install pearmut
+ ```
+
+ A campaign is described in a single JSON file (see [examples/](examples/)!).
+ One of the simplest setups, where each user has a pre-defined list of tasks (`task-based`), is:
+ ```python
+ {
+     "info": {
+         "type": "task-based",
+         "template": "pointwise",
+         "protocol_score": true,  # we want scores [0...100] for each segment
+         "protocol_error_spans": true,  # we want error spans
+         "protocol_error_categories": false,  # we do not want error span categories
+         "status_message": "Evaluate translation from en to cs_CZ",  # message to show to users
+         "url": "http://localhost:8001"  # where the server will be accessible
+     },
+     "campaign_id": "wmt25_#_en-cs_CZ",
+     "data": [
+         # data for the first task/user
+         [
+             {
+                 # each evaluation item is a document
+                 "src": [
+                     "This will be the year that Guinness loses its cool. Cheers to that!",
+                     "I'm not sure I can remember exactly when I sensed it. Maybe it was when some...",
+                 ],
+                 "tgt": [
+                     "Tohle bude rok, kdy Guinness přijde o svůj „cool“ faktor. Na zdraví!",
+                     "Nevím přesně, kdy jsem to poprvé zaznamenal. Možná to bylo ve chvíli, ...",
+                 ]
+             },
+             ...
+         ],
+         # data for the second task/user
+         [
+             ...
+         ],
+         # arbitrary number of users (each corresponds to a single URL to be shared)
+     ]
+ }
+ ```
+ In general, a task item can be anything and is handled by the specific protocol template.
+ For the standard ones (ESA, DA, MQM), we expect each item to be a dictionary (corresponding to a single document unit) that looks as follows:
+ ```python
+ {  # single document definition
+     "src": ["A najednou se všechna tato voda naplnila dalšími lidmi a dalšími věcmi.", "toto je pokračování stejného dokumentu"],  # mandatory for ESA/MQM/DA
+     "tgt": ["And suddenly all the water became full of other people and other people.", "this is a continuation of the same document"],  # mandatory for ESA/MQM/DA
+     ...  # all other keys will be stored, useful for your analysis
+ },
+ ...  # definition of another item
+ ```
+
+ We also support a super simple allocation of annotations (`task-single`, not yet implemented ⚠️), where you simply pass a list of all examples to be evaluated and they are processed in parallel by all annotators:
+ ```python
+ {
+     "campaign_id": "my campaign 6",
+     "info": {
+         "type": "task-single",
+         "template": "pointwise",
+         "protocol_score": true,  # collect scores
+         "protocol_error_spans": true,  # collect error spans
+         "protocol_error_categories": false,  # do not collect MQM categories, so ESA
+         "users": 50,
+     },
+     "data": [...],  # list of all items
+ }
+ ```
+
+ We also support dynamic allocation of annotations (`dynamic`, not yet implemented ⚠️), which is more complex and can be ignored for now:
+ ```python
+ {
+     "campaign_id": "my campaign 6",
+     "info": {
+         "type": "dynamic",
+         "template": "kway",
+         "protocol_k": 5,
+         "users": 50,
+     },
+     "data": [...],  # list of all items
+ }
+ ```
+
+ To load a campaign into the server, run `pearmut add`.
+ It will fail if a campaign with the same `campaign_id` already exists, unless you specify `-o/--overwrite`.
+ It will also output a secret management link. Then, launch the server with `pearmut run`:
+ ```bash
+ pearmut add my_campaign_4.json
+ pearmut run
+ ```
+
+ ## Campaign management
+
+ When you add a new campaign or launch pearmut, a management link is shown that gives an overview of annotator progress, as well as easy access to the annotation links and to resetting task progress (no data will be lost).
+
+ <img width="800" alt="Management dashboard" src="https://github.com/user-attachments/assets/057899d7-2291-46c7-876f-407c4050a9cb" />
+
+ Additionally, at the end of an annotation, a completion token is shown, which can be compared to the correct one that you can download as metadata from the dashboard.
+ An intentionally incorrect token can be shown if the annotations don't pass quality control.
+
+ <img width="500" alt="Token on completion" src="https://github.com/user-attachments/assets/4b4d2aa9-7bab-44d6-894b-6c789cd3bc6e" />
+
+ ## Development
+
+ To run the server and frontend locally:
+
+ ```bash
+ # watch the frontend for changes (in a separate terminal)
+ npm install web/ --prefix web/
+ npm run watch --prefix web/
+
+ # install the local package as editable
+ pip3 install -e .
+ # add existing data from WMT25; this generates annotation links
+ # and sets up progress/log files in the current working folder
+ pearmut add examples/wmt25_#_en-cs_CZ.json
+ pearmut add examples/wmt25_#_cs-de_DE.json
+ # show a management link for all loaded campaigns
+ pearmut run
+ ```
+
+ ## Citation
+
+ If you use this work in your paper, please cite it as:
+ ```bibtex
+ @misc{zouhar2025pearmut,
+     author={Vilém Zouhar and others},
+     title={Pearmut🍐 Platform for Evaluation and Reviewing of Multilingual Tasks},
+     url={https://github.com/zouharvi/pearmut/},
+     year={2025},
+ }
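
The completion-token check described under "Campaign management" can be scripted. The sketch below assumes a hypothetical metadata layout (a flat `{task_id: token}` mapping); pearmut's actual dashboard export may be structured differently.

```python
import json

def verify_tokens(metadata_path: str, reported: dict[str, str]) -> list[str]:
    """Return task IDs whose reported token does not match the correct one.

    Assumes a hypothetical metadata layout {task_id: correct_token};
    adapt to whatever the dashboard actually exports.
    """
    with open(metadata_path, encoding="utf-8") as f:
        correct = json.load(f)
    # a mismatch means a mistyped token, or the intentionally incorrect
    # token shown when annotations fail quality control
    return [task for task, token in reported.items() if correct.get(task) != token]

# e.g. compare tokens that annotators sent back against the download
print(verify_tokens("metadata.json", {"task_0": "amber-lion"}))
```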
pearmut-0.0.5/pearmut.egg-info/PKG-INFO ADDED
@@ -0,0 +1,185 @@
+ Metadata-Version: 2.4
+ Name: pearmut
+ Version: 0.0.5
+ Summary: A tool for evaluation of model outputs, primarily MT.
+ Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
+ License: MIT
+ Project-URL: Repository, https://github.com/zouharvi/pearmut
+ Project-URL: Issues, https://github.com/zouharvi/pearmut/issues
+ Keywords: evaluation,machine translation,human evaluation,annotation
+ Requires-Python: >=3.12
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: fastapi>=0.110.0
+ Requires-Dist: uvicorn>=0.29.0
+ Requires-Dist: wonderwords>=3.0.0
+ Provides-Extra: dev
+ Requires-Dist: pytest; extra == "dev"
+ Requires-Dist: pynpm>=0.3.0; extra == "dev"
+ Dynamic: license-file
+
+ # Pearmut 🍐
+
+ Pearmut is a **Platform for Evaluation and Reviewing of Multilingual Tasks**.
+ It evaluates model outputs, primarily machine translation but also various other NLP tasks.
+ It supports multimodality (text, video, audio, images) and a variety of annotation protocols (DA, ESA, MQM, paired ESA, etc.).
+
+ [![PyPI version](https://badgen.net/pypi/v/pearmut/)](https://pypi.org/project/pearmut)
+ &nbsp;
+ [![PyPI downloads/month](https://img.shields.io/pypi/dm/pearmut.svg)](https://pypi.python.org/pypi/pearmut/)
+ &nbsp;
+ [![PyPI license](https://badgen.net/pypi/license/pearmut/)](https://pypi.org/project/pearmut/)
+ &nbsp;
+ [![build status](https://github.com/zouharvi/pearmut/actions/workflows/ci.yml/badge.svg)](https://github.com/zouharvi/pearmut/actions/workflows/ci.yml)
+
+ <img width="1334" alt="Screenshot of ESA/MQM interface" src="https://github.com/user-attachments/assets/dde04b98-c724-4226-b926-011a89e9ce31" />
+
+ ## Getting started fast
+ ```bash
+ # install the package
+ pip install pearmut
+ # download two campaign definitions
+ wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/wmt25_%23_en-cs_CZ.json
+ wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/wmt25_%23_cs-de_DE.json
+ # load them into pearmut
+ pearmut add wmt25_#_en-cs_CZ.json
+ pearmut add wmt25_#_cs-de_DE.json
+ # start pearmut (will show management links)
+ pearmut run
+ ```
+
+ ## Starting a campaign
+
+ First, install the package:
+ ```bash
+ pip install pearmut
+ ```
+
+ A campaign is described in a single JSON file (see [examples/](examples/)!).
+ One of the simplest setups, where each user has a pre-defined list of tasks (`task-based`), is:
+ ```python
+ {
+     "info": {
+         "type": "task-based",
+         "template": "pointwise",
+         "protocol_score": true,  # we want scores [0...100] for each segment
+         "protocol_error_spans": true,  # we want error spans
+         "protocol_error_categories": false,  # we do not want error span categories
+         "status_message": "Evaluate translation from en to cs_CZ",  # message to show to users
+         "url": "http://localhost:8001"  # where the server will be accessible
+     },
+     "campaign_id": "wmt25_#_en-cs_CZ",
+     "data": [
+         # data for the first task/user
+         [
+             {
+                 # each evaluation item is a document
+                 "src": [
+                     "This will be the year that Guinness loses its cool. Cheers to that!",
+                     "I'm not sure I can remember exactly when I sensed it. Maybe it was when some...",
+                 ],
+                 "tgt": [
+                     "Tohle bude rok, kdy Guinness přijde o svůj „cool“ faktor. Na zdraví!",
+                     "Nevím přesně, kdy jsem to poprvé zaznamenal. Možná to bylo ve chvíli, ...",
+                 ]
+             },
+             ...
+         ],
+         # data for the second task/user
+         [
+             ...
+         ],
+         # arbitrary number of users (each corresponds to a single URL to be shared)
+     ]
+ }
+ ```
+ In general, a task item can be anything and is handled by the specific protocol template.
+ For the standard ones (ESA, DA, MQM), we expect each item to be a dictionary (corresponding to a single document unit) that looks as follows:
+ ```python
+ {  # single document definition
+     "src": ["A najednou se všechna tato voda naplnila dalšími lidmi a dalšími věcmi.", "toto je pokračování stejného dokumentu"],  # mandatory for ESA/MQM/DA
+     "tgt": ["And suddenly all the water became full of other people and other people.", "this is a continuation of the same document"],  # mandatory for ESA/MQM/DA
+     ...  # all other keys will be stored, useful for your analysis
+ },
+ ...  # definition of another item
+ ```
+
+ We also support a super simple allocation of annotations (`task-single`, not yet implemented ⚠️), where you simply pass a list of all examples to be evaluated and they are processed in parallel by all annotators:
+ ```python
+ {
+     "campaign_id": "my campaign 6",
+     "info": {
+         "type": "task-single",
+         "template": "pointwise",
+         "protocol_score": true,  # collect scores
+         "protocol_error_spans": true,  # collect error spans
+         "protocol_error_categories": false,  # do not collect MQM categories, so ESA
+         "users": 50,
+     },
+     "data": [...],  # list of all items
+ }
+ ```
+
+ We also support dynamic allocation of annotations (`dynamic`, not yet implemented ⚠️), which is more complex and can be ignored for now:
+ ```python
+ {
+     "campaign_id": "my campaign 6",
+     "info": {
+         "type": "dynamic",
+         "template": "kway",
+         "protocol_k": 5,
+         "users": 50,
+     },
+     "data": [...],  # list of all items
+ }
+ ```
+
+ To load a campaign into the server, run `pearmut add`.
+ It will fail if a campaign with the same `campaign_id` already exists, unless you specify `-o/--overwrite`.
+ It will also output a secret management link. Then, launch the server with `pearmut run`:
+ ```bash
+ pearmut add my_campaign_4.json
+ pearmut run
+ ```
+
+ ## Campaign management
+
+ When you add a new campaign or launch pearmut, a management link is shown that gives an overview of annotator progress, as well as easy access to the annotation links and to resetting task progress (no data will be lost).
+
+ <img width="800" alt="Management dashboard" src="https://github.com/user-attachments/assets/057899d7-2291-46c7-876f-407c4050a9cb" />
+
+ Additionally, at the end of an annotation, a completion token is shown, which can be compared to the correct one that you can download as metadata from the dashboard.
+ An intentionally incorrect token can be shown if the annotations don't pass quality control.
+
+ <img width="500" alt="Token on completion" src="https://github.com/user-attachments/assets/4b4d2aa9-7bab-44d6-894b-6c789cd3bc6e" />
+
+ ## Development
+
+ To run the server and frontend locally:
+
+ ```bash
+ # watch the frontend for changes (in a separate terminal)
+ npm install web/ --prefix web/
+ npm run watch --prefix web/
+
+ # install the local package as editable
+ pip3 install -e .
+ # add existing data from WMT25; this generates annotation links
+ # and sets up progress/log files in the current working folder
+ pearmut add examples/wmt25_#_en-cs_CZ.json
+ pearmut add examples/wmt25_#_cs-de_DE.json
+ # show a management link for all loaded campaigns
+ pearmut run
+ ```
+
+ ## Citation
+
+ If you use this work in your paper, please cite it as:
+ ```bibtex
+ @misc{zouhar2025pearmut,
+     author={Vilém Zouhar and others},
+     title={Pearmut🍐 Platform for Evaluation and Reviewing of Multilingual Tasks},
+     url={https://github.com/zouharvi/pearmut/},
+     year={2025},
+ }
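
For intuition on the not-yet-implemented `dynamic` allocation mode described in the README, here is a generic sketch of dynamic assignment in which each requesting annotator receives the currently least-annotated item. This illustrates the concept only; it is not pearmut's implementation.

```python
import heapq

class DynamicAllocator:
    """Hand out the item with the fewest annotations so far (generic sketch)."""

    def __init__(self, n_items: int):
        # min-heap of (annotation_count, item_index)
        self.heap = [(0, i) for i in range(n_items)]
        heapq.heapify(self.heap)

    def next_item(self) -> int:
        count, item = heapq.heappop(self.heap)
        heapq.heappush(self.heap, (count + 1, item))
        return item

alloc = DynamicAllocator(n_items=3)
print([alloc.next_item() for _ in range(7)])  # [0, 1, 2, 0, 1, 2, 0]
```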
{pearmut-0.0.3 → pearmut-0.0.5}/pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "pearmut"
- version = "0.0.3"
+ version = "0.0.5"
  description = "A tool for evaluation of model outputs, primarily MT."
  readme = "README.md"
  license = { text = "MIT" }
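
The pyproject.toml hunk above records the version bump from 0.0.3 to 0.0.5. To confirm which release is installed after upgrading, the standard library suffices:

```python
from importlib.metadata import version

# query the installed distribution's version (expected: 0.0.5)
print(version("pearmut"))
```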