pearmut 0.0.5__tar.gz → 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pearmut-0.0.5 → pearmut-0.1.0}/PKG-INFO +56 -26
- {pearmut-0.0.5 → pearmut-0.1.0}/README.md +53 -24
- {pearmut-0.0.5 → pearmut-0.1.0}/pearmut.egg-info/PKG-INFO +56 -26
- {pearmut-0.0.5 → pearmut-0.1.0}/pearmut.egg-info/SOURCES.txt +1 -2
- {pearmut-0.0.5 → pearmut-0.1.0}/pearmut.egg-info/requires.txt +1 -0
- {pearmut-0.0.5 → pearmut-0.1.0}/pyproject.toml +3 -2
- pearmut-0.0.5/server/run.py → pearmut-0.1.0/server/app.py +4 -13
- {pearmut-0.0.5 → pearmut-0.1.0}/server/cli.py +71 -23
- {pearmut-0.0.5 → pearmut-0.1.0}/server/protocols.py +15 -2
- {pearmut-0.0.5 → pearmut-0.1.0}/server/static/assets/style.css +12 -8
- {pearmut-0.0.5 → pearmut-0.1.0}/server/static/dashboard.bundle.js +1 -1
- {pearmut-0.0.5 → pearmut-0.1.0}/server/static/dashboard.html +4 -3
- pearmut-0.1.0/server/static/pointwise.bundle.js +1 -0
- pearmut-0.1.0/server/static/pointwise.html +235 -0
- pearmut-0.0.5/server/model.py +0 -61
- pearmut-0.0.5/server/static/pointwise.bundle.js +0 -1
- pearmut-0.0.5/server/static/pointwise.html +0 -171
- {pearmut-0.0.5 → pearmut-0.1.0}/LICENSE +0 -0
- {pearmut-0.0.5 → pearmut-0.1.0}/pearmut.egg-info/dependency_links.txt +0 -0
- {pearmut-0.0.5 → pearmut-0.1.0}/pearmut.egg-info/entry_points.txt +0 -0
- {pearmut-0.0.5 → pearmut-0.1.0}/pearmut.egg-info/top_level.txt +0 -0
- {pearmut-0.0.5 → pearmut-0.1.0}/server/static/assets/favicon.svg +0 -0
- {pearmut-0.0.5 → pearmut-0.1.0}/server/static/index.html +0 -0
- {pearmut-0.0.5 → pearmut-0.1.0}/server/utils.py +0 -0
- {pearmut-0.0.5 → pearmut-0.1.0}/setup.cfg +0 -0
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pearmut
|
|
3
|
-
Version: 0.0
|
|
3
|
+
Version: 0.1.0
|
|
4
4
|
Summary: A tool for evaluation of model outputs, primarily MT.
|
|
5
5
|
Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
|
|
6
|
-
License:
|
|
6
|
+
License: apache-2.0
|
|
7
7
|
Project-URL: Repository, https://github.com/zouharvi/pearmut
|
|
8
8
|
Project-URL: Issues, https://github.com/zouharvi/pearmut/issues
|
|
9
9
|
Keywords: evaluation,machine translation,human evaluation,annotation
|
|
@@ -13,6 +13,7 @@ License-File: LICENSE
|
|
|
13
13
|
Requires-Dist: fastapi>=0.110.0
|
|
14
14
|
Requires-Dist: uvicorn>=0.29.0
|
|
15
15
|
Requires-Dist: wonderwords>=3.0.0
|
|
16
|
+
Requires-Dist: psutil>=7.1.0
|
|
16
17
|
Provides-Extra: dev
|
|
17
18
|
Requires-Dist: pytest; extra == "dev"
|
|
18
19
|
Requires-Dist: pynpm>=0.3.0; extra == "dev"
|
|
@@ -32,9 +33,11 @@ Supports multimodality (text, video, audio, images) and a variety of annotation
|
|
|
32
33
|
|
|
33
34
|
[](https://github.com/zouharvi/pearmut/actions/workflows/ci.yml)
|
|
34
35
|
|
|
35
|
-
<img width="
|
|
36
|
+
<img width="1000" alt="Screenshot of ESA/MQM interface" src="https://github.com/user-attachments/assets/f14c91a5-44d7-4248-ada9-387e95ca59d0" />
|
|
36
37
|
|
|
37
|
-
##
|
|
38
|
+
## Quick start
|
|
39
|
+
|
|
40
|
+
You do not need to clone this repository. Simply install with pip and run locally:
|
|
38
41
|
```bash
|
|
39
42
|
# install the package
|
|
40
43
|
pip install pearmut
|
|
@@ -55,7 +58,7 @@ First, install the package
|
|
|
55
58
|
pip install pearmut
|
|
56
59
|
```
|
|
57
60
|
|
|
58
|
-
A campaign is described in a single JSON file (see [examples/](examples/)
|
|
61
|
+
A campaign is described in a single JSON file (see [examples/](examples/)).
|
|
59
62
|
One of the simplest ones, where each user has a pre-defined list of tasks (`task-based`), is:
|
|
60
63
|
```python
|
|
61
64
|
{
|
|
@@ -65,24 +68,25 @@ One of the simplest ones, where each user has a pre-defined list of tasks (`task
|
|
|
65
68
|
"protocol_score": true, # we want scores [0...100] for each segment
|
|
66
69
|
"protocol_error_spans": true, # we want error spans
|
|
67
70
|
"protocol_error_categories": false, # we do not want error span categories
|
|
68
|
-
"
|
|
69
|
-
"url": "http://localhost:8001" # where the server will be accessible
|
|
71
|
+
"instructions": "Evaluate translation from en to cs_CZ", # message to show to users
|
|
70
72
|
},
|
|
71
73
|
"campaign_id": "wmt25_#_en-cs_CZ",
|
|
72
74
|
"data": [
|
|
73
75
|
# data for first task/user
|
|
74
76
|
[
|
|
75
|
-
|
|
77
|
+
[
|
|
76
78
|
# each evaluation item is a document
|
|
77
|
-
|
|
78
|
-
"This will be the year that Guinness loses its cool. Cheers to that!",
|
|
79
|
-
"
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
"
|
|
83
|
-
"
|
|
84
|
-
|
|
85
|
-
|
|
79
|
+
{
|
|
80
|
+
"src": "This will be the year that Guinness loses its cool. Cheers to that!",
|
|
81
|
+
"tgt": "Nevím přesně, kdy jsem to poprvé zaznamenal. Možná to bylo ve chvíli, ..."
|
|
82
|
+
},
|
|
83
|
+
{
|
|
84
|
+
"src": "I'm not sure I can remember exactly when I sensed it. Maybe it was when some...",
|
|
85
|
+
"tgt": "Tohle bude rok, kdy Guinness přijde o svůj „cool“ faktor. Na zdraví!"
|
|
86
|
+
}
|
|
87
|
+
...
|
|
88
|
+
],
|
|
89
|
+
# more documents
|
|
86
90
|
...
|
|
87
91
|
],
|
|
88
92
|
# data for second task/user
|
|
@@ -96,12 +100,19 @@ One of the simplest ones, where each user has a pre-defined list of tasks (`task
|
|
|
96
100
|
In general, the task item can be anything and is handled by the specific protocol template.
|
|
97
101
|
For the standard ones (ESA, DA, MQM), we expect each item to be a dictionary (corresponding to a single document unit) that looks as follows:
|
|
98
102
|
```python
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
103
|
+
# single document definition
|
|
104
|
+
[
|
|
105
|
+
{
|
|
106
|
+
"src": "A najednou se všechna tato voda naplnila dalšími lidmi a dalšími věcmi.", # mandatory for ESA/MQM/DA
|
|
107
|
+
"tgt": "And suddenly all the water became full of other people and other people." # mandatory for ESA/MQM/DA
|
|
108
|
+
},
|
|
109
|
+
{
|
|
110
|
+
"src": "toto je pokračování stejného dokumentu",
|
|
111
|
+
"tgt": "this is a continuation of the same document",
|
|
102
112
|
... # all other keys that will be stored, useful for your analysis
|
|
103
|
-
}
|
|
104
|
-
|
|
113
|
+
}
|
|
114
|
+
],
|
|
115
|
+
... # definition of another item (document)
|
|
105
116
|
```
|
|
106
117
|
|
|
107
118
|
We also support a super simple allocation of annotations (`task-single`, not yet ⚠️), where you simply pass a list of all examples to be evaluated and they are processed in parallel by all annotators:
|
|
@@ -146,6 +157,7 @@ pearmut run
|
|
|
146
157
|
## Campaign management
|
|
147
158
|
|
|
148
159
|
When adding new campaigns or launching pearmut, a management link is shown that gives an overview of annotator progress but also an easy access to the annotation links or resetting the task progress (no data will be lost).
|
|
160
|
+
This is also the place where you can download all progress and collected annotations (these files exist also locally but this might be more convenient).
|
|
149
161
|
|
|
150
162
|
<img width="800" alt="Management dashboard" src="https://github.com/user-attachments/assets/057899d7-2291-46c7-876f-407c4050a9cb" />
|
|
151
163
|
|
|
@@ -154,11 +166,23 @@ An intentionally incorrect token can be shown if the annotations don't pass qual
|
|
|
154
166
|
|
|
155
167
|
<img width="500" alt="Token on completion" src="https://github.com/user-attachments/assets/4b4d2aa9-7bab-44d6-894b-6c789cd3bc6e" />
|
|
156
168
|
|
|
157
|
-
##
|
|
169
|
+
## Multimodal Annotations
|
|
170
|
+
|
|
171
|
+
We also support anything HTML-compatible both on the input and on the output.
|
|
172
|
+
This includes embedded YouTube videos, or even simple `<video>` tags that point to some resource somewhere.
|
|
173
|
+
For an example, try [examples/mock_multimodal.json](examples/mock_multimodal.json).
|
|
174
|
+
Tip: make sure the elements are already appropriately styled.
|
|
158
175
|
|
|
159
|
-
|
|
176
|
+
<img width="800" alt="Preview of multimodal elements in Pearmut" src="https://github.com/user-attachments/assets/f34a1a3e-ad95-4114-95ee-8a49e8003faf" />
|
|
160
177
|
|
|
178
|
+
## Development
|
|
179
|
+
|
|
180
|
+
Pearmut works by running a server that responds to requests from the frontend.
|
|
181
|
+
These requests are not template-based but rather carry only data (which gives flexibility in designing new protocols and interfaces).
|
|
182
|
+
By default, the frontend is served from `static/` which is pre-built when you `pip install pearmut`.
|
|
183
|
+
To make changes locally, clone the repository and run the following, which will recompile the frontend on changes (server changes need server restart):
|
|
161
184
|
```bash
|
|
185
|
+
cd pearmut
|
|
162
186
|
# watch the frontend for changes (in a separate terminal)
|
|
163
187
|
npm install web/ --prefix web/
|
|
164
188
|
npm run watch --prefix web/
|
|
@@ -169,16 +193,22 @@ pip3 install -e .
|
|
|
169
193
|
# sets up progress/log files in current working folder
|
|
170
194
|
pearmut add examples/wmt25_#_en-cs_CZ.json
|
|
171
195
|
pearmut add examples/wmt25_#_cs-de_DE.json
|
|
172
|
-
# shows a management link for all loaded campaigns
|
|
196
|
+
# shows a management link for all loaded campaigns and reloads on change
|
|
173
197
|
pearmut run
|
|
174
198
|
```
|
|
175
199
|
|
|
200
|
+
Optionally, you can specify `--server` in `pearmut add` and `pearmut run` to show correct URL prefixes.
|
|
201
|
+
The `pearmut run` also accepts `--port` (default 8001).
|
|
202
|
+
|
|
203
|
+
If you wish to create a new protocol (referenceable from `info->template`), simply create a new HTML and TS file in `web/src` and add a rule to `webpack.config.js` so that your template gets built.
|
|
204
|
+
A template can call the server for data etc. (see [web/src/pointwise.ts](web/src/pointwise.ts) as an example).
|
|
205
|
+
|
|
176
206
|
## Citation
|
|
177
207
|
|
|
178
208
|
If you use this work in your paper, please cite as:
|
|
179
209
|
```bibtex
|
|
180
210
|
@misc{zouhar2025pearmut,
|
|
181
|
-
author={Vilém Zouhar
|
|
211
|
+
author={Vilém Zouhar},
|
|
182
212
|
title={Pearmut🍐 Platform for Evaluation and Reviewing of Multilingual Tasks},
|
|
183
213
|
url={https://github.com/zouharvi/pearmut/},
|
|
184
214
|
year={2025},
|
|
@@ -12,9 +12,11 @@ Supports multimodality (text, video, audio, images) and a variety of annotation
|
|
|
12
12
|
|
|
13
13
|
[](https://github.com/zouharvi/pearmut/actions/workflows/ci.yml)
|
|
14
14
|
|
|
15
|
-
<img width="
|
|
15
|
+
<img width="1000" alt="Screenshot of ESA/MQM interface" src="https://github.com/user-attachments/assets/f14c91a5-44d7-4248-ada9-387e95ca59d0" />
|
|
16
16
|
|
|
17
|
-
##
|
|
17
|
+
## Quick start
|
|
18
|
+
|
|
19
|
+
You do not need to clone this repository. Simply install with pip and run locally:
|
|
18
20
|
```bash
|
|
19
21
|
# install the package
|
|
20
22
|
pip install pearmut
|
|
@@ -35,7 +37,7 @@ First, install the package
|
|
|
35
37
|
pip install pearmut
|
|
36
38
|
```
|
|
37
39
|
|
|
38
|
-
A campaign is described in a single JSON file (see [examples/](examples/)
|
|
40
|
+
A campaign is described in a single JSON file (see [examples/](examples/)).
|
|
39
41
|
One of the simplest ones, where each user has a pre-defined list of tasks (`task-based`), is:
|
|
40
42
|
```python
|
|
41
43
|
{
|
|
@@ -45,24 +47,25 @@ One of the simplest ones, where each user has a pre-defined list of tasks (`task
|
|
|
45
47
|
"protocol_score": true, # we want scores [0...100] for each segment
|
|
46
48
|
"protocol_error_spans": true, # we want error spans
|
|
47
49
|
"protocol_error_categories": false, # we do not want error span categories
|
|
48
|
-
"
|
|
49
|
-
"url": "http://localhost:8001" # where the server will be accessible
|
|
50
|
+
"instructions": "Evaluate translation from en to cs_CZ", # message to show to users
|
|
50
51
|
},
|
|
51
52
|
"campaign_id": "wmt25_#_en-cs_CZ",
|
|
52
53
|
"data": [
|
|
53
54
|
# data for first task/user
|
|
54
55
|
[
|
|
55
|
-
|
|
56
|
+
[
|
|
56
57
|
# each evaluation item is a document
|
|
57
|
-
|
|
58
|
-
"This will be the year that Guinness loses its cool. Cheers to that!",
|
|
59
|
-
"
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
"
|
|
63
|
-
"
|
|
64
|
-
|
|
65
|
-
|
|
58
|
+
{
|
|
59
|
+
"src": "This will be the year that Guinness loses its cool. Cheers to that!",
|
|
60
|
+
"tgt": "Nevím přesně, kdy jsem to poprvé zaznamenal. Možná to bylo ve chvíli, ..."
|
|
61
|
+
},
|
|
62
|
+
{
|
|
63
|
+
"src": "I'm not sure I can remember exactly when I sensed it. Maybe it was when some...",
|
|
64
|
+
"tgt": "Tohle bude rok, kdy Guinness přijde o svůj „cool“ faktor. Na zdraví!"
|
|
65
|
+
}
|
|
66
|
+
...
|
|
67
|
+
],
|
|
68
|
+
# more documents
|
|
66
69
|
...
|
|
67
70
|
],
|
|
68
71
|
# data for second task/user
|
|
@@ -76,12 +79,19 @@ One of the simplest ones, where each user has a pre-defined list of tasks (`task
|
|
|
76
79
|
In general, the task item can be anything and is handled by the specific protocol template.
|
|
77
80
|
For the standard ones (ESA, DA, MQM), we expect each item to be a dictionary (corresponding to a single document unit) that looks as follows:
|
|
78
81
|
```python
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
+
# single document definition
|
|
83
|
+
[
|
|
84
|
+
{
|
|
85
|
+
"src": "A najednou se všechna tato voda naplnila dalšími lidmi a dalšími věcmi.", # mandatory for ESA/MQM/DA
|
|
86
|
+
"tgt": "And suddenly all the water became full of other people and other people." # mandatory for ESA/MQM/DA
|
|
87
|
+
},
|
|
88
|
+
{
|
|
89
|
+
"src": "toto je pokračování stejného dokumentu",
|
|
90
|
+
"tgt": "this is a continuation of the same document",
|
|
82
91
|
... # all other keys that will be stored, useful for your analysis
|
|
83
|
-
}
|
|
84
|
-
|
|
92
|
+
}
|
|
93
|
+
],
|
|
94
|
+
... # definition of another item (document)
|
|
85
95
|
```
|
|
86
96
|
|
|
87
97
|
We also support a super simple allocation of annotations (`task-single`, not yet ⚠️), where you simply pass a list of all examples to be evaluated and they are processed in parallel by all annotators:
|
|
@@ -126,6 +136,7 @@ pearmut run
|
|
|
126
136
|
## Campaign management
|
|
127
137
|
|
|
128
138
|
When adding new campaigns or launching pearmut, a management link is shown that gives an overview of annotator progress but also an easy access to the annotation links or resetting the task progress (no data will be lost).
|
|
139
|
+
This is also the place where you can download all progress and collected annotations (these files exist also locally but this might be more convenient).
|
|
129
140
|
|
|
130
141
|
<img width="800" alt="Management dashboard" src="https://github.com/user-attachments/assets/057899d7-2291-46c7-876f-407c4050a9cb" />
|
|
131
142
|
|
|
@@ -134,11 +145,23 @@ An intentionally incorrect token can be shown if the annotations don't pass qual
|
|
|
134
145
|
|
|
135
146
|
<img width="500" alt="Token on completion" src="https://github.com/user-attachments/assets/4b4d2aa9-7bab-44d6-894b-6c789cd3bc6e" />
|
|
136
147
|
|
|
137
|
-
##
|
|
148
|
+
## Multimodal Annotations
|
|
149
|
+
|
|
150
|
+
We also support anything HTML-compatible both on the input and on the output.
|
|
151
|
+
This includes embedded YouTube videos, or even simple `<video>` tags that point to some resource somewhere.
|
|
152
|
+
For an example, try [examples/mock_multimodal.json](examples/mock_multimodal.json).
|
|
153
|
+
Tip: make sure the elements are already appropriately styled.
|
|
138
154
|
|
|
139
|
-
|
|
155
|
+
<img width="800" alt="Preview of multimodal elements in Pearmut" src="https://github.com/user-attachments/assets/f34a1a3e-ad95-4114-95ee-8a49e8003faf" />
|
|
140
156
|
|
|
157
|
+
## Development
|
|
158
|
+
|
|
159
|
+
Pearmut works by running a server that responds to requests from the frontend.
|
|
160
|
+
These requests are not template-based but rather carry only data (which gives flexibility in designing new protocols and interfaces).
|
|
161
|
+
By default, the frontend is served from `static/` which is pre-built when you `pip install pearmut`.
|
|
162
|
+
To make changes locally, clone the repository and run the following, which will recompile the frontend on changes (server changes need server restart):
|
|
141
163
|
```bash
|
|
164
|
+
cd pearmut
|
|
142
165
|
# watch the frontend for changes (in a separate terminal)
|
|
143
166
|
npm install web/ --prefix web/
|
|
144
167
|
npm run watch --prefix web/
|
|
@@ -149,16 +172,22 @@ pip3 install -e .
|
|
|
149
172
|
# sets up progress/log files in current working folder
|
|
150
173
|
pearmut add examples/wmt25_#_en-cs_CZ.json
|
|
151
174
|
pearmut add examples/wmt25_#_cs-de_DE.json
|
|
152
|
-
# shows a management link for all loaded campaigns
|
|
175
|
+
# shows a management link for all loaded campaigns and reloads on change
|
|
153
176
|
pearmut run
|
|
154
177
|
```
|
|
155
178
|
|
|
179
|
+
Optionally, you can specify `--server` in `pearmut add` and `pearmut run` to show correct URL prefixes.
|
|
180
|
+
The `pearmut run` also accepts `--port` (default 8001).
|
|
181
|
+
|
|
182
|
+
If you wish to create a new protocol (referenceable from `info->template`), simply create a new HTML and TS file in `web/src` and add a rule to `webpack.config.js` so that your template gets built.
|
|
183
|
+
A template can call the server for data etc. (see [web/src/pointwise.ts](web/src/pointwise.ts) as an example).
|
|
184
|
+
|
|
156
185
|
## Citation
|
|
157
186
|
|
|
158
187
|
If you use this work in your paper, please cite as:
|
|
159
188
|
```bibtex
|
|
160
189
|
@misc{zouhar2025pearmut,
|
|
161
|
-
author={Vilém Zouhar
|
|
190
|
+
author={Vilém Zouhar},
|
|
162
191
|
title={Pearmut🍐 Platform for Evaluation and Reviewing of Multilingual Tasks},
|
|
163
192
|
url={https://github.com/zouharvi/pearmut/},
|
|
164
193
|
year={2025},
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pearmut
|
|
3
|
-
Version: 0.0
|
|
3
|
+
Version: 0.1.0
|
|
4
4
|
Summary: A tool for evaluation of model outputs, primarily MT.
|
|
5
5
|
Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
|
|
6
|
-
License:
|
|
6
|
+
License: apache-2.0
|
|
7
7
|
Project-URL: Repository, https://github.com/zouharvi/pearmut
|
|
8
8
|
Project-URL: Issues, https://github.com/zouharvi/pearmut/issues
|
|
9
9
|
Keywords: evaluation,machine translation,human evaluation,annotation
|
|
@@ -13,6 +13,7 @@ License-File: LICENSE
|
|
|
13
13
|
Requires-Dist: fastapi>=0.110.0
|
|
14
14
|
Requires-Dist: uvicorn>=0.29.0
|
|
15
15
|
Requires-Dist: wonderwords>=3.0.0
|
|
16
|
+
Requires-Dist: psutil>=7.1.0
|
|
16
17
|
Provides-Extra: dev
|
|
17
18
|
Requires-Dist: pytest; extra == "dev"
|
|
18
19
|
Requires-Dist: pynpm>=0.3.0; extra == "dev"
|
|
@@ -32,9 +33,11 @@ Supports multimodality (text, video, audio, images) and a variety of annotation
|
|
|
32
33
|
|
|
33
34
|
[](https://github.com/zouharvi/pearmut/actions/workflows/ci.yml)
|
|
34
35
|
|
|
35
|
-
<img width="
|
|
36
|
+
<img width="1000" alt="Screenshot of ESA/MQM interface" src="https://github.com/user-attachments/assets/f14c91a5-44d7-4248-ada9-387e95ca59d0" />
|
|
36
37
|
|
|
37
|
-
##
|
|
38
|
+
## Quick start
|
|
39
|
+
|
|
40
|
+
You do not need to clone this repository. Simply install with pip and run locally:
|
|
38
41
|
```bash
|
|
39
42
|
# install the package
|
|
40
43
|
pip install pearmut
|
|
@@ -55,7 +58,7 @@ First, install the package
|
|
|
55
58
|
pip install pearmut
|
|
56
59
|
```
|
|
57
60
|
|
|
58
|
-
A campaign is described in a single JSON file (see [examples/](examples/)
|
|
61
|
+
A campaign is described in a single JSON file (see [examples/](examples/)).
|
|
59
62
|
One of the simplest ones, where each user has a pre-defined list of tasks (`task-based`), is:
|
|
60
63
|
```python
|
|
61
64
|
{
|
|
@@ -65,24 +68,25 @@ One of the simplest ones, where each user has a pre-defined list of tasks (`task
|
|
|
65
68
|
"protocol_score": true, # we want scores [0...100] for each segment
|
|
66
69
|
"protocol_error_spans": true, # we want error spans
|
|
67
70
|
"protocol_error_categories": false, # we do not want error span categories
|
|
68
|
-
"
|
|
69
|
-
"url": "http://localhost:8001" # where the server will be accessible
|
|
71
|
+
"instructions": "Evaluate translation from en to cs_CZ", # message to show to users
|
|
70
72
|
},
|
|
71
73
|
"campaign_id": "wmt25_#_en-cs_CZ",
|
|
72
74
|
"data": [
|
|
73
75
|
# data for first task/user
|
|
74
76
|
[
|
|
75
|
-
|
|
77
|
+
[
|
|
76
78
|
# each evaluation item is a document
|
|
77
|
-
|
|
78
|
-
"This will be the year that Guinness loses its cool. Cheers to that!",
|
|
79
|
-
"
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
"
|
|
83
|
-
"
|
|
84
|
-
|
|
85
|
-
|
|
79
|
+
{
|
|
80
|
+
"src": "This will be the year that Guinness loses its cool. Cheers to that!",
|
|
81
|
+
"tgt": "Nevím přesně, kdy jsem to poprvé zaznamenal. Možná to bylo ve chvíli, ..."
|
|
82
|
+
},
|
|
83
|
+
{
|
|
84
|
+
"src": "I'm not sure I can remember exactly when I sensed it. Maybe it was when some...",
|
|
85
|
+
"tgt": "Tohle bude rok, kdy Guinness přijde o svůj „cool“ faktor. Na zdraví!"
|
|
86
|
+
}
|
|
87
|
+
...
|
|
88
|
+
],
|
|
89
|
+
# more documents
|
|
86
90
|
...
|
|
87
91
|
],
|
|
88
92
|
# data for second task/user
|
|
@@ -96,12 +100,19 @@ One of the simplest ones, where each user has a pre-defined list of tasks (`task
|
|
|
96
100
|
In general, the task item can be anything and is handled by the specific protocol template.
|
|
97
101
|
For the standard ones (ESA, DA, MQM), we expect each item to be a dictionary (corresponding to a single document unit) that looks as follows:
|
|
98
102
|
```python
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
103
|
+
# single document definition
|
|
104
|
+
[
|
|
105
|
+
{
|
|
106
|
+
"src": "A najednou se všechna tato voda naplnila dalšími lidmi a dalšími věcmi.", # mandatory for ESA/MQM/DA
|
|
107
|
+
"tgt": "And suddenly all the water became full of other people and other people." # mandatory for ESA/MQM/DA
|
|
108
|
+
},
|
|
109
|
+
{
|
|
110
|
+
"src": "toto je pokračování stejného dokumentu",
|
|
111
|
+
"tgt": "this is a continuation of the same document",
|
|
102
112
|
... # all other keys that will be stored, useful for your analysis
|
|
103
|
-
}
|
|
104
|
-
|
|
113
|
+
}
|
|
114
|
+
],
|
|
115
|
+
... # definition of another item (document)
|
|
105
116
|
```
|
|
106
117
|
|
|
107
118
|
We also support a super simple allocation of annotations (`task-single`, not yet ⚠️), where you simply pass a list of all examples to be evaluated and they are processed in parallel by all annotators:
|
|
@@ -146,6 +157,7 @@ pearmut run
|
|
|
146
157
|
## Campaign management
|
|
147
158
|
|
|
148
159
|
When adding new campaigns or launching pearmut, a management link is shown that gives an overview of annotator progress but also an easy access to the annotation links or resetting the task progress (no data will be lost).
|
|
160
|
+
This is also the place where you can download all progress and collected annotations (these files exist also locally but this might be more convenient).
|
|
149
161
|
|
|
150
162
|
<img width="800" alt="Management dashboard" src="https://github.com/user-attachments/assets/057899d7-2291-46c7-876f-407c4050a9cb" />
|
|
151
163
|
|
|
@@ -154,11 +166,23 @@ An intentionally incorrect token can be shown if the annotations don't pass qual
|
|
|
154
166
|
|
|
155
167
|
<img width="500" alt="Token on completion" src="https://github.com/user-attachments/assets/4b4d2aa9-7bab-44d6-894b-6c789cd3bc6e" />
|
|
156
168
|
|
|
157
|
-
##
|
|
169
|
+
## Multimodal Annotations
|
|
170
|
+
|
|
171
|
+
We also support anything HTML-compatible both on the input and on the output.
|
|
172
|
+
This includes embedded YouTube videos, or even simple `<video>` tags that point to some resource somewhere.
|
|
173
|
+
For an example, try [examples/mock_multimodal.json](examples/mock_multimodal.json).
|
|
174
|
+
Tip: make sure the elements are already appropriately styled.
|
|
158
175
|
|
|
159
|
-
|
|
176
|
+
<img width="800" alt="Preview of multimodal elements in Pearmut" src="https://github.com/user-attachments/assets/f34a1a3e-ad95-4114-95ee-8a49e8003faf" />
|
|
160
177
|
|
|
178
|
+
## Development
|
|
179
|
+
|
|
180
|
+
Pearmut works by running a server that responds to requests from the frontend.
|
|
181
|
+
These requests are not template-based but rather carry only data (which gives flexibility in designing new protocols and interfaces).
|
|
182
|
+
By default, the frontend is served from `static/` which is pre-built when you `pip install pearmut`.
|
|
183
|
+
To make changes locally, clone the repository and run the following, which will recompile the frontend on changes (server changes need server restart):
|
|
161
184
|
```bash
|
|
185
|
+
cd pearmut
|
|
162
186
|
# watch the frontend for changes (in a separate terminal)
|
|
163
187
|
npm install web/ --prefix web/
|
|
164
188
|
npm run watch --prefix web/
|
|
@@ -169,16 +193,22 @@ pip3 install -e .
|
|
|
169
193
|
# sets up progress/log files in current working folder
|
|
170
194
|
pearmut add examples/wmt25_#_en-cs_CZ.json
|
|
171
195
|
pearmut add examples/wmt25_#_cs-de_DE.json
|
|
172
|
-
# shows a management link for all loaded campaigns
|
|
196
|
+
# shows a management link for all loaded campaigns and reloads on change
|
|
173
197
|
pearmut run
|
|
174
198
|
```
|
|
175
199
|
|
|
200
|
+
Optionally, you can specify `--server` in `pearmut add` and `pearmut run` to show correct URL prefixes.
|
|
201
|
+
The `pearmut run` also accepts `--port` (default 8001).
|
|
202
|
+
|
|
203
|
+
If you wish to create a new protocol (referenceable from `info->template`), simply create a new HTML and TS file in `web/src` and add a rule to `webpack.config.js` so that your template gets built.
|
|
204
|
+
A template can call the server for data etc. (see [web/src/pointwise.ts](web/src/pointwise.ts) as an example).
|
|
205
|
+
|
|
176
206
|
## Citation
|
|
177
207
|
|
|
178
208
|
If you use this work in your paper, please cite as:
|
|
179
209
|
```bibtex
|
|
180
210
|
@misc{zouhar2025pearmut,
|
|
181
|
-
author={Vilém Zouhar
|
|
211
|
+
author={Vilém Zouhar},
|
|
182
212
|
title={Pearmut🍐 Platform for Evaluation and Reviewing of Multilingual Tasks},
|
|
183
213
|
url={https://github.com/zouharvi/pearmut/},
|
|
184
214
|
year={2025},
|
|
@@ -7,10 +7,9 @@ pearmut.egg-info/dependency_links.txt
|
|
|
7
7
|
pearmut.egg-info/entry_points.txt
|
|
8
8
|
pearmut.egg-info/requires.txt
|
|
9
9
|
pearmut.egg-info/top_level.txt
|
|
10
|
+
server/app.py
|
|
10
11
|
server/cli.py
|
|
11
|
-
server/model.py
|
|
12
12
|
server/protocols.py
|
|
13
|
-
server/run.py
|
|
14
13
|
server/utils.py
|
|
15
14
|
server/static/dashboard.bundle.js
|
|
16
15
|
server/static/dashboard.html
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "pearmut"
|
|
3
|
-
version = "0.0
|
|
3
|
+
version = "0.1.0"
|
|
4
4
|
description = "A tool for evaluation of model outputs, primarily MT."
|
|
5
5
|
readme = "README.md"
|
|
6
|
-
license = { text = "
|
|
6
|
+
license = { text = "apache-2.0" }
|
|
7
7
|
requires-python = ">=3.12"
|
|
8
8
|
authors = [{ name = "Vilém Zouhar", email = "vilem.zouhar@gmail.com" }]
|
|
9
9
|
keywords = [
|
|
@@ -16,6 +16,7 @@ dependencies = [
|
|
|
16
16
|
"fastapi >= 0.110.0",
|
|
17
17
|
"uvicorn >= 0.29.0",
|
|
18
18
|
"wonderwords >= 3.0.0",
|
|
19
|
+
"psutil >= 7.1.0",
|
|
19
20
|
]
|
|
20
21
|
|
|
21
22
|
[project.optional-dependencies]
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import os
|
|
3
|
-
import urllib
|
|
4
3
|
from typing import Any
|
|
5
4
|
|
|
6
5
|
from fastapi import FastAPI, Query
|
|
@@ -9,7 +8,7 @@ from fastapi.responses import JSONResponse
|
|
|
9
8
|
from fastapi.staticfiles import StaticFiles
|
|
10
9
|
from pydantic import BaseModel
|
|
11
10
|
|
|
12
|
-
from .protocols import get_next_item,
|
|
11
|
+
from .protocols import get_next_item, reset_task, update_progress
|
|
13
12
|
from .utils import ROOT, load_progress_data, save_progress_data
|
|
14
13
|
|
|
15
14
|
os.makedirs(f"{ROOT}/data/outputs", exist_ok=True)
|
|
@@ -32,15 +31,6 @@ for campaign_id in progress_data.keys():
|
|
|
32
31
|
with open(f"{ROOT}/data/tasks/{campaign_id}.json", "r") as f:
|
|
33
32
|
tasks_data[campaign_id] = json.load(f)
|
|
34
33
|
|
|
35
|
-
if tasks_data:
|
|
36
|
-
# print access dashboard URL for all campaigns
|
|
37
|
-
print(
|
|
38
|
-
list(tasks_data.values())[0]["info"]["url"] + "/dashboard.html?" + "&".join([
|
|
39
|
-
f"campaign_id={urllib.parse.quote_plus(campaign_id)}&token={campaign_data["token"]}"
|
|
40
|
-
for campaign_id, campaign_data in tasks_data.items()
|
|
41
|
-
])
|
|
42
|
-
)
|
|
43
|
-
|
|
44
34
|
|
|
45
35
|
class LogResponseRequest(BaseModel):
|
|
46
36
|
campaign_id: str
|
|
@@ -61,6 +51,7 @@ async def _log_response(request: LogResponseRequest):
|
|
|
61
51
|
if user_id not in progress_data[campaign_id]:
|
|
62
52
|
return JSONResponse(content={"error": "Unknown user ID"}, status_code=400)
|
|
63
53
|
|
|
54
|
+
# append response to the output log
|
|
64
55
|
with open(f"{ROOT}/data/outputs/{campaign_id}.jsonl", "a") as log_file:
|
|
65
56
|
log_file.write(json.dumps(request.payload, ensure_ascii=False) + "\n")
|
|
66
57
|
|
|
@@ -77,7 +68,7 @@ async def _log_response(request: LogResponseRequest):
|
|
|
77
68
|
for a, b in zip(times, times[1:])
|
|
78
69
|
])
|
|
79
70
|
|
|
80
|
-
|
|
71
|
+
update_progress(campaign_id, user_id, tasks_data, progress_data, request.item_i, request.payload)
|
|
81
72
|
save_progress_data(progress_data)
|
|
82
73
|
|
|
83
74
|
return JSONResponse(content={"status": "ok"}, status_code=200)
|
|
@@ -214,4 +205,4 @@ app.mount(
|
|
|
214
205
|
"/",
|
|
215
206
|
StaticFiles(directory=f"{os.path.dirname(os.path.abspath(__file__))}/static/" , html=True, follow_symlink=True),
|
|
216
207
|
name="static",
|
|
217
|
-
)
|
|
208
|
+
)
|