pearmut 0.0.6__tar.gz → 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pearmut-0.0.6 → pearmut-0.1.1}/PKG-INFO +41 -23
- {pearmut-0.0.6 → pearmut-0.1.1}/README.md +39 -22
- {pearmut-0.0.6 → pearmut-0.1.1}/pearmut.egg-info/PKG-INFO +41 -23
- {pearmut-0.0.6 → pearmut-0.1.1}/pearmut.egg-info/SOURCES.txt +0 -1
- {pearmut-0.0.6 → pearmut-0.1.1}/pearmut.egg-info/requires.txt +1 -0
- {pearmut-0.0.6 → pearmut-0.1.1}/pyproject.toml +2 -1
- {pearmut-0.0.6 → pearmut-0.1.1}/server/app.py +3 -2
- {pearmut-0.0.6 → pearmut-0.1.1}/server/cli.py +28 -1
- {pearmut-0.0.6 → pearmut-0.1.1}/server/protocols.py +15 -2
- {pearmut-0.0.6 → pearmut-0.1.1}/server/static/assets/style.css +12 -8
- {pearmut-0.0.6 → pearmut-0.1.1}/server/static/dashboard.bundle.js +1 -1
- {pearmut-0.0.6 → pearmut-0.1.1}/server/static/dashboard.html +4 -3
- {pearmut-0.0.6 → pearmut-0.1.1}/server/static/pointwise.bundle.js +1 -1
- pearmut-0.1.1/server/static/pointwise.html +235 -0
- pearmut-0.0.6/server/model.py +0 -61
- pearmut-0.0.6/server/static/pointwise.html +0 -173
- {pearmut-0.0.6 → pearmut-0.1.1}/LICENSE +0 -0
- {pearmut-0.0.6 → pearmut-0.1.1}/pearmut.egg-info/dependency_links.txt +0 -0
- {pearmut-0.0.6 → pearmut-0.1.1}/pearmut.egg-info/entry_points.txt +0 -0
- {pearmut-0.0.6 → pearmut-0.1.1}/pearmut.egg-info/top_level.txt +0 -0
- {pearmut-0.0.6 → pearmut-0.1.1}/server/static/assets/favicon.svg +0 -0
- {pearmut-0.0.6 → pearmut-0.1.1}/server/static/index.html +0 -0
- {pearmut-0.0.6 → pearmut-0.1.1}/server/utils.py +0 -0
- {pearmut-0.0.6 → pearmut-0.1.1}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pearmut
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.1.1
|
|
4
4
|
Summary: A tool for evaluation of model outputs, primarily MT.
|
|
5
5
|
Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
|
|
6
6
|
License: apache-2.0
|
|
@@ -13,6 +13,7 @@ License-File: LICENSE
|
|
|
13
13
|
Requires-Dist: fastapi>=0.110.0
|
|
14
14
|
Requires-Dist: uvicorn>=0.29.0
|
|
15
15
|
Requires-Dist: wonderwords>=3.0.0
|
|
16
|
+
Requires-Dist: psutil>=7.1.0
|
|
16
17
|
Provides-Extra: dev
|
|
17
18
|
Requires-Dist: pytest; extra == "dev"
|
|
18
19
|
Requires-Dist: pynpm>=0.3.0; extra == "dev"
|
|
@@ -32,9 +33,11 @@ Supports multimodality (text, video, audio, images) and a variety of annotation
|
|
|
32
33
|
|
|
33
34
|
[](https://github.com/zouharvi/pearmut/actions/workflows/ci.yml)
|
|
34
35
|
|
|
35
|
-
<img width="
|
|
36
|
+
<img width="1000" alt="Screenshot of ESA/MQM interface" src="https://github.com/user-attachments/assets/f14c91a5-44d7-4248-ada9-387e95ca59d0" />
|
|
36
37
|
|
|
37
38
|
## Quick start
|
|
39
|
+
|
|
40
|
+
You do not need to clone this repository. Simply install with pip and run locally:
|
|
38
41
|
```bash
|
|
39
42
|
# install the package
|
|
40
43
|
pip install pearmut
|
|
@@ -55,7 +58,7 @@ First, install the package
|
|
|
55
58
|
pip install pearmut
|
|
56
59
|
```
|
|
57
60
|
|
|
58
|
-
A campaign is described in a single JSON file (see [examples/](examples/)
|
|
61
|
+
A campaign is described in a single JSON file (see [examples/](examples/)).
|
|
59
62
|
One of the simplest ones, where each user has a pre-defined list of tasks (`task-based`), is:
|
|
60
63
|
```python
|
|
61
64
|
{
|
|
@@ -65,24 +68,25 @@ One of the simplest ones, where each user has a pre-defined list of tasks (`task
|
|
|
65
68
|
"protocol_score": true, # we want scores [0...100] for each segment
|
|
66
69
|
"protocol_error_spans": true, # we want error spans
|
|
67
70
|
"protocol_error_categories": false, # we do not want error span categories
|
|
68
|
-
"
|
|
69
|
-
"url": "http://localhost:8001" # where the server will be accessible
|
|
71
|
+
"instructions": "Evaluate translation from en to cs_CZ", # message to show to users
|
|
70
72
|
},
|
|
71
73
|
"campaign_id": "wmt25_#_en-cs_CZ",
|
|
72
74
|
"data": [
|
|
73
75
|
# data for first task/user
|
|
74
76
|
[
|
|
75
|
-
|
|
77
|
+
[
|
|
76
78
|
# each evaluation item is a document
|
|
77
|
-
|
|
78
|
-
"This will be the year that Guinness loses its cool. Cheers to that!",
|
|
79
|
-
"
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
"
|
|
83
|
-
"
|
|
84
|
-
|
|
85
|
-
|
|
79
|
+
{
|
|
80
|
+
"src": "This will be the year that Guinness loses its cool. Cheers to that!",
|
|
81
|
+
"tgt": "Nevím přesně, kdy jsem to poprvé zaznamenal. Možná to bylo ve chvíli, ..."
|
|
82
|
+
},
|
|
83
|
+
{
|
|
84
|
+
"src": "I'm not sure I can remember exactly when I sensed it. Maybe it was when some...",
|
|
85
|
+
"tgt": "Tohle bude rok, kdy Guinness přijde o svůj „cool“ faktor. Na zdraví!"
|
|
86
|
+
}
|
|
87
|
+
...
|
|
88
|
+
],
|
|
89
|
+
# more documents
|
|
86
90
|
...
|
|
87
91
|
],
|
|
88
92
|
# data for second task/user
|
|
@@ -96,12 +100,19 @@ One of the simplest ones, where each user has a pre-defined list of tasks (`task
|
|
|
96
100
|
In general, the task item can be anything and is handled by the specific protocol template.
|
|
97
101
|
For the standard ones (ESA, DA, MQM), we expect each item to be a list of dictionaries (together corresponding to a single document unit) that looks as follows:
|
|
98
102
|
```python
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
103
|
+
# single document definition
|
|
104
|
+
[
|
|
105
|
+
{
|
|
106
|
+
"src": "A najednou se všechna tato voda naplnila dalšími lidmi a dalšími věcmi.", # mandatory for ESA/MQM/DA
|
|
107
|
+
"tgt": "And suddenly all the water became full of other people and other people." # mandatory for ESA/MQM/DA
|
|
108
|
+
},
|
|
109
|
+
{
|
|
110
|
+
"src": "toto je pokračování stejného dokumentu",
|
|
111
|
+
"tgt": "this is a continuation of the same document",
|
|
102
112
|
... # all other keys that will be stored, useful for your analysis
|
|
103
|
-
}
|
|
104
|
-
|
|
113
|
+
}
|
|
114
|
+
],
|
|
115
|
+
... # definition of another item (document)
|
|
105
116
|
```
|
|
106
117
|
|
|
107
118
|
We also support a super simple allocation of annotations (`task-single`, not yet ⚠️), where you simply pass a list of all examples to be evaluated and they are processed in parallel by all annotators:
|
|
@@ -146,6 +157,7 @@ pearmut run
|
|
|
146
157
|
## Campaign management
|
|
147
158
|
|
|
148
159
|
When adding new campaigns or launching pearmut, a management link is shown that gives an overview of annotator progress but also an easy access to the annotation links or resetting the task progress (no data will be lost).
|
|
160
|
+
This is also the place where you can download all progress and collected annotations (these files also exist locally, but this might be more convenient).
|
|
149
161
|
|
|
150
162
|
<img width="800" alt="Management dashboard" src="https://github.com/user-attachments/assets/057899d7-2291-46c7-876f-407c4050a9cb" />
|
|
151
163
|
|
|
@@ -165,9 +177,12 @@ Tip: make sure the elements are already appropriately styled.
|
|
|
165
177
|
|
|
166
178
|
## Development
|
|
167
179
|
|
|
168
|
-
|
|
169
|
-
|
|
180
|
+
Pearmut works by running a server that responds to requests from the frontend.
|
|
181
|
+
These requests are not template-based but rather carry only data (which gives flexibility in designing new protocols and interfaces).
|
|
182
|
+
By default, the frontend is served from `static/` which is pre-built when you `pip install pearmut`.
|
|
183
|
+
To make changes locally, clone the repository and run the following, which will recompile the frontend on changes (server changes need server restart):
|
|
170
184
|
```bash
|
|
185
|
+
cd pearmut
|
|
171
186
|
# watch the frontend for changes (in a separate terminal)
|
|
172
187
|
npm install web/ --prefix web/
|
|
173
188
|
npm run watch --prefix web/
|
|
@@ -185,12 +200,15 @@ pearmut run
|
|
|
185
200
|
Optionally, you can specify `--server` in `pearmut add` and `pearmut run` to show correct URL prefixes.
|
|
186
201
|
The `pearmut run` also accepts `--port` (default 8001).
|
|
187
202
|
|
|
203
|
+
If you wish to create a new protocol (referenceable from `info->template`), simply create a new HTML and TS file in `web/src` and add a rule to `webpack.config.js` so that your template gets built.
|
|
204
|
+
A template can call the server for data etc. (see [web/src/pointwise.ts](web/src/pointwise.ts) as an example).
|
|
205
|
+
|
|
188
206
|
## Citation
|
|
189
207
|
|
|
190
208
|
If you use this work in your paper, please cite as:
|
|
191
209
|
```bibtex
|
|
192
210
|
@misc{zouhar2025pearmut,
|
|
193
|
-
author={Vilém Zouhar
|
|
211
|
+
author={Vilém Zouhar},
|
|
194
212
|
title={Pearmut🍐 Platform for Evaluation and Reviewing of Multilingual Tasks},
|
|
195
213
|
url={https://github.com/zouharvi/pearmut/},
|
|
196
214
|
year={2025},
|
|
@@ -12,9 +12,11 @@ Supports multimodality (text, video, audio, images) and a variety of annotation
|
|
|
12
12
|
|
|
13
13
|
[](https://github.com/zouharvi/pearmut/actions/workflows/ci.yml)
|
|
14
14
|
|
|
15
|
-
<img width="
|
|
15
|
+
<img width="1000" alt="Screenshot of ESA/MQM interface" src="https://github.com/user-attachments/assets/f14c91a5-44d7-4248-ada9-387e95ca59d0" />
|
|
16
16
|
|
|
17
17
|
## Quick start
|
|
18
|
+
|
|
19
|
+
You do not need to clone this repository. Simply install with pip and run locally:
|
|
18
20
|
```bash
|
|
19
21
|
# install the package
|
|
20
22
|
pip install pearmut
|
|
@@ -35,7 +37,7 @@ First, install the package
|
|
|
35
37
|
pip install pearmut
|
|
36
38
|
```
|
|
37
39
|
|
|
38
|
-
A campaign is described in a single JSON file (see [examples/](examples/)
|
|
40
|
+
A campaign is described in a single JSON file (see [examples/](examples/)).
|
|
39
41
|
One of the simplest ones, where each user has a pre-defined list of tasks (`task-based`), is:
|
|
40
42
|
```python
|
|
41
43
|
{
|
|
@@ -45,24 +47,25 @@ One of the simplest ones, where each user has a pre-defined list of tasks (`task
|
|
|
45
47
|
"protocol_score": true, # we want scores [0...100] for each segment
|
|
46
48
|
"protocol_error_spans": true, # we want error spans
|
|
47
49
|
"protocol_error_categories": false, # we do not want error span categories
|
|
48
|
-
"
|
|
49
|
-
"url": "http://localhost:8001" # where the server will be accessible
|
|
50
|
+
"instructions": "Evaluate translation from en to cs_CZ", # message to show to users
|
|
50
51
|
},
|
|
51
52
|
"campaign_id": "wmt25_#_en-cs_CZ",
|
|
52
53
|
"data": [
|
|
53
54
|
# data for first task/user
|
|
54
55
|
[
|
|
55
|
-
|
|
56
|
+
[
|
|
56
57
|
# each evaluation item is a document
|
|
57
|
-
|
|
58
|
-
"This will be the year that Guinness loses its cool. Cheers to that!",
|
|
59
|
-
"
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
"
|
|
63
|
-
"
|
|
64
|
-
|
|
65
|
-
|
|
58
|
+
{
|
|
59
|
+
"src": "This will be the year that Guinness loses its cool. Cheers to that!",
|
|
60
|
+
"tgt": "Nevím přesně, kdy jsem to poprvé zaznamenal. Možná to bylo ve chvíli, ..."
|
|
61
|
+
},
|
|
62
|
+
{
|
|
63
|
+
"src": "I'm not sure I can remember exactly when I sensed it. Maybe it was when some...",
|
|
64
|
+
"tgt": "Tohle bude rok, kdy Guinness přijde o svůj „cool“ faktor. Na zdraví!"
|
|
65
|
+
}
|
|
66
|
+
...
|
|
67
|
+
],
|
|
68
|
+
# more documents
|
|
66
69
|
...
|
|
67
70
|
],
|
|
68
71
|
# data for second task/user
|
|
@@ -76,12 +79,19 @@ One of the simplest ones, where each user has a pre-defined list of tasks (`task
|
|
|
76
79
|
In general, the task item can be anything and is handled by the specific protocol template.
|
|
77
80
|
For the standard ones (ESA, DA, MQM), we expect each item to be a list of dictionaries (together corresponding to a single document unit) that looks as follows:
|
|
78
81
|
```python
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
+
# single document definition
|
|
83
|
+
[
|
|
84
|
+
{
|
|
85
|
+
"src": "A najednou se všechna tato voda naplnila dalšími lidmi a dalšími věcmi.", # mandatory for ESA/MQM/DA
|
|
86
|
+
"tgt": "And suddenly all the water became full of other people and other people." # mandatory for ESA/MQM/DA
|
|
87
|
+
},
|
|
88
|
+
{
|
|
89
|
+
"src": "toto je pokračování stejného dokumentu",
|
|
90
|
+
"tgt": "this is a continuation of the same document",
|
|
82
91
|
... # all other keys that will be stored, useful for your analysis
|
|
83
|
-
}
|
|
84
|
-
|
|
92
|
+
}
|
|
93
|
+
],
|
|
94
|
+
... # definition of another item (document)
|
|
85
95
|
```
|
|
86
96
|
|
|
87
97
|
We also support a super simple allocation of annotations (`task-single`, not yet ⚠️), where you simply pass a list of all examples to be evaluated and they are processed in parallel by all annotators:
|
|
@@ -126,6 +136,7 @@ pearmut run
|
|
|
126
136
|
## Campaign management
|
|
127
137
|
|
|
128
138
|
When adding new campaigns or launching pearmut, a management link is shown that gives an overview of annotator progress but also an easy access to the annotation links or resetting the task progress (no data will be lost).
|
|
139
|
+
This is also the place where you can download all progress and collected annotations (these files also exist locally, but this might be more convenient).
|
|
129
140
|
|
|
130
141
|
<img width="800" alt="Management dashboard" src="https://github.com/user-attachments/assets/057899d7-2291-46c7-876f-407c4050a9cb" />
|
|
131
142
|
|
|
@@ -145,9 +156,12 @@ Tip: make sure the elements are already appropriately styled.
|
|
|
145
156
|
|
|
146
157
|
## Development
|
|
147
158
|
|
|
148
|
-
|
|
149
|
-
|
|
159
|
+
Pearmut works by running a server that responds to requests from the frontend.
|
|
160
|
+
These requests are not template-based but rather carry only data (which gives flexibility in designing new protocols and interfaces).
|
|
161
|
+
By default, the frontend is served from `static/` which is pre-built when you `pip install pearmut`.
|
|
162
|
+
To make changes locally, clone the repository and run the following, which will recompile the frontend on changes (server changes need server restart):
|
|
150
163
|
```bash
|
|
164
|
+
cd pearmut
|
|
151
165
|
# watch the frontend for changes (in a separate terminal)
|
|
152
166
|
npm install web/ --prefix web/
|
|
153
167
|
npm run watch --prefix web/
|
|
@@ -165,12 +179,15 @@ pearmut run
|
|
|
165
179
|
Optionally, you can specify `--server` in `pearmut add` and `pearmut run` to show correct URL prefixes.
|
|
166
180
|
The `pearmut run` also accepts `--port` (default 8001).
|
|
167
181
|
|
|
182
|
+
If you wish to create a new protocol (referenceable from `info->template`), simply create a new HTML and TS file in `web/src` and add a rule to `webpack.config.js` so that your template gets built.
|
|
183
|
+
A template can call the server for data etc. (see [web/src/pointwise.ts](web/src/pointwise.ts) as an example).
|
|
184
|
+
|
|
168
185
|
## Citation
|
|
169
186
|
|
|
170
187
|
If you use this work in your paper, please cite as:
|
|
171
188
|
```bibtex
|
|
172
189
|
@misc{zouhar2025pearmut,
|
|
173
|
-
author={Vilém Zouhar
|
|
190
|
+
author={Vilém Zouhar},
|
|
174
191
|
title={Pearmut🍐 Platform for Evaluation and Reviewing of Multilingual Tasks},
|
|
175
192
|
url={https://github.com/zouharvi/pearmut/},
|
|
176
193
|
year={2025},
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pearmut
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.1.1
|
|
4
4
|
Summary: A tool for evaluation of model outputs, primarily MT.
|
|
5
5
|
Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
|
|
6
6
|
License: apache-2.0
|
|
@@ -13,6 +13,7 @@ License-File: LICENSE
|
|
|
13
13
|
Requires-Dist: fastapi>=0.110.0
|
|
14
14
|
Requires-Dist: uvicorn>=0.29.0
|
|
15
15
|
Requires-Dist: wonderwords>=3.0.0
|
|
16
|
+
Requires-Dist: psutil>=7.1.0
|
|
16
17
|
Provides-Extra: dev
|
|
17
18
|
Requires-Dist: pytest; extra == "dev"
|
|
18
19
|
Requires-Dist: pynpm>=0.3.0; extra == "dev"
|
|
@@ -32,9 +33,11 @@ Supports multimodality (text, video, audio, images) and a variety of annotation
|
|
|
32
33
|
|
|
33
34
|
[](https://github.com/zouharvi/pearmut/actions/workflows/ci.yml)
|
|
34
35
|
|
|
35
|
-
<img width="
|
|
36
|
+
<img width="1000" alt="Screenshot of ESA/MQM interface" src="https://github.com/user-attachments/assets/f14c91a5-44d7-4248-ada9-387e95ca59d0" />
|
|
36
37
|
|
|
37
38
|
## Quick start
|
|
39
|
+
|
|
40
|
+
You do not need to clone this repository. Simply install with pip and run locally:
|
|
38
41
|
```bash
|
|
39
42
|
# install the package
|
|
40
43
|
pip install pearmut
|
|
@@ -55,7 +58,7 @@ First, install the package
|
|
|
55
58
|
pip install pearmut
|
|
56
59
|
```
|
|
57
60
|
|
|
58
|
-
A campaign is described in a single JSON file (see [examples/](examples/)
|
|
61
|
+
A campaign is described in a single JSON file (see [examples/](examples/)).
|
|
59
62
|
One of the simplest ones, where each user has a pre-defined list of tasks (`task-based`), is:
|
|
60
63
|
```python
|
|
61
64
|
{
|
|
@@ -65,24 +68,25 @@ One of the simplest ones, where each user has a pre-defined list of tasks (`task
|
|
|
65
68
|
"protocol_score": true, # we want scores [0...100] for each segment
|
|
66
69
|
"protocol_error_spans": true, # we want error spans
|
|
67
70
|
"protocol_error_categories": false, # we do not want error span categories
|
|
68
|
-
"
|
|
69
|
-
"url": "http://localhost:8001" # where the server will be accessible
|
|
71
|
+
"instructions": "Evaluate translation from en to cs_CZ", # message to show to users
|
|
70
72
|
},
|
|
71
73
|
"campaign_id": "wmt25_#_en-cs_CZ",
|
|
72
74
|
"data": [
|
|
73
75
|
# data for first task/user
|
|
74
76
|
[
|
|
75
|
-
|
|
77
|
+
[
|
|
76
78
|
# each evaluation item is a document
|
|
77
|
-
|
|
78
|
-
"This will be the year that Guinness loses its cool. Cheers to that!",
|
|
79
|
-
"
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
"
|
|
83
|
-
"
|
|
84
|
-
|
|
85
|
-
|
|
79
|
+
{
|
|
80
|
+
"src": "This will be the year that Guinness loses its cool. Cheers to that!",
|
|
81
|
+
"tgt": "Nevím přesně, kdy jsem to poprvé zaznamenal. Možná to bylo ve chvíli, ..."
|
|
82
|
+
},
|
|
83
|
+
{
|
|
84
|
+
"src": "I'm not sure I can remember exactly when I sensed it. Maybe it was when some...",
|
|
85
|
+
"tgt": "Tohle bude rok, kdy Guinness přijde o svůj „cool“ faktor. Na zdraví!"
|
|
86
|
+
}
|
|
87
|
+
...
|
|
88
|
+
],
|
|
89
|
+
# more documents
|
|
86
90
|
...
|
|
87
91
|
],
|
|
88
92
|
# data for second task/user
|
|
@@ -96,12 +100,19 @@ One of the simplest ones, where each user has a pre-defined list of tasks (`task
|
|
|
96
100
|
In general, the task item can be anything and is handled by the specific protocol template.
|
|
97
101
|
For the standard ones (ESA, DA, MQM), we expect each item to be a list of dictionaries (together corresponding to a single document unit) that looks as follows:
|
|
98
102
|
```python
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
103
|
+
# single document definition
|
|
104
|
+
[
|
|
105
|
+
{
|
|
106
|
+
"src": "A najednou se všechna tato voda naplnila dalšími lidmi a dalšími věcmi.", # mandatory for ESA/MQM/DA
|
|
107
|
+
"tgt": "And suddenly all the water became full of other people and other people." # mandatory for ESA/MQM/DA
|
|
108
|
+
},
|
|
109
|
+
{
|
|
110
|
+
"src": "toto je pokračování stejného dokumentu",
|
|
111
|
+
"tgt": "this is a continuation of the same document",
|
|
102
112
|
... # all other keys that will be stored, useful for your analysis
|
|
103
|
-
}
|
|
104
|
-
|
|
113
|
+
}
|
|
114
|
+
],
|
|
115
|
+
... # definition of another item (document)
|
|
105
116
|
```
|
|
106
117
|
|
|
107
118
|
We also support a super simple allocation of annotations (`task-single`, not yet ⚠️), where you simply pass a list of all examples to be evaluated and they are processed in parallel by all annotators:
|
|
@@ -146,6 +157,7 @@ pearmut run
|
|
|
146
157
|
## Campaign management
|
|
147
158
|
|
|
148
159
|
When adding new campaigns or launching pearmut, a management link is shown that gives an overview of annotator progress but also an easy access to the annotation links or resetting the task progress (no data will be lost).
|
|
160
|
+
This is also the place where you can download all progress and collected annotations (these files also exist locally, but this might be more convenient).
|
|
149
161
|
|
|
150
162
|
<img width="800" alt="Management dashboard" src="https://github.com/user-attachments/assets/057899d7-2291-46c7-876f-407c4050a9cb" />
|
|
151
163
|
|
|
@@ -165,9 +177,12 @@ Tip: make sure the elements are already appropriately styled.
|
|
|
165
177
|
|
|
166
178
|
## Development
|
|
167
179
|
|
|
168
|
-
|
|
169
|
-
|
|
180
|
+
Pearmut works by running a server that responds to requests from the frontend.
|
|
181
|
+
These requests are not template-based but rather carry only data (which gives flexibility in designing new protocols and interfaces).
|
|
182
|
+
By default, the frontend is served from `static/` which is pre-built when you `pip install pearmut`.
|
|
183
|
+
To make changes locally, clone the repository and run the following, which will recompile the frontend on changes (server changes need server restart):
|
|
170
184
|
```bash
|
|
185
|
+
cd pearmut
|
|
171
186
|
# watch the frontend for changes (in a separate terminal)
|
|
172
187
|
npm install web/ --prefix web/
|
|
173
188
|
npm run watch --prefix web/
|
|
@@ -185,12 +200,15 @@ pearmut run
|
|
|
185
200
|
Optionally, you can specify `--server` in `pearmut add` and `pearmut run` to show correct URL prefixes.
|
|
186
201
|
The `pearmut run` also accepts `--port` (default 8001).
|
|
187
202
|
|
|
203
|
+
If you wish to create a new protocol (referenceable from `info->template`), simply create a new HTML and TS file in `web/src` and add a rule to `webpack.config.js` so that your template gets built.
|
|
204
|
+
A template can call the server for data etc. (see [web/src/pointwise.ts](web/src/pointwise.ts) as an example).
|
|
205
|
+
|
|
188
206
|
## Citation
|
|
189
207
|
|
|
190
208
|
If you use this work in your paper, please cite as:
|
|
191
209
|
```bibtex
|
|
192
210
|
@misc{zouhar2025pearmut,
|
|
193
|
-
author={Vilém Zouhar
|
|
211
|
+
author={Vilém Zouhar},
|
|
194
212
|
title={Pearmut🍐 Platform for Evaluation and Reviewing of Multilingual Tasks},
|
|
195
213
|
url={https://github.com/zouharvi/pearmut/},
|
|
196
214
|
year={2025},
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "pearmut"
|
|
3
|
-
version = "0.
|
|
3
|
+
version = "0.1.1"
|
|
4
4
|
description = "A tool for evaluation of model outputs, primarily MT."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license = { text = "apache-2.0" }
|
|
@@ -16,6 +16,7 @@ dependencies = [
|
|
|
16
16
|
"fastapi >= 0.110.0",
|
|
17
17
|
"uvicorn >= 0.29.0",
|
|
18
18
|
"wonderwords >= 3.0.0",
|
|
19
|
+
"psutil >= 7.1.0",
|
|
19
20
|
]
|
|
20
21
|
|
|
21
22
|
[project.optional-dependencies]
|
|
@@ -8,7 +8,7 @@ from fastapi.responses import JSONResponse
|
|
|
8
8
|
from fastapi.staticfiles import StaticFiles
|
|
9
9
|
from pydantic import BaseModel
|
|
10
10
|
|
|
11
|
-
from .protocols import get_next_item,
|
|
11
|
+
from .protocols import get_next_item, reset_task, update_progress
|
|
12
12
|
from .utils import ROOT, load_progress_data, save_progress_data
|
|
13
13
|
|
|
14
14
|
os.makedirs(f"{ROOT}/data/outputs", exist_ok=True)
|
|
@@ -51,6 +51,7 @@ async def _log_response(request: LogResponseRequest):
|
|
|
51
51
|
if user_id not in progress_data[campaign_id]:
|
|
52
52
|
return JSONResponse(content={"error": "Unknown user ID"}, status_code=400)
|
|
53
53
|
|
|
54
|
+
# append response to the output log
|
|
54
55
|
with open(f"{ROOT}/data/outputs/{campaign_id}.jsonl", "a") as log_file:
|
|
55
56
|
log_file.write(json.dumps(request.payload, ensure_ascii=False) + "\n")
|
|
56
57
|
|
|
@@ -67,7 +68,7 @@ async def _log_response(request: LogResponseRequest):
|
|
|
67
68
|
for a, b in zip(times, times[1:])
|
|
68
69
|
])
|
|
69
70
|
|
|
70
|
-
|
|
71
|
+
update_progress(campaign_id, user_id, tasks_data, progress_data, request.item_i, request.payload)
|
|
71
72
|
save_progress_data(progress_data)
|
|
72
73
|
|
|
73
74
|
return JSONResponse(content={"status": "ok"}, status_code=200)
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Command-line interface for managing and running the Pearmut server.
|
|
3
|
+
"""
|
|
4
|
+
|
|
1
5
|
import argparse
|
|
2
6
|
import hashlib
|
|
3
7
|
import json
|
|
@@ -46,8 +50,10 @@ def _run(args_unknown):
|
|
|
46
50
|
)
|
|
47
51
|
|
|
48
52
|
|
|
49
|
-
|
|
50
53
|
def _add_campaign(args_unknown):
|
|
54
|
+
"""
|
|
55
|
+
Add a new campaign from a JSON data file.
|
|
56
|
+
"""
|
|
51
57
|
import random
|
|
52
58
|
|
|
53
59
|
import wonderwords
|
|
@@ -80,13 +86,30 @@ def _add_campaign(args_unknown):
|
|
|
80
86
|
)
|
|
81
87
|
exit(1)
|
|
82
88
|
|
|
89
|
+
if "info" not in campaign_data:
|
|
90
|
+
raise ValueError("Campaign data must contain 'info' field.")
|
|
91
|
+
if "data" not in campaign_data:
|
|
92
|
+
raise ValueError("Campaign data must contain 'data' field.")
|
|
93
|
+
if "type" not in campaign_data["info"]:
|
|
94
|
+
raise ValueError("Campaign 'info' must contain 'type' field.")
|
|
95
|
+
if "template" not in campaign_data["info"]:
|
|
96
|
+
raise ValueError("Campaign 'info' must contain 'template' field.")
|
|
97
|
+
|
|
83
98
|
# use random words for identifying users
|
|
84
99
|
rng = random.Random(campaign_data["campaign_id"])
|
|
85
100
|
rword = wonderwords.RandomWord(rng=rng)
|
|
86
101
|
if campaign_data["info"]["type"] == "task-based":
|
|
87
102
|
tasks = campaign_data["data"]
|
|
103
|
+
if not isinstance(tasks, list):
|
|
104
|
+
raise ValueError("Task-based campaign 'data' must be a list of tasks.")
|
|
105
|
+
if not all(isinstance(task, list) for task in tasks):
|
|
106
|
+
raise ValueError("Each task in task-based campaign 'data' must be a list of items.")
|
|
88
107
|
amount = len(tasks)
|
|
89
108
|
elif campaign_data["info"]["type"] == "dynamic":
|
|
109
|
+
if "num_users" not in campaign_data:
|
|
110
|
+
raise ValueError("Dynamic campaigns must specify 'num_users'.")
|
|
111
|
+
if not isinstance(campaign_data["data"], list):
|
|
112
|
+
raise ValueError("Dynamic campaign 'data' must be a list of items.")
|
|
90
113
|
amount = campaign_data["num_users"]
|
|
91
114
|
else:
|
|
92
115
|
raise ValueError(
|
|
@@ -94,6 +117,7 @@ def _add_campaign(args_unknown):
|
|
|
94
117
|
|
|
95
118
|
user_ids = []
|
|
96
119
|
while len(user_ids) < amount:
|
|
120
|
+
# generate random user IDs
|
|
97
121
|
new_id = f"{rword.random_words(amount=1, include_parts_of_speech=['adjective'])[0]}-{rword.random_words(amount=1, include_parts_of_speech=['noun'])[0]}"
|
|
98
122
|
if new_id not in user_ids:
|
|
99
123
|
user_ids.append(new_id)
|
|
@@ -150,6 +174,9 @@ def _add_campaign(args_unknown):
|
|
|
150
174
|
|
|
151
175
|
|
|
152
176
|
def main():
|
|
177
|
+
"""
|
|
178
|
+
Main entry point for the CLI.
|
|
179
|
+
"""
|
|
153
180
|
args = argparse.ArgumentParser()
|
|
154
181
|
args.add_argument('command', type=str, choices=['run', 'add', 'purge'])
|
|
155
182
|
args, args_unknown = args.parse_known_args()
|
|
@@ -9,6 +9,9 @@ def get_next_item(
|
|
|
9
9
|
tasks_data: dict,
|
|
10
10
|
progress_data: dict,
|
|
11
11
|
) -> JSONResponse:
|
|
12
|
+
"""
|
|
13
|
+
Get the next item for the user in the specified campaign.
|
|
14
|
+
"""
|
|
12
15
|
if tasks_data[campaign_id]["info"]["type"] == "task-based":
|
|
13
16
|
return get_next_item_taskbased(campaign_id, user_id, tasks_data, progress_data)
|
|
14
17
|
elif tasks_data[campaign_id]["info"]["type"] == "dynamic":
|
|
@@ -23,6 +26,9 @@ def get_next_item_taskbased(
|
|
|
23
26
|
data_all: dict,
|
|
24
27
|
progress_data: dict,
|
|
25
28
|
) -> JSONResponse:
|
|
29
|
+
"""
|
|
30
|
+
Get the next item for task-based protocol.
|
|
31
|
+
"""
|
|
26
32
|
if all(progress_data[campaign_id][user_id]["progress"]):
|
|
27
33
|
# all items completed
|
|
28
34
|
# TODO: add check for data quality
|
|
@@ -51,7 +57,7 @@ def get_next_item_taskbased(
|
|
|
51
57
|
"total": len(data_all[campaign_id]["data"][user_id]),
|
|
52
58
|
},
|
|
53
59
|
"info": {
|
|
54
|
-
"
|
|
60
|
+
"instructions": data_all[campaign_id]["info"].get("instructions", ""),
|
|
55
61
|
"item_i": item_i,
|
|
56
62
|
} | {
|
|
57
63
|
k: v
|
|
@@ -74,6 +80,9 @@ def reset_task(
|
|
|
74
80
|
tasks_data: dict,
|
|
75
81
|
progress_data: dict,
|
|
76
82
|
) -> JSONResponse:
|
|
83
|
+
"""
|
|
84
|
+
Reset the task progress for the user in the specified campaign.
|
|
85
|
+
"""
|
|
77
86
|
if tasks_data[campaign_id]["info"]["type"] == "task-based":
|
|
78
87
|
progress_data[campaign_id][user_id]["progress"] = [False]*len(tasks_data[campaign_id]["data"][user_id])
|
|
79
88
|
progress_data[campaign_id][user_id]["time"] = 0.0
|
|
@@ -89,7 +98,7 @@ def reset_task(
|
|
|
89
98
|
|
|
90
99
|
|
|
91
100
|
|
|
92
|
-
def
|
|
101
|
+
def update_progress(
|
|
93
102
|
campaign_id: str,
|
|
94
103
|
user_id: str,
|
|
95
104
|
tasks_data: dict,
|
|
@@ -97,9 +106,13 @@ def log_response(
|
|
|
97
106
|
item_i: int,
|
|
98
107
|
payload: Any,
|
|
99
108
|
) -> JSONResponse:
|
|
109
|
+
"""
|
|
110
|
+
Log the user's response for the specified item in the campaign.
|
|
111
|
+
"""
|
|
100
112
|
if tasks_data[campaign_id]["info"]["type"] == "task-based":
|
|
101
113
|
# even if it's already set it should be fine
|
|
102
114
|
progress_data[campaign_id][user_id]["progress"][item_i] = True
|
|
115
|
+
# TODO: log attention checks/quality?
|
|
103
116
|
return JSONResponse(content={"status": "ok"}, status_code=200)
|
|
104
117
|
elif tasks_data[campaign_id]["info"]["type"] == "dynamic":
|
|
105
118
|
return JSONResponse(content={"status": "error", "message": "Dynamic protocol logging not implemented yet."}, status_code=400)
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
body {
|
|
2
2
|
margin: 0;
|
|
3
3
|
padding: 0;
|
|
4
|
-
background: linear-gradient(135deg, #b9e2a1 0%, #
|
|
4
|
+
background: linear-gradient(135deg, #b9e2a1 0%, #e7e2cf 100%);
|
|
5
5
|
background-attachment: fixed;
|
|
6
6
|
}
|
|
7
7
|
|
|
@@ -14,7 +14,7 @@ body {
|
|
|
14
14
|
width: 30%;
|
|
15
15
|
background-color: #fffc;
|
|
16
16
|
padding: 10px;
|
|
17
|
-
border-radius:
|
|
17
|
+
border-radius: 8px;
|
|
18
18
|
vertical-align: top;
|
|
19
19
|
margin-left: 5px;
|
|
20
20
|
}
|
|
@@ -32,25 +32,29 @@ body {
|
|
|
32
32
|
input[type="button"] {
|
|
33
33
|
background: #fff;
|
|
34
34
|
border: none;
|
|
35
|
-
border-radius:
|
|
35
|
+
border-radius: 8px;
|
|
36
36
|
font-size: large;
|
|
37
|
+
box-shadow: 0 2px 4px #0001;
|
|
37
38
|
}
|
|
38
39
|
|
|
39
|
-
.button_navigation.button_selected {
|
|
40
|
-
background: #8db3ec !important;
|
|
41
|
-
}
|
|
42
40
|
|
|
43
41
|
input[type="button"]:hover:not(:disabled) {
|
|
44
|
-
background: #
|
|
42
|
+
background: #ffd;
|
|
45
43
|
cursor: pointer;
|
|
46
44
|
}
|
|
47
45
|
|
|
46
|
+
input[type="button"]:disabled {
|
|
47
|
+
background: #bbb;
|
|
48
|
+
cursor: not-allowed;
|
|
49
|
+
}
|
|
50
|
+
|
|
48
51
|
label {
|
|
49
52
|
user-select: none;
|
|
50
53
|
}
|
|
51
54
|
|
|
52
55
|
.white-box {
|
|
53
|
-
border-radius:
|
|
56
|
+
border-radius: 8px;
|
|
54
57
|
background: #fff;
|
|
55
58
|
padding: 15pt;
|
|
59
|
+
box-shadow: 0 4px 6px #0000001a
|
|
56
60
|
}
|