pearmut 0.0.3__py3-none-any.whl → 0.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pearmut/static/pointwise.html +1 -1
- {pearmut-0.0.3.dist-info → pearmut-0.0.4.dist-info}/METADATA +75 -43
- {pearmut-0.0.3.dist-info → pearmut-0.0.4.dist-info}/RECORD +7 -7
- {pearmut-0.0.3.dist-info → pearmut-0.0.4.dist-info}/WHEEL +0 -0
- {pearmut-0.0.3.dist-info → pearmut-0.0.4.dist-info}/entry_points.txt +0 -0
- {pearmut-0.0.3.dist-info → pearmut-0.0.4.dist-info}/licenses/LICENSE +0 -0
- {pearmut-0.0.3.dist-info → pearmut-0.0.4.dist-info}/top_level.txt +0 -0
pearmut/static/pointwise.html
CHANGED
|
@@ -168,4 +168,4 @@
|
|
|
168
168
|
direction: rtl;
|
|
169
169
|
width: 16px;
|
|
170
170
|
height: 200px;
|
|
171
|
-
}</style><script defer="defer" src="pointwise.bundle.js"></script></head><body><div style="max-width: 1600px; min-width: 900px; margin-left: auto; margin-right: auto; margin-top: 20px; padding-left: 10px;"><div style="display: flex;"><span id="progress" style="flex: 0 0
|
|
171
|
+
}</style><script defer="defer" src="pointwise.bundle.js"></script></head><body><div style="max-width: 1600px; min-width: 900px; margin-left: auto; margin-right: auto; margin-top: 20px; padding-left: 10px;"><div style="display: flex;"><span id="progress" style="flex: 0 0 140px;">Annotated: 0/0</span> <span id="time" style="flex: 0 0 190px;">Annotation time: 0m</span> <span id="status_message" style="margin-left: 20px; flex-grow: 1; vertical-align: top;"></span> <input type="button" value="Next 🛠️" id="button_next" disabled="disabled" style="flex: 0 0 150px; margin-right: 20px; margin-left: 20px; height: 2.5em;" title="Finish annotating all examples first."></div><div id="output_div" style="margin-top: 100px;"></div><br><br><br></div></body></html>
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pearmut
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.4
|
|
4
4
|
Summary: A tool for evaluation of model outputs, primarily MT.
|
|
5
5
|
Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -28,6 +28,19 @@ Supports multimodality (text, video, audio, images) and a variety of annotation
|
|
|
28
28
|
|
|
29
29
|
<img width="1334" alt="Screenshot of ESA/MQM interface" src="https://github.com/user-attachments/assets/dde04b98-c724-4226-b926-011a89e9ce31" />
|
|
30
30
|
|
|
31
|
+
## Getting started fast
|
|
32
|
+
```bash
|
|
33
|
+
# install the package
|
|
34
|
+
pip install pearmut
|
|
35
|
+
# download two campaign definitions
|
|
36
|
+
wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/wmt25_%23_en-cs_CZ.json
|
|
37
|
+
wget https://raw.githubusercontent.com/zouharvi/pearmut/refs/heads/main/examples/wmt25_%23_cs-de_DE.json
|
|
38
|
+
# load them into pearmut
|
|
39
|
+
pearmut add wmt25_#_en-cs_CZ.json
|
|
40
|
+
pearmut add wmt25_#_cs-de_DE.json
|
|
41
|
+
# start pearmut (will show management links)
|
|
42
|
+
pearmut run
|
|
43
|
+
```
|
|
31
44
|
|
|
32
45
|
## Starting a campaign
|
|
33
46
|
|
|
@@ -36,68 +49,82 @@ First, install the package
|
|
|
36
49
|
pip install pearmut
|
|
37
50
|
```
|
|
38
51
|
|
|
39
|
-
A campaign is described in a single JSON file.
|
|
40
|
-
|
|
52
|
+
A campaign is described in a single JSON file (see [examples/](examples/)!).
|
|
53
|
+
One of the simplest ones, where each user has a pre-defined list of tasks (`task-based`), is:
|
|
41
54
|
```python
|
|
42
55
|
{
|
|
43
|
-
|
|
44
|
-
"
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
56
|
+
"info": {
|
|
57
|
+
"type": "task-based",
|
|
58
|
+
"template": "pointwise",
|
|
59
|
+
"protocol_score": true, # we want scores [0...100] for each segment
|
|
60
|
+
"protocol_error_spans": true, # we want error spans
|
|
61
|
+
"protocol_error_categories": false, # we do not want error span categories
|
|
62
|
+
"status_message": "Evaluate translation from en to cs_CZ", # message to show to users
|
|
63
|
+
"url": "http://localhost:8001" # where the server will be accessible
|
|
64
|
+
},
|
|
65
|
+
"campaign_id": "wmt25_#_en-cs_CZ",
|
|
66
|
+
"data": [
|
|
67
|
+
# data for first task/user
|
|
68
|
+
[
|
|
69
|
+
{
|
|
70
|
+
# each evaluation item is a document
|
|
71
|
+
"src": [
|
|
72
|
+
"This will be the year that Guinness loses its cool. Cheers to that!",
|
|
73
|
+
"I'm not sure I can remember exactly when I sensed it. Maybe it was when some...",
|
|
74
|
+
],
|
|
75
|
+
"tgt": [
|
|
76
|
+
"Tohle bude rok, kdy Guinness přijde o svůj „cool“ faktor. Na zdraví!",
|
|
77
|
+
"Nevím přesně, kdy jsem to poprvé zaznamenal. Možná to bylo ve chvíli, ...",
|
|
78
|
+
]
|
|
79
|
+
},
|
|
80
|
+
...
|
|
81
|
+
],
|
|
82
|
+
# data for second task/user
|
|
83
|
+
[
|
|
55
84
|
...
|
|
56
85
|
],
|
|
86
|
+
# arbitrary number of users (each corresponds to a single URL to be shared)
|
|
87
|
+
]
|
|
57
88
|
}
|
|
58
89
|
```
|
|
59
90
|
In general, the task item can be anything and is handled by the specific protocol template.
|
|
60
91
|
For the standard ones (ESA, DA, MQM), we expect each item to be a list (i.e. document unit) that looks as follows:
|
|
61
92
|
```python
|
|
62
93
|
[
|
|
63
|
-
{
|
|
64
|
-
"src": "A najednou se všechna tato voda naplnila dalšími lidmi a dalšími věcmi.", # mandatory for ESA/MQM/DA
|
|
65
|
-
"tgt": "And suddenly all the water became full of other people and other people.", # mandatory for ESA/MQM/DA
|
|
94
|
+
{ # single document definition
|
|
95
|
+
"src": ["A najednou se všechna tato voda naplnila dalšími lidmi a dalšími věcmi.", "toto je pokračování stejného dokumentu"], # mandatory for ESA/MQM/DA
|
|
96
|
+
"tgt": ["And suddenly all the water became full of other people and other people.", "this is a continuation of the same document"], # mandatory for ESA/MQM/DA
|
|
66
97
|
... # all other keys that will be stored, useful for your analysis
|
|
67
98
|
},
|
|
68
|
-
|
|
69
|
-
"src": "toto je pokračování stejného dokumentu",
|
|
70
|
-
"tgt": "this is a continuation of the same document",
|
|
71
|
-
...
|
|
72
|
-
},
|
|
73
|
-
...
|
|
99
|
+
... # definition of another item
|
|
74
100
|
]
|
|
75
101
|
```
|
|
76
102
|
|
|
77
|
-
We also support
|
|
103
|
+
We also support a very simple allocation of annotations (`task-single`, not yet supported ⚠️), where you simply pass a list of all examples to be evaluated and they are processed in parallel by all annotators:
|
|
78
104
|
```python
|
|
79
105
|
{
|
|
80
106
|
"campaign_id": "my campaign 6",
|
|
81
107
|
"info": {
|
|
82
|
-
"type": "
|
|
83
|
-
"template": "
|
|
84
|
-
"
|
|
108
|
+
"type": "task-single",
|
|
109
|
+
"template": "pointwise",
|
|
110
|
+
"protocol_score": True, # collect scores
|
|
111
|
+
"protocol_error_spans": True, # collect error spans
|
|
112
|
+
"protocol_error_categories": False, # do not collect MQM categories, so ESA
|
|
85
113
|
"users": 50,
|
|
86
114
|
},
|
|
87
115
|
"data": [...], # list of all items
|
|
88
116
|
}
|
|
89
117
|
```
|
|
90
118
|
|
|
91
|
-
|
|
119
|
+
|
|
120
|
+
We also support dynamic allocation of annotations (`dynamic`, not yet supported ⚠️), which is more complex and can be ignored for now:
|
|
92
121
|
```python
|
|
93
122
|
{
|
|
94
123
|
"campaign_id": "my campaign 6",
|
|
95
124
|
"info": {
|
|
96
|
-
"type": "
|
|
97
|
-
"template": "
|
|
98
|
-
"
|
|
99
|
-
"protocol_error_spans": True, # collect error spans
|
|
100
|
-
"protocol_error_categories": False, # do not collect MQM categories, so ESA
|
|
125
|
+
"type": "dynamic",
|
|
126
|
+
"template": "kway",
|
|
127
|
+
"protocol_k": 5,
|
|
101
128
|
"users": 50,
|
|
102
129
|
},
|
|
103
130
|
"data": [...], # list of all items
|
|
@@ -106,17 +133,22 @@ We also support a super simple allocation of annotations (`task-single`, not yet
|
|
|
106
133
|
|
|
107
134
|
To load a campaign into the server, run the following.
|
|
108
135
|
It will fail if a campaign with the same `campaign_id` already exists, unless you specify `-o/--overwrite`.
|
|
109
|
-
It will also output a secret management link.
|
|
136
|
+
It will also output a secret management link. Then, launch the server:
|
|
110
137
|
```bash
|
|
111
138
|
pearmut add my_campaign_4.json
|
|
112
|
-
```
|
|
113
|
-
|
|
114
|
-
Finally, you can launch the server with:
|
|
115
|
-
```bash
|
|
116
139
|
pearmut run
|
|
117
140
|
```
|
|
118
141
|
|
|
119
|
-
|
|
142
|
+
## Annotator management
|
|
143
|
+
|
|
144
|
+
When adding new campaigns or launching pearmut, a management link is shown that gives an overview of annotator progress and also provides easy access to the annotation links and to resetting task progress (no data will be lost).
|
|
145
|
+
|
|
146
|
+
<img width="800" alt="Management dashboard" src="https://github.com/user-attachments/assets/057899d7-2291-46c7-876f-407c4050a9cb" />
|
|
147
|
+
|
|
148
|
+
Additionally, at the end of an annotation, a completion token is shown, which can be compared to the correct one that you can download as part of the metadata from the dashboard.
|
|
149
|
+
An intentionally incorrect token can be shown if the annotations don't pass quality control.
|
|
150
|
+
|
|
151
|
+
<img width="500" alt="Token on completion" src="https://github.com/user-attachments/assets/4b4d2aa9-7bab-44d6-894b-6c789cd3bc6e" />
|
|
120
152
|
|
|
121
153
|
## Development
|
|
122
154
|
|
|
@@ -131,13 +163,13 @@ npm run watch --prefix web/
|
|
|
131
163
|
pip3 install -e .
|
|
132
164
|
# add existing data from WMT25, this generates annotation links
|
|
133
165
|
# sets up progress/log files in current working folder
|
|
134
|
-
pearmut add
|
|
135
|
-
pearmut add
|
|
166
|
+
pearmut add examples/wmt25_#_en-cs_CZ.json
|
|
167
|
+
pearmut add examples/wmt25_#_cs-de_DE.json
|
|
136
168
|
# shows a management link for all loaded campaigns
|
|
137
169
|
pearmut run
|
|
138
170
|
```
|
|
139
171
|
|
|
140
|
-
##
|
|
172
|
+
## Citation
|
|
141
173
|
|
|
142
174
|
If you use this work in your paper, please cite as:
|
|
143
175
|
```bibtex
|
|
@@ -7,12 +7,12 @@ pearmut/static/dashboard.bundle.js,sha256=bd7L6wiFIHTdCk1bgiDkWNhJ-T9OwI3pq8Tsis
|
|
|
7
7
|
pearmut/static/dashboard.html,sha256=yXwKubqBYdWZ260xRSgNcfebtDVWPl6J5UAa6sj2NOk,1742
|
|
8
8
|
pearmut/static/index.html,sha256=ieCRLK83MVe-f-gtjYiOlvE-kKd8VnFF2xgyi6FoZpU,872
|
|
9
9
|
pearmut/static/pointwise.bundle.js,sha256=2aGddZQPxdVM73Ln9-ZJen42VeTY5fhMiAYgO1I63Rw,98820
|
|
10
|
-
pearmut/static/pointwise.html,sha256=
|
|
10
|
+
pearmut/static/pointwise.html,sha256=RJxuRj8xbEdxfWM0K_phltK7pMjRuk48mYhEo1X8PgY,4436
|
|
11
11
|
pearmut/static/assets/favicon.svg,sha256=gVPxdBlyfyJVkiMfh8WLaiSyH4lpwmKZs8UiOeX8YW4,7347
|
|
12
12
|
pearmut/static/assets/style.css,sha256=jfETRgVCohe680_30GXxbV4Zq4-B6UlXd5pZXlVLIRs,888
|
|
13
|
-
pearmut-0.0.
|
|
14
|
-
pearmut-0.0.
|
|
15
|
-
pearmut-0.0.
|
|
16
|
-
pearmut-0.0.
|
|
17
|
-
pearmut-0.0.
|
|
18
|
-
pearmut-0.0.
|
|
13
|
+
pearmut-0.0.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
14
|
+
pearmut-0.0.4.dist-info/METADATA,sha256=9L-x0xFezPPy8FCKQBVk7criwX-qzGkvEEJw-tkpu3c,6814
|
|
15
|
+
pearmut-0.0.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
16
|
+
pearmut-0.0.4.dist-info/entry_points.txt,sha256=eEA9LVWsS3neQbMvL_nMvEw8I0oFudw8nQa1iqxOiWM,45
|
|
17
|
+
pearmut-0.0.4.dist-info/top_level.txt,sha256=CdgtUM-SKQDt6o5g0QreO-_7XTBP9_wnHMS1P-Rl5Go,8
|
|
18
|
+
pearmut-0.0.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|