pearmut 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pearmut/app.py +31 -5
- pearmut/assignment.py +138 -10
- pearmut/static/listwise.bundle.js +1 -1
- pearmut/static/listwise.html +1 -1
- pearmut/static/pointwise.bundle.js +1 -1
- pearmut/utils.py +55 -2
- {pearmut-0.1.2.dist-info → pearmut-0.1.3.dist-info}/METADATA +56 -5
- pearmut-0.1.3.dist-info/RECORD +19 -0
- pearmut-0.1.2.dist-info/RECORD +0 -19
- {pearmut-0.1.2.dist-info → pearmut-0.1.3.dist-info}/WHEEL +0 -0
- {pearmut-0.1.2.dist-info → pearmut-0.1.3.dist-info}/entry_points.txt +0 -0
- {pearmut-0.1.2.dist-info → pearmut-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {pearmut-0.1.2.dist-info → pearmut-0.1.3.dist-info}/top_level.txt +0 -0
pearmut/utils.py
CHANGED
|
@@ -3,6 +3,7 @@ import os
|
|
|
3
3
|
|
|
4
4
|
ROOT = "."
|
|
5
5
|
|
|
6
|
+
|
|
6
7
|
def highlight_differences(a, b):
|
|
7
8
|
"""
|
|
8
9
|
Compares two strings and wraps their differences in HTML span tags.
|
|
@@ -30,7 +31,7 @@ def highlight_differences(a, b):
|
|
|
30
31
|
res_a.append(f"{span_open}{a[i1:i2]}{span_close}")
|
|
31
32
|
if tag in ('replace', 'insert'):
|
|
32
33
|
res_b.append(f"{span_open}{b[j1:j2]}{span_close}")
|
|
33
|
-
|
|
34
|
+
|
|
34
35
|
return "".join(res_a), "".join(res_b)
|
|
35
36
|
|
|
36
37
|
|
|
@@ -43,6 +44,58 @@ def load_progress_data(warn: str | None = None):
|
|
|
43
44
|
with open(f"{ROOT}/data/progress.json", "r") as f:
|
|
44
45
|
return json.load(f)
|
|
45
46
|
|
|
47
|
+
|
|
46
48
|
def save_progress_data(data):
|
|
47
49
|
with open(f"{ROOT}/data/progress.json", "w") as f:
|
|
48
|
-
json.dump(data, f, indent=2)
|
|
50
|
+
json.dump(data, f, indent=2)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
_logs = {}
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def get_db_log(campaign_id: str) -> list[dict]:
|
|
57
|
+
"""
|
|
58
|
+
Returns the up-to-date log for the given campaign_id.
|
|
59
|
+
"""
|
|
60
|
+
if campaign_id not in _logs:
|
|
61
|
+
# create a new one if it doesn't exist
|
|
62
|
+
log_path = f"{ROOT}/data/outputs/{campaign_id}.jsonl"
|
|
63
|
+
if os.path.exists(log_path):
|
|
64
|
+
with open(log_path, "r") as f:
|
|
65
|
+
_logs[campaign_id] = [
|
|
66
|
+
json.loads(line) for line in f.readlines()
|
|
67
|
+
]
|
|
68
|
+
else:
|
|
69
|
+
_logs[campaign_id] = []
|
|
70
|
+
|
|
71
|
+
return _logs[campaign_id]
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def get_db_log_item(campaign_id: str, user_id: str | None, item_i: int | None) -> list[dict]:
|
|
75
|
+
"""
|
|
76
|
+
Returns the log entries for the given campaign_id, optionally filtered by user_id and item_i (None matches any value).
|
|
77
|
+
Can be empty.
|
|
78
|
+
"""
|
|
79
|
+
log = get_db_log(campaign_id)
|
|
80
|
+
return [
|
|
81
|
+
entry for entry in log
|
|
82
|
+
if (
|
|
83
|
+
(user_id is None or entry.get("user_id") == user_id) and
|
|
84
|
+
(item_i is None or entry.get("item_i") == item_i)
|
|
85
|
+
)
|
|
86
|
+
]
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def save_db_payload(campaign_id: str, payload: dict):
|
|
90
|
+
"""
|
|
91
|
+
Saves the given payload to the log for the given campaign_id.
|
|
92
|
+
Saves both on disk and in-memory.
|
|
93
|
+
"""
|
|
94
|
+
|
|
95
|
+
log_path = f"{ROOT}/data/outputs/{campaign_id}.jsonl"
|
|
96
|
+
with open(log_path, "a") as log_file:
|
|
97
|
+
log_file.write(json.dumps(payload, ensure_ascii=False,) + "\n")
|
|
98
|
+
|
|
99
|
+
log = get_db_log(campaign_id)
|
|
100
|
+
# NOTE: the payload is appended as-is (no copy is made), so later mutation by the caller also changes the cached entry
|
|
101
|
+
log.append(payload)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pearmut
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 0.1.3
|
|
4
4
|
Summary: A tool for evaluation of model outputs, primarily MT.
|
|
5
5
|
Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
|
|
6
6
|
License: apache-2.0
|
|
@@ -23,7 +23,7 @@ Dynamic: license-file
|
|
|
23
23
|
|
|
24
24
|
Pearmut is a **Platform for Evaluation and Reviewing of Multilingual Tasks**.
|
|
25
25
|
It evaluates model outputs, primarily translation but also various other NLP tasks.
|
|
26
|
-
Supports multimodality (text, video, audio, images) and a variety of annotation protocols (DA, ESA, MQM, paired ESA, etc).
|
|
26
|
+
Supports multimodality (text, video, audio, images) and a variety of annotation protocols ([DA](https://aclanthology.org/N15-1124/), [ESA](https://aclanthology.org/2024.wmt-1.131/), [ESA<sup>AI</sup>](https://aclanthology.org/2025.naacl-long.255/), [MQM](https://doi.org/10.1162/tacl_a_00437), paired ESA, etc).
|
|
27
27
|
|
|
28
28
|
[](https://pypi.org/project/pearmut)
|
|
29
29
|
|
|
@@ -31,7 +31,7 @@ Supports multimodality (text, video, audio, images) and a variety of annotation
|
|
|
31
31
|
|
|
32
32
|
[](https://pypi.org/project/pearmut/)
|
|
33
33
|
|
|
34
|
-
[](https://github.com/zouharvi/pearmut/actions/workflows/test.yml)
|
|
35
35
|
|
|
36
36
|
<img width="1000" alt="Screenshot of ESA/MQM interface" src="https://github.com/user-attachments/assets/f14c91a5-44d7-4248-ada9-387e95ca59d0" />
|
|
37
37
|
|
|
@@ -115,6 +115,38 @@ For the standard ones (ESA, DA, MQM), we expect each item to be a dictionary (co
|
|
|
115
115
|
... # definition of another item (document)
|
|
116
116
|
```
|
|
117
117
|
|
|
118
|
+
## Pre-filled Error Spans (ESA<sup>AI</sup> Support)
|
|
119
|
+
|
|
120
|
+
For workflows where you want to provide pre-filled error annotations (e.g., ESA<sup>AI</sup>), you can include an `error_spans` key in each item.
|
|
121
|
+
These spans will be loaded into the interface as existing annotations that users can review, modify, or delete.
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
{
|
|
125
|
+
"src": "The quick brown fox jumps over the lazy dog.",
|
|
126
|
+
"tgt": "Rychlá hnědá liška skáče přes líného psa.",
|
|
127
|
+
"error_spans": [
|
|
128
|
+
{
|
|
129
|
+
"start_i": 0, # character index start (inclusive)
|
|
130
|
+
"end_i": 5, # character index end (inclusive)
|
|
131
|
+
"severity": "minor", # "minor", "major", "neutral", or null
|
|
132
|
+
"category": null # MQM category string or null
|
|
133
|
+
},
|
|
134
|
+
{
|
|
135
|
+
"start_i": 27,
|
|
136
|
+
"end_i": 32,
|
|
137
|
+
"severity": "major",
|
|
138
|
+
"category": null
|
|
139
|
+
}
|
|
140
|
+
]
|
|
141
|
+
}
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
For the **listwise** template, `error_spans` is a 2D array where each inner array holds the error spans for the corresponding candidate.
|
|
145
|
+
|
|
146
|
+
See [examples/esaai_prefilled.json](examples/esaai_prefilled.json) for a complete example.
|
|
147
|
+
|
|
148
|
+
## Single-stream Assignment
|
|
149
|
+
|
|
118
150
|
We also support a simple allocation where all annotators draw from the same pool (`single-stream`). Items are randomly assigned to annotators from the pool of unfinished items:
|
|
119
151
|
```python
|
|
120
152
|
{
|
|
@@ -138,7 +170,7 @@ We also support dynamic allocation of annotations (`dynamic`, not yet ⚠️), w
|
|
|
138
170
|
"campaign_id": "my campaign 6",
|
|
139
171
|
"info": {
|
|
140
172
|
"assignment": "dynamic",
|
|
141
|
-
"template": "
|
|
173
|
+
"template": "listwise",
|
|
142
174
|
"protocol_k": 5,
|
|
143
175
|
"num_users": 50,
|
|
144
176
|
},
|
|
@@ -154,6 +186,25 @@ pearmut add my_campaign_4.json
|
|
|
154
186
|
pearmut run
|
|
155
187
|
```
|
|
156
188
|
|
|
189
|
+
## Campaign options
|
|
190
|
+
|
|
191
|
+
In summary, you can select from the following assignment types:
|
|
192
|
+
|
|
193
|
+
- `task-based`: each user has a predefined set of items
|
|
194
|
+
- `single-stream`: all users annotate items drawn from the same shared pool
|
|
195
|
+
- `dynamic`: WIP ⚠️
|
|
196
|
+
|
|
197
|
+
and independently of that select your protocol template:
|
|
198
|
+
|
|
199
|
+
- `pointwise`: evaluate a single output given a single input
|
|
200
|
+
- `protocol_score`: ask for score 0 to 100
|
|
201
|
+
- `protocol_error_spans`: ask for highlighting error spans
|
|
202
|
+
- `protocol_error_categories`: ask for highlighting error categories
|
|
203
|
+
- `listwise`: evaluate multiple outputs at the same time given a single input ⚠️
|
|
204
|
+
- `protocol_score`: ask for score 0 to 100
|
|
205
|
+
- `protocol_error_spans`: ask for highlighting error spans
|
|
206
|
+
- `protocol_error_categories`: ask for highlighting error categories
|
|
207
|
+
|
|
157
208
|
## Campaign management
|
|
158
209
|
|
|
159
210
|
When adding new campaigns or launching pearmut, a management link is shown that gives an overview of annotator progress as well as easy access to the annotation links and to resetting the task progress (no data will be lost).
|
|
@@ -170,7 +221,7 @@ An intentionally incorrect token can be shown if the annotations don't pass qual
|
|
|
170
221
|
|
|
171
222
|
We also support anything HTML-compatible both on the input and on the output.
|
|
172
223
|
This includes embedded YouTube videos, or even simple `<video>` tags that point to some resource somewhere.
|
|
173
|
-
For an example, try [examples/
|
|
224
|
+
For an example, try [examples/multimodal.json](examples/multimodal.json).
|
|
174
225
|
Tip: make sure the elements are already appropriately styled.
|
|
175
226
|
|
|
176
227
|
<img width="800" alt="Preview of multimodal elements in Pearmut" src="https://github.com/user-attachments/assets/f34a1a3e-ad95-4114-95ee-8a49e8003faf" />
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
pearmut/app.py,sha256=ymRlnpKrWSiwdc51Tw4PBDDFFOY1bmdeU-xJ2VlOl-Q,7393
|
|
2
|
+
pearmut/assignment.py,sha256=aOQNlGYzzPNgunAmIIwlcF4qY-l-w6Wmy7hGquArAsc,10623
|
|
3
|
+
pearmut/cli.py,sha256=mV76uw6BywckbU7QEKIKTboukcALEdZp7l-kskJnBVA,7683
|
|
4
|
+
pearmut/utils.py,sha256=gk8b4biPc9TTvZiQMQ_8xh1_FsWuwrhtPzeK3NpzhZc,2902
|
|
5
|
+
pearmut/static/dashboard.bundle.js,sha256=6389gsHLCFh6JqiKdU3ng-Lm6VICRvfJgCSYM61H75U,91257
|
|
6
|
+
pearmut/static/dashboard.html,sha256=tUP1yYvbKySRz0mxFtGq2Si4hTMhJkUCWeTpnq91Nf4,1789
|
|
7
|
+
pearmut/static/index.html,sha256=ieCRLK83MVe-f-gtjYiOlvE-kKd8VnFF2xgyi6FoZpU,872
|
|
8
|
+
pearmut/static/listwise.bundle.js,sha256=Qcz3TSA8C5QRFI-ui47y99WF87wf_4tMKHZ3TyfiYa8,103790
|
|
9
|
+
pearmut/static/listwise.html,sha256=MNS4gV1Fqx7JXZikLhrWgL0z1OPdqgumlOfTcmGnXEI,5212
|
|
10
|
+
pearmut/static/pointwise.bundle.js,sha256=doa3DC8n9L7IIV2ttWxV-TBKVMQHgjTQgSR3Pjozy3k,106133
|
|
11
|
+
pearmut/static/pointwise.html,sha256=dhmfgpWvCFB833Y4kj08_aBZyCN33SayYcS1ckL2-FU,5009
|
|
12
|
+
pearmut/static/assets/favicon.svg,sha256=gVPxdBlyfyJVkiMfh8WLaiSyH4lpwmKZs8UiOeX8YW4,7347
|
|
13
|
+
pearmut/static/assets/style.css,sha256=-B-RySjt8qccqkwvLT0PDy6IRoE1xytLLKAFtR_S-Tg,3967
|
|
14
|
+
pearmut-0.1.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
15
|
+
pearmut-0.1.3.dist-info/METADATA,sha256=XhlUE5eAzWzZ1MQX4RmPQuM5Kijk_LwYahgQvTbmmp4,10990
|
|
16
|
+
pearmut-0.1.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
17
|
+
pearmut-0.1.3.dist-info/entry_points.txt,sha256=eEA9LVWsS3neQbMvL_nMvEw8I0oFudw8nQa1iqxOiWM,45
|
|
18
|
+
pearmut-0.1.3.dist-info/top_level.txt,sha256=CdgtUM-SKQDt6o5g0QreO-_7XTBP9_wnHMS1P-Rl5Go,8
|
|
19
|
+
pearmut-0.1.3.dist-info/RECORD,,
|
pearmut-0.1.2.dist-info/RECORD
DELETED
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
pearmut/app.py,sha256=s_xv7Nq9dm3ObApH_Iz9myS-H_q4oXsFKqwiwVbQYuY,6740
|
|
2
|
-
pearmut/assignment.py,sha256=IgGXmZKFASoGW8jVeXXUN3meY8Two-Txwg4nMwZEOnA,6422
|
|
3
|
-
pearmut/cli.py,sha256=mV76uw6BywckbU7QEKIKTboukcALEdZp7l-kskJnBVA,7683
|
|
4
|
-
pearmut/utils.py,sha256=6hfVenrVdGm1r-7uJIkWHhX9o0ztWjqPse_j_MqkgBw,1443
|
|
5
|
-
pearmut/static/dashboard.bundle.js,sha256=6389gsHLCFh6JqiKdU3ng-Lm6VICRvfJgCSYM61H75U,91257
|
|
6
|
-
pearmut/static/dashboard.html,sha256=tUP1yYvbKySRz0mxFtGq2Si4hTMhJkUCWeTpnq91Nf4,1789
|
|
7
|
-
pearmut/static/index.html,sha256=ieCRLK83MVe-f-gtjYiOlvE-kKd8VnFF2xgyi6FoZpU,872
|
|
8
|
-
pearmut/static/listwise.bundle.js,sha256=_KWKocPZjkDHHoiixKFOZzmD0qlw-nqFheBPcbED0HM,100788
|
|
9
|
-
pearmut/static/listwise.html,sha256=zipFfGus26qWEdFbuNQmaG-NR5S1yaczv2XpD8j843U,5203
|
|
10
|
-
pearmut/static/pointwise.bundle.js,sha256=1mks6kD4P2w7uQqeze4GttKVc-JZvsLYKRktV6Em6R0,100431
|
|
11
|
-
pearmut/static/pointwise.html,sha256=dhmfgpWvCFB833Y4kj08_aBZyCN33SayYcS1ckL2-FU,5009
|
|
12
|
-
pearmut/static/assets/favicon.svg,sha256=gVPxdBlyfyJVkiMfh8WLaiSyH4lpwmKZs8UiOeX8YW4,7347
|
|
13
|
-
pearmut/static/assets/style.css,sha256=-B-RySjt8qccqkwvLT0PDy6IRoE1xytLLKAFtR_S-Tg,3967
|
|
14
|
-
pearmut-0.1.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
15
|
-
pearmut-0.1.2.dist-info/METADATA,sha256=cuHpmxeRqYF9H6s5ukP6RZBEx4tzy7bzipdhmbtIBVc,8923
|
|
16
|
-
pearmut-0.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
17
|
-
pearmut-0.1.2.dist-info/entry_points.txt,sha256=eEA9LVWsS3neQbMvL_nMvEw8I0oFudw8nQa1iqxOiWM,45
|
|
18
|
-
pearmut-0.1.2.dist-info/top_level.txt,sha256=CdgtUM-SKQDt6o5g0QreO-_7XTBP9_wnHMS1P-Rl5Go,8
|
|
19
|
-
pearmut-0.1.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|