pearmut 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pearmut/utils.py CHANGED
@@ -3,6 +3,7 @@ import os
3
3
 
4
4
  ROOT = "."
5
5
 
6
+
6
7
  def highlight_differences(a, b):
7
8
  """
8
9
  Compares two strings and wraps their differences in HTML span tags.
@@ -30,7 +31,7 @@ def highlight_differences(a, b):
30
31
  res_a.append(f"{span_open}{a[i1:i2]}{span_close}")
31
32
  if tag in ('replace', 'insert'):
32
33
  res_b.append(f"{span_open}{b[j1:j2]}{span_close}")
33
-
34
+
34
35
  return "".join(res_a), "".join(res_b)
35
36
 
36
37
 
@@ -43,6 +44,58 @@ def load_progress_data(warn: str | None = None):
43
44
  with open(f"{ROOT}/data/progress.json", "r") as f:
44
45
  return json.load(f)
45
46
 
47
+
46
48
  def save_progress_data(data):
47
49
  with open(f"{ROOT}/data/progress.json", "w") as f:
48
- json.dump(data, f, indent=2)
50
+ json.dump(data, f, indent=2)
51
+
52
+
53
+ _logs = {}
54
+
55
+
56
+ def get_db_log(campaign_id: str) -> list[dict]:
57
+ """
58
+ Returns the up-to-date log for the given campaign_id.
59
+ """
60
+ if campaign_id not in _logs:
61
+ # create a new one if it doesn't exist
62
+ log_path = f"{ROOT}/data/outputs/{campaign_id}.jsonl"
63
+ if os.path.exists(log_path):
64
+ with open(log_path, "r") as f:
65
+ _logs[campaign_id] = [
66
+ json.loads(line) for line in f.readlines()
67
+ ]
68
+ else:
69
+ _logs[campaign_id] = []
70
+
71
+ return _logs[campaign_id]
72
+
73
+
74
+ def get_db_log_item(campaign_id: str, user_id: str | None, item_i: int | None) -> list[dict]:
75
+ """
76
+ Returns the log item for the given campaign_id, user_id and item_i.
77
+ Can be empty.
78
+ """
79
+ log = get_db_log(campaign_id)
80
+ return [
81
+ entry for entry in log
82
+ if (
83
+ (user_id is None or entry.get("user_id") == user_id) and
84
+ (item_i is None or entry.get("item_i") == item_i)
85
+ )
86
+ ]
87
+
88
+
89
+ def save_db_payload(campaign_id: str, payload: dict):
90
+ """
91
+ Appends the given payload to the log for the given campaign_id.
92
+ Saves both on disk and in-memory.
93
+ """
94
+
95
+ log_path = f"{ROOT}/data/outputs/{campaign_id}.jsonl"
96
+ with open(log_path, "a") as log_file:
97
+ log_file.write(json.dumps(payload, ensure_ascii=False,) + "\n")
98
+
99
+ log = get_db_log(campaign_id)
100
+ # NOTE: the payload is appended by reference (no copy is made), so later mutation by the caller affects the in-memory log
101
+ log.append(payload)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pearmut
3
- Version: 0.1.2
3
+ Version: 0.1.3
4
4
  Summary: A tool for evaluation of model outputs, primarily MT.
5
5
  Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
6
6
  License: apache-2.0
@@ -23,7 +23,7 @@ Dynamic: license-file
23
23
 
24
24
  Pearmut is a **Platform for Evaluation and Reviewing of Multilingual Tasks**.
25
25
  It evaluates model outputs, primarily translation but also various other NLP tasks.
26
- Supports multimodality (text, video, audio, images) and a variety of annotation protocols (DA, ESA, MQM, paired ESA, etc).
26
+ Supports multimodality (text, video, audio, images) and a variety of annotation protocols ([DA](https://aclanthology.org/N15-1124/), [ESA](https://aclanthology.org/2024.wmt-1.131/), [ESA<sup>AI</sup>](https://aclanthology.org/2025.naacl-long.255/), [MQM](https://doi.org/10.1162/tacl_a_00437), paired ESA, etc).
27
27
 
28
28
  [![PyPi version](https://badgen.net/pypi/v/pearmut/)](https://pypi.org/project/pearmut)
29
29
  &nbsp;
@@ -31,7 +31,7 @@ Supports multimodality (text, video, audio, images) and a variety of annotation
31
31
  &nbsp;
32
32
  [![PyPi license](https://badgen.net/pypi/license/pearmut/)](https://pypi.org/project/pearmut/)
33
33
  &nbsp;
34
- [![build status](https://github.com/zouharvi/pearmut/actions/workflows/ci.yml/badge.svg)](https://github.com/zouharvi/pearmut/actions/workflows/ci.yml)
34
+ [![build status](https://github.com/zouharvi/pearmut/actions/workflows/test.yml/badge.svg)](https://github.com/zouharvi/pearmut/actions/workflows/test.yml)
35
35
 
36
36
  <img width="1000" alt="Screenshot of ESA/MQM interface" src="https://github.com/user-attachments/assets/f14c91a5-44d7-4248-ada9-387e95ca59d0" />
37
37
 
@@ -115,6 +115,38 @@ For the standard ones (ESA, DA, MQM), we expect each item to be a dictionary (co
115
115
  ... # definition of another item (document)
116
116
  ```
117
117
 
118
+ ## Pre-filled Error Spans (ESA<sup>AI</sup> Support)
119
+
120
+ For workflows where you want to provide pre-filled error annotations (e.g., ESA<sup>AI</sup>), you can include an `error_spans` key in each item.
121
+ These spans will be loaded into the interface as existing annotations that users can review, modify, or delete.
122
+
123
+ ```python
124
+ {
125
+ "src": "The quick brown fox jumps over the lazy dog.",
126
+ "tgt": "Rychlá hnědá liška skáče přes líného psa.",
127
+ "error_spans": [
128
+ {
129
+ "start_i": 0, # character index start (inclusive)
130
+ "end_i": 5, # character index end (inclusive)
131
+ "severity": "minor", # "minor", "major", "neutral", or null
132
+ "category": null # MQM category string or null
133
+ },
134
+ {
135
+ "start_i": 27,
136
+ "end_i": 32,
137
+ "severity": "major",
138
+ "category": null
139
+ }
140
+ ]
141
+ }
142
+ ```
143
+
144
+ For the **listwise** template, `error_spans` is a 2D array where each inner array holds the error spans for the corresponding candidate.
145
+
146
+ See [examples/esaai_prefilled.json](examples/esaai_prefilled.json) for a complete example.
147
+
148
+ ## Single-stream Assignment
149
+
118
150
  We also support a simple allocation where all annotators draw from the same pool (`single-stream`). Items are randomly assigned to annotators from the pool of unfinished items:
119
151
  ```python
120
152
  {
@@ -138,7 +170,7 @@ We also support dynamic allocation of annotations (`dynamic`, not yet ⚠️), w
138
170
  "campaign_id": "my campaign 6",
139
171
  "info": {
140
172
  "assignment": "dynamic",
141
- "template": "kway",
173
+ "template": "listwise",
142
174
  "protocol_k": 5,
143
175
  "num_users": 50,
144
176
  },
@@ -154,6 +186,25 @@ pearmut add my_campaign_4.json
154
186
  pearmut run
155
187
  ```
156
188
 
189
+ ## Campaign options
190
+
191
+ In summary, you can select from the following assignment types:
192
+
193
+ - `task-based`: each user has a predefined set of items
194
+ - `single-stream`: all users annotate items drawn from the same shared pool
195
+ - `dynamic`: WIP ⚠️
196
+
197
+ and independently of that select your protocol template:
198
+
199
+ - `pointwise`: evaluate a single output given a single input
200
+ - `protocol_score`: ask for score 0 to 100
201
+ - `protocol_error_spans`: ask for highlighting error spans
202
+ - `protocol_error_categories`: ask for highlighting error categories
203
+ - `listwise`: evaluate multiple outputs at the same time given a single input ⚠️
204
+ - `protocol_score`: ask for score 0 to 100
205
+ - `protocol_error_spans`: ask for highlighting error spans
206
+ - `protocol_error_categories`: ask for highlighting error categories
207
+
157
208
  ## Campaign management
158
209
 
159
210
  When adding new campaigns or launching pearmut, a management link is shown that gives an overview of annotator progress as well as easy access to the annotation links and to resetting task progress (no data will be lost).
@@ -170,7 +221,7 @@ An intentionally incorrect token can be shown if the annotations don't pass qual
170
221
 
171
222
  We also support anything HTML-compatible both on the input and on the output.
172
223
  This includes embedded YouTube videos, or even simple `<video>` tags that point to some resource somewhere.
173
- For an example, try [examples/mock_multimodal.json](examples/mock_multimodal.json).
224
+ For an example, try [examples/multimodal.json](examples/multimodal.json).
174
225
  Tip: make sure the elements are already appropriately styled.
175
226
 
176
227
  <img width="800" alt="Preview of multimodal elements in Pearmut" src="https://github.com/user-attachments/assets/f34a1a3e-ad95-4114-95ee-8a49e8003faf" />
@@ -0,0 +1,19 @@
1
+ pearmut/app.py,sha256=ymRlnpKrWSiwdc51Tw4PBDDFFOY1bmdeU-xJ2VlOl-Q,7393
2
+ pearmut/assignment.py,sha256=aOQNlGYzzPNgunAmIIwlcF4qY-l-w6Wmy7hGquArAsc,10623
3
+ pearmut/cli.py,sha256=mV76uw6BywckbU7QEKIKTboukcALEdZp7l-kskJnBVA,7683
4
+ pearmut/utils.py,sha256=gk8b4biPc9TTvZiQMQ_8xh1_FsWuwrhtPzeK3NpzhZc,2902
5
+ pearmut/static/dashboard.bundle.js,sha256=6389gsHLCFh6JqiKdU3ng-Lm6VICRvfJgCSYM61H75U,91257
6
+ pearmut/static/dashboard.html,sha256=tUP1yYvbKySRz0mxFtGq2Si4hTMhJkUCWeTpnq91Nf4,1789
7
+ pearmut/static/index.html,sha256=ieCRLK83MVe-f-gtjYiOlvE-kKd8VnFF2xgyi6FoZpU,872
8
+ pearmut/static/listwise.bundle.js,sha256=Qcz3TSA8C5QRFI-ui47y99WF87wf_4tMKHZ3TyfiYa8,103790
9
+ pearmut/static/listwise.html,sha256=MNS4gV1Fqx7JXZikLhrWgL0z1OPdqgumlOfTcmGnXEI,5212
10
+ pearmut/static/pointwise.bundle.js,sha256=doa3DC8n9L7IIV2ttWxV-TBKVMQHgjTQgSR3Pjozy3k,106133
11
+ pearmut/static/pointwise.html,sha256=dhmfgpWvCFB833Y4kj08_aBZyCN33SayYcS1ckL2-FU,5009
12
+ pearmut/static/assets/favicon.svg,sha256=gVPxdBlyfyJVkiMfh8WLaiSyH4lpwmKZs8UiOeX8YW4,7347
13
+ pearmut/static/assets/style.css,sha256=-B-RySjt8qccqkwvLT0PDy6IRoE1xytLLKAFtR_S-Tg,3967
14
+ pearmut-0.1.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
15
+ pearmut-0.1.3.dist-info/METADATA,sha256=XhlUE5eAzWzZ1MQX4RmPQuM5Kijk_LwYahgQvTbmmp4,10990
16
+ pearmut-0.1.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
17
+ pearmut-0.1.3.dist-info/entry_points.txt,sha256=eEA9LVWsS3neQbMvL_nMvEw8I0oFudw8nQa1iqxOiWM,45
18
+ pearmut-0.1.3.dist-info/top_level.txt,sha256=CdgtUM-SKQDt6o5g0QreO-_7XTBP9_wnHMS1P-Rl5Go,8
19
+ pearmut-0.1.3.dist-info/RECORD,,
@@ -1,19 +0,0 @@
1
- pearmut/app.py,sha256=s_xv7Nq9dm3ObApH_Iz9myS-H_q4oXsFKqwiwVbQYuY,6740
2
- pearmut/assignment.py,sha256=IgGXmZKFASoGW8jVeXXUN3meY8Two-Txwg4nMwZEOnA,6422
3
- pearmut/cli.py,sha256=mV76uw6BywckbU7QEKIKTboukcALEdZp7l-kskJnBVA,7683
4
- pearmut/utils.py,sha256=6hfVenrVdGm1r-7uJIkWHhX9o0ztWjqPse_j_MqkgBw,1443
5
- pearmut/static/dashboard.bundle.js,sha256=6389gsHLCFh6JqiKdU3ng-Lm6VICRvfJgCSYM61H75U,91257
6
- pearmut/static/dashboard.html,sha256=tUP1yYvbKySRz0mxFtGq2Si4hTMhJkUCWeTpnq91Nf4,1789
7
- pearmut/static/index.html,sha256=ieCRLK83MVe-f-gtjYiOlvE-kKd8VnFF2xgyi6FoZpU,872
8
- pearmut/static/listwise.bundle.js,sha256=_KWKocPZjkDHHoiixKFOZzmD0qlw-nqFheBPcbED0HM,100788
9
- pearmut/static/listwise.html,sha256=zipFfGus26qWEdFbuNQmaG-NR5S1yaczv2XpD8j843U,5203
10
- pearmut/static/pointwise.bundle.js,sha256=1mks6kD4P2w7uQqeze4GttKVc-JZvsLYKRktV6Em6R0,100431
11
- pearmut/static/pointwise.html,sha256=dhmfgpWvCFB833Y4kj08_aBZyCN33SayYcS1ckL2-FU,5009
12
- pearmut/static/assets/favicon.svg,sha256=gVPxdBlyfyJVkiMfh8WLaiSyH4lpwmKZs8UiOeX8YW4,7347
13
- pearmut/static/assets/style.css,sha256=-B-RySjt8qccqkwvLT0PDy6IRoE1xytLLKAFtR_S-Tg,3967
14
- pearmut-0.1.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
15
- pearmut-0.1.2.dist-info/METADATA,sha256=cuHpmxeRqYF9H6s5ukP6RZBEx4tzy7bzipdhmbtIBVc,8923
16
- pearmut-0.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
17
- pearmut-0.1.2.dist-info/entry_points.txt,sha256=eEA9LVWsS3neQbMvL_nMvEw8I0oFudw8nQa1iqxOiWM,45
18
- pearmut-0.1.2.dist-info/top_level.txt,sha256=CdgtUM-SKQDt6o5g0QreO-_7XTBP9_wnHMS1P-Rl5Go,8
19
- pearmut-0.1.2.dist-info/RECORD,,