pearmut 0.1.1.tar.gz → 0.1.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {pearmut-0.1.1 → pearmut-0.1.3}/PKG-INFO +64 -13
  2. {pearmut-0.1.1 → pearmut-0.1.3}/README.md +63 -12
  3. {pearmut-0.1.1 → pearmut-0.1.3}/pearmut.egg-info/PKG-INFO +64 -13
  4. {pearmut-0.1.1 → pearmut-0.1.3}/pearmut.egg-info/SOURCES.txt +3 -1
  5. {pearmut-0.1.1 → pearmut-0.1.3}/pyproject.toml +1 -1
  6. {pearmut-0.1.1 → pearmut-0.1.3}/server/app.py +55 -25
  7. pearmut-0.1.3/server/assignment.py +312 -0
  8. {pearmut-0.1.1 → pearmut-0.1.3}/server/cli.py +37 -19
  9. pearmut-0.1.3/server/static/assets/style.css +228 -0
  10. {pearmut-0.1.1 → pearmut-0.1.3}/server/static/dashboard.bundle.js +1 -1
  11. pearmut-0.1.3/server/static/listwise.bundle.js +1 -0
  12. pearmut-0.1.3/server/static/listwise.html +77 -0
  13. pearmut-0.1.3/server/static/pointwise.bundle.js +1 -0
  14. {pearmut-0.1.1 → pearmut-0.1.3}/server/static/pointwise.html +1 -167
  15. pearmut-0.1.3/server/utils.py +101 -0
  16. pearmut-0.1.1/server/protocols.py +0 -122
  17. pearmut-0.1.1/server/static/assets/style.css +0 -60
  18. pearmut-0.1.1/server/static/pointwise.bundle.js +0 -1
  19. pearmut-0.1.1/server/utils.py +0 -48
  20. {pearmut-0.1.1 → pearmut-0.1.3}/LICENSE +0 -0
  21. {pearmut-0.1.1 → pearmut-0.1.3}/pearmut.egg-info/dependency_links.txt +0 -0
  22. {pearmut-0.1.1 → pearmut-0.1.3}/pearmut.egg-info/entry_points.txt +0 -0
  23. {pearmut-0.1.1 → pearmut-0.1.3}/pearmut.egg-info/requires.txt +0 -0
  24. {pearmut-0.1.1 → pearmut-0.1.3}/pearmut.egg-info/top_level.txt +0 -0
  25. {pearmut-0.1.1 → pearmut-0.1.3}/server/static/assets/favicon.svg +0 -0
  26. {pearmut-0.1.1 → pearmut-0.1.3}/server/static/dashboard.html +0 -0
  27. {pearmut-0.1.1 → pearmut-0.1.3}/server/static/index.html +0 -0
  28. {pearmut-0.1.1 → pearmut-0.1.3}/setup.cfg +0 -0
{pearmut-0.1.1 → pearmut-0.1.3}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: pearmut
- Version: 0.1.1
+ Version: 0.1.3
  Summary: A tool for evaluation of model outputs, primarily MT.
  Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
  License: apache-2.0
@@ -23,7 +23,7 @@ Dynamic: license-file

  Pearmut is a **Platform for Evaluation and Reviewing of Multilingual Tasks**.
  It evaluates model outputs, primarily translation but also various other NLP tasks.
- Supports multimodality (text, video, audio, images) and a variety of annotation protocols (DA, ESA, MQM, paired ESA, etc).
+ Supports multimodality (text, video, audio, images) and a variety of annotation protocols ([DA](https://aclanthology.org/N15-1124/), [ESA](https://aclanthology.org/2024.wmt-1.131/), [ESA<sup>AI</sup>](https://aclanthology.org/2025.naacl-long.255/), [MQM](https://doi.org/10.1162/tacl_a_00437), paired ESA, etc).

  [![PyPi version](https://badgen.net/pypi/v/pearmut/)](https://pypi.org/project/pearmut)
  &nbsp;
@@ -31,7 +31,7 @@ Supports multimodality (text, video, audio, images) and a variety of annotation
  &nbsp;
  [![PyPi license](https://badgen.net/pypi/license/pearmut/)](https://pypi.org/project/pearmut/)
  &nbsp;
- [![build status](https://github.com/zouharvi/pearmut/actions/workflows/ci.yml/badge.svg)](https://github.com/zouharvi/pearmut/actions/workflows/ci.yml)
+ [![build status](https://github.com/zouharvi/pearmut/actions/workflows/test.yml/badge.svg)](https://github.com/zouharvi/pearmut/actions/workflows/test.yml)

  <img width="1000" alt="Screenshot of ESA/MQM interface" src="https://github.com/user-attachments/assets/f14c91a5-44d7-4248-ada9-387e95ca59d0" />

@@ -63,7 +63,7 @@ One of the simplest ones, where each user has a pre-defined list of tasks (`task
  ```python
  {
      "info": {
-         "type": "task-based",
+         "assignment": "task-based",
          "template": "pointwise",
          "protocol_score": true, # we want scores [0...100] for each segment
          "protocol_error_spans": true, # we want error spans
@@ -115,19 +115,51 @@ For the standard ones (ESA, DA, MQM), we expect each item to be a dictionary (co
  ... # definition of another item (document)
  ```

- We also support a super simple allocation of annotations (`task-single`, not yet ⚠️), where you simply pass a list of all examples to be evaluated and they are processed in parallel by all annotators:
+ ## Pre-filled Error Spans (ESA<sup>AI</sup> Support)
+
+ For workflows where you want to provide pre-filled error annotations (e.g., ESA<sup>AI</sup>), you can include an `error_spans` key in each item.
+ These spans will be loaded into the interface as existing annotations that users can review, modify, or delete.
+
+ ```python
+ {
+     "src": "The quick brown fox jumps over the lazy dog.",
+     "tgt": "Rychlá hnědá liška skáče přes líného psa.",
+     "error_spans": [
+         {
+             "start_i": 0, # character index start (inclusive)
+             "end_i": 5, # character index end (inclusive)
+             "severity": "minor", # "minor", "major", "neutral", or null
+             "category": null # MQM category string or null
+         },
+         {
+             "start_i": 27,
+             "end_i": 32,
+             "severity": "major",
+             "category": null
+         }
+     ]
+ }
+ ```
+
+ For **listwise** template, `error_spans` is a 2D array where each inner array corresponds to error spans for that candidate.
+
+ See [examples/esaai_prefilled.json](examples/esaai_prefilled.json) for a complete example.
+
+ ## Single-stream Assignment
+
+ We also support a simple allocation where all annotators draw from the same pool (`single-stream`). Items are randomly assigned to annotators from the pool of unfinished items:
  ```python
  {
      "campaign_id": "my campaign 6",
      "info": {
-         "type": "task-single",
+         "assignment": "single-stream",
          "template": "pointwise",
          "protocol_score": True, # collect scores
          "protocol_error_spans": True, # collect error spans
          "protocol_error_categories": False, # do not collect MQM categories, so ESA
-         "users": 50,
+         "num_users": 50, # number of annotators
      },
-     "data": [...], # list of all items
+     "data": [...], # list of all items (shared among all annotators)
  }
  ```

@@ -137,10 +169,10 @@ We also support dynamic allocation of annotations (`dynamic`, not yet ⚠️), w
  {
      "campaign_id": "my campaign 6",
      "info": {
-         "type": "dynamic",
-         "template": "kway",
+         "assignment": "dynamic",
+         "template": "listwise",
          "protocol_k": 5,
-         "users": 50,
+         "num_users": 50,
      },
      "data": [...], # list of all items
  }
@@ -154,6 +186,25 @@ pearmut add my_campaign_4.json
  pearmut run
  ```

+ ## Campaign options
+
+ In summary, you can select from the assignment types
+
+ - `task-based`: each user has a predefined set of items
+ - `single-stream`: all users are annotating together the same set of items
+ - `dynamic`: WIP ⚠️
+
+ and independently of that select your protocol template:
+
+ - `pointwise`: evaluate a single output given a single output
+   - `protocol_score`: ask for score 0 to 100
+   - `protocol_error_spans`: ask for highlighting error spans
+   - `protocol_error_categories`: ask for highlighting error categories
+ - `listwise`: evaluate multiple outputs at the same time given a single output ⚠️
+   - `protocol_score`: ask for score 0 to 100
+   - `protocol_error_spans`: ask for highlighting error spans
+   - `protocol_error_categories`: ask for highlighting error categories
+
  ## Campaign management

  When adding new campaigns or launching pearmut, a management link is shown that gives an overview of annotator progress but also an easy access to the annotation links or resetting the task progress (no data will be lost).
@@ -170,7 +221,7 @@ An intentionally incorrect token can be shown if the annotations don't pass qual

  We also support anything HTML-compatible both on the input and on the output.
  This includes embedded YouTube videos, or even simple `<video ` tags that point to some resource somewhere.
- For an example, try [examples/mock_multimodal.json](examples/mock_multimodal.json).
+ For an example, try [examples/multimodal.json](examples/multimodal.json).
  Tip: make sure the elements are already appropriately styled.

  <img width="800" alt="Preview of multimodal elements in Pearmut" src="https://github.com/user-attachments/assets/f34a1a3e-ad95-4114-95ee-8a49e8003faf" />
@@ -185,7 +236,7 @@ To make changes locally, clone the repository and run the following, which will
  cd pearmut
  # watch the frontend for changes (in a separate terminal)
  npm install web/ --prefix web/
- npm run watch --prefix web/
+ npm run build --prefix web/ # `watch` for rebuild on code change

  # install local package as editable
  pip3 install -e .
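
The README changes above document all the keys needed to put together a campaign file. A minimal sketch, assuming a single-stream item uses the same `src`/`tgt` fields as the pointwise example; the `assignment`, `template`, `protocol_*`, `num_users`, and `error_spans` keys come from the diff, everything else is illustrative:

```python
# Hypothetical helper script, not part of the package: writes a campaign file
# using the keys documented in the README diff above.
import json

campaign = {
    "campaign_id": "demo campaign",
    "info": {
        "assignment": "single-stream",   # or "task-based"
        "template": "pointwise",
        "protocol_score": True,          # collect 0-100 scores
        "protocol_error_spans": True,    # collect error spans (ESA-style)
        "protocol_error_categories": False,
        "num_users": 3,
    },
    "data": [
        {
            # item fields follow the pre-filled error span example above;
            # assumed to apply to single-stream items as well
            "src": "The quick brown fox jumps over the lazy dog.",
            "tgt": "Rychlá hnědá liška skáče přes líného psa.",
            "error_spans": [
                {"start_i": 0, "end_i": 5, "severity": "minor", "category": None},
            ],
        },
    ],
}

with open("demo_campaign.json", "w", encoding="utf-8") as f:
    json.dump(campaign, f, ensure_ascii=False, indent=2)

# then, as in the documented quick start: pearmut add demo_campaign.json && pearmut run
```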

{pearmut-0.1.1 → pearmut-0.1.3}/README.md
@@ -2,7 +2,7 @@

  Pearmut is a **Platform for Evaluation and Reviewing of Multilingual Tasks**.
  It evaluates model outputs, primarily translation but also various other NLP tasks.
- Supports multimodality (text, video, audio, images) and a variety of annotation protocols (DA, ESA, MQM, paired ESA, etc).
+ Supports multimodality (text, video, audio, images) and a variety of annotation protocols ([DA](https://aclanthology.org/N15-1124/), [ESA](https://aclanthology.org/2024.wmt-1.131/), [ESA<sup>AI</sup>](https://aclanthology.org/2025.naacl-long.255/), [MQM](https://doi.org/10.1162/tacl_a_00437), paired ESA, etc).

  [![PyPi version](https://badgen.net/pypi/v/pearmut/)](https://pypi.org/project/pearmut)
  &nbsp;
@@ -10,7 +10,7 @@ Supports multimodality (text, video, audio, images) and a variety of annotation
  &nbsp;
  [![PyPi license](https://badgen.net/pypi/license/pearmut/)](https://pypi.org/project/pearmut/)
  &nbsp;
- [![build status](https://github.com/zouharvi/pearmut/actions/workflows/ci.yml/badge.svg)](https://github.com/zouharvi/pearmut/actions/workflows/ci.yml)
+ [![build status](https://github.com/zouharvi/pearmut/actions/workflows/test.yml/badge.svg)](https://github.com/zouharvi/pearmut/actions/workflows/test.yml)

  <img width="1000" alt="Screenshot of ESA/MQM interface" src="https://github.com/user-attachments/assets/f14c91a5-44d7-4248-ada9-387e95ca59d0" />

@@ -42,7 +42,7 @@ One of the simplest ones, where each user has a pre-defined list of tasks (`task
  ```python
  {
      "info": {
-         "type": "task-based",
+         "assignment": "task-based",
          "template": "pointwise",
          "protocol_score": true, # we want scores [0...100] for each segment
          "protocol_error_spans": true, # we want error spans
@@ -94,19 +94,51 @@ For the standard ones (ESA, DA, MQM), we expect each item to be a dictionary (co
  ... # definition of another item (document)
  ```

- We also support a super simple allocation of annotations (`task-single`, not yet ⚠️), where you simply pass a list of all examples to be evaluated and they are processed in parallel by all annotators:
+ ## Pre-filled Error Spans (ESA<sup>AI</sup> Support)
+
+ For workflows where you want to provide pre-filled error annotations (e.g., ESA<sup>AI</sup>), you can include an `error_spans` key in each item.
+ These spans will be loaded into the interface as existing annotations that users can review, modify, or delete.
+
+ ```python
+ {
+     "src": "The quick brown fox jumps over the lazy dog.",
+     "tgt": "Rychlá hnědá liška skáče přes líného psa.",
+     "error_spans": [
+         {
+             "start_i": 0, # character index start (inclusive)
+             "end_i": 5, # character index end (inclusive)
+             "severity": "minor", # "minor", "major", "neutral", or null
+             "category": null # MQM category string or null
+         },
+         {
+             "start_i": 27,
+             "end_i": 32,
+             "severity": "major",
+             "category": null
+         }
+     ]
+ }
+ ```
+
+ For **listwise** template, `error_spans` is a 2D array where each inner array corresponds to error spans for that candidate.
+
+ See [examples/esaai_prefilled.json](examples/esaai_prefilled.json) for a complete example.
+
+ ## Single-stream Assignment
+
+ We also support a simple allocation where all annotators draw from the same pool (`single-stream`). Items are randomly assigned to annotators from the pool of unfinished items:
  ```python
  {
      "campaign_id": "my campaign 6",
      "info": {
-         "type": "task-single",
+         "assignment": "single-stream",
          "template": "pointwise",
          "protocol_score": True, # collect scores
          "protocol_error_spans": True, # collect error spans
          "protocol_error_categories": False, # do not collect MQM categories, so ESA
-         "users": 50,
+         "num_users": 50, # number of annotators
      },
-     "data": [...], # list of all items
+     "data": [...], # list of all items (shared among all annotators)
  }
  ```

@@ -116,10 +148,10 @@ We also support dynamic allocation of annotations (`dynamic`, not yet ⚠️), w
  {
      "campaign_id": "my campaign 6",
      "info": {
-         "type": "dynamic",
-         "template": "kway",
+         "assignment": "dynamic",
+         "template": "listwise",
          "protocol_k": 5,
-         "users": 50,
+         "num_users": 50,
      },
      "data": [...], # list of all items
  }
@@ -133,6 +165,25 @@ pearmut add my_campaign_4.json
  pearmut run
  ```

+ ## Campaign options
+
+ In summary, you can select from the assignment types
+
+ - `task-based`: each user has a predefined set of items
+ - `single-stream`: all users are annotating together the same set of items
+ - `dynamic`: WIP ⚠️
+
+ and independently of that select your protocol template:
+
+ - `pointwise`: evaluate a single output given a single output
+   - `protocol_score`: ask for score 0 to 100
+   - `protocol_error_spans`: ask for highlighting error spans
+   - `protocol_error_categories`: ask for highlighting error categories
+ - `listwise`: evaluate multiple outputs at the same time given a single output ⚠️
+   - `protocol_score`: ask for score 0 to 100
+   - `protocol_error_spans`: ask for highlighting error spans
+   - `protocol_error_categories`: ask for highlighting error categories
+
  ## Campaign management

  When adding new campaigns or launching pearmut, a management link is shown that gives an overview of annotator progress but also an easy access to the annotation links or resetting the task progress (no data will be lost).
@@ -149,7 +200,7 @@ An intentionally incorrect token can be shown if the annotations don't pass qual

  We also support anything HTML-compatible both on the input and on the output.
  This includes embedded YouTube videos, or even simple `<video ` tags that point to some resource somewhere.
- For an example, try [examples/mock_multimodal.json](examples/mock_multimodal.json).
+ For an example, try [examples/multimodal.json](examples/multimodal.json).
  Tip: make sure the elements are already appropriately styled.

  <img width="800" alt="Preview of multimodal elements in Pearmut" src="https://github.com/user-attachments/assets/f34a1a3e-ad95-4114-95ee-8a49e8003faf" />
@@ -164,7 +215,7 @@ To make changes locally, clone the repository and run the following, which will
  cd pearmut
  # watch the frontend for changes (in a separate terminal)
  npm install web/ --prefix web/
- npm run watch --prefix web/
+ npm run build --prefix web/ # `watch` for rebuild on code change

  # install local package as editable
  pip3 install -e .

{pearmut-0.1.1 → pearmut-0.1.3}/pearmut.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: pearmut
- Version: 0.1.1
+ Version: 0.1.3
  Summary: A tool for evaluation of model outputs, primarily MT.
  Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
  License: apache-2.0
@@ -23,7 +23,7 @@ Dynamic: license-file

  Pearmut is a **Platform for Evaluation and Reviewing of Multilingual Tasks**.
  It evaluates model outputs, primarily translation but also various other NLP tasks.
- Supports multimodality (text, video, audio, images) and a variety of annotation protocols (DA, ESA, MQM, paired ESA, etc).
+ Supports multimodality (text, video, audio, images) and a variety of annotation protocols ([DA](https://aclanthology.org/N15-1124/), [ESA](https://aclanthology.org/2024.wmt-1.131/), [ESA<sup>AI</sup>](https://aclanthology.org/2025.naacl-long.255/), [MQM](https://doi.org/10.1162/tacl_a_00437), paired ESA, etc).

  [![PyPi version](https://badgen.net/pypi/v/pearmut/)](https://pypi.org/project/pearmut)
  &nbsp;
@@ -31,7 +31,7 @@ Supports multimodality (text, video, audio, images) and a variety of annotation
  &nbsp;
  [![PyPi license](https://badgen.net/pypi/license/pearmut/)](https://pypi.org/project/pearmut/)
  &nbsp;
- [![build status](https://github.com/zouharvi/pearmut/actions/workflows/ci.yml/badge.svg)](https://github.com/zouharvi/pearmut/actions/workflows/ci.yml)
+ [![build status](https://github.com/zouharvi/pearmut/actions/workflows/test.yml/badge.svg)](https://github.com/zouharvi/pearmut/actions/workflows/test.yml)

  <img width="1000" alt="Screenshot of ESA/MQM interface" src="https://github.com/user-attachments/assets/f14c91a5-44d7-4248-ada9-387e95ca59d0" />

@@ -63,7 +63,7 @@ One of the simplest ones, where each user has a pre-defined list of tasks (`task
  ```python
  {
      "info": {
-         "type": "task-based",
+         "assignment": "task-based",
          "template": "pointwise",
          "protocol_score": true, # we want scores [0...100] for each segment
          "protocol_error_spans": true, # we want error spans
@@ -115,19 +115,51 @@ For the standard ones (ESA, DA, MQM), we expect each item to be a dictionary (co
  ... # definition of another item (document)
  ```

- We also support a super simple allocation of annotations (`task-single`, not yet ⚠️), where you simply pass a list of all examples to be evaluated and they are processed in parallel by all annotators:
+ ## Pre-filled Error Spans (ESA<sup>AI</sup> Support)
+
+ For workflows where you want to provide pre-filled error annotations (e.g., ESA<sup>AI</sup>), you can include an `error_spans` key in each item.
+ These spans will be loaded into the interface as existing annotations that users can review, modify, or delete.
+
+ ```python
+ {
+     "src": "The quick brown fox jumps over the lazy dog.",
+     "tgt": "Rychlá hnědá liška skáče přes líného psa.",
+     "error_spans": [
+         {
+             "start_i": 0, # character index start (inclusive)
+             "end_i": 5, # character index end (inclusive)
+             "severity": "minor", # "minor", "major", "neutral", or null
+             "category": null # MQM category string or null
+         },
+         {
+             "start_i": 27,
+             "end_i": 32,
+             "severity": "major",
+             "category": null
+         }
+     ]
+ }
+ ```
+
+ For **listwise** template, `error_spans` is a 2D array where each inner array corresponds to error spans for that candidate.
+
+ See [examples/esaai_prefilled.json](examples/esaai_prefilled.json) for a complete example.
+
+ ## Single-stream Assignment
+
+ We also support a simple allocation where all annotators draw from the same pool (`single-stream`). Items are randomly assigned to annotators from the pool of unfinished items:
  ```python
  {
      "campaign_id": "my campaign 6",
      "info": {
-         "type": "task-single",
+         "assignment": "single-stream",
          "template": "pointwise",
          "protocol_score": True, # collect scores
          "protocol_error_spans": True, # collect error spans
          "protocol_error_categories": False, # do not collect MQM categories, so ESA
-         "users": 50,
+         "num_users": 50, # number of annotators
      },
-     "data": [...], # list of all items
+     "data": [...], # list of all items (shared among all annotators)
  }
  ```

@@ -137,10 +169,10 @@ We also support dynamic allocation of annotations (`dynamic`, not yet ⚠️), w
  {
      "campaign_id": "my campaign 6",
      "info": {
-         "type": "dynamic",
-         "template": "kway",
+         "assignment": "dynamic",
+         "template": "listwise",
          "protocol_k": 5,
-         "users": 50,
+         "num_users": 50,
      },
      "data": [...], # list of all items
  }
@@ -154,6 +186,25 @@ pearmut add my_campaign_4.json
  pearmut run
  ```

+ ## Campaign options
+
+ In summary, you can select from the assignment types
+
+ - `task-based`: each user has a predefined set of items
+ - `single-stream`: all users are annotating together the same set of items
+ - `dynamic`: WIP ⚠️
+
+ and independently of that select your protocol template:
+
+ - `pointwise`: evaluate a single output given a single output
+   - `protocol_score`: ask for score 0 to 100
+   - `protocol_error_spans`: ask for highlighting error spans
+   - `protocol_error_categories`: ask for highlighting error categories
+ - `listwise`: evaluate multiple outputs at the same time given a single output ⚠️
+   - `protocol_score`: ask for score 0 to 100
+   - `protocol_error_spans`: ask for highlighting error spans
+   - `protocol_error_categories`: ask for highlighting error categories
+
  ## Campaign management

  When adding new campaigns or launching pearmut, a management link is shown that gives an overview of annotator progress but also an easy access to the annotation links or resetting the task progress (no data will be lost).
@@ -170,7 +221,7 @@ An intentionally incorrect token can be shown if the annotations don't pass qual

  We also support anything HTML-compatible both on the input and on the output.
  This includes embedded YouTube videos, or even simple `<video ` tags that point to some resource somewhere.
- For an example, try [examples/mock_multimodal.json](examples/mock_multimodal.json).
+ For an example, try [examples/multimodal.json](examples/multimodal.json).
  Tip: make sure the elements are already appropriately styled.

  <img width="800" alt="Preview of multimodal elements in Pearmut" src="https://github.com/user-attachments/assets/f34a1a3e-ad95-4114-95ee-8a49e8003faf" />
@@ -185,7 +236,7 @@ To make changes locally, clone the repository and run the following, which will
  cd pearmut
  # watch the frontend for changes (in a separate terminal)
  npm install web/ --prefix web/
- npm run watch --prefix web/
+ npm run build --prefix web/ # `watch` for rebuild on code change

  # install local package as editable
  pip3 install -e .
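
The `error_spans` format documented above uses inclusive character indices. A quick, hypothetical check of that convention; only the field names come from the diff, the helper itself is not part of pearmut:

```python
# Hypothetical: slice out the text covered by a span from the documented example.
tgt = "Rychlá hnědá liška skáče přes líného psa."
span = {"start_i": 0, "end_i": 5, "severity": "minor", "category": None}

def span_text(text: str, span: dict) -> str:
    # "end_i" is documented as inclusive, hence the +1 when slicing
    return text[span["start_i"] : span["end_i"] + 1]

assert span_text(tgt, span) == "Rychlá"
```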

{pearmut-0.1.1 → pearmut-0.1.3}/pearmut.egg-info/SOURCES.txt
@@ -8,12 +8,14 @@ pearmut.egg-info/entry_points.txt
  pearmut.egg-info/requires.txt
  pearmut.egg-info/top_level.txt
  server/app.py
+ server/assignment.py
  server/cli.py
- server/protocols.py
  server/utils.py
  server/static/dashboard.bundle.js
  server/static/dashboard.html
  server/static/index.html
+ server/static/listwise.bundle.js
+ server/static/listwise.html
  server/static/pointwise.bundle.js
  server/static/pointwise.html
  server/static/assets/favicon.svg

{pearmut-0.1.1 → pearmut-0.1.3}/pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "pearmut"
- version = "0.1.1"
+ version = "0.1.3"
  description = "A tool for evaluation of model outputs, primarily MT."
  readme = "README.md"
  license = { text = "apache-2.0" }

{pearmut-0.1.1 → pearmut-0.1.3}/server/app.py
@@ -8,8 +8,8 @@ from fastapi.responses import JSONResponse
  from fastapi.staticfiles import StaticFiles
  from pydantic import BaseModel

- from .protocols import get_next_item, reset_task, update_progress
- from .utils import ROOT, load_progress_data, save_progress_data
+ from .assignment import get_i_item, get_next_item, reset_task, update_progress
+ from .utils import ROOT, load_progress_data, save_db_payload, save_progress_data

  os.makedirs(f"{ROOT}/data/outputs", exist_ok=True)

@@ -36,7 +36,7 @@ class LogResponseRequest(BaseModel):
      campaign_id: str
      user_id: str
      item_i: int
-     payload: Any
+     payload: dict[str, Any]


  @app.post("/log-response")
@@ -45,6 +45,7 @@ async def _log_response(request: LogResponseRequest):

      campaign_id = request.campaign_id
      user_id = request.user_id
+     item_i = request.item_i

      if campaign_id not in progress_data:
          return JSONResponse(content={"error": "Unknown campaign ID"}, status_code=400)
@@ -52,8 +53,7 @@ async def _log_response(request: LogResponseRequest):
          return JSONResponse(content={"error": "Unknown user ID"}, status_code=400)

      # append response to the output log
-     with open(f"{ROOT}/data/outputs/{campaign_id}.jsonl", "a") as log_file:
-         log_file.write(json.dumps(request.payload, ensure_ascii=False) + "\n")
+     save_db_payload(campaign_id, request.payload | {"user_id": user_id, "item_i": item_i})

      # if actions were submitted, we can log time data
      if "actions" in request.payload:
@@ -97,6 +97,32 @@ async def _get_next_item(request: NextItemRequest):
      )


+ class GetItemRequest(BaseModel):
+     campaign_id: str
+     user_id: str
+     item_i: int
+
+
+ @app.post("/get-i-item")
+ async def _get_i_item(request: GetItemRequest):
+     campaign_id = request.campaign_id
+     user_id = request.user_id
+     item_i = request.item_i
+
+     if campaign_id not in progress_data:
+         return JSONResponse(content={"error": "Unknown campaign ID"}, status_code=400)
+     if user_id not in progress_data[campaign_id]:
+         return JSONResponse(content={"error": "Unknown user ID"}, status_code=400)
+
+     return get_i_item(
+         campaign_id,
+         user_id,
+         tasks_data,
+         progress_data,
+         item_i,
+     )
+
+
  class DashboardDataRequest(BaseModel):
      campaign_id: str
      token: str | None = None
@@ -111,19 +137,20 @@ async def _dashboard_data(request: DashboardDataRequest):
      if campaign_id not in progress_data:
          return JSONResponse(content={"error": "Unknown campaign ID"}, status_code=400)

-     progress_new = {
-         user_id: {
-             **user_val,
-             "total": len(tasks_data[campaign_id]["data"][user_id]),
-         } | (
-             # override if not privileged
-             {
-                 "token_correct": None,
-                 "token_incorrect": None,
-             } if not is_privileged else {}
-         )
-         for user_id, user_val in progress_data[campaign_id].items()
-     }
+     progress_new = {}
+     assignment = tasks_data[campaign_id]["info"]["assignment"]
+     if assignment not in ["task-based", "single-stream"]:
+         return JSONResponse(content={"error": "Unsupported campaign assignment type"}, status_code=400)
+
+     for user_id, user_val in progress_data[campaign_id].items():
+         # shallow copy
+         entry = dict(user_val)
+
+         if not is_privileged:
+             entry["token_correct"] = None
+             entry["token_incorrect"] = None
+
+         progress_new[user_id] = entry

      return JSONResponse(
          content={
@@ -190,19 +217,22 @@ async def _download_progress(
          return JSONResponse(content={"error": "Mismatched campaign_id and token count"}, status_code=400)

      output = {}
-     for campaign_id, campaign_id in enumerate(campaign_id):
-         if campaign_id not in progress_data:
-             return JSONResponse(content={"error": f"Unknown campaign ID {campaign_id}"}, status_code=400)
-         if token[campaign_id] != tasks_data[campaign_id]["token"]:
-             return JSONResponse(content={"error": f"Invalid token for campaign ID {campaign_id}"}, status_code=400)
+     for i, cid in enumerate(campaign_id):
+         if cid not in progress_data:
+             return JSONResponse(content={"error": f"Unknown campaign ID {cid}"}, status_code=400)
+         if token[i] != tasks_data[cid]["token"]:
+             return JSONResponse(content={"error": f"Invalid token for campaign ID {cid}"}, status_code=400)

-         output[campaign_id] = progress_data[campaign_id]
+         output[cid] = progress_data[cid]

      return JSONResponse(content=output, status_code=200)

+ static_dir = f"{os.path.dirname(os.path.abspath(__file__))}/static/"
+ if not os.path.exists(static_dir + "index.html"):
+     raise FileNotFoundError("Static directory not found. Please build the frontend first.")

  app.mount(
      "/",
-     StaticFiles(directory=f"{os.path.dirname(os.path.abspath(__file__))}/static/" , html=True, follow_symlink=True),
+     StaticFiles(directory=static_dir, html=True, follow_symlink=True),
      name="static",
  )
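
For context on the server changes, a sketch of how a client might call the new `/get-i-item` route and the reworked `/log-response` endpoint. The paths and field names mirror the `GetItemRequest` and `LogResponseRequest` models in the diff; the host, port, user id, and payload contents are assumptions:

```python
# Hypothetical client-side calls against a locally running pearmut server.
import requests

BASE = "http://localhost:8000"  # assumed address of the running server

# fetch a specific item by index (new endpoint in 0.1.3)
item = requests.post(f"{BASE}/get-i-item", json={
    "campaign_id": "my campaign 6",
    "user_id": "user1",  # assumed user id
    "item_i": 0,
}).json()
print(item)

# submit a response; payload must now be a dict (payload: dict[str, Any]),
# and the server merges user_id and item_i into the stored record
requests.post(f"{BASE}/log-response", json={
    "campaign_id": "my campaign 6",
    "user_id": "user1",
    "item_i": 0,
    "payload": {"score": 80},  # illustrative payload contents
})
```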