pearmut 1.0.2__tar.gz → 1.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pearmut-1.0.2 → pearmut-1.0.3}/PKG-INFO +74 -1
- {pearmut-1.0.2 → pearmut-1.0.3}/README.md +74 -1
- {pearmut-1.0.2 → pearmut-1.0.3}/pearmut.egg-info/PKG-INFO +74 -1
- {pearmut-1.0.2 → pearmut-1.0.3}/pyproject.toml +2 -2
- {pearmut-1.0.2 → pearmut-1.0.3}/server/app.py +8 -5
- {pearmut-1.0.2 → pearmut-1.0.3}/server/assignment.py +336 -82
- {pearmut-1.0.2 → pearmut-1.0.3}/server/cli.py +145 -82
- pearmut-1.0.3/server/static/annotate.bundle.js +1 -0
- {pearmut-1.0.2 → pearmut-1.0.3}/server/static/annotate.html +11 -7
- pearmut-1.0.3/server/static/dashboard.bundle.js +1 -0
- {pearmut-1.0.2 → pearmut-1.0.3}/server/static/dashboard.html +1 -1
- {pearmut-1.0.2 → pearmut-1.0.3}/server/static/index.html +1 -1
- {pearmut-1.0.2 → pearmut-1.0.3}/server/static/style.css +38 -0
- {pearmut-1.0.2 → pearmut-1.0.3}/server/utils.py +38 -21
- pearmut-1.0.2/server/static/annotate.bundle.js +0 -1
- pearmut-1.0.2/server/static/dashboard.bundle.js +0 -1
- {pearmut-1.0.2 → pearmut-1.0.3}/LICENSE +0 -0
- {pearmut-1.0.2 → pearmut-1.0.3}/pearmut.egg-info/SOURCES.txt +0 -0
- {pearmut-1.0.2 → pearmut-1.0.3}/pearmut.egg-info/dependency_links.txt +0 -0
- {pearmut-1.0.2 → pearmut-1.0.3}/pearmut.egg-info/entry_points.txt +0 -0
- {pearmut-1.0.2 → pearmut-1.0.3}/pearmut.egg-info/requires.txt +0 -0
- {pearmut-1.0.2 → pearmut-1.0.3}/pearmut.egg-info/top_level.txt +0 -0
- {pearmut-1.0.2 → pearmut-1.0.3}/server/constants.py +0 -0
- {pearmut-1.0.2 → pearmut-1.0.3}/server/results_export.py +0 -0
- {pearmut-1.0.2 → pearmut-1.0.3}/server/static/favicon.svg +0 -0
- {pearmut-1.0.2 → pearmut-1.0.3}/server/static/index.bundle.js +0 -0
- {pearmut-1.0.2 → pearmut-1.0.3}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pearmut
|
|
3
|
-
Version: 1.0.2
|
|
3
|
+
Version: 1.0.3
|
|
4
4
|
Summary: A tool for evaluation of model outputs, primarily MT.
|
|
5
5
|
Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -35,12 +35,15 @@ Dynamic: license-file
|
|
|
35
35
|
- [Assignment Types](#assignment-types)
|
|
36
36
|
- [Advanced Features](#advanced-features)
|
|
37
37
|
- [Pre-filled Error Spans (ESA<sup>AI</sup>)](#pre-filled-error-spans-esaai)
|
|
38
|
+
- [Custom MQM Taxonomy](#custom-mqm-taxonomy)
|
|
38
39
|
- [Tutorial and Attention Checks](#tutorial-and-attention-checks)
|
|
40
|
+
- [Form Items for User Metadata](#form-items-for-user-metadata)
|
|
39
41
|
- [Pre-defined User IDs and Tokens](#pre-defined-user-ids-and-tokens)
|
|
40
42
|
- [Multimodal Annotations](#multimodal-annotations)
|
|
41
43
|
- [Hosting Assets](#hosting-assets)
|
|
42
44
|
- [Campaign Management](#campaign-management)
|
|
43
45
|
- [Custom Completion Messages](#custom-completion-messages)
|
|
46
|
+
- [Prolific Integration](#prolific-integration)
|
|
44
47
|
- [CLI Commands](#cli-commands)
|
|
45
48
|
- [Terminology](#terminology)
|
|
46
49
|
- [Development](#development)
|
|
@@ -141,6 +144,7 @@ The `shuffle` parameter in campaign `info` controls this behavior:
|
|
|
141
144
|
"data": [...]
|
|
142
145
|
}
|
|
143
146
|
```
|
|
147
|
+
Documents in `data_welcome` are not shuffled, so they are not required to have the same models in all documents.
|
|
144
148
|
|
|
145
149
|
### Showing Model Names
|
|
146
150
|
|
|
@@ -197,6 +201,33 @@ Enable a textfield for post-editing or translation tasks using the `textfield` p
|
|
|
197
201
|
- `"visible"`: Textfield always visible
|
|
198
202
|
- `"prefilled"`: Textfield visible and pre-filled with model output for post-editing
|
|
199
203
|
|
|
204
|
+
### Custom MQM Taxonomy
|
|
205
|
+
|
|
206
|
+
For MQM protocol campaigns, you can define a custom error taxonomy instead of using the default MQM categories. Specify `mqm_categories` in the campaign `info` section as a dictionary mapping main categories to lists of subcategories:
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
```python
|
|
210
|
+
{
|
|
211
|
+
"info": {
|
|
212
|
+
"assignment": "task-based",
|
|
213
|
+
"protocol": "MQM",
|
|
214
|
+
"mqm_categories": {
|
|
215
|
+
"": [], # Empty selection option
|
|
216
|
+
"General": ["", "Accuracy", "Fluency"],
|
|
217
|
+
"Audio-specific": ["", "Inaudible", "Background noise", "Speaker overlap", "Misinterpretation"],
|
|
218
|
+
"Style": ["", "Awkward", "Embarassing"],
|
|
219
|
+
"Unknown": [] # Category with no subcategories
|
|
220
|
+
}
|
|
221
|
+
},
|
|
222
|
+
"campaign_id": "custom_mqm_example",
|
|
223
|
+
"data": [...]
|
|
224
|
+
}
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
If `mqm_categories` is not provided, the default MQM taxonomy will be used. The empty string key `""` provides an unselected state in the dropdown. Categories with empty subcategory lists (e.g., `"Unknown": []`) do not require a subcategory selection.
|
|
228
|
+
|
|
229
|
+
See [examples/custom_mqm.json](examples/custom_mqm.json) for a complete example.
|
|
230
|
+
|
|
200
231
|
### Custom Instructions
|
|
201
232
|
|
|
202
233
|
Set campaign-level instructions using the `instructions` field in `info` (supports HTML).
|
|
@@ -286,6 +317,34 @@ The `score_greaterthan` field specifies the index of the candidate that must hav
|
|
|
286
317
|
See [examples/tutorial/esa_deen.json](examples/tutorial/esa_deen.json) for a mock campaign with a fully prepared ESA tutorial.
|
|
287
318
|
To use it, simply extract the `data` attribute and prefix it to each task in your campaign.
|
|
288
319
|
|
|
320
|
+
#### Universal Tutorial Items with `data_welcome`
|
|
321
|
+
|
|
322
|
+
Use `data_welcome` to add tutorial items that users must complete before starting regular tasks. The structure is a list of documents (same as `data`). Welcome items have IDs `welcome_0`, `welcome_1`, etc. and are tracked separately via `progress_welcome`.
|
|
323
|
+
|
|
324
|
+
### Form Items for User Metadata
|
|
325
|
+
|
|
326
|
+
Collect user information (demographics, expertise) before annotation tasks using form items in `data_welcome`.
|
|
327
|
+
Form items have `text` (label/question) and `form` (field type: `null`, `"string"`, `"number"`, `"choices"`, and `"script"`).
|
|
328
|
+
Documents must be homogeneous: all form items or all evaluation items.
|
|
329
|
+
|
|
330
|
+
```python
|
|
331
|
+
{
|
|
332
|
+
"data_welcome": [
|
|
333
|
+
[
|
|
334
|
+
{"text": "What is your native language?", "form": "string"},
|
|
335
|
+
{"text": "Rate your expertise (1-10)", "form": "number"}
|
|
336
|
+
]
|
|
337
|
+
]
|
|
338
|
+
}
|
|
339
|
+
```
|
|
340
|
+
|
|
341
|
+
<img width="400" alt="Screenshot of a user form" src="https://github.com/user-attachments/assets/2310e8dc-98e9-4abf-8a27-6781b0094efe" />
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
It is possible to automatically collect additional information from the host system using `"script"` field type.
|
|
345
|
+
Typically such a form document (or a sequence of them) would be stored in `"data_welcome"` so that it is both mandatory and shown to all users.
|
|
346
|
+
See [examples/user_info_form.json](examples/user_info_form.json).
|
|
347
|
+
|
|
289
348
|
### Single-stream Assignment
|
|
290
349
|
|
|
291
350
|
All annotators draw from a shared pool with random assignment:
|
|
@@ -299,11 +358,14 @@ All annotators draw from a shared pool with random assignment:
|
|
|
299
358
|
# ESA: error spans and scores
|
|
300
359
|
"protocol": "ESA",
|
|
301
360
|
"users": 50, # number of annotators (can also be a list, see below)
|
|
361
|
+
"docs_per_user": 10, # optional: show goodbye after N documents per user
|
|
302
362
|
},
|
|
303
363
|
"data": [...], # list of all items (shared among all annotators)
|
|
304
364
|
}
|
|
305
365
|
```
|
|
306
366
|
|
|
367
|
+
Set `docs_per_user` to limit how many documents each user annotates before seeing the goodbye message (for single-stream, this is the number of documents).
|
|
368
|
+
|
|
307
369
|
### Dynamic Assignment
|
|
308
370
|
|
|
309
371
|
The `dynamic` assignment type intelligently selects items based on current model performance to focus annotation effort on top-performing models using contrastive comparisons.
|
|
@@ -320,11 +382,14 @@ All items must contain outputs from all models for this assignment type to work
|
|
|
320
382
|
"dynamic_contrastive_models": 2, # how many models to compare per item (optional, default: 1)
|
|
321
383
|
"dynamic_first": 5, # annotations per model before dynamic kicks in (optional, default: 5)
|
|
322
384
|
"dynamic_backoff": 0.1, # probability of uniform sampling (optional, default: 0)
|
|
385
|
+
"docs_per_user": 20, # optional: show goodbye after N documents per user
|
|
323
386
|
},
|
|
324
387
|
"data": [...], # list of all items (shared among all annotators)
|
|
325
388
|
}
|
|
326
389
|
```
|
|
327
390
|
|
|
391
|
+
Set `docs_per_user` to limit how many documents each user annotates before seeing the goodbye message (for dynamic, this is roughly the number of documents × models).
|
|
392
|
+
|
|
328
393
|
**How it works:**
|
|
329
394
|
1. Initial phase: Each model gets `dynamic_first` annotations with fully random contrastive evaluation
|
|
330
395
|
2. Dynamic phase: After the initial phase, top `dynamic_top` models (by average score) are identified
|
|
@@ -412,6 +477,14 @@ When tokens are supplied, the dashboard will try to show model rankings based on
|
|
|
412
477
|
|
|
413
478
|
Customize the goodbye message shown to users when they complete all annotations using the `instructions_goodbye` field in campaign info. Supports arbitrary HTML for styling and formatting with variable replacement: `${TOKEN}` (completion token) and `${USER_ID}` (user ID). Default: `"If someone asks you for a token of completion, show them: ${TOKEN}"`.
|
|
414
479
|
|
|
480
|
+
### Prolific Integration
|
|
481
|
+
|
|
482
|
+
Use task-based assignment with Prolific. For each task, Pearmut generates a unique URL which can be uploaded to Prolific's interface. Add redirect (on completion) to `instructions_goodbye`:
|
|
483
|
+
```json
|
|
484
|
+
"instructions_goodbye": "<a href='https://app.prolific.com/submissions/complete?cc=${TOKEN}'>Click here to return to Prolific</a>"
|
|
485
|
+
```
|
|
486
|
+
The `${TOKEN}` is automatically replaced based on passing attention checks (see [Attention checks](#tutorial-and-attention-checks) and [Pre-defined tokens](#pre-defined-user-ids-and-tokens)).
|
|
487
|
+
|
|
415
488
|
## Terminology
|
|
416
489
|
|
|
417
490
|
- **Campaign**: An annotation project that contains configuration, data, and user assignments. Each campaign has a unique identifier and is defined in a JSON file.
|
|
@@ -14,12 +14,15 @@
|
|
|
14
14
|
- [Assignment Types](#assignment-types)
|
|
15
15
|
- [Advanced Features](#advanced-features)
|
|
16
16
|
- [Pre-filled Error Spans (ESA<sup>AI</sup>)](#pre-filled-error-spans-esaai)
|
|
17
|
+
- [Custom MQM Taxonomy](#custom-mqm-taxonomy)
|
|
17
18
|
- [Tutorial and Attention Checks](#tutorial-and-attention-checks)
|
|
19
|
+
- [Form Items for User Metadata](#form-items-for-user-metadata)
|
|
18
20
|
- [Pre-defined User IDs and Tokens](#pre-defined-user-ids-and-tokens)
|
|
19
21
|
- [Multimodal Annotations](#multimodal-annotations)
|
|
20
22
|
- [Hosting Assets](#hosting-assets)
|
|
21
23
|
- [Campaign Management](#campaign-management)
|
|
22
24
|
- [Custom Completion Messages](#custom-completion-messages)
|
|
25
|
+
- [Prolific Integration](#prolific-integration)
|
|
23
26
|
- [CLI Commands](#cli-commands)
|
|
24
27
|
- [Terminology](#terminology)
|
|
25
28
|
- [Development](#development)
|
|
@@ -120,6 +123,7 @@ The `shuffle` parameter in campaign `info` controls this behavior:
|
|
|
120
123
|
"data": [...]
|
|
121
124
|
}
|
|
122
125
|
```
|
|
126
|
+
Documents in `data_welcome` are not shuffled, so they are not required to have the same models in all documents.
|
|
123
127
|
|
|
124
128
|
### Showing Model Names
|
|
125
129
|
|
|
@@ -176,6 +180,33 @@ Enable a textfield for post-editing or translation tasks using the `textfield` p
|
|
|
176
180
|
- `"visible"`: Textfield always visible
|
|
177
181
|
- `"prefilled"`: Textfield visible and pre-filled with model output for post-editing
|
|
178
182
|
|
|
183
|
+
### Custom MQM Taxonomy
|
|
184
|
+
|
|
185
|
+
For MQM protocol campaigns, you can define a custom error taxonomy instead of using the default MQM categories. Specify `mqm_categories` in the campaign `info` section as a dictionary mapping main categories to lists of subcategories:
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
```python
|
|
189
|
+
{
|
|
190
|
+
"info": {
|
|
191
|
+
"assignment": "task-based",
|
|
192
|
+
"protocol": "MQM",
|
|
193
|
+
"mqm_categories": {
|
|
194
|
+
"": [], # Empty selection option
|
|
195
|
+
"General": ["", "Accuracy", "Fluency"],
|
|
196
|
+
"Audio-specific": ["", "Inaudible", "Background noise", "Speaker overlap", "Misinterpretation"],
|
|
197
|
+
"Style": ["", "Awkward", "Embarassing"],
|
|
198
|
+
"Unknown": [] # Category with no subcategories
|
|
199
|
+
}
|
|
200
|
+
},
|
|
201
|
+
"campaign_id": "custom_mqm_example",
|
|
202
|
+
"data": [...]
|
|
203
|
+
}
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
If `mqm_categories` is not provided, the default MQM taxonomy will be used. The empty string key `""` provides an unselected state in the dropdown. Categories with empty subcategory lists (e.g., `"Unknown": []`) do not require a subcategory selection.
|
|
207
|
+
|
|
208
|
+
See [examples/custom_mqm.json](examples/custom_mqm.json) for a complete example.
|
|
209
|
+
|
|
179
210
|
### Custom Instructions
|
|
180
211
|
|
|
181
212
|
Set campaign-level instructions using the `instructions` field in `info` (supports HTML).
|
|
@@ -265,6 +296,34 @@ The `score_greaterthan` field specifies the index of the candidate that must hav
|
|
|
265
296
|
See [examples/tutorial/esa_deen.json](examples/tutorial/esa_deen.json) for a mock campaign with a fully prepared ESA tutorial.
|
|
266
297
|
To use it, simply extract the `data` attribute and prefix it to each task in your campaign.
|
|
267
298
|
|
|
299
|
+
#### Universal Tutorial Items with `data_welcome`
|
|
300
|
+
|
|
301
|
+
Use `data_welcome` to add tutorial items that users must complete before starting regular tasks. The structure is a list of documents (same as `data`). Welcome items have IDs `welcome_0`, `welcome_1`, etc. and are tracked separately via `progress_welcome`.
|
|
302
|
+
|
|
303
|
+
### Form Items for User Metadata
|
|
304
|
+
|
|
305
|
+
Collect user information (demographics, expertise) before annotation tasks using form items in `data_welcome`.
|
|
306
|
+
Form items have `text` (label/question) and `form` (field type: `null`, `"string"`, `"number"`, `"choices"`, and `"script"`).
|
|
307
|
+
Documents must be homogeneous: all form items or all evaluation items.
|
|
308
|
+
|
|
309
|
+
```python
|
|
310
|
+
{
|
|
311
|
+
"data_welcome": [
|
|
312
|
+
[
|
|
313
|
+
{"text": "What is your native language?", "form": "string"},
|
|
314
|
+
{"text": "Rate your expertise (1-10)", "form": "number"}
|
|
315
|
+
]
|
|
316
|
+
]
|
|
317
|
+
}
|
|
318
|
+
```
|
|
319
|
+
|
|
320
|
+
<img width="400" alt="Screenshot of a user form" src="https://github.com/user-attachments/assets/2310e8dc-98e9-4abf-8a27-6781b0094efe" />
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
It is possible to automatically collect additional information from the host system using `"script"` field type.
|
|
324
|
+
Typically such a form document (or a sequence of them) would be stored in `"data_welcome"` so that it is both mandatory and shown to all users.
|
|
325
|
+
See [examples/user_info_form.json](examples/user_info_form.json).
|
|
326
|
+
|
|
268
327
|
### Single-stream Assignment
|
|
269
328
|
|
|
270
329
|
All annotators draw from a shared pool with random assignment:
|
|
@@ -278,11 +337,14 @@ All annotators draw from a shared pool with random assignment:
|
|
|
278
337
|
# ESA: error spans and scores
|
|
279
338
|
"protocol": "ESA",
|
|
280
339
|
"users": 50, # number of annotators (can also be a list, see below)
|
|
340
|
+
"docs_per_user": 10, # optional: show goodbye after N documents per user
|
|
281
341
|
},
|
|
282
342
|
"data": [...], # list of all items (shared among all annotators)
|
|
283
343
|
}
|
|
284
344
|
```
|
|
285
345
|
|
|
346
|
+
Set `docs_per_user` to limit how many documents each user annotates before seeing the goodbye message (for single-stream, this is the number of documents).
|
|
347
|
+
|
|
286
348
|
### Dynamic Assignment
|
|
287
349
|
|
|
288
350
|
The `dynamic` assignment type intelligently selects items based on current model performance to focus annotation effort on top-performing models using contrastive comparisons.
|
|
@@ -299,11 +361,14 @@ All items must contain outputs from all models for this assignment type to work
|
|
|
299
361
|
"dynamic_contrastive_models": 2, # how many models to compare per item (optional, default: 1)
|
|
300
362
|
"dynamic_first": 5, # annotations per model before dynamic kicks in (optional, default: 5)
|
|
301
363
|
"dynamic_backoff": 0.1, # probability of uniform sampling (optional, default: 0)
|
|
364
|
+
"docs_per_user": 20, # optional: show goodbye after N documents per user
|
|
302
365
|
},
|
|
303
366
|
"data": [...], # list of all items (shared among all annotators)
|
|
304
367
|
}
|
|
305
368
|
```
|
|
306
369
|
|
|
370
|
+
Set `docs_per_user` to limit how many documents each user annotates before seeing the goodbye message (for dynamic, this is roughly the number of documents × models).
|
|
371
|
+
|
|
307
372
|
**How it works:**
|
|
308
373
|
1. Initial phase: Each model gets `dynamic_first` annotations with fully random contrastive evaluation
|
|
309
374
|
2. Dynamic phase: After the initial phase, top `dynamic_top` models (by average score) are identified
|
|
@@ -391,6 +456,14 @@ When tokens are supplied, the dashboard will try to show model rankings based on
|
|
|
391
456
|
|
|
392
457
|
Customize the goodbye message shown to users when they complete all annotations using the `instructions_goodbye` field in campaign info. Supports arbitrary HTML for styling and formatting with variable replacement: `${TOKEN}` (completion token) and `${USER_ID}` (user ID). Default: `"If someone asks you for a token of completion, show them: ${TOKEN}"`.
|
|
393
458
|
|
|
459
|
+
### Prolific Integration
|
|
460
|
+
|
|
461
|
+
Use task-based assignment with Prolific. For each task, Pearmut generates a unique URL which can be uploaded to Prolific's interface. Add redirect (on completion) to `instructions_goodbye`:
|
|
462
|
+
```json
|
|
463
|
+
"instructions_goodbye": "<a href='https://app.prolific.com/submissions/complete?cc=${TOKEN}'>Click here to return to Prolific</a>"
|
|
464
|
+
```
|
|
465
|
+
The `${TOKEN}` is automatically replaced based on passing attention checks (see [Attention checks](#tutorial-and-attention-checks) and [Pre-defined tokens](#pre-defined-user-ids-and-tokens)).
|
|
466
|
+
|
|
394
467
|
## Terminology
|
|
395
468
|
|
|
396
469
|
- **Campaign**: An annotation project that contains configuration, data, and user assignments. Each campaign has a unique identifier and is defined in a JSON file.
|
|
@@ -467,4 +540,4 @@ If you use this work in your paper, please cite as following.
|
|
|
467
540
|
```
|
|
468
541
|
|
|
469
542
|
Contributions are welcome! Please reach out to [Vilém Zouhar](mailto:vilem.zouhar@gmail.com).
|
|
470
|
-
See changes in [CHANGELOG.md](CHANGELOG.md).
|
|
543
|
+
See changes in [CHANGELOG.md](CHANGELOG.md).
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pearmut
|
|
3
|
-
Version: 1.0.2
|
|
3
|
+
Version: 1.0.3
|
|
4
4
|
Summary: A tool for evaluation of model outputs, primarily MT.
|
|
5
5
|
Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -35,12 +35,15 @@ Dynamic: license-file
|
|
|
35
35
|
- [Assignment Types](#assignment-types)
|
|
36
36
|
- [Advanced Features](#advanced-features)
|
|
37
37
|
- [Pre-filled Error Spans (ESA<sup>AI</sup>)](#pre-filled-error-spans-esaai)
|
|
38
|
+
- [Custom MQM Taxonomy](#custom-mqm-taxonomy)
|
|
38
39
|
- [Tutorial and Attention Checks](#tutorial-and-attention-checks)
|
|
40
|
+
- [Form Items for User Metadata](#form-items-for-user-metadata)
|
|
39
41
|
- [Pre-defined User IDs and Tokens](#pre-defined-user-ids-and-tokens)
|
|
40
42
|
- [Multimodal Annotations](#multimodal-annotations)
|
|
41
43
|
- [Hosting Assets](#hosting-assets)
|
|
42
44
|
- [Campaign Management](#campaign-management)
|
|
43
45
|
- [Custom Completion Messages](#custom-completion-messages)
|
|
46
|
+
- [Prolific Integration](#prolific-integration)
|
|
44
47
|
- [CLI Commands](#cli-commands)
|
|
45
48
|
- [Terminology](#terminology)
|
|
46
49
|
- [Development](#development)
|
|
@@ -141,6 +144,7 @@ The `shuffle` parameter in campaign `info` controls this behavior:
|
|
|
141
144
|
"data": [...]
|
|
142
145
|
}
|
|
143
146
|
```
|
|
147
|
+
Documents in `data_welcome` are not shuffled, so they are not required to have the same models in all documents.
|
|
144
148
|
|
|
145
149
|
### Showing Model Names
|
|
146
150
|
|
|
@@ -197,6 +201,33 @@ Enable a textfield for post-editing or translation tasks using the `textfield` p
|
|
|
197
201
|
- `"visible"`: Textfield always visible
|
|
198
202
|
- `"prefilled"`: Textfield visible and pre-filled with model output for post-editing
|
|
199
203
|
|
|
204
|
+
### Custom MQM Taxonomy
|
|
205
|
+
|
|
206
|
+
For MQM protocol campaigns, you can define a custom error taxonomy instead of using the default MQM categories. Specify `mqm_categories` in the campaign `info` section as a dictionary mapping main categories to lists of subcategories:
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
```python
|
|
210
|
+
{
|
|
211
|
+
"info": {
|
|
212
|
+
"assignment": "task-based",
|
|
213
|
+
"protocol": "MQM",
|
|
214
|
+
"mqm_categories": {
|
|
215
|
+
"": [], # Empty selection option
|
|
216
|
+
"General": ["", "Accuracy", "Fluency"],
|
|
217
|
+
"Audio-specific": ["", "Inaudible", "Background noise", "Speaker overlap", "Misinterpretation"],
|
|
218
|
+
"Style": ["", "Awkward", "Embarassing"],
|
|
219
|
+
"Unknown": [] # Category with no subcategories
|
|
220
|
+
}
|
|
221
|
+
},
|
|
222
|
+
"campaign_id": "custom_mqm_example",
|
|
223
|
+
"data": [...]
|
|
224
|
+
}
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
If `mqm_categories` is not provided, the default MQM taxonomy will be used. The empty string key `""` provides an unselected state in the dropdown. Categories with empty subcategory lists (e.g., `"Unknown": []`) do not require a subcategory selection.
|
|
228
|
+
|
|
229
|
+
See [examples/custom_mqm.json](examples/custom_mqm.json) for a complete example.
|
|
230
|
+
|
|
200
231
|
### Custom Instructions
|
|
201
232
|
|
|
202
233
|
Set campaign-level instructions using the `instructions` field in `info` (supports HTML).
|
|
@@ -286,6 +317,34 @@ The `score_greaterthan` field specifies the index of the candidate that must hav
|
|
|
286
317
|
See [examples/tutorial/esa_deen.json](examples/tutorial/esa_deen.json) for a mock campaign with a fully prepared ESA tutorial.
|
|
287
318
|
To use it, simply extract the `data` attribute and prefix it to each task in your campaign.
|
|
288
319
|
|
|
320
|
+
#### Universal Tutorial Items with `data_welcome`
|
|
321
|
+
|
|
322
|
+
Use `data_welcome` to add tutorial items that users must complete before starting regular tasks. The structure is a list of documents (same as `data`). Welcome items have IDs `welcome_0`, `welcome_1`, etc. and are tracked separately via `progress_welcome`.
|
|
323
|
+
|
|
324
|
+
### Form Items for User Metadata
|
|
325
|
+
|
|
326
|
+
Collect user information (demographics, expertise) before annotation tasks using form items in `data_welcome`.
|
|
327
|
+
Form items have `text` (label/question) and `form` (field type: `null`, `"string"`, `"number"`, `"choices"`, and `"script"`).
|
|
328
|
+
Documents must be homogeneous: all form items or all evaluation items.
|
|
329
|
+
|
|
330
|
+
```python
|
|
331
|
+
{
|
|
332
|
+
"data_welcome": [
|
|
333
|
+
[
|
|
334
|
+
{"text": "What is your native language?", "form": "string"},
|
|
335
|
+
{"text": "Rate your expertise (1-10)", "form": "number"}
|
|
336
|
+
]
|
|
337
|
+
]
|
|
338
|
+
}
|
|
339
|
+
```
|
|
340
|
+
|
|
341
|
+
<img width="400" alt="Screenshot of a user form" src="https://github.com/user-attachments/assets/2310e8dc-98e9-4abf-8a27-6781b0094efe" />
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
It is possible to automatically collect additional information from the host system using `"script"` field type.
|
|
345
|
+
Typically such a form document (or a sequence of them) would be stored in `"data_welcome"` so that it is both mandatory and shown to all users.
|
|
346
|
+
See [examples/user_info_form.json](examples/user_info_form.json).
|
|
347
|
+
|
|
289
348
|
### Single-stream Assignment
|
|
290
349
|
|
|
291
350
|
All annotators draw from a shared pool with random assignment:
|
|
@@ -299,11 +358,14 @@ All annotators draw from a shared pool with random assignment:
|
|
|
299
358
|
# ESA: error spans and scores
|
|
300
359
|
"protocol": "ESA",
|
|
301
360
|
"users": 50, # number of annotators (can also be a list, see below)
|
|
361
|
+
"docs_per_user": 10, # optional: show goodbye after N documents per user
|
|
302
362
|
},
|
|
303
363
|
"data": [...], # list of all items (shared among all annotators)
|
|
304
364
|
}
|
|
305
365
|
```
|
|
306
366
|
|
|
367
|
+
Set `docs_per_user` to limit how many documents each user annotates before seeing the goodbye message (for single-stream, this is the number of documents).
|
|
368
|
+
|
|
307
369
|
### Dynamic Assignment
|
|
308
370
|
|
|
309
371
|
The `dynamic` assignment type intelligently selects items based on current model performance to focus annotation effort on top-performing models using contrastive comparisons.
|
|
@@ -320,11 +382,14 @@ All items must contain outputs from all models for this assignment type to work
|
|
|
320
382
|
"dynamic_contrastive_models": 2, # how many models to compare per item (optional, default: 1)
|
|
321
383
|
"dynamic_first": 5, # annotations per model before dynamic kicks in (optional, default: 5)
|
|
322
384
|
"dynamic_backoff": 0.1, # probability of uniform sampling (optional, default: 0)
|
|
385
|
+
"docs_per_user": 20, # optional: show goodbye after N documents per user
|
|
323
386
|
},
|
|
324
387
|
"data": [...], # list of all items (shared among all annotators)
|
|
325
388
|
}
|
|
326
389
|
```
|
|
327
390
|
|
|
391
|
+
Set `docs_per_user` to limit how many documents each user annotates before seeing the goodbye message (for dynamic, this is roughly the number of documents × models).
|
|
392
|
+
|
|
328
393
|
**How it works:**
|
|
329
394
|
1. Initial phase: Each model gets `dynamic_first` annotations with fully random contrastive evaluation
|
|
330
395
|
2. Dynamic phase: After the initial phase, top `dynamic_top` models (by average score) are identified
|
|
@@ -412,6 +477,14 @@ When tokens are supplied, the dashboard will try to show model rankings based on
|
|
|
412
477
|
|
|
413
478
|
Customize the goodbye message shown to users when they complete all annotations using the `instructions_goodbye` field in campaign info. Supports arbitrary HTML for styling and formatting with variable replacement: `${TOKEN}` (completion token) and `${USER_ID}` (user ID). Default: `"If someone asks you for a token of completion, show them: ${TOKEN}"`.
|
|
414
479
|
|
|
480
|
+
### Prolific Integration
|
|
481
|
+
|
|
482
|
+
Use task-based assignment with Prolific. For each task, Pearmut generates a unique URL which can be uploaded to Prolific's interface. Add redirect (on completion) to `instructions_goodbye`:
|
|
483
|
+
```json
|
|
484
|
+
"instructions_goodbye": "<a href='https://app.prolific.com/submissions/complete?cc=${TOKEN}'>Click here to return to Prolific</a>"
|
|
485
|
+
```
|
|
486
|
+
The `${TOKEN}` is automatically replaced based on passing attention checks (see [Attention checks](#tutorial-and-attention-checks) and [Pre-defined tokens](#pre-defined-user-ids-and-tokens)).
|
|
487
|
+
|
|
415
488
|
## Terminology
|
|
416
489
|
|
|
417
490
|
- **Campaign**: An annotation project that contains configuration, data, and user assignments. Each campaign has a unique identifier and is defined in a JSON file.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "pearmut"
|
|
3
|
-
version = "1.0.2"
|
|
3
|
+
version = "1.0.3"
|
|
4
4
|
description = "A tool for evaluation of model outputs, primarily MT."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license = { text = "MIT" }
|
|
@@ -31,7 +31,7 @@ Repository = "https://github.com/zouharvi/pearmut"
|
|
|
31
31
|
Issues = "https://github.com/zouharvi/pearmut/issues"
|
|
32
32
|
|
|
33
33
|
[tool.setuptools]
|
|
34
|
-
package-dir = {
|
|
34
|
+
package-dir = { pearmut = "server" }
|
|
35
35
|
packages = ["pearmut"]
|
|
36
36
|
|
|
37
37
|
[build-system]
|
|
@@ -49,7 +49,7 @@ for campaign_id in progress_data.keys():
|
|
|
49
49
|
class LogResponseRequest(BaseModel):
|
|
50
50
|
campaign_id: str
|
|
51
51
|
user_id: str
|
|
52
|
-
item_i: int
|
|
52
|
+
item_i: int | str
|
|
53
53
|
payload: dict[str, Any]
|
|
54
54
|
|
|
55
55
|
|
|
@@ -124,7 +124,7 @@ async def _get_next_item(request: NextItemRequest):
|
|
|
124
124
|
class GetItemRequest(BaseModel):
|
|
125
125
|
campaign_id: str
|
|
126
126
|
user_id: str
|
|
127
|
-
item_i: int
|
|
127
|
+
item_i: int | str
|
|
128
128
|
|
|
129
129
|
|
|
130
130
|
@app.post("/get-i-item")
|
|
@@ -179,7 +179,11 @@ async def _dashboard_data(request: DashboardDataRequest):
|
|
|
179
179
|
]
|
|
180
180
|
|
|
181
181
|
# Add threshold pass/fail status (only when user is complete)
|
|
182
|
-
if
|
|
182
|
+
if (
|
|
183
|
+
tasks_data[campaign_id]["info"]["assignment"] != "dynamic" and all(v in {"completed", "completed_foreign"} for v in entry["progress"])
|
|
184
|
+
) or (
|
|
185
|
+
tasks_data[campaign_id]["info"]["assignment"] == "dynamic" and all(v in {"completed", "completed_foreign"} for mv in entry["progress"] for v in mv.values())
|
|
186
|
+
):
|
|
183
187
|
entry["threshold_passed"] = check_validation_threshold(
|
|
184
188
|
tasks_data, progress_data, campaign_id, user_id
|
|
185
189
|
)
|
|
@@ -376,7 +380,6 @@ async def _download_annotations(
|
|
|
376
380
|
# NOTE: currently not checking tokens for progress download as it is non-destructive
|
|
377
381
|
# token: list[str] = Query()
|
|
378
382
|
):
|
|
379
|
-
|
|
380
383
|
output = {}
|
|
381
384
|
for campaign_id in campaign_id:
|
|
382
385
|
output_path = f"{ROOT}/data/outputs/{campaign_id}.jsonl"
|
|
@@ -403,7 +406,6 @@ async def _download_annotations(
|
|
|
403
406
|
async def _download_progress(
|
|
404
407
|
campaign_id: list[str] = Query(), token: list[str] = Query()
|
|
405
408
|
):
|
|
406
|
-
|
|
407
409
|
if len(campaign_id) != len(token):
|
|
408
410
|
return JSONResponse(
|
|
409
411
|
content="Mismatched campaign_id and token count", status_code=400
|
|
@@ -435,6 +437,7 @@ if not os.path.exists(static_dir + "index.html"):
|
|
|
435
437
|
"Static directory not found. Please build the frontend first."
|
|
436
438
|
)
|
|
437
439
|
|
|
440
|
+
|
|
438
441
|
# Serve HTML files directly without redirect
|
|
439
442
|
@app.get("/annotate")
|
|
440
443
|
async def serve_annotate():
|