pearmut 1.0.2__tar.gz → 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. {pearmut-1.0.2 → pearmut-1.1.0}/PKG-INFO +74 -1
  2. {pearmut-1.0.2 → pearmut-1.1.0}/README.md +74 -1
  3. {pearmut-1.0.2 → pearmut-1.1.0}/pearmut.egg-info/PKG-INFO +74 -1
  4. {pearmut-1.0.2 → pearmut-1.1.0}/pyproject.toml +2 -2
  5. {pearmut-1.0.2 → pearmut-1.1.0}/server/app.py +8 -5
  6. {pearmut-1.0.2 → pearmut-1.1.0}/server/assignment.py +336 -82
  7. {pearmut-1.0.2 → pearmut-1.1.0}/server/cli.py +145 -82
  8. pearmut-1.1.0/server/static/annotate.bundle.js +1 -0
  9. {pearmut-1.0.2 → pearmut-1.1.0}/server/static/annotate.html +11 -7
  10. pearmut-1.1.0/server/static/dashboard.bundle.js +1 -0
  11. {pearmut-1.0.2 → pearmut-1.1.0}/server/static/dashboard.html +1 -1
  12. {pearmut-1.0.2 → pearmut-1.1.0}/server/static/index.html +1 -1
  13. {pearmut-1.0.2 → pearmut-1.1.0}/server/static/style.css +38 -0
  14. {pearmut-1.0.2 → pearmut-1.1.0}/server/utils.py +38 -21
  15. pearmut-1.0.2/server/static/annotate.bundle.js +0 -1
  16. pearmut-1.0.2/server/static/dashboard.bundle.js +0 -1
  17. {pearmut-1.0.2 → pearmut-1.1.0}/LICENSE +0 -0
  18. {pearmut-1.0.2 → pearmut-1.1.0}/pearmut.egg-info/SOURCES.txt +0 -0
  19. {pearmut-1.0.2 → pearmut-1.1.0}/pearmut.egg-info/dependency_links.txt +0 -0
  20. {pearmut-1.0.2 → pearmut-1.1.0}/pearmut.egg-info/entry_points.txt +0 -0
  21. {pearmut-1.0.2 → pearmut-1.1.0}/pearmut.egg-info/requires.txt +0 -0
  22. {pearmut-1.0.2 → pearmut-1.1.0}/pearmut.egg-info/top_level.txt +0 -0
  23. {pearmut-1.0.2 → pearmut-1.1.0}/server/constants.py +0 -0
  24. {pearmut-1.0.2 → pearmut-1.1.0}/server/results_export.py +0 -0
  25. {pearmut-1.0.2 → pearmut-1.1.0}/server/static/favicon.svg +0 -0
  26. {pearmut-1.0.2 → pearmut-1.1.0}/server/static/index.bundle.js +0 -0
  27. {pearmut-1.0.2 → pearmut-1.1.0}/setup.cfg +0 -0
{pearmut-1.0.2 → pearmut-1.1.0}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: pearmut
- Version: 1.0.2
+ Version: 1.1.0
  Summary: A tool for evaluation of model outputs, primarily MT.
  Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
  License: MIT
@@ -35,12 +35,15 @@ Dynamic: license-file
  - [Assignment Types](#assignment-types)
  - [Advanced Features](#advanced-features)
  - [Pre-filled Error Spans (ESA<sup>AI</sup>)](#pre-filled-error-spans-esaai)
+ - [Custom MQM Taxonomy](#custom-mqm-taxonomy)
  - [Tutorial and Attention Checks](#tutorial-and-attention-checks)
+ - [Form Items for User Metadata](#form-items-for-user-metadata)
  - [Pre-defined User IDs and Tokens](#pre-defined-user-ids-and-tokens)
  - [Multimodal Annotations](#multimodal-annotations)
  - [Hosting Assets](#hosting-assets)
  - [Campaign Management](#campaign-management)
  - [Custom Completion Messages](#custom-completion-messages)
+ - [Prolific Integration](#prolific-integration)
  - [CLI Commands](#cli-commands)
  - [Terminology](#terminology)
  - [Development](#development)
@@ -141,6 +144,7 @@ The `shuffle` parameter in campaign `info` controls this behavior:
  "data": [...]
  }
  ```
+ Documents in `data_welcome` are not shuffled, so they do not need to contain the same models in all documents.

  ### Showing Model Names

@@ -197,6 +201,33 @@ Enable a textfield for post-editing or translation tasks using the `textfield` p
  - `"visible"`: Textfield always visible
  - `"prefilled"`: Textfield visible and pre-filled with model output for post-editing

+ ### Custom MQM Taxonomy
+
+ For MQM protocol campaigns, you can define a custom error taxonomy instead of using the default MQM categories. Specify `mqm_categories` in the campaign `info` section as a dictionary mapping main categories to lists of subcategories:
+
+
+ ```python
+ {
+   "info": {
+     "assignment": "task-based",
+     "protocol": "MQM",
+     "mqm_categories": {
+       "": [],  # Empty selection option
+       "General": ["", "Accuracy", "Fluency"],
+       "Audio-specific": ["", "Inaudible", "Background noise", "Speaker overlap", "Misinterpretation"],
+       "Style": ["", "Awkward", "Embarrassing"],
+       "Unknown": []  # Category with no subcategories
+     }
+   },
+   "campaign_id": "custom_mqm_example",
+   "data": [...]
+ }
+ ```
+
+ If `mqm_categories` is not provided, the default MQM taxonomy will be used. The empty string key `""` provides an unselected state in the dropdown. Categories with empty subcategory lists (e.g., `"Style": []`) do not require a subcategory selection.
+
+ See [examples/custom_mqm.json](examples/custom_mqm.json) for a complete example.
+
  ### Custom Instructions

  Set campaign-level instructions using the `instructions` field in `info` (supports HTML).
@@ -286,6 +317,34 @@ The `score_greaterthan` field specifies the index of the candidate that must hav
  See [examples/tutorial/esa_deen.json](examples/tutorial/esa_deen.json) for a mock campaign with a fully prepared ESA tutorial.
  To use it, simply extract the `data` attribute and prefix it to each task in your campaign.

+ #### Universal Tutorial Items with `data_welcome`
+
+ Use `data_welcome` to add tutorial items that users must complete before starting regular tasks. The structure is a list of documents (same as `data`). Welcome items have IDs `welcome_0`, `welcome_1`, etc. and are tracked separately via `progress_welcome`.
+
+ ### Form Items for User Metadata
+
+ Collect user information (demographics, expertise) before annotation tasks using form items in `data_welcome`.
+ Form items have `text` (label/question) and `form` (field type: `null`, `"string"`, `"number"`, `"choices"`, or `"script"`).
+ Documents must be homogeneous: all form items or all evaluation items.
+
+ ```python
+ {
+   "data_welcome": [
+     [
+       {"text": "What is your native language?", "form": "string"},
+       {"text": "Rate your expertise (1-10)", "form": "number"}
+     ]
+   ]
+ }
+ ```
+
+ <img width="400" alt="Screenshot of a user form" src="https://github.com/user-attachments/assets/2310e8dc-98e9-4abf-8a27-6781b0094efe" />
+
+
+ It is possible to automatically collect additional information from the host system using the `"script"` field type.
+ Typically such a form document (or a sequence of them) would be stored in `"data_welcome"` so that it is both mandatory and shown to all users.
+ See [examples/user_info_form.json](examples/user_info_form.json).
+
  ### Single-stream Assignment

  All annotators draw from a shared pool with random assignment:
@@ -299,11 +358,14 @@ All annotators draw from a shared pool with random assignment:
  # ESA: error spans and scores
  "protocol": "ESA",
  "users": 50, # number of annotators (can also be a list, see below)
+ "docs_per_user": 10, # optional: show goodbye after N documents per user
  },
  "data": [...], # list of all items (shared among all annotators)
  }
  ```

+ Set `docs_per_user` to limit how many documents each user annotates before seeing the goodbye message (for single-stream, this is the number of documents).
+
  ### Dynamic Assignment

  The `dynamic` assignment type intelligently selects items based on current model performance to focus annotation effort on top-performing models using contrastive comparisons.
@@ -320,11 +382,14 @@ All items must contain outputs from all models for this assignment type to work
  "dynamic_contrastive_models": 2, # how many models to compare per item (optional, default: 1)
  "dynamic_first": 5, # annotations per model before dynamic kicks in (optional, default: 5)
  "dynamic_backoff": 0.1, # probability of uniform sampling (optional, default: 0)
+ "docs_per_user": 20, # optional: show goodbye after N documents per user
  },
  "data": [...], # list of all items (shared among all annotators)
  }
  ```

+ Set `docs_per_user` to limit how many documents each user annotates before seeing the goodbye message (for dynamic, this is roughly the number of documents × models).
+
  **How it works:**
  1. Initial phase: Each model gets `dynamic_first` annotations with fully random contrastive evaluation
  2. Dynamic phase: After the initial phase, top `dynamic_top` models (by average score) are identified
@@ -412,6 +477,14 @@ When tokens are supplied, the dashboard will try to show model rankings based on

  Customize the goodbye message shown to users when they complete all annotations using the `instructions_goodbye` field in campaign info. Supports arbitrary HTML for styling and formatting with variable replacement: `${TOKEN}` (completion token) and `${USER_ID}` (user ID). Default: `"If someone asks you for a token of completion, show them: ${TOKEN}"`.

+ ### Prolific Integration
+
+ Use task-based assignment with Prolific. For each task, Pearmut generates a unique URL which can be uploaded to Prolific's interface. Add a redirect (on completion) to `instructions_goodbye`:
+ ```json
+ "instructions_goodbye": "<a href='https://app.prolific.com/submissions/complete?cc=${TOKEN}'>Click here to return to Prolific</a>"
+ ```
+ The `${TOKEN}` is automatically replaced based on passing attention checks (see [Attention checks](#tutorial-and-attention-checks) and [Pre-defined tokens](#pre-defined-user-ids-and-tokens)).
+
  ## Terminology

  - **Campaign**: An annotation project that contains configuration, data, and user assignments. Each campaign has a unique identifier and is defined in a JSON file.
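The new README sections above document `data_welcome`, form items, and `docs_per_user` separately; the sketch below (illustrative only, not taken from the package) combines them in one campaign file, with the regular `data` items elided.

```python
# Illustrative campaign combining the fields documented in the 1.1.0 README above.
# Only keys shown in that README are used; the concrete items inside "data" are omitted.
{
    "info": {
        "protocol": "ESA",
        "users": 10,             # number of annotators
        "docs_per_user": 5,      # show the goodbye message after 5 documents per user
    },
    "campaign_id": "example_with_welcome",
    "data_welcome": [
        [   # one welcome document containing only form items
            {"text": "What is your native language?", "form": "string"},
            {"text": "Years of translation experience", "form": "number"},
        ],
    ],
    "data": [...],               # regular annotation documents (schema not shown here)
}
```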
{pearmut-1.0.2 → pearmut-1.1.0}/README.md
@@ -14,12 +14,15 @@
  - [Assignment Types](#assignment-types)
  - [Advanced Features](#advanced-features)
  - [Pre-filled Error Spans (ESA<sup>AI</sup>)](#pre-filled-error-spans-esaai)
+ - [Custom MQM Taxonomy](#custom-mqm-taxonomy)
  - [Tutorial and Attention Checks](#tutorial-and-attention-checks)
+ - [Form Items for User Metadata](#form-items-for-user-metadata)
  - [Pre-defined User IDs and Tokens](#pre-defined-user-ids-and-tokens)
  - [Multimodal Annotations](#multimodal-annotations)
  - [Hosting Assets](#hosting-assets)
  - [Campaign Management](#campaign-management)
  - [Custom Completion Messages](#custom-completion-messages)
+ - [Prolific Integration](#prolific-integration)
  - [CLI Commands](#cli-commands)
  - [Terminology](#terminology)
  - [Development](#development)
@@ -120,6 +123,7 @@ The `shuffle` parameter in campaign `info` controls this behavior:
  "data": [...]
  }
  ```
+ Documents in `data_welcome` are not shuffled, so they do not need to contain the same models in all documents.

  ### Showing Model Names

@@ -176,6 +180,33 @@ Enable a textfield for post-editing or translation tasks using the `textfield` p
  - `"visible"`: Textfield always visible
  - `"prefilled"`: Textfield visible and pre-filled with model output for post-editing

+ ### Custom MQM Taxonomy
+
+ For MQM protocol campaigns, you can define a custom error taxonomy instead of using the default MQM categories. Specify `mqm_categories` in the campaign `info` section as a dictionary mapping main categories to lists of subcategories:
+
+
+ ```python
+ {
+   "info": {
+     "assignment": "task-based",
+     "protocol": "MQM",
+     "mqm_categories": {
+       "": [],  # Empty selection option
+       "General": ["", "Accuracy", "Fluency"],
+       "Audio-specific": ["", "Inaudible", "Background noise", "Speaker overlap", "Misinterpretation"],
+       "Style": ["", "Awkward", "Embarrassing"],
+       "Unknown": []  # Category with no subcategories
+     }
+   },
+   "campaign_id": "custom_mqm_example",
+   "data": [...]
+ }
+ ```
+
+ If `mqm_categories` is not provided, the default MQM taxonomy will be used. The empty string key `""` provides an unselected state in the dropdown. Categories with empty subcategory lists (e.g., `"Style": []`) do not require a subcategory selection.
+
+ See [examples/custom_mqm.json](examples/custom_mqm.json) for a complete example.
+
  ### Custom Instructions

  Set campaign-level instructions using the `instructions` field in `info` (supports HTML).
@@ -265,6 +296,34 @@ The `score_greaterthan` field specifies the index of the candidate that must hav
  See [examples/tutorial/esa_deen.json](examples/tutorial/esa_deen.json) for a mock campaign with a fully prepared ESA tutorial.
  To use it, simply extract the `data` attribute and prefix it to each task in your campaign.

+ #### Universal Tutorial Items with `data_welcome`
+
+ Use `data_welcome` to add tutorial items that users must complete before starting regular tasks. The structure is a list of documents (same as `data`). Welcome items have IDs `welcome_0`, `welcome_1`, etc. and are tracked separately via `progress_welcome`.
+
+ ### Form Items for User Metadata
+
+ Collect user information (demographics, expertise) before annotation tasks using form items in `data_welcome`.
+ Form items have `text` (label/question) and `form` (field type: `null`, `"string"`, `"number"`, `"choices"`, or `"script"`).
+ Documents must be homogeneous: all form items or all evaluation items.
+
+ ```python
+ {
+   "data_welcome": [
+     [
+       {"text": "What is your native language?", "form": "string"},
+       {"text": "Rate your expertise (1-10)", "form": "number"}
+     ]
+   ]
+ }
+ ```
+
+ <img width="400" alt="Screenshot of a user form" src="https://github.com/user-attachments/assets/2310e8dc-98e9-4abf-8a27-6781b0094efe" />
+
+
+ It is possible to automatically collect additional information from the host system using the `"script"` field type.
+ Typically such a form document (or a sequence of them) would be stored in `"data_welcome"` so that it is both mandatory and shown to all users.
+ See [examples/user_info_form.json](examples/user_info_form.json).
+
  ### Single-stream Assignment

  All annotators draw from a shared pool with random assignment:
@@ -278,11 +337,14 @@ All annotators draw from a shared pool with random assignment:
  # ESA: error spans and scores
  "protocol": "ESA",
  "users": 50, # number of annotators (can also be a list, see below)
+ "docs_per_user": 10, # optional: show goodbye after N documents per user
  },
  "data": [...], # list of all items (shared among all annotators)
  }
  ```

+ Set `docs_per_user` to limit how many documents each user annotates before seeing the goodbye message (for single-stream, this is the number of documents).
+
  ### Dynamic Assignment

  The `dynamic` assignment type intelligently selects items based on current model performance to focus annotation effort on top-performing models using contrastive comparisons.
@@ -299,11 +361,14 @@ All items must contain outputs from all models for this assignment type to work
  "dynamic_contrastive_models": 2, # how many models to compare per item (optional, default: 1)
  "dynamic_first": 5, # annotations per model before dynamic kicks in (optional, default: 5)
  "dynamic_backoff": 0.1, # probability of uniform sampling (optional, default: 0)
+ "docs_per_user": 20, # optional: show goodbye after N documents per user
  },
  "data": [...], # list of all items (shared among all annotators)
  }
  ```

+ Set `docs_per_user` to limit how many documents each user annotates before seeing the goodbye message (for dynamic, this is roughly the number of documents × models).
+
  **How it works:**
  1. Initial phase: Each model gets `dynamic_first` annotations with fully random contrastive evaluation
  2. Dynamic phase: After the initial phase, top `dynamic_top` models (by average score) are identified
@@ -391,6 +456,14 @@ When tokens are supplied, the dashboard will try to show model rankings based on

  Customize the goodbye message shown to users when they complete all annotations using the `instructions_goodbye` field in campaign info. Supports arbitrary HTML for styling and formatting with variable replacement: `${TOKEN}` (completion token) and `${USER_ID}` (user ID). Default: `"If someone asks you for a token of completion, show them: ${TOKEN}"`.

+ ### Prolific Integration
+
+ Use task-based assignment with Prolific. For each task, Pearmut generates a unique URL which can be uploaded to Prolific's interface. Add a redirect (on completion) to `instructions_goodbye`:
+ ```json
+ "instructions_goodbye": "<a href='https://app.prolific.com/submissions/complete?cc=${TOKEN}'>Click here to return to Prolific</a>"
+ ```
+ The `${TOKEN}` is automatically replaced based on passing attention checks (see [Attention checks](#tutorial-and-attention-checks) and [Pre-defined tokens](#pre-defined-user-ids-and-tokens)).
+
  ## Terminology

  - **Campaign**: An annotation project that contains configuration, data, and user assignments. Each campaign has a unique identifier and is defined in a JSON file.
@@ -467,4 +540,4 @@ If you use this work in your paper, please cite as following.
  ```

  Contributions are welcome! Please reach out to [Vilém Zouhar](mailto:vilem.zouhar@gmail.com).
- See changes in [CHANGELOG.md](CHANGELOG.md).
+ See changes in [CHANGELOG.md](CHANGELOG.md).
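For the Prolific workflow documented above, the redirect line is meant to sit inside the campaign `info`; a minimal sketch follows (based only on fields shown in this diff, with placeholder values and the items elided).

```python
# Sketch of an "info" block for a Prolific-facing campaign; values are placeholders.
{
    "info": {
        "assignment": "task-based",
        "protocol": "ESA",
        "instructions_goodbye": (
            "<a href='https://app.prolific.com/submissions/complete?cc=${TOKEN}'>"
            "Click here to return to Prolific</a>"
        ),
    },
    "campaign_id": "prolific_example",
    "data": [...],
}
```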
{pearmut-1.0.2 → pearmut-1.1.0}/pearmut.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: pearmut
- Version: 1.0.2
+ Version: 1.1.0
  Summary: A tool for evaluation of model outputs, primarily MT.
  Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
  License: MIT
@@ -35,12 +35,15 @@ Dynamic: license-file
  - [Assignment Types](#assignment-types)
  - [Advanced Features](#advanced-features)
  - [Pre-filled Error Spans (ESA<sup>AI</sup>)](#pre-filled-error-spans-esaai)
+ - [Custom MQM Taxonomy](#custom-mqm-taxonomy)
  - [Tutorial and Attention Checks](#tutorial-and-attention-checks)
+ - [Form Items for User Metadata](#form-items-for-user-metadata)
  - [Pre-defined User IDs and Tokens](#pre-defined-user-ids-and-tokens)
  - [Multimodal Annotations](#multimodal-annotations)
  - [Hosting Assets](#hosting-assets)
  - [Campaign Management](#campaign-management)
  - [Custom Completion Messages](#custom-completion-messages)
+ - [Prolific Integration](#prolific-integration)
  - [CLI Commands](#cli-commands)
  - [Terminology](#terminology)
  - [Development](#development)
@@ -141,6 +144,7 @@ The `shuffle` parameter in campaign `info` controls this behavior:
  "data": [...]
  }
  ```
+ Documents in `data_welcome` are not shuffled, so they do not need to contain the same models in all documents.

  ### Showing Model Names

@@ -197,6 +201,33 @@ Enable a textfield for post-editing or translation tasks using the `textfield` p
  - `"visible"`: Textfield always visible
  - `"prefilled"`: Textfield visible and pre-filled with model output for post-editing

+ ### Custom MQM Taxonomy
+
+ For MQM protocol campaigns, you can define a custom error taxonomy instead of using the default MQM categories. Specify `mqm_categories` in the campaign `info` section as a dictionary mapping main categories to lists of subcategories:
+
+
+ ```python
+ {
+   "info": {
+     "assignment": "task-based",
+     "protocol": "MQM",
+     "mqm_categories": {
+       "": [],  # Empty selection option
+       "General": ["", "Accuracy", "Fluency"],
+       "Audio-specific": ["", "Inaudible", "Background noise", "Speaker overlap", "Misinterpretation"],
+       "Style": ["", "Awkward", "Embarrassing"],
+       "Unknown": []  # Category with no subcategories
+     }
+   },
+   "campaign_id": "custom_mqm_example",
+   "data": [...]
+ }
+ ```
+
+ If `mqm_categories` is not provided, the default MQM taxonomy will be used. The empty string key `""` provides an unselected state in the dropdown. Categories with empty subcategory lists (e.g., `"Style": []`) do not require a subcategory selection.
+
+ See [examples/custom_mqm.json](examples/custom_mqm.json) for a complete example.
+
  ### Custom Instructions

  Set campaign-level instructions using the `instructions` field in `info` (supports HTML).
@@ -286,6 +317,34 @@ The `score_greaterthan` field specifies the index of the candidate that must hav
  See [examples/tutorial/esa_deen.json](examples/tutorial/esa_deen.json) for a mock campaign with a fully prepared ESA tutorial.
  To use it, simply extract the `data` attribute and prefix it to each task in your campaign.

+ #### Universal Tutorial Items with `data_welcome`
+
+ Use `data_welcome` to add tutorial items that users must complete before starting regular tasks. The structure is a list of documents (same as `data`). Welcome items have IDs `welcome_0`, `welcome_1`, etc. and are tracked separately via `progress_welcome`.
+
+ ### Form Items for User Metadata
+
+ Collect user information (demographics, expertise) before annotation tasks using form items in `data_welcome`.
+ Form items have `text` (label/question) and `form` (field type: `null`, `"string"`, `"number"`, `"choices"`, or `"script"`).
+ Documents must be homogeneous: all form items or all evaluation items.
+
+ ```python
+ {
+   "data_welcome": [
+     [
+       {"text": "What is your native language?", "form": "string"},
+       {"text": "Rate your expertise (1-10)", "form": "number"}
+     ]
+   ]
+ }
+ ```
+
+ <img width="400" alt="Screenshot of a user form" src="https://github.com/user-attachments/assets/2310e8dc-98e9-4abf-8a27-6781b0094efe" />
+
+
+ It is possible to automatically collect additional information from the host system using the `"script"` field type.
+ Typically such a form document (or a sequence of them) would be stored in `"data_welcome"` so that it is both mandatory and shown to all users.
+ See [examples/user_info_form.json](examples/user_info_form.json).
+
  ### Single-stream Assignment

  All annotators draw from a shared pool with random assignment:
@@ -299,11 +358,14 @@ All annotators draw from a shared pool with random assignment:
  # ESA: error spans and scores
  "protocol": "ESA",
  "users": 50, # number of annotators (can also be a list, see below)
+ "docs_per_user": 10, # optional: show goodbye after N documents per user
  },
  "data": [...], # list of all items (shared among all annotators)
  }
  ```

+ Set `docs_per_user` to limit how many documents each user annotates before seeing the goodbye message (for single-stream, this is the number of documents).
+
  ### Dynamic Assignment

  The `dynamic` assignment type intelligently selects items based on current model performance to focus annotation effort on top-performing models using contrastive comparisons.
@@ -320,11 +382,14 @@ All items must contain outputs from all models for this assignment type to work
  "dynamic_contrastive_models": 2, # how many models to compare per item (optional, default: 1)
  "dynamic_first": 5, # annotations per model before dynamic kicks in (optional, default: 5)
  "dynamic_backoff": 0.1, # probability of uniform sampling (optional, default: 0)
+ "docs_per_user": 20, # optional: show goodbye after N documents per user
  },
  "data": [...], # list of all items (shared among all annotators)
  }
  ```

+ Set `docs_per_user` to limit how many documents each user annotates before seeing the goodbye message (for dynamic, this is roughly the number of documents × models).
+
  **How it works:**
  1. Initial phase: Each model gets `dynamic_first` annotations with fully random contrastive evaluation
  2. Dynamic phase: After the initial phase, top `dynamic_top` models (by average score) are identified
@@ -412,6 +477,14 @@ When tokens are supplied, the dashboard will try to show model rankings based on

  Customize the goodbye message shown to users when they complete all annotations using the `instructions_goodbye` field in campaign info. Supports arbitrary HTML for styling and formatting with variable replacement: `${TOKEN}` (completion token) and `${USER_ID}` (user ID). Default: `"If someone asks you for a token of completion, show them: ${TOKEN}"`.

+ ### Prolific Integration
+
+ Use task-based assignment with Prolific. For each task, Pearmut generates a unique URL which can be uploaded to Prolific's interface. Add a redirect (on completion) to `instructions_goodbye`:
+ ```json
+ "instructions_goodbye": "<a href='https://app.prolific.com/submissions/complete?cc=${TOKEN}'>Click here to return to Prolific</a>"
+ ```
+ The `${TOKEN}` is automatically replaced based on passing attention checks (see [Attention checks](#tutorial-and-attention-checks) and [Pre-defined tokens](#pre-defined-user-ids-and-tokens)).
+
  ## Terminology

  - **Campaign**: An annotation project that contains configuration, data, and user assignments. Each campaign has a unique identifier and is defined in a JSON file.
{pearmut-1.0.2 → pearmut-1.1.0}/pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "pearmut"
- version = "1.0.2"
+ version = "1.1.0"
  description = "A tool for evaluation of model outputs, primarily MT."
  readme = "README.md"
  license = { text = "MIT" }
@@ -31,7 +31,7 @@ Repository = "https://github.com/zouharvi/pearmut"
  Issues = "https://github.com/zouharvi/pearmut/issues"

  [tool.setuptools]
- package-dir = { "pearmut" = "server" }
+ package-dir = { pearmut = "server" }
  packages = ["pearmut"]

  [build-system]
{pearmut-1.0.2 → pearmut-1.1.0}/server/app.py
@@ -49,7 +49,7 @@ for campaign_id in progress_data.keys():
  class LogResponseRequest(BaseModel):
      campaign_id: str
      user_id: str
-     item_i: int
+     item_i: int | str
      payload: dict[str, Any]


@@ -124,7 +124,7 @@ async def _get_next_item(request: NextItemRequest):
  class GetItemRequest(BaseModel):
      campaign_id: str
      user_id: str
-     item_i: int
+     item_i: int | str


  @app.post("/get-i-item")
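The widened `item_i` type lines up with the welcome items documented in the README diff above, which are addressed by string IDs such as `welcome_0`. A hypothetical request body that the updated models would now accept (values are made up):

```python
# Illustrative LogResponseRequest payload after the int | str change.
{
    "campaign_id": "example_with_welcome",
    "user_id": "user_003",
    "item_i": "welcome_0",  # string ID of a welcome/tutorial item
    "payload": {},          # annotation-specific content; schema not shown in this diff
}
```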
@@ -179,7 +179,11 @@ async def _dashboard_data(request: DashboardDataRequest):
  ]

  # Add threshold pass/fail status (only when user is complete)
- if all(entry["progress"]):
+ if (
+     tasks_data[campaign_id]["info"]["assignment"] != "dynamic" and all(v in {"completed", "completed_foreign"} for v in entry["progress"])
+ ) or (
+     tasks_data[campaign_id]["info"]["assignment"] == "dynamic" and all(v in {"completed", "completed_foreign"} for mv in entry["progress"] for v in mv.values())
+ ):
      entry["threshold_passed"] = check_validation_threshold(
          tasks_data, progress_data, campaign_id, user_id
      )
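Paraphrasing the new completion check as a standalone helper (hypothetical code, not part of the package): non-dynamic campaigns store one status string per progress entry, while dynamic campaigns store a model-to-status mapping per entry, so the two shapes are flattened differently before testing for completion.

```python
# Hypothetical helper mirroring the check added above; names are illustrative.
DONE = {"completed", "completed_foreign"}

def user_finished(assignment: str, progress: list) -> bool:
    if assignment == "dynamic":
        # dynamic campaigns: each entry maps model name -> status
        return all(status in DONE for entry in progress for status in entry.values())
    # other campaigns: each entry is a single status string
    return all(status in DONE for status in progress)
```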
@@ -376,7 +380,6 @@ async def _download_annotations(
      # NOTE: currently not checking tokens for progress download as it is non-destructive
      # token: list[str] = Query()
  ):
-
      output = {}
      for campaign_id in campaign_id:
          output_path = f"{ROOT}/data/outputs/{campaign_id}.jsonl"
@@ -403,7 +406,6 @@
  async def _download_progress(
      campaign_id: list[str] = Query(), token: list[str] = Query()
  ):
-
      if len(campaign_id) != len(token):
          return JSONResponse(
              content="Mismatched campaign_id and token count", status_code=400
@@ -435,6 +437,7 @@ if not os.path.exists(static_dir + "index.html"):
          "Static directory not found. Please build the frontend first."
      )

+
  # Serve HTML files directly without redirect
  @app.get("/annotate")
  async def serve_annotate():