pearmut 0.2.2__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. {pearmut-0.2.2 → pearmut-0.2.3}/PKG-INFO +59 -7
  2. {pearmut-0.2.2 → pearmut-0.2.3}/README.md +58 -5
  3. {pearmut-0.2.2 → pearmut-0.2.3}/pearmut.egg-info/PKG-INFO +59 -7
  4. {pearmut-0.2.2 → pearmut-0.2.3}/pearmut.egg-info/requires.txt +0 -1
  5. {pearmut-0.2.2 → pearmut-0.2.3}/pyproject.toml +2 -2
  6. {pearmut-0.2.2 → pearmut-0.2.3}/server/app.py +19 -2
  7. {pearmut-0.2.2 → pearmut-0.2.3}/server/assignment.py +29 -11
  8. pearmut-0.2.3/server/cli.py +346 -0
  9. {pearmut-0.2.2 → pearmut-0.2.3}/server/static/dashboard.bundle.js +1 -1
  10. {pearmut-0.2.2 → pearmut-0.2.3}/server/static/dashboard.html +1 -1
  11. pearmut-0.2.3/server/static/listwise.html +77 -0
  12. pearmut-0.2.3/server/static/pointwise.html +69 -0
  13. {pearmut-0.2.2 → pearmut-0.2.3}/server/utils.py +72 -4
  14. pearmut-0.2.2/server/cli.py +0 -226
  15. pearmut-0.2.2/server/static/listwise.html +0 -77
  16. pearmut-0.2.2/server/static/pointwise.html +0 -69
  17. {pearmut-0.2.2 → pearmut-0.2.3}/LICENSE +0 -0
  18. {pearmut-0.2.2 → pearmut-0.2.3}/pearmut.egg-info/SOURCES.txt +0 -0
  19. {pearmut-0.2.2 → pearmut-0.2.3}/pearmut.egg-info/dependency_links.txt +0 -0
  20. {pearmut-0.2.2 → pearmut-0.2.3}/pearmut.egg-info/entry_points.txt +0 -0
  21. {pearmut-0.2.2 → pearmut-0.2.3}/pearmut.egg-info/top_level.txt +0 -0
  22. {pearmut-0.2.2 → pearmut-0.2.3}/server/static/assets/favicon.svg +0 -0
  23. {pearmut-0.2.2 → pearmut-0.2.3}/server/static/assets/style.css +0 -0
  24. {pearmut-0.2.2 → pearmut-0.2.3}/server/static/index.html +0 -0
  25. {pearmut-0.2.2 → pearmut-0.2.3}/server/static/listwise.bundle.js +0 -0
  26. {pearmut-0.2.2 → pearmut-0.2.3}/server/static/pointwise.bundle.js +0 -0
  27. {pearmut-0.2.2 → pearmut-0.2.3}/setup.cfg +0 -0

{pearmut-0.2.2 → pearmut-0.2.3}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: pearmut
- Version: 0.2.2
+ Version: 0.2.3
  Summary: A tool for evaluation of model outputs, primarily MT.
  Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
  License: apache-2.0
@@ -16,7 +16,6 @@ Requires-Dist: wonderwords>=3.0.0
  Requires-Dist: psutil>=7.1.0
  Provides-Extra: dev
  Requires-Dist: pytest; extra == "dev"
- Requires-Dist: pynpm>=0.3.0; extra == "dev"
  Dynamic: license-file

  # Pearmut 🍐
@@ -165,8 +164,10 @@ You can add validation rules to items for tutorials or attention checks. Items w
  - Tutorial items: Include `allow_skip: true` and `warning` to let users skip after seeing the feedback
  - Loud attention checks: Include `warning` without `allow_skip` to force users to retry
  - Silent attention checks: Omit `warning` to silently log failures without user notification (useful for quality control with bad translations)
+
  For listwise template, `validation` is an array where each element corresponds to a candidate.
- The dashboard shows failed/total validation checks per user.
+ The dashboard shows failed/total validation checks per user, and ✅/❌ based on whether they pass the threshold.
+ Set `validation_threshold` in `info` to control pass/fail: integer for max failed count, float in [0,1) for max failed proportion.
  See [examples/tutorial_pointwise.json](examples/tutorial_pointwise.json) and [examples/tutorial_listwise.json](examples/tutorial_listwise.json) for complete examples.

  ## Single-stream Assignment
@@ -181,7 +182,7 @@ We also support a simple allocation where all annotators draw from the same pool
  "protocol_score": True, # collect scores
  "protocol_error_spans": True, # collect error spans
  "protocol_error_categories": False, # do not collect MQM categories, so ESA
- "num_users": 50, # number of annotators
+ "users": 50, # number of annotators (can also be a list, see below)
  },
  "data": [...], # list of all items (shared among all annotators)
  }
@@ -196,12 +197,31 @@ We also support dynamic allocation of annotations (`dynamic`, not yet ⚠️), w
  "assignment": "dynamic",
  "template": "listwise",
  "protocol_k": 5,
- "num_users": 50,
+ "users": 50,
  },
  "data": [...], # list of all items
  }
  ```

+ ## Pre-defined User IDs and Tokens
+
+ By default, user IDs and completion tokens are automatically generated. The `users` field can be:
+ - A number (e.g., `50`) to generate that many random user IDs
+ - A list of strings (e.g., `["alice", "bob"]`) to use specific user IDs
+ - A list of dictionaries to specify user IDs with custom tokens:
+ ```python
+ {
+ "info": {
+ ...
+ "users": [
+ {"user_id": "alice", "token_pass": "alice_done", "token_fail": "alice_fail"},
+ {"user_id": "bob", "token_pass": "bob_done"} # missing tokens are auto-generated
+ ],
+ },
+ ...
+ }
+ ```
+
  To load a campaign into the server, run the following.
  It will fail if an existing campaign with the same `campaign_id` already exists, unless you specify `-o/--overwrite`.
  It will also output a secret management link. Then, launch the server:
@@ -234,8 +254,7 @@ and independently of that select your protocol template:
  When adding new campaigns or launching pearmut, a management link is shown that gives an overview of annotator progress but also an easy access to the annotation links or resetting the task progress (no data will be lost).
  This is also the place where you can download all progress and collected annotations (these files exist also locally but this might be more convenient).

- <img width="800" alt="Management dashboard" src="https://github.com/user-attachments/assets/82470693-a5ec-4d0e-8989-e93d5b0bb840" />
-
+ <img width="800" alt="Management dashboard" src="https://github.com/user-attachments/assets/800a1741-5f41-47ac-9d5d-5cbf6abfc0e6" />

  Additionally, at the end of an annotation, a token of completion is shown which can be compared to the correct one that you can download in metadat from the dashboard.
  An intentionally incorrect token can be shown if the annotations don't pass quality control.
@@ -252,6 +271,39 @@ Tip: make sure the elements are already appropriately styled.

  <img width="1000" alt="Preview of multimodal elements in Pearmut" src="https://github.com/user-attachments/assets/77c4fa96-ee62-4e46-8e78-fd16e9007956" />

+ ## CLI Commands
+
+ Pearmut provides the following commands:
+
+ - `pearmut add <file(s)>`: Add one or more campaign JSON files. Supports wildcards (e.g., `pearmut add examples/*.json`).
+ - `-o/--overwrite`: Overwrite existing campaigns with the same ID.
+ - `--server <url>`: Prefix server URL for protocol links (default: `http://localhost:8001`).
+ - `pearmut run`: Start the Pearmut server.
+ - `--port <port>`: Port to run the server on (default: 8001).
+ - `--server <url>`: Prefix server URL for protocol links.
+ - `pearmut purge [campaign]`: Remove campaign data.
+ - Without arguments: Purges all campaigns (tasks, outputs, progress).
+ - With campaign name: Purges only the specified campaign's data.
+
+
+ ## Hosting Assets
+
+ If you need to host local assets (e.g., audio files, images, videos) via Pearmut, you can use the `assets` key in your campaign file.
+ When present, this directory is symlinked to the `static/` directory so its contents become accessible from the server.
+
+ ```python
+ {
+ "campaign_id": "my_campaign",
+ "info": {
+ "assets": "videos", # path to directory containing assets
+ ...
+ },
+ "data": [ ... ]
+ }
+ ```
+
+ For example, if `videos` contains `audio.mp3`, it will be accessible at `localhost:8001/assets/videos/audio.mp3`.
+ The path can be absolute or relative to your current working directory.

  ## Development

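
As an illustration of the `validation_threshold` semantics added above (an integer bounds the number of failed validation checks, a float in [0,1) bounds the failed proportion), a campaign `info` block might look like the following sketch. The values are illustrative, not taken from this diff.

```python
{
    "campaign_id": "example_campaign",  # hypothetical campaign
    "info": {
        "assignment": "task-based",
        "template": "pointwise",
        "validation_threshold": 2,      # at most 2 failed validation checks -> ✅ on the dashboard
        # or, as a proportion:
        # "validation_threshold": 0.1,  # at most 10% of validation checks may fail
        "users": 10,
    },
    "data": [...],                      # items, some carrying `validation` rules
}
```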

{pearmut-0.2.2 → pearmut-0.2.3}/README.md

@@ -144,8 +144,10 @@ You can add validation rules to items for tutorials or attention checks. Items w
  - Tutorial items: Include `allow_skip: true` and `warning` to let users skip after seeing the feedback
  - Loud attention checks: Include `warning` without `allow_skip` to force users to retry
  - Silent attention checks: Omit `warning` to silently log failures without user notification (useful for quality control with bad translations)
+
  For listwise template, `validation` is an array where each element corresponds to a candidate.
- The dashboard shows failed/total validation checks per user.
+ The dashboard shows failed/total validation checks per user, and ✅/❌ based on whether they pass the threshold.
+ Set `validation_threshold` in `info` to control pass/fail: integer for max failed count, float in [0,1) for max failed proportion.
  See [examples/tutorial_pointwise.json](examples/tutorial_pointwise.json) and [examples/tutorial_listwise.json](examples/tutorial_listwise.json) for complete examples.

  ## Single-stream Assignment
@@ -160,7 +162,7 @@ We also support a simple allocation where all annotators draw from the same pool
  "protocol_score": True, # collect scores
  "protocol_error_spans": True, # collect error spans
  "protocol_error_categories": False, # do not collect MQM categories, so ESA
- "num_users": 50, # number of annotators
+ "users": 50, # number of annotators (can also be a list, see below)
  },
  "data": [...], # list of all items (shared among all annotators)
  }
@@ -175,12 +177,31 @@ We also support dynamic allocation of annotations (`dynamic`, not yet ⚠️), w
  "assignment": "dynamic",
  "template": "listwise",
  "protocol_k": 5,
- "num_users": 50,
+ "users": 50,
  },
  "data": [...], # list of all items
  }
  ```

+ ## Pre-defined User IDs and Tokens
+
+ By default, user IDs and completion tokens are automatically generated. The `users` field can be:
+ - A number (e.g., `50`) to generate that many random user IDs
+ - A list of strings (e.g., `["alice", "bob"]`) to use specific user IDs
+ - A list of dictionaries to specify user IDs with custom tokens:
+ ```python
+ {
+ "info": {
+ ...
+ "users": [
+ {"user_id": "alice", "token_pass": "alice_done", "token_fail": "alice_fail"},
+ {"user_id": "bob", "token_pass": "bob_done"} # missing tokens are auto-generated
+ ],
+ },
+ ...
+ }
+ ```
+
  To load a campaign into the server, run the following.
  It will fail if an existing campaign with the same `campaign_id` already exists, unless you specify `-o/--overwrite`.
  It will also output a secret management link. Then, launch the server:
@@ -213,8 +234,7 @@ and independently of that select your protocol template:
  When adding new campaigns or launching pearmut, a management link is shown that gives an overview of annotator progress but also an easy access to the annotation links or resetting the task progress (no data will be lost).
  This is also the place where you can download all progress and collected annotations (these files exist also locally but this might be more convenient).

- <img width="800" alt="Management dashboard" src="https://github.com/user-attachments/assets/82470693-a5ec-4d0e-8989-e93d5b0bb840" />
-
+ <img width="800" alt="Management dashboard" src="https://github.com/user-attachments/assets/800a1741-5f41-47ac-9d5d-5cbf6abfc0e6" />

  Additionally, at the end of an annotation, a token of completion is shown which can be compared to the correct one that you can download in metadat from the dashboard.
  An intentionally incorrect token can be shown if the annotations don't pass quality control.
@@ -231,6 +251,39 @@ Tip: make sure the elements are already appropriately styled.

  <img width="1000" alt="Preview of multimodal elements in Pearmut" src="https://github.com/user-attachments/assets/77c4fa96-ee62-4e46-8e78-fd16e9007956" />

+ ## CLI Commands
+
+ Pearmut provides the following commands:
+
+ - `pearmut add <file(s)>`: Add one or more campaign JSON files. Supports wildcards (e.g., `pearmut add examples/*.json`).
+ - `-o/--overwrite`: Overwrite existing campaigns with the same ID.
+ - `--server <url>`: Prefix server URL for protocol links (default: `http://localhost:8001`).
+ - `pearmut run`: Start the Pearmut server.
+ - `--port <port>`: Port to run the server on (default: 8001).
+ - `--server <url>`: Prefix server URL for protocol links.
+ - `pearmut purge [campaign]`: Remove campaign data.
+ - Without arguments: Purges all campaigns (tasks, outputs, progress).
+ - With campaign name: Purges only the specified campaign's data.
+
+
+ ## Hosting Assets
+
+ If you need to host local assets (e.g., audio files, images, videos) via Pearmut, you can use the `assets` key in your campaign file.
+ When present, this directory is symlinked to the `static/` directory so its contents become accessible from the server.
+
+ ```python
+ {
+ "campaign_id": "my_campaign",
+ "info": {
+ "assets": "videos", # path to directory containing assets
+ ...
+ },
+ "data": [ ... ]
+ }
+ ```
+
+ For example, if `videos` contains `audio.mp3`, it will be accessible at `localhost:8001/assets/videos/audio.mp3`.
+ The path can be absolute or relative to your current working directory.

  ## Development

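
Since the `token_pass`/`token_fail` fields introduced above determine which completion token an annotator sees, a small sketch of checking a reported token against the campaign's `users` entries could look as follows. The dashboard's metadata export format is not shown in this diff, so the field names are assumed to match the campaign file.

```python
def verify_token(users: list[dict], user_id: str, reported_token: str) -> str:
    """Sketch: classify the completion token an annotator reports back."""
    for user in users:
        if user.get("user_id") != user_id:
            continue
        if reported_token == user.get("token_pass"):
            return "passed"        # annotations passed quality control
        if reported_token == user.get("token_fail"):
            return "failed"        # the intentionally incorrect token was shown
        return "unknown token"     # typo or fabricated token
    return "unknown user"

# e.g. verify_token(campaign["info"]["users"], "alice", "alice_done") -> "passed"
```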

{pearmut-0.2.2 → pearmut-0.2.3}/pearmut.egg-info/PKG-INFO

The changes to pearmut.egg-info/PKG-INFO are identical to the PKG-INFO diff shown above: the version bump from 0.2.2 to 0.2.3, the removal of the `pynpm>=0.3.0` dev dependency, and the same README updates.

{pearmut-0.2.2 → pearmut-0.2.3}/pearmut.egg-info/requires.txt

@@ -5,4 +5,3 @@ psutil>=7.1.0

  [dev]
  pytest
- pynpm>=0.3.0

{pearmut-0.2.2 → pearmut-0.2.3}/pyproject.toml

@@ -1,6 +1,6 @@
  [project]
  name = "pearmut"
- version = "0.2.2"
+ version = "0.2.3"
  description = "A tool for evaluation of model outputs, primarily MT."
  readme = "README.md"
  license = { text = "apache-2.0" }
@@ -20,7 +20,7 @@ dependencies = [
  ]

  [project.optional-dependencies]
- dev = ["pytest", "pynpm >= 0.3.0"]
+ dev = ["pytest"]

  [project.scripts]
  pearmut = "pearmut.cli:main"

{pearmut-0.2.2 → pearmut-0.2.3}/server/app.py

@@ -9,7 +9,13 @@ from fastapi.staticfiles import StaticFiles
  from pydantic import BaseModel

  from .assignment import get_i_item, get_next_item, reset_task, update_progress
- from .utils import ROOT, load_progress_data, save_db_payload, save_progress_data
+ from .utils import (
+ ROOT,
+ check_validation_threshold,
+ load_progress_data,
+ save_db_payload,
+ save_progress_data,
+ )

  os.makedirs(f"{ROOT}/data/outputs", exist_ok=True)

@@ -151,6 +157,9 @@ async def _dashboard_data(request: DashboardDataRequest):
  if assignment not in ["task-based", "single-stream"]:
  return JSONResponse(content={"error": "Unsupported campaign assignment type"}, status_code=400)

+ # Get threshold info for the campaign
+ validation_threshold = tasks_data[campaign_id]["info"].get("validation_threshold")
+
  for user_id, user_val in progress_data[campaign_id].items():
  # shallow copy
  entry = dict(user_val)
@@ -159,6 +168,13 @@ async def _dashboard_data(request: DashboardDataRequest):
  for v in list(entry.get("validations", {}).values())
  ]

+ # Add threshold pass/fail status (only when user is complete)
+ if all(entry["progress"]):
+ entry["threshold_passed"] = check_validation_threshold(
+ tasks_data, progress_data, campaign_id, user_id
+ )
+ else:
+ entry["threshold_passed"] = None

  if not is_privileged:
  entry["token_correct"] = None
@@ -169,7 +185,8 @@ async def _dashboard_data(request: DashboardDataRequest):
  return JSONResponse(
  content={
  "status": "ok",
- "data": progress_new
+ "data": progress_new,
+ "validation_threshold": validation_threshold
  },
  status_code=200
  )
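
The new `check_validation_threshold` helper is imported from `server/utils.py`, whose body is not included in the hunks shown here. A minimal sketch consistent with the README semantics (integer = maximum failed count, float in [0,1) = maximum failed proportion) might look like the following; the shape of `validations` (per-item failure flags) is an assumption.

```python
def check_validation_threshold(tasks_data, progress_data, campaign_id, user_id) -> bool:
    """Sketch: True if the user's failed validation checks stay within the campaign threshold."""
    threshold = tasks_data[campaign_id]["info"].get("validation_threshold")
    # assumption: maps item identifier -> True if that validation check failed
    validations = progress_data[campaign_id][user_id].get("validations", {})
    failed = sum(1 for has_failed in validations.values() if has_failed)
    total = len(validations)

    if threshold is None or total == 0:
        return True  # no threshold configured or nothing to check
    if isinstance(threshold, float) and threshold < 1:
        return failed / total <= threshold  # proportion of failed checks
    return failed <= threshold  # absolute count of failed checks
```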

{pearmut-0.2.2 → pearmut-0.2.3}/server/assignment.py

@@ -3,18 +3,23 @@ from typing import Any

  from fastapi.responses import JSONResponse

- from .utils import get_db_log_item
+ from .utils import (
+ RESET_MARKER,
+ check_validation_threshold,
+ get_db_log_item,
+ save_db_payload,
+ )


  def _completed_response(
+ tasks_data: dict,
  progress_data: dict,
  campaign_id: str,
  user_id: str,
  ) -> JSONResponse:
  """Build a completed response with progress, time, and token."""
  user_progress = progress_data[campaign_id][user_id]
- # TODO: add check for data quality
- is_ok = True
+ is_ok = check_validation_threshold(tasks_data, progress_data, campaign_id, user_id)
  return JSONResponse(
  content={
  "status": "completed",
@@ -161,7 +166,7 @@ def get_next_item_taskbased(
  """
  user_progress = progress_data[campaign_id][user_id]
  if all(user_progress["progress"]):
- return _completed_response(progress_data, campaign_id, user_id)
+ return _completed_response(data_all, progress_data, campaign_id, user_id)

  # find first incomplete item
  item_i = min([i for i, v in enumerate(user_progress["progress"]) if not v])
@@ -208,7 +213,7 @@ def get_next_item_singlestream(
  progress = user_progress["progress"]

  if all(progress):
- return _completed_response(progress_data, campaign_id, user_id)
+ return _completed_response(data_all, progress_data, campaign_id, user_id)

  # find a random incomplete item
  incomplete_indices = [i for i, v in enumerate(progress) if not v]
@@ -261,20 +266,33 @@ def reset_task(
  ) -> JSONResponse:
  """
  Reset the task progress for the user in the specified campaign.
+ Saves a reset marker to mask existing annotations.
  """
  assignment = tasks_data[campaign_id]["info"]["assignment"]
  if assignment == "task-based":
- progress_data[campaign_id][user_id]["progress"] = (
- [False]*len(tasks_data[campaign_id]["data"][user_id])
- )
+ # Save reset marker for this user to mask existing annotations
+ num_items = len(tasks_data[campaign_id]["data"][user_id])
+ for item_i in range(num_items):
+ save_db_payload(campaign_id, {
+ "user_id": user_id,
+ "item_i": item_i,
+ "annotations": RESET_MARKER
+ })
+ progress_data[campaign_id][user_id]["progress"] = [False] * num_items
  _reset_user_time(progress_data, campaign_id, user_id)
  return JSONResponse(content={"status": "ok"}, status_code=200)
  elif assignment == "single-stream":
+ # Save reset markers for all items (shared pool)
+ num_items = len(tasks_data[campaign_id]["data"])
+ for item_i in range(num_items):
+ save_db_payload(campaign_id, {
+ "user_id": None,
+ "item_i": item_i,
+ "annotations": RESET_MARKER
+ })
  # for single-stream reset all progress
  for uid in progress_data[campaign_id]:
- progress_data[campaign_id][uid]["progress"] = (
- [False]*len(tasks_data[campaign_id]["data"])
- )
+ progress_data[campaign_id][uid]["progress"] = [False] * num_items
  _reset_user_time(progress_data, campaign_id, user_id)
  return JSONResponse(content={"status": "ok"}, status_code=200)
  else:
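
`reset_task` now appends `RESET_MARKER` payloads instead of deleting anything, so earlier annotations stay in the log but can be masked downstream. How the marker is consumed is not shown in this diff; one possible reading, sketched below, drops every annotation for an item that was recorded before the most recent matching marker (a marker with `user_id: None`, as written for single-stream resets, masks the item for all users).

```python
def mask_reset_annotations(log: list[dict], reset_marker: str) -> list[dict]:
    """Sketch: drop annotations recorded before the latest reset marker for their item."""
    kept: list[dict] = []
    for payload in log:  # assumption: `log` is the append-only per-campaign payload list
        if payload.get("annotations") == reset_marker:
            kept = [
                p for p in kept
                if not (
                    p.get("item_i") == payload.get("item_i")
                    and (payload.get("user_id") is None
                         or p.get("user_id") == payload.get("user_id"))
                )
            ]
        else:
            kept.append(payload)
    return kept
```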