pearmut 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pearmut/app.py +56 -25
- pearmut/assignment.py +340 -105
- pearmut/cli.py +185 -104
- pearmut/results_export.py +1 -1
- pearmut/static/annotate.bundle.js +1 -0
- pearmut/static/annotate.html +164 -0
- pearmut/static/dashboard.bundle.js +1 -1
- pearmut/static/dashboard.html +6 -1
- pearmut/static/index.html +1 -1
- pearmut/static/style.css +46 -0
- pearmut/utils.py +40 -21
- {pearmut-1.0.1.dist-info → pearmut-1.0.3.dist-info}/METADATA +119 -65
- pearmut-1.0.3.dist-info/RECORD +20 -0
- {pearmut-1.0.1.dist-info → pearmut-1.0.3.dist-info}/WHEEL +1 -1
- pearmut/static/basic.bundle.js +0 -1
- pearmut/static/basic.html +0 -133
- pearmut-1.0.1.dist-info/RECORD +0 -20
- {pearmut-1.0.1.dist-info → pearmut-1.0.3.dist-info}/entry_points.txt +0 -0
- {pearmut-1.0.1.dist-info → pearmut-1.0.3.dist-info}/licenses/LICENSE +0 -0
- {pearmut-1.0.1.dist-info → pearmut-1.0.3.dist-info}/top_level.txt +0 -0
pearmut/assignment.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import collections
|
|
2
|
-
import copy
|
|
3
2
|
import random
|
|
4
3
|
import statistics
|
|
5
4
|
from typing import Any
|
|
@@ -12,9 +11,19 @@ from .utils import (
|
|
|
12
11
|
check_validation_threshold,
|
|
13
12
|
get_db_log,
|
|
14
13
|
get_db_log_item,
|
|
14
|
+
is_form_document,
|
|
15
15
|
save_db_payload,
|
|
16
16
|
)
|
|
17
17
|
|
|
18
|
+
# Public campaign info fields that are sent to the client
|
|
19
|
+
CAMPAIGN_INFO_PUBLIC = {
|
|
20
|
+
"protocol",
|
|
21
|
+
"sliders",
|
|
22
|
+
"textfield",
|
|
23
|
+
"show_model_names",
|
|
24
|
+
"mqm_categories",
|
|
25
|
+
}
|
|
26
|
+
|
|
18
27
|
|
|
19
28
|
def _get_instructions(tasks_data: dict, campaign_id: str) -> str:
|
|
20
29
|
"""Get instructions: custom if provided, else protocol default, else empty."""
|
|
@@ -24,7 +33,6 @@ def _get_instructions(tasks_data: dict, campaign_id: str) -> str:
|
|
|
24
33
|
return PROTOCOL_INSTRUCTIONS.get(campaign_info.get("protocol", ""), "")
|
|
25
34
|
|
|
26
35
|
|
|
27
|
-
|
|
28
36
|
def _completed_response(
|
|
29
37
|
tasks_data: dict,
|
|
30
38
|
progress_data: dict,
|
|
@@ -56,6 +64,7 @@ def _completed_response(
|
|
|
56
64
|
content={
|
|
57
65
|
"status": "goodbye",
|
|
58
66
|
"progress": progress,
|
|
67
|
+
"progress_welcome": user_progress["progress_welcome"],
|
|
59
68
|
"time": user_progress["time"],
|
|
60
69
|
"token": token,
|
|
61
70
|
"instructions_goodbye": instructions_goodbye,
|
|
@@ -116,12 +125,34 @@ def get_i_item_taskbased(
|
|
|
116
125
|
user_id: str,
|
|
117
126
|
data_all: dict,
|
|
118
127
|
progress_data: dict,
|
|
119
|
-
item_i: int,
|
|
128
|
+
item_i: int | str, # Can be int or str like "welcome_0"
|
|
120
129
|
) -> JSONResponse:
|
|
121
130
|
"""
|
|
122
131
|
Get specific item for task-based protocol.
|
|
123
132
|
"""
|
|
124
133
|
user_progress = progress_data[campaign_id][user_id]
|
|
134
|
+
progress_welcome = user_progress["progress_welcome"]
|
|
135
|
+
|
|
136
|
+
# if welcome_X, payload is from data_welcome[X], otherwise data[user][X]
|
|
137
|
+
if isinstance(item_i, str) and item_i.startswith("welcome_"):
|
|
138
|
+
actual_index = int(item_i.split("_")[1])
|
|
139
|
+
if actual_index < 0 or actual_index >= len(
|
|
140
|
+
data_all[campaign_id]["data_welcome"]
|
|
141
|
+
):
|
|
142
|
+
return JSONResponse(
|
|
143
|
+
content="Welcome item index out of range", status_code=400
|
|
144
|
+
)
|
|
145
|
+
payload = data_all[campaign_id]["data_welcome"][actual_index]
|
|
146
|
+
else:
|
|
147
|
+
# Prevent accessing regular items unless all welcome items are complete
|
|
148
|
+
if not all(progress_welcome):
|
|
149
|
+
return JSONResponse(
|
|
150
|
+
content="Complete all welcome items before accessing regular items",
|
|
151
|
+
status_code=400,
|
|
152
|
+
)
|
|
153
|
+
if item_i < 0 or item_i >= len(data_all[campaign_id]["data"][user_id]):
|
|
154
|
+
return JSONResponse(content="Item index out of range", status_code=400)
|
|
155
|
+
payload = data_all[campaign_id]["data"][user_id][item_i]
|
|
125
156
|
|
|
126
157
|
# try to get existing annotations if any
|
|
127
158
|
items_existing = get_db_log_item(campaign_id, user_id, item_i)
|
|
@@ -133,13 +164,13 @@ def get_i_item_taskbased(
|
|
|
133
164
|
if "comment" in latest_item:
|
|
134
165
|
payload_existing["comment"] = latest_item["comment"]
|
|
135
166
|
|
|
136
|
-
|
|
137
|
-
return JSONResponse(content="Item index out of range", status_code=400)
|
|
167
|
+
is_form = is_form_document(payload)
|
|
138
168
|
|
|
139
169
|
return JSONResponse(
|
|
140
170
|
content={
|
|
141
|
-
"status": "ok",
|
|
171
|
+
"status": "form" if is_form else "ok",
|
|
142
172
|
"progress": user_progress["progress"],
|
|
173
|
+
"progress_welcome": progress_welcome,
|
|
143
174
|
"time": user_progress["time"],
|
|
144
175
|
"info": {
|
|
145
176
|
"item_i": item_i,
|
|
@@ -148,9 +179,9 @@ def get_i_item_taskbased(
|
|
|
148
179
|
| {
|
|
149
180
|
k: v
|
|
150
181
|
for k, v in data_all[campaign_id]["info"].items()
|
|
151
|
-
if k in
|
|
182
|
+
if k in CAMPAIGN_INFO_PUBLIC
|
|
152
183
|
},
|
|
153
|
-
"payload":
|
|
184
|
+
"payload": payload,
|
|
154
185
|
}
|
|
155
186
|
| ({"payload_existing": payload_existing} if payload_existing else {}),
|
|
156
187
|
status_code=200,
|
|
@@ -162,16 +193,37 @@ def get_i_item_singlestream(
|
|
|
162
193
|
user_id: str,
|
|
163
194
|
data_all: dict,
|
|
164
195
|
progress_data: dict,
|
|
165
|
-
item_i: int,
|
|
196
|
+
item_i: int | str, # Can be int or str like "welcome_0"
|
|
166
197
|
) -> JSONResponse:
|
|
167
198
|
"""
|
|
168
199
|
Get specific item for single-stream assignment.
|
|
169
200
|
"""
|
|
170
201
|
user_progress = progress_data[campaign_id][user_id]
|
|
202
|
+
progress_welcome = user_progress["progress_welcome"]
|
|
203
|
+
|
|
204
|
+
# Convert welcome_X string to integer index
|
|
205
|
+
actual_index = item_i
|
|
206
|
+
is_welcome_item = isinstance(item_i, str) and item_i.startswith("welcome_")
|
|
207
|
+
if is_welcome_item:
|
|
208
|
+
actual_index = int(item_i.split("_")[1])
|
|
209
|
+
# Validate against total number of welcome items
|
|
210
|
+
if actual_index < 0 or actual_index >= len(progress_welcome):
|
|
211
|
+
return JSONResponse(
|
|
212
|
+
content="Welcome item index out of range", status_code=400
|
|
213
|
+
)
|
|
214
|
+
else:
|
|
215
|
+
# Prevent accessing regular items unless all welcome items are complete
|
|
216
|
+
if not all(progress_welcome):
|
|
217
|
+
return JSONResponse(
|
|
218
|
+
content="Complete all welcome items before accessing regular items",
|
|
219
|
+
status_code=400,
|
|
220
|
+
)
|
|
171
221
|
|
|
172
222
|
# try to get existing annotations if any
|
|
173
|
-
#
|
|
174
|
-
items_existing = get_db_log_item(
|
|
223
|
+
# use user_id for welcome items (per-user), None for shared items
|
|
224
|
+
items_existing = get_db_log_item(
|
|
225
|
+
campaign_id, user_id if is_welcome_item else None, item_i
|
|
226
|
+
)
|
|
175
227
|
payload_existing = None
|
|
176
228
|
if items_existing:
|
|
177
229
|
# get the latest ones
|
|
@@ -180,13 +232,17 @@ def get_i_item_singlestream(
|
|
|
180
232
|
if "comment" in latest_item:
|
|
181
233
|
payload_existing["comment"] = latest_item["comment"]
|
|
182
234
|
|
|
183
|
-
if
|
|
235
|
+
if actual_index < 0 or actual_index >= len(data_all[campaign_id]["data"]):
|
|
184
236
|
return JSONResponse(content="Item index out of range", status_code=400)
|
|
185
237
|
|
|
238
|
+
payload = data_all[campaign_id]["data"][actual_index]
|
|
239
|
+
is_form = is_form_document(payload)
|
|
240
|
+
|
|
186
241
|
return JSONResponse(
|
|
187
242
|
content={
|
|
188
|
-
"status": "ok",
|
|
243
|
+
"status": "form" if is_form else "ok",
|
|
189
244
|
"progress": user_progress["progress"],
|
|
245
|
+
"progress_welcome": progress_welcome,
|
|
190
246
|
"time": user_progress["time"],
|
|
191
247
|
"info": {
|
|
192
248
|
"item_i": item_i,
|
|
@@ -195,9 +251,9 @@ def get_i_item_singlestream(
|
|
|
195
251
|
| {
|
|
196
252
|
k: v
|
|
197
253
|
for k, v in data_all[campaign_id]["info"].items()
|
|
198
|
-
if k in
|
|
254
|
+
if k in CAMPAIGN_INFO_PUBLIC
|
|
199
255
|
},
|
|
200
|
-
"payload":
|
|
256
|
+
"payload": payload,
|
|
201
257
|
}
|
|
202
258
|
| ({"payload_existing": payload_existing} if payload_existing else {}),
|
|
203
259
|
status_code=200,
|
|
@@ -214,11 +270,56 @@ def get_next_item_taskbased(
|
|
|
214
270
|
Get the next item for task-based assignment.
|
|
215
271
|
"""
|
|
216
272
|
user_progress = progress_data[campaign_id][user_id]
|
|
217
|
-
|
|
273
|
+
progress_welcome = user_progress["progress_welcome"]
|
|
274
|
+
|
|
275
|
+
# Check if there are incomplete welcome items first
|
|
276
|
+
if not all(progress_welcome):
|
|
277
|
+
# Find first incomplete welcome item
|
|
278
|
+
item_i = next(i for i, v in enumerate(progress_welcome) if not v)
|
|
279
|
+
item_id = f"welcome_{item_i}"
|
|
280
|
+
|
|
281
|
+
# try to get existing annotations if any
|
|
282
|
+
items_existing = get_db_log_item(campaign_id, user_id, item_id)
|
|
283
|
+
payload_existing = None
|
|
284
|
+
if items_existing:
|
|
285
|
+
# get the latest ones
|
|
286
|
+
latest_item = items_existing[-1]
|
|
287
|
+
payload_existing = {"annotation": latest_item["annotation"]}
|
|
288
|
+
if "comment" in latest_item:
|
|
289
|
+
payload_existing["comment"] = latest_item["comment"]
|
|
290
|
+
|
|
291
|
+
payload = data_all[campaign_id]["data_welcome"][item_i]
|
|
292
|
+
is_form = is_form_document(payload)
|
|
293
|
+
|
|
294
|
+
return JSONResponse(
|
|
295
|
+
content={
|
|
296
|
+
"status": "form" if is_form else "ok",
|
|
297
|
+
"progress": user_progress["progress"],
|
|
298
|
+
"progress_welcome": progress_welcome,
|
|
299
|
+
"time": user_progress["time"],
|
|
300
|
+
"info": {
|
|
301
|
+
"item_i": item_id,
|
|
302
|
+
"instructions": _get_instructions(data_all, campaign_id),
|
|
303
|
+
}
|
|
304
|
+
| {
|
|
305
|
+
k: v
|
|
306
|
+
for k, v in data_all[campaign_id]["info"].items()
|
|
307
|
+
if k in {"protocol", "sliders", "textfield", "show_model_names"}
|
|
308
|
+
},
|
|
309
|
+
"payload": payload,
|
|
310
|
+
}
|
|
311
|
+
| ({"payload_existing": payload_existing} if payload_existing else {}),
|
|
312
|
+
status_code=200,
|
|
313
|
+
)
|
|
314
|
+
|
|
315
|
+
# All welcome items complete, proceed with regular items
|
|
316
|
+
if all(v == "completed" for v in user_progress["progress"]):
|
|
218
317
|
return _completed_response(data_all, progress_data, campaign_id, user_id)
|
|
219
318
|
|
|
220
319
|
# find first incomplete item
|
|
221
|
-
item_i = min(
|
|
320
|
+
item_i = min(
|
|
321
|
+
[i for i, v in enumerate(user_progress["progress"]) if v != "completed"]
|
|
322
|
+
)
|
|
222
323
|
|
|
223
324
|
# try to get existing annotations if any
|
|
224
325
|
items_existing = get_db_log_item(campaign_id, user_id, item_i)
|
|
@@ -230,10 +331,14 @@ def get_next_item_taskbased(
|
|
|
230
331
|
if "comment" in latest_item:
|
|
231
332
|
payload_existing["comment"] = latest_item["comment"]
|
|
232
333
|
|
|
334
|
+
payload = data_all[campaign_id]["data"][user_id][item_i]
|
|
335
|
+
is_form = is_form_document(payload)
|
|
336
|
+
|
|
233
337
|
return JSONResponse(
|
|
234
338
|
content={
|
|
235
|
-
"status": "ok",
|
|
339
|
+
"status": "form" if is_form else "ok",
|
|
236
340
|
"progress": user_progress["progress"],
|
|
341
|
+
"progress_welcome": progress_welcome,
|
|
237
342
|
"time": user_progress["time"],
|
|
238
343
|
"info": {
|
|
239
344
|
"item_i": item_i,
|
|
@@ -242,7 +347,7 @@ def get_next_item_taskbased(
|
|
|
242
347
|
| {
|
|
243
348
|
k: v
|
|
244
349
|
for k, v in data_all[campaign_id]["info"].items()
|
|
245
|
-
if k in
|
|
350
|
+
if k in CAMPAIGN_INFO_PUBLIC
|
|
246
351
|
},
|
|
247
352
|
"payload": data_all[campaign_id]["data"][user_id][item_i],
|
|
248
353
|
}
|
|
@@ -267,12 +372,64 @@ def get_next_item_singlestream(
|
|
|
267
372
|
"""
|
|
268
373
|
user_progress = progress_data[campaign_id][user_id]
|
|
269
374
|
progress = user_progress["progress"]
|
|
375
|
+
progress_welcome = user_progress["progress_welcome"]
|
|
376
|
+
|
|
377
|
+
# Check if there are incomplete welcome items first - must complete all before proceeding
|
|
378
|
+
if not all(progress_welcome):
|
|
379
|
+
# Find first incomplete welcome item (sequential, not random)
|
|
380
|
+
item_i = next(i for i, v in enumerate(progress_welcome) if not v)
|
|
381
|
+
item_id = f"welcome_{item_i}"
|
|
382
|
+
|
|
383
|
+
# try to get existing annotations if any
|
|
384
|
+
# note the user_id since welcome items are per-user
|
|
385
|
+
items_existing = get_db_log_item(campaign_id, user_id, item_id)
|
|
386
|
+
payload_existing = None
|
|
387
|
+
if items_existing:
|
|
388
|
+
# get the latest ones
|
|
389
|
+
latest_item = items_existing[-1]
|
|
390
|
+
payload_existing = {"annotation": latest_item["annotation"]}
|
|
391
|
+
if "comment" in latest_item:
|
|
392
|
+
payload_existing["comment"] = latest_item["comment"]
|
|
393
|
+
|
|
394
|
+
payload = data_all[campaign_id]["data"][item_i]
|
|
395
|
+
is_form = is_form_document(payload)
|
|
270
396
|
|
|
271
|
-
|
|
397
|
+
return JSONResponse(
|
|
398
|
+
content={
|
|
399
|
+
"status": "form" if is_form else "ok",
|
|
400
|
+
"time": user_progress["time"],
|
|
401
|
+
"progress": progress,
|
|
402
|
+
"progress_welcome": progress_welcome,
|
|
403
|
+
"info": {
|
|
404
|
+
"item_i": item_id,
|
|
405
|
+
"instructions": _get_instructions(data_all, campaign_id),
|
|
406
|
+
}
|
|
407
|
+
| {
|
|
408
|
+
k: v
|
|
409
|
+
for k, v in data_all[campaign_id]["info"].items()
|
|
410
|
+
if k in {"protocol", "sliders", "textfield", "show_model_names"}
|
|
411
|
+
},
|
|
412
|
+
"payload": payload,
|
|
413
|
+
}
|
|
414
|
+
| ({"payload_existing": payload_existing} if payload_existing else {}),
|
|
415
|
+
status_code=200,
|
|
416
|
+
)
|
|
417
|
+
|
|
418
|
+
# All welcome items complete, proceed with regular items
|
|
419
|
+
# Check if user reached docs_per_user limit (if specified)
|
|
420
|
+
if (
|
|
421
|
+
docs_per_user := data_all[campaign_id]["info"].get("docs_per_user")
|
|
422
|
+
) is not None:
|
|
423
|
+
completed_docs = sum(v == "completed" for v in progress if v)
|
|
424
|
+
if completed_docs >= docs_per_user:
|
|
425
|
+
return _completed_response(data_all, progress_data, campaign_id, user_id)
|
|
426
|
+
elif all(v in {"completed", "completed_foreign"} for v in progress):
|
|
272
427
|
return _completed_response(data_all, progress_data, campaign_id, user_id)
|
|
273
428
|
|
|
274
429
|
# find a random incomplete item
|
|
275
|
-
incomplete_indices = [
|
|
430
|
+
incomplete_indices = [
|
|
431
|
+
i for i, v in enumerate(progress) if v not in {"completed", "completed_foreign"}
|
|
432
|
+
]
|
|
276
433
|
item_i = random.choice(incomplete_indices)
|
|
277
434
|
|
|
278
435
|
# try to get existing annotations if any
|
|
@@ -286,11 +443,15 @@ def get_next_item_singlestream(
|
|
|
286
443
|
if "comment" in latest_item:
|
|
287
444
|
payload_existing["comment"] = latest_item["comment"]
|
|
288
445
|
|
|
446
|
+
payload = data_all[campaign_id]["data"][item_i]
|
|
447
|
+
is_form = is_form_document(payload)
|
|
448
|
+
|
|
289
449
|
return JSONResponse(
|
|
290
450
|
content={
|
|
291
|
-
"status": "ok",
|
|
451
|
+
"status": "form" if is_form else "ok",
|
|
292
452
|
"time": user_progress["time"],
|
|
293
453
|
"progress": progress,
|
|
454
|
+
"progress_welcome": progress_welcome,
|
|
294
455
|
"info": {
|
|
295
456
|
"item_i": item_i,
|
|
296
457
|
"instructions": _get_instructions(data_all, campaign_id),
|
|
@@ -298,9 +459,9 @@ def get_next_item_singlestream(
|
|
|
298
459
|
| {
|
|
299
460
|
k: v
|
|
300
461
|
for k, v in data_all[campaign_id]["info"].items()
|
|
301
|
-
if k in
|
|
462
|
+
if k in CAMPAIGN_INFO_PUBLIC
|
|
302
463
|
},
|
|
303
|
-
"payload":
|
|
464
|
+
"payload": payload,
|
|
304
465
|
}
|
|
305
466
|
| ({"payload_existing": payload_existing} if payload_existing else {}),
|
|
306
467
|
status_code=200,
|
|
@@ -320,8 +481,8 @@ def get_next_item_dynamic(
|
|
|
320
481
|
|
|
321
482
|
In this mode, items are selected based on the current performance of models:
|
|
322
483
|
1. Contrastive comparison: `dynamic_contrastive_models` models are randomly selected and shown per item
|
|
323
|
-
2.
|
|
324
|
-
3. After
|
|
484
|
+
2. Warmup phase: Each model gets `dynamic_warmup` annotations with fully random selection
|
|
485
|
+
3. After warmup phase: Top `dynamic_top` models are identified, K randomly selected from them
|
|
325
486
|
4. Items with least annotations for the selected models are prioritized
|
|
326
487
|
5. With probability `dynamic_backoff`, uniformly random selection is used instead
|
|
327
488
|
"""
|
|
@@ -329,18 +490,69 @@ def get_next_item_dynamic(
|
|
|
329
490
|
|
|
330
491
|
user_progress = progress_data[campaign_id][user_id]
|
|
331
492
|
campaign_data = tasks_data[campaign_id]
|
|
493
|
+
progress_welcome = user_progress["progress_welcome"]
|
|
494
|
+
|
|
495
|
+
# Check if there are incomplete welcome items first - must complete all before proceeding
|
|
496
|
+
if not all(progress_welcome):
|
|
497
|
+
# Find first incomplete welcome item (sequential)
|
|
498
|
+
item_i = next(i for i, v in enumerate(progress_welcome) if not v)
|
|
499
|
+
item_id = f"welcome_{item_i}"
|
|
500
|
+
|
|
501
|
+
# try to get existing annotations if any
|
|
502
|
+
# note the user_id since welcome items are per-user
|
|
503
|
+
items_existing = get_db_log_item(campaign_id, user_id, item_id)
|
|
504
|
+
payload_existing = None
|
|
505
|
+
if items_existing:
|
|
506
|
+
# get the latest ones
|
|
507
|
+
latest_item = items_existing[-1]
|
|
508
|
+
payload_existing = {"annotation": latest_item["annotation"]}
|
|
509
|
+
if "comment" in latest_item:
|
|
510
|
+
payload_existing["comment"] = latest_item["comment"]
|
|
511
|
+
|
|
512
|
+
return JSONResponse(
|
|
513
|
+
content={
|
|
514
|
+
"status": "ok",
|
|
515
|
+
"time": user_progress["time"],
|
|
516
|
+
"progress": user_progress["progress"],
|
|
517
|
+
"progress_welcome": progress_welcome,
|
|
518
|
+
"info": {
|
|
519
|
+
"item_i": item_id,
|
|
520
|
+
"instructions": _get_instructions(campaign_data, campaign_id),
|
|
521
|
+
}
|
|
522
|
+
| {
|
|
523
|
+
k: v
|
|
524
|
+
for k, v in campaign_data["info"].items()
|
|
525
|
+
if k in {"protocol", "sliders", "textfield", "show_model_names"}
|
|
526
|
+
},
|
|
527
|
+
"payload": campaign_data["data"][item_i],
|
|
528
|
+
}
|
|
529
|
+
| ({"payload_existing": payload_existing} if payload_existing else {}),
|
|
530
|
+
status_code=200,
|
|
531
|
+
)
|
|
332
532
|
|
|
333
533
|
# Get all unique models in the campaign (all items must have all models)
|
|
334
534
|
all_models = list(set(campaign_data["data"][0][0]["tgt"].keys()))
|
|
335
535
|
|
|
336
|
-
# Check if completed
|
|
337
|
-
#
|
|
338
|
-
if
|
|
536
|
+
# Check if completed
|
|
537
|
+
# First check if docs_per_user limit is reached
|
|
538
|
+
if (docs_per_user := campaign_data["info"].get("docs_per_user")) is not None:
|
|
539
|
+
# Count specifically number of annotations across models
|
|
540
|
+
completed_docs = sum(
|
|
541
|
+
v == "completed" for mv in user_progress["progress"] for v in mv.values()
|
|
542
|
+
)
|
|
543
|
+
if completed_docs >= docs_per_user:
|
|
544
|
+
return _completed_response(tasks_data, progress_data, campaign_id, user_id)
|
|
545
|
+
# Otherwise check if all models completed for all items
|
|
546
|
+
elif all(
|
|
547
|
+
v in {"completed", "completed_foreign"}
|
|
548
|
+
for mv in user_progress["progress"]
|
|
549
|
+
for v in mv.values()
|
|
550
|
+
):
|
|
339
551
|
return _completed_response(tasks_data, progress_data, campaign_id, user_id)
|
|
340
552
|
|
|
341
553
|
# Get configuration parameters
|
|
342
554
|
dynamic_top = campaign_data["info"].get("dynamic_top", 2)
|
|
343
|
-
|
|
555
|
+
dynamic_warmup = campaign_data["info"].get("dynamic_warmup", 5)
|
|
344
556
|
dynamic_contrastive_models = campaign_data["info"].get(
|
|
345
557
|
"dynamic_contrastive_models", 1
|
|
346
558
|
)
|
|
@@ -360,18 +572,18 @@ def get_next_item_dynamic(
|
|
|
360
572
|
model_total_counts[model] += 1
|
|
361
573
|
|
|
362
574
|
# Check if we're still in the first phase (collecting initial data)
|
|
363
|
-
|
|
364
|
-
model_total_counts.get(model, 0) <
|
|
575
|
+
in_warmup_phase = any(
|
|
576
|
+
model_total_counts.get(model, 0) < dynamic_warmup for model in all_models
|
|
365
577
|
)
|
|
366
578
|
|
|
367
579
|
# Select which models to show
|
|
368
|
-
if
|
|
580
|
+
if in_warmup_phase:
|
|
369
581
|
# First phase or backoff: select models that don't have enough annotations yet
|
|
370
582
|
selected_models = random.sample(
|
|
371
583
|
[
|
|
372
584
|
model
|
|
373
585
|
for model in all_models
|
|
374
|
-
if model_total_counts.get(model, 0) <
|
|
586
|
+
if model_total_counts.get(model, 0) < dynamic_warmup
|
|
375
587
|
],
|
|
376
588
|
k=min(dynamic_contrastive_models, len(all_models)),
|
|
377
589
|
)
|
|
@@ -405,20 +617,19 @@ def get_next_item_dynamic(
|
|
|
405
617
|
top_models, k=min(dynamic_contrastive_models, len(top_models))
|
|
406
618
|
)
|
|
407
619
|
|
|
408
|
-
# Find incomplete items
|
|
409
|
-
|
|
410
|
-
i
|
|
411
|
-
for i,
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
# Select item with minimum annotations (with random tiebreaking)
|
|
415
|
-
min_annotations = min(item_annotation_counts.values())
|
|
416
|
-
items_with_min = [
|
|
417
|
-
item_i
|
|
418
|
-
for item_i, count in item_annotation_counts.items()
|
|
419
|
-
if count == min_annotations
|
|
620
|
+
# Find incomplete items (at least one model is neither "completed" nor "completed_foreign")
|
|
621
|
+
incomplete_indices = [
|
|
622
|
+
i
|
|
623
|
+
for i, mv in enumerate(user_progress["progress"])
|
|
624
|
+
if not all(v in {"completed", "completed_foreign"} for v in mv.values())
|
|
420
625
|
]
|
|
421
|
-
|
|
626
|
+
|
|
627
|
+
# If no incomplete items, user (and everyone) is done
|
|
628
|
+
if not incomplete_indices:
|
|
629
|
+
return _completed_response(tasks_data, progress_data, campaign_id, user_id)
|
|
630
|
+
|
|
631
|
+
# Select a random incomplete item
|
|
632
|
+
item_i = random.choice(incomplete_indices)
|
|
422
633
|
|
|
423
634
|
# Prune the payload to only include selected models
|
|
424
635
|
original_item = campaign_data["data"][item_i]
|
|
@@ -459,7 +670,7 @@ def get_next_item_dynamic(
|
|
|
459
670
|
| {
|
|
460
671
|
k: v
|
|
461
672
|
for k, v in campaign_data["info"].items()
|
|
462
|
-
if k in
|
|
673
|
+
if k in CAMPAIGN_INFO_PUBLIC
|
|
463
674
|
},
|
|
464
675
|
"payload": pruned_item,
|
|
465
676
|
},
|
|
@@ -475,16 +686,17 @@ def _reset_user_time(progress_data: dict, campaign_id: str, user_id: str) -> Non
|
|
|
475
686
|
progress_data[campaign_id][user_id]["validations"] = {}
|
|
476
687
|
|
|
477
688
|
|
|
478
|
-
def _get_user_annotated_items(campaign_id: str, user_id: str) -> set[int]:
|
|
689
|
+
def _get_user_annotated_items(campaign_id: str, user_id: str) -> set[int | str]:
|
|
479
690
|
"""
|
|
480
691
|
Get the set of item indices that a specific user has annotated.
|
|
481
|
-
|
|
692
|
+
|
|
482
693
|
Args:
|
|
483
694
|
campaign_id: The campaign identifier
|
|
484
695
|
user_id: The user identifier
|
|
485
|
-
|
|
696
|
+
|
|
486
697
|
Returns:
|
|
487
|
-
Set of item indices (item_i) that the user has annotated
|
|
698
|
+
Set of item indices (item_i) that the user has annotated.
|
|
699
|
+
Can include both int indices for regular items and string IDs like "welcome_0" for welcome items.
|
|
488
700
|
"""
|
|
489
701
|
log = get_db_log(campaign_id)
|
|
490
702
|
user_items = set()
|
|
@@ -504,9 +716,16 @@ def reset_task(
|
|
|
504
716
|
"""
|
|
505
717
|
Reset the task progress for the user in the specified campaign.
|
|
506
718
|
Saves a reset marker to mask existing annotations.
|
|
719
|
+
|
|
720
|
+
Note: Dynamic assignment does not support user-level deletion.
|
|
507
721
|
"""
|
|
508
722
|
assignment = tasks_data[campaign_id]["info"]["assignment"]
|
|
509
|
-
if assignment == "
|
|
723
|
+
if assignment == "dynamic":
|
|
724
|
+
return JSONResponse(
|
|
725
|
+
content="User-level deletion is not supported for dynamic assignments",
|
|
726
|
+
status_code=400,
|
|
727
|
+
)
|
|
728
|
+
elif assignment == "task-based":
|
|
510
729
|
# Save reset marker for this user to mask existing annotations
|
|
511
730
|
num_items = len(tasks_data[campaign_id]["data"][user_id])
|
|
512
731
|
for item_i in range(num_items):
|
|
@@ -514,49 +733,42 @@ def reset_task(
|
|
|
514
733
|
campaign_id,
|
|
515
734
|
{"user_id": user_id, "item_i": item_i, "annotation": RESET_MARKER},
|
|
516
735
|
)
|
|
517
|
-
progress_data[campaign_id][user_id]["progress"] = [
|
|
736
|
+
progress_data[campaign_id][user_id]["progress"] = [None] * num_items
|
|
737
|
+
# Reset welcome items progress if it exists
|
|
738
|
+
if "progress_welcome" in progress_data[campaign_id][user_id]:
|
|
739
|
+
num_welcome = len(progress_data[campaign_id][user_id]["progress_welcome"])
|
|
740
|
+
progress_data[campaign_id][user_id]["progress_welcome"] = [
|
|
741
|
+
False
|
|
742
|
+
] * num_welcome
|
|
518
743
|
_reset_user_time(progress_data, campaign_id, user_id)
|
|
519
744
|
return JSONResponse(content="ok", status_code=200)
|
|
520
745
|
elif assignment == "single-stream":
|
|
521
|
-
# Find all items that this user has annotated
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
# Reset only the touched items in all users' progress (shared pool)
|
|
532
|
-
for uid in progress_data[campaign_id]:
|
|
533
|
-
for item_i in user_items:
|
|
534
|
-
progress_data[campaign_id][uid]["progress"][item_i] = False
|
|
535
|
-
|
|
536
|
-
# Reset only the specified user's time
|
|
537
|
-
_reset_user_time(progress_data, campaign_id, user_id)
|
|
538
|
-
return JSONResponse(content="ok", status_code=200)
|
|
539
|
-
elif assignment == "dynamic":
|
|
540
|
-
# Find all items that this user has annotated
|
|
541
|
-
user_items = _get_user_annotated_items(campaign_id, user_id)
|
|
542
|
-
|
|
543
|
-
# Save reset markers only for items this user has touched
|
|
544
|
-
for item_i in user_items:
|
|
746
|
+
# Find all items that this user has annotated (has "completed")
|
|
747
|
+
user_items_to_reset = [
|
|
748
|
+
i
|
|
749
|
+
for i, status in enumerate(progress_data[campaign_id][user_id]["progress"])
|
|
750
|
+
if status == "completed"
|
|
751
|
+
]
|
|
752
|
+
|
|
753
|
+
# Save reset markers for all items this user has touched
|
|
754
|
+
for item_i in user_items_to_reset:
|
|
545
755
|
save_db_payload(
|
|
546
756
|
campaign_id,
|
|
547
757
|
{"user_id": user_id, "item_i": item_i, "annotation": RESET_MARKER},
|
|
548
758
|
)
|
|
549
759
|
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
# Reset only the touched items in all users' progress (shared pool, use lists to track models)
|
|
760
|
+
# Reset the touched regular items in all users' progress (shared pool)
|
|
553
761
|
for uid in progress_data[campaign_id]:
|
|
554
|
-
for item_i in
|
|
555
|
-
progress_data[campaign_id][uid]["progress"][item_i] =
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
762
|
+
for item_i in user_items_to_reset:
|
|
763
|
+
progress_data[campaign_id][uid]["progress"][item_i] = None
|
|
764
|
+
|
|
765
|
+
# Reset all welcome items progress for this user (per-user, not shared)
|
|
766
|
+
if "progress_welcome" in progress_data[campaign_id][user_id]:
|
|
767
|
+
num_welcome = len(progress_data[campaign_id][user_id]["progress_welcome"])
|
|
768
|
+
progress_data[campaign_id][user_id]["progress_welcome"] = [
|
|
769
|
+
False
|
|
770
|
+
] * num_welcome
|
|
771
|
+
|
|
560
772
|
# Reset only the specified user's time
|
|
561
773
|
_reset_user_time(progress_data, campaign_id, user_id)
|
|
562
774
|
return JSONResponse(content="ok", status_code=200)
|
|
@@ -571,35 +783,58 @@ def update_progress(
|
|
|
571
783
|
user_id: str,
|
|
572
784
|
tasks_data: dict,
|
|
573
785
|
progress_data: dict,
|
|
574
|
-
item_i: int,
|
|
786
|
+
item_i: int | str, # Can be int or str like "welcome_0"
|
|
575
787
|
payload: Any,
|
|
576
788
|
) -> JSONResponse:
|
|
577
789
|
"""
|
|
578
790
|
Log the user's response for the specified item in the campaign.
|
|
579
791
|
"""
|
|
792
|
+
# Check if it's a welcome item
|
|
793
|
+
if isinstance(item_i, str) and item_i.startswith("welcome_"):
|
|
794
|
+
welcome_index = int(item_i.split("_")[1])
|
|
795
|
+
# Update only this user's progress_welcome (not shared)
|
|
796
|
+
progress_data[campaign_id][user_id]["progress_welcome"][welcome_index] = (
|
|
797
|
+
"completed"
|
|
798
|
+
)
|
|
799
|
+
return JSONResponse(content={"status": "ok"}, status_code=200)
|
|
800
|
+
|
|
580
801
|
assignment = tasks_data[campaign_id]["info"]["assignment"]
|
|
581
802
|
if assignment == "task-based":
|
|
582
|
-
#
|
|
583
|
-
progress_data[campaign_id][user_id]["progress"][item_i] =
|
|
803
|
+
# Mark as completed for this user
|
|
804
|
+
progress_data[campaign_id][user_id]["progress"][item_i] = "completed"
|
|
584
805
|
return JSONResponse(content={"status": "ok"}, status_code=200)
|
|
585
806
|
elif assignment == "single-stream":
|
|
586
|
-
#
|
|
807
|
+
# Mark as completed for the current user, completed_foreign for others
|
|
587
808
|
for uid in progress_data[campaign_id]:
|
|
588
|
-
progress_data[campaign_id][uid]["progress"][item_i]
|
|
809
|
+
current_status = progress_data[campaign_id][uid]["progress"][item_i]
|
|
810
|
+
if uid == user_id:
|
|
811
|
+
# User who completed it gets "completed"
|
|
812
|
+
progress_data[campaign_id][uid]["progress"][item_i] = "completed"
|
|
813
|
+
elif current_status is None:
|
|
814
|
+
# Other users get "completed_foreign" if not already completed
|
|
815
|
+
progress_data[campaign_id][uid]["progress"][item_i] = (
|
|
816
|
+
"completed_foreign"
|
|
817
|
+
)
|
|
818
|
+
# If already "completed", keep it as "completed"
|
|
589
819
|
return JSONResponse(content="ok", status_code=200)
|
|
590
|
-
|
|
591
|
-
#
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
820
|
+
if assignment == "dynamic":
|
|
821
|
+
# Mark as completed for the current user, completed_foreign for others
|
|
822
|
+
for model in payload["annotation"][0].keys():
|
|
823
|
+
for uid in progress_data[campaign_id]:
|
|
824
|
+
current_status = progress_data[campaign_id][uid]["progress"][item_i][
|
|
825
|
+
model
|
|
826
|
+
]
|
|
827
|
+
if uid == user_id:
|
|
828
|
+
# User who completed it gets "completed"
|
|
829
|
+
progress_data[campaign_id][uid]["progress"][item_i][model] = (
|
|
830
|
+
"completed"
|
|
831
|
+
)
|
|
832
|
+
elif current_status is None:
|
|
833
|
+
# Other users get "completed_foreign" if not already completed
|
|
834
|
+
progress_data[campaign_id][uid]["progress"][item_i][model] = (
|
|
835
|
+
"completed_foreign"
|
|
836
|
+
)
|
|
837
|
+
# If already "completed", keep it as "completed"
|
|
603
838
|
return JSONResponse(content="ok", status_code=200)
|
|
604
839
|
else:
|
|
605
840
|
return JSONResponse(content="Unknown campaign assignment type", status_code=400)
|