pearmut 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pearmut/app.py +8 -5
- pearmut/assignment.py +336 -82
- pearmut/cli.py +145 -82
- pearmut/static/annotate.bundle.js +1 -1
- pearmut/static/annotate.html +11 -7
- pearmut/static/dashboard.bundle.js +1 -1
- pearmut/static/dashboard.html +1 -1
- pearmut/static/index.html +1 -1
- pearmut/static/style.css +38 -0
- pearmut/utils.py +38 -21
- {pearmut-1.0.2.dist-info → pearmut-1.0.3.dist-info}/METADATA +74 -1
- pearmut-1.0.3.dist-info/RECORD +20 -0
- {pearmut-1.0.2.dist-info → pearmut-1.0.3.dist-info}/WHEEL +1 -1
- pearmut-1.0.2.dist-info/RECORD +0 -20
- {pearmut-1.0.2.dist-info → pearmut-1.0.3.dist-info}/entry_points.txt +0 -0
- {pearmut-1.0.2.dist-info → pearmut-1.0.3.dist-info}/licenses/LICENSE +0 -0
- {pearmut-1.0.2.dist-info → pearmut-1.0.3.dist-info}/top_level.txt +0 -0
pearmut/assignment.py
CHANGED
|
@@ -11,9 +11,19 @@ from .utils import (
|
|
|
11
11
|
check_validation_threshold,
|
|
12
12
|
get_db_log,
|
|
13
13
|
get_db_log_item,
|
|
14
|
+
is_form_document,
|
|
14
15
|
save_db_payload,
|
|
15
16
|
)
|
|
16
17
|
|
|
18
|
+
# Public campaign info fields that are sent to the client
|
|
19
|
+
CAMPAIGN_INFO_PUBLIC = {
|
|
20
|
+
"protocol",
|
|
21
|
+
"sliders",
|
|
22
|
+
"textfield",
|
|
23
|
+
"show_model_names",
|
|
24
|
+
"mqm_categories",
|
|
25
|
+
}
|
|
26
|
+
|
|
17
27
|
|
|
18
28
|
def _get_instructions(tasks_data: dict, campaign_id: str) -> str:
|
|
19
29
|
"""Get instructions: custom if provided, else protocol default, else empty."""
|
|
@@ -23,7 +33,6 @@ def _get_instructions(tasks_data: dict, campaign_id: str) -> str:
|
|
|
23
33
|
return PROTOCOL_INSTRUCTIONS.get(campaign_info.get("protocol", ""), "")
|
|
24
34
|
|
|
25
35
|
|
|
26
|
-
|
|
27
36
|
def _completed_response(
|
|
28
37
|
tasks_data: dict,
|
|
29
38
|
progress_data: dict,
|
|
@@ -55,6 +64,7 @@ def _completed_response(
|
|
|
55
64
|
content={
|
|
56
65
|
"status": "goodbye",
|
|
57
66
|
"progress": progress,
|
|
67
|
+
"progress_welcome": user_progress["progress_welcome"],
|
|
58
68
|
"time": user_progress["time"],
|
|
59
69
|
"token": token,
|
|
60
70
|
"instructions_goodbye": instructions_goodbye,
|
|
@@ -115,12 +125,34 @@ def get_i_item_taskbased(
|
|
|
115
125
|
user_id: str,
|
|
116
126
|
data_all: dict,
|
|
117
127
|
progress_data: dict,
|
|
118
|
-
item_i: int,
|
|
128
|
+
item_i: int | str, # Can be int or str like "welcome_0"
|
|
119
129
|
) -> JSONResponse:
|
|
120
130
|
"""
|
|
121
131
|
Get specific item for task-based protocol.
|
|
122
132
|
"""
|
|
123
133
|
user_progress = progress_data[campaign_id][user_id]
|
|
134
|
+
progress_welcome = user_progress["progress_welcome"]
|
|
135
|
+
|
|
136
|
+
# if welcome_X, payload is from data_welcome[X], otherwise data[user][X]
|
|
137
|
+
if isinstance(item_i, str) and item_i.startswith("welcome_"):
|
|
138
|
+
actual_index = int(item_i.split("_")[1])
|
|
139
|
+
if actual_index < 0 or actual_index >= len(
|
|
140
|
+
data_all[campaign_id]["data_welcome"]
|
|
141
|
+
):
|
|
142
|
+
return JSONResponse(
|
|
143
|
+
content="Welcome item index out of range", status_code=400
|
|
144
|
+
)
|
|
145
|
+
payload = data_all[campaign_id]["data_welcome"][actual_index]
|
|
146
|
+
else:
|
|
147
|
+
# Prevent accessing regular items unless all welcome items are complete
|
|
148
|
+
if not all(progress_welcome):
|
|
149
|
+
return JSONResponse(
|
|
150
|
+
content="Complete all welcome items before accessing regular items",
|
|
151
|
+
status_code=400,
|
|
152
|
+
)
|
|
153
|
+
if item_i < 0 or item_i >= len(data_all[campaign_id]["data"][user_id]):
|
|
154
|
+
return JSONResponse(content="Item index out of range", status_code=400)
|
|
155
|
+
payload = data_all[campaign_id]["data"][user_id][item_i]
|
|
124
156
|
|
|
125
157
|
# try to get existing annotations if any
|
|
126
158
|
items_existing = get_db_log_item(campaign_id, user_id, item_i)
|
|
@@ -132,13 +164,13 @@ def get_i_item_taskbased(
|
|
|
132
164
|
if "comment" in latest_item:
|
|
133
165
|
payload_existing["comment"] = latest_item["comment"]
|
|
134
166
|
|
|
135
|
-
|
|
136
|
-
return JSONResponse(content="Item index out of range", status_code=400)
|
|
167
|
+
is_form = is_form_document(payload)
|
|
137
168
|
|
|
138
169
|
return JSONResponse(
|
|
139
170
|
content={
|
|
140
|
-
"status": "ok",
|
|
171
|
+
"status": "form" if is_form else "ok",
|
|
141
172
|
"progress": user_progress["progress"],
|
|
173
|
+
"progress_welcome": progress_welcome,
|
|
142
174
|
"time": user_progress["time"],
|
|
143
175
|
"info": {
|
|
144
176
|
"item_i": item_i,
|
|
@@ -147,9 +179,9 @@ def get_i_item_taskbased(
|
|
|
147
179
|
| {
|
|
148
180
|
k: v
|
|
149
181
|
for k, v in data_all[campaign_id]["info"].items()
|
|
150
|
-
if k in
|
|
182
|
+
if k in CAMPAIGN_INFO_PUBLIC
|
|
151
183
|
},
|
|
152
|
-
"payload":
|
|
184
|
+
"payload": payload,
|
|
153
185
|
}
|
|
154
186
|
| ({"payload_existing": payload_existing} if payload_existing else {}),
|
|
155
187
|
status_code=200,
|
|
@@ -161,16 +193,37 @@ def get_i_item_singlestream(
|
|
|
161
193
|
user_id: str,
|
|
162
194
|
data_all: dict,
|
|
163
195
|
progress_data: dict,
|
|
164
|
-
item_i: int,
|
|
196
|
+
item_i: int | str, # Can be int or str like "welcome_0"
|
|
165
197
|
) -> JSONResponse:
|
|
166
198
|
"""
|
|
167
199
|
Get specific item for single-stream assignment.
|
|
168
200
|
"""
|
|
169
201
|
user_progress = progress_data[campaign_id][user_id]
|
|
202
|
+
progress_welcome = user_progress["progress_welcome"]
|
|
203
|
+
|
|
204
|
+
# Convert welcome_X string to integer index
|
|
205
|
+
actual_index = item_i
|
|
206
|
+
is_welcome_item = isinstance(item_i, str) and item_i.startswith("welcome_")
|
|
207
|
+
if is_welcome_item:
|
|
208
|
+
actual_index = int(item_i.split("_")[1])
|
|
209
|
+
# Validate against total number of welcome items
|
|
210
|
+
if actual_index < 0 or actual_index >= len(progress_welcome):
|
|
211
|
+
return JSONResponse(
|
|
212
|
+
content="Welcome item index out of range", status_code=400
|
|
213
|
+
)
|
|
214
|
+
else:
|
|
215
|
+
# Prevent accessing regular items unless all welcome items are complete
|
|
216
|
+
if not all(progress_welcome):
|
|
217
|
+
return JSONResponse(
|
|
218
|
+
content="Complete all welcome items before accessing regular items",
|
|
219
|
+
status_code=400,
|
|
220
|
+
)
|
|
170
221
|
|
|
171
222
|
# try to get existing annotations if any
|
|
172
|
-
#
|
|
173
|
-
items_existing = get_db_log_item(
|
|
223
|
+
# use user_id for welcome items (per-user), None for shared items
|
|
224
|
+
items_existing = get_db_log_item(
|
|
225
|
+
campaign_id, user_id if is_welcome_item else None, item_i
|
|
226
|
+
)
|
|
174
227
|
payload_existing = None
|
|
175
228
|
if items_existing:
|
|
176
229
|
# get the latest ones
|
|
@@ -179,13 +232,17 @@ def get_i_item_singlestream(
|
|
|
179
232
|
if "comment" in latest_item:
|
|
180
233
|
payload_existing["comment"] = latest_item["comment"]
|
|
181
234
|
|
|
182
|
-
if
|
|
235
|
+
if actual_index < 0 or actual_index >= len(data_all[campaign_id]["data"]):
|
|
183
236
|
return JSONResponse(content="Item index out of range", status_code=400)
|
|
184
237
|
|
|
238
|
+
payload = data_all[campaign_id]["data"][actual_index]
|
|
239
|
+
is_form = is_form_document(payload)
|
|
240
|
+
|
|
185
241
|
return JSONResponse(
|
|
186
242
|
content={
|
|
187
|
-
"status": "ok",
|
|
243
|
+
"status": "form" if is_form else "ok",
|
|
188
244
|
"progress": user_progress["progress"],
|
|
245
|
+
"progress_welcome": progress_welcome,
|
|
189
246
|
"time": user_progress["time"],
|
|
190
247
|
"info": {
|
|
191
248
|
"item_i": item_i,
|
|
@@ -194,9 +251,9 @@ def get_i_item_singlestream(
|
|
|
194
251
|
| {
|
|
195
252
|
k: v
|
|
196
253
|
for k, v in data_all[campaign_id]["info"].items()
|
|
197
|
-
if k in
|
|
254
|
+
if k in CAMPAIGN_INFO_PUBLIC
|
|
198
255
|
},
|
|
199
|
-
"payload":
|
|
256
|
+
"payload": payload,
|
|
200
257
|
}
|
|
201
258
|
| ({"payload_existing": payload_existing} if payload_existing else {}),
|
|
202
259
|
status_code=200,
|
|
@@ -213,11 +270,56 @@ def get_next_item_taskbased(
|
|
|
213
270
|
Get the next item for task-based assignment.
|
|
214
271
|
"""
|
|
215
272
|
user_progress = progress_data[campaign_id][user_id]
|
|
216
|
-
|
|
273
|
+
progress_welcome = user_progress["progress_welcome"]
|
|
274
|
+
|
|
275
|
+
# Check if there are incomplete welcome items first
|
|
276
|
+
if not all(progress_welcome):
|
|
277
|
+
# Find first incomplete welcome item
|
|
278
|
+
item_i = next(i for i, v in enumerate(progress_welcome) if not v)
|
|
279
|
+
item_id = f"welcome_{item_i}"
|
|
280
|
+
|
|
281
|
+
# try to get existing annotations if any
|
|
282
|
+
items_existing = get_db_log_item(campaign_id, user_id, item_id)
|
|
283
|
+
payload_existing = None
|
|
284
|
+
if items_existing:
|
|
285
|
+
# get the latest ones
|
|
286
|
+
latest_item = items_existing[-1]
|
|
287
|
+
payload_existing = {"annotation": latest_item["annotation"]}
|
|
288
|
+
if "comment" in latest_item:
|
|
289
|
+
payload_existing["comment"] = latest_item["comment"]
|
|
290
|
+
|
|
291
|
+
payload = data_all[campaign_id]["data_welcome"][item_i]
|
|
292
|
+
is_form = is_form_document(payload)
|
|
293
|
+
|
|
294
|
+
return JSONResponse(
|
|
295
|
+
content={
|
|
296
|
+
"status": "form" if is_form else "ok",
|
|
297
|
+
"progress": user_progress["progress"],
|
|
298
|
+
"progress_welcome": progress_welcome,
|
|
299
|
+
"time": user_progress["time"],
|
|
300
|
+
"info": {
|
|
301
|
+
"item_i": item_id,
|
|
302
|
+
"instructions": _get_instructions(data_all, campaign_id),
|
|
303
|
+
}
|
|
304
|
+
| {
|
|
305
|
+
k: v
|
|
306
|
+
for k, v in data_all[campaign_id]["info"].items()
|
|
307
|
+
if k in {"protocol", "sliders", "textfield", "show_model_names"}
|
|
308
|
+
},
|
|
309
|
+
"payload": payload,
|
|
310
|
+
}
|
|
311
|
+
| ({"payload_existing": payload_existing} if payload_existing else {}),
|
|
312
|
+
status_code=200,
|
|
313
|
+
)
|
|
314
|
+
|
|
315
|
+
# All welcome items complete, proceed with regular items
|
|
316
|
+
if all(v == "completed" for v in user_progress["progress"]):
|
|
217
317
|
return _completed_response(data_all, progress_data, campaign_id, user_id)
|
|
218
318
|
|
|
219
319
|
# find first incomplete item
|
|
220
|
-
item_i = min(
|
|
320
|
+
item_i = min(
|
|
321
|
+
[i for i, v in enumerate(user_progress["progress"]) if v != "completed"]
|
|
322
|
+
)
|
|
221
323
|
|
|
222
324
|
# try to get existing annotations if any
|
|
223
325
|
items_existing = get_db_log_item(campaign_id, user_id, item_i)
|
|
@@ -229,10 +331,14 @@ def get_next_item_taskbased(
|
|
|
229
331
|
if "comment" in latest_item:
|
|
230
332
|
payload_existing["comment"] = latest_item["comment"]
|
|
231
333
|
|
|
334
|
+
payload = data_all[campaign_id]["data"][user_id][item_i]
|
|
335
|
+
is_form = is_form_document(payload)
|
|
336
|
+
|
|
232
337
|
return JSONResponse(
|
|
233
338
|
content={
|
|
234
|
-
"status": "ok",
|
|
339
|
+
"status": "form" if is_form else "ok",
|
|
235
340
|
"progress": user_progress["progress"],
|
|
341
|
+
"progress_welcome": progress_welcome,
|
|
236
342
|
"time": user_progress["time"],
|
|
237
343
|
"info": {
|
|
238
344
|
"item_i": item_i,
|
|
@@ -241,7 +347,7 @@ def get_next_item_taskbased(
|
|
|
241
347
|
| {
|
|
242
348
|
k: v
|
|
243
349
|
for k, v in data_all[campaign_id]["info"].items()
|
|
244
|
-
if k in
|
|
350
|
+
if k in CAMPAIGN_INFO_PUBLIC
|
|
245
351
|
},
|
|
246
352
|
"payload": data_all[campaign_id]["data"][user_id][item_i],
|
|
247
353
|
}
|
|
@@ -266,12 +372,64 @@ def get_next_item_singlestream(
|
|
|
266
372
|
"""
|
|
267
373
|
user_progress = progress_data[campaign_id][user_id]
|
|
268
374
|
progress = user_progress["progress"]
|
|
375
|
+
progress_welcome = user_progress["progress_welcome"]
|
|
376
|
+
|
|
377
|
+
# Check if there are incomplete welcome items first - must complete all before proceeding
|
|
378
|
+
if not all(progress_welcome):
|
|
379
|
+
# Find first incomplete welcome item (sequential, not random)
|
|
380
|
+
item_i = next(i for i, v in enumerate(progress_welcome) if not v)
|
|
381
|
+
item_id = f"welcome_{item_i}"
|
|
382
|
+
|
|
383
|
+
# try to get existing annotations if any
|
|
384
|
+
# note the user_id since welcome items are per-user
|
|
385
|
+
items_existing = get_db_log_item(campaign_id, user_id, item_id)
|
|
386
|
+
payload_existing = None
|
|
387
|
+
if items_existing:
|
|
388
|
+
# get the latest ones
|
|
389
|
+
latest_item = items_existing[-1]
|
|
390
|
+
payload_existing = {"annotation": latest_item["annotation"]}
|
|
391
|
+
if "comment" in latest_item:
|
|
392
|
+
payload_existing["comment"] = latest_item["comment"]
|
|
393
|
+
|
|
394
|
+
payload = data_all[campaign_id]["data"][item_i]
|
|
395
|
+
is_form = is_form_document(payload)
|
|
269
396
|
|
|
270
|
-
|
|
397
|
+
return JSONResponse(
|
|
398
|
+
content={
|
|
399
|
+
"status": "form" if is_form else "ok",
|
|
400
|
+
"time": user_progress["time"],
|
|
401
|
+
"progress": progress,
|
|
402
|
+
"progress_welcome": progress_welcome,
|
|
403
|
+
"info": {
|
|
404
|
+
"item_i": item_id,
|
|
405
|
+
"instructions": _get_instructions(data_all, campaign_id),
|
|
406
|
+
}
|
|
407
|
+
| {
|
|
408
|
+
k: v
|
|
409
|
+
for k, v in data_all[campaign_id]["info"].items()
|
|
410
|
+
if k in {"protocol", "sliders", "textfield", "show_model_names"}
|
|
411
|
+
},
|
|
412
|
+
"payload": payload,
|
|
413
|
+
}
|
|
414
|
+
| ({"payload_existing": payload_existing} if payload_existing else {}),
|
|
415
|
+
status_code=200,
|
|
416
|
+
)
|
|
417
|
+
|
|
418
|
+
# All welcome items complete, proceed with regular items
|
|
419
|
+
# Check if user reached docs_per_user limit (if specified)
|
|
420
|
+
if (
|
|
421
|
+
docs_per_user := data_all[campaign_id]["info"].get("docs_per_user")
|
|
422
|
+
) is not None:
|
|
423
|
+
completed_docs = sum(v == "completed" for v in progress if v)
|
|
424
|
+
if completed_docs >= docs_per_user:
|
|
425
|
+
return _completed_response(data_all, progress_data, campaign_id, user_id)
|
|
426
|
+
elif all(v in {"completed", "completed_foreign"} for v in progress):
|
|
271
427
|
return _completed_response(data_all, progress_data, campaign_id, user_id)
|
|
272
428
|
|
|
273
429
|
# find a random incomplete item
|
|
274
|
-
incomplete_indices = [
|
|
430
|
+
incomplete_indices = [
|
|
431
|
+
i for i, v in enumerate(progress) if v not in {"completed", "completed_foreign"}
|
|
432
|
+
]
|
|
275
433
|
item_i = random.choice(incomplete_indices)
|
|
276
434
|
|
|
277
435
|
# try to get existing annotations if any
|
|
@@ -285,11 +443,15 @@ def get_next_item_singlestream(
|
|
|
285
443
|
if "comment" in latest_item:
|
|
286
444
|
payload_existing["comment"] = latest_item["comment"]
|
|
287
445
|
|
|
446
|
+
payload = data_all[campaign_id]["data"][item_i]
|
|
447
|
+
is_form = is_form_document(payload)
|
|
448
|
+
|
|
288
449
|
return JSONResponse(
|
|
289
450
|
content={
|
|
290
|
-
"status": "ok",
|
|
451
|
+
"status": "form" if is_form else "ok",
|
|
291
452
|
"time": user_progress["time"],
|
|
292
453
|
"progress": progress,
|
|
454
|
+
"progress_welcome": progress_welcome,
|
|
293
455
|
"info": {
|
|
294
456
|
"item_i": item_i,
|
|
295
457
|
"instructions": _get_instructions(data_all, campaign_id),
|
|
@@ -297,9 +459,9 @@ def get_next_item_singlestream(
|
|
|
297
459
|
| {
|
|
298
460
|
k: v
|
|
299
461
|
for k, v in data_all[campaign_id]["info"].items()
|
|
300
|
-
if k in
|
|
462
|
+
if k in CAMPAIGN_INFO_PUBLIC
|
|
301
463
|
},
|
|
302
|
-
"payload":
|
|
464
|
+
"payload": payload,
|
|
303
465
|
}
|
|
304
466
|
| ({"payload_existing": payload_existing} if payload_existing else {}),
|
|
305
467
|
status_code=200,
|
|
@@ -319,8 +481,8 @@ def get_next_item_dynamic(
|
|
|
319
481
|
|
|
320
482
|
In this mode, items are selected based on the current performance of models:
|
|
321
483
|
1. Contrastive comparison: `dynamic_contrastive_models` models are randomly selected and shown per item
|
|
322
|
-
2.
|
|
323
|
-
3. After
|
|
484
|
+
2. Warmup phase: Each model gets `dynamic_warmup` annotations with fully random selection
|
|
485
|
+
3. After warmup phase: Top `dynamic_top` models are identified, K randomly selected from them
|
|
324
486
|
4. Items with least annotations for the selected models are prioritized
|
|
325
487
|
5. With probability `dynamic_backoff`, uniformly random selection is used instead
|
|
326
488
|
"""
|
|
@@ -328,18 +490,69 @@ def get_next_item_dynamic(
|
|
|
328
490
|
|
|
329
491
|
user_progress = progress_data[campaign_id][user_id]
|
|
330
492
|
campaign_data = tasks_data[campaign_id]
|
|
493
|
+
progress_welcome = user_progress["progress_welcome"]
|
|
494
|
+
|
|
495
|
+
# Check if there are incomplete welcome items first - must complete all before proceeding
|
|
496
|
+
if not all(progress_welcome):
|
|
497
|
+
# Find first incomplete welcome item (sequential)
|
|
498
|
+
item_i = next(i for i, v in enumerate(progress_welcome) if not v)
|
|
499
|
+
item_id = f"welcome_{item_i}"
|
|
500
|
+
|
|
501
|
+
# try to get existing annotations if any
|
|
502
|
+
# note the user_id since welcome items are per-user
|
|
503
|
+
items_existing = get_db_log_item(campaign_id, user_id, item_id)
|
|
504
|
+
payload_existing = None
|
|
505
|
+
if items_existing:
|
|
506
|
+
# get the latest ones
|
|
507
|
+
latest_item = items_existing[-1]
|
|
508
|
+
payload_existing = {"annotation": latest_item["annotation"]}
|
|
509
|
+
if "comment" in latest_item:
|
|
510
|
+
payload_existing["comment"] = latest_item["comment"]
|
|
511
|
+
|
|
512
|
+
return JSONResponse(
|
|
513
|
+
content={
|
|
514
|
+
"status": "ok",
|
|
515
|
+
"time": user_progress["time"],
|
|
516
|
+
"progress": user_progress["progress"],
|
|
517
|
+
"progress_welcome": progress_welcome,
|
|
518
|
+
"info": {
|
|
519
|
+
"item_i": item_id,
|
|
520
|
+
"instructions": _get_instructions(campaign_data, campaign_id),
|
|
521
|
+
}
|
|
522
|
+
| {
|
|
523
|
+
k: v
|
|
524
|
+
for k, v in campaign_data["info"].items()
|
|
525
|
+
if k in {"protocol", "sliders", "textfield", "show_model_names"}
|
|
526
|
+
},
|
|
527
|
+
"payload": campaign_data["data"][item_i],
|
|
528
|
+
}
|
|
529
|
+
| ({"payload_existing": payload_existing} if payload_existing else {}),
|
|
530
|
+
status_code=200,
|
|
531
|
+
)
|
|
331
532
|
|
|
332
533
|
# Get all unique models in the campaign (all items must have all models)
|
|
333
534
|
all_models = list(set(campaign_data["data"][0][0]["tgt"].keys()))
|
|
334
535
|
|
|
335
|
-
# Check if completed
|
|
336
|
-
#
|
|
337
|
-
if
|
|
536
|
+
# Check if completed
|
|
537
|
+
# First check if docs_per_user limit is reached
|
|
538
|
+
if (docs_per_user := campaign_data["info"].get("docs_per_user")) is not None:
|
|
539
|
+
# Count specifically number of annotations across models
|
|
540
|
+
completed_docs = sum(
|
|
541
|
+
v == "completed" for mv in user_progress["progress"] for v in mv.values()
|
|
542
|
+
)
|
|
543
|
+
if completed_docs >= docs_per_user:
|
|
544
|
+
return _completed_response(tasks_data, progress_data, campaign_id, user_id)
|
|
545
|
+
# Otherwise check if all models completed for all items
|
|
546
|
+
elif all(
|
|
547
|
+
v in {"completed", "completed_foreign"}
|
|
548
|
+
for mv in user_progress["progress"]
|
|
549
|
+
for v in mv.values()
|
|
550
|
+
):
|
|
338
551
|
return _completed_response(tasks_data, progress_data, campaign_id, user_id)
|
|
339
552
|
|
|
340
553
|
# Get configuration parameters
|
|
341
554
|
dynamic_top = campaign_data["info"].get("dynamic_top", 2)
|
|
342
|
-
|
|
555
|
+
dynamic_warmup = campaign_data["info"].get("dynamic_warmup", 5)
|
|
343
556
|
dynamic_contrastive_models = campaign_data["info"].get(
|
|
344
557
|
"dynamic_contrastive_models", 1
|
|
345
558
|
)
|
|
@@ -359,18 +572,18 @@ def get_next_item_dynamic(
|
|
|
359
572
|
model_total_counts[model] += 1
|
|
360
573
|
|
|
361
574
|
# Check if we're still in the first phase (collecting initial data)
|
|
362
|
-
|
|
363
|
-
model_total_counts.get(model, 0) <
|
|
575
|
+
in_warmup_phase = any(
|
|
576
|
+
model_total_counts.get(model, 0) < dynamic_warmup for model in all_models
|
|
364
577
|
)
|
|
365
578
|
|
|
366
579
|
# Select which models to show
|
|
367
|
-
if
|
|
580
|
+
if in_warmup_phase:
|
|
368
581
|
# First phase or backoff: select models that don't have enough annotations yet
|
|
369
582
|
selected_models = random.sample(
|
|
370
583
|
[
|
|
371
584
|
model
|
|
372
585
|
for model in all_models
|
|
373
|
-
if model_total_counts.get(model, 0) <
|
|
586
|
+
if model_total_counts.get(model, 0) < dynamic_warmup
|
|
374
587
|
],
|
|
375
588
|
k=min(dynamic_contrastive_models, len(all_models)),
|
|
376
589
|
)
|
|
@@ -404,20 +617,19 @@ def get_next_item_dynamic(
|
|
|
404
617
|
top_models, k=min(dynamic_contrastive_models, len(top_models))
|
|
405
618
|
)
|
|
406
619
|
|
|
407
|
-
# Find incomplete items
|
|
408
|
-
|
|
409
|
-
i
|
|
410
|
-
for i,
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
# Select item with minimum annotations (with random tiebreaking)
|
|
414
|
-
min_annotations = min(item_annotation_counts.values())
|
|
415
|
-
items_with_min = [
|
|
416
|
-
item_i
|
|
417
|
-
for item_i, count in item_annotation_counts.items()
|
|
418
|
-
if count == min_annotations
|
|
620
|
+
# Find incomplete items (None or completed_foreign status)
|
|
621
|
+
incomplete_indices = [
|
|
622
|
+
i
|
|
623
|
+
for i, mv in enumerate(user_progress["progress"])
|
|
624
|
+
if not all(v in {"completed", "completed_foreign"} for v in mv.values())
|
|
419
625
|
]
|
|
420
|
-
|
|
626
|
+
|
|
627
|
+
# If no incomplete items, user (and everyone) is done
|
|
628
|
+
if not incomplete_indices:
|
|
629
|
+
return _completed_response(tasks_data, progress_data, campaign_id, user_id)
|
|
630
|
+
|
|
631
|
+
# Select a random incomplete item
|
|
632
|
+
item_i = random.choice(incomplete_indices)
|
|
421
633
|
|
|
422
634
|
# Prune the payload to only include selected models
|
|
423
635
|
original_item = campaign_data["data"][item_i]
|
|
@@ -458,7 +670,7 @@ def get_next_item_dynamic(
|
|
|
458
670
|
| {
|
|
459
671
|
k: v
|
|
460
672
|
for k, v in campaign_data["info"].items()
|
|
461
|
-
if k in
|
|
673
|
+
if k in CAMPAIGN_INFO_PUBLIC
|
|
462
674
|
},
|
|
463
675
|
"payload": pruned_item,
|
|
464
676
|
},
|
|
@@ -474,16 +686,17 @@ def _reset_user_time(progress_data: dict, campaign_id: str, user_id: str) -> Non
|
|
|
474
686
|
progress_data[campaign_id][user_id]["validations"] = {}
|
|
475
687
|
|
|
476
688
|
|
|
477
|
-
def _get_user_annotated_items(campaign_id: str, user_id: str) -> set[int]:
|
|
689
|
+
def _get_user_annotated_items(campaign_id: str, user_id: str) -> set[int | str]:
|
|
478
690
|
"""
|
|
479
691
|
Get the set of item indices that a specific user has annotated.
|
|
480
|
-
|
|
692
|
+
|
|
481
693
|
Args:
|
|
482
694
|
campaign_id: The campaign identifier
|
|
483
695
|
user_id: The user identifier
|
|
484
|
-
|
|
696
|
+
|
|
485
697
|
Returns:
|
|
486
|
-
Set of item indices (item_i) that the user has annotated
|
|
698
|
+
Set of item indices (item_i) that the user has annotated.
|
|
699
|
+
Can include both int indices for regular items and string IDs like "welcome_0" for welcome items.
|
|
487
700
|
"""
|
|
488
701
|
log = get_db_log(campaign_id)
|
|
489
702
|
user_items = set()
|
|
@@ -503,13 +716,14 @@ def reset_task(
|
|
|
503
716
|
"""
|
|
504
717
|
Reset the task progress for the user in the specified campaign.
|
|
505
718
|
Saves a reset marker to mask existing annotations.
|
|
506
|
-
|
|
719
|
+
|
|
507
720
|
Note: Dynamic assignment does not support user-level deletion.
|
|
508
721
|
"""
|
|
509
722
|
assignment = tasks_data[campaign_id]["info"]["assignment"]
|
|
510
723
|
if assignment == "dynamic":
|
|
511
724
|
return JSONResponse(
|
|
512
|
-
content="User-level deletion is not supported for dynamic assignments",
|
|
725
|
+
content="User-level deletion is not supported for dynamic assignments",
|
|
726
|
+
status_code=400,
|
|
513
727
|
)
|
|
514
728
|
elif assignment == "task-based":
|
|
515
729
|
# Save reset marker for this user to mask existing annotations
|
|
@@ -519,25 +733,42 @@ def reset_task(
|
|
|
519
733
|
campaign_id,
|
|
520
734
|
{"user_id": user_id, "item_i": item_i, "annotation": RESET_MARKER},
|
|
521
735
|
)
|
|
522
|
-
progress_data[campaign_id][user_id]["progress"] = [
|
|
736
|
+
progress_data[campaign_id][user_id]["progress"] = [None] * num_items
|
|
737
|
+
# Reset welcome items progress if it exists
|
|
738
|
+
if "progress_welcome" in progress_data[campaign_id][user_id]:
|
|
739
|
+
num_welcome = len(progress_data[campaign_id][user_id]["progress_welcome"])
|
|
740
|
+
progress_data[campaign_id][user_id]["progress_welcome"] = [
|
|
741
|
+
False
|
|
742
|
+
] * num_welcome
|
|
523
743
|
_reset_user_time(progress_data, campaign_id, user_id)
|
|
524
744
|
return JSONResponse(content="ok", status_code=200)
|
|
525
745
|
elif assignment == "single-stream":
|
|
526
|
-
# Find all items that this user has annotated
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
746
|
+
# Find all items that this user has annotated (has "completed")
|
|
747
|
+
user_items_to_reset = [
|
|
748
|
+
i
|
|
749
|
+
for i, status in enumerate(progress_data[campaign_id][user_id]["progress"])
|
|
750
|
+
if status == "completed"
|
|
751
|
+
]
|
|
752
|
+
|
|
753
|
+
# Save reset markers for all items this user has touched
|
|
754
|
+
for item_i in user_items_to_reset:
|
|
531
755
|
save_db_payload(
|
|
532
756
|
campaign_id,
|
|
533
757
|
{"user_id": user_id, "item_i": item_i, "annotation": RESET_MARKER},
|
|
534
758
|
)
|
|
535
|
-
|
|
536
|
-
# Reset
|
|
759
|
+
|
|
760
|
+
# Reset the touched regular items in all users' progress (shared pool)
|
|
537
761
|
for uid in progress_data[campaign_id]:
|
|
538
|
-
for item_i in
|
|
539
|
-
progress_data[campaign_id][uid]["progress"][item_i] =
|
|
540
|
-
|
|
762
|
+
for item_i in user_items_to_reset:
|
|
763
|
+
progress_data[campaign_id][uid]["progress"][item_i] = None
|
|
764
|
+
|
|
765
|
+
# Reset all welcome items progress for this user (per-user, not shared)
|
|
766
|
+
if "progress_welcome" in progress_data[campaign_id][user_id]:
|
|
767
|
+
num_welcome = len(progress_data[campaign_id][user_id]["progress_welcome"])
|
|
768
|
+
progress_data[campaign_id][user_id]["progress_welcome"] = [
|
|
769
|
+
False
|
|
770
|
+
] * num_welcome
|
|
771
|
+
|
|
541
772
|
# Reset only the specified user's time
|
|
542
773
|
_reset_user_time(progress_data, campaign_id, user_id)
|
|
543
774
|
return JSONResponse(content="ok", status_code=200)
|
|
@@ -552,35 +783,58 @@ def update_progress(
|
|
|
552
783
|
user_id: str,
|
|
553
784
|
tasks_data: dict,
|
|
554
785
|
progress_data: dict,
|
|
555
|
-
item_i: int,
|
|
786
|
+
item_i: int | str, # Can be int or str like "welcome_0"
|
|
556
787
|
payload: Any,
|
|
557
788
|
) -> JSONResponse:
|
|
558
789
|
"""
|
|
559
790
|
Log the user's response for the specified item in the campaign.
|
|
560
791
|
"""
|
|
792
|
+
# Check if it's a welcome item
|
|
793
|
+
if isinstance(item_i, str) and item_i.startswith("welcome_"):
|
|
794
|
+
welcome_index = int(item_i.split("_")[1])
|
|
795
|
+
# Update only this user's progress_welcome (not shared)
|
|
796
|
+
progress_data[campaign_id][user_id]["progress_welcome"][welcome_index] = (
|
|
797
|
+
"completed"
|
|
798
|
+
)
|
|
799
|
+
return JSONResponse(content={"status": "ok"}, status_code=200)
|
|
800
|
+
|
|
561
801
|
assignment = tasks_data[campaign_id]["info"]["assignment"]
|
|
562
802
|
if assignment == "task-based":
|
|
563
|
-
#
|
|
564
|
-
progress_data[campaign_id][user_id]["progress"][item_i] =
|
|
803
|
+
# Mark as completed for this user
|
|
804
|
+
progress_data[campaign_id][user_id]["progress"][item_i] = "completed"
|
|
565
805
|
return JSONResponse(content={"status": "ok"}, status_code=200)
|
|
566
806
|
elif assignment == "single-stream":
|
|
567
|
-
#
|
|
807
|
+
# Mark as completed for the current user, completed_foreign for others
|
|
568
808
|
for uid in progress_data[campaign_id]:
|
|
569
|
-
progress_data[campaign_id][uid]["progress"][item_i]
|
|
809
|
+
current_status = progress_data[campaign_id][uid]["progress"][item_i]
|
|
810
|
+
if uid == user_id:
|
|
811
|
+
# User who completed it gets "completed"
|
|
812
|
+
progress_data[campaign_id][uid]["progress"][item_i] = "completed"
|
|
813
|
+
elif current_status is None:
|
|
814
|
+
# Other users get "completed_foreign" if not already completed
|
|
815
|
+
progress_data[campaign_id][uid]["progress"][item_i] = (
|
|
816
|
+
"completed_foreign"
|
|
817
|
+
)
|
|
818
|
+
# If already "completed", keep it as "completed"
|
|
570
819
|
return JSONResponse(content="ok", status_code=200)
|
|
571
|
-
|
|
572
|
-
#
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
820
|
+
if assignment == "dynamic":
|
|
821
|
+
# Mark as completed for the current user, completed_foreign for others
|
|
822
|
+
for model in payload["annotation"][0].keys():
|
|
823
|
+
for uid in progress_data[campaign_id]:
|
|
824
|
+
current_status = progress_data[campaign_id][uid]["progress"][item_i][
|
|
825
|
+
model
|
|
826
|
+
]
|
|
827
|
+
if uid == user_id:
|
|
828
|
+
# User who completed it gets "completed"
|
|
829
|
+
progress_data[campaign_id][uid]["progress"][item_i][model] = (
|
|
830
|
+
"completed"
|
|
831
|
+
)
|
|
832
|
+
elif current_status is None:
|
|
833
|
+
# Other users get "completed_foreign" if not already completed
|
|
834
|
+
progress_data[campaign_id][uid]["progress"][item_i][model] = (
|
|
835
|
+
"completed_foreign"
|
|
836
|
+
)
|
|
837
|
+
# If already "completed", keep it as "completed"
|
|
584
838
|
return JSONResponse(content="ok", status_code=200)
|
|
585
839
|
else:
|
|
586
840
|
return JSONResponse(content="Unknown campaign assignment type", status_code=400)
|