pearmut 1.0.2__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pearmut/assignment.py CHANGED
@@ -11,9 +11,19 @@ from .utils import (
11
11
  check_validation_threshold,
12
12
  get_db_log,
13
13
  get_db_log_item,
14
+ is_form_document,
14
15
  save_db_payload,
15
16
  )
16
17
 
18
+ # Public campaign info fields that are sent to the client
19
+ CAMPAIGN_INFO_PUBLIC = {
20
+ "protocol",
21
+ "sliders",
22
+ "textfield",
23
+ "show_model_names",
24
+ "mqm_categories",
25
+ }
26
+
17
27
 
18
28
  def _get_instructions(tasks_data: dict, campaign_id: str) -> str:
19
29
  """Get instructions: custom if provided, else protocol default, else empty."""
@@ -23,7 +33,6 @@ def _get_instructions(tasks_data: dict, campaign_id: str) -> str:
23
33
  return PROTOCOL_INSTRUCTIONS.get(campaign_info.get("protocol", ""), "")
24
34
 
25
35
 
26
-
27
36
  def _completed_response(
28
37
  tasks_data: dict,
29
38
  progress_data: dict,
@@ -55,6 +64,7 @@ def _completed_response(
55
64
  content={
56
65
  "status": "goodbye",
57
66
  "progress": progress,
67
+ "progress_welcome": user_progress["progress_welcome"],
58
68
  "time": user_progress["time"],
59
69
  "token": token,
60
70
  "instructions_goodbye": instructions_goodbye,
@@ -115,12 +125,34 @@ def get_i_item_taskbased(
115
125
  user_id: str,
116
126
  data_all: dict,
117
127
  progress_data: dict,
118
- item_i: int,
128
+ item_i: int | str, # Can be int or str like "welcome_0"
119
129
  ) -> JSONResponse:
120
130
  """
121
131
  Get specific item for task-based protocol.
122
132
  """
123
133
  user_progress = progress_data[campaign_id][user_id]
134
+ progress_welcome = user_progress["progress_welcome"]
135
+
136
+ # if welcome_X, payload is from data_welcome[X], otherwise data[user][X]
137
+ if isinstance(item_i, str) and item_i.startswith("welcome_"):
138
+ actual_index = int(item_i.split("_")[1])
139
+ if actual_index < 0 or actual_index >= len(
140
+ data_all[campaign_id]["data_welcome"]
141
+ ):
142
+ return JSONResponse(
143
+ content="Welcome item index out of range", status_code=400
144
+ )
145
+ payload = data_all[campaign_id]["data_welcome"][actual_index]
146
+ else:
147
+ # Prevent accessing regular items unless all welcome items are complete
148
+ if not all(progress_welcome):
149
+ return JSONResponse(
150
+ content="Complete all welcome items before accessing regular items",
151
+ status_code=400,
152
+ )
153
+ if item_i < 0 or item_i >= len(data_all[campaign_id]["data"][user_id]):
154
+ return JSONResponse(content="Item index out of range", status_code=400)
155
+ payload = data_all[campaign_id]["data"][user_id][item_i]
124
156
 
125
157
  # try to get existing annotations if any
126
158
  items_existing = get_db_log_item(campaign_id, user_id, item_i)
@@ -132,13 +164,13 @@ def get_i_item_taskbased(
132
164
  if "comment" in latest_item:
133
165
  payload_existing["comment"] = latest_item["comment"]
134
166
 
135
- if item_i < 0 or item_i >= len(data_all[campaign_id]["data"][user_id]):
136
- return JSONResponse(content="Item index out of range", status_code=400)
167
+ is_form = is_form_document(payload)
137
168
 
138
169
  return JSONResponse(
139
170
  content={
140
- "status": "ok",
171
+ "status": "form" if is_form else "ok",
141
172
  "progress": user_progress["progress"],
173
+ "progress_welcome": progress_welcome,
142
174
  "time": user_progress["time"],
143
175
  "info": {
144
176
  "item_i": item_i,
@@ -147,9 +179,9 @@ def get_i_item_taskbased(
147
179
  | {
148
180
  k: v
149
181
  for k, v in data_all[campaign_id]["info"].items()
150
- if k in {"protocol", "sliders", "textfield", "show_model_names"}
182
+ if k in CAMPAIGN_INFO_PUBLIC
151
183
  },
152
- "payload": data_all[campaign_id]["data"][user_id][item_i],
184
+ "payload": payload,
153
185
  }
154
186
  | ({"payload_existing": payload_existing} if payload_existing else {}),
155
187
  status_code=200,
@@ -161,16 +193,37 @@ def get_i_item_singlestream(
161
193
  user_id: str,
162
194
  data_all: dict,
163
195
  progress_data: dict,
164
- item_i: int,
196
+ item_i: int | str, # Can be int or str like "welcome_0"
165
197
  ) -> JSONResponse:
166
198
  """
167
199
  Get specific item for single-stream assignment.
168
200
  """
169
201
  user_progress = progress_data[campaign_id][user_id]
202
+ progress_welcome = user_progress["progress_welcome"]
203
+
204
+ # Convert welcome_X string to integer index
205
+ actual_index = item_i
206
+ is_welcome_item = isinstance(item_i, str) and item_i.startswith("welcome_")
207
+ if is_welcome_item:
208
+ actual_index = int(item_i.split("_")[1])
209
+ # Validate against total number of welcome items
210
+ if actual_index < 0 or actual_index >= len(progress_welcome):
211
+ return JSONResponse(
212
+ content="Welcome item index out of range", status_code=400
213
+ )
214
+ else:
215
+ # Prevent accessing regular items unless all welcome items are complete
216
+ if not all(progress_welcome):
217
+ return JSONResponse(
218
+ content="Complete all welcome items before accessing regular items",
219
+ status_code=400,
220
+ )
170
221
 
171
222
  # try to get existing annotations if any
172
- # note the None user_id since it is shared
173
- items_existing = get_db_log_item(campaign_id, None, item_i)
223
+ # use user_id for welcome items (per-user), None for shared items
224
+ items_existing = get_db_log_item(
225
+ campaign_id, user_id if is_welcome_item else None, item_i
226
+ )
174
227
  payload_existing = None
175
228
  if items_existing:
176
229
  # get the latest ones
@@ -179,13 +232,17 @@ def get_i_item_singlestream(
179
232
  if "comment" in latest_item:
180
233
  payload_existing["comment"] = latest_item["comment"]
181
234
 
182
- if item_i < 0 or item_i >= len(data_all[campaign_id]["data"]):
235
+ if actual_index < 0 or actual_index >= len(data_all[campaign_id]["data"]):
183
236
  return JSONResponse(content="Item index out of range", status_code=400)
184
237
 
238
+ payload = data_all[campaign_id]["data"][actual_index]
239
+ is_form = is_form_document(payload)
240
+
185
241
  return JSONResponse(
186
242
  content={
187
- "status": "ok",
243
+ "status": "form" if is_form else "ok",
188
244
  "progress": user_progress["progress"],
245
+ "progress_welcome": progress_welcome,
189
246
  "time": user_progress["time"],
190
247
  "info": {
191
248
  "item_i": item_i,
@@ -194,9 +251,9 @@ def get_i_item_singlestream(
194
251
  | {
195
252
  k: v
196
253
  for k, v in data_all[campaign_id]["info"].items()
197
- if k in {"protocol", "sliders", "textfield", "show_model_names"}
254
+ if k in CAMPAIGN_INFO_PUBLIC
198
255
  },
199
- "payload": data_all[campaign_id]["data"][item_i],
256
+ "payload": payload,
200
257
  }
201
258
  | ({"payload_existing": payload_existing} if payload_existing else {}),
202
259
  status_code=200,
@@ -213,11 +270,56 @@ def get_next_item_taskbased(
213
270
  Get the next item for task-based assignment.
214
271
  """
215
272
  user_progress = progress_data[campaign_id][user_id]
216
- if all(user_progress["progress"]):
273
+ progress_welcome = user_progress["progress_welcome"]
274
+
275
+ # Check if there are incomplete welcome items first
276
+ if not all(progress_welcome):
277
+ # Find first incomplete welcome item
278
+ item_i = next(i for i, v in enumerate(progress_welcome) if not v)
279
+ item_id = f"welcome_{item_i}"
280
+
281
+ # try to get existing annotations if any
282
+ items_existing = get_db_log_item(campaign_id, user_id, item_id)
283
+ payload_existing = None
284
+ if items_existing:
285
+ # get the latest ones
286
+ latest_item = items_existing[-1]
287
+ payload_existing = {"annotation": latest_item["annotation"]}
288
+ if "comment" in latest_item:
289
+ payload_existing["comment"] = latest_item["comment"]
290
+
291
+ payload = data_all[campaign_id]["data_welcome"][item_i]
292
+ is_form = is_form_document(payload)
293
+
294
+ return JSONResponse(
295
+ content={
296
+ "status": "form" if is_form else "ok",
297
+ "progress": user_progress["progress"],
298
+ "progress_welcome": progress_welcome,
299
+ "time": user_progress["time"],
300
+ "info": {
301
+ "item_i": item_id,
302
+ "instructions": _get_instructions(data_all, campaign_id),
303
+ }
304
+ | {
305
+ k: v
306
+ for k, v in data_all[campaign_id]["info"].items()
307
+ if k in {"protocol", "sliders", "textfield", "show_model_names"}
308
+ },
309
+ "payload": payload,
310
+ }
311
+ | ({"payload_existing": payload_existing} if payload_existing else {}),
312
+ status_code=200,
313
+ )
314
+
315
+ # All welcome items complete, proceed with regular items
316
+ if all(v == "completed" for v in user_progress["progress"]):
217
317
  return _completed_response(data_all, progress_data, campaign_id, user_id)
218
318
 
219
319
  # find first incomplete item
220
- item_i = min([i for i, v in enumerate(user_progress["progress"]) if not v])
320
+ item_i = min(
321
+ [i for i, v in enumerate(user_progress["progress"]) if v != "completed"]
322
+ )
221
323
 
222
324
  # try to get existing annotations if any
223
325
  items_existing = get_db_log_item(campaign_id, user_id, item_i)
@@ -229,10 +331,14 @@ def get_next_item_taskbased(
229
331
  if "comment" in latest_item:
230
332
  payload_existing["comment"] = latest_item["comment"]
231
333
 
334
+ payload = data_all[campaign_id]["data"][user_id][item_i]
335
+ is_form = is_form_document(payload)
336
+
232
337
  return JSONResponse(
233
338
  content={
234
- "status": "ok",
339
+ "status": "form" if is_form else "ok",
235
340
  "progress": user_progress["progress"],
341
+ "progress_welcome": progress_welcome,
236
342
  "time": user_progress["time"],
237
343
  "info": {
238
344
  "item_i": item_i,
@@ -241,7 +347,7 @@ def get_next_item_taskbased(
241
347
  | {
242
348
  k: v
243
349
  for k, v in data_all[campaign_id]["info"].items()
244
- if k in {"protocol", "sliders", "textfield", "show_model_names"}
350
+ if k in CAMPAIGN_INFO_PUBLIC
245
351
  },
246
352
  "payload": data_all[campaign_id]["data"][user_id][item_i],
247
353
  }
@@ -266,12 +372,64 @@ def get_next_item_singlestream(
266
372
  """
267
373
  user_progress = progress_data[campaign_id][user_id]
268
374
  progress = user_progress["progress"]
375
+ progress_welcome = user_progress["progress_welcome"]
376
+
377
+ # Check if there are incomplete welcome items first - must complete all before proceeding
378
+ if not all(progress_welcome):
379
+ # Find first incomplete welcome item (sequential, not random)
380
+ item_i = next(i for i, v in enumerate(progress_welcome) if not v)
381
+ item_id = f"welcome_{item_i}"
382
+
383
+ # try to get existing annotations if any
384
+ # note the user_id since welcome items are per-user
385
+ items_existing = get_db_log_item(campaign_id, user_id, item_id)
386
+ payload_existing = None
387
+ if items_existing:
388
+ # get the latest ones
389
+ latest_item = items_existing[-1]
390
+ payload_existing = {"annotation": latest_item["annotation"]}
391
+ if "comment" in latest_item:
392
+ payload_existing["comment"] = latest_item["comment"]
393
+
394
+ payload = data_all[campaign_id]["data"][item_i]
395
+ is_form = is_form_document(payload)
269
396
 
270
- if all(progress):
397
+ return JSONResponse(
398
+ content={
399
+ "status": "form" if is_form else "ok",
400
+ "time": user_progress["time"],
401
+ "progress": progress,
402
+ "progress_welcome": progress_welcome,
403
+ "info": {
404
+ "item_i": item_id,
405
+ "instructions": _get_instructions(data_all, campaign_id),
406
+ }
407
+ | {
408
+ k: v
409
+ for k, v in data_all[campaign_id]["info"].items()
410
+ if k in {"protocol", "sliders", "textfield", "show_model_names"}
411
+ },
412
+ "payload": payload,
413
+ }
414
+ | ({"payload_existing": payload_existing} if payload_existing else {}),
415
+ status_code=200,
416
+ )
417
+
418
+ # All welcome items complete, proceed with regular items
419
+ # Check if user reached docs_per_user limit (if specified)
420
+ if (
421
+ docs_per_user := data_all[campaign_id]["info"].get("docs_per_user")
422
+ ) is not None:
423
+ completed_docs = sum(v == "completed" for v in progress if v)
424
+ if completed_docs >= docs_per_user:
425
+ return _completed_response(data_all, progress_data, campaign_id, user_id)
426
+ elif all(v in {"completed", "completed_foreign"} for v in progress):
271
427
  return _completed_response(data_all, progress_data, campaign_id, user_id)
272
428
 
273
429
  # find a random incomplete item
274
- incomplete_indices = [i for i, v in enumerate(progress) if not v]
430
+ incomplete_indices = [
431
+ i for i, v in enumerate(progress) if v not in {"completed", "completed_foreign"}
432
+ ]
275
433
  item_i = random.choice(incomplete_indices)
276
434
 
277
435
  # try to get existing annotations if any
@@ -285,11 +443,15 @@ def get_next_item_singlestream(
285
443
  if "comment" in latest_item:
286
444
  payload_existing["comment"] = latest_item["comment"]
287
445
 
446
+ payload = data_all[campaign_id]["data"][item_i]
447
+ is_form = is_form_document(payload)
448
+
288
449
  return JSONResponse(
289
450
  content={
290
- "status": "ok",
451
+ "status": "form" if is_form else "ok",
291
452
  "time": user_progress["time"],
292
453
  "progress": progress,
454
+ "progress_welcome": progress_welcome,
293
455
  "info": {
294
456
  "item_i": item_i,
295
457
  "instructions": _get_instructions(data_all, campaign_id),
@@ -297,9 +459,9 @@ def get_next_item_singlestream(
297
459
  | {
298
460
  k: v
299
461
  for k, v in data_all[campaign_id]["info"].items()
300
- if k in {"protocol", "sliders", "textfield", "show_model_names"}
462
+ if k in CAMPAIGN_INFO_PUBLIC
301
463
  },
302
- "payload": data_all[campaign_id]["data"][item_i],
464
+ "payload": payload,
303
465
  }
304
466
  | ({"payload_existing": payload_existing} if payload_existing else {}),
305
467
  status_code=200,
@@ -319,8 +481,8 @@ def get_next_item_dynamic(
319
481
 
320
482
  In this mode, items are selected based on the current performance of models:
321
483
  1. Contrastive comparison: `dynamic_contrastive_models` models are randomly selected and shown per item
322
- 2. First phase: Each model gets `dynamic_first` annotations with fully random selection
323
- 3. After first phase: Top `dynamic_top` models are identified, K randomly selected from them
484
+ 2. Warmup phase: Each model gets `dynamic_warmup` annotations with fully random selection
485
+ 3. After warmup phase: Top `dynamic_top` models are identified, K randomly selected from them
324
486
  4. Items with least annotations for the selected models are prioritized
325
487
  5. With probability `dynamic_backoff`, uniformly random selection is used instead
326
488
  """
@@ -328,18 +490,69 @@ def get_next_item_dynamic(
328
490
 
329
491
  user_progress = progress_data[campaign_id][user_id]
330
492
  campaign_data = tasks_data[campaign_id]
493
+ progress_welcome = user_progress["progress_welcome"]
494
+
495
+ # Check if there are incomplete welcome items first - must complete all before proceeding
496
+ if not all(progress_welcome):
497
+ # Find first incomplete welcome item (sequential)
498
+ item_i = next(i for i, v in enumerate(progress_welcome) if not v)
499
+ item_id = f"welcome_{item_i}"
500
+
501
+ # try to get existing annotations if any
502
+ # note the user_id since welcome items are per-user
503
+ items_existing = get_db_log_item(campaign_id, user_id, item_id)
504
+ payload_existing = None
505
+ if items_existing:
506
+ # get the latest ones
507
+ latest_item = items_existing[-1]
508
+ payload_existing = {"annotation": latest_item["annotation"]}
509
+ if "comment" in latest_item:
510
+ payload_existing["comment"] = latest_item["comment"]
511
+
512
+ return JSONResponse(
513
+ content={
514
+ "status": "ok",
515
+ "time": user_progress["time"],
516
+ "progress": user_progress["progress"],
517
+ "progress_welcome": progress_welcome,
518
+ "info": {
519
+ "item_i": item_id,
520
+ "instructions": _get_instructions(campaign_data, campaign_id),
521
+ }
522
+ | {
523
+ k: v
524
+ for k, v in campaign_data["info"].items()
525
+ if k in {"protocol", "sliders", "textfield", "show_model_names"}
526
+ },
527
+ "payload": campaign_data["data"][item_i],
528
+ }
529
+ | ({"payload_existing": payload_existing} if payload_existing else {}),
530
+ status_code=200,
531
+ )
331
532
 
332
533
  # Get all unique models in the campaign (all items must have all models)
333
534
  all_models = list(set(campaign_data["data"][0][0]["tgt"].keys()))
334
535
 
335
- # Check if completed (all models completed for all items)
336
- # NOTE: this will rarely trigger but we don't have a good way to know when to end anyway for now
337
- if all(len(v) == len(all_models) for v in user_progress["progress"]):
536
+ # Check if completed
537
+ # First check if docs_per_user limit is reached
538
+ if (docs_per_user := campaign_data["info"].get("docs_per_user")) is not None:
539
+ # Count the number of annotations this user completed, summed across all models
540
+ completed_docs = sum(
541
+ v == "completed" for mv in user_progress["progress"] for v in mv.values()
542
+ )
543
+ if completed_docs >= docs_per_user:
544
+ return _completed_response(tasks_data, progress_data, campaign_id, user_id)
545
+ # Otherwise check if all models completed for all items
546
+ elif all(
547
+ v in {"completed", "completed_foreign"}
548
+ for mv in user_progress["progress"]
549
+ for v in mv.values()
550
+ ):
338
551
  return _completed_response(tasks_data, progress_data, campaign_id, user_id)
339
552
 
340
553
  # Get configuration parameters
341
554
  dynamic_top = campaign_data["info"].get("dynamic_top", 2)
342
- dynamic_first = campaign_data["info"].get("dynamic_first", 5)
555
+ dynamic_warmup = campaign_data["info"].get("dynamic_warmup", 5)
343
556
  dynamic_contrastive_models = campaign_data["info"].get(
344
557
  "dynamic_contrastive_models", 1
345
558
  )
@@ -359,18 +572,18 @@ def get_next_item_dynamic(
359
572
  model_total_counts[model] += 1
360
573
 
361
574
  # Check if we're still in the warmup phase (collecting initial data)
362
- in_first_phase = any(
363
- model_total_counts.get(model, 0) < dynamic_first for model in all_models
575
+ in_warmup_phase = any(
576
+ model_total_counts.get(model, 0) < dynamic_warmup for model in all_models
364
577
  )
365
578
 
366
579
  # Select which models to show
367
- if in_first_phase:
580
+ if in_warmup_phase:
368
581
  # Warmup phase or backoff: select models that don't have enough annotations yet
369
582
  selected_models = random.sample(
370
583
  [
371
584
  model
372
585
  for model in all_models
373
- if model_total_counts.get(model, 0) < dynamic_first
586
+ if model_total_counts.get(model, 0) < dynamic_warmup
374
587
  ],
375
588
  k=min(dynamic_contrastive_models, len(all_models)),
376
589
  )
@@ -404,20 +617,19 @@ def get_next_item_dynamic(
404
617
  top_models, k=min(dynamic_contrastive_models, len(top_models))
405
618
  )
406
619
 
407
- # Find incomplete items for the selected models (items where not all selected models are done)
408
- item_annotation_counts = {
409
- i: sum(model in completed_models for model in selected_models)
410
- for i, completed_models in enumerate(user_progress["progress"])
411
- }
412
-
413
- # Select item with minimum annotations (with random tiebreaking)
414
- min_annotations = min(item_annotation_counts.values())
415
- items_with_min = [
416
- item_i
417
- for item_i, count in item_annotation_counts.items()
418
- if count == min_annotations
620
+ # Find incomplete items (at least one model is neither completed nor completed_foreign)
621
+ incomplete_indices = [
622
+ i
623
+ for i, mv in enumerate(user_progress["progress"])
624
+ if not all(v in {"completed", "completed_foreign"} for v in mv.values())
419
625
  ]
420
- item_i = random.choice(items_with_min)
626
+
627
+ # If no incomplete items, user (and everyone) is done
628
+ if not incomplete_indices:
629
+ return _completed_response(tasks_data, progress_data, campaign_id, user_id)
630
+
631
+ # Select a random incomplete item
632
+ item_i = random.choice(incomplete_indices)
421
633
 
422
634
  # Prune the payload to only include selected models
423
635
  original_item = campaign_data["data"][item_i]
@@ -458,7 +670,7 @@ def get_next_item_dynamic(
458
670
  | {
459
671
  k: v
460
672
  for k, v in campaign_data["info"].items()
461
- if k in {"protocol", "sliders", "textfield", "show_model_names"}
673
+ if k in CAMPAIGN_INFO_PUBLIC
462
674
  },
463
675
  "payload": pruned_item,
464
676
  },
@@ -474,16 +686,17 @@ def _reset_user_time(progress_data: dict, campaign_id: str, user_id: str) -> Non
474
686
  progress_data[campaign_id][user_id]["validations"] = {}
475
687
 
476
688
 
477
- def _get_user_annotated_items(campaign_id: str, user_id: str) -> set[int]:
689
+ def _get_user_annotated_items(campaign_id: str, user_id: str) -> set[int | str]:
478
690
  """
479
691
  Get the set of item indices that a specific user has annotated.
480
-
692
+
481
693
  Args:
482
694
  campaign_id: The campaign identifier
483
695
  user_id: The user identifier
484
-
696
+
485
697
  Returns:
486
- Set of item indices (item_i) that the user has annotated
698
+ Set of item indices (item_i) that the user has annotated.
699
+ Can include both int indices for regular items and string IDs like "welcome_0" for welcome items.
487
700
  """
488
701
  log = get_db_log(campaign_id)
489
702
  user_items = set()
@@ -503,13 +716,14 @@ def reset_task(
503
716
  """
504
717
  Reset the task progress for the user in the specified campaign.
505
718
  Saves a reset marker to mask existing annotations.
506
-
719
+
507
720
  Note: Dynamic assignment does not support user-level deletion.
508
721
  """
509
722
  assignment = tasks_data[campaign_id]["info"]["assignment"]
510
723
  if assignment == "dynamic":
511
724
  return JSONResponse(
512
- content="User-level deletion is not supported for dynamic assignments", status_code=400
725
+ content="User-level deletion is not supported for dynamic assignments",
726
+ status_code=400,
513
727
  )
514
728
  elif assignment == "task-based":
515
729
  # Save reset marker for this user to mask existing annotations
@@ -519,25 +733,42 @@ def reset_task(
519
733
  campaign_id,
520
734
  {"user_id": user_id, "item_i": item_i, "annotation": RESET_MARKER},
521
735
  )
522
- progress_data[campaign_id][user_id]["progress"] = [False] * num_items
736
+ progress_data[campaign_id][user_id]["progress"] = [None] * num_items
737
+ # Reset welcome items progress if it exists
738
+ if "progress_welcome" in progress_data[campaign_id][user_id]:
739
+ num_welcome = len(progress_data[campaign_id][user_id]["progress_welcome"])
740
+ progress_data[campaign_id][user_id]["progress_welcome"] = [
741
+ False
742
+ ] * num_welcome
523
743
  _reset_user_time(progress_data, campaign_id, user_id)
524
744
  return JSONResponse(content="ok", status_code=200)
525
745
  elif assignment == "single-stream":
526
- # Find all items that this user has annotated
527
- user_items = _get_user_annotated_items(campaign_id, user_id)
528
-
529
- # Save reset markers only for items this user has touched
530
- for item_i in user_items:
746
+ # Find all items that this user has annotated (has "completed")
747
+ user_items_to_reset = [
748
+ i
749
+ for i, status in enumerate(progress_data[campaign_id][user_id]["progress"])
750
+ if status == "completed"
751
+ ]
752
+
753
+ # Save reset markers for all items this user has touched
754
+ for item_i in user_items_to_reset:
531
755
  save_db_payload(
532
756
  campaign_id,
533
757
  {"user_id": user_id, "item_i": item_i, "annotation": RESET_MARKER},
534
758
  )
535
-
536
- # Reset only the touched items in all users' progress (shared pool)
759
+
760
+ # Reset the touched regular items in all users' progress (shared pool)
537
761
  for uid in progress_data[campaign_id]:
538
- for item_i in user_items:
539
- progress_data[campaign_id][uid]["progress"][item_i] = False
540
-
762
+ for item_i in user_items_to_reset:
763
+ progress_data[campaign_id][uid]["progress"][item_i] = None
764
+
765
+ # Reset all welcome items progress for this user (per-user, not shared)
766
+ if "progress_welcome" in progress_data[campaign_id][user_id]:
767
+ num_welcome = len(progress_data[campaign_id][user_id]["progress_welcome"])
768
+ progress_data[campaign_id][user_id]["progress_welcome"] = [
769
+ False
770
+ ] * num_welcome
771
+
541
772
  # Reset only the specified user's time
542
773
  _reset_user_time(progress_data, campaign_id, user_id)
543
774
  return JSONResponse(content="ok", status_code=200)
@@ -552,35 +783,58 @@ def update_progress(
552
783
  user_id: str,
553
784
  tasks_data: dict,
554
785
  progress_data: dict,
555
- item_i: int,
786
+ item_i: int | str, # Can be int or str like "welcome_0"
556
787
  payload: Any,
557
788
  ) -> JSONResponse:
558
789
  """
559
790
  Log the user's response for the specified item in the campaign.
560
791
  """
792
+ # Check if it's a welcome item
793
+ if isinstance(item_i, str) and item_i.startswith("welcome_"):
794
+ welcome_index = int(item_i.split("_")[1])
795
+ # Update only this user's progress_welcome (not shared)
796
+ progress_data[campaign_id][user_id]["progress_welcome"][welcome_index] = (
797
+ "completed"
798
+ )
799
+ return JSONResponse(content={"status": "ok"}, status_code=200)
800
+
561
801
  assignment = tasks_data[campaign_id]["info"]["assignment"]
562
802
  if assignment == "task-based":
563
- # even if it's already set it should be fine
564
- progress_data[campaign_id][user_id]["progress"][item_i] = True
803
+ # Mark as completed for this user
804
+ progress_data[campaign_id][user_id]["progress"][item_i] = "completed"
565
805
  return JSONResponse(content={"status": "ok"}, status_code=200)
566
806
  elif assignment == "single-stream":
567
- # progress all users
807
+ # Mark as completed for the current user, completed_foreign for others
568
808
  for uid in progress_data[campaign_id]:
569
- progress_data[campaign_id][uid]["progress"][item_i] = True
809
+ current_status = progress_data[campaign_id][uid]["progress"][item_i]
810
+ if uid == user_id:
811
+ # User who completed it gets "completed"
812
+ progress_data[campaign_id][uid]["progress"][item_i] = "completed"
813
+ elif current_status is None:
814
+ # Other users get "completed_foreign" if not already completed
815
+ progress_data[campaign_id][uid]["progress"][item_i] = (
816
+ "completed_foreign"
817
+ )
818
+ # If already "completed", keep it as "completed"
570
819
  return JSONResponse(content="ok", status_code=200)
571
- elif assignment == "dynamic":
572
- # For dynamic, track which models were annotated
573
- # Extract models from the payload annotation
574
- annotated_models = []
575
- if "annotation" in payload:
576
- for annotation_item in payload.get("annotation", []):
577
- if isinstance(annotation_item, dict):
578
- annotated_models.extend(annotation_item.keys())
579
-
580
- # Update progress for all users (shared pool)
581
- for uid in progress_data[campaign_id]:
582
- # Add the newly annotated models
583
- progress_data[campaign_id][uid]["progress"][item_i].extend(annotated_models)
820
+ if assignment == "dynamic":
821
+ # Mark as completed for the current user, completed_foreign for others
822
+ for model in payload["annotation"][0].keys():
823
+ for uid in progress_data[campaign_id]:
824
+ current_status = progress_data[campaign_id][uid]["progress"][item_i][
825
+ model
826
+ ]
827
+ if uid == user_id:
828
+ # User who completed it gets "completed"
829
+ progress_data[campaign_id][uid]["progress"][item_i][model] = (
830
+ "completed"
831
+ )
832
+ elif current_status is None:
833
+ # Other users get "completed_foreign" if not already completed
834
+ progress_data[campaign_id][uid]["progress"][item_i][model] = (
835
+ "completed_foreign"
836
+ )
837
+ # If already "completed", keep it as "completed"
584
838
  return JSONResponse(content="ok", status_code=200)
585
839
  else:
586
840
  return JSONResponse(content="Unknown campaign assignment type", status_code=400)