pearmut 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pearmut/assignment.py CHANGED
@@ -1,5 +1,4 @@
1
1
  import collections
2
- import copy
3
2
  import random
4
3
  import statistics
5
4
  from typing import Any
@@ -12,9 +11,19 @@ from .utils import (
12
11
  check_validation_threshold,
13
12
  get_db_log,
14
13
  get_db_log_item,
14
+ is_form_document,
15
15
  save_db_payload,
16
16
  )
17
17
 
18
+ # Public campaign info fields that are sent to the client
19
+ CAMPAIGN_INFO_PUBLIC = {
20
+ "protocol",
21
+ "sliders",
22
+ "textfield",
23
+ "show_model_names",
24
+ "mqm_categories",
25
+ }
26
+
18
27
 
19
28
  def _get_instructions(tasks_data: dict, campaign_id: str) -> str:
20
29
  """Get instructions: custom if provided, else protocol default, else empty."""
@@ -24,7 +33,6 @@ def _get_instructions(tasks_data: dict, campaign_id: str) -> str:
24
33
  return PROTOCOL_INSTRUCTIONS.get(campaign_info.get("protocol", ""), "")
25
34
 
26
35
 
27
-
28
36
  def _completed_response(
29
37
  tasks_data: dict,
30
38
  progress_data: dict,
@@ -56,6 +64,7 @@ def _completed_response(
56
64
  content={
57
65
  "status": "goodbye",
58
66
  "progress": progress,
67
+ "progress_welcome": user_progress["progress_welcome"],
59
68
  "time": user_progress["time"],
60
69
  "token": token,
61
70
  "instructions_goodbye": instructions_goodbye,
@@ -116,12 +125,34 @@ def get_i_item_taskbased(
116
125
  user_id: str,
117
126
  data_all: dict,
118
127
  progress_data: dict,
119
- item_i: int,
128
+ item_i: int | str, # Can be int or str like "welcome_0"
120
129
  ) -> JSONResponse:
121
130
  """
122
131
  Get specific item for task-based protocol.
123
132
  """
124
133
  user_progress = progress_data[campaign_id][user_id]
134
+ progress_welcome = user_progress["progress_welcome"]
135
+
136
+ # if welcome_X, payload is from data_welcome[X], otherwise data[user][X]
137
+ if isinstance(item_i, str) and item_i.startswith("welcome_"):
138
+ actual_index = int(item_i.split("_")[1])
139
+ if actual_index < 0 or actual_index >= len(
140
+ data_all[campaign_id]["data_welcome"]
141
+ ):
142
+ return JSONResponse(
143
+ content="Welcome item index out of range", status_code=400
144
+ )
145
+ payload = data_all[campaign_id]["data_welcome"][actual_index]
146
+ else:
147
+ # Prevent accessing regular items unless all welcome items are complete
148
+ if not all(progress_welcome):
149
+ return JSONResponse(
150
+ content="Complete all welcome items before accessing regular items",
151
+ status_code=400,
152
+ )
153
+ if item_i < 0 or item_i >= len(data_all[campaign_id]["data"][user_id]):
154
+ return JSONResponse(content="Item index out of range", status_code=400)
155
+ payload = data_all[campaign_id]["data"][user_id][item_i]
125
156
 
126
157
  # try to get existing annotations if any
127
158
  items_existing = get_db_log_item(campaign_id, user_id, item_i)
@@ -133,13 +164,13 @@ def get_i_item_taskbased(
133
164
  if "comment" in latest_item:
134
165
  payload_existing["comment"] = latest_item["comment"]
135
166
 
136
- if item_i < 0 or item_i >= len(data_all[campaign_id]["data"][user_id]):
137
- return JSONResponse(content="Item index out of range", status_code=400)
167
+ is_form = is_form_document(payload)
138
168
 
139
169
  return JSONResponse(
140
170
  content={
141
- "status": "ok",
171
+ "status": "form" if is_form else "ok",
142
172
  "progress": user_progress["progress"],
173
+ "progress_welcome": progress_welcome,
143
174
  "time": user_progress["time"],
144
175
  "info": {
145
176
  "item_i": item_i,
@@ -148,9 +179,9 @@ def get_i_item_taskbased(
148
179
  | {
149
180
  k: v
150
181
  for k, v in data_all[campaign_id]["info"].items()
151
- if k in {"protocol", "sliders"}
182
+ if k in CAMPAIGN_INFO_PUBLIC
152
183
  },
153
- "payload": data_all[campaign_id]["data"][user_id][item_i],
184
+ "payload": payload,
154
185
  }
155
186
  | ({"payload_existing": payload_existing} if payload_existing else {}),
156
187
  status_code=200,
@@ -162,16 +193,37 @@ def get_i_item_singlestream(
162
193
  user_id: str,
163
194
  data_all: dict,
164
195
  progress_data: dict,
165
- item_i: int,
196
+ item_i: int | str, # Can be int or str like "welcome_0"
166
197
  ) -> JSONResponse:
167
198
  """
168
199
  Get specific item for single-stream assignment.
169
200
  """
170
201
  user_progress = progress_data[campaign_id][user_id]
202
+ progress_welcome = user_progress["progress_welcome"]
203
+
204
+ # Convert welcome_X string to integer index
205
+ actual_index = item_i
206
+ is_welcome_item = isinstance(item_i, str) and item_i.startswith("welcome_")
207
+ if is_welcome_item:
208
+ actual_index = int(item_i.split("_")[1])
209
+ # Validate against total number of welcome items
210
+ if actual_index < 0 or actual_index >= len(progress_welcome):
211
+ return JSONResponse(
212
+ content="Welcome item index out of range", status_code=400
213
+ )
214
+ else:
215
+ # Prevent accessing regular items unless all welcome items are complete
216
+ if not all(progress_welcome):
217
+ return JSONResponse(
218
+ content="Complete all welcome items before accessing regular items",
219
+ status_code=400,
220
+ )
171
221
 
172
222
  # try to get existing annotations if any
173
- # note the None user_id since it is shared
174
- items_existing = get_db_log_item(campaign_id, None, item_i)
223
+ # use user_id for welcome items (per-user), None for shared items
224
+ items_existing = get_db_log_item(
225
+ campaign_id, user_id if is_welcome_item else None, item_i
226
+ )
175
227
  payload_existing = None
176
228
  if items_existing:
177
229
  # get the latest ones
@@ -180,13 +232,17 @@ def get_i_item_singlestream(
180
232
  if "comment" in latest_item:
181
233
  payload_existing["comment"] = latest_item["comment"]
182
234
 
183
- if item_i < 0 or item_i >= len(data_all[campaign_id]["data"]):
235
+ if actual_index < 0 or actual_index >= len(data_all[campaign_id]["data"]):
184
236
  return JSONResponse(content="Item index out of range", status_code=400)
185
237
 
238
+ payload = data_all[campaign_id]["data"][actual_index]
239
+ is_form = is_form_document(payload)
240
+
186
241
  return JSONResponse(
187
242
  content={
188
- "status": "ok",
243
+ "status": "form" if is_form else "ok",
189
244
  "progress": user_progress["progress"],
245
+ "progress_welcome": progress_welcome,
190
246
  "time": user_progress["time"],
191
247
  "info": {
192
248
  "item_i": item_i,
@@ -195,9 +251,9 @@ def get_i_item_singlestream(
195
251
  | {
196
252
  k: v
197
253
  for k, v in data_all[campaign_id]["info"].items()
198
- if k in {"protocol", "sliders"}
254
+ if k in CAMPAIGN_INFO_PUBLIC
199
255
  },
200
- "payload": data_all[campaign_id]["data"][item_i],
256
+ "payload": payload,
201
257
  }
202
258
  | ({"payload_existing": payload_existing} if payload_existing else {}),
203
259
  status_code=200,
@@ -214,11 +270,56 @@ def get_next_item_taskbased(
214
270
  Get the next item for task-based assignment.
215
271
  """
216
272
  user_progress = progress_data[campaign_id][user_id]
217
- if all(user_progress["progress"]):
273
+ progress_welcome = user_progress["progress_welcome"]
274
+
275
+ # Check if there are incomplete welcome items first
276
+ if not all(progress_welcome):
277
+ # Find first incomplete welcome item
278
+ item_i = next(i for i, v in enumerate(progress_welcome) if not v)
279
+ item_id = f"welcome_{item_i}"
280
+
281
+ # try to get existing annotations if any
282
+ items_existing = get_db_log_item(campaign_id, user_id, item_id)
283
+ payload_existing = None
284
+ if items_existing:
285
+ # get the latest ones
286
+ latest_item = items_existing[-1]
287
+ payload_existing = {"annotation": latest_item["annotation"]}
288
+ if "comment" in latest_item:
289
+ payload_existing["comment"] = latest_item["comment"]
290
+
291
+ payload = data_all[campaign_id]["data_welcome"][item_i]
292
+ is_form = is_form_document(payload)
293
+
294
+ return JSONResponse(
295
+ content={
296
+ "status": "form" if is_form else "ok",
297
+ "progress": user_progress["progress"],
298
+ "progress_welcome": progress_welcome,
299
+ "time": user_progress["time"],
300
+ "info": {
301
+ "item_i": item_id,
302
+ "instructions": _get_instructions(data_all, campaign_id),
303
+ }
304
+ | {
305
+ k: v
306
+ for k, v in data_all[campaign_id]["info"].items()
307
+ if k in {"protocol", "sliders", "textfield", "show_model_names"}
308
+ },
309
+ "payload": payload,
310
+ }
311
+ | ({"payload_existing": payload_existing} if payload_existing else {}),
312
+ status_code=200,
313
+ )
314
+
315
+ # All welcome items complete, proceed with regular items
316
+ if all(v == "completed" for v in user_progress["progress"]):
218
317
  return _completed_response(data_all, progress_data, campaign_id, user_id)
219
318
 
220
319
  # find first incomplete item
221
- item_i = min([i for i, v in enumerate(user_progress["progress"]) if not v])
320
+ item_i = min(
321
+ [i for i, v in enumerate(user_progress["progress"]) if v != "completed"]
322
+ )
222
323
 
223
324
  # try to get existing annotations if any
224
325
  items_existing = get_db_log_item(campaign_id, user_id, item_i)
@@ -230,10 +331,14 @@ def get_next_item_taskbased(
230
331
  if "comment" in latest_item:
231
332
  payload_existing["comment"] = latest_item["comment"]
232
333
 
334
+ payload = data_all[campaign_id]["data"][user_id][item_i]
335
+ is_form = is_form_document(payload)
336
+
233
337
  return JSONResponse(
234
338
  content={
235
- "status": "ok",
339
+ "status": "form" if is_form else "ok",
236
340
  "progress": user_progress["progress"],
341
+ "progress_welcome": progress_welcome,
237
342
  "time": user_progress["time"],
238
343
  "info": {
239
344
  "item_i": item_i,
@@ -242,7 +347,7 @@ def get_next_item_taskbased(
242
347
  | {
243
348
  k: v
244
349
  for k, v in data_all[campaign_id]["info"].items()
245
- if k in {"protocol", "sliders"}
350
+ if k in CAMPAIGN_INFO_PUBLIC
246
351
  },
247
352
  "payload": data_all[campaign_id]["data"][user_id][item_i],
248
353
  }
@@ -267,12 +372,64 @@ def get_next_item_singlestream(
267
372
  """
268
373
  user_progress = progress_data[campaign_id][user_id]
269
374
  progress = user_progress["progress"]
375
+ progress_welcome = user_progress["progress_welcome"]
376
+
377
+ # Check if there are incomplete welcome items first - must complete all before proceeding
378
+ if not all(progress_welcome):
379
+ # Find first incomplete welcome item (sequential, not random)
380
+ item_i = next(i for i, v in enumerate(progress_welcome) if not v)
381
+ item_id = f"welcome_{item_i}"
382
+
383
+ # try to get existing annotations if any
384
+ # note the user_id since welcome items are per-user
385
+ items_existing = get_db_log_item(campaign_id, user_id, item_id)
386
+ payload_existing = None
387
+ if items_existing:
388
+ # get the latest ones
389
+ latest_item = items_existing[-1]
390
+ payload_existing = {"annotation": latest_item["annotation"]}
391
+ if "comment" in latest_item:
392
+ payload_existing["comment"] = latest_item["comment"]
393
+
394
+ payload = data_all[campaign_id]["data"][item_i]
395
+ is_form = is_form_document(payload)
270
396
 
271
- if all(progress):
397
+ return JSONResponse(
398
+ content={
399
+ "status": "form" if is_form else "ok",
400
+ "time": user_progress["time"],
401
+ "progress": progress,
402
+ "progress_welcome": progress_welcome,
403
+ "info": {
404
+ "item_i": item_id,
405
+ "instructions": _get_instructions(data_all, campaign_id),
406
+ }
407
+ | {
408
+ k: v
409
+ for k, v in data_all[campaign_id]["info"].items()
410
+ if k in {"protocol", "sliders", "textfield", "show_model_names"}
411
+ },
412
+ "payload": payload,
413
+ }
414
+ | ({"payload_existing": payload_existing} if payload_existing else {}),
415
+ status_code=200,
416
+ )
417
+
418
+ # All welcome items complete, proceed with regular items
419
+ # Check if user reached docs_per_user limit (if specified)
420
+ if (
421
+ docs_per_user := data_all[campaign_id]["info"].get("docs_per_user")
422
+ ) is not None:
423
+ completed_docs = sum(v == "completed" for v in progress if v)
424
+ if completed_docs >= docs_per_user:
425
+ return _completed_response(data_all, progress_data, campaign_id, user_id)
426
+ elif all(v in {"completed", "completed_foreign"} for v in progress):
272
427
  return _completed_response(data_all, progress_data, campaign_id, user_id)
273
428
 
274
429
  # find a random incomplete item
275
- incomplete_indices = [i for i, v in enumerate(progress) if not v]
430
+ incomplete_indices = [
431
+ i for i, v in enumerate(progress) if v not in {"completed", "completed_foreign"}
432
+ ]
276
433
  item_i = random.choice(incomplete_indices)
277
434
 
278
435
  # try to get existing annotations if any
@@ -286,11 +443,15 @@ def get_next_item_singlestream(
286
443
  if "comment" in latest_item:
287
444
  payload_existing["comment"] = latest_item["comment"]
288
445
 
446
+ payload = data_all[campaign_id]["data"][item_i]
447
+ is_form = is_form_document(payload)
448
+
289
449
  return JSONResponse(
290
450
  content={
291
- "status": "ok",
451
+ "status": "form" if is_form else "ok",
292
452
  "time": user_progress["time"],
293
453
  "progress": progress,
454
+ "progress_welcome": progress_welcome,
294
455
  "info": {
295
456
  "item_i": item_i,
296
457
  "instructions": _get_instructions(data_all, campaign_id),
@@ -298,9 +459,9 @@ def get_next_item_singlestream(
298
459
  | {
299
460
  k: v
300
461
  for k, v in data_all[campaign_id]["info"].items()
301
- if k in {"protocol", "sliders"}
462
+ if k in CAMPAIGN_INFO_PUBLIC
302
463
  },
303
- "payload": data_all[campaign_id]["data"][item_i],
464
+ "payload": payload,
304
465
  }
305
466
  | ({"payload_existing": payload_existing} if payload_existing else {}),
306
467
  status_code=200,
@@ -320,8 +481,8 @@ def get_next_item_dynamic(
320
481
 
321
482
  In this mode, items are selected based on the current performance of models:
322
483
  1. Contrastive comparison: `dynamic_contrastive_models` models are randomly selected and shown per item
323
- 2. First phase: Each model gets `dynamic_first` annotations with fully random selection
324
- 3. After first phase: Top `dynamic_top` models are identified, K randomly selected from them
484
+ 2. Warmup phase: Each model gets `dynamic_warmup` annotations with fully random selection
485
+ 3. After warmup phase: Top `dynamic_top` models are identified, K randomly selected from them
325
486
  4. Items with least annotations for the selected models are prioritized
326
487
  5. With probability `dynamic_backoff`, uniformly random selection is used instead
327
488
  """
@@ -329,18 +490,69 @@ def get_next_item_dynamic(
329
490
 
330
491
  user_progress = progress_data[campaign_id][user_id]
331
492
  campaign_data = tasks_data[campaign_id]
493
+ progress_welcome = user_progress["progress_welcome"]
494
+
495
+ # Check if there are incomplete welcome items first - must complete all before proceeding
496
+ if not all(progress_welcome):
497
+ # Find first incomplete welcome item (sequential)
498
+ item_i = next(i for i, v in enumerate(progress_welcome) if not v)
499
+ item_id = f"welcome_{item_i}"
500
+
501
+ # try to get existing annotations if any
502
+ # note the user_id since welcome items are per-user
503
+ items_existing = get_db_log_item(campaign_id, user_id, item_id)
504
+ payload_existing = None
505
+ if items_existing:
506
+ # get the latest ones
507
+ latest_item = items_existing[-1]
508
+ payload_existing = {"annotation": latest_item["annotation"]}
509
+ if "comment" in latest_item:
510
+ payload_existing["comment"] = latest_item["comment"]
511
+
512
+ return JSONResponse(
513
+ content={
514
+ "status": "ok",
515
+ "time": user_progress["time"],
516
+ "progress": user_progress["progress"],
517
+ "progress_welcome": progress_welcome,
518
+ "info": {
519
+ "item_i": item_id,
520
+ "instructions": _get_instructions(campaign_data, campaign_id),
521
+ }
522
+ | {
523
+ k: v
524
+ for k, v in campaign_data["info"].items()
525
+ if k in {"protocol", "sliders", "textfield", "show_model_names"}
526
+ },
527
+ "payload": campaign_data["data"][item_i],
528
+ }
529
+ | ({"payload_existing": payload_existing} if payload_existing else {}),
530
+ status_code=200,
531
+ )
332
532
 
333
533
  # Get all unique models in the campaign (all items must have all models)
334
534
  all_models = list(set(campaign_data["data"][0][0]["tgt"].keys()))
335
535
 
336
- # Check if completed (all models completed for all items)
337
- # NOTE: this will rarely trigger but we don't have a good way to know when to end anyway for now
338
- if all(len(v) == len(all_models) for v in user_progress["progress"]):
536
+ # Check if completed
537
+ # First check if docs_per_user limit is reached
538
+ if (docs_per_user := campaign_data["info"].get("docs_per_user")) is not None:
539
+ # Count this user's "completed" annotations across all items and models
540
+ completed_docs = sum(
541
+ v == "completed" for mv in user_progress["progress"] for v in mv.values()
542
+ )
543
+ if completed_docs >= docs_per_user:
544
+ return _completed_response(tasks_data, progress_data, campaign_id, user_id)
545
+ # Otherwise check if all models completed for all items
546
+ elif all(
547
+ v in {"completed", "completed_foreign"}
548
+ for mv in user_progress["progress"]
549
+ for v in mv.values()
550
+ ):
339
551
  return _completed_response(tasks_data, progress_data, campaign_id, user_id)
340
552
 
341
553
  # Get configuration parameters
342
554
  dynamic_top = campaign_data["info"].get("dynamic_top", 2)
343
- dynamic_first = campaign_data["info"].get("dynamic_first", 5)
555
+ dynamic_warmup = campaign_data["info"].get("dynamic_warmup", 5)
344
556
  dynamic_contrastive_models = campaign_data["info"].get(
345
557
  "dynamic_contrastive_models", 1
346
558
  )
@@ -360,18 +572,18 @@ def get_next_item_dynamic(
360
572
  model_total_counts[model] += 1
361
573
 
362
574
  # Check if we're still in the warmup phase (collecting initial data)
363
- in_first_phase = any(
364
- model_total_counts.get(model, 0) < dynamic_first for model in all_models
575
+ in_warmup_phase = any(
576
+ model_total_counts.get(model, 0) < dynamic_warmup for model in all_models
365
577
  )
366
578
 
367
579
  # Select which models to show
368
- if in_first_phase:
580
+ if in_warmup_phase:
369
581
  # Warmup phase or backoff: select models that don't have enough annotations yet
370
582
  selected_models = random.sample(
371
583
  [
372
584
  model
373
585
  for model in all_models
374
- if model_total_counts.get(model, 0) < dynamic_first
586
+ if model_total_counts.get(model, 0) < dynamic_warmup
375
587
  ],
376
588
  k=min(dynamic_contrastive_models, len(all_models)),
377
589
  )
@@ -405,20 +617,19 @@ def get_next_item_dynamic(
405
617
  top_models, k=min(dynamic_contrastive_models, len(top_models))
406
618
  )
407
619
 
408
- # Find incomplete items for the selected models (items where not all selected models are done)
409
- item_annotation_counts = {
410
- i: sum(model in completed_models for model in selected_models)
411
- for i, completed_models in enumerate(user_progress["progress"])
412
- }
413
-
414
- # Select item with minimum annotations (with random tiebreaking)
415
- min_annotations = min(item_annotation_counts.values())
416
- items_with_min = [
417
- item_i
418
- for item_i, count in item_annotation_counts.items()
419
- if count == min_annotations
620
+ # Find incomplete items (at least one model is neither completed nor completed_foreign)
621
+ incomplete_indices = [
622
+ i
623
+ for i, mv in enumerate(user_progress["progress"])
624
+ if not all(v in {"completed", "completed_foreign"} for v in mv.values())
420
625
  ]
421
- item_i = random.choice(items_with_min)
626
+
627
+ # If no incomplete items, user (and everyone) is done
628
+ if not incomplete_indices:
629
+ return _completed_response(tasks_data, progress_data, campaign_id, user_id)
630
+
631
+ # Select a random incomplete item
632
+ item_i = random.choice(incomplete_indices)
422
633
 
423
634
  # Prune the payload to only include selected models
424
635
  original_item = campaign_data["data"][item_i]
@@ -459,7 +670,7 @@ def get_next_item_dynamic(
459
670
  | {
460
671
  k: v
461
672
  for k, v in campaign_data["info"].items()
462
- if k in {"protocol", "sliders"}
673
+ if k in CAMPAIGN_INFO_PUBLIC
463
674
  },
464
675
  "payload": pruned_item,
465
676
  },
@@ -475,16 +686,17 @@ def _reset_user_time(progress_data: dict, campaign_id: str, user_id: str) -> Non
475
686
  progress_data[campaign_id][user_id]["validations"] = {}
476
687
 
477
688
 
478
- def _get_user_annotated_items(campaign_id: str, user_id: str) -> set[int]:
689
+ def _get_user_annotated_items(campaign_id: str, user_id: str) -> set[int | str]:
479
690
  """
480
691
  Get the set of item indices that a specific user has annotated.
481
-
692
+
482
693
  Args:
483
694
  campaign_id: The campaign identifier
484
695
  user_id: The user identifier
485
-
696
+
486
697
  Returns:
487
- Set of item indices (item_i) that the user has annotated
698
+ Set of item indices (item_i) that the user has annotated.
699
+ Can include both int indices for regular items and string IDs like "welcome_0" for welcome items.
488
700
  """
489
701
  log = get_db_log(campaign_id)
490
702
  user_items = set()
@@ -504,9 +716,16 @@ def reset_task(
504
716
  """
505
717
  Reset the task progress for the user in the specified campaign.
506
718
  Saves a reset marker to mask existing annotations.
719
+
720
+ Note: Dynamic assignment does not support user-level deletion.
507
721
  """
508
722
  assignment = tasks_data[campaign_id]["info"]["assignment"]
509
- if assignment == "task-based":
723
+ if assignment == "dynamic":
724
+ return JSONResponse(
725
+ content="User-level deletion is not supported for dynamic assignments",
726
+ status_code=400,
727
+ )
728
+ elif assignment == "task-based":
510
729
  # Save reset marker for this user to mask existing annotations
511
730
  num_items = len(tasks_data[campaign_id]["data"][user_id])
512
731
  for item_i in range(num_items):
@@ -514,49 +733,42 @@ def reset_task(
514
733
  campaign_id,
515
734
  {"user_id": user_id, "item_i": item_i, "annotation": RESET_MARKER},
516
735
  )
517
- progress_data[campaign_id][user_id]["progress"] = [False] * num_items
736
+ progress_data[campaign_id][user_id]["progress"] = [None] * num_items
737
+ # Reset welcome items progress if it exists
738
+ if "progress_welcome" in progress_data[campaign_id][user_id]:
739
+ num_welcome = len(progress_data[campaign_id][user_id]["progress_welcome"])
740
+ progress_data[campaign_id][user_id]["progress_welcome"] = [
741
+ False
742
+ ] * num_welcome
518
743
  _reset_user_time(progress_data, campaign_id, user_id)
519
744
  return JSONResponse(content="ok", status_code=200)
520
745
  elif assignment == "single-stream":
521
- # Find all items that this user has annotated
522
- user_items = _get_user_annotated_items(campaign_id, user_id)
523
-
524
- # Save reset markers only for items this user has touched
525
- for item_i in user_items:
526
- save_db_payload(
527
- campaign_id,
528
- {"user_id": user_id, "item_i": item_i, "annotation": RESET_MARKER},
529
- )
530
-
531
- # Reset only the touched items in all users' progress (shared pool)
532
- for uid in progress_data[campaign_id]:
533
- for item_i in user_items:
534
- progress_data[campaign_id][uid]["progress"][item_i] = False
535
-
536
- # Reset only the specified user's time
537
- _reset_user_time(progress_data, campaign_id, user_id)
538
- return JSONResponse(content="ok", status_code=200)
539
- elif assignment == "dynamic":
540
- # Find all items that this user has annotated
541
- user_items = _get_user_annotated_items(campaign_id, user_id)
542
-
543
- # Save reset markers only for items this user has touched
544
- for item_i in user_items:
746
+ # Find all items that this user has annotated (has "completed")
747
+ user_items_to_reset = [
748
+ i
749
+ for i, status in enumerate(progress_data[campaign_id][user_id]["progress"])
750
+ if status == "completed"
751
+ ]
752
+
753
+ # Save reset markers for all items this user has touched
754
+ for item_i in user_items_to_reset:
545
755
  save_db_payload(
546
756
  campaign_id,
547
757
  {"user_id": user_id, "item_i": item_i, "annotation": RESET_MARKER},
548
758
  )
549
759
 
550
- progress_data_user = copy.deepcopy(progress_data[campaign_id][user_id]["progress"])
551
-
552
- # Reset only the touched items in all users' progress (shared pool, use lists to track models)
760
+ # Reset the touched regular items in all users' progress (shared pool)
553
761
  for uid in progress_data[campaign_id]:
554
- for item_i in user_items:
555
- progress_data[campaign_id][uid]["progress"][item_i] = [
556
- x for x in progress_data[campaign_id][uid]["progress"][item_i]
557
- if x not in progress_data_user[item_i]
558
- ]
559
-
762
+ for item_i in user_items_to_reset:
763
+ progress_data[campaign_id][uid]["progress"][item_i] = None
764
+
765
+ # Reset all welcome items progress for this user (per-user, not shared)
766
+ if "progress_welcome" in progress_data[campaign_id][user_id]:
767
+ num_welcome = len(progress_data[campaign_id][user_id]["progress_welcome"])
768
+ progress_data[campaign_id][user_id]["progress_welcome"] = [
769
+ False
770
+ ] * num_welcome
771
+
560
772
  # Reset only the specified user's time
561
773
  _reset_user_time(progress_data, campaign_id, user_id)
562
774
  return JSONResponse(content="ok", status_code=200)
@@ -571,35 +783,58 @@ def update_progress(
571
783
  user_id: str,
572
784
  tasks_data: dict,
573
785
  progress_data: dict,
574
- item_i: int,
786
+ item_i: int | str, # Can be int or str like "welcome_0"
575
787
  payload: Any,
576
788
  ) -> JSONResponse:
577
789
  """
578
790
  Log the user's response for the specified item in the campaign.
579
791
  """
792
+ # Check if it's a welcome item
793
+ if isinstance(item_i, str) and item_i.startswith("welcome_"):
794
+ welcome_index = int(item_i.split("_")[1])
795
+ # Update only this user's progress_welcome (not shared)
796
+ progress_data[campaign_id][user_id]["progress_welcome"][welcome_index] = (
797
+ "completed"
798
+ )
799
+ return JSONResponse(content={"status": "ok"}, status_code=200)
800
+
580
801
  assignment = tasks_data[campaign_id]["info"]["assignment"]
581
802
  if assignment == "task-based":
582
- # even if it's already set it should be fine
583
- progress_data[campaign_id][user_id]["progress"][item_i] = True
803
+ # Mark as completed for this user
804
+ progress_data[campaign_id][user_id]["progress"][item_i] = "completed"
584
805
  return JSONResponse(content={"status": "ok"}, status_code=200)
585
806
  elif assignment == "single-stream":
586
- # progress all users
807
+ # Mark as completed for the current user, completed_foreign for others
587
808
  for uid in progress_data[campaign_id]:
588
- progress_data[campaign_id][uid]["progress"][item_i] = True
809
+ current_status = progress_data[campaign_id][uid]["progress"][item_i]
810
+ if uid == user_id:
811
+ # User who completed it gets "completed"
812
+ progress_data[campaign_id][uid]["progress"][item_i] = "completed"
813
+ elif current_status is None:
814
+ # Other users get "completed_foreign" if not already completed
815
+ progress_data[campaign_id][uid]["progress"][item_i] = (
816
+ "completed_foreign"
817
+ )
818
+ # If already "completed", keep it as "completed"
589
819
  return JSONResponse(content="ok", status_code=200)
590
- elif assignment == "dynamic":
591
- # For dynamic, track which models were annotated
592
- # Extract models from the payload annotation
593
- annotated_models = []
594
- if "annotation" in payload:
595
- for annotation_item in payload.get("annotation", []):
596
- if isinstance(annotation_item, dict):
597
- annotated_models.extend(annotation_item.keys())
598
-
599
- # Update progress for all users (shared pool)
600
- for uid in progress_data[campaign_id]:
601
- # Add the newly annotated models
602
- progress_data[campaign_id][uid]["progress"][item_i].extend(annotated_models)
820
+ if assignment == "dynamic":
821
+ # Mark as completed for the current user, completed_foreign for others
822
+ for model in payload["annotation"][0].keys():
823
+ for uid in progress_data[campaign_id]:
824
+ current_status = progress_data[campaign_id][uid]["progress"][item_i][
825
+ model
826
+ ]
827
+ if uid == user_id:
828
+ # User who completed it gets "completed"
829
+ progress_data[campaign_id][uid]["progress"][item_i][model] = (
830
+ "completed"
831
+ )
832
+ elif current_status is None:
833
+ # Other users get "completed_foreign" if not already completed
834
+ progress_data[campaign_id][uid]["progress"][item_i][model] = (
835
+ "completed_foreign"
836
+ )
837
+ # If already "completed", keep it as "completed"
603
838
  return JSONResponse(content="ok", status_code=200)
604
839
  else:
605
840
  return JSONResponse(content="Unknown campaign assignment type", status_code=400)