pearmut 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pearmut/app.py CHANGED
@@ -8,8 +8,8 @@ from fastapi.responses import JSONResponse
8
8
  from fastapi.staticfiles import StaticFiles
9
9
  from pydantic import BaseModel
10
10
 
11
- from .protocols import get_next_item, reset_task, update_progress
12
- from .utils import ROOT, load_progress_data, save_progress_data
11
+ from .assignment import get_i_item, get_next_item, reset_task, update_progress
12
+ from .utils import ROOT, load_progress_data, save_db_payload, save_progress_data
13
13
 
14
14
  os.makedirs(f"{ROOT}/data/outputs", exist_ok=True)
15
15
 
@@ -36,7 +36,7 @@ class LogResponseRequest(BaseModel):
36
36
  campaign_id: str
37
37
  user_id: str
38
38
  item_i: int
39
- payload: Any
39
+ payload: dict[str, Any]
40
40
 
41
41
 
42
42
  @app.post("/log-response")
@@ -45,6 +45,7 @@ async def _log_response(request: LogResponseRequest):
45
45
 
46
46
  campaign_id = request.campaign_id
47
47
  user_id = request.user_id
48
+ item_i = request.item_i
48
49
 
49
50
  if campaign_id not in progress_data:
50
51
  return JSONResponse(content={"error": "Unknown campaign ID"}, status_code=400)
@@ -52,8 +53,7 @@ async def _log_response(request: LogResponseRequest):
52
53
  return JSONResponse(content={"error": "Unknown user ID"}, status_code=400)
53
54
 
54
55
  # append response to the output log
55
- with open(f"{ROOT}/data/outputs/{campaign_id}.jsonl", "a") as log_file:
56
- log_file.write(json.dumps(request.payload, ensure_ascii=False) + "\n")
56
+ save_db_payload(campaign_id, request.payload | {"user_id": user_id, "item_i": item_i})
57
57
 
58
58
  # if actions were submitted, we can log time data
59
59
  if "actions" in request.payload:
@@ -97,6 +97,32 @@ async def _get_next_item(request: NextItemRequest):
97
97
  )
98
98
 
99
99
 
100
class GetItemRequest(BaseModel):
    # Request body accepted by the /get-i-item endpoint.

    # campaign the item is looked up in
    campaign_id: str
    # user on whose behalf the item is requested
    user_id: str
    # index of the requested item
    item_i: int
104
+
105
+
106
@app.post("/get-i-item")
async def _get_i_item(request: GetItemRequest):
    # Serve one specific item, selected by index, after confirming that both
    # the campaign and the user are present in the progress data.
    campaign_id, user_id, item_i = (
        request.campaign_id,
        request.user_id,
        request.item_i,
    )

    # the campaign check must come first: the user lookup indexes into it
    if campaign_id not in progress_data:
        error = "Unknown campaign ID"
    elif user_id not in progress_data[campaign_id]:
        error = "Unknown user ID"
    else:
        error = None

    if error is not None:
        return JSONResponse(content={"error": error}, status_code=400)

    return get_i_item(campaign_id, user_id, tasks_data, progress_data, item_i)
124
+
125
+
100
126
  class DashboardDataRequest(BaseModel):
101
127
  campaign_id: str
102
128
  token: str | None = None
@@ -111,19 +137,20 @@ async def _dashboard_data(request: DashboardDataRequest):
111
137
  if campaign_id not in progress_data:
112
138
  return JSONResponse(content={"error": "Unknown campaign ID"}, status_code=400)
113
139
 
114
- progress_new = {
115
- user_id: {
116
- **user_val,
117
- "total": len(tasks_data[campaign_id]["data"][user_id]),
118
- } | (
119
- # override if not privileged
120
- {
121
- "token_correct": None,
122
- "token_incorrect": None,
123
- } if not is_privileged else {}
124
- )
125
- for user_id, user_val in progress_data[campaign_id].items()
126
- }
140
+ progress_new = {}
141
+ assignment = tasks_data[campaign_id]["info"]["assignment"]
142
+ if assignment not in ["task-based", "single-stream"]:
143
+ return JSONResponse(content={"error": "Unsupported campaign assignment type"}, status_code=400)
144
+
145
+ for user_id, user_val in progress_data[campaign_id].items():
146
+ # shallow copy
147
+ entry = dict(user_val)
148
+
149
+ if not is_privileged:
150
+ entry["token_correct"] = None
151
+ entry["token_incorrect"] = None
152
+
153
+ progress_new[user_id] = entry
127
154
 
128
155
  return JSONResponse(
129
156
  content={
@@ -190,19 +217,22 @@ async def _download_progress(
190
217
  return JSONResponse(content={"error": "Mismatched campaign_id and token count"}, status_code=400)
191
218
 
192
219
  output = {}
193
- for campaign_id, campaign_id in enumerate(campaign_id):
194
- if campaign_id not in progress_data:
195
- return JSONResponse(content={"error": f"Unknown campaign ID {campaign_id}"}, status_code=400)
196
- if token[campaign_id] != tasks_data[campaign_id]["token"]:
197
- return JSONResponse(content={"error": f"Invalid token for campaign ID {campaign_id}"}, status_code=400)
220
+ for i, cid in enumerate(campaign_id):
221
+ if cid not in progress_data:
222
+ return JSONResponse(content={"error": f"Unknown campaign ID {cid}"}, status_code=400)
223
+ if token[i] != tasks_data[cid]["token"]:
224
+ return JSONResponse(content={"error": f"Invalid token for campaign ID {cid}"}, status_code=400)
198
225
 
199
- output[campaign_id] = progress_data[campaign_id]
226
+ output[cid] = progress_data[cid]
200
227
 
201
228
  return JSONResponse(content=output, status_code=200)
202
229
 
230
+ static_dir = f"{os.path.dirname(os.path.abspath(__file__))}/static/"
231
+ if not os.path.exists(static_dir + "index.html"):
232
+ raise FileNotFoundError("Static directory not found. Please build the frontend first.")
203
233
 
204
234
  app.mount(
205
235
  "/",
206
- StaticFiles(directory=f"{os.path.dirname(os.path.abspath(__file__))}/static/" , html=True, follow_symlink=True),
236
+ StaticFiles(directory=static_dir, html=True, follow_symlink=True),
207
237
  name="static",
208
238
  )
pearmut/assignment.py ADDED
@@ -0,0 +1,312 @@
1
+ import random
2
+ from typing import Any
3
+
4
+ from fastapi.responses import JSONResponse
5
+
6
+ from .utils import get_db_log_item
7
+
8
+
9
def _completed_response(
    progress_data: dict,
    campaign_id: str,
    user_id: str,
) -> JSONResponse:
    """
    Assemble the response returned once a user has nothing left to annotate.

    The body carries the user's progress list, their tracked time, and a
    completion token read from the user's progress entry.
    """
    entry = progress_data[campaign_id][user_id]
    # TODO: add check for data quality
    # until such a check exists every completion is treated as passing
    passed_quality = True
    token_key = "token_correct" if passed_quality else "token_incorrect"

    body = {
        "status": "completed",
        "progress": entry["progress"],
        "time": entry["time"],
        "token": entry[token_key],
    }
    return JSONResponse(content=body, status_code=200)
27
+
28
+
29
def get_next_item(
    campaign_id: str,
    user_id: str,
    tasks_data: dict,
    progress_data: dict,
) -> JSONResponse:
    """
    Pick the next item for a user by delegating to the handler that matches
    the campaign's assignment type.

    An unrecognized assignment type yields a 400 error response.
    """
    mode = tasks_data[campaign_id]["info"]["assignment"]
    handler_args = (campaign_id, user_id, tasks_data, progress_data)

    if mode == "task-based":
        return get_next_item_taskbased(*handler_args)
    if mode == "single-stream":
        return get_next_item_singlestream(*handler_args)
    if mode == "dynamic":
        return get_next_item_dynamic(*handler_args)

    return JSONResponse(
        content={"error": "Unknown campaign assignment type"},
        status_code=400,
    )
47
+
48
+
49
def get_i_item(
    campaign_id: str,
    user_id: str,
    tasks_data: dict,
    progress_data: dict,
    item_i: int,
) -> JSONResponse:
    """
    Fetch the item at index ``item_i`` by delegating to the handler that
    matches the campaign's assignment type.

    Only task-based and single-stream campaigns are handled; any other
    assignment type yields a 400 error response.
    """
    mode = tasks_data[campaign_id]["info"]["assignment"]
    handler_args = (campaign_id, user_id, tasks_data, progress_data, item_i)

    if mode == "task-based":
        return get_i_item_taskbased(*handler_args)
    if mode == "single-stream":
        return get_i_item_singlestream(*handler_args)

    return JSONResponse(
        content={"error": "Get item not supported for this assignment type"},
        status_code=400,
    )
66
+
67
+
68
def get_i_item_taskbased(
    campaign_id: str,
    user_id: str,
    data_all: dict,
    progress_data: dict,
    item_i: int,
) -> JSONResponse:
    """
    Get specific item for task-based protocol.

    Returns the completed response when every entry of the user's progress
    list is truthy, a 400 error when ``item_i`` falls outside the user's item
    list, and otherwise the requested item together with the campaign's
    ``protocol*`` info fields. When ``get_db_log_item`` returns entries for
    this user and item, the ``annotations`` of the last entry are attached as
    ``payload_existing``.
    """
    user_progress = progress_data[campaign_id][user_id]
    if all(user_progress["progress"]):
        return _completed_response(progress_data, campaign_id, user_id)

    # validate the index before consulting the response log so that an
    # invalid request never triggers a lookup
    user_items = data_all[campaign_id]["data"][user_id]
    if item_i < 0 or item_i >= len(user_items):
        return JSONResponse(
            content={"status": "error", "message": "Item index out of range"},
            status_code=400,
        )

    content = {
        "status": "ok",
        "progress": user_progress["progress"],
        "time": user_progress["time"],
        "info": {
            "item_i": item_i,
        } | {
            k: v
            for k, v in data_all[campaign_id]["info"].items()
            if k.startswith("protocol")
        },
        "payload": user_items[item_i],
    }

    # try to get existing annotations if any
    items_existing = get_db_log_item(campaign_id, user_id, item_i)
    if items_existing:
        # get the latest ones
        content["payload_existing"] = items_existing[-1]["annotations"]

    return JSONResponse(content=content, status_code=200)
110
+
111
+
112
def get_i_item_singlestream(
    campaign_id: str,
    user_id: str,
    data_all: dict,
    progress_data: dict,
    item_i: int,
) -> JSONResponse:
    """
    Get specific item for single-stream assignment.

    Returns the completed response when every entry of the user's progress
    list is truthy, a 400 error when ``item_i`` falls outside the campaign's
    item list, and otherwise the requested item together with the campaign's
    ``protocol*`` info fields. When ``get_db_log_item`` returns entries for
    this item, the ``annotations`` of the last entry are attached as
    ``payload_existing``.
    """
    user_progress = progress_data[campaign_id][user_id]
    if all(user_progress["progress"]):
        return _completed_response(progress_data, campaign_id, user_id)

    # validate the index before consulting the response log so that an
    # invalid request never triggers a lookup
    shared_items = data_all[campaign_id]["data"]
    if item_i < 0 or item_i >= len(shared_items):
        return JSONResponse(
            content={"status": "error", "message": "Item index out of range"},
            status_code=400,
        )

    content = {
        "status": "ok",
        "progress": user_progress["progress"],
        "time": user_progress["time"],
        "info": {
            "item_i": item_i,
        } | {
            k: v
            for k, v in data_all[campaign_id]["info"].items()
            if k.startswith("protocol")
        },
        "payload": shared_items[item_i],
    }

    # try to get existing annotations if any
    # note the None user_id since it is shared
    items_existing = get_db_log_item(campaign_id, None, item_i)
    if items_existing:
        # get the latest ones
        content["payload_existing"] = items_existing[-1]["annotations"]

    return JSONResponse(content=content, status_code=200)
155
+
156
+
157
def get_next_item_taskbased(
    campaign_id: str,
    user_id: str,
    data_all: dict,
    progress_data: dict,
) -> JSONResponse:
    """
    Serve the lowest-index unfinished item of the user's own task.

    When nothing is left the completed response is returned instead. If
    ``get_db_log_item`` returns entries for this user and item, the
    ``annotations`` of the last entry are included as ``payload_existing``.
    """
    entry = progress_data[campaign_id][user_id]
    done_flags = entry["progress"]
    if all(done_flags):
        return _completed_response(progress_data, campaign_id, user_id)

    # enumerate walks in ascending order, so the first unfinished position is
    # also the smallest one; the all() guard above ensures one exists
    item_i = next(i for i, done in enumerate(done_flags) if not done)

    # look for responses already stored for this item
    logged = get_db_log_item(campaign_id, user_id, item_i)
    has_previous = bool(logged)
    if has_previous:
        # the last entry is the most recently appended one
        previous_annotations = logged[-1]["annotations"]

    campaign = data_all[campaign_id]
    info = {"item_i": item_i}
    info.update({
        key: value
        for key, value in campaign["info"].items()
        if key.startswith("protocol")
    })

    content = {
        "status": "ok",
        "progress": entry["progress"],
        "time": entry["time"],
        "info": info,
        "payload": campaign["data"][user_id][item_i],
    }
    if has_previous:
        content["payload_existing"] = previous_annotations

    return JSONResponse(content=content, status_code=200)
195
+
196
+
197
def get_next_item_singlestream(
    campaign_id: str,
    user_id: str,
    data_all: dict,
    progress_data: dict,
) -> JSONResponse:
    """
    Serve a randomly chosen unfinished item from the campaign's shared pool.

    Every user draws from the same list of items. Two users may be handed
    the same item at the same moment; that is accepted because all responses
    are stored.
    """
    entry = progress_data[campaign_id][user_id]
    done_flags = entry["progress"]

    if all(done_flags):
        return _completed_response(progress_data, campaign_id, user_id)

    # draw uniformly among the positions that are still open
    open_positions = [i for i, done in enumerate(done_flags) if not done]
    item_i = random.choice(open_positions)

    # look for responses already stored for this item
    # the user id is None because the item pool is shared
    logged = get_db_log_item(campaign_id, None, item_i)
    has_previous = bool(logged)
    if has_previous:
        # the last entry is the most recently appended one
        previous_annotations = logged[-1]["annotations"]

    campaign = data_all[campaign_id]
    info = {"item_i": item_i}
    info.update({
        key: value
        for key, value in campaign["info"].items()
        if key.startswith("protocol")
    })

    content = {
        "status": "ok",
        "time": entry["time"],
        "progress": done_flags,
        "info": info,
        "payload": campaign["data"][item_i],
    }
    if has_previous:
        content["payload_existing"] = previous_annotations

    return JSONResponse(content=content, status_code=200)
244
+
245
+
246
+
247
def get_next_item_dynamic(campaign_data: dict, user_id: str, progress_data: dict, data_all: dict):
    """
    Placeholder for dynamic assignment; always raises NotImplementedError.

    NOTE(review): get_next_item calls this positionally as
    (campaign_id, user_id, tasks_data, progress_data), which does not line up
    with the parameter names used here - reconcile when implementing.
    """
    message = "Dynamic protocol is not implemented yet."
    raise NotImplementedError(message)
249
+
250
+
251
+
252
+ def _reset_user_time(progress_data: dict, campaign_id: str, user_id: str) -> None:
253
+ """Reset time tracking fields for a user."""
254
+ progress_data[campaign_id][user_id]["time"] = 0.0
255
+ progress_data[campaign_id][user_id]["time_start"] = None
256
+ progress_data[campaign_id][user_id]["time_end"] = None
257
+
258
+
259
def reset_task(
    campaign_id: str,
    user_id: str,
    tasks_data: dict,
    progress_data: dict,
) -> JSONResponse:
    """
    Clear recorded progress in a campaign and restart the caller's time tracking.

    Task-based campaigns reset only ``user_id``'s progress list. Single-stream
    campaigns reset the progress list of every user in the campaign, while the
    time fields are reset for ``user_id`` only. Any other assignment type
    yields a 400 error response and leaves the data untouched.
    """
    mode = tasks_data[campaign_id]["info"]["assignment"]

    if mode == "task-based":
        item_count = len(tasks_data[campaign_id]["data"][user_id])
        progress_data[campaign_id][user_id]["progress"] = [False] * item_count
    elif mode == "single-stream":
        # for single-stream reset all progress; each user gets a separate list
        for user_entry in progress_data[campaign_id].values():
            user_entry["progress"] = [False] * len(tasks_data[campaign_id]["data"])
    else:
        return JSONResponse(
            content={
                "status": "error",
                "message": "Reset not supported for this assignment type",
            },
            status_code=400,
        )

    _reset_user_time(progress_data, campaign_id, user_id)
    return JSONResponse(content={"status": "ok"}, status_code=200)
285
+
286
+
287
def update_progress(
    campaign_id: str,
    user_id: str,
    tasks_data: dict,
    progress_data: dict,
    item_i: int,
    payload: Any,
) -> JSONResponse:
    """
    Mark item ``item_i`` as done for the specified campaign.

    Task-based campaigns update only ``user_id``'s progress list.
    Single-stream campaigns update the progress list of every user in the
    campaign. ``payload`` is accepted but not inspected here.

    Returns a 200 ``{"status": "ok"}`` response on success and a 400 error
    response when the assignment type is dynamic or unknown, or when
    ``item_i`` is outside any of the progress lists that would be updated.
    """
    assignment = tasks_data[campaign_id]["info"]["assignment"]
    if assignment == "task-based":
        targets = [progress_data[campaign_id][user_id]["progress"]]
    elif assignment == "single-stream":
        # progress all users
        targets = [
            entry["progress"] for entry in progress_data[campaign_id].values()
        ]
    elif assignment == "dynamic":
        return JSONResponse(
            content={
                "status": "error",
                "message": "Dynamic protocol logging not implemented yet.",
            },
            status_code=400,
        )
    else:
        return JSONResponse(
            content={
                "status": "error",
                "message": "Unknown campaign assignment type",
            },
            status_code=400,
        )

    # Reject the index before mutating anything: a negative value would
    # otherwise silently mark an item counted from the end of the list, and a
    # too-large one would raise IndexError, possibly after some users had
    # already been updated.
    if any(not 0 <= item_i < len(flags) for flags in targets):
        return JSONResponse(
            content={"status": "error", "message": "Item index out of range"},
            status_code=400,
        )

    for flags in targets:
        # even if it's already set it should be fine
        flags[item_i] = True
    # TODO: log attention checks/quality?
    return JSONResponse(content={"status": "ok"}, status_code=200)
pearmut/cli.py CHANGED
@@ -90,33 +90,41 @@ def _add_campaign(args_unknown):
90
90
  raise ValueError("Campaign data must contain 'info' field.")
91
91
  if "data" not in campaign_data:
92
92
  raise ValueError("Campaign data must contain 'data' field.")
93
- if "type" not in campaign_data["info"]:
94
- raise ValueError("Campaign 'info' must contain 'type' field.")
93
+ if "assignment" not in campaign_data["info"]:
94
+ raise ValueError("Campaign 'info' must contain 'assignment' field.")
95
95
  if "template" not in campaign_data["info"]:
96
96
  raise ValueError("Campaign 'info' must contain 'template' field.")
97
97
 
98
+ assignment = campaign_data["info"]["assignment"]
98
99
  # use random words for identifying users
99
100
  rng = random.Random(campaign_data["campaign_id"])
100
101
  rword = wonderwords.RandomWord(rng=rng)
101
- if campaign_data["info"]["type"] == "task-based":
102
+ if assignment == "task-based":
102
103
  tasks = campaign_data["data"]
103
104
  if not isinstance(tasks, list):
104
- raise ValueError("Task-based campaign 'data' must be a list of tasks.")
105
+ raise ValueError(
106
+ "Task-based campaign 'data' must be a list of tasks.")
105
107
  if not all(isinstance(task, list) for task in tasks):
106
- raise ValueError("Each task in task-based campaign 'data' must be a list of items.")
107
- amount = len(tasks)
108
- elif campaign_data["info"]["type"] == "dynamic":
109
- if "num_users" not in campaign_data:
110
- raise ValueError("Dynamic campaigns must specify 'num_users'.")
108
+ raise ValueError(
109
+ "Each task in task-based campaign 'data' must be a list of items.")
110
+ num_users = len(tasks)
111
+ elif assignment == "single-stream":
112
+ tasks = campaign_data["data"]
113
+ if "num_users" not in campaign_data["info"]:
114
+ raise ValueError(
115
+ "Single-stream campaigns must specify 'num_users' in info.")
111
116
  if not isinstance(campaign_data["data"], list):
112
- raise ValueError("Dynamic campaign 'data' must be a list of items.")
113
- amount = campaign_data["num_users"]
117
+ raise ValueError(
118
+ "Single-stream campaign 'data' must be a list of items.")
119
+ num_users = campaign_data["info"]["num_users"]
120
+ elif assignment == "dynamic":
121
+ raise NotImplementedError(
122
+ "Dynamic campaign assignment is not yet implemented.")
114
123
  else:
115
- raise ValueError(
116
- f"Unknown campaign type: {campaign_data["info"]['type']}")
124
+ raise ValueError(f"Unknown campaign assignment type: {assignment}")
117
125
 
118
126
  user_ids = []
119
- while len(user_ids) < amount:
127
+ while len(user_ids) < num_users:
120
128
  # generate random user IDs
121
129
  new_id = f"{rword.random_words(amount=1, include_parts_of_speech=['adjective'])[0]}-{rword.random_words(amount=1, include_parts_of_speech=['noun'])[0]}"
122
130
  if new_id not in user_ids:
@@ -126,10 +134,15 @@ def _add_campaign(args_unknown):
126
134
  for user_id in user_ids
127
135
  ]
128
136
 
129
- campaign_data["data"] = {
130
- user_id: task
131
- for user_id, task in zip(user_ids, tasks)
132
- }
137
+ # For task-based, data is a dict mapping user_id -> tasks
138
+ # For single-stream, data is a flat list (shared among all users)
139
+ if assignment == "task-based":
140
+ campaign_data["data"] = {
141
+ user_id: task
142
+ for user_id, task in zip(user_ids, tasks)
143
+ }
144
+ elif assignment == "single-stream":
145
+ campaign_data["data"] = tasks
133
146
 
134
147
  # generate a token for dashboard access if not present
135
148
  if "token" not in campaign_data:
@@ -139,7 +152,12 @@ def _add_campaign(args_unknown):
139
152
 
140
153
  user_progress = {
141
154
  user_id: {
142
- "progress": [False]*len(campaign_data["data"][user_id]) if campaign_data["info"]["type"] == "task-based" else [],
155
+ # TODO: progress tracking could be based on the assignment type
156
+ "progress": (
157
+ [False]*len(campaign_data["data"][user_id]) if assignment == "task-based"
158
+ else [False]*len(campaign_data["data"]) if assignment == "single-stream"
159
+ else []
160
+ ),
143
161
  "time_start": None,
144
162
  "time_end": None,
145
163
  "time": 0,