pearmut 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pearmut/app.py CHANGED
@@ -9,7 +9,13 @@ from fastapi.staticfiles import StaticFiles
9
9
  from pydantic import BaseModel
10
10
 
11
11
  from .assignment import get_i_item, get_next_item, reset_task, update_progress
12
- from .utils import ROOT, load_progress_data, save_db_payload, save_progress_data
12
+ from .utils import (
13
+ ROOT,
14
+ check_validation_threshold,
15
+ load_progress_data,
16
+ save_db_payload,
17
+ save_progress_data,
18
+ )
13
19
 
14
20
  os.makedirs(f"{ROOT}/data/outputs", exist_ok=True)
15
21
 
@@ -151,6 +157,9 @@ async def _dashboard_data(request: DashboardDataRequest):
151
157
  if assignment not in ["task-based", "single-stream"]:
152
158
  return JSONResponse(content={"error": "Unsupported campaign assignment type"}, status_code=400)
153
159
 
160
+ # Get threshold info for the campaign
161
+ validation_threshold = tasks_data[campaign_id]["info"].get("validation_threshold")
162
+
154
163
  for user_id, user_val in progress_data[campaign_id].items():
155
164
  # shallow copy
156
165
  entry = dict(user_val)
@@ -159,6 +168,13 @@ async def _dashboard_data(request: DashboardDataRequest):
159
168
  for v in list(entry.get("validations", {}).values())
160
169
  ]
161
170
 
171
+ # Add threshold pass/fail status (only when user is complete)
172
+ if all(entry["progress"]):
173
+ entry["threshold_passed"] = check_validation_threshold(
174
+ tasks_data, progress_data, campaign_id, user_id
175
+ )
176
+ else:
177
+ entry["threshold_passed"] = None
162
178
 
163
179
  if not is_privileged:
164
180
  entry["token_correct"] = None
@@ -169,7 +185,8 @@ async def _dashboard_data(request: DashboardDataRequest):
169
185
  return JSONResponse(
170
186
  content={
171
187
  "status": "ok",
172
- "data": progress_new
188
+ "data": progress_new,
189
+ "validation_threshold": validation_threshold
173
190
  },
174
191
  status_code=200
175
192
  )
pearmut/assignment.py CHANGED
@@ -3,18 +3,23 @@ from typing import Any
3
3
 
4
4
  from fastapi.responses import JSONResponse
5
5
 
6
- from .utils import get_db_log_item
6
+ from .utils import (
7
+ RESET_MARKER,
8
+ check_validation_threshold,
9
+ get_db_log_item,
10
+ save_db_payload,
11
+ )
7
12
 
8
13
 
9
14
  def _completed_response(
15
+ tasks_data: dict,
10
16
  progress_data: dict,
11
17
  campaign_id: str,
12
18
  user_id: str,
13
19
  ) -> JSONResponse:
14
20
  """Build a completed response with progress, time, and token."""
15
21
  user_progress = progress_data[campaign_id][user_id]
16
- # TODO: add check for data quality
17
- is_ok = True
22
+ is_ok = check_validation_threshold(tasks_data, progress_data, campaign_id, user_id)
18
23
  return JSONResponse(
19
24
  content={
20
25
  "status": "completed",
@@ -161,7 +166,7 @@ def get_next_item_taskbased(
161
166
  """
162
167
  user_progress = progress_data[campaign_id][user_id]
163
168
  if all(user_progress["progress"]):
164
- return _completed_response(progress_data, campaign_id, user_id)
169
+ return _completed_response(data_all, progress_data, campaign_id, user_id)
165
170
 
166
171
  # find first incomplete item
167
172
  item_i = min([i for i, v in enumerate(user_progress["progress"]) if not v])
@@ -208,7 +213,7 @@ def get_next_item_singlestream(
208
213
  progress = user_progress["progress"]
209
214
 
210
215
  if all(progress):
211
- return _completed_response(progress_data, campaign_id, user_id)
216
+ return _completed_response(data_all, progress_data, campaign_id, user_id)
212
217
 
213
218
  # find a random incomplete item
214
219
  incomplete_indices = [i for i, v in enumerate(progress) if not v]
@@ -261,20 +266,33 @@ def reset_task(
261
266
  ) -> JSONResponse:
262
267
  """
263
268
  Reset the task progress for the user in the specified campaign.
269
+ Saves a reset marker to mask existing annotations.
264
270
  """
265
271
  assignment = tasks_data[campaign_id]["info"]["assignment"]
266
272
  if assignment == "task-based":
267
- progress_data[campaign_id][user_id]["progress"] = (
268
- [False]*len(tasks_data[campaign_id]["data"][user_id])
269
- )
273
+ # Save reset marker for this user to mask existing annotations
274
+ num_items = len(tasks_data[campaign_id]["data"][user_id])
275
+ for item_i in range(num_items):
276
+ save_db_payload(campaign_id, {
277
+ "user_id": user_id,
278
+ "item_i": item_i,
279
+ "annotations": RESET_MARKER
280
+ })
281
+ progress_data[campaign_id][user_id]["progress"] = [False] * num_items
270
282
  _reset_user_time(progress_data, campaign_id, user_id)
271
283
  return JSONResponse(content={"status": "ok"}, status_code=200)
272
284
  elif assignment == "single-stream":
285
+ # Save reset markers for all items (shared pool)
286
+ num_items = len(tasks_data[campaign_id]["data"])
287
+ for item_i in range(num_items):
288
+ save_db_payload(campaign_id, {
289
+ "user_id": None,
290
+ "item_i": item_i,
291
+ "annotations": RESET_MARKER
292
+ })
273
293
  # for single-stream reset all progress
274
294
  for uid in progress_data[campaign_id]:
275
- progress_data[campaign_id][uid]["progress"] = (
276
- [False]*len(tasks_data[campaign_id]["data"])
277
- )
295
+ progress_data[campaign_id][uid]["progress"] = [False] * num_items
278
296
  _reset_user_time(progress_data, campaign_id, user_id)
279
297
  return JSONResponse(content={"status": "ok"}, status_code=200)
280
298
  else:
pearmut/cli.py CHANGED
@@ -10,7 +10,7 @@ import urllib.parse
10
10
 
11
11
  import psutil
12
12
 
13
- from .utils import ROOT, load_progress_data
13
+ from .utils import ROOT, load_progress_data, save_progress_data
14
14
 
15
15
  os.makedirs(f"{ROOT}/data/tasks", exist_ok=True)
16
16
  load_progress_data(warn=None)
@@ -50,36 +50,21 @@ def _run(args_unknown):
50
50
  )
51
51
 
52
52
 
53
- def _add_campaign(args_unknown):
53
+ def _add_single_campaign(data_file, overwrite, server):
54
54
  """
55
- Add a new campaign from a JSON data file.
55
+ Add a single campaign from a JSON data file.
56
56
  """
57
57
  import random
58
58
 
59
59
  import wonderwords
60
60
 
61
- args = argparse.ArgumentParser()
62
- args.add_argument(
63
- 'data_file', type=str,
64
- help='Path to the campaign data file'
65
- )
66
- args.add_argument(
67
- "-o", "--overwrite", action="store_true",
68
- help="Overwrite existing campaign if it exists"
69
- )
70
- args.add_argument(
71
- "--server", default="http://localhost:8001",
72
- help="Prefix server URL for protocol links"
73
- )
74
- args = args.parse_args(args_unknown)
75
-
76
- with open(args.data_file, 'r') as f:
61
+ with open(data_file, 'r') as f:
77
62
  campaign_data = json.load(f)
78
63
 
79
64
  with open(f"{ROOT}/data/progress.json", "r") as f:
80
65
  progress_data = json.load(f)
81
66
 
82
- if campaign_data['campaign_id'] in progress_data and not args.overwrite:
67
+ if campaign_data['campaign_id'] in progress_data and not overwrite:
83
68
  print(
84
69
  f"Campaign {campaign_data['campaign_id']} already exists.",
85
70
  "Use -o to overwrite."
@@ -99,6 +84,11 @@ def _add_campaign(args_unknown):
99
84
  # use random words for identifying users
100
85
  rng = random.Random(campaign_data["campaign_id"])
101
86
  rword = wonderwords.RandomWord(rng=rng)
87
+
88
+ # Parse users specification from info
89
+ users_spec = campaign_data["info"].get("users")
90
+ user_tokens = {} # user_id -> {"pass": ..., "fail": ...}
91
+
102
92
  if assignment == "task-based":
103
93
  tasks = campaign_data["data"]
104
94
  if not isinstance(tasks, list):
@@ -110,29 +100,58 @@ def _add_campaign(args_unknown):
110
100
  num_users = len(tasks)
111
101
  elif assignment == "single-stream":
112
102
  tasks = campaign_data["data"]
113
- if "num_users" not in campaign_data["info"]:
103
+ if users_spec is None:
114
104
  raise ValueError(
115
- "Single-stream campaigns must specify 'num_users' in info.")
105
+ "Single-stream campaigns must specify 'users' in info.")
116
106
  if not isinstance(campaign_data["data"], list):
117
107
  raise ValueError(
118
108
  "Single-stream campaign 'data' must be a list of items.")
119
- num_users = campaign_data["info"]["num_users"]
109
+ if isinstance(users_spec, int):
110
+ num_users = users_spec
111
+ elif isinstance(users_spec, list):
112
+ num_users = len(users_spec)
113
+ else:
114
+ raise ValueError("'users' must be an integer or a list.")
120
115
  elif assignment == "dynamic":
121
116
  raise NotImplementedError(
122
117
  "Dynamic campaign assignment is not yet implemented.")
123
118
  else:
124
119
  raise ValueError(f"Unknown campaign assignment type: {assignment}")
125
120
 
126
- user_ids = []
127
- while len(user_ids) < num_users:
128
- # generate random user IDs
129
- new_id = f"{rword.random_words(amount=1, include_parts_of_speech=['adjective'])[0]}-{rword.random_words(amount=1, include_parts_of_speech=['noun'])[0]}"
130
- if new_id not in user_ids:
131
- user_ids.append(new_id)
132
- user_ids = [
133
- f"{user_id}-{rng.randint(0, 999):03d}"
134
- for user_id in user_ids
135
- ]
121
+ # Generate or parse user IDs based on users specification
122
+ if users_spec is None or isinstance(users_spec, int):
123
+ # Generate random user IDs
124
+ user_ids = []
125
+ while len(user_ids) < num_users:
126
+ new_id = f"{rword.random_words(amount=1, include_parts_of_speech=['adjective'])[0]}-{rword.random_words(amount=1, include_parts_of_speech=['noun'])[0]}"
127
+ if new_id not in user_ids:
128
+ user_ids.append(new_id)
129
+ user_ids = [
130
+ f"{user_id}-{rng.randint(0, 999):03d}"
131
+ for user_id in user_ids
132
+ ]
133
+ elif isinstance(users_spec, list):
134
+ if len(users_spec) != num_users:
135
+ raise ValueError(
136
+ f"Number of users ({len(users_spec)}) must match expected count ({num_users}).")
137
+ if all(isinstance(u, str) for u in users_spec):
138
+ # List of string IDs
139
+ user_ids = users_spec
140
+ elif all(isinstance(u, dict) for u in users_spec):
141
+ # List of dicts with user_id, token_pass, token_fail
142
+ user_ids = []
143
+ for u in users_spec:
144
+ if "user_id" not in u:
145
+ raise ValueError("Each user dict must contain 'user_id'.")
146
+ user_ids.append(u["user_id"])
147
+ user_tokens[u["user_id"]] = {
148
+ "pass": u.get("token_pass"),
149
+ "fail": u.get("token_fail"),
150
+ }
151
+ else:
152
+ raise ValueError("'users' list must contain all strings or all dicts.")
153
+ else:
154
+ raise ValueError("'users' must be an integer or a list.")
136
155
 
137
156
  # For task-based, data is a dict mapping user_id -> tasks
138
157
  # For single-stream, data is a flat list (shared among all users)
@@ -150,6 +169,13 @@ def _add_campaign(args_unknown):
150
169
  hashlib.sha256(random.randbytes(16)).hexdigest()[:10]
151
170
  )
152
171
 
172
def get_token(user_id, token_type):
    """Return the completion token for a user, generating one if needed.

    Looks up ``user_tokens[user_id][token_type]`` (``user_tokens`` comes
    from the enclosing scope). A missing user, a missing key, or an
    explicit ``None`` all fall through to a freshly generated token.

    Args:
        user_id: Key into ``user_tokens``.
        token_type: Token slot to read, e.g. ``"pass"`` or ``"fail"``.

    Returns:
        The configured token if one is set, otherwise a random string of
        10 lowercase hexadecimal characters.
    """
    # Local import: the surrounding module's import block is not visible here.
    import secrets

    token = user_tokens.get(user_id, {}).get(token_type)
    if token is not None:
        return token
    # Tokens act as proof-of-completion codes, so draw them from the
    # OS CSPRNG rather than the predictable `random` module.
    # 5 random bytes -> 10 hex characters, the same shape as before.
    return secrets.token_hex(5)
178
+
153
179
  user_progress = {
154
180
  user_id: {
155
181
  # TODO: progress tracking could be based on the assignment type
@@ -162,16 +188,48 @@ def _add_campaign(args_unknown):
162
188
  "time_end": None,
163
189
  "time": 0,
164
190
  "url": (
165
- f"{args.server}/{campaign_data["info"]["template"]}.html"
191
+ f"{campaign_data["info"]["template"]}.html"
166
192
  f"?campaign_id={urllib.parse.quote_plus(campaign_data['campaign_id'])}"
167
193
  f"&user_id={user_id}"
168
194
  ),
169
- "token_correct": hashlib.sha256(random.randbytes(16)).hexdigest()[:10],
170
- "token_incorrect": hashlib.sha256(random.randbytes(16)).hexdigest()[:10],
195
+ "token_correct": get_token(user_id, "pass"),
196
+ "token_incorrect": get_token(user_id, "fail"),
171
197
  }
172
198
  for user_id in user_ids
173
199
  }
174
200
 
201
+ # Handle assets symlink if specified
202
+ if "assets" in campaign_data["info"]:
203
+ assets_real_path = campaign_data["info"]["assets"]
204
+
205
+ # Resolve relative paths from the caller's current working directory
206
+ assets_real_path = os.path.abspath(assets_real_path)
207
+
208
+ if not os.path.isdir(assets_real_path):
209
+ raise ValueError(f"Assets path '{assets_real_path}' must be an existing directory.")
210
+
211
+ static_dir = f"{os.path.dirname(os.path.abspath(__file__))}/static"
212
+ dir_name = assets_real_path.split(os.sep)[-1]
213
+
214
+ if not os.path.isdir(static_dir):
215
+ raise ValueError(
216
+ f"Static directory '{static_dir}' does not exist. "
217
+ "Please build the frontend first."
218
+ )
219
+ symlink_path = f"{static_dir}/assets/{dir_name}"
220
+
221
+ # Remove existing symlink if present and we are overriding
222
+ if os.path.exists(symlink_path):
223
+ if overwrite:
224
+ os.remove(symlink_path)
225
+ else:
226
+ raise ValueError(f"Assets symlink '{symlink_path}' already exists.")
227
+
228
+ os.symlink(assets_real_path, symlink_path, target_is_directory=True)
229
+ print(f"Assets symlinked: {symlink_path} -> {assets_real_path}")
230
+
231
+
232
+ # commit to transaction
175
233
  with open(f"{ROOT}/data/tasks/{campaign_data['campaign_id']}.json", "w") as f:
176
234
  json.dump(campaign_data, f, indent=2, ensure_ascii=False)
177
235
 
@@ -180,15 +238,44 @@ def _add_campaign(args_unknown):
180
238
  with open(f"{ROOT}/data/progress.json", "w") as f:
181
239
  json.dump(progress_data, f, indent=2, ensure_ascii=False)
182
240
 
241
+
183
242
  print(
184
- f"{args.server}/dashboard.html"
243
+ "🎛️ ",
244
+ f"{server}/dashboard.html"
185
245
  f"?campaign_id={urllib.parse.quote_plus(campaign_data['campaign_id'])}"
186
246
  f"&token={campaign_data['token']}"
187
247
  )
188
- print("-"*10)
189
248
  for user_id, user_val in user_progress.items():
190
249
  # point to the protocol URL
191
- print(user_val["url"])
250
+ print(f'{server}/{user_val["url"]}')
251
+ print()
252
+
253
+
254
def _add_campaign(args_unknown):
    """Add campaigns from one or more JSON data files.

    Parses ``args_unknown`` as the arguments of the ``add`` sub-command and
    calls ``_add_single_campaign`` once per data file, in the order given.

    Recognised arguments:
        data_files: one or more paths to campaign data files (positional).
        -o / --overwrite: overwrite an existing campaign if it exists.
        --server: prefix server URL for protocol links
            (default ``http://localhost:8001``).

    Args:
        args_unknown: List of command-line argument strings to parse.

    Raises:
        SystemExit: with status 1 as soon as any data file fails to
            process; argparse itself exits on invalid arguments. Files
            handled before the failure are not rolled back by this function.
    """
    # Local import: the surrounding module's import block is not visible here.
    import sys

    parser = argparse.ArgumentParser()
    parser.add_argument(
        'data_files', type=str, nargs='+',
        help='One or more paths to campaign data files'
    )
    parser.add_argument(
        "-o", "--overwrite", action="store_true",
        help="Overwrite existing campaign if it exists"
    )
    parser.add_argument(
        "--server", default="http://localhost:8001",
        help="Prefix server URL for protocol links"
    )
    args = parser.parse_args(args_unknown)

    for data_file in args.data_files:
        try:
            _add_single_campaign(data_file, args.overwrite, args.server)
        except Exception as e:
            # Top-level CLI boundary: report the failure on stderr and stop.
            # sys.exit is used because the bare exit() helper is injected by
            # the `site` module and is not guaranteed to exist.
            print(f"Error processing {data_file}: {e}", file=sys.stderr)
            sys.exit(1)
192
279
 
193
280
 
194
281
  def main():
@@ -213,14 +300,47 @@ def main():
213
300
  elif args.command == 'purge':
214
301
  import shutil
215
302
 
216
- confirm = input(
217
- "Are you sure you want to purge all campaign data? This action cannot be undone. [y/n] "
303
+ # Parse optional campaign name
304
+ purge_args = argparse.ArgumentParser()
305
+ purge_args.add_argument(
306
+ 'campaign', type=str, nargs='?', default=None,
307
+ help='Optional campaign name to purge (purges all if not specified)'
218
308
  )
219
- if confirm.lower() == 'y':
220
- shutil.rmtree(f"{ROOT}/data/tasks", ignore_errors=True)
221
- shutil.rmtree(f"{ROOT}/data/outputs", ignore_errors=True)
222
- if os.path.exists(f"{ROOT}/data/progress.json"):
223
- os.remove(f"{ROOT}/data/progress.json")
224
- print("All campaign data purged.")
309
+ purge_args = purge_args.parse_args(args_unknown)
310
+
311
+ if purge_args.campaign is not None:
312
+ # Purge specific campaign
313
+ campaign_id = purge_args.campaign
314
+ confirm = input(
315
+ f"Are you sure you want to purge campaign '{campaign_id}'? This action cannot be undone. [y/n] "
316
+ )
317
+ if confirm.lower() == 'y':
318
+ # Remove task file
319
+ task_file = f"{ROOT}/data/tasks/{campaign_id}.json"
320
+ if os.path.exists(task_file):
321
+ os.remove(task_file)
322
+ # Remove output file
323
+ output_file = f"{ROOT}/data/outputs/{campaign_id}.jsonl"
324
+ if os.path.exists(output_file):
325
+ os.remove(output_file)
326
+ # Remove from progress data
327
+ progress_data = load_progress_data()
328
+ if campaign_id in progress_data:
329
+ del progress_data[campaign_id]
330
+ save_progress_data(progress_data)
331
+ print(f"Campaign '{campaign_id}' purged.")
332
+ else:
333
+ print("Cancelled.")
225
334
  else:
226
- print("Cancelled.")
335
+ # Purge all campaigns
336
+ confirm = input(
337
+ "Are you sure you want to purge all campaign data? This action cannot be undone. [y/n] "
338
+ )
339
+ if confirm.lower() == 'y':
340
+ shutil.rmtree(f"{ROOT}/data/tasks", ignore_errors=True)
341
+ shutil.rmtree(f"{ROOT}/data/outputs", ignore_errors=True)
342
+ if os.path.exists(f"{ROOT}/data/progress.json"):
343
+ os.remove(f"{ROOT}/data/progress.json")
344
+ print("All campaign data purged.")
345
+ else:
346
+ print("Cancelled.")