pearmut 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pearmut/app.py CHANGED
@@ -9,7 +9,13 @@ from fastapi.staticfiles import StaticFiles
 from pydantic import BaseModel
 
 from .assignment import get_i_item, get_next_item, reset_task, update_progress
-from .utils import ROOT, load_progress_data, save_db_payload, save_progress_data
+from .utils import (
+    ROOT,
+    check_validation_threshold,
+    load_progress_data,
+    save_db_payload,
+    save_progress_data,
+)
 
 os.makedirs(f"{ROOT}/data/outputs", exist_ok=True)
 
@@ -141,16 +147,19 @@ class DashboardDataRequest(BaseModel):
 async def _dashboard_data(request: DashboardDataRequest):
     campaign_id = request.campaign_id
 
-    is_privileged = (request.token == tasks_data[campaign_id]["token"])
-
     if campaign_id not in progress_data:
         return JSONResponse(content={"error": "Unknown campaign ID"}, status_code=400)
+
+    is_privileged = (request.token == tasks_data[campaign_id]["token"])
 
     progress_new = {}
     assignment = tasks_data[campaign_id]["info"]["assignment"]
     if assignment not in ["task-based", "single-stream"]:
         return JSONResponse(content={"error": "Unsupported campaign assignment type"}, status_code=400)
 
+    # Get threshold info for the campaign
+    validation_threshold = tasks_data[campaign_id]["info"].get("validation_threshold")
+
     for user_id, user_val in progress_data[campaign_id].items():
         # shallow copy
         entry = dict(user_val)
@@ -159,6 +168,13 @@ async def _dashboard_data(request: DashboardDataRequest):
             for v in list(entry.get("validations", {}).values())
         ]
 
+        # Add threshold pass/fail status (only when user is complete)
+        if all(entry["progress"]):
+            entry["threshold_passed"] = check_validation_threshold(
+                tasks_data, progress_data, campaign_id, user_id
+            )
+        else:
+            entry["threshold_passed"] = None
 
         if not is_privileged:
             entry["token_correct"] = None
@@ -169,7 +185,8 @@ async def _dashboard_data(request: DashboardDataRequest):
     return JSONResponse(
         content={
             "status": "ok",
-            "data": progress_new
+            "data": progress_new,
+            "validation_threshold": validation_threshold
        },
        status_code=200
    )
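
For orientation, the dashboard payload after this change carries the campaign-level validation_threshold next to the per-user entries, and each entry gains a threshold_passed flag that stays null until the user has finished all items. A rough sketch of the response shape (the user ID and values below are illustrative, not taken from a real campaign; other per-user fields are omitted):

    {
      "status": "ok",
      "data": {
        "quiet-otter-042": {
          "progress": [true, true, true],
          "threshold_passed": true,
          "token_correct": null
        }
      },
      "validation_threshold": 0.8
    }

As before, token_correct is blanked out unless the request carries the campaign token.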
pearmut/assignment.py CHANGED
@@ -3,18 +3,23 @@ from typing import Any
 
 from fastapi.responses import JSONResponse
 
-from .utils import get_db_log_item
+from .utils import (
+    RESET_MARKER,
+    check_validation_threshold,
+    get_db_log_item,
+    save_db_payload,
+)
 
 
 def _completed_response(
+    tasks_data: dict,
     progress_data: dict,
     campaign_id: str,
     user_id: str,
 ) -> JSONResponse:
     """Build a completed response with progress, time, and token."""
     user_progress = progress_data[campaign_id][user_id]
-    # TODO: add check for data quality
-    is_ok = True
+    is_ok = check_validation_threshold(tasks_data, progress_data, campaign_id, user_id)
     return JSONResponse(
         content={
             "status": "completed",
@@ -161,7 +166,7 @@ def get_next_item_taskbased(
     """
     user_progress = progress_data[campaign_id][user_id]
     if all(user_progress["progress"]):
-        return _completed_response(progress_data, campaign_id, user_id)
+        return _completed_response(data_all, progress_data, campaign_id, user_id)
 
     # find first incomplete item
     item_i = min([i for i, v in enumerate(user_progress["progress"]) if not v])
@@ -208,7 +213,7 @@ def get_next_item_singlestream(
     progress = user_progress["progress"]
 
     if all(progress):
-        return _completed_response(progress_data, campaign_id, user_id)
+        return _completed_response(data_all, progress_data, campaign_id, user_id)
 
     # find a random incomplete item
     incomplete_indices = [i for i, v in enumerate(progress) if not v]
@@ -261,20 +266,33 @@ def reset_task(
 ) -> JSONResponse:
     """
     Reset the task progress for the user in the specified campaign.
+    Saves a reset marker to mask existing annotations.
     """
     assignment = tasks_data[campaign_id]["info"]["assignment"]
     if assignment == "task-based":
-        progress_data[campaign_id][user_id]["progress"] = (
-            [False]*len(tasks_data[campaign_id]["data"][user_id])
-        )
+        # Save reset marker for this user to mask existing annotations
+        num_items = len(tasks_data[campaign_id]["data"][user_id])
+        for item_i in range(num_items):
+            save_db_payload(campaign_id, {
+                "user_id": user_id,
+                "item_i": item_i,
+                "annotations": RESET_MARKER
+            })
+        progress_data[campaign_id][user_id]["progress"] = [False] * num_items
         _reset_user_time(progress_data, campaign_id, user_id)
         return JSONResponse(content={"status": "ok"}, status_code=200)
     elif assignment == "single-stream":
+        # Save reset markers for all items (shared pool)
+        num_items = len(tasks_data[campaign_id]["data"])
+        for item_i in range(num_items):
+            save_db_payload(campaign_id, {
+                "user_id": None,
+                "item_i": item_i,
+                "annotations": RESET_MARKER
+            })
         # for single-stream reset all progress
         for uid in progress_data[campaign_id]:
-            progress_data[campaign_id][uid]["progress"] = (
-                [False]*len(tasks_data[campaign_id]["data"])
-            )
+            progress_data[campaign_id][uid]["progress"] = [False] * num_items
         _reset_user_time(progress_data, campaign_id, user_id)
         return JSONResponse(content={"status": "ok"}, status_code=200)
     else:
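
check_validation_threshold is imported from pearmut/utils.py, which is outside this diff; only its call signature and the validation_threshold info key are visible above. A minimal, hypothetical sketch of the kind of check it could perform, assuming each entry in a user's validations dict is a pass/fail value compared as a fraction against the configured threshold (the shipped implementation may differ):

    def check_validation_threshold(tasks_data, progress_data, campaign_id, user_id):
        # Hypothetical sketch only; the real function lives in pearmut/utils.py.
        threshold = tasks_data[campaign_id]["info"].get("validation_threshold")
        if threshold is None:
            # No threshold configured: treat the user as passing.
            return True
        validations = progress_data[campaign_id][user_id].get("validations", {})
        if not validations:
            return True
        passed = sum(1 for v in validations.values() if v)
        return passed / len(validations) >= threshold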
pearmut/cli.py CHANGED
@@ -10,7 +10,7 @@ import urllib.parse
 
 import psutil
 
-from .utils import ROOT, load_progress_data
+from .utils import ROOT, load_progress_data, save_progress_data
 
 os.makedirs(f"{ROOT}/data/tasks", exist_ok=True)
 load_progress_data(warn=None)
@@ -50,41 +50,25 @@ def _run(args_unknown):
     )
 
 
-def _add_campaign(args_unknown):
+def _add_single_campaign(data_file, overwrite, server):
     """
-    Add a new campaign from a JSON data file.
+    Add a single campaign from a JSON data file.
     """
     import random
 
     import wonderwords
 
-    args = argparse.ArgumentParser()
-    args.add_argument(
-        'data_file', type=str,
-        help='Path to the campaign data file'
-    )
-    args.add_argument(
-        "-o", "--overwrite", action="store_true",
-        help="Overwrite existing campaign if it exists"
-    )
-    args.add_argument(
-        "--server", default="http://localhost:8001",
-        help="Prefix server URL for protocol links"
-    )
-    args = args.parse_args(args_unknown)
-
-    with open(args.data_file, 'r') as f:
+    with open(data_file, 'r') as f:
         campaign_data = json.load(f)
 
     with open(f"{ROOT}/data/progress.json", "r") as f:
         progress_data = json.load(f)
 
-    if campaign_data['campaign_id'] in progress_data and not args.overwrite:
-        print(
-            f"Campaign {campaign_data['campaign_id']} already exists.",
+    if campaign_data['campaign_id'] in progress_data and not overwrite:
+        raise ValueError(
+            f"Campaign {campaign_data['campaign_id']} already exists.\n"
             "Use -o to overwrite."
         )
-        exit(1)
 
     if "info" not in campaign_data:
         raise ValueError("Campaign data must contain 'info' field.")
@@ -99,6 +83,11 @@ def _add_campaign(args_unknown):
     # use random words for identifying users
     rng = random.Random(campaign_data["campaign_id"])
     rword = wonderwords.RandomWord(rng=rng)
+
+    # Parse users specification from info
+    users_spec = campaign_data["info"].get("users")
+    user_tokens = {}  # user_id -> {"pass": ..., "fail": ...}
+
     if assignment == "task-based":
         tasks = campaign_data["data"]
         if not isinstance(tasks, list):
@@ -110,29 +99,58 @@ def _add_campaign(args_unknown):
         num_users = len(tasks)
     elif assignment == "single-stream":
         tasks = campaign_data["data"]
-        if "num_users" not in campaign_data["info"]:
+        if users_spec is None:
             raise ValueError(
-                "Single-stream campaigns must specify 'num_users' in info.")
+                "Single-stream campaigns must specify 'users' in info.")
         if not isinstance(campaign_data["data"], list):
             raise ValueError(
                 "Single-stream campaign 'data' must be a list of items.")
-        num_users = campaign_data["info"]["num_users"]
+        if isinstance(users_spec, int):
+            num_users = users_spec
+        elif isinstance(users_spec, list):
+            num_users = len(users_spec)
+        else:
+            raise ValueError("'users' must be an integer or a list.")
     elif assignment == "dynamic":
         raise NotImplementedError(
             "Dynamic campaign assignment is not yet implemented.")
     else:
         raise ValueError(f"Unknown campaign assignment type: {assignment}")
 
-    user_ids = []
-    while len(user_ids) < num_users:
-        # generate random user IDs
-        new_id = f"{rword.random_words(amount=1, include_parts_of_speech=['adjective'])[0]}-{rword.random_words(amount=1, include_parts_of_speech=['noun'])[0]}"
-        if new_id not in user_ids:
-            user_ids.append(new_id)
-    user_ids = [
-        f"{user_id}-{rng.randint(0, 999):03d}"
-        for user_id in user_ids
-    ]
+    # Generate or parse user IDs based on users specification
+    if users_spec is None or isinstance(users_spec, int):
+        # Generate random user IDs
+        user_ids = []
+        while len(user_ids) < num_users:
+            new_id = f"{rword.random_words(amount=1, include_parts_of_speech=['adjective'])[0]}-{rword.random_words(amount=1, include_parts_of_speech=['noun'])[0]}"
+            if new_id not in user_ids:
+                user_ids.append(new_id)
+        user_ids = [
+            f"{user_id}-{rng.randint(0, 999):03d}"
+            for user_id in user_ids
+        ]
+    elif isinstance(users_spec, list):
+        if len(users_spec) != num_users:
+            raise ValueError(
+                f"Number of users ({len(users_spec)}) must match expected count ({num_users}).")
+        if all(isinstance(u, str) for u in users_spec):
+            # List of string IDs
+            user_ids = users_spec
+        elif all(isinstance(u, dict) for u in users_spec):
+            # List of dicts with user_id, token_pass, token_fail
+            user_ids = []
+            for u in users_spec:
+                if "user_id" not in u:
+                    raise ValueError("Each user dict must contain 'user_id'.")
+                user_ids.append(u["user_id"])
+                user_tokens[u["user_id"]] = {
+                    "pass": u.get("token_pass"),
+                    "fail": u.get("token_fail"),
+                }
+        else:
+            raise ValueError("'users' list must contain all strings or all dicts.")
+    else:
+        raise ValueError("'users' must be an integer or a list.")
 
     # For task-based, data is a dict mapping user_id -> tasks
     # For single-stream, data is a flat list (shared among all users)
@@ -150,6 +168,13 @@ def _add_campaign(args_unknown):
         hashlib.sha256(random.randbytes(16)).hexdigest()[:10]
     )
 
+    def get_token(user_id, token_type):
+        """Get user token or generate a random one."""
+        token = user_tokens.get(user_id, {}).get(token_type)
+        if token is not None:
+            return token
+        return hashlib.sha256(random.randbytes(16)).hexdigest()[:10]
+
     user_progress = {
         user_id: {
             # TODO: progress tracking could be based on the assignment type
@@ -166,12 +191,44 @@ def _add_campaign(args_unknown):
                 f"?campaign_id={urllib.parse.quote_plus(campaign_data['campaign_id'])}"
                 f"&user_id={user_id}"
             ),
-            "token_correct": hashlib.sha256(random.randbytes(16)).hexdigest()[:10],
-            "token_incorrect": hashlib.sha256(random.randbytes(16)).hexdigest()[:10],
+            "token_correct": get_token(user_id, "pass"),
+            "token_incorrect": get_token(user_id, "fail"),
         }
         for user_id in user_ids
     }
 
+    # Handle assets symlink if specified
+    if "assets" in campaign_data["info"]:
+        assets_real_path = campaign_data["info"]["assets"]
+
+        # Resolve relative paths from the caller's current working directory
+        assets_real_path = os.path.abspath(assets_real_path)
+
+        if not os.path.isdir(assets_real_path):
+            raise ValueError(f"Assets path '{assets_real_path}' must be an existing directory.")
+
+        static_dir = f"{os.path.dirname(os.path.abspath(__file__))}/static"
+        dir_name = assets_real_path.split(os.sep)[-1]
+
+        if not os.path.isdir(static_dir):
+            raise ValueError(
+                f"Static directory '{static_dir}' does not exist. "
+                "Please build the frontend first."
+            )
+        symlink_path = f"{static_dir}/assets/{dir_name}"
+
+        # Remove existing symlink if present and we are overriding
+        if os.path.exists(symlink_path):
+            if overwrite:
+                os.remove(symlink_path)
+            else:
+                raise ValueError(f"Assets symlink '{symlink_path}' already exists.")
+
+        os.symlink(assets_real_path, symlink_path, target_is_directory=True)
+        print(f"Assets symlinked: {symlink_path} -> {assets_real_path}")
+
+
+    # commit to transaction
     with open(f"{ROOT}/data/tasks/{campaign_data['campaign_id']}.json", "w") as f:
         json.dump(campaign_data, f, indent=2, ensure_ascii=False)
 
@@ -180,15 +237,44 @@ def _add_campaign(args_unknown):
     with open(f"{ROOT}/data/progress.json", "w") as f:
         json.dump(progress_data, f, indent=2, ensure_ascii=False)
 
+
     print(
-        f"{args.server}/dashboard.html"
+        "🎛️ ",
+        f"{server}/dashboard.html"
         f"?campaign_id={urllib.parse.quote_plus(campaign_data['campaign_id'])}"
         f"&token={campaign_data['token']}"
     )
-    print("-"*10)
     for user_id, user_val in user_progress.items():
         # point to the protocol URL
-        print(f"{args.server}/{user_val["url"]}")
+        print(f'{server}/{user_val["url"]}')
+    print()
+
+
+def _add_campaign(args_unknown):
+    """
+    Add campaigns from one or more JSON data files.
+    """
+    args = argparse.ArgumentParser()
+    args.add_argument(
+        'data_files', type=str, nargs='+',
+        help='One or more paths to campaign data files'
+    )
+    args.add_argument(
+        "-o", "--overwrite", action="store_true",
+        help="Overwrite existing campaign if it exists"
+    )
+    args.add_argument(
+        "--server", default="http://localhost:8001",
+        help="Prefix server URL for protocol links"
+    )
+    args = args.parse_args(args_unknown)
+
+    for data_file in args.data_files:
+        try:
+            _add_single_campaign(data_file, args.overwrite, args.server)
+        except Exception as e:
+            print(f"Error processing {data_file}: {e}")
+            exit(1)
 
 
 def main():
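
The reworked campaign loader accepts a users entry in the campaign's info block as either an integer (generate that many random IDs), a list of string IDs, or a list of dicts carrying pre-assigned pass/fail tokens, alongside an optional assets directory that is symlinked into the package's static files. A hypothetical campaign file illustrating the new fields (all identifiers, tokens, and paths are made up; the format of the items inside data is unchanged by this diff and left empty here):

    {
      "campaign_id": "demo-campaign",
      "info": {
        "assignment": "single-stream",
        "validation_threshold": 0.8,
        "assets": "./demo_assets",
        "users": [
          {"user_id": "annotator-1", "token_pass": "pass-123", "token_fail": "fail-456"},
          {"user_id": "annotator-2"}
        ]
      },
      "data": []
    }

Users given as plain strings, or as a bare count, fall back to randomly generated tokens via get_token, just as the previous version generated them.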
@@ -213,14 +299,47 @@ def main():
     elif args.command == 'purge':
         import shutil
 
-        confirm = input(
-            "Are you sure you want to purge all campaign data? This action cannot be undone. [y/n] "
+        # Parse optional campaign name
+        purge_args = argparse.ArgumentParser()
+        purge_args.add_argument(
+            'campaign', type=str, nargs='?', default=None,
+            help='Optional campaign name to purge (purges all if not specified)'
         )
-        if confirm.lower() == 'y':
-            shutil.rmtree(f"{ROOT}/data/tasks", ignore_errors=True)
-            shutil.rmtree(f"{ROOT}/data/outputs", ignore_errors=True)
-            if os.path.exists(f"{ROOT}/data/progress.json"):
-                os.remove(f"{ROOT}/data/progress.json")
-            print("All campaign data purged.")
+        purge_args = purge_args.parse_args(args_unknown)
+
+        if purge_args.campaign is not None:
+            # Purge specific campaign
+            campaign_id = purge_args.campaign
+            confirm = input(
+                f"Are you sure you want to purge campaign '{campaign_id}'? This action cannot be undone. [y/n] "
+            )
+            if confirm.lower() == 'y':
+                # Remove task file
+                task_file = f"{ROOT}/data/tasks/{campaign_id}.json"
+                if os.path.exists(task_file):
+                    os.remove(task_file)
+                # Remove output file
+                output_file = f"{ROOT}/data/outputs/{campaign_id}.jsonl"
+                if os.path.exists(output_file):
+                    os.remove(output_file)
+                # Remove from progress data
+                progress_data = load_progress_data()
+                if campaign_id in progress_data:
+                    del progress_data[campaign_id]
+                save_progress_data(progress_data)
+                print(f"Campaign '{campaign_id}' purged.")
+            else:
+                print("Cancelled.")
         else:
-            print("Cancelled.")
+            # Purge all campaigns
+            confirm = input(
+                "Are you sure you want to purge all campaign data? This action cannot be undone. [y/n] "
+            )
+            if confirm.lower() == 'y':
+                shutil.rmtree(f"{ROOT}/data/tasks", ignore_errors=True)
+                shutil.rmtree(f"{ROOT}/data/outputs", ignore_errors=True)
+                if os.path.exists(f"{ROOT}/data/progress.json"):
+                    os.remove(f"{ROOT}/data/progress.json")
+                print("All campaign data purged.")
+            else:
+                print("Cancelled.")
@@ -232,4 +232,8 @@ input[type="button"].error_delete:hover {
     margin-right: 5px;
     position: relative;
     top: -5px;
+}
+
+.char_missing {
+    font-family: monospace;
 }