pearmut 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pearmut/app.py CHANGED
@@ -30,7 +30,8 @@ app.add_middleware(
 
 tasks_data = {}
 progress_data = load_progress_data(
-    warn="No progress.json found. Running, but no campaign will be available.")
+    warn="No progress.json found. Running, but no campaign will be available."
+)
 
 # load all tasks into data_all
 for campaign_id in progress_data.keys():
@@ -60,30 +61,31 @@ async def _log_response(request: LogResponseRequest):
 
     # append response to the output log
     save_db_payload(
-        campaign_id, request.payload | {"user_id": user_id, "item_i": item_i})
+        campaign_id, request.payload | {"user_id": user_id, "item_i": item_i}
+    )
 
     # if actions were submitted, we can log time data
     if "actions" in request.payload:
-        times = [
-            x["time"] for x in request.payload["actions"]
-        ]
+        times = [x["time"] for x in request.payload["actions"]]
         if progress_data[campaign_id][user_id]["time_start"] is None:
             progress_data[campaign_id][user_id]["time_start"] = min(times)
         progress_data[campaign_id][user_id]["time_end"] = max(times)
-        progress_data[campaign_id][user_id]["time"] += sum([
-            min(b - a, 60)
-            for a, b in zip(times, times[1:])
-        ])
+        progress_data[campaign_id][user_id]["time"] += sum(
+            [min(b - a, 60) for a, b in zip(times, times[1:])]
+        )
 
     # Initialize validation_checks if it doesn't exist
     if "validations" in request.payload:
         if "validations" not in progress_data[campaign_id][user_id]:
             progress_data[campaign_id][user_id]["validations"] = {}
 
-        progress_data[campaign_id][user_id]["validations"][request.item_i] = request.payload["validations"]
+        progress_data[campaign_id][user_id]["validations"][request.item_i] = (
+            request.payload["validations"]
+        )
 
-    update_progress(campaign_id, user_id, tasks_data,
-                    progress_data, request.item_i, request.payload)
+    update_progress(
+        campaign_id, user_id, tasks_data, progress_data, request.item_i, request.payload
+    )
     save_progress_data(progress_data)
 
     return JSONResponse(content="ok", status_code=200)
@@ -149,13 +151,15 @@ async def _dashboard_data(request: DashboardDataRequest):
 
     if campaign_id not in progress_data:
         return JSONResponse(content="Unknown campaign ID", status_code=400)
-
-    is_privileged = (request.token == tasks_data[campaign_id]["token"])
+
+    is_privileged = request.token == tasks_data[campaign_id]["token"]
 
     progress_new = {}
     assignment = tasks_data[campaign_id]["info"]["assignment"]
     if assignment not in ["task-based", "single-stream"]:
-        return JSONResponse(content="Unsupported campaign assignment type", status_code=400)
+        return JSONResponse(
+            content="Unsupported campaign assignment type", status_code=400
+        )
 
     # Get threshold info for the campaign
     validation_threshold = tasks_data[campaign_id]["info"].get("validation_threshold")
@@ -164,10 +168,9 @@ async def _dashboard_data(request: DashboardDataRequest):
         # shallow copy
         entry = dict(user_val)
         entry["validations"] = [
-            all(v)
-            for v in list(entry.get("validations", {}).values())
+            all(v) for v in list(entry.get("validations", {}).values())
         ]
-
+
         # Add threshold pass/fail status (only when user is complete)
         if all(entry["progress"]):
             entry["threshold_passed"] = check_validation_threshold(
@@ -183,11 +186,8 @@ async def _dashboard_data(request: DashboardDataRequest):
         progress_new[user_id] = entry
 
     return JSONResponse(
-        content={
-            "data": progress_new,
-            "validation_threshold": validation_threshold
-        },
-        status_code=200
+        content={"data": progress_new, "validation_threshold": validation_threshold},
+        status_code=200,
     )
 
 
@@ -227,7 +227,9 @@ async def _download_annotations(
     for campaign_id in campaign_id:
         output_path = f"{ROOT}/data/outputs/{campaign_id}.jsonl"
         if campaign_id not in progress_data:
-            return JSONResponse(content=f"Unknown campaign ID {campaign_id}", status_code=400)
+            return JSONResponse(
+                content=f"Unknown campaign ID {campaign_id}", status_code=400
+            )
         if not os.path.exists(output_path):
             output[campaign_id] = []
         else:
@@ -239,28 +241,33 @@ async def _download_annotations(
 
 @app.get("/download-progress")
 async def _download_progress(
-    campaign_id: list[str] = Query(),
-    token: list[str] = Query()
+    campaign_id: list[str] = Query(), token: list[str] = Query()
 ):
 
     if len(campaign_id) != len(token):
-        return JSONResponse(content="Mismatched campaign_id and token count", status_code=400)
+        return JSONResponse(
+            content="Mismatched campaign_id and token count", status_code=400
+        )
 
     output = {}
     for i, cid in enumerate(campaign_id):
         if cid not in progress_data:
             return JSONResponse(content=f"Unknown campaign ID {cid}", status_code=400)
         if token[i] != tasks_data[cid]["token"]:
-            return JSONResponse(content=f"Invalid token for campaign ID {cid}", status_code=400)
+            return JSONResponse(
+                content=f"Invalid token for campaign ID {cid}", status_code=400
+            )
 
         output[cid] = progress_data[cid]
 
     return JSONResponse(content=output, status_code=200)
 
+
 static_dir = f"{os.path.dirname(os.path.abspath(__file__))}/static/"
 if not os.path.exists(static_dir + "index.html"):
     raise FileNotFoundError(
-        "Static directory not found. Please build the frontend first.")
+        "Static directory not found. Please build the frontend first."
+    )
 
 app.mount(
     "/",
pearmut/cli.py CHANGED
@@ -12,6 +12,9 @@ import psutil
 
 from .utils import ROOT, load_progress_data, save_progress_data
 
+# Static directory path (constant for consistency)
+STATIC_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static")
+
 os.makedirs(f"{ROOT}/data/tasks", exist_ok=True)
 load_progress_data(warn=None)
 
@@ -50,6 +53,40 @@ def _run(args_unknown):
     )
 
 
+def _validate_item_structure(items, template):
+    """
+    Validate that items have the correct structure.
+    Items should be lists of dictionaries with 'src' and 'tgt' keys.
+
+    Args:
+        items: List of item dictionaries to validate
+        template: Template type ('pointwise' or 'listwise') for type validation
+    """
+    if not isinstance(items, list):
+        raise ValueError("Items must be a list")
+
+    for item in items:
+        if not isinstance(item, dict):
+            raise ValueError("Each item must be a dictionary with 'src' and 'tgt' keys")
+        if 'src' not in item or 'tgt' not in item:
+            raise ValueError("Each item must contain 'src' and 'tgt' keys")
+
+        # Validate src is always a string
+        if not isinstance(item['src'], str):
+            raise ValueError("Item 'src' must be a string")
+
+        # Validate tgt type based on template
+        if template == 'listwise':
+            if not isinstance(item['tgt'], list):
+                raise ValueError("Item 'tgt' must be a list for listwise template")
+            # Check that all elements in tgt list are strings
+            if not all(isinstance(t, str) for t in item['tgt']):
+                raise ValueError("All elements in 'tgt' list must be strings for listwise template")
+        elif template == 'pointwise':
+            if not isinstance(item['tgt'], str):
+                raise ValueError("Item 'tgt' must be a string for pointwise template")
+
+
 def _add_single_campaign(data_file, overwrite, server):
     """
     Add a single campaign from a JSON data file.
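Note: the new _validate_item_structure helper is applied per document, so each document must be a list of item dictionaries with 'src' and 'tgt' keys; 'tgt' is a single string for the pointwise template and a list of strings for the listwise template. A sketch of documents that would pass or fail, with invented example strings:

    # Hypothetical documents; the strings are illustrative only.
    pointwise_doc = [{"src": "Hello world", "tgt": "Hallo Welt"}]
    listwise_doc = [{"src": "Hello world", "tgt": ["Hallo Welt", "Hi there"]}]

    _validate_item_structure(pointwise_doc, "pointwise")  # passes
    _validate_item_structure(listwise_doc, "listwise")    # passes
    _validate_item_structure(pointwise_doc, "listwise")   # raises ValueError: 'tgt' must be a list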
@@ -80,6 +117,7 @@ def _add_single_campaign(data_file, overwrite, server):
         raise ValueError("Campaign 'info' must contain 'template' field.")
 
     assignment = campaign_data["info"]["assignment"]
+    template = campaign_data["info"]["template"]
     # use random words for identifying users
     rng = random.Random(campaign_data["campaign_id"])
     rword = wonderwords.RandomWord(rng=rng)
@@ -96,6 +134,13 @@ def _add_single_campaign(data_file, overwrite, server):
         if not all(isinstance(task, list) for task in tasks):
             raise ValueError(
                 "Each task in task-based campaign 'data' must be a list of items.")
+        # Validate item structure for each task
+        for task_i, task in enumerate(tasks):
+            for doc_i, doc in enumerate(task):
+                try:
+                    _validate_item_structure(doc, template)
+                except ValueError as e:
+                    raise ValueError(f"Task {task_i}, document {doc_i}: {e}")
         num_users = len(tasks)
     elif assignment == "single-stream":
         tasks = campaign_data["data"]
@@ -105,6 +150,12 @@ def _add_single_campaign(data_file, overwrite, server):
         if not isinstance(campaign_data["data"], list):
             raise ValueError(
                 "Single-stream campaign 'data' must be a list of items.")
+        # Validate item structure for single-stream
+        for doc_i, doc in enumerate(tasks):
+            try:
+                _validate_item_structure(doc, template)
+            except ValueError as e:
+                raise ValueError(f"Document {doc_i}: {e}")
         if isinstance(users_spec, int):
             num_users = users_spec
         elif isinstance(users_spec, list):
@@ -199,30 +250,62 @@ def _add_single_campaign(data_file, overwrite, server):
 
     # Handle assets symlink if specified
     if "assets" in campaign_data["info"]:
-        assets_real_path = campaign_data["info"]["assets"]
+        assets_config = campaign_data["info"]["assets"]
+
+        # assets must be a dictionary with source and destination keys
+        if not isinstance(assets_config, dict):
+            raise ValueError("Assets must be a dictionary with 'source' and 'destination' keys.")
+        if "source" not in assets_config or "destination" not in assets_config:
+            raise ValueError("Assets config must contain 'source' and 'destination' keys.")
+
+        assets_source = assets_config["source"]
+        assets_destination = assets_config["destination"]
+
+        # Validate destination starts with 'assets/'
+        if not assets_destination.startswith("assets/"):
+            raise ValueError(f"Assets destination '{assets_destination}' must start with 'assets/'.")
 
         # Resolve relative paths from the caller's current working directory
-        assets_real_path = os.path.abspath(assets_real_path)
+        assets_real_path = os.path.abspath(assets_source)
 
         if not os.path.isdir(assets_real_path):
-            raise ValueError(f"Assets path '{assets_real_path}' must be an existing directory.")
+            raise ValueError(f"Assets source path '{assets_real_path}' must be an existing directory.")
 
-        static_dir = f"{os.path.dirname(os.path.abspath(__file__))}/static"
-        dir_name = assets_real_path.split(os.sep)[-1]
-
-        if not os.path.isdir(static_dir):
+        if not os.path.isdir(STATIC_DIR):
             raise ValueError(
-                f"Static directory '{static_dir}' does not exist. "
+                f"Static directory '{STATIC_DIR}' does not exist. "
                 "Please build the frontend first."
             )
-        symlink_path = f"{static_dir}/assets/{dir_name}"
-
-        # Remove existing symlink if present and we are overriding
-        if os.path.exists(symlink_path):
+
+        # Symlink path is based on the destination, stripping the 'assets/' prefix
+        symlink_path = f"{STATIC_DIR}/{assets_destination}"
+
+        # Remove existing symlink if present and we are overriding the same campaign
+        if os.path.lexists(symlink_path):
+            # Check if any other campaign is using this destination
+            current_campaign_id = campaign_data['campaign_id']
+            tasks_dir = f"{ROOT}/data/tasks"
+            if os.path.exists(tasks_dir):
+                for task_file in os.listdir(tasks_dir):
+                    if task_file.endswith('.json'):
+                        other_campaign_id = task_file[:-5]
+                        if other_campaign_id != current_campaign_id:
+                            with open(f"{tasks_dir}/{task_file}", "r") as f:
+                                other_campaign = json.load(f)
+                            other_assets = other_campaign.get("info", {}).get("assets")
+                            if other_assets and isinstance(other_assets, dict):
+                                if other_assets.get("destination") == assets_destination:
+                                    raise ValueError(
+                                        f"Assets destination '{assets_destination}' is already used by campaign '{other_campaign_id}'."
+                                    )
+            # Only allow overwrite if it's the same campaign
             if overwrite:
                 os.remove(symlink_path)
             else:
-                raise ValueError(f"Assets symlink '{symlink_path}' already exists.")
+                raise ValueError(f"Assets destination '{assets_destination}' is already taken.")
+
+        # Ensure the assets directory exists
+        os.makedirs(f"{STATIC_DIR}/assets", exist_ok=True)
 
         os.symlink(assets_real_path, symlink_path, target_is_directory=True)
         print(f"Assets symlinked: {symlink_path} -> {assets_real_path}")
@@ -299,6 +382,20 @@ def main():
     elif args.command == 'purge':
         import shutil
 
+        def _unlink_assets(campaign_id):
+            """Unlink assets symlink for a campaign if it exists."""
+            task_file = f"{ROOT}/data/tasks/{campaign_id}.json"
+            if not os.path.exists(task_file):
+                return
+            with open(task_file, "r") as f:
+                campaign_data = json.load(f)
+            destination = campaign_data.get("info", {}).get("assets", {}).get("destination")
+            if destination:
+                symlink_path = f"{STATIC_DIR}/{destination}"
+                if os.path.islink(symlink_path):
+                    os.remove(symlink_path)
+                    print(f"Assets symlink removed: {symlink_path}")
+
         # Parse optional campaign name
         purge_args = argparse.ArgumentParser()
         purge_args.add_argument(
@@ -314,6 +411,8 @@ def main():
                 f"Are you sure you want to purge campaign '{campaign_id}'? This action cannot be undone. [y/n] "
             )
             if confirm.lower() == 'y':
+                # Unlink assets before removing task file
+                _unlink_assets(campaign_id)
                 # Remove task file
                 task_file = f"{ROOT}/data/tasks/{campaign_id}.json"
                 if os.path.exists(task_file):
@@ -336,6 +435,13 @@ def main():
                 "Are you sure you want to purge all campaign data? This action cannot be undone. [y/n] "
             )
             if confirm.lower() == 'y':
+                # Unlink all assets first
+                tasks_dir = f"{ROOT}/data/tasks"
+                if os.path.exists(tasks_dir):
+                    for task_file in os.listdir(tasks_dir):
+                        if task_file.endswith('.json'):
+                            campaign_id = task_file[:-5]
+                            _unlink_assets(campaign_id)
                 shutil.rmtree(f"{ROOT}/data/tasks", ignore_errors=True)
                 shutil.rmtree(f"{ROOT}/data/outputs", ignore_errors=True)
                 if os.path.exists(f"{ROOT}/data/progress.json"):