pearmut 0.2.4__tar.gz → 0.2.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24) hide show
  1. {pearmut-0.2.4 → pearmut-0.2.6}/PKG-INFO +12 -7
  2. {pearmut-0.2.4 → pearmut-0.2.6}/README.md +11 -6
  3. {pearmut-0.2.4 → pearmut-0.2.6}/pearmut.egg-info/PKG-INFO +12 -7
  4. {pearmut-0.2.4 → pearmut-0.2.6}/pyproject.toml +1 -1
  5. {pearmut-0.2.4 → pearmut-0.2.6}/server/app.py +16 -17
  6. {pearmut-0.2.4 → pearmut-0.2.6}/server/assignment.py +10 -10
  7. {pearmut-0.2.4 → pearmut-0.2.6}/server/cli.py +119 -13
  8. {pearmut-0.2.4 → pearmut-0.2.6}/server/static/dashboard.bundle.js +1 -1
  9. {pearmut-0.2.4 → pearmut-0.2.6}/server/static/dashboard.html +2 -1
  10. {pearmut-0.2.4 → pearmut-0.2.6}/server/static/listwise.bundle.js +1 -1
  11. {pearmut-0.2.4 → pearmut-0.2.6}/server/static/listwise.html +1 -1
  12. {pearmut-0.2.4 → pearmut-0.2.6}/server/static/pointwise.bundle.js +1 -1
  13. {pearmut-0.2.4 → pearmut-0.2.6}/server/static/pointwise.html +1 -1
  14. {pearmut-0.2.4 → pearmut-0.2.6}/LICENSE +0 -0
  15. {pearmut-0.2.4 → pearmut-0.2.6}/pearmut.egg-info/SOURCES.txt +0 -0
  16. {pearmut-0.2.4 → pearmut-0.2.6}/pearmut.egg-info/dependency_links.txt +0 -0
  17. {pearmut-0.2.4 → pearmut-0.2.6}/pearmut.egg-info/entry_points.txt +0 -0
  18. {pearmut-0.2.4 → pearmut-0.2.6}/pearmut.egg-info/requires.txt +0 -0
  19. {pearmut-0.2.4 → pearmut-0.2.6}/pearmut.egg-info/top_level.txt +0 -0
  20. {pearmut-0.2.4 → pearmut-0.2.6}/server/static/assets/favicon.svg +0 -0
  21. {pearmut-0.2.4 → pearmut-0.2.6}/server/static/assets/style.css +0 -0
  22. {pearmut-0.2.4 → pearmut-0.2.6}/server/static/index.html +0 -0
  23. {pearmut-0.2.4 → pearmut-0.2.6}/server/utils.py +0 -0
  24. {pearmut-0.2.4 → pearmut-0.2.6}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pearmut
3
- Version: 0.2.4
3
+ Version: 0.2.6
4
4
  Summary: A tool for evaluation of model outputs, primarily MT.
5
5
  Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
6
6
  License: apache-2.0
@@ -67,7 +67,6 @@ One of the simplest ones, where each user has a pre-defined list of tasks (`task
67
67
  "protocol_score": true, # we want scores [0...100] for each segment
68
68
  "protocol_error_spans": true, # we want error spans
69
69
  "protocol_error_categories": false, # we do not want error span categories
70
- "instructions": "Evaluate translation from en to cs_CZ", # message to show to users
71
70
  },
72
71
  "campaign_id": "wmt25_#_en-cs_CZ",
73
72
  "data": [
@@ -76,6 +75,7 @@ One of the simplest ones, where each user has a pre-defined list of tasks (`task
76
75
  [
77
76
  # each evaluation item is a document
78
77
  {
78
+ "instructions": "Evaluate translation from en to cs_CZ", # message to show to users above the first item
79
79
  "src": "This will be the year that Guinness loses its cool. Cheers to that!",
80
80
  "tgt": "Nevím přesně, kdy jsem to poprvé zaznamenal. Možná to bylo ve chvíli, ..."
81
81
  },
@@ -288,22 +288,25 @@ Pearmut provides the following commands:
288
288
 
289
289
  ## Hosting Assets
290
290
 
291
- If you need to host local assets (e.g., audio files, images, videos) via Pearmut, you can use the `assets` key in your campaign file.
292
- When present, this directory is symlinked to the `static/` directory so its contents become accessible from the server.
291
+ To host local assets (e.g., audio files, images, videos), use the `assets` key in your campaign file:
293
292
 
294
293
  ```python
295
294
  {
296
295
  "campaign_id": "my_campaign",
297
296
  "info": {
298
- "assets": "videos", # path to directory containing assets
297
+ "assets": {
298
+ "source": "videos", # path to directory containing assets
299
+ "destination": "assets/my_videos" # where to mount (must start with "assets/")
300
+ },
299
301
  ...
300
302
  },
301
303
  "data": [ ... ]
302
304
  }
303
305
  ```
304
306
 
305
- For example, if `videos` contains `audio.mp3`, it will be accessible at `localhost:8001/assets/videos/audio.mp3`.
306
- The path can be absolute or relative to your current working directory.
307
+ Files in `videos/` become accessible at `localhost:8001/assets/my_videos/`.
308
+ This creates a symlink, so the source directory must exist throughout the annotation period.
309
+ If another campaign already uses the destination path, the add command will fail.
307
310
 
308
311
  ## Development
309
312
 
@@ -333,6 +336,8 @@ The `pearmut run` also accepts `--port` (default 8001).
333
336
  If you wish to create a new protocol (referenceable from `info->template`), simply create a new HTML and TS file in `web/src` and add a rule to `webpack.config.js` so that your template gets built.
334
337
  A template can call the server for data etc (see [web/src/pointwise.ts](web/src/pointwise.ts) as an example).
335
338
 
339
+ To run the platform, you need to run this on some publicly-facing server, or run it locally and tunnel your port to a public IP/domain.
340
+
336
341
  ## Citation
337
342
 
338
343
  If you use this work in your paper, please cite as:
@@ -47,7 +47,6 @@ One of the simplest ones, where each user has a pre-defined list of tasks (`task
47
47
  "protocol_score": true, # we want scores [0...100] for each segment
48
48
  "protocol_error_spans": true, # we want error spans
49
49
  "protocol_error_categories": false, # we do not want error span categories
50
- "instructions": "Evaluate translation from en to cs_CZ", # message to show to users
51
50
  },
52
51
  "campaign_id": "wmt25_#_en-cs_CZ",
53
52
  "data": [
@@ -56,6 +55,7 @@ One of the simplest ones, where each user has a pre-defined list of tasks (`task
56
55
  [
57
56
  # each evaluation item is a document
58
57
  {
58
+ "instructions": "Evaluate translation from en to cs_CZ", # message to show to users above the first item
59
59
  "src": "This will be the year that Guinness loses its cool. Cheers to that!",
60
60
  "tgt": "Nevím přesně, kdy jsem to poprvé zaznamenal. Možná to bylo ve chvíli, ..."
61
61
  },
@@ -268,22 +268,25 @@ Pearmut provides the following commands:
268
268
 
269
269
  ## Hosting Assets
270
270
 
271
- If you need to host local assets (e.g., audio files, images, videos) via Pearmut, you can use the `assets` key in your campaign file.
272
- When present, this directory is symlinked to the `static/` directory so its contents become accessible from the server.
271
+ To host local assets (e.g., audio files, images, videos), use the `assets` key in your campaign file:
273
272
 
274
273
  ```python
275
274
  {
276
275
  "campaign_id": "my_campaign",
277
276
  "info": {
278
- "assets": "videos", # path to directory containing assets
277
+ "assets": {
278
+ "source": "videos", # path to directory containing assets
279
+ "destination": "assets/my_videos" # where to mount (must start with "assets/")
280
+ },
279
281
  ...
280
282
  },
281
283
  "data": [ ... ]
282
284
  }
283
285
  ```
284
286
 
285
- For example, if `videos` contains `audio.mp3`, it will be accessible at `localhost:8001/assets/videos/audio.mp3`.
286
- The path can be absolute or relative to your current working directory.
287
+ Files in `videos/` become accessible at `localhost:8001/assets/my_videos/`.
288
+ This creates a symlink, so the source directory must exist throughout the annotation period.
289
+ If another campaign already uses the destination path, the add command will fail.
287
290
 
288
291
  ## Development
289
292
 
@@ -313,6 +316,8 @@ The `pearmut run` also accepts `--port` (default 8001).
313
316
  If you wish to create a new protocol (referenceable from `info->template`), simply create a new HTML and TS file in `web/src` and add a rule to `webpack.config.js` so that your template gets built.
314
317
  A template can call the server for data etc (see [web/src/pointwise.ts](web/src/pointwise.ts) as an example).
315
318
 
319
+ To run the platform, you need to run this on some publicly-facing server, or run it locally and tunnel your port to a public IP/domain.
320
+
316
321
  ## Citation
317
322
 
318
323
  If you use this work in your paper, please cite as:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pearmut
3
- Version: 0.2.4
3
+ Version: 0.2.6
4
4
  Summary: A tool for evaluation of model outputs, primarily MT.
5
5
  Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
6
6
  License: apache-2.0
@@ -67,7 +67,6 @@ One of the simplest ones, where each user has a pre-defined list of tasks (`task
67
67
  "protocol_score": true, # we want scores [0...100] for each segment
68
68
  "protocol_error_spans": true, # we want error spans
69
69
  "protocol_error_categories": false, # we do not want error span categories
70
- "instructions": "Evaluate translation from en to cs_CZ", # message to show to users
71
70
  },
72
71
  "campaign_id": "wmt25_#_en-cs_CZ",
73
72
  "data": [
@@ -76,6 +75,7 @@ One of the simplest ones, where each user has a pre-defined list of tasks (`task
76
75
  [
77
76
  # each evaluation item is a document
78
77
  {
78
+ "instructions": "Evaluate translation from en to cs_CZ", # message to show to users above the first item
79
79
  "src": "This will be the year that Guinness loses its cool. Cheers to that!",
80
80
  "tgt": "Nevím přesně, kdy jsem to poprvé zaznamenal. Možná to bylo ve chvíli, ..."
81
81
  },
@@ -288,22 +288,25 @@ Pearmut provides the following commands:
288
288
 
289
289
  ## Hosting Assets
290
290
 
291
- If you need to host local assets (e.g., audio files, images, videos) via Pearmut, you can use the `assets` key in your campaign file.
292
- When present, this directory is symlinked to the `static/` directory so its contents become accessible from the server.
291
+ To host local assets (e.g., audio files, images, videos), use the `assets` key in your campaign file:
293
292
 
294
293
  ```python
295
294
  {
296
295
  "campaign_id": "my_campaign",
297
296
  "info": {
298
- "assets": "videos", # path to directory containing assets
297
+ "assets": {
298
+ "source": "videos", # path to directory containing assets
299
+ "destination": "assets/my_videos" # where to mount (must start with "assets/")
300
+ },
299
301
  ...
300
302
  },
301
303
  "data": [ ... ]
302
304
  }
303
305
  ```
304
306
 
305
- For example, if `videos` contains `audio.mp3`, it will be accessible at `localhost:8001/assets/videos/audio.mp3`.
306
- The path can be absolute or relative to your current working directory.
307
+ Files in `videos/` become accessible at `localhost:8001/assets/my_videos/`.
308
+ This creates a symlink, so the source directory must exist throughout the annotation period.
309
+ If another campaign already uses the destination path, the add command will fail.
307
310
 
308
311
  ## Development
309
312
 
@@ -333,6 +336,8 @@ The `pearmut run` also accepts `--port` (default 8001).
333
336
  If you wish to create a new protocol (referenceable from `info->template`), simply create a new HTML and TS file in `web/src` and add a rule to `webpack.config.js` so that your template gets built.
334
337
  A template can call the server for data etc (see [web/src/pointwise.ts](web/src/pointwise.ts) as an example).
335
338
 
339
+ To run the platform, you need to run this on some publicly-facing server, or run it locally and tunnel your port to a public IP/domain.
340
+
336
341
  ## Citation
337
342
 
338
343
  If you use this work in your paper, please cite as:
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "pearmut"
3
- version = "0.2.4"
3
+ version = "0.2.6"
4
4
  description = "A tool for evaluation of model outputs, primarily MT."
5
5
  readme = "README.md"
6
6
  license = { text = "apache-2.0" }
@@ -54,9 +54,9 @@ async def _log_response(request: LogResponseRequest):
54
54
  item_i = request.item_i
55
55
 
56
56
  if campaign_id not in progress_data:
57
- return JSONResponse(content={"error": "Unknown campaign ID"}, status_code=400)
57
+ return JSONResponse(content="Unknown campaign ID", status_code=400)
58
58
  if user_id not in progress_data[campaign_id]:
59
- return JSONResponse(content={"error": "Unknown user ID"}, status_code=400)
59
+ return JSONResponse(content="Unknown user ID", status_code=400)
60
60
 
61
61
  # append response to the output log
62
62
  save_db_payload(
@@ -86,7 +86,7 @@ async def _log_response(request: LogResponseRequest):
86
86
  progress_data, request.item_i, request.payload)
87
87
  save_progress_data(progress_data)
88
88
 
89
- return JSONResponse(content={"status": "ok"}, status_code=200)
89
+ return JSONResponse(content="ok", status_code=200)
90
90
 
91
91
 
92
92
  class NextItemRequest(BaseModel):
@@ -100,9 +100,9 @@ async def _get_next_item(request: NextItemRequest):
100
100
  user_id = request.user_id
101
101
 
102
102
  if campaign_id not in progress_data:
103
- return JSONResponse(content={"error": "Unknown campaign ID"}, status_code=400)
103
+ return JSONResponse(content="Unknown campaign ID", status_code=400)
104
104
  if user_id not in progress_data[campaign_id]:
105
- return JSONResponse(content={"error": "Unknown user ID"}, status_code=400)
105
+ return JSONResponse(content="Unknown user ID", status_code=400)
106
106
 
107
107
  return get_next_item(
108
108
  campaign_id,
@@ -125,9 +125,9 @@ async def _get_i_item(request: GetItemRequest):
125
125
  item_i = request.item_i
126
126
 
127
127
  if campaign_id not in progress_data:
128
- return JSONResponse(content={"error": "Unknown campaign ID"}, status_code=400)
128
+ return JSONResponse(content="Unknown campaign ID", status_code=400)
129
129
  if user_id not in progress_data[campaign_id]:
130
- return JSONResponse(content={"error": "Unknown user ID"}, status_code=400)
130
+ return JSONResponse(content="Unknown user ID", status_code=400)
131
131
 
132
132
  return get_i_item(
133
133
  campaign_id,
@@ -148,14 +148,14 @@ async def _dashboard_data(request: DashboardDataRequest):
148
148
  campaign_id = request.campaign_id
149
149
 
150
150
  if campaign_id not in progress_data:
151
- return JSONResponse(content={"error": "Unknown campaign ID"}, status_code=400)
151
+ return JSONResponse(content="Unknown campaign ID", status_code=400)
152
152
 
153
153
  is_privileged = (request.token == tasks_data[campaign_id]["token"])
154
154
 
155
155
  progress_new = {}
156
156
  assignment = tasks_data[campaign_id]["info"]["assignment"]
157
157
  if assignment not in ["task-based", "single-stream"]:
158
- return JSONResponse(content={"error": "Unsupported campaign assignment type"}, status_code=400)
158
+ return JSONResponse(content="Unsupported campaign assignment type", status_code=400)
159
159
 
160
160
  # Get threshold info for the campaign
161
161
  validation_threshold = tasks_data[campaign_id]["info"].get("validation_threshold")
@@ -184,7 +184,6 @@ async def _dashboard_data(request: DashboardDataRequest):
184
184
 
185
185
  return JSONResponse(
186
186
  content={
187
- "status": "ok",
188
187
  "data": progress_new,
189
188
  "validation_threshold": validation_threshold
190
189
  },
@@ -206,11 +205,11 @@ async def _reset_task(request: ResetTaskRequest):
206
205
  token = request.token
207
206
 
208
207
  if campaign_id not in progress_data:
209
- return JSONResponse(content={"error": "Unknown campaign ID"}, status_code=400)
208
+ return JSONResponse(content="Unknown campaign ID", status_code=400)
210
209
  if token != tasks_data[campaign_id]["token"]:
211
- return JSONResponse(content={"error": "Invalid token"}, status_code=400)
210
+ return JSONResponse(content="Invalid token", status_code=400)
212
211
  if user_id not in progress_data[campaign_id]:
213
- return JSONResponse(content={"error": "Unknown user ID"}, status_code=400)
212
+ return JSONResponse(content="Unknown user ID", status_code=400)
214
213
 
215
214
  response = reset_task(campaign_id, user_id, tasks_data, progress_data)
216
215
  save_progress_data(progress_data)
@@ -228,7 +227,7 @@ async def _download_annotations(
228
227
  for campaign_id in campaign_id:
229
228
  output_path = f"{ROOT}/data/outputs/{campaign_id}.jsonl"
230
229
  if campaign_id not in progress_data:
231
- return JSONResponse(content={"error": f"Unknown campaign ID {campaign_id}"}, status_code=400)
230
+ return JSONResponse(content=f"Unknown campaign ID {campaign_id}", status_code=400)
232
231
  if not os.path.exists(output_path):
233
232
  output[campaign_id] = []
234
233
  else:
@@ -245,14 +244,14 @@ async def _download_progress(
245
244
  ):
246
245
 
247
246
  if len(campaign_id) != len(token):
248
- return JSONResponse(content={"error": "Mismatched campaign_id and token count"}, status_code=400)
247
+ return JSONResponse(content="Mismatched campaign_id and token count", status_code=400)
249
248
 
250
249
  output = {}
251
250
  for i, cid in enumerate(campaign_id):
252
251
  if cid not in progress_data:
253
- return JSONResponse(content={"error": f"Unknown campaign ID {cid}"}, status_code=400)
252
+ return JSONResponse(content=f"Unknown campaign ID {cid}", status_code=400)
254
253
  if token[i] != tasks_data[cid]["token"]:
255
- return JSONResponse(content={"error": f"Invalid token for campaign ID {cid}"}, status_code=400)
254
+ return JSONResponse(content=f"Invalid token for campaign ID {cid}", status_code=400)
256
255
 
257
256
  output[cid] = progress_data[cid]
258
257
 
@@ -48,7 +48,7 @@ def get_next_item(
48
48
  elif assignment == "dynamic":
49
49
  return get_next_item_dynamic(campaign_id, user_id, tasks_data, progress_data)
50
50
  else:
51
- return JSONResponse(content={"error": "Unknown campaign assignment type"}, status_code=400)
51
+ return JSONResponse(content="Unknown campaign assignment type", status_code=400)
52
52
 
53
53
 
54
54
  def get_i_item(
@@ -67,7 +67,7 @@ def get_i_item(
67
67
  elif assignment == "single-stream":
68
68
  return get_i_item_singlestream(campaign_id, user_id, tasks_data, progress_data, item_i)
69
69
  else:
70
- return JSONResponse(content={"error": "Get item not supported for this assignment type"}, status_code=400)
70
+ return JSONResponse(content="Get item not supported for this assignment type", status_code=400)
71
71
 
72
72
 
73
73
  def get_i_item_taskbased(
@@ -90,7 +90,7 @@ def get_i_item_taskbased(
90
90
 
91
91
  if item_i < 0 or item_i >= len(data_all[campaign_id]["data"][user_id]):
92
92
  return JSONResponse(
93
- content={"status": "error", "message": "Item index out of range"},
93
+ content="Item index out of range",
94
94
  status_code=400
95
95
  )
96
96
 
@@ -133,7 +133,7 @@ def get_i_item_singlestream(
133
133
 
134
134
  if item_i < 0 or item_i >= len(data_all[campaign_id]["data"]):
135
135
  return JSONResponse(
136
- content={"status": "error", "message": "Item index out of range"},
136
+ content="Item index out of range",
137
137
  status_code=400
138
138
  )
139
139
 
@@ -280,7 +280,7 @@ def reset_task(
280
280
  })
281
281
  progress_data[campaign_id][user_id]["progress"] = [False] * num_items
282
282
  _reset_user_time(progress_data, campaign_id, user_id)
283
- return JSONResponse(content={"status": "ok"}, status_code=200)
283
+ return JSONResponse(content="ok", status_code=200)
284
284
  elif assignment == "single-stream":
285
285
  # Save reset markers for all items (shared pool)
286
286
  num_items = len(tasks_data[campaign_id]["data"])
@@ -294,9 +294,9 @@ def reset_task(
294
294
  for uid in progress_data[campaign_id]:
295
295
  progress_data[campaign_id][uid]["progress"] = [False] * num_items
296
296
  _reset_user_time(progress_data, campaign_id, user_id)
297
- return JSONResponse(content={"status": "ok"}, status_code=200)
297
+ return JSONResponse(content="ok", status_code=200)
298
298
  else:
299
- return JSONResponse(content={"status": "error", "message": "Reset not supported for this assignment type"}, status_code=400)
299
+ return JSONResponse(content="Reset not supported for this assignment type", status_code=400)
300
300
 
301
301
 
302
302
  def update_progress(
@@ -319,8 +319,8 @@ def update_progress(
319
319
  # progress all users
320
320
  for uid in progress_data[campaign_id]:
321
321
  progress_data[campaign_id][uid]["progress"][item_i] = True
322
- return JSONResponse(content={"status": "ok"}, status_code=200)
322
+ return JSONResponse(content="ok", status_code=200)
323
323
  elif assignment == "dynamic":
324
- return JSONResponse(content={"status": "error", "message": "Dynamic protocol logging not implemented yet."}, status_code=400)
324
+ return JSONResponse(content="Dynamic protocol logging not implemented yet.", status_code=400)
325
325
  else:
326
- return JSONResponse(content={"status": "error", "message": "Unknown campaign assignment type"}, status_code=400)
326
+ return JSONResponse(content="Unknown campaign assignment type", status_code=400)
@@ -12,6 +12,9 @@ import psutil
12
12
 
13
13
  from .utils import ROOT, load_progress_data, save_progress_data
14
14
 
15
+ # Static directory path (constant for consistency)
16
+ STATIC_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static")
17
+
15
18
  os.makedirs(f"{ROOT}/data/tasks", exist_ok=True)
16
19
  load_progress_data(warn=None)
17
20
 
@@ -50,6 +53,40 @@ def _run(args_unknown):
50
53
  )
51
54
 
52
55
 
56
+ def _validate_item_structure(items, template):
57
+ """
58
+ Validate that items have the correct structure.
59
+ Items should be lists of dictionaries with 'src' and 'tgt' keys.
60
+
61
+ Args:
62
+ items: List of item dictionaries to validate
63
+ template: Template type ('pointwise' or 'listwise') for type validation
64
+ """
65
+ if not isinstance(items, list):
66
+ raise ValueError("Items must be a list")
67
+
68
+ for item in items:
69
+ if not isinstance(item, dict):
70
+ raise ValueError("Each item must be a dictionary with 'src' and 'tgt' keys")
71
+ if 'src' not in item or 'tgt' not in item:
72
+ raise ValueError("Each item must contain 'src' and 'tgt' keys")
73
+
74
+ # Validate src is always a string
75
+ if not isinstance(item['src'], str):
76
+ raise ValueError("Item 'src' must be a string")
77
+
78
+ # Validate tgt type based on template
79
+ if template == 'listwise':
80
+ if not isinstance(item['tgt'], list):
81
+ raise ValueError("Item 'tgt' must be a list for listwise template")
82
+ # Check that all elements in tgt list are strings
83
+ if not all(isinstance(t, str) for t in item['tgt']):
84
+ raise ValueError("All elements in 'tgt' list must be strings for listwise template")
85
+ elif template == 'pointwise':
86
+ if not isinstance(item['tgt'], str):
87
+ raise ValueError("Item 'tgt' must be a string for pointwise template")
88
+
89
+
53
90
  def _add_single_campaign(data_file, overwrite, server):
54
91
  """
55
92
  Add a single campaign from a JSON data file.
@@ -80,6 +117,7 @@ def _add_single_campaign(data_file, overwrite, server):
80
117
  raise ValueError("Campaign 'info' must contain 'template' field.")
81
118
 
82
119
  assignment = campaign_data["info"]["assignment"]
120
+ template = campaign_data["info"]["template"]
83
121
  # use random words for identifying users
84
122
  rng = random.Random(campaign_data["campaign_id"])
85
123
  rword = wonderwords.RandomWord(rng=rng)
@@ -96,6 +134,13 @@ def _add_single_campaign(data_file, overwrite, server):
96
134
  if not all(isinstance(task, list) for task in tasks):
97
135
  raise ValueError(
98
136
  "Each task in task-based campaign 'data' must be a list of items.")
137
+ # Validate item structure for each task
138
+ for task_i, task in enumerate(tasks):
139
+ for doc_i, doc in enumerate(task):
140
+ try:
141
+ _validate_item_structure(doc, template)
142
+ except ValueError as e:
143
+ raise ValueError(f"Task {task_i}, document {doc_i}: {e}")
99
144
  num_users = len(tasks)
100
145
  elif assignment == "single-stream":
101
146
  tasks = campaign_data["data"]
@@ -105,6 +150,12 @@ def _add_single_campaign(data_file, overwrite, server):
105
150
  if not isinstance(campaign_data["data"], list):
106
151
  raise ValueError(
107
152
  "Single-stream campaign 'data' must be a list of items.")
153
+ # Validate item structure for single-stream
154
+ for doc_i, doc in enumerate(tasks):
155
+ try:
156
+ _validate_item_structure(doc, template)
157
+ except ValueError as e:
158
+ raise ValueError(f"Document {doc_i}: {e}")
108
159
  if isinstance(users_spec, int):
109
160
  num_users = users_spec
110
161
  elif isinstance(users_spec, list):
@@ -199,30 +250,62 @@ def _add_single_campaign(data_file, overwrite, server):
199
250
 
200
251
  # Handle assets symlink if specified
201
252
  if "assets" in campaign_data["info"]:
202
- assets_real_path = campaign_data["info"]["assets"]
253
+ assets_config = campaign_data["info"]["assets"]
254
+
255
+ # assets must be a dictionary with source and destination keys
256
+ if not isinstance(assets_config, dict):
257
+ raise ValueError("Assets must be a dictionary with 'source' and 'destination' keys.")
258
+ if "source" not in assets_config or "destination" not in assets_config:
259
+ raise ValueError("Assets config must contain 'source' and 'destination' keys.")
260
+
261
+ assets_source = assets_config["source"]
262
+ assets_destination = assets_config["destination"]
263
+
264
+ # Validate destination starts with 'assets/'
265
+ if not assets_destination.startswith("assets/"):
266
+ raise ValueError(f"Assets destination '{assets_destination}' must start with 'assets/'.")
203
267
 
204
268
  # Resolve relative paths from the caller's current working directory
205
- assets_real_path = os.path.abspath(assets_real_path)
269
+ assets_real_path = os.path.abspath(assets_source)
206
270
 
207
271
  if not os.path.isdir(assets_real_path):
208
- raise ValueError(f"Assets path '{assets_real_path}' must be an existing directory.")
272
+ raise ValueError(f"Assets source path '{assets_real_path}' must be an existing directory.")
209
273
 
210
- static_dir = f"{os.path.dirname(os.path.abspath(__file__))}/static"
211
- dir_name = assets_real_path.split(os.sep)[-1]
212
-
213
- if not os.path.isdir(static_dir):
274
+ if not os.path.isdir(STATIC_DIR):
214
275
  raise ValueError(
215
- f"Static directory '{static_dir}' does not exist. "
276
+ f"Static directory '{STATIC_DIR}' does not exist. "
216
277
  "Please build the frontend first."
217
278
  )
218
- symlink_path = f"{static_dir}/assets/{dir_name}"
219
-
220
- # Remove existing symlink if present and we are overriding
221
- if os.path.exists(symlink_path):
279
+
280
+ # Symlink path is the destination (which keeps its 'assets/' prefix) placed under the static directory
281
+ symlink_path = f"{STATIC_DIR}/{assets_destination}"
282
+
283
+ # Remove existing symlink if present and we are overriding the same campaign
284
+ if os.path.lexists(symlink_path):
285
+ # Check if any other campaign is using this destination
286
+ current_campaign_id = campaign_data['campaign_id']
287
+ tasks_dir = f"{ROOT}/data/tasks"
288
+ if os.path.exists(tasks_dir):
289
+ for task_file in os.listdir(tasks_dir):
290
+ if task_file.endswith('.json'):
291
+ other_campaign_id = task_file[:-5]
292
+ if other_campaign_id != current_campaign_id:
293
+ with open(f"{tasks_dir}/{task_file}", "r") as f:
294
+ other_campaign = json.load(f)
295
+ other_assets = other_campaign.get("info", {}).get("assets")
296
+ if other_assets and isinstance(other_assets, dict):
297
+ if other_assets.get("destination") == assets_destination:
298
+ raise ValueError(
299
+ f"Assets destination '{assets_destination}' is already used by campaign '{other_campaign_id}'."
300
+ )
301
+ # Only allow overwrite if it's the same campaign
222
302
  if overwrite:
223
303
  os.remove(symlink_path)
224
304
  else:
225
- raise ValueError(f"Assets symlink '{symlink_path}' already exists.")
305
+ raise ValueError(f"Assets destination '{assets_destination}' is already taken.")
306
+
307
+ # Ensure the assets directory exists
308
+ os.makedirs(f"{STATIC_DIR}/assets", exist_ok=True)
226
309
 
227
310
  os.symlink(assets_real_path, symlink_path, target_is_directory=True)
228
311
  print(f"Assets symlinked: {symlink_path} -> {assets_real_path}")
@@ -299,6 +382,20 @@ def main():
299
382
  elif args.command == 'purge':
300
383
  import shutil
301
384
 
385
+ def _unlink_assets(campaign_id):
386
+ """Unlink assets symlink for a campaign if it exists."""
387
+ task_file = f"{ROOT}/data/tasks/{campaign_id}.json"
388
+ if not os.path.exists(task_file):
389
+ return
390
+ with open(task_file, "r") as f:
391
+ campaign_data = json.load(f)
392
+ destination = campaign_data.get("info", {}).get("assets", {}).get("destination")
393
+ if destination:
394
+ symlink_path = f"{STATIC_DIR}/{destination}"
395
+ if os.path.islink(symlink_path):
396
+ os.remove(symlink_path)
397
+ print(f"Assets symlink removed: {symlink_path}")
398
+
302
399
  # Parse optional campaign name
303
400
  purge_args = argparse.ArgumentParser()
304
401
  purge_args.add_argument(
@@ -314,6 +411,8 @@ def main():
314
411
  f"Are you sure you want to purge campaign '{campaign_id}'? This action cannot be undone. [y/n] "
315
412
  )
316
413
  if confirm.lower() == 'y':
414
+ # Unlink assets before removing task file
415
+ _unlink_assets(campaign_id)
317
416
  # Remove task file
318
417
  task_file = f"{ROOT}/data/tasks/{campaign_id}.json"
319
418
  if os.path.exists(task_file):
@@ -336,6 +435,13 @@ def main():
336
435
  "Are you sure you want to purge all campaign data? This action cannot be undone. [y/n] "
337
436
  )
338
437
  if confirm.lower() == 'y':
438
+ # Unlink all assets first
439
+ tasks_dir = f"{ROOT}/data/tasks"
440
+ if os.path.exists(tasks_dir):
441
+ for task_file in os.listdir(tasks_dir):
442
+ if task_file.endswith('.json'):
443
+ campaign_id = task_file[:-5]
444
+ _unlink_assets(campaign_id)
339
445
  shutil.rmtree(f"{ROOT}/data/tasks", ignore_errors=True)
340
446
  shutil.rmtree(f"{ROOT}/data/outputs", ignore_errors=True)
341
447
  if os.path.exists(f"{ROOT}/data/progress.json"):