pearmut 0.2.5__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pearmut/cli.py CHANGED
@@ -12,6 +12,9 @@ import psutil
12
12
 
13
13
  from .utils import ROOT, load_progress_data, save_progress_data
14
14
 
15
+ # Static directory path (constant for consistency)
16
+ STATIC_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static")
17
+
15
18
  os.makedirs(f"{ROOT}/data/tasks", exist_ok=True)
16
19
  load_progress_data(warn=None)
17
20
 
@@ -50,6 +53,40 @@ def _run(args_unknown):
50
53
  )
51
54
 
52
55
 
56
def _validate_item_structure(items, template):
    """
    Check that a document's items are well-formed, raising on the first problem.

    Args:
        items: List of item dictionaries; every item needs 'src' and 'tgt' keys
            and 'src' must be a string.
        template: Template name. 'pointwise' requires 'tgt' to be a string and
            'listwise' requires 'tgt' to be a list of strings. For any other
            value the type of 'tgt' is not checked.

    Raises:
        ValueError: If `items` is not a list or any item is malformed.
    """
    if not isinstance(items, list):
        raise ValueError("Items must be a list")

    for entry in items:
        # Shape checks first: a dictionary carrying both required keys.
        if not isinstance(entry, dict):
            raise ValueError("Each item must be a dictionary with 'src' and 'tgt' keys")
        if not ('src' in entry and 'tgt' in entry):
            raise ValueError("Each item must contain 'src' and 'tgt' keys")

        # The source side is a single string regardless of template.
        if not isinstance(entry['src'], str):
            raise ValueError("Item 'src' must be a string")

        # The expected target type depends on the template.
        target = entry['tgt']
        if template == 'pointwise':
            if not isinstance(target, str):
                raise ValueError("Item 'tgt' must be a string for pointwise template")
        elif template == 'listwise':
            if not isinstance(target, list):
                raise ValueError("Item 'tgt' must be a list for listwise template")
            for candidate in target:
                if not isinstance(candidate, str):
                    raise ValueError("All elements in 'tgt' list must be strings for listwise template")
88
+
89
+
53
90
  def _add_single_campaign(data_file, overwrite, server):
54
91
  """
55
92
  Add a single campaign from a JSON data file.
@@ -80,6 +117,7 @@ def _add_single_campaign(data_file, overwrite, server):
80
117
  raise ValueError("Campaign 'info' must contain 'template' field.")
81
118
 
82
119
  assignment = campaign_data["info"]["assignment"]
120
+ template = campaign_data["info"]["template"]
83
121
  # use random words for identifying users
84
122
  rng = random.Random(campaign_data["campaign_id"])
85
123
  rword = wonderwords.RandomWord(rng=rng)
@@ -96,6 +134,13 @@ def _add_single_campaign(data_file, overwrite, server):
96
134
  if not all(isinstance(task, list) for task in tasks):
97
135
  raise ValueError(
98
136
  "Each task in task-based campaign 'data' must be a list of items.")
137
+ # Validate item structure for each task
138
+ for task_i, task in enumerate(tasks):
139
+ for doc_i, doc in enumerate(task):
140
+ try:
141
+ _validate_item_structure(doc, template)
142
+ except ValueError as e:
143
+ raise ValueError(f"Task {task_i}, document {doc_i}: {e}")
99
144
  num_users = len(tasks)
100
145
  elif assignment == "single-stream":
101
146
  tasks = campaign_data["data"]
@@ -105,6 +150,12 @@ def _add_single_campaign(data_file, overwrite, server):
105
150
  if not isinstance(campaign_data["data"], list):
106
151
  raise ValueError(
107
152
  "Single-stream campaign 'data' must be a list of items.")
153
+ # Validate item structure for single-stream
154
+ for doc_i, doc in enumerate(tasks):
155
+ try:
156
+ _validate_item_structure(doc, template)
157
+ except ValueError as e:
158
+ raise ValueError(f"Document {doc_i}: {e}")
108
159
  if isinstance(users_spec, int):
109
160
  num_users = users_spec
110
161
  elif isinstance(users_spec, list):
@@ -199,30 +250,62 @@ def _add_single_campaign(data_file, overwrite, server):
199
250
 
200
251
  # Handle assets symlink if specified
201
252
  if "assets" in campaign_data["info"]:
202
- assets_real_path = campaign_data["info"]["assets"]
253
+ assets_config = campaign_data["info"]["assets"]
254
+
255
+ # assets must be a dictionary with source and destination keys
256
+ if not isinstance(assets_config, dict):
257
+ raise ValueError("Assets must be a dictionary with 'source' and 'destination' keys.")
258
+ if "source" not in assets_config or "destination" not in assets_config:
259
+ raise ValueError("Assets config must contain 'source' and 'destination' keys.")
260
+
261
+ assets_source = assets_config["source"]
262
+ assets_destination = assets_config["destination"]
263
+
264
+ # Validate destination starts with 'assets/'
265
+ if not assets_destination.startswith("assets/"):
266
+ raise ValueError(f"Assets destination '{assets_destination}' must start with 'assets/'.")
203
267
 
204
268
  # Resolve relative paths from the caller's current working directory
205
- assets_real_path = os.path.abspath(assets_real_path)
269
+ assets_real_path = os.path.abspath(assets_source)
206
270
 
207
271
  if not os.path.isdir(assets_real_path):
208
- raise ValueError(f"Assets path '{assets_real_path}' must be an existing directory.")
272
+ raise ValueError(f"Assets source path '{assets_real_path}' must be an existing directory.")
209
273
 
210
- static_dir = f"{os.path.dirname(os.path.abspath(__file__))}/static"
211
- dir_name = assets_real_path.split(os.sep)[-1]
212
-
213
- if not os.path.isdir(static_dir):
274
+ if not os.path.isdir(STATIC_DIR):
214
275
  raise ValueError(
215
- f"Static directory '{static_dir}' does not exist. "
276
+ f"Static directory '{STATIC_DIR}' does not exist. "
216
277
  "Please build the frontend first."
217
278
  )
218
- symlink_path = f"{static_dir}/assets/{dir_name}"
219
-
220
- # Remove existing symlink if present and we are overriding
221
- if os.path.exists(symlink_path):
279
+
280
+ # Symlink path is STATIC_DIR joined with the destination, which already includes the 'assets/' prefix
281
+ symlink_path = f"{STATIC_DIR}/{assets_destination}"
282
+
283
+ # Remove existing symlink if present and we are overriding the same campaign
284
+ if os.path.lexists(symlink_path):
285
+ # Check if any other campaign is using this destination
286
+ current_campaign_id = campaign_data['campaign_id']
287
+ tasks_dir = f"{ROOT}/data/tasks"
288
+ if os.path.exists(tasks_dir):
289
+ for task_file in os.listdir(tasks_dir):
290
+ if task_file.endswith('.json'):
291
+ other_campaign_id = task_file[:-5]
292
+ if other_campaign_id != current_campaign_id:
293
+ with open(f"{tasks_dir}/{task_file}", "r") as f:
294
+ other_campaign = json.load(f)
295
+ other_assets = other_campaign.get("info", {}).get("assets")
296
+ if other_assets and isinstance(other_assets, dict):
297
+ if other_assets.get("destination") == assets_destination:
298
+ raise ValueError(
299
+ f"Assets destination '{assets_destination}' is already used by campaign '{other_campaign_id}'."
300
+ )
301
+ # Only allow overwrite if it's the same campaign
222
302
  if overwrite:
223
303
  os.remove(symlink_path)
224
304
  else:
225
- raise ValueError(f"Assets symlink '{symlink_path}' already exists.")
305
+ raise ValueError(f"Assets destination '{assets_destination}' is already taken.")
306
+
307
+ # Ensure the assets directory exists
308
+ os.makedirs(f"{STATIC_DIR}/assets", exist_ok=True)
226
309
 
227
310
  os.symlink(assets_real_path, symlink_path, target_is_directory=True)
228
311
  print(f"Assets symlinked: {symlink_path} -> {assets_real_path}")
@@ -299,6 +382,20 @@ def main():
299
382
  elif args.command == 'purge':
300
383
  import shutil
301
384
 
385
def _unlink_assets(campaign_id):
    """
    Remove the static assets symlink registered by a campaign, if any.

    Reads the campaign's task file and, when its 'info' -> 'assets' entry is a
    dictionary with a non-empty 'destination', removes the symlink at
    '{STATIC_DIR}/{destination}'. Does nothing when the task file is missing,
    when no usable destination is recorded, or when that path is not a symlink.

    Args:
        campaign_id: Campaign identifier; its task file is read from
            '{ROOT}/data/tasks/{campaign_id}.json'.
    """
    task_file = f"{ROOT}/data/tasks/{campaign_id}.json"
    if not os.path.exists(task_file):
        return
    with open(task_file, "r") as f:
        campaign_data = json.load(f)

    # 'assets' may be missing, null, or a plain string path instead of a
    # dictionary; chaining .get() on those would raise AttributeError, so
    # every level is type-checked before it is dereferenced.
    info = campaign_data.get("info") if isinstance(campaign_data, dict) else None
    assets = info.get("assets") if isinstance(info, dict) else None
    destination = assets.get("destination") if isinstance(assets, dict) else None
    if not destination:
        return

    symlink_path = f"{STATIC_DIR}/{destination}"
    # Only symlinks are removed; a real file or directory at this path is left alone.
    if os.path.islink(symlink_path):
        os.remove(symlink_path)
        print(f"Assets symlink removed: {symlink_path}")
398
+
302
399
  # Parse optional campaign name
303
400
  purge_args = argparse.ArgumentParser()
304
401
  purge_args.add_argument(
@@ -314,6 +411,8 @@ def main():
314
411
  f"Are you sure you want to purge campaign '{campaign_id}'? This action cannot be undone. [y/n] "
315
412
  )
316
413
  if confirm.lower() == 'y':
414
+ # Unlink assets before removing task file
415
+ _unlink_assets(campaign_id)
317
416
  # Remove task file
318
417
  task_file = f"{ROOT}/data/tasks/{campaign_id}.json"
319
418
  if os.path.exists(task_file):
@@ -336,6 +435,13 @@ def main():
336
435
  "Are you sure you want to purge all campaign data? This action cannot be undone. [y/n] "
337
436
  )
338
437
  if confirm.lower() == 'y':
438
+ # Unlink all assets first
439
+ tasks_dir = f"{ROOT}/data/tasks"
440
+ if os.path.exists(tasks_dir):
441
+ for task_file in os.listdir(tasks_dir):
442
+ if task_file.endswith('.json'):
443
+ campaign_id = task_file[:-5]
444
+ _unlink_assets(campaign_id)
339
445
  shutil.rmtree(f"{ROOT}/data/tasks", ignore_errors=True)
340
446
  shutil.rmtree(f"{ROOT}/data/outputs", ignore_errors=True)
341
447
  if os.path.exists(f"{ROOT}/data/progress.json"):