pearmut 1.0.0-py3-none-any.whl → 1.0.2-py3-none-any.whl

This diff shows the contents of two publicly released versions of the package as published to a supported public registry. It is provided for informational purposes only and reflects the changes between those package versions as they appear in the registry.
pearmut/cli.py CHANGED
@@ -3,51 +3,84 @@ Command-line interface for managing and running the Pearmut server.
  """

  import argparse
+ import atexit
+ import fcntl
  import hashlib
  import json
  import os
  import urllib.parse

- import psutil
-
- from .utils import ROOT, load_progress_data, save_progress_data
+ from .utils import ROOT, TOKEN_MAIN, load_progress_data, save_progress_data

  os.makedirs(f"{ROOT}/data/tasks", exist_ok=True)
  load_progress_data(warn=None)


+
  def _run(args_unknown):
+     # Acquire lock before starting server
+     lock_file = f"{ROOT}/data/.lock"
+     try:
+         lock_fd = open(lock_file, "a+")
+         fcntl.flock(lock_fd.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
+         lock_fd.seek(0)
+         lock_fd.truncate()
+         lock_fd.write(str(os.getpid()))
+         lock_fd.flush()
+     except BlockingIOError:
+         try:
+             with open(lock_file, "r") as f:
+                 pid = f.read().strip()
+             print("You can't run multiple instances of Pearmut in the same directory.")
+             if pid:
+                 print(f"Another instance (PID {pid}) is holding the lock.")
+         except (FileNotFoundError, PermissionError, OSError):
+             print("You can't run multiple instances of Pearmut in the same directory.")
+         exit(1)
+
+     # Register cleanup to remove lock file on exit
+     atexit.register(lambda: os.path.exists(lock_file) and os.remove(lock_file))
+
      import uvicorn

      from .app import app, tasks_data

      args = argparse.ArgumentParser()
      args.add_argument(
-         "--port", type=int, default=8001,
-         help="Port to run the server on"
+         "--port", type=int, default=8001, help="Port to run the server on"
      )
      args.add_argument(
-         "--server", default="http://localhost:8001",
-         help="Prefix server URL for protocol links"
+         "--server",
+         default="http://localhost:8001",
+         help="Prefix server URL for protocol links",
      )
      args = args.parse_args(args_unknown)

      # print access dashboard URL for all campaigns
-     if tasks_data:
-         dashboard_url = args.server + "/dashboard.html?" + "&".join([
-             f"campaign_id={urllib.parse.quote_plus(campaign_id)}&token={campaign_data["token"]}"
-             for campaign_id, campaign_data in tasks_data.items()
-         ])
-         print("\033[92mNow serving Pearmut, use the following URL to access the everything-dashboard:\033[0m")
-         print("🍐", dashboard_url+"\n", flush=True)
-
+     dashboard_url = (
+         args.server
+         + "/dashboard?"
+         + f"token_main={TOKEN_MAIN}"
+         + "".join(
+             [
+                 f"&campaign_id={urllib.parse.quote_plus(campaign_id)}&token={campaign_data["token"]}"
+                 for campaign_id, campaign_data in tasks_data.items()
+             ]
+         )
+     )
+     print(
+         "\033[92mNow serving Pearmut, use the following URL to access the everything-dashboard:\033[0m"
+     )
+     print("🍐", dashboard_url + "\n", flush=True)
+
+
      # disable startup message
      uvicorn.config.LOGGING_CONFIG["loggers"]["uvicorn.error"]["level"] = "WARNING"
      # set time logging
      uvicorn.config.LOGGING_CONFIG["formatters"]["access"]["datefmt"] = "%Y-%m-%d %H:%M"
-     uvicorn.config.LOGGING_CONFIG["formatters"]["access"]["fmt"] = (
-         '%(asctime)s %(levelprefix)s %(client_addr)s - %(request_line)s %(status_code)s'
-     )
+     uvicorn.config.LOGGING_CONFIG["formatters"]["access"][
+         "fmt"
+     ] = "%(asctime)s %(levelprefix)s %(client_addr)s - %(request_line)s %(status_code)s"
      uvicorn.run(
          app,
          host="0.0.0.0",
@@ -59,9 +92,9 @@ def _run(args_unknown):
  def _validate_item_structure(items):
      """
      Validate that items have the correct structure.
-     Items should be lists of dictionaries with 'src' and 'tgt' keys.
+     Items should be lists of dictionaries with 'tgt' and optionally 'src' and/or 'ref' keys.
      The 'tgt' field should be a dictionary mapping model names to translations.
-
+
      Args:
          items: List of item dictionaries to validate
      """
@@ -70,71 +103,91 @@ def _validate_item_structure(items):

      for item in items:
          if not isinstance(item, dict):
-             raise ValueError("Each item must be a dictionary with 'src' and 'tgt' keys")
-         if 'src' not in item or 'tgt' not in item:
-             raise ValueError("Each item must contain 'src' and 'tgt' keys")
-
-         # Validate src is always a string
-         if not isinstance(item['src'], str):
+             raise ValueError("Each item must be a dictionary with 'tgt' key")
+         if "tgt" not in item:
+             raise ValueError("Each item must contain 'tgt' key")
+
+         # Validate src is a string if present
+         if "src" in item and not isinstance(item["src"], str):
              raise ValueError("Item 'src' must be a string")
-
-         # Validate tgt is a dictionary (basic template with model names)
-         if isinstance(item['tgt'], str):
+
+         # Validate ref is a string if present
+         if "ref" in item and not isinstance(item["ref"], str):
+             raise ValueError("Item 'ref' must be a string")
+
+         # Validate tgt is a dictionary (annotate template with model names)
+         if isinstance(item["tgt"], str):
              # String not allowed - suggest using dictionary (don't include user input to prevent injection)
-             raise ValueError("Item 'tgt' must be a dictionary mapping model names to translations. For single translation, use {\"default\": \"your_translation\"}")
-         elif isinstance(item['tgt'], dict):
+             raise ValueError(
+                 'Item \'tgt\' must be a dictionary mapping model names to translations. For single translation, use {"default": "your_translation"}'
+             )
+         elif isinstance(item["tgt"], dict):
              # Dictionary mapping model names to translations
              # Validate that model names don't contain only numbers (JavaScript ordering issue)
-             for model_name, translation in item['tgt'].items():
+             for model_name, translation in item["tgt"].items():
                  if not isinstance(model_name, str):
                      raise ValueError("Model names in 'tgt' dictionary must be strings")
                  if model_name.isdigit():
-                     raise ValueError(f"Model name '{model_name}' cannot be only numeric digits (would cause issues in JS/TS)")
+                     raise ValueError(
+                         f"Model name '{model_name}' cannot be only numeric digits (would cause issues in JS/TS)"
+                     )
                  if not isinstance(translation, str):
-                     raise ValueError(f"Translation for model '{model_name}' must be a string")
+                     raise ValueError(
+                         f"Translation for model '{model_name}' must be a string"
+                     )
          else:
-             raise ValueError("Item 'tgt' must be a dictionary mapping model names to translations")
-
+             raise ValueError(
+                 "Item 'tgt' must be a dictionary mapping model names to translations"
+             )
+
          # Validate error_spans structure if present
-         if 'error_spans' in item:
-             if not isinstance(item['error_spans'], dict):
-                 raise ValueError("'error_spans' must be a dictionary mapping model names to error span lists")
-             for model_name, spans in item['error_spans'].items():
+         if "error_spans" in item:
+             if not isinstance(item["error_spans"], dict):
+                 raise ValueError(
+                     "'error_spans' must be a dictionary mapping model names to error span lists"
+                 )
+             for model_name, spans in item["error_spans"].items():
                  if not isinstance(spans, list):
-                     raise ValueError(f"Error spans for model '{model_name}' must be a list")
-
+                     raise ValueError(
+                         f"Error spans for model '{model_name}' must be a list"
+                     )
+
          # Validate validation structure if present
-         if 'validation' in item:
-             if not isinstance(item['validation'], dict):
-                 raise ValueError("'validation' must be a dictionary mapping model names to validation rules")
-             for model_name, val_rule in item['validation'].items():
+         if "validation" in item:
+             if not isinstance(item["validation"], dict):
+                 raise ValueError(
+                     "'validation' must be a dictionary mapping model names to validation rules"
+                 )
+             for model_name, val_rule in item["validation"].items():
                  if not isinstance(val_rule, dict):
-                     raise ValueError(f"Validation rule for model '{model_name}' must be a dictionary")
+                     raise ValueError(
+                         f"Validation rule for model '{model_name}' must be a dictionary"
+                     )


  def _validate_document_models(doc):
      """
      Validate that all items in a document have the same model outputs.
-
+
      Args:
          doc: List of items in a document
-
+
      Returns:
          None if valid
-
+
      Raises:
          ValueError: If items have different model outputs
      """
      # Get model names from the first item
      first_item = doc[0]
-     first_models = set(first_item['tgt'].keys())
-
+     first_models = set(first_item["tgt"].keys())
+
      # Check all other items have the same model names
      for i, item in enumerate(doc[1:], start=1):
-         if 'tgt' not in item or not isinstance(item['tgt'], dict):
+         if "tgt" not in item or not isinstance(item["tgt"], dict):
              continue
-
-         item_models = set(item['tgt'].keys())
+
+         item_models = set(item["tgt"].keys())
          if item_models != first_models:
              raise ValueError(
                  f"Document contains items with different model outputs. "
@@ -147,33 +200,31 @@ def _validate_document_models(doc):
  def _shuffle_campaign_data(campaign_data, rng):
      """
      Shuffle campaign data at the document level in-place
-
+
      For each document, randomly shuffles the order of models in the tgt dictionary.
-
+
      Args:
          campaign_data: The campaign data dictionary
          rng: Random number generator with campaign-specific seed
      """
+
      def shuffle_document(doc):
          """Shuffle a single document (list of items) by reordering models in tgt dict."""
          # Validate that all items have the same models
          _validate_document_models(doc)
-
+
          # Get all model names from the first item's tgt dict
          first_item = doc[0]
-         model_names = list(first_item['tgt'].keys())
+         model_names = list(first_item["tgt"].keys())
          rng.shuffle(model_names)
-
+
          # Reorder tgt dict for all items in the document
          for item in doc:
-             if 'tgt' in item and isinstance(item['tgt'], dict):
-                 item["tgt"] = {
-                     model: item["tgt"][model]
-                     for model in model_names
-                 }
-
+             if "tgt" in item and isinstance(item["tgt"], dict):
+                 item["tgt"] = {model: item["tgt"][model] for model in model_names}
+
      assignment = campaign_data["info"]["assignment"]
-
+
      if assignment == "task-based":
          # After transformation, data is a dict mapping user_id -> tasks
          for user_id, task in campaign_data["data"].items():
@@ -185,34 +236,34 @@ def _shuffle_campaign_data(campaign_data, rng):
              shuffle_document(doc)


- def _add_single_campaign(data_file, overwrite, server):
+ def _add_single_campaign(campaign_data, overwrite, server):
      """
-     Add a single campaign from a JSON data file.
+     Add a single campaign from campaign data dictionary.
      """
      import random

      import wonderwords

-     with open(data_file, 'r') as f:
-         campaign_data = json.load(f)
+     if "campaign_id" not in campaign_data:
+         raise ValueError("Campaign data must contain 'campaign_id' field.")
+     if "info" not in campaign_data:
+         raise ValueError("Campaign data must contain 'info' field.")
+     if "data" not in campaign_data:
+         raise ValueError("Campaign data must contain 'data' field.")

      with open(f"{ROOT}/data/progress.json", "r") as f:
          progress_data = json.load(f)

-     if campaign_data['campaign_id'] in progress_data and not overwrite:
+     if campaign_data["campaign_id"] in progress_data and not overwrite:
          raise ValueError(
              f"Campaign {campaign_data['campaign_id']} already exists.\n"
              "Use -o to overwrite."
          )

-     if "info" not in campaign_data:
-         raise ValueError("Campaign data must contain 'info' field.")
-     if "data" not in campaign_data:
-         raise ValueError("Campaign data must contain 'data' field.")
      if "assignment" not in campaign_data["info"]:
          raise ValueError("Campaign 'info' must contain 'assignment' field.")
-
-     # Template defaults to "basic" if not specified
+
+     # Template defaults to "annotate" if not specified
      assignment = campaign_data["info"]["assignment"]
      # use random words for identifying users
      rng = random.Random()
@@ -225,11 +276,11 @@ def _add_single_campaign(data_file, overwrite, server):
      if assignment == "task-based":
          tasks = campaign_data["data"]
          if not isinstance(tasks, list):
-             raise ValueError(
-                 "Task-based campaign 'data' must be a list of tasks.")
+             raise ValueError("Task-based campaign 'data' must be a list of tasks.")
          if not all(isinstance(task, list) for task in tasks):
              raise ValueError(
-                 "Each task in task-based campaign 'data' must be a list of items.")
+                 "Each task in task-based campaign 'data' must be a list of items."
+             )
          # Validate item structure for each task
          for task_i, task in enumerate(tasks):
              for doc_i, doc in enumerate(task):
@@ -241,11 +292,9 @@ def _add_single_campaign(data_file, overwrite, server):
      elif assignment == "single-stream":
          tasks = campaign_data["data"]
          if users_spec is None:
-             raise ValueError(
-                 "Single-stream campaigns must specify 'users' in info.")
+             raise ValueError("Single-stream campaigns must specify 'users' in info.")
          if not isinstance(campaign_data["data"], list):
-             raise ValueError(
-                 "Single-stream campaign 'data' must be a list of items.")
+             raise ValueError("Single-stream campaign 'data' must be a list of items.")
          # Validate item structure for single-stream
          for doc_i, doc in enumerate(tasks):
              try:
@@ -261,11 +310,9 @@ def _add_single_campaign(data_file, overwrite, server):
      elif assignment == "dynamic":
          tasks = campaign_data["data"]
          if users_spec is None:
-             raise ValueError(
-                 "Dynamic campaigns must specify 'users' in info.")
+             raise ValueError("Dynamic campaigns must specify 'users' in info.")
          if not isinstance(campaign_data["data"], list):
-             raise ValueError(
-                 "Dynamic campaign 'data' must be a list of items.")
+             raise ValueError("Dynamic campaign 'data' must be a list of items.")
          # Validate item structure for dynamic
          for doc_i, doc in enumerate(tasks):
              try:
@@ -286,10 +333,14 @@ def _add_single_campaign(data_file, overwrite, server):
          if "dynamic_contrastive_models" not in campaign_data["info"]:
              campaign_data["info"]["dynamic_contrastive_models"] = 1
          # Validate that dynamic_first is at least 1
-         assert campaign_data["info"]["dynamic_first"] >= 1, "dynamic_first must be at least 1"
+         assert (
+             campaign_data["info"]["dynamic_first"] >= 1
+         ), "dynamic_first must be at least 1"
          # Validate that dynamic_contrastive_models is at most dynamic_top
-         assert campaign_data["info"]["dynamic_contrastive_models"] <= campaign_data["info"]["dynamic_top"], \
-             "dynamic_contrastive_models must be at most dynamic_top"
+         assert (
+             campaign_data["info"]["dynamic_contrastive_models"]
+             <= campaign_data["info"]["dynamic_top"]
+         ), "dynamic_contrastive_models must be at most dynamic_top"
          # Validate that all items have the same models
          all_models = set()
          for item in campaign_data["data"]:
@@ -298,7 +349,9 @@ def _add_single_campaign(data_file, overwrite, server):
          for item in campaign_data["data"]:
              if item and len(item) > 0:
                  item_models = set(item[0]["tgt"].keys())
-                 assert item_models == all_models, "All items must have the same model outputs"
+                 assert (
+                     item_models == all_models
+                 ), "All items must have the same model outputs"
      else:
          raise ValueError(f"Unknown campaign assignment type: {assignment}")

@@ -310,14 +363,12 @@ def _add_single_campaign(data_file, overwrite, server):
              new_id = f"{rword.random_words(amount=1, include_parts_of_speech=['adjective'])[0]}-{rword.random_words(amount=1, include_parts_of_speech=['noun'])[0]}"
              if new_id not in user_ids:
                  user_ids.append(new_id)
-         user_ids = [
-             f"{user_id}-{rng.randint(0, 999):03d}"
-             for user_id in user_ids
-         ]
+         user_ids = [f"{user_id}-{rng.randint(0, 999):03d}" for user_id in user_ids]
      elif isinstance(users_spec, list):
          if len(users_spec) != num_users:
              raise ValueError(
-                 f"Number of users ({len(users_spec)}) must match expected count ({num_users}).")
+                 f"Number of users ({len(users_spec)}) must match expected count ({num_users})."
+             )
          if all(isinstance(u, str) for u in users_spec):
              # List of string IDs
              user_ids = users_spec
@@ -336,13 +387,31 @@ def _add_single_campaign(data_file, overwrite, server):
              raise ValueError("'users' list must contain all strings or all dicts.")
      else:
          raise ValueError("'users' must be an integer or a list.")
-
+
      if "protocol" not in campaign_data["info"]:
          campaign_data["info"]["protocol"] = "ESA"
-         print("Warning: 'protocol' not specified in campaign info. Defaulting to 'ESA'.")
+         print(
+             "Warning: 'protocol' not specified in campaign info. Defaulting to 'ESA'."
+         )
+
+     # Validate sliders structure if present
+     if "sliders" in campaign_data["info"]:
+         if not all(
+             isinstance(s, dict)
+             and all(k in s for k in ("name", "min", "max", "step"))
+             and isinstance(s.get("min"), (int, float))
+             and isinstance(s.get("max"), (int, float))
+             and isinstance(s.get("step"), (int, float))
+             and s["min"] <= s["max"]
+             and s["step"] > 0
+             for s in campaign_data["info"]["sliders"]
+         ):
+             raise ValueError(
+                 "Each slider must be a dict with 'name', 'min', 'max', and 'step' keys, where min/max/step are numeric, min <= max, and step > 0"
+             )

      # Remove output file when overwriting (after all validations pass)
-     if overwrite and campaign_data['campaign_id'] in progress_data:
+     if overwrite and campaign_data["campaign_id"] in progress_data:
          output_file = f"{ROOT}/data/outputs/{campaign_data['campaign_id']}.jsonl"
          if os.path.exists(output_file):
              os.remove(output_file)
@@ -351,17 +420,14 @@ def _add_single_campaign(data_file, overwrite, server):
      # For single-stream and dynamic, data is a flat list (shared among all users)
      if assignment == "task-based":
          campaign_data["data"] = {
-             user_id: task
-             for user_id, task in zip(user_ids, tasks)
+             user_id: task for user_id, task in zip(user_ids, tasks)
          }
      elif assignment in ["single-stream", "dynamic"]:
          campaign_data["data"] = tasks

      # generate a token for dashboard access if not present
      if "token" not in campaign_data:
-         campaign_data["token"] = (
-             hashlib.sha256(random.randbytes(16)).hexdigest()[:10]
-         )
+         campaign_data["token"] = hashlib.sha256(random.randbytes(16)).hexdigest()[:10]

      def get_token(user_id, token_type):
          """Get user token or generate a random one."""
@@ -374,16 +440,23 @@ def _add_single_campaign(data_file, overwrite, server):
          user_id: {
              # TODO: progress tracking could be based on the assignment type
              "progress": (
-                 [False]*len(campaign_data["data"][user_id]) if assignment == "task-based"
-                 else [False]*len(campaign_data["data"]) if assignment == "single-stream"
-                 else [list() for _ in range(len(campaign_data["data"]))] if assignment == "dynamic"
-                 else []
+                 [False] * len(campaign_data["data"][user_id])
+                 if assignment == "task-based"
+                 else (
+                     [False] * len(campaign_data["data"])
+                     if assignment == "single-stream"
+                     else (
+                         [list() for _ in range(len(campaign_data["data"]))]
+                         if assignment == "dynamic"
+                         else []
+                     )
+                 )
              ),
              "time_start": None,
              "time_end": None,
              "time": 0,
              "url": (
-                 f"{campaign_data['info'].get("template", "basic")}.html"
+                 f"{campaign_data['info'].get('template', 'annotate')}"
                  f"?campaign_id={urllib.parse.quote_plus(campaign_data['campaign_id'])}"
                  f"&user_id={user_id}"
              ),
@@ -396,26 +469,34 @@ def _add_single_campaign(data_file, overwrite, server):
      # Handle assets symlink if specified
      if "assets" in campaign_data["info"]:
          assets_config = campaign_data["info"]["assets"]
-
+
          # assets must be a dictionary with source and destination keys
          if not isinstance(assets_config, dict):
-             raise ValueError("Assets must be a dictionary with 'source' and 'destination' keys.")
+             raise ValueError(
+                 "Assets must be a dictionary with 'source' and 'destination' keys."
+             )
          if "source" not in assets_config or "destination" not in assets_config:
-             raise ValueError("Assets config must contain 'source' and 'destination' keys.")
-
+             raise ValueError(
+                 "Assets config must contain 'source' and 'destination' keys."
+             )
+
          assets_source = assets_config["source"]
          assets_destination = assets_config["destination"]
-
+
          # Validate destination starts with 'assets/'
          if not assets_destination.startswith("assets/"):
-             raise ValueError(f"Assets destination '{assets_destination}' must start with 'assets/'.")
-
+             raise ValueError(
+                 f"Assets destination '{assets_destination}' must start with 'assets/'."
+             )
+
          # Resolve relative paths from the caller's current working directory
          assets_real_path = os.path.abspath(assets_source)

          if not os.path.isdir(assets_real_path):
-             raise ValueError(f"Assets source path '{assets_real_path}' must be an existing directory.")
-
+             raise ValueError(
+                 f"Assets source path '{assets_real_path}' must be an existing directory."
+             )
+
          # Symlink path is based on the destination, stripping the 'assets/' prefix
          # User assets are now stored under data/assets/ instead of static/assets/
          symlink_path = f"{ROOT}/data/{assets_destination}".rstrip("/")
@@ -423,7 +504,7 @@ def _add_single_campaign(data_file, overwrite, server):
          # Remove existing symlink if present and we are overriding the same campaign
          if os.path.lexists(symlink_path):
              # Check if any other campaign is using this destination
-             current_campaign_id = campaign_data['campaign_id']
+             current_campaign_id = campaign_data["campaign_id"]

              for other_campaign_id in progress_data.keys():
                  if other_campaign_id == current_campaign_id:
@@ -440,8 +521,10 @@ def _add_single_campaign(data_file, overwrite, server):
              if overwrite:
                  os.remove(symlink_path)
              else:
-                 raise ValueError(f"Assets destination '{assets_destination}' is already taken.")
-
+                 raise ValueError(
+                     f"Assets destination '{assets_destination}' is already taken."
+                 )
+
          # Ensure the assets directory exists
          # get parent of symlink_path dir
          os.makedirs(os.path.dirname(symlink_path), exist_ok=True)
@@ -449,7 +532,6 @@ def _add_single_campaign(data_file, overwrite, server):
          os.symlink(assets_real_path, symlink_path, target_is_directory=True)
          print(f"Assets symlinked: {symlink_path} -> {assets_real_path}")

-
      # Shuffle data if shuffle parameter is true (defaults to true)
      should_shuffle = campaign_data["info"].get("shuffle", True)
      if should_shuffle:
@@ -459,15 +541,14 @@ def _add_single_campaign(data_file, overwrite, server):
      with open(f"{ROOT}/data/tasks/{campaign_data['campaign_id']}.json", "w") as f:
          json.dump(campaign_data, f, indent=2, ensure_ascii=False)

-     progress_data[campaign_data['campaign_id']] = user_progress
+     progress_data[campaign_data["campaign_id"]] = user_progress
      save_progress_data(progress_data)

-
      print(
          "🎛️ ",
          f"{server}/dashboard.html"
          f"?campaign_id={urllib.parse.quote_plus(campaign_data['campaign_id'])}"
-         f"&token={campaign_data['token']}"
+         f"&token={campaign_data['token']}",
      )
      for user_id, user_val in user_progress.items():
          # point to the protocol URL
@@ -481,22 +562,28 @@ def _add_campaign(args_unknown):
      """
      args = argparse.ArgumentParser()
      args.add_argument(
-         'data_files', type=str, nargs='+',
-         help='One or more paths to campaign data files'
+         "data_files",
+         type=str,
+         nargs="+",
+         help="One or more paths to campaign data files",
      )
      args.add_argument(
-         "-o", "--overwrite", action="store_true",
-         help="Overwrite existing campaign if it exists"
+         "-o",
+         "--overwrite",
+         action="store_true",
+         help="Overwrite existing campaign if it exists",
      )
      args.add_argument(
-         "--server", default="http://localhost:8001",
-         help="Prefix server URL for protocol links"
+         "--server",
+         default="http://localhost:8001",
+         help="Prefix server URL for protocol links",
      )
      args = args.parse_args(args_unknown)

      for data_file in args.data_files:
          try:
-             _add_single_campaign(data_file, args.overwrite, args.server)
+             with open(data_file, "r") as f:
+                 _add_single_campaign(json.load(f), args.overwrite, args.server)
          except Exception as e:
              print(f"Error processing {data_file}: {e}")
              exit(1)
@@ -507,21 +594,20 @@ def main():
      Main entry point for the CLI.
      """
      args = argparse.ArgumentParser()
-     args.add_argument('command', type=str, choices=['run', 'add', 'purge'])
+     args.add_argument(
+         "command",
+         type=str,
+         choices=["run", "add", "purge"],
+         default="run",
+         nargs="?",
+     )
      args, args_unknown = args.parse_known_args()

-     # enforce that only one pearmut process is running
-     for p in psutil.process_iter():
-         if "pearmut" == p.name() and p.pid != os.getpid():
-             print("Exit all running pearmut processes before running more commands.")
-             print(p)
-             exit(1)
-
-     if args.command == 'run':
+     if args.command == "run":
          _run(args_unknown)
-     elif args.command == 'add':
+     elif args.command == "add":
          _add_campaign(args_unknown)
-     elif args.command == 'purge':
+     elif args.command == "purge":
          import shutil

          def _unlink_assets(campaign_id):
@@ -531,7 +617,9 @@ def main():
                  return
              with open(task_file, "r") as f:
                  campaign_data = json.load(f)
-             destination = campaign_data.get("info", {}).get("assets", {}).get("destination")
+             destination = (
+                 campaign_data.get("info", {}).get("assets", {}).get("destination")
+             )
              if destination:
                  symlink_path = f"{ROOT}/data/{destination}".rstrip("/")
                  if os.path.islink(symlink_path):
@@ -541,8 +629,11 @@ def main():
          # Parse optional campaign name
          purge_args = argparse.ArgumentParser()
          purge_args.add_argument(
-             'campaign', type=str, nargs='?', default=None,
-             help='Optional campaign name to purge (purges all if not specified)'
+             "campaign",
+             type=str,
+             nargs="?",
+             default=None,
+             help="Optional campaign name to purge (purges all if not specified)",
          )
          purge_args = purge_args.parse_args(args_unknown)
          progress_data = load_progress_data()
@@ -556,7 +647,7 @@ def main():
              confirm = input(
                  f"Are you sure you want to purge campaign '{campaign_id}'? This action cannot be undone. [y/n] "
              )
-             if confirm.lower() == 'y':
+             if confirm.lower() == "y":
                  # Unlink assets before removing task file
                  _unlink_assets(campaign_id)
                  # Remove task file
@@ -580,7 +671,7 @@ def main():
              confirm = input(
                  "Are you sure you want to purge all campaign data? This action cannot be undone. [y/n] "
              )
-             if confirm.lower() == 'y':
+             if confirm.lower() == "y":
                  # Unlink all assets first
                  for campaign_id in progress_data.keys():
                      _unlink_assets(campaign_id)
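
Note (not part of the diff): below is a minimal sketch of a campaign file that would pass the validation added or tightened in 1.0.2, i.e. the explicit campaign_id/info/data checks, an assignment type, and items whose 'tgt' maps model names to translation strings, with 'src' and 'ref' now optional. The campaign name, model names, user count, and file names here are illustrative assumptions, not values taken from the package.

# make_campaign.py -- illustrative sketch only; values below are made up
import json

campaign = {
    "campaign_id": "demo-campaign",      # required; now checked explicitly in _add_single_campaign
    "info": {
        "assignment": "task-based",      # "task-based", "single-stream", or "dynamic"
        "users": 1,                      # optional for task-based; required for single-stream/dynamic
        "protocol": "ESA",               # defaults to "ESA" with a warning if omitted
    },
    # task-based data: a list of tasks, each task a list of documents,
    # each document a list of items
    "data": [
        [
            [
                {
                    "src": "Bonjour le monde",   # optional since 1.0.2
                    "ref": "Hello, world",       # optional; type-checked since 1.0.2
                    "tgt": {                     # required: model name -> translation
                        "model-a": "Hello world",
                        "model-b": "Hi world",
                    },
                },
            ],
        ],
    ],
}

with open("demo-campaign.json", "w") as f:
    json.dump(campaign, f, indent=2, ensure_ascii=False)

Assuming the console script is installed as pearmut, such a file would be registered with "pearmut add demo-campaign.json" and served with "pearmut run"; as of 1.0.2 a bare "pearmut" defaults to the run command, and single-instance enforcement uses an fcntl lock on data/.lock instead of scanning processes with psutil.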