pearmut 0.3.3-py3-none-any.whl → 1.0.1-py3-none-any.whl

This diff shows the changes between two package versions that have been publicly released to a supported registry, as they appear in that registry. It is provided for informational purposes only.
pearmut/cli.py CHANGED
@@ -23,31 +23,39 @@ def _run(args_unknown):

     args = argparse.ArgumentParser()
     args.add_argument(
-        "--port", type=int, default=8001,
-        help="Port to run the server on"
+        "--port", type=int, default=8001, help="Port to run the server on"
     )
     args.add_argument(
-        "--server", default="http://localhost:8001",
-        help="Prefix server URL for protocol links"
+        "--server",
+        default="http://localhost:8001",
+        help="Prefix server URL for protocol links",
     )
     args = args.parse_args(args_unknown)

     # print access dashboard URL for all campaigns
     if tasks_data:
-        dashboard_url = args.server + "/dashboard.html?" + "&".join([
-            f"campaign_id={urllib.parse.quote_plus(campaign_id)}&token={campaign_data["token"]}"
-            for campaign_id, campaign_data in tasks_data.items()
-        ])
-        print("\033[92mNow serving Pearmut, use the following URL to access the everything-dashboard:\033[0m")
-        print("🍐", dashboard_url+"\n", flush=True)
-
+        dashboard_url = (
+            args.server
+            + "/dashboard.html?"
+            + "&".join(
+                [
+                    f"campaign_id={urllib.parse.quote_plus(campaign_id)}&token={campaign_data["token"]}"
+                    for campaign_id, campaign_data in tasks_data.items()
+                ]
+            )
+        )
+        print(
+            "\033[92mNow serving Pearmut, use the following URL to access the everything-dashboard:\033[0m"
+        )
+        print("🍐", dashboard_url + "\n", flush=True)
+
     # disable startup message
     uvicorn.config.LOGGING_CONFIG["loggers"]["uvicorn.error"]["level"] = "WARNING"
     # set time logging
     uvicorn.config.LOGGING_CONFIG["formatters"]["access"]["datefmt"] = "%Y-%m-%d %H:%M"
-    uvicorn.config.LOGGING_CONFIG["formatters"]["access"]["fmt"] = (
-        '%(asctime)s %(levelprefix)s %(client_addr)s - %(request_line)s %(status_code)s'
-    )
+    uvicorn.config.LOGGING_CONFIG["formatters"]["access"][
+        "fmt"
+    ] = "%(asctime)s %(levelprefix)s %(client_addr)s - %(request_line)s %(status_code)s"
     uvicorn.run(
         app,
         host="0.0.0.0",
@@ -59,9 +67,9 @@ def _run(args_unknown):
 def _validate_item_structure(items):
     """
     Validate that items have the correct structure.
-    Items should be lists of dictionaries with 'src' and 'tgt' keys.
+    Items should be lists of dictionaries with 'tgt' and optionally 'src' and/or 'ref' keys.
     The 'tgt' field should be a dictionary mapping model names to translations.
-
+
     Args:
         items: List of item dictionaries to validate
     """
@@ -70,71 +78,91 @@ def _validate_item_structure(items):

     for item in items:
         if not isinstance(item, dict):
-            raise ValueError("Each item must be a dictionary with 'src' and 'tgt' keys")
-        if 'src' not in item or 'tgt' not in item:
-            raise ValueError("Each item must contain 'src' and 'tgt' keys")
-
-        # Validate src is always a string
-        if not isinstance(item['src'], str):
+            raise ValueError("Each item must be a dictionary with 'tgt' key")
+        if "tgt" not in item:
+            raise ValueError("Each item must contain 'tgt' key")
+
+        # Validate src is a string if present
+        if "src" in item and not isinstance(item["src"], str):
             raise ValueError("Item 'src' must be a string")
-
+
+        # Validate ref is a string if present
+        if "ref" in item and not isinstance(item["ref"], str):
+            raise ValueError("Item 'ref' must be a string")
+
         # Validate tgt is a dictionary (basic template with model names)
-        if isinstance(item['tgt'], str):
+        if isinstance(item["tgt"], str):
             # String not allowed - suggest using dictionary (don't include user input to prevent injection)
-            raise ValueError("Item 'tgt' must be a dictionary mapping model names to translations. For single translation, use {\"default\": \"your_translation\"}")
-        elif isinstance(item['tgt'], dict):
+            raise ValueError(
+                'Item \'tgt\' must be a dictionary mapping model names to translations. For single translation, use {"default": "your_translation"}'
+            )
+        elif isinstance(item["tgt"], dict):
             # Dictionary mapping model names to translations
             # Validate that model names don't contain only numbers (JavaScript ordering issue)
-            for model_name, translation in item['tgt'].items():
+            for model_name, translation in item["tgt"].items():
                 if not isinstance(model_name, str):
                     raise ValueError("Model names in 'tgt' dictionary must be strings")
                 if model_name.isdigit():
-                    raise ValueError(f"Model name '{model_name}' cannot be only numeric digits (would cause issues in JS/TS)")
+                    raise ValueError(
+                        f"Model name '{model_name}' cannot be only numeric digits (would cause issues in JS/TS)"
+                    )
                 if not isinstance(translation, str):
-                    raise ValueError(f"Translation for model '{model_name}' must be a string")
+                    raise ValueError(
+                        f"Translation for model '{model_name}' must be a string"
+                    )
         else:
-            raise ValueError("Item 'tgt' must be a dictionary mapping model names to translations")
-
+            raise ValueError(
+                "Item 'tgt' must be a dictionary mapping model names to translations"
+            )
+
         # Validate error_spans structure if present
-        if 'error_spans' in item:
-            if not isinstance(item['error_spans'], dict):
-                raise ValueError("'error_spans' must be a dictionary mapping model names to error span lists")
-            for model_name, spans in item['error_spans'].items():
+        if "error_spans" in item:
+            if not isinstance(item["error_spans"], dict):
+                raise ValueError(
+                    "'error_spans' must be a dictionary mapping model names to error span lists"
+                )
+            for model_name, spans in item["error_spans"].items():
                 if not isinstance(spans, list):
-                    raise ValueError(f"Error spans for model '{model_name}' must be a list")
-
+                    raise ValueError(
+                        f"Error spans for model '{model_name}' must be a list"
+                    )
+
         # Validate validation structure if present
-        if 'validation' in item:
-            if not isinstance(item['validation'], dict):
-                raise ValueError("'validation' must be a dictionary mapping model names to validation rules")
-            for model_name, val_rule in item['validation'].items():
+        if "validation" in item:
+            if not isinstance(item["validation"], dict):
+                raise ValueError(
+                    "'validation' must be a dictionary mapping model names to validation rules"
+                )
+            for model_name, val_rule in item["validation"].items():
                 if not isinstance(val_rule, dict):
-                    raise ValueError(f"Validation rule for model '{model_name}' must be a dictionary")
+                    raise ValueError(
+                        f"Validation rule for model '{model_name}' must be a dictionary"
+                    )


 def _validate_document_models(doc):
     """
     Validate that all items in a document have the same model outputs.
-
+
     Args:
         doc: List of items in a document
-
+
     Returns:
         None if valid
-
+
     Raises:
         ValueError: If items have different model outputs
     """
     # Get model names from the first item
     first_item = doc[0]
-    first_models = set(first_item['tgt'].keys())
-
+    first_models = set(first_item["tgt"].keys())
+
     # Check all other items have the same model names
     for i, item in enumerate(doc[1:], start=1):
-        if 'tgt' not in item or not isinstance(item['tgt'], dict):
+        if "tgt" not in item or not isinstance(item["tgt"], dict):
             continue
-
-        item_models = set(item['tgt'].keys())
+
+        item_models = set(item["tgt"].keys())
         if item_models != first_models:
             raise ValueError(
                 f"Document contains items with different model outputs. "
@@ -147,71 +175,69 @@ def _validate_document_models(doc):
 def _shuffle_campaign_data(campaign_data, rng):
     """
     Shuffle campaign data at the document level in-place
-
+
     For each document, randomly shuffles the order of models in the tgt dictionary.
-
+
     Args:
         campaign_data: The campaign data dictionary
         rng: Random number generator with campaign-specific seed
     """
+
     def shuffle_document(doc):
         """Shuffle a single document (list of items) by reordering models in tgt dict."""
         # Validate that all items have the same models
         _validate_document_models(doc)
-
+
         # Get all model names from the first item's tgt dict
         first_item = doc[0]
-        model_names = list(first_item['tgt'].keys())
+        model_names = list(first_item["tgt"].keys())
         rng.shuffle(model_names)
-
+
         # Reorder tgt dict for all items in the document
         for item in doc:
-            if 'tgt' in item and isinstance(item['tgt'], dict):
-                item["tgt"] = {
-                    model: item["tgt"][model]
-                    for model in model_names
-                }
-
+            if "tgt" in item and isinstance(item["tgt"], dict):
+                item["tgt"] = {model: item["tgt"][model] for model in model_names}
+
     assignment = campaign_data["info"]["assignment"]
-
+
     if assignment == "task-based":
         # After transformation, data is a dict mapping user_id -> tasks
         for user_id, task in campaign_data["data"].items():
             for doc in task:
                 shuffle_document(doc)
-    elif assignment == "single-stream":
+    elif assignment in ["single-stream", "dynamic"]:
         # Shuffle each document in the shared pool
         for doc in campaign_data["data"]:
             shuffle_document(doc)


-def _add_single_campaign(data_file, overwrite, server):
+def _add_single_campaign(campaign_data, overwrite, server):
     """
-    Add a single campaign from a JSON data file.
+    Add a single campaign from campaign data dictionary.
     """
     import random

     import wonderwords

-    with open(data_file, 'r') as f:
-        campaign_data = json.load(f)
+    if "campaign_id" not in campaign_data:
+        raise ValueError("Campaign data must contain 'campaign_id' field.")
+    if "info" not in campaign_data:
+        raise ValueError("Campaign data must contain 'info' field.")
+    if "data" not in campaign_data:
+        raise ValueError("Campaign data must contain 'data' field.")

     with open(f"{ROOT}/data/progress.json", "r") as f:
         progress_data = json.load(f)

-    if campaign_data['campaign_id'] in progress_data and not overwrite:
+    if campaign_data["campaign_id"] in progress_data and not overwrite:
         raise ValueError(
             f"Campaign {campaign_data['campaign_id']} already exists.\n"
             "Use -o to overwrite."
         )

-    if "info" not in campaign_data:
-        raise ValueError("Campaign data must contain 'info' field.")
-    if "data" not in campaign_data:
-        raise ValueError("Campaign data must contain 'data' field.")
     if "assignment" not in campaign_data["info"]:
         raise ValueError("Campaign 'info' must contain 'assignment' field.")
-
+
     # Template defaults to "basic" if not specified
     assignment = campaign_data["info"]["assignment"]
     # use random words for identifying users
@@ -225,11 +251,11 @@ def _add_single_campaign(data_file, overwrite, server):
     if assignment == "task-based":
         tasks = campaign_data["data"]
         if not isinstance(tasks, list):
-            raise ValueError(
-                "Task-based campaign 'data' must be a list of tasks.")
+            raise ValueError("Task-based campaign 'data' must be a list of tasks.")
         if not all(isinstance(task, list) for task in tasks):
             raise ValueError(
-                "Each task in task-based campaign 'data' must be a list of items.")
+                "Each task in task-based campaign 'data' must be a list of items."
+            )
         # Validate item structure for each task
         for task_i, task in enumerate(tasks):
             for doc_i, doc in enumerate(task):
@@ -241,11 +267,9 @@ def _add_single_campaign(data_file, overwrite, server):
     elif assignment == "single-stream":
         tasks = campaign_data["data"]
         if users_spec is None:
-            raise ValueError(
-                "Single-stream campaigns must specify 'users' in info.")
+            raise ValueError("Single-stream campaigns must specify 'users' in info.")
         if not isinstance(campaign_data["data"], list):
-            raise ValueError(
-                "Single-stream campaign 'data' must be a list of items.")
+            raise ValueError("Single-stream campaign 'data' must be a list of items.")
         # Validate item structure for single-stream
         for doc_i, doc in enumerate(tasks):
             try:
@@ -259,8 +283,50 @@ def _add_single_campaign(data_file, overwrite, server):
         else:
             raise ValueError("'users' must be an integer or a list.")
     elif assignment == "dynamic":
-        raise NotImplementedError(
-            "Dynamic campaign assignment is not yet implemented.")
+        tasks = campaign_data["data"]
+        if users_spec is None:
+            raise ValueError("Dynamic campaigns must specify 'users' in info.")
+        if not isinstance(campaign_data["data"], list):
+            raise ValueError("Dynamic campaign 'data' must be a list of items.")
+        # Validate item structure for dynamic
+        for doc_i, doc in enumerate(tasks):
+            try:
+                _validate_item_structure(doc)
+            except ValueError as e:
+                raise ValueError(f"Document {doc_i}: {e}")
+        if isinstance(users_spec, int):
+            num_users = users_spec
+        elif isinstance(users_spec, list):
+            num_users = len(users_spec)
+        else:
+            raise ValueError("'users' must be an integer or a list.")
+        # Validate dynamic-specific parameters
+        if "dynamic_top" not in campaign_data["info"]:
+            campaign_data["info"]["dynamic_top"] = 2
+        if "dynamic_first" not in campaign_data["info"]:
+            campaign_data["info"]["dynamic_first"] = 5
+        if "dynamic_contrastive_models" not in campaign_data["info"]:
+            campaign_data["info"]["dynamic_contrastive_models"] = 1
+        # Validate that dynamic_first is at least 1
+        assert (
+            campaign_data["info"]["dynamic_first"] >= 1
+        ), "dynamic_first must be at least 1"
+        # Validate that dynamic_contrastive_models is at most dynamic_top
+        assert (
+            campaign_data["info"]["dynamic_contrastive_models"]
+            <= campaign_data["info"]["dynamic_top"]
+        ), "dynamic_contrastive_models must be at most dynamic_top"
+        # Validate that all items have the same models
+        all_models = set()
+        for item in campaign_data["data"]:
+            if item and len(item) > 0:
+                all_models.update(item[0]["tgt"].keys())
+        for item in campaign_data["data"]:
+            if item and len(item) > 0:
+                item_models = set(item[0]["tgt"].keys())
+                assert (
+                    item_models == all_models
+                ), "All items must have the same model outputs"
     else:
         raise ValueError(f"Unknown campaign assignment type: {assignment}")

@@ -272,14 +338,12 @@ def _add_single_campaign(data_file, overwrite, server):
             new_id = f"{rword.random_words(amount=1, include_parts_of_speech=['adjective'])[0]}-{rword.random_words(amount=1, include_parts_of_speech=['noun'])[0]}"
             if new_id not in user_ids:
                 user_ids.append(new_id)
-        user_ids = [
-            f"{user_id}-{rng.randint(0, 999):03d}"
-            for user_id in user_ids
-        ]
+        user_ids = [f"{user_id}-{rng.randint(0, 999):03d}" for user_id in user_ids]
     elif isinstance(users_spec, list):
         if len(users_spec) != num_users:
             raise ValueError(
-                f"Number of users ({len(users_spec)}) must match expected count ({num_users}).")
+                f"Number of users ({len(users_spec)}) must match expected count ({num_users})."
+            )
         if all(isinstance(u, str) for u in users_spec):
             # List of string IDs
             user_ids = users_spec
@@ -298,32 +362,47 @@ def _add_single_campaign(data_file, overwrite, server):
             raise ValueError("'users' list must contain all strings or all dicts.")
     else:
         raise ValueError("'users' must be an integer or a list.")
-
+
     if "protocol" not in campaign_data["info"]:
         campaign_data["info"]["protocol"] = "ESA"
-        print("Warning: 'protocol' not specified in campaign info. Defaulting to 'ESA'.")
+        print(
+            "Warning: 'protocol' not specified in campaign info. Defaulting to 'ESA'."
+        )
+
+    # Validate sliders structure if present
+    if "sliders" in campaign_data["info"]:
+        if not all(
+            isinstance(s, dict)
+            and all(k in s for k in ("name", "min", "max", "step"))
+            and isinstance(s.get("min"), (int, float))
+            and isinstance(s.get("max"), (int, float))
+            and isinstance(s.get("step"), (int, float))
+            and s["min"] <= s["max"]
+            and s["step"] > 0
+            for s in campaign_data["info"]["sliders"]
+        ):
+            raise ValueError(
+                "Each slider must be a dict with 'name', 'min', 'max', and 'step' keys, where min/max/step are numeric, min <= max, and step > 0"
+            )

     # Remove output file when overwriting (after all validations pass)
-    if overwrite and campaign_data['campaign_id'] in progress_data:
+    if overwrite and campaign_data["campaign_id"] in progress_data:
         output_file = f"{ROOT}/data/outputs/{campaign_data['campaign_id']}.jsonl"
         if os.path.exists(output_file):
             os.remove(output_file)

     # For task-based, data is a dict mapping user_id -> tasks
-    # For single-stream, data is a flat list (shared among all users)
+    # For single-stream and dynamic, data is a flat list (shared among all users)
     if assignment == "task-based":
         campaign_data["data"] = {
-            user_id: task
-            for user_id, task in zip(user_ids, tasks)
+            user_id: task for user_id, task in zip(user_ids, tasks)
         }
-    elif assignment == "single-stream":
+    elif assignment in ["single-stream", "dynamic"]:
         campaign_data["data"] = tasks

     # generate a token for dashboard access if not present
     if "token" not in campaign_data:
-        campaign_data["token"] = (
-            hashlib.sha256(random.randbytes(16)).hexdigest()[:10]
-        )
+        campaign_data["token"] = hashlib.sha256(random.randbytes(16)).hexdigest()[:10]

     def get_token(user_id, token_type):
         """Get user token or generate a random one."""
@@ -336,9 +415,17 @@ def _add_single_campaign(data_file, overwrite, server):
         user_id: {
             # TODO: progress tracking could be based on the assignment type
             "progress": (
-                [False]*len(campaign_data["data"][user_id]) if assignment == "task-based"
-                else [False]*len(campaign_data["data"]) if assignment == "single-stream"
-                else []
+                [False] * len(campaign_data["data"][user_id])
+                if assignment == "task-based"
+                else (
+                    [False] * len(campaign_data["data"])
+                    if assignment == "single-stream"
+                    else (
+                        [list() for _ in range(len(campaign_data["data"]))]
+                        if assignment == "dynamic"
+                        else []
+                    )
+                )
             ),
             "time_start": None,
             "time_end": None,
@@ -357,26 +444,34 @@ def _add_single_campaign(data_file, overwrite, server):
     # Handle assets symlink if specified
     if "assets" in campaign_data["info"]:
         assets_config = campaign_data["info"]["assets"]
-
+
         # assets must be a dictionary with source and destination keys
         if not isinstance(assets_config, dict):
-            raise ValueError("Assets must be a dictionary with 'source' and 'destination' keys.")
+            raise ValueError(
+                "Assets must be a dictionary with 'source' and 'destination' keys."
+            )
         if "source" not in assets_config or "destination" not in assets_config:
-            raise ValueError("Assets config must contain 'source' and 'destination' keys.")
-
+            raise ValueError(
+                "Assets config must contain 'source' and 'destination' keys."
+            )
+
         assets_source = assets_config["source"]
         assets_destination = assets_config["destination"]
-
+
         # Validate destination starts with 'assets/'
         if not assets_destination.startswith("assets/"):
-            raise ValueError(f"Assets destination '{assets_destination}' must start with 'assets/'.")
-
+            raise ValueError(
+                f"Assets destination '{assets_destination}' must start with 'assets/'."
+            )
+
         # Resolve relative paths from the caller's current working directory
         assets_real_path = os.path.abspath(assets_source)

         if not os.path.isdir(assets_real_path):
-            raise ValueError(f"Assets source path '{assets_real_path}' must be an existing directory.")
-
+            raise ValueError(
+                f"Assets source path '{assets_real_path}' must be an existing directory."
+            )
+
         # Symlink path is based on the destination, stripping the 'assets/' prefix
         # User assets are now stored under data/assets/ instead of static/assets/
         symlink_path = f"{ROOT}/data/{assets_destination}".rstrip("/")
@@ -384,7 +479,7 @@ def _add_single_campaign(data_file, overwrite, server):
         # Remove existing symlink if present and we are overriding the same campaign
         if os.path.lexists(symlink_path):
             # Check if any other campaign is using this destination
-            current_campaign_id = campaign_data['campaign_id']
+            current_campaign_id = campaign_data["campaign_id"]

             for other_campaign_id in progress_data.keys():
                 if other_campaign_id == current_campaign_id:
@@ -401,8 +496,10 @@ def _add_single_campaign(data_file, overwrite, server):
             if overwrite:
                 os.remove(symlink_path)
             else:
-                raise ValueError(f"Assets destination '{assets_destination}' is already taken.")
-
+                raise ValueError(
+                    f"Assets destination '{assets_destination}' is already taken."
+                )
+
         # Ensure the assets directory exists
         # get parent of symlink_path dir
         os.makedirs(os.path.dirname(symlink_path), exist_ok=True)
@@ -410,7 +507,6 @@ def _add_single_campaign(data_file, overwrite, server):
         os.symlink(assets_real_path, symlink_path, target_is_directory=True)
         print(f"Assets symlinked: {symlink_path} -> {assets_real_path}")

-
     # Shuffle data if shuffle parameter is true (defaults to true)
     should_shuffle = campaign_data["info"].get("shuffle", True)
     if should_shuffle:
@@ -420,17 +516,14 @@ def _add_single_campaign(data_file, overwrite, server):
     with open(f"{ROOT}/data/tasks/{campaign_data['campaign_id']}.json", "w") as f:
         json.dump(campaign_data, f, indent=2, ensure_ascii=False)

-    progress_data[campaign_data['campaign_id']] = user_progress
-
-    with open(f"{ROOT}/data/progress.json", "w") as f:
-        json.dump(progress_data, f, indent=2, ensure_ascii=False)
-
+    progress_data[campaign_data["campaign_id"]] = user_progress
+    save_progress_data(progress_data)

     print(
         "🎛️ ",
         f"{server}/dashboard.html"
         f"?campaign_id={urllib.parse.quote_plus(campaign_data['campaign_id'])}"
-        f"&token={campaign_data['token']}"
+        f"&token={campaign_data['token']}",
     )
     for user_id, user_val in user_progress.items():
         # point to the protocol URL
@@ -444,22 +537,28 @@ def _add_campaign(args_unknown):
     """
     args = argparse.ArgumentParser()
     args.add_argument(
-        'data_files', type=str, nargs='+',
-        help='One or more paths to campaign data files'
+        "data_files",
+        type=str,
+        nargs="+",
+        help="One or more paths to campaign data files",
     )
     args.add_argument(
-        "-o", "--overwrite", action="store_true",
-        help="Overwrite existing campaign if it exists"
+        "-o",
+        "--overwrite",
+        action="store_true",
+        help="Overwrite existing campaign if it exists",
     )
     args.add_argument(
-        "--server", default="http://localhost:8001",
-        help="Prefix server URL for protocol links"
+        "--server",
+        default="http://localhost:8001",
+        help="Prefix server URL for protocol links",
     )
     args = args.parse_args(args_unknown)

     for data_file in args.data_files:
         try:
-            _add_single_campaign(data_file, args.overwrite, args.server)
+            with open(data_file, "r") as f:
+                _add_single_campaign(json.load(f), args.overwrite, args.server)
         except Exception as e:
             print(f"Error processing {data_file}: {e}")
             exit(1)
@@ -470,7 +569,13 @@ def main():
     Main entry point for the CLI.
     """
     args = argparse.ArgumentParser()
-    args.add_argument('command', type=str, choices=['run', 'add', 'purge'])
+    args.add_argument(
+        "command",
+        type=str,
+        choices=["run", "add", "purge"],
+        default="run",
+        nargs="?",
+    )
     args, args_unknown = args.parse_known_args()

     # enforce that only one pearmut process is running
@@ -480,11 +585,11 @@ def main():
             print(p)
             exit(1)

-    if args.command == 'run':
+    if args.command == "run":
         _run(args_unknown)
-    elif args.command == 'add':
+    elif args.command == "add":
         _add_campaign(args_unknown)
-    elif args.command == 'purge':
+    elif args.command == "purge":
         import shutil

         def _unlink_assets(campaign_id):
@@ -494,7 +599,9 @@ def main():
                 return
             with open(task_file, "r") as f:
                 campaign_data = json.load(f)
-            destination = campaign_data.get("info", {}).get("assets", {}).get("destination")
+            destination = (
+                campaign_data.get("info", {}).get("assets", {}).get("destination")
+            )
             if destination:
                 symlink_path = f"{ROOT}/data/{destination}".rstrip("/")
                 if os.path.islink(symlink_path):
@@ -504,8 +611,11 @@ def main():
         # Parse optional campaign name
         purge_args = argparse.ArgumentParser()
         purge_args.add_argument(
-            'campaign', type=str, nargs='?', default=None,
-            help='Optional campaign name to purge (purges all if not specified)'
+            "campaign",
+            type=str,
+            nargs="?",
+            default=None,
+            help="Optional campaign name to purge (purges all if not specified)",
         )
         purge_args = purge_args.parse_args(args_unknown)
         progress_data = load_progress_data()
@@ -519,7 +629,7 @@ def main():
             confirm = input(
                 f"Are you sure you want to purge campaign '{campaign_id}'? This action cannot be undone. [y/n] "
             )
-            if confirm.lower() == 'y':
+            if confirm.lower() == "y":
                 # Unlink assets before removing task file
                 _unlink_assets(campaign_id)
                 # Remove task file
@@ -543,7 +653,7 @@ def main():
             confirm = input(
                 "Are you sure you want to purge all campaign data? This action cannot be undone. [y/n] "
             )
-            if confirm.lower() == 'y':
+            if confirm.lower() == "y":
                 # Unlink all assets first
                 for campaign_id in progress_data.keys():
                     _unlink_assets(campaign_id)